├── __init__.py ├── GraphGenerator ├── __init__.py ├── models │ ├── __init__.py │ ├── bter_ops │ │ ├── tricnt_mex.mexa64 │ │ ├── tricnt_mex.mexw64 │ │ ├── ccperdegest_mex.mexw64 │ │ ├── template.m │ │ ├── LICENSE.txt │ │ ├── dplpdf.m │ │ ├── dglnpdf.m │ │ ├── ccperdegest.m │ │ ├── edges2graph.m │ │ ├── random_sample.m │ │ ├── bter_edges2graph.m │ │ ├── binstart.m │ │ ├── binlookup.m │ │ ├── tricnt.m │ │ ├── gendegdist.m │ │ ├── cc_param_search.m │ │ ├── ccperdeg.m │ │ ├── bindata.m │ │ ├── degdist_param_search.m │ │ └── tricnt_mex.c │ ├── kronecker_ops │ │ ├── kronecker_src.zip │ │ └── readme.txt │ ├── rmat.py │ ├── bigg_ops │ │ ├── __init__.py │ │ ├── tree_clib │ │ │ ├── __init__.py │ │ │ ├── reame.md │ │ │ ├── include │ │ │ │ ├── cuda_ops.h │ │ │ │ ├── config.h │ │ │ │ ├── tree_util.h │ │ │ │ ├── tree_clib.h │ │ │ │ └── struct_util.h │ │ │ ├── src │ │ │ │ └── lib │ │ │ │ │ ├── cuda_ops.cu │ │ │ │ │ ├── config.cpp │ │ │ │ │ └── tree_util.cpp │ │ │ ├── Makefile │ │ │ ├── Makefile_70 │ │ │ └── Makefile_75 │ │ └── tensor_ops.py │ ├── er.py │ ├── bter.py │ ├── ba.py │ ├── sbm.py │ ├── kronecker.py │ ├── vgae.py │ ├── ws.py │ ├── mmsb.py │ ├── graphite.py │ ├── rtg.py │ └── bigg.py ├── train │ ├── __init__.py │ ├── train_netgan.py │ └── train_base.py ├── utils │ ├── __init__.py │ ├── logger.py │ └── arg_utils.py ├── evaluate │ ├── __init__.py │ ├── efficiency.py │ ├── distance.py │ └── diff.py ├── metrics │ ├── __init__.py │ ├── speed.py │ ├── memory.py │ └── mmd.py ├── preprocessing │ ├── __init__.py │ ├── dataio.py │ └── utils.py ├── test │ ├── __init__.py │ └── test_bigg.py └── __main__.py ├── Tutorial for developer of GraphGenerator.pdf ├── requirements.txt ├── config ├── sbm.yaml ├── dcsbm.yaml ├── bter.yaml ├── b-a.yaml ├── e-r.yaml ├── w-s.yaml ├── rmat.yaml ├── kronecker.yaml ├── template.yaml ├── sbmgnn.yaml ├── graphite.yaml ├── vgae.yaml ├── netgan.yaml ├── graphrnn.yaml ├── bigg.yaml └── gran.yaml ├── LICENSE ├── setup.py ├── .gitignore └── README.md /__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /GraphGenerator/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /GraphGenerator/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /GraphGenerator/train/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /GraphGenerator/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /GraphGenerator/evaluate/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /GraphGenerator/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /GraphGenerator/preprocessing/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /Tutorial for developer of GraphGenerator.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI4DataSynth/GraphGenerator/HEAD/Tutorial for developer of GraphGenerator.pdf -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | matplotlib 2 | networkx 3 | numpy 4 | pyemd 5 | python-louvain 6 | pyyaml 7 | scipy 8 | sklearn 9 | six 10 | torch 11 | tqdm 12 | -------------------------------------------------------------------------------- /GraphGenerator/models/bter_ops/tricnt_mex.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI4DataSynth/GraphGenerator/HEAD/GraphGenerator/models/bter_ops/tricnt_mex.mexa64 -------------------------------------------------------------------------------- /GraphGenerator/models/bter_ops/tricnt_mex.mexw64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI4DataSynth/GraphGenerator/HEAD/GraphGenerator/models/bter_ops/tricnt_mex.mexw64 -------------------------------------------------------------------------------- /config/sbm.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | exp_name: SBM 3 | exp_dir: exp/SBM 4 | seed: 1234 5 | dataset: 6 | name: top10 7 | data_path: data/ 8 | model: 9 | name: SBM 10 | -------------------------------------------------------------------------------- /GraphGenerator/models/bter_ops/ccperdegest_mex.mexw64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI4DataSynth/GraphGenerator/HEAD/GraphGenerator/models/bter_ops/ccperdegest_mex.mexw64 -------------------------------------------------------------------------------- /GraphGenerator/models/kronecker_ops/kronecker_src.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI4DataSynth/GraphGenerator/HEAD/GraphGenerator/models/kronecker_ops/kronecker_src.zip -------------------------------------------------------------------------------- /GraphGenerator/models/rmat.py: -------------------------------------------------------------------------------- 1 | import GraphGenerator.models.kronecker as kronecker 2 | 3 | 4 | def generate(*params): 5 | return kronecker.generate(*params) 6 | 7 | 8 | -------------------------------------------------------------------------------- /config/dcsbm.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | exp_name: DCSBM 3 | exp_dir: exp/DCSBM 4 | seed: 1234 5 | dataset: 6 | name: top10 7 | data_path: data/ 8 | model: 9 | name: DCSBM 10 | -------------------------------------------------------------------------------- /GraphGenerator/test/__init__.py: -------------------------------------------------------------------------------- 1 | from .test_bigg import bigg_test as bigg 2 | 3 | 4 | def test_generator(args, config): 5 | eval(args.generator)(args, config) 6 | return 7 | -------------------------------------------------------------------------------- /config/bter.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | exp_name: BTER 3 | exp_dir: exp/BTER 4 | seed: 1234 5 | 
dataset: 6 | name: top10 7 | data_path: data/ 8 | model: 9 | name: BTER 10 | num_gen: 10 11 | -------------------------------------------------------------------------------- /config/b-a.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | exp_name: B-A 3 | exp_dir: exp/B-A 4 | seed: 1234 5 | dataset: 6 | name: top10 7 | data_path: data/ 8 | model: 9 | name: B-A 10 | num_nodes: 1000 11 | num_gen: 10 12 | -------------------------------------------------------------------------------- /config/e-r.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | exp_name: E-R 3 | exp_dir: exp/E-R 4 | seed: 1234 5 | dataset: 6 | name: top10 7 | data_path: data/ 8 | model: 9 | name: E-R 10 | num_nodes: 1000 11 | num_gen: 10 12 | -------------------------------------------------------------------------------- /config/w-s.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | exp_name: W-S 3 | exp_dir: exp/W-S 4 | seed: 1234 5 | dataset: 6 | name: top10 7 | data_path: data/ 8 | model: 9 | name: W-S 10 | num_nodes: 1000 11 | num_gen: 10 12 | -------------------------------------------------------------------------------- /config/rmat.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | exp_name: RMAT 3 | exp_dir: exp/RMAT 4 | seed: 1234 5 | dataset: 6 | name: top10 7 | data_path: data/ 8 | model: 9 | name: RMAT 10 | init_mat: 0.9 0.3 0.3 0.1 11 | num_gen: 10 12 | -------------------------------------------------------------------------------- /config/kronecker.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | exp_name: Kronecker 3 | exp_dir: exp/Kronecker 4 | seed: 1234 5 | device: cpu 6 | gpu: -1 7 | dataset: 8 | name: g1000000 9 | data_path: data/ 10 | model: 11 | name: Kronecker 12 | init_mat: 0.9 0.6; 0.6 0.1 13 | num_gen: 1 14 | -------------------------------------------------------------------------------- /GraphGenerator/preprocessing/dataio.py: -------------------------------------------------------------------------------- 1 | import pickle, os, sys 2 | 3 | 4 | def load_data(path): 5 | if os.path.exists(path): 6 | graph = pickle.load(open(path, "rb")) 7 | return graph 8 | else: 9 | print("Invalid input data...") 10 | sys.exit(1) 11 | 12 | 13 | def save_data(obj, name): 14 | pickle.dump(obj, open("{}".format(name), "wb")) 15 | return 0 16 | 17 | -------------------------------------------------------------------------------- /GraphGenerator/models/kronecker_ops/readme.txt: -------------------------------------------------------------------------------- 1 | # readme before using kronecker graph generator 2 | In order to reduce space, we compressed the source codes of kronecker graph generator. 3 | Before using kronecker, we need to decompress and compile them: 4 | 5 | 1. unzip `kronecker_src.zip` 6 | 7 | ```bash 8 | cd path_to/GraphGenerator/GraphGenerator/models/kronecker_ops 9 | unzip -o -d . kronecker_src.zip 10 | ``` 11 | 12 | 2. reinstall this package 13 | ```bash 14 | pip uninstall GraphGenerator 15 | cd path_to/GraphGenerator 16 | pip install -e . 
17 | ``` 18 | 19 | -------------------------------------------------------------------------------- /config/template.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | exp_name: Template_name 3 | exp_dir: exp/Template 4 | device: cuda:0 5 | gpu: 1 6 | #device: cpu 7 | seed: 1234 8 | dataset: 9 | name: Template_data 10 | data_path: data/ 11 | model: 12 | name: Template_model 13 | num_nodes: 1000 14 | embedding_dim: 32 15 | hidden_dim: 32 16 | train: 17 | optimizer: Adam 18 | lr: 1.0e-2 19 | max_epochs: 1000 20 | display_iter: 10 21 | snapshot_epoch: 1000 22 | test: 23 | test_model_dir: exp/Template_name/Template_model_Template_data_2021-Feb-25-10-38-59_12345 24 | test_model_name: model_snapshot_0001000.pth 25 | -------------------------------------------------------------------------------- /GraphGenerator/preprocessing/utils.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | import sys 3 | 4 | 5 | def edgelist_to_graph(path): 6 | try: 7 | graph = nx.read_edgelist(path) 8 | return graph 9 | except: 10 | print("Wrong path entered! Absolute path of edgelist file expected.") 11 | sys.exit(1) 12 | 13 | 14 | def pathlist_to_graphlist(path): 15 | with open(path, "r") as f: 16 | path_list = f.readlines() 17 | path_list = [p.strip("\n") for p in path_list if p != "\n"] 18 | graph_list = [edgelist_to_graph(p) for p in path_list] 19 | return graph_list 20 | -------------------------------------------------------------------------------- /config/sbmgnn.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | exp_name: SBMGNN 3 | exp_dir: exp/SBMGNN 4 | seed: 1234 5 | gpu: 1 6 | device: cuda:1 7 | dataset: 8 | name: top10 9 | data_path: data/ 10 | features: false 11 | model: 12 | name: SBMGNN 13 | hidden: [64, 50] 14 | num_nodes: 100 15 | g_hidden: 32 16 | deep_decoder: 1 17 | dropout: 0.5 18 | alpha0: 10. 19 | temp_prior: 0.5 20 | temp_post: 1. 
21 | variational: false 22 | train: 23 | lr: 0.01 24 | max_epochs: 100 25 | weight_decay: 0.0 26 | use_k_fold: false 27 | k: 5 28 | early_stopping: 0 29 | split_idx: 0 30 | weighted_ce: 1 31 | reconstruct_x: false -------------------------------------------------------------------------------- /config/graphite.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | exp_name: Graphite 3 | exp_dir: exp/Graphite 4 | device: cuda:1 5 | gpu: 1 6 | #device: cpu 7 | seed: 1234 8 | dataset: 9 | name: top10 10 | data_path: data/ 11 | model: 12 | name: Graphite 13 | num_nodes: 10000 14 | embedding_dim: 32 15 | hidden_dim: 32 16 | decoding_dim: 32 17 | variational: false 18 | has_feature: false 19 | train: 20 | optimizer: Adam 21 | lr: 1.0e-2 22 | max_epochs: 100 23 | display_iter: 10 24 | snapshot_epoch: 1000 25 | #test: 26 | # test_model_dir: exp/VGAE/VGAE_cora_2021-Feb-25-10-38-59_12345 27 | # test_model_name: model_snapshot_0001000.pth 28 | -------------------------------------------------------------------------------- /GraphGenerator/metrics/speed.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | def time_decorator(func): 4 | def time_record(*args, **kwargs): 5 | startt = time.time() 6 | res = func(*args, **kwargs) 7 | endt = time.time() 8 | time_consumption = endt - startt 9 | print("Time Consumption of {}: {:.6f}s.".format(func.__name__, time_consumption)) 10 | return res 11 | return time_record 12 | 13 | 14 | @time_decorator 15 | def test_deco(n): 16 | for i in range(n): 17 | continue 18 | 19 | 20 | if __name__ == '__main__': 21 | n = 1048576 22 | test_deco(n) 23 | for i in range(7): 24 | n *= 2 25 | test_deco(n) -------------------------------------------------------------------------------- /GraphGenerator/models/bigg_ops/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The Google Research Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /GraphGenerator/models/bigg_ops/tree_clib/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The Google Research Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | -------------------------------------------------------------------------------- /config/vgae.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | exp_name: VGAE 3 | exp_dir: exp/VGAE 4 | device: cuda:1 5 | gpu: 1 6 | #device: cpu 7 | seed: 1234 8 | dataset: 9 | name: top10 10 | data_path: data/ 11 | model: 12 | name: VGAE 13 | num_nodes: 100000 14 | embedding_dim: 32 15 | hidden_dim: 32 16 | variational: false 17 | has_feature: false 18 | num_GNN_layers: 2 19 | train: 20 | optimizer: Adam 21 | lr: 1.0e-2 22 | max_epochs: 100 23 | display_iter: 10 24 | snapshot_epoch: 1000 25 | #test: 26 | # test_model_dir: exp/VGAE/VGAE_cora_2021-Feb-25-10-38-59_12345 27 | # test_model_name: model_snapshot_0001000.pth 28 | eval: 29 | num_nodes: [100, 1000, 10000, 100000, 1000000] 30 | graph_type: W-S # selected from [E-R, B-A, W-S, grid] -------------------------------------------------------------------------------- /config/netgan.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | exp_name: NetGAN 3 | exp_dir: exp/NetGAN 4 | device: cuda:1 5 | gpu: 1 6 | #device: cpu 7 | seed: 1234 8 | dataset: 9 | name: top10 10 | data_path: data/ 11 | model: 12 | name: NetGAN 13 | num_nodes: 1000 14 | embedding_dim: 256 15 | rw_len: 16 16 | train: 17 | optimizer: Adam 18 | lr: 1.0e-4 19 | batch_size: 128 20 | max_epochs: 200000 21 | eval_iter: 2000 22 | display_iter: 200000 23 | #display_iter: 20000 24 | snapshot_epoch: 2000 25 | val_share: 0.15 26 | test_share: 0.05 27 | stopping_criterion: val 28 | #stopping_criterion: eo # 'eo' means early stopping 29 | test: 30 | sample_num: 1000 31 | num_gen: 10 32 | test_model_dir: exp/Template_name/Template_model_Template_data_2021-Feb-25-10-38-59_12345 33 | test_model_name: model_snapshot_0001000.pth 34 | -------------------------------------------------------------------------------- /GraphGenerator/models/bigg_ops/tree_clib/reame.md: -------------------------------------------------------------------------------- 1 | # Installation on NVIDIA GeForce RTX 3090 and CUDA 11.1 2 | Using default Makefile, there is no bug when installing BiGG in this environment. 3 | 4 | # Installation on different devices and environments 5 | The installation of BiGG requires one more step, i.e., check the computing capability of your gpu. 6 | 7 | ## check the computing capability 8 | Visiting this website, we can query the corresponding computing capability: https://developer.nvidia.com/cuda-gpus 9 | 10 | ## choosing specific Makefile 11 | According to the query result, choosing specific Makefile_xx as your Makefile. 12 | 13 | For example, if the existing GPU device is GeForce RTX 2080, the `Makefile_75` or `Makefile_70` can be renamed 14 | as `Makefile` because the computing capability of 2080 Ti is `7.5`. 15 | 16 | -------------------------------------------------------------------------------- /GraphGenerator/models/bigg_ops/tree_clib/include/cuda_ops.h: -------------------------------------------------------------------------------- 1 | // Copyright 2020 The Google Research Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #ifndef CUDA_OP_H 16 | #define CUDA_OP_H 17 | 18 | #include 19 | 20 | void build_binary_mat(int n_rows, int n_ints, int n_feats, int* lens, 21 | uint32_t* bits, float* outptr); 22 | 23 | #endif 24 | -------------------------------------------------------------------------------- /config/graphrnn.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | exp_name: GraphRNN 3 | exp_dir: exp/GraphRNN 4 | device: cuda:1 5 | gpu: 1 6 | #device: cpu 7 | seed: 1234 8 | dataset: 9 | name: top10 10 | data_path: data/ 11 | num_workers: 4 12 | model: 13 | #name: GraphRNN_RNN 14 | name: GraphRNN_MLP 15 | hidden_size_rnn: 128 16 | hidden_size_rnn_output: 16 17 | embedding_size_rnn: 64 18 | embedding_size_rnn_output: 8 19 | embedding_size_output: 64 20 | num_layers: 4 21 | max_num_node: null 22 | max_prev_node: null 23 | train: 24 | optimizer: Adam 25 | lr: 3.0e-3 26 | lr_rate: 0.3 27 | milestones: [1000] 28 | epochs: 2000 29 | epochs_log: 1 30 | batch_ratio: 1 31 | batch_size: 1 32 | validate_epoch: 100 33 | validate_sample: 1 34 | save_snapshot: true 35 | snapshot_epoch: 100 36 | resume: false 37 | resume_epoch: 100 38 | save: false 39 | save_epoch_by: 200 40 | test: 41 | batch_size: 1 42 | test_model_dir: exp/Template_name/Template_model_Template_data_2021-Feb-25-10-38-59_12345 43 | test_model_name: model_snapshot_0001000.pth 44 | -------------------------------------------------------------------------------- /GraphGenerator/models/bter_ops/template.m: -------------------------------------------------------------------------------- 1 | 2 | % step1 3 | load('%##{Template Block}##%'); 4 | nnodes = size(G,1); 5 | nedges = nnz(G)/2; 6 | fprintf('Graph name: %s\n', graphname); 7 | fprintf('Number of nodes: %d\n', nnodes); 8 | fprintf('Number of edges: %d\n', nedges); 9 | 10 | % step2 11 | nd = accumarray(nonzeros(sum(G,2)),1); 12 | maxdegree = find(nd>0,1,'last'); 13 | fprintf('Maximum degree: %d\n', maxdegree); 14 | 15 | % step3 16 | [ccd,gcc] = ccperdeg(G); 17 | fprintf('Global clustering coefficient: %.2f\n', gcc); 18 | 19 | G_bter = {}; 20 | for i = 1:%##{Template Block}##%% step4 21 | fprintf('Running BTER...\n'); 22 | t1=tic; 23 | [E1,E2] = bter(nd,ccd); 24 | toc(t1) 25 | fprintf('Number of edges created by BTER: %d\n', size(E1,1) + size(E2,1));% step5 26 | fprintf('Turning edge list into adjacency matrix (including dedup)...\n'); 27 | t2=tic; 28 | tmpg_bter = bter_edges2graph(E1,E2); 29 | toc(t2); 30 | fprintf('Number of edges in dedup''d graph: %d\n', nnz(G)/2); 31 | G_bter{end+1} = tmpg_bter; 32 | end 33 | 34 | save('%##{Template Block}##%','G_bter') -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 VOIX 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation 
the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /config/bigg.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | exp_name: BiGG 3 | exp_dir: exp/BiGG 4 | device: cuda:2 5 | #device: cpu 6 | gpu: 2 7 | seed: 1234 8 | dataset: 9 | name: top10 10 | data_path: data/ 11 | directed: false 12 | self_loop: false 13 | bfs_permute: false 14 | model: 15 | name: BiGG 16 | max_num_nodes: 743 17 | embed_dim: 256 18 | num_RNN_layers: 2 19 | bits_compress: 256 20 | tree_pos_enc: false 21 | pos_enc: true 22 | pos_base: 10000 23 | greedy_frac: 0.0 24 | share_param: true 25 | blksize: -1 26 | train: 27 | batch_size: 32 28 | optimizer: Adam 29 | lr: 3.0e-4 30 | grad_clip: 5 31 | accum_grad: 1 32 | max_epochs: 100 33 | display_iter: 10 34 | save_snapshot: false 35 | snapshot_epoch: 100 36 | resume: false 37 | resume_epoch: 99 38 | resume_model_dir: exp/BiGG/BiGG_top10_2021-Mar-15-19-44-39_115041 39 | resume_model_name: epoch-100.ckpt 40 | test: 41 | num_test_gen: 10 42 | greedy_frac: 0.0 43 | display: false 44 | load_snapshot: false 45 | test_model_dir: exp/BiGG/BiGG_top10_2021-Mar-15-19-44-39_115041 46 | test_model_name: epoch-100.ckpt 47 | eval: 48 | num_nodes: [100, 1000, 10000, 100000, 1000000] 49 | graph_type: W-S # selected from [E-R, B-A, W-S, grid] -------------------------------------------------------------------------------- /GraphGenerator/test/test_bigg.py: -------------------------------------------------------------------------------- 1 | from tqdm import tqdm 2 | import torch 3 | import torch.optim as optim 4 | 5 | import numpy as np 6 | import random 7 | import networkx as nx 8 | from GraphGenerator.utils.arg_utils import get_config, set_device 9 | from GraphGenerator.models.bigg_ops.tree_clib.tree_lib import setup_treelib, TreeLib 10 | from GraphGenerator.models.bigg_ops.tree_model import RecurTreeGen 11 | 12 | 13 | def bigg_test(args, config): 14 | random.seed(config.seed) 15 | torch.manual_seed(config.seed) 16 | np.random.seed(config.seed) 17 | set_device(config) 18 | setup_treelib(config) 19 | 20 | train_graphs = [nx.barabasi_albert_graph(10, 2)] 21 | TreeLib.InsertGraph(train_graphs[0]) 22 | max_num_nodes = max([len(gg.nodes) for gg in train_graphs]) 23 | config.model.max_num_nodes = max_num_nodes 24 | 25 | model = RecurTreeGen(config).to(config.device) 26 | optimizer = optim.Adam(model.parameters(), lr=config.train.lr, weight_decay=1e-4) 27 | for i in range(2): 28 | optimizer.zero_grad() 29 | ll, _ = model.forward_train([0]) 30 | loss = -ll / max_num_nodes 31 | print('iter', i, 'loss', loss.item()) 32 | loss.backward() 33 | optimizer.step() 34 | 
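
The smoke test above can also be driven through `GraphGenerator.test.test_generator`, which resolves the generator name via `eval(args.generator)`. A minimal sketch of such a call follows; the ad-hoc `SimpleNamespace` args object is an assumption for illustration (the real CLI entry point lives in `__main__.py`), and it presumes the compiled `tree_clib` plus the GPU settings from the shipped `config/bigg.yaml`:

```python
from types import SimpleNamespace

from GraphGenerator.test import test_generator
from GraphGenerator.utils.arg_utils import get_config

# config/bigg.yaml ships with the repository; get_config also seeds the RNGs and
# snapshots the resolved config under exp/BiGG/.
config = get_config("config/bigg.yaml")

# "bigg" matches the name imported in GraphGenerator/test/__init__.py,
# so eval(args.generator) dispatches to bigg_test.
args = SimpleNamespace(generator="bigg")

test_generator(args, config)  # runs the two-iteration BiGG training smoke test
```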
-------------------------------------------------------------------------------- /GraphGenerator/utils/logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | 4 | def setup_logging(log_level, log_file, logger_name="exp_logger"): 5 | """ Setup logging """ 6 | numeric_level = getattr(logging, log_level.upper(), None) 7 | if not isinstance(numeric_level, int): 8 | raise ValueError("Invalid log level: %s" % log_level) 9 | 10 | logging.basicConfig( 11 | filename=log_file, 12 | filemode="w", 13 | format="%(levelname)-5s | %(asctime)s | File %(filename)-20s | Line %(lineno)-5d | %(message)s", 14 | datefmt="%m/%d/%Y %I:%M:%S %p", 15 | level=numeric_level) 16 | 17 | # define a Handler which writes messages to the sys.stderr 18 | console = logging.StreamHandler() 19 | console.setLevel(numeric_level) 20 | # set a format which is simpler for console use 21 | formatter = logging.Formatter( 22 | "%(levelname)-5s | %(asctime)s | %(filename)-25s | line %(lineno)-5d: %(message)s" 23 | ) 24 | # tell the handler to use this format 25 | console.setFormatter(formatter) 26 | # add the handler to the root logger 27 | logging.getLogger(logger_name).addHandler(console) 28 | 29 | return get_logger(logger_name) 30 | 31 | 32 | def get_logger(logger_name="exp_logger"): 33 | return logging.getLogger(logger_name) 34 | -------------------------------------------------------------------------------- /GraphGenerator/models/bigg_ops/tree_clib/include/config.h: -------------------------------------------------------------------------------- 1 | // Copyright 2020 The Google Research Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #ifndef cfg_H 16 | #define cfg_H 17 | 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | 25 | typedef float Dtype; 26 | 27 | struct cfg 28 | { 29 | static int max_num_nodes; 30 | static bool directed, self_loop, bfs_permute; 31 | static int bits_compress; 32 | static int dim_embed; 33 | static int gpu; 34 | static int seed; 35 | 36 | static std::default_random_engine generator; 37 | 38 | static void LoadParams(const int argc, const char** argv); 39 | 40 | static void SetRandom(); 41 | }; 42 | 43 | #endif 44 | -------------------------------------------------------------------------------- /GraphGenerator/metrics/memory.py: -------------------------------------------------------------------------------- 1 | import torch, os 2 | 3 | ## return current gpu memory cached 4 | # torch.cuda.memory_reserved() 5 | ## return peak gpu memory cached 6 | # torch.cuda.max_memory_reserved() 7 | ## reset peak gpu memory cached 8 | # torch.cuda.reset_peak_memory_stats() 9 | 10 | def get_peak_gpu_memory(device='cuda:0'): 11 | """ 12 | :return: maximum memory cached (Byte) 13 | """ 14 | return torch.cuda.max_memory_reserved(device) 15 | 16 | 17 | def flush_cached_gpu_memory(): 18 | torch.cuda.empty_cache() 19 | torch.cuda.reset_peak_memory_stats() 20 | 21 | 22 | def test_memory_usage(): 23 | flush_cached_gpu_memory() 24 | current_memory = get_peak_gpu_memory()//1024 25 | print("Current gpu memory cached: {} KiB".format(current_memory)) 26 | flush_cached_gpu_memory() 27 | a = torch.ones(3,3).cuda() 28 | print("Add a tensor to gpu.") 29 | current_memory = get_peak_gpu_memory() // 1024 30 | print("Current gpu memory cached: {} KiB".format(current_memory)) 31 | del a 32 | print("Delete a tensor from gpu.") 33 | flush_cached_gpu_memory() 34 | current_memory = get_peak_gpu_memory() // 1024 35 | print("Current gpu memory cached: {} KiB".format(current_memory)) 36 | 37 | 38 | if __name__ == '__main__': 39 | os.environ["CUDA_VISIBLE_DEVICES"] = "1" 40 | test_memory_usage() -------------------------------------------------------------------------------- /GraphGenerator/models/er.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | import itertools 3 | import math 4 | import random 5 | 6 | 7 | def empty_graph(num_nodes): 8 | g = nx.Graph() 9 | g.add_nodes_from(range(num_nodes)) 10 | return g 11 | 12 | 13 | def complete_graph(num_nodes): 14 | g = empty_graph(num_nodes) 15 | edges = itertools.combinations(range(num_nodes), 2) 16 | g.add_edges_from(edges) 17 | return g 18 | 19 | 20 | def random_graph(num_nodes, p): 21 | g = empty_graph(num_nodes) 22 | if p <= 0: 23 | return g 24 | if p >= 1: 25 | return complete_graph(num_nodes) 26 | n = num_nodes 27 | w = -1 28 | lp = math.log(1.0 - p) 29 | # Nodes in graph are from 0,n-1 (start with v as the second node index). 
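# This is the geometric-skip sampler for sparse G(n, p) graphs (Batagelj & Brandes):
# rather than testing every candidate pair, the gap to the next edge is drawn from a
# geometric distribution via int(log(1 - U) / log(1 - p)), so the expected cost is
# O(n + m) instead of O(n^2).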
30 | v = 1 31 | while v < n: 32 | lr = math.log(1.0 - random.random()) 33 | w = w + 1 + int(lr / lp) 34 | while w >= v and v < n: 35 | w = w - v 36 | v = v + 1 37 | if v < n: 38 | g.add_edge(v, w) 39 | return g 40 | 41 | 42 | def e_r(in_graph, config): 43 | """ 44 | E-R graph generator 45 | :param in_graph: referenced graph, type: nx.Graph 46 | :param config: configure object 47 | :return: generated graphs, type: list of nx.Graph 48 | """ 49 | num_edges = in_graph.number_of_edges() 50 | num_nodes = in_graph.number_of_nodes() 51 | p = num_edges/(num_nodes*(num_nodes-1)/2) 52 | out_graphs = [] 53 | for i in range(config.num_gen): 54 | out_graph = random_graph(num_nodes, p) 55 | out_graphs.append(out_graph) 56 | return out_graphs 57 | -------------------------------------------------------------------------------- /GraphGenerator/models/bigg_ops/tree_clib/src/lib/cuda_ops.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "cuda_ops.h" // NOLINT 4 | #include "cuda_runtime.h" // NOLINT 5 | 6 | 7 | 8 | __global__ void binary_build_kernel(int n_ints, int n_feats, int* lens, 9 | uint32_t* bits, float* outptr) 10 | { 11 | int row = blockIdx.x; 12 | float* feat_ptr = outptr + row * n_feats; 13 | uint32_t* cur_bits = bits + row * n_ints; 14 | int bit_start = threadIdx.x; 15 | int bit_end = lens[row]; 16 | int bit_steps = blockDim.x; 17 | for (int i = bit_start; i < bit_end; i += bit_steps) 18 | { 19 | int slot = i / 32; 20 | uint32_t pos = i % 32; 21 | uint32_t bit = cur_bits[slot] & ((uint32_t)1 << pos); 22 | feat_ptr[i] = bit ? 1 : -1; 23 | } 24 | } 25 | 26 | void build_binary_mat(int n_rows, int n_ints, int n_feats, int* lens, 27 | uint32_t* bits, float* outptr) 28 | { 29 | int* lens_gpu; 30 | uint32_t* bits_gpu; 31 | cudaError_t t = cudaMalloc(&lens_gpu, sizeof(int) * n_rows); 32 | assert(t == cudaSuccess); 33 | t = cudaMalloc(&bits_gpu, sizeof(uint32_t) * n_ints * n_rows); 34 | assert(t == cudaSuccess); 35 | 36 | cudaMemcpy(lens_gpu, lens, sizeof(int) * n_rows, cudaMemcpyHostToDevice); 37 | cudaMemcpy(bits_gpu, bits, sizeof(uint32_t) * n_rows * n_ints, 38 | cudaMemcpyHostToDevice); 39 | 40 | dim3 grid(n_rows); 41 | dim3 block(1024); 42 | binary_build_kernel<<>>(n_ints, n_feats, lens_gpu, 43 | bits_gpu, outptr); 44 | cudaFree(lens_gpu); 45 | cudaFree(bits_gpu); 46 | } 47 | -------------------------------------------------------------------------------- /config/gran.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | exp_name: GRAN 3 | exp_dir: exp/GRAN 4 | use_gpu: true 5 | device: cuda:0 6 | gpu: 0 7 | #device: cpu 8 | seed: 1234 9 | dataset: 10 | loader_name: GRANData 11 | name: top10 12 | data_path: data/ 13 | node_order: DFS 14 | num_subgraph_batch: 32 15 | num_fwd_pass: 1 16 | has_node_feat: false 17 | train_ratio: 1.0 18 | dev_ratio: 1.0 19 | is_save_split: false 20 | is_sample_subgraph: true 21 | is_overwrite_precompute: false 22 | model: 23 | name: GRANMixtureBernoulli 24 | num_mix_component: 20 25 | is_sym: true 26 | block_size: 1 27 | sample_stride: 1 28 | max_num_nodes: 1000 29 | hidden_dim: 128 30 | embedding_dim: 128 31 | num_GNN_layers: 7 32 | num_GNN_prop: 1 33 | num_canonical_order: 1 34 | dimension_reduce: true 35 | has_attention: true 36 | edge_weight: 1.0e+0 37 | train: 38 | optimizer: Adam 39 | lr: 1.0e-4 40 | lr_decay: 0.3 41 | lr_decay_epoch: [10000] 42 | num_workers: 0 43 | max_epoch: 50 44 | batch_size: 1 45 | display_iter: 10 46 | snapshot_epoch: 100 47 
| valid_epoch: 50 48 | wd: 0.0e-4 49 | save_snapshot: false 50 | momentum: 0.9 51 | shuffle: true 52 | is_resume: false 53 | resume_dir: # exp/GRAN/your_exp_folder 54 | resume_model: model_snapshot_0005000.pth 55 | test: 56 | batch_size: 1 57 | num_workers: 0 58 | num_test_gen: 2 # number of generated samples 59 | is_vis: false 60 | is_single_plot: false # visualize `num_vis` samples in a single image 61 | is_test_ER: false # test Erdos-Renyi baseline 62 | num_vis: 20 63 | vis_num_row: 5 # visualize `num_vis` samples in `vis_num_row` rows 64 | better_vis: true 65 | test_model_dir: snapshot_model 66 | test_model_name: gran_xx.pth 67 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | from setuptools.command.develop import develop 3 | import os 4 | import subprocess 5 | BASEPATH = os.path.dirname(os.path.abspath(__file__)) 6 | 7 | 8 | class CustomDevelop(develop): 9 | def run(self): 10 | original_cwd = os.getcwd() 11 | folder = os.path.join(BASEPATH, 'GraphGenerator/models/kronecker_ops') 12 | if not os.path.exists(os.path.join(folder, 'Makefile.config')): 13 | os.chdir(folder) 14 | subprocess.check_call(['unzip', '-o', '-d', '.', 'kronecker_src.zip']) 15 | folders = [ 16 | os.path.join(BASEPATH, 'GraphGenerator/models/bigg_ops/tree_clib'), 17 | os.path.join(BASEPATH, 'GraphGenerator/models/kronecker_ops/examples/kronfit') 18 | ] 19 | for folder in folders: 20 | os.chdir(folder) 21 | subprocess.check_call(['make']) 22 | folders = [ 23 | os.path.join(BASEPATH, 'GraphGenerator/evaluate'), 24 | ] 25 | for folder in folders: 26 | os.chdir(folder) 27 | subprocess.check_call(['g++', '-O2', '-std=c++11', '-o', 'orca', 'orca.cpp']) 28 | os.chdir(original_cwd) 29 | 30 | super().run() 31 | 32 | 33 | setuptools.setup( 34 | name="GraphGenerator", 35 | version="0.1", 36 | author="Sheng Xiang", 37 | author_email="xiangsheng218@gmail.com", 38 | description="Graph Generator package", 39 | long_description_content_type="text/markdown", 40 | packages=setuptools.find_packages(), 41 | classifiers=[ 42 | "Programming Language :: Python :: 3", 43 | "License :: OSI Approved :: MIT License", 44 | "Operating System :: OS Independent", 45 | ], 46 | python_requires='>=3.6', 47 | cmdclass={ 48 | 'develop': CustomDevelop 49 | } 50 | ) 51 | -------------------------------------------------------------------------------- /GraphGenerator/utils/arg_utils.py: -------------------------------------------------------------------------------- 1 | import time, os, yaml, torch, random 2 | import numpy as np 3 | from easydict import EasyDict as edict 4 | 5 | 6 | def get_config(config_file): 7 | """ Construct and snapshot hyper parameters """ 8 | # config = edict(yaml.load(open(config_file, 'r'), Loader=yaml.FullLoader)) 9 | config = edict(yaml.load(open(config_file, 'r'))) 10 | if config.seed is not None: 11 | np.random.seed(config.seed) 12 | random.seed(config.seed) 13 | # create hyper parameters 14 | config.run_id = str(os.getpid()) 15 | config.exp_name = '_'.join([ 16 | config.model.name, config.dataset.name, 17 | time.strftime('%Y-%b-%d-%H-%M-%S'), config.run_id 18 | ]) 19 | 20 | save_dir = os.path.join(config.exp_dir, config.exp_name) 21 | save_name = os.path.join(save_dir, 'config.yaml') 22 | config.save_dir = save_dir 23 | # snapshot hyperparameters 24 | mkdir(config.exp_dir) 25 | mkdir(save_dir) 26 | 27 | yaml.dump(edict2dict(config), open(save_name, 'w'), 
default_flow_style=False) 28 | 29 | return config 30 | 31 | 32 | def edict2dict(edict_obj): 33 | dict_obj = {} 34 | 35 | for key, vals in edict_obj.items(): 36 | if isinstance(vals, edict): 37 | dict_obj[key] = edict2dict(vals) 38 | else: 39 | dict_obj[key] = vals 40 | 41 | return dict_obj 42 | 43 | 44 | def mkdir(folder): 45 | if not os.path.isdir(folder): 46 | os.makedirs(folder) 47 | 48 | 49 | def set_device(config): 50 | if int(config.gpu) >= 0 and config.device.startswith('cuda:'): 51 | # os.environ["CUDA_VISIBLE_DEVICES"] = str(config.gpu) 52 | # config.device = 'cuda:0' 53 | print('use gpu indexed: {}'.format(config.gpu)) 54 | else: 55 | config.gpu = -1 56 | os.environ["CUDA_VISIBLE_DEVICES"] = "" 57 | config.device = 'cpu' 58 | print('use cpu') 59 | 60 | -------------------------------------------------------------------------------- /GraphGenerator/models/bter_ops/LICENSE.txt: -------------------------------------------------------------------------------- 1 | Tamara G. Kolda, Ali Pinar, and others, FEASTPACK v1.1, Sandia National 2 | Laboratories, SAND2013-4136W, http://www.sandia.gov/~tgkolda/feastpack/, 3 | January 2014 4 | 5 | ** License ** 6 | 7 | Copyright (c) 2014, Sandia National Laboratories 8 | All rights reserved. 9 | 10 | Redistribution and use in source and binary forms, with or without 11 | modification, are permitted provided that the following conditions are 12 | met: 13 | 14 | 1. Redistributions of source code must retain the above copyright notice, 15 | this list of conditions and the following disclaimer. 16 | 17 | 2. Redistributions in binary form must reproduce the above copyright 18 | notice, this list of conditions and the following disclaimer in the 19 | documentation and/or other materials provided with the distribution. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 22 | IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 23 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 24 | PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 25 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 26 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 27 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 28 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 29 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 30 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 31 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | 33 | **** 34 | 35 | Sandia National Laboratories is a multi-program laboratory managed and 36 | operated by Sandia Corporation, a wholly owned subsidiary of Lockheed 37 | Martin Corporation, for the U.S. Department of Energy's National Nuclear 38 | Security Administration under contract DE-AC04-94AL85000. 39 | -------------------------------------------------------------------------------- /GraphGenerator/models/bigg_ops/tree_clib/include/tree_util.h: -------------------------------------------------------------------------------- 1 | // Copyright 2020 The Google Research Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #ifndef TREE_UTIL_H 16 | #define TREE_UTIL_H 17 | 18 | #include 19 | #include 20 | #include "struct_util.h" // NOLINT 21 | 22 | class AdjNode; 23 | extern int total_job_nums; 24 | extern std::vector global_job_nodes; 25 | 26 | class AdjNode 27 | { 28 | public: 29 | AdjNode(){} 30 | AdjNode(AdjNode* parent, int row, int col_begin, int col_end, int depth); 31 | ~AdjNode(); 32 | void init(AdjNode* parent, int row, int col_begin, int col_end, int depth); 33 | void split(); 34 | void update_bits(); 35 | 36 | AdjNode *parent, *lch, *rch; 37 | int global_idx; 38 | int row, col_begin, col_end, mid; 39 | int depth, n_cols; 40 | bool is_leaf, is_root; 41 | bool has_edge, is_lowlevel; 42 | BitSet bits_rep; 43 | int job_idx; 44 | }; 45 | 46 | extern PtHolder node_holder; 47 | 48 | class AdjRow 49 | { 50 | public: 51 | AdjRow(){} 52 | AdjRow(int row, int col_start, int col_end); 53 | ~AdjRow(); 54 | void init(int row, int col_start, int col_end); 55 | 56 | void insert_edges(std::vector& col_indices); 57 | AdjNode* root; 58 | int row, max_col; 59 | 60 | private: 61 | void add_edges(AdjNode* node, ColAutomata* col_sm); 62 | }; 63 | 64 | extern PtHolder row_holder; 65 | 66 | 67 | #endif 68 | -------------------------------------------------------------------------------- /GraphGenerator/models/bter.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import networkx as nx 4 | from GraphGenerator.utils.data_utils import load_matlab_graph, save_matlab_graph 5 | 6 | 7 | def generate_matlab_mat(data_name, in_mat_path, out_mat_path, repeat=2): 8 | bter_path = "./GraphGenerator/models/bter_ops/" 9 | template_filename = os.path.join(bter_path, "template.m") 10 | with open(template_filename, "r") as r_f: 11 | template_context = r_f.read() 12 | tmp_filepath = os.path.join(bter_path, "{}.m".format(data_name)) 13 | with open(tmp_filepath, "w") as w_f: 14 | context = template_context.split("%##{Template Block}##%") 15 | w_f.write(context[0]) 16 | w_f.write(os.path.join("../../../", in_mat_path)) 17 | w_f.write(context[1]) 18 | w_f.write(str(repeat)) 19 | w_f.write(context[2]) 20 | w_f.write(os.path.join("../../../", out_mat_path)) 21 | w_f.write(context[3]) 22 | os.system("matlab -nosplash -nodesktop -r " 23 | "'cd ./GraphGenerator/models/bter_ops; {}; cd ../../..; quit'".format(data_name)) 24 | graphs = load_matlab_graph(fname=out_mat_path) 25 | # print(graphs) 26 | os.remove(tmp_filepath) 27 | return graphs 28 | 29 | 30 | def bter(input_graph, config): 31 | fname = os.path.join(config.exp_dir, config.exp_name, "{}.mat".format(config.dataset.name)) 32 | dump_name = os.path.join(config.exp_dir, config.exp_name, "bter_to_{}.mat".format(config.dataset.name)) 33 | sp_adj = nx.adjacency_matrix(input_graph) 34 | sp_adj.data = sp_adj.data.astype(np.float64) 35 | save_matlab_graph(fname, sp_adj, config.dataset.name) 36 | graphs = generate_matlab_mat(data_name=config.dataset.name, 37 | in_mat_path=fname, 38 | out_mat_path=dump_name, 39 | repeat=config.num_gen) 40 | # print(graphs) 41 | return 
[nx.Graph(graph) for graph in graphs[0].tolist()] 42 | 43 | 44 | if __name__ == '__main__': 45 | tmp_g = nx.grid_2d_graph(10, 10) 46 | save_matlab_graph("./tmp.mat", nx.adjacency_matrix(tmp_g), "tmp") 47 | -------------------------------------------------------------------------------- /GraphGenerator/models/ba.py: -------------------------------------------------------------------------------- 1 | from GraphGenerator.models.er import empty_graph 2 | import networkx as nx 3 | import numpy as np 4 | import random 5 | 6 | 7 | def _random_subset(seq, m): 8 | """ Return m unique elements from seq. 9 | 10 | This differs from random.sample which can return repeated 11 | elements if seq holds repeated elements. 12 | 13 | Note: eval('random') can be a random.Random or numpy.random.RandomState instance. 14 | """ 15 | targets = set() 16 | while len(targets) < m: 17 | x = random.choice(seq) 18 | targets.add(x) 19 | return targets 20 | 21 | 22 | def barabasi_albert_graph(n, m): 23 | if m < 1 or m >= n: 24 | raise nx.NetworkXError( 25 | f"Barabási–Albert network must have m >= 1 and m < n, m = {m}, n = {n}" 26 | ) 27 | 28 | # Add m initial nodes (m0 in barabasi-speak) 29 | G = empty_graph(m) 30 | # Target nodes for new edges 31 | targets = list(range(m)) 32 | # List of existing nodes, with nodes repeated once for each adjacent edge 33 | repeated_nodes = [] 34 | # Start adding the other n-m nodes. The first node is m. 35 | source = m 36 | while source < n: 37 | # Add edges to m nodes from the source. 38 | G.add_edges_from(zip([source] * m, targets)) 39 | # Add one node to the list for each new edge just created. 40 | repeated_nodes.extend(targets) 41 | # And the new node "source" has m edges to add to the list. 42 | repeated_nodes.extend([source] * m) 43 | # Now choose m unique nodes from the existing nodes 44 | # Pick uniformly from repeated_nodes (preferential attachment) 45 | targets = _random_subset(repeated_nodes, m) 46 | source += 1 47 | return G 48 | 49 | 50 | def b_a(in_graph, config): 51 | """ 52 | B-A graph generator 53 | :param in_graph: referenced graph, type: nx.Graph 54 | :param config: configure object 55 | :return: generated graphs, type: list of nx.Graph 56 | """ 57 | m = in_graph.number_of_edges() 58 | n = in_graph.number_of_nodes() 59 | k = int((n-np.sqrt(n**2-4*m))//2) 60 | out_graphs = [] 61 | for i in range(config.num_gen): 62 | out_graph = barabasi_albert_graph(n, k) 63 | out_graphs.append(out_graph) 64 | return out_graphs 65 | -------------------------------------------------------------------------------- /GraphGenerator/models/sbm.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import networkx as nx 3 | import numpy as np 4 | import bisect, pickle 5 | import random, argparse 6 | import community 7 | 8 | 9 | def sample_discrete(dist): 10 | # sample a discrete distribution dist with values = dist.keys() and 11 | # probabilities = dist.values() 12 | 13 | i = 0 14 | acc = 0 15 | values = {} 16 | probs = [] 17 | for e in dist: 18 | values[i] = e 19 | acc += dist[e] 20 | probs.append(acc) 21 | i += 1 22 | 23 | rand = random.random() 24 | pos = bisect.bisect(probs, rand) 25 | return values[pos] 26 | 27 | 28 | def get_parameters(G, method="sbm"): 29 | part = community.best_partition(G) 30 | M = {} 31 | for e in G.edges(): 32 | r = part[e[0]] 33 | s = part[e[1]] 34 | el = tuple(sorted([r, s])) 35 | M[el] = M.get(el, 0) + 1 36 | 37 | g = {} 38 | for k, v in part.items(): 39 | g[v] = g.get(v, []) + [k] 40 | 41 | k = G.degree() 
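# k is a networkx DegreeView, indexable by node. Below, K[c] accumulates the total
# degree of community c; for the degree-corrected variant (method != "sbm") each node's
# weight t[i] is its share of its community's total degree, while for plain SBM every
# node in a community gets the same weight 1/len(community).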
42 | K = {} 43 | for c in g: 44 | K[c] = sum([k[i] for i in g[c]]) 45 | if method != "sbm": 46 | t = dict(k) 47 | for e in t: 48 | if t[e] != 0: 49 | t[e] = float(t[e])/K[part[e]] 50 | else: 51 | t = part.copy() 52 | for c in g: 53 | node_list = g[c] 54 | prob = 1./len(node_list) 55 | for n in node_list: 56 | t[n] = prob 57 | 58 | return (t, M, g) 59 | 60 | 61 | def generate_from_parameters(t, w, g): 62 | G = nx.Graph() 63 | for i in g: 64 | G.add_nodes_from(g[i]) 65 | 66 | # generate num of edges 67 | M = w.copy() 68 | for c in M: 69 | M[c] = np.random.poisson(M[c]) 70 | 71 | # assign edges to vertices 72 | edges = [] 73 | for c in M: 74 | r = c[0] 75 | s = c[1] 76 | for i in range(M[c]): 77 | n1 = sample_discrete({j: t[j] for j in g[r]}) 78 | n2 = sample_discrete({j: t[j] for j in g[s]}) 79 | edges.append((n1, n2)) 80 | 81 | G.add_edges_from(edges) 82 | return G 83 | 84 | 85 | def generate(G, method, repeat=1): 86 | t, w, g = get_parameters(G, method) 87 | return [generate_from_parameters(t, w, g) for i in range(repeat)] 88 | 89 | 90 | -------------------------------------------------------------------------------- /GraphGenerator/models/bigg_ops/tree_clib/src/lib/config.cpp: -------------------------------------------------------------------------------- 1 | #include "config.h" // NOLINT 2 | #include 3 | #ifdef USE_GPU 4 | #include "cuda_runtime.h" // NOLINT 5 | #endif 6 | 7 | int cfg::max_num_nodes = 1000000; 8 | int cfg::bits_compress = 0; 9 | int cfg::dim_embed = 0; 10 | bool cfg::directed = false; 11 | bool cfg::self_loop = false; 12 | int cfg::gpu = -1; 13 | bool cfg::bfs_permute = false; 14 | int cfg::seed = 1; 15 | std::default_random_engine cfg::generator; 16 | 17 | void cfg::LoadParams(const int argc, const char** argv) 18 | { 19 | for (int i = 1; i < argc; i += 2) 20 | { 21 | if (strcmp(argv[i], "-max_num_nodes") == 0) 22 | max_num_nodes = atoi(argv[i + 1]); // NOLINT 23 | if (strcmp(argv[i], "-directed") == 0) 24 | directed = atoi(argv[i + 1]); // NOLINT 25 | if (strcmp(argv[i], "-self_loop") == 0) 26 | self_loop = atoi(argv[i + 1]); // NOLINT 27 | if (strcmp(argv[i], "-bits_compress") == 0) 28 | bits_compress = atoi(argv[i + 1]); // NOLINT 29 | if (strcmp(argv[i], "-embed_dim") == 0) 30 | dim_embed = atoi(argv[i + 1]); // NOLINT 31 | if (strcmp(argv[i], "-gpu") == 0) 32 | gpu = atoi(argv[i + 1]); // NOLINT 33 | if (strcmp(argv[i], "-seed") == 0) 34 | seed = atoi(argv[i + 1]); // NOLINT 35 | if (strcmp(argv[i], "-bfs_permute") == 0) 36 | bfs_permute = atoi(argv[i + 1]); // NOLINT 37 | } 38 | std::cerr << "====== begin of tree_clib configuration ======" << std::endl; 39 | std::cerr << "| bfs_permute = " << bfs_permute << std::endl; 40 | std::cerr << "| max_num_nodes = " << max_num_nodes << std::endl; 41 | std::cerr << "| bits_compress = " << bits_compress << std::endl; 42 | std::cerr << "| dim_embed = " << dim_embed << std::endl; 43 | std::cerr << "| gpu = " << gpu << std::endl; 44 | std::cerr << "| seed = " << seed << std::endl; 45 | std::cerr << "====== end of tree_clib configuration ======" << std::endl; 46 | #ifdef USE_GPU 47 | if (gpu >= 0) 48 | { 49 | cudaError_t t = cudaSetDevice(gpu); 50 | //assert(t == cudaSuccess); 51 | } 52 | #endif 53 | } 54 | 55 | void cfg::SetRandom() 56 | { 57 | std::srand(cfg::seed); 58 | cfg::generator.seed(cfg::seed); 59 | } 60 | -------------------------------------------------------------------------------- /GraphGenerator/models/bigg_ops/tree_clib/Makefile: -------------------------------------------------------------------------------- 1 
| dir_guard = @mkdir -p $(@D) 2 | FIND := find 3 | CXX := g++ 4 | 5 | CXXFLAGS += -Wall -O3 -std=c++11 6 | LDFLAGS += -lm 7 | 8 | UNAME := $(shell uname) 9 | 10 | CUDA_HOME := /usr/local/cuda 11 | NVCC := $(CUDA_HOME)/bin/nvcc 12 | USE_GPU = 1 13 | 14 | ifeq ($(UNAME), Darwin) 15 | USE_GPU = 0 16 | FOMP := 17 | else 18 | LDFLAGS += -fopenmp 19 | FOMP := -fopenmp 20 | endif 21 | 22 | ifeq ($(USE_GPU), 1) 23 | NVCCFLAGS += --default-stream per-thread 24 | LDFLAGS += -L$(CUDA_HOME)/lib64 -lcudart -lcublas -lcurand 25 | endif 26 | 27 | CUDA_ARCH := -gencode arch=compute_86,code=sm_86 28 | # -gencode arch=compute_70,code=sm_70 29 | 30 | build_root = build 31 | 32 | ifeq ($(USE_GPU), 1) 33 | include_dirs = ./include $(CUDA_HOME)/include 34 | else 35 | include_dirs = ./include 36 | endif 37 | 38 | 39 | CXXFLAGS += $(addprefix -I,$(include_dirs)) -Wno-unused-local-typedef 40 | CXXFLAGS += -fPIC 41 | cpp_files = $(shell $(FIND) src/lib -name "*.cpp" -print | rev | cut -d"/" -f1 | rev) 42 | cxx_obj_files = $(subst .cpp,.o,$(cpp_files)) 43 | obj_build_root = $(build_root)/objs 44 | objs = $(addprefix $(obj_build_root)/cxx/,$(cxx_obj_files)) 45 | 46 | 47 | ifeq ($(USE_GPU), 1) 48 | CXXFLAGS += -DUSE_GPU 49 | NVCCFLAGS += -DUSE_GPU 50 | NVCCFLAGS += $(addprefix -I,$(include_dirs)) 51 | NVCCFLAGS += -std=c++11 --use_fast_math --compiler-options '-fPIC' 52 | cu_files = $(shell $(FIND) src/lib -name "*.cu" -printf "%P\n") 53 | cu_obj_files = $(subst .cu,.o,$(cu_files)) 54 | objs += $(addprefix $(obj_build_root)/cuda/,$(cu_obj_files)) 55 | endif 56 | 57 | 58 | DEPS = $(objs:.o=.d) 59 | 60 | target = $(build_root)/dll/libtree.so 61 | target_dep = $(addsuffix .d,$(target)) 62 | 63 | .PRECIOUS: $(build_root)/lib/%.o 64 | 65 | all: $(target) 66 | 67 | $(target) : src/tree_main.cpp $(objs) 68 | $(dir_guard) 69 | $(CXX) -shared $(CXXFLAGS) -MMD -o $@ $(filter %.cpp %.o, $^) $(LDFLAGS) 70 | 71 | DEPS += $(target_dep) 72 | 73 | ifeq ($(USE_GPU), 1) 74 | $(obj_build_root)/cuda/%.o: src/lib/%.cu 75 | $(dir_guard) 76 | $(NVCC) $(NVCCFLAGS) $(CUDA_ARCH) -M $< -o ${@:.o=.d} -odir $(@D) 77 | $(NVCC) $(NVCCFLAGS) $(CUDA_ARCH) -c $< -o $@ 78 | endif 79 | 80 | $(obj_build_root)/cxx/%.o: src/lib/%.cpp 81 | $(dir_guard) 82 | $(CXX) $(CXXFLAGS) -MMD -c -o $@ $(filter %.cpp, $^) $(FOMP) 83 | 84 | clean: 85 | rm -rf $(build_root) 86 | 87 | -include $(DEPS) 88 | -------------------------------------------------------------------------------- /GraphGenerator/models/bigg_ops/tree_clib/Makefile_70: -------------------------------------------------------------------------------- 1 | dir_guard = @mkdir -p $(@D) 2 | FIND := find 3 | CXX := g++ 4 | 5 | CXXFLAGS += -Wall -O3 -std=c++11 6 | LDFLAGS += -lm 7 | 8 | UNAME := $(shell uname) 9 | 10 | CUDA_HOME := /usr/local/cuda 11 | NVCC := $(CUDA_HOME)/bin/nvcc 12 | USE_GPU = 1 13 | 14 | ifeq ($(UNAME), Darwin) 15 | USE_GPU = 0 16 | FOMP := 17 | else 18 | LDFLAGS += -fopenmp 19 | FOMP := -fopenmp 20 | endif 21 | 22 | ifeq ($(USE_GPU), 1) 23 | NVCCFLAGS += --default-stream per-thread 24 | LDFLAGS += -L$(CUDA_HOME)/lib64 -lcudart -lcublas -lcurand 25 | endif 26 | 27 | CUDA_ARCH := -gencode arch=compute_70,code=sm_70 28 | # -gencode arch=compute_70,code=sm_70 29 | 30 | build_root = build 31 | 32 | ifeq ($(USE_GPU), 1) 33 | include_dirs = ./include $(CUDA_HOME)/include 34 | else 35 | include_dirs = ./include 36 | endif 37 | 38 | 39 | CXXFLAGS += $(addprefix -I,$(include_dirs)) -Wno-unused-local-typedef 40 | CXXFLAGS += -fPIC 41 | cpp_files = $(shell $(FIND) src/lib -name "*.cpp" -print | rev | 
cut -d"/" -f1 | rev) 42 | cxx_obj_files = $(subst .cpp,.o,$(cpp_files)) 43 | obj_build_root = $(build_root)/objs 44 | objs = $(addprefix $(obj_build_root)/cxx/,$(cxx_obj_files)) 45 | 46 | 47 | ifeq ($(USE_GPU), 1) 48 | CXXFLAGS += -DUSE_GPU 49 | NVCCFLAGS += -DUSE_GPU 50 | NVCCFLAGS += $(addprefix -I,$(include_dirs)) 51 | NVCCFLAGS += -std=c++11 --use_fast_math --compiler-options '-fPIC' 52 | cu_files = $(shell $(FIND) src/lib -name "*.cu" -printf "%P\n") 53 | cu_obj_files = $(subst .cu,.o,$(cu_files)) 54 | objs += $(addprefix $(obj_build_root)/cuda/,$(cu_obj_files)) 55 | endif 56 | 57 | 58 | DEPS = $(objs:.o=.d) 59 | 60 | target = $(build_root)/dll/libtree.so 61 | target_dep = $(addsuffix .d,$(target)) 62 | 63 | .PRECIOUS: $(build_root)/lib/%.o 64 | 65 | all: $(target) 66 | 67 | $(target) : src/tree_main.cpp $(objs) 68 | $(dir_guard) 69 | $(CXX) -shared $(CXXFLAGS) -MMD -o $@ $(filter %.cpp %.o, $^) $(LDFLAGS) 70 | 71 | DEPS += $(target_dep) 72 | 73 | ifeq ($(USE_GPU), 1) 74 | $(obj_build_root)/cuda/%.o: src/lib/%.cu 75 | $(dir_guard) 76 | $(NVCC) $(NVCCFLAGS) $(CUDA_ARCH) -M $< -o ${@:.o=.d} -odir $(@D) 77 | $(NVCC) $(NVCCFLAGS) $(CUDA_ARCH) -c $< -o $@ 78 | endif 79 | 80 | $(obj_build_root)/cxx/%.o: src/lib/%.cpp 81 | $(dir_guard) 82 | $(CXX) $(CXXFLAGS) -MMD -c -o $@ $(filter %.cpp, $^) $(FOMP) 83 | 84 | clean: 85 | rm -rf $(build_root) 86 | 87 | -include $(DEPS) 88 | -------------------------------------------------------------------------------- /GraphGenerator/models/bigg_ops/tree_clib/Makefile_75: -------------------------------------------------------------------------------- 1 | dir_guard = @mkdir -p $(@D) 2 | FIND := find 3 | CXX := g++ 4 | 5 | CXXFLAGS += -Wall -O3 -std=c++11 6 | LDFLAGS += -lm 7 | 8 | UNAME := $(shell uname) 9 | 10 | CUDA_HOME := /usr/local/cuda 11 | NVCC := $(CUDA_HOME)/bin/nvcc 12 | USE_GPU = 1 13 | 14 | ifeq ($(UNAME), Darwin) 15 | USE_GPU = 0 16 | FOMP := 17 | else 18 | LDFLAGS += -fopenmp 19 | FOMP := -fopenmp 20 | endif 21 | 22 | ifeq ($(USE_GPU), 1) 23 | NVCCFLAGS += --default-stream per-thread 24 | LDFLAGS += -L$(CUDA_HOME)/lib64 -lcudart -lcublas -lcurand 25 | endif 26 | 27 | CUDA_ARCH := -gencode arch=compute_75,code=sm_75 28 | # -gencode arch=compute_70,code=sm_70 29 | 30 | build_root = build 31 | 32 | ifeq ($(USE_GPU), 1) 33 | include_dirs = ./include $(CUDA_HOME)/include 34 | else 35 | include_dirs = ./include 36 | endif 37 | 38 | 39 | CXXFLAGS += $(addprefix -I,$(include_dirs)) -Wno-unused-local-typedef 40 | CXXFLAGS += -fPIC 41 | cpp_files = $(shell $(FIND) src/lib -name "*.cpp" -print | rev | cut -d"/" -f1 | rev) 42 | cxx_obj_files = $(subst .cpp,.o,$(cpp_files)) 43 | obj_build_root = $(build_root)/objs 44 | objs = $(addprefix $(obj_build_root)/cxx/,$(cxx_obj_files)) 45 | 46 | 47 | ifeq ($(USE_GPU), 1) 48 | CXXFLAGS += -DUSE_GPU 49 | NVCCFLAGS += -DUSE_GPU 50 | NVCCFLAGS += $(addprefix -I,$(include_dirs)) 51 | NVCCFLAGS += -std=c++11 --use_fast_math --compiler-options '-fPIC' 52 | cu_files = $(shell $(FIND) src/lib -name "*.cu" -printf "%P\n") 53 | cu_obj_files = $(subst .cu,.o,$(cu_files)) 54 | objs += $(addprefix $(obj_build_root)/cuda/,$(cu_obj_files)) 55 | endif 56 | 57 | 58 | DEPS = $(objs:.o=.d) 59 | 60 | target = $(build_root)/dll/libtree.so 61 | target_dep = $(addsuffix .d,$(target)) 62 | 63 | .PRECIOUS: $(build_root)/lib/%.o 64 | 65 | all: $(target) 66 | 67 | $(target) : src/tree_main.cpp $(objs) 68 | $(dir_guard) 69 | $(CXX) -shared $(CXXFLAGS) -MMD -o $@ $(filter %.cpp %.o, $^) $(LDFLAGS) 70 | 71 | DEPS += $(target_dep) 72 | 73 | 
ifeq ($(USE_GPU), 1) 74 | $(obj_build_root)/cuda/%.o: src/lib/%.cu 75 | $(dir_guard) 76 | $(NVCC) $(NVCCFLAGS) $(CUDA_ARCH) -M $< -o ${@:.o=.d} -odir $(@D) 77 | $(NVCC) $(NVCCFLAGS) $(CUDA_ARCH) -c $< -o $@ 78 | endif 79 | 80 | $(obj_build_root)/cxx/%.o: src/lib/%.cpp 81 | $(dir_guard) 82 | $(CXX) $(CXXFLAGS) -MMD -c -o $@ $(filter %.cpp, $^) $(FOMP) 83 | 84 | clean: 85 | rm -rf $(build_root) 86 | 87 | -include $(DEPS) 88 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # pycharm files 2 | .idea/ 3 | .idea 4 | 5 | # Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | wheels/ 27 | pip-wheel-metadata/ 28 | share/python-wheels/ 29 | *.egg-info/ 30 | .installed.cfg 31 | *.egg 32 | MANIFEST 33 | 34 | # PyInstaller 35 | # Usually these files are written by a python script from a template 36 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 37 | *.manifest 38 | *.spec 39 | 40 | # Installer logs 41 | pip-log.txt 42 | pip-delete-this-directory.txt 43 | 44 | # Unit test / coverage reports 45 | htmlcov/ 46 | .tox/ 47 | .nox/ 48 | .coverage 49 | .coverage.* 50 | .cache 51 | nosetests.xml 52 | coverage.xml 53 | *.cover 54 | *.py,cover 55 | .hypothesis/ 56 | .pytest_cache/ 57 | 58 | # Translations 59 | *.mo 60 | *.pot 61 | 62 | # Django stuff: 63 | *.log 64 | local_settings.py 65 | db.sqlite3 66 | db.sqlite3-journal 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_build/ 77 | 78 | # PyBuilder 79 | target/ 80 | 81 | # Jupyter Notebook 82 | .ipynb_checkpoints 83 | 84 | # IPython 85 | profile_default/ 86 | ipython_config.py 87 | 88 | # pyenv 89 | .python-version 90 | 91 | # pipenv 92 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 93 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 94 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 95 | # install all needed dependencies. 96 | #Pipfile.lock 97 | 98 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 99 | __pypackages__/ 100 | 101 | # Celery stuff 102 | celerybeat-schedule 103 | celerybeat.pid 104 | 105 | # SageMath parsed files 106 | *.sage.py 107 | 108 | # Environments 109 | .env 110 | .venv 111 | env/ 112 | venv/ 113 | ENV/ 114 | env.bak/ 115 | venv.bak/ 116 | 117 | # Spyder project settings 118 | .spyderproject 119 | .spyproject 120 | 121 | # Rope project settings 122 | .ropeproject 123 | 124 | # mkdocs documentation 125 | /site 126 | 127 | # mypy 128 | .mypy_cache/ 129 | .dmypy.json 130 | dmypy.json 131 | 132 | # Pyre type checker 133 | .pyre/ 134 | /.idea/ 135 | -------------------------------------------------------------------------------- /GraphGenerator/models/kronecker.py: -------------------------------------------------------------------------------- 1 | import GraphGenerator.utils.data_utils as data_utils 2 | import subprocess 3 | import numpy as np 4 | import networkx as nx 5 | 6 | 7 | def str_to_float(str): 8 | a, b = str.split('\n')[1:3] 9 | a = a.strip().split() 10 | b = b.strip().split() 11 | return np.array([[float(a[0]), float(a[-1])], [float(b[0]), float(b[-1])]]) 12 | 13 | 14 | def krongen(init_mat, k): 15 | """ 16 | Kronecker graph generator. 17 | :param init_mat: initiator, default as a 2*2 shaped matrix 18 | :param k: iterations, default as int(log2(nodes)) 19 | :return: generated graph with type of 'nx.classes.graph.Graph' 20 | """ 21 | tmp = np.sum(init_mat) 22 | edge_num = int(tmp**k) 23 | og = nx.Graph() 24 | choice = ['00', '01', '10', '11'] 25 | prob = init_mat/tmp 26 | # time complexity is O(k*E) < O(N**2) 27 | for i in range(edge_num): 28 | x, y = 0, 0 29 | tmp_rand = np.random.choice(choice, k, True, prob.flatten()) 30 | for j, m_axis in enumerate(tmp_rand): 31 | add = 2**j 32 | x += int(m_axis[0])*add 33 | y += int(m_axis[1])*add 34 | og.add_edge(x, y) 35 | return og 36 | 37 | 38 | def generate(input_graph, config): 39 | sparse_adj = nx.adjacency_matrix(input_graph) 40 | k = int(np.log2(sparse_adj.shape[0])) + 1 41 | init_mat = np.array([[.5625, .1875], [.1875, .0625]]) 42 | if config.model.name == 'Kronecker': 43 | tmp_name = "./data/cit_{}.txt".format(config.dataset.name) 44 | data_utils.adj_to_edgelist(sparse_adj, tmp_name) 45 | sp_output = subprocess.check_output( 46 | args=["./GraphGenerator/models/kronecker_ops/examples/kronfit/kronfit", 47 | "-i:{}".format(tmp_name), 48 | '-m:"{}"'.format(config.model.init_mat), 49 | "-o:./{}/{}/{}_to_kronfit.log".format(config.exp_dir, config.exp_name, config.dataset.name), 50 | "-gi:100", "-n0:2"] 51 | ) 52 | utf_output = sp_output.decode('utf8').strip() 53 | START_STR = "PARAMS" 54 | output = utf_output[utf_output.find(START_STR):] 55 | init_mat = str_to_float(output) 56 | # dump_graphs(args.dataset, 'kronecker', init_mat, k) 57 | if config.model.name == 'RMAT': 58 | edge_num = sparse_adj.sum()/2. 59 | tmp = np.float_power(edge_num, 1/k) 60 | init_mat = init_mat*tmp 61 | print(init_mat) 62 | # dump_graphs(args.dataset, 'rmat', init_mat, k) 63 | return [krongen(init_mat, k) for i in range(config.num_gen)] 64 | -------------------------------------------------------------------------------- /GraphGenerator/models/bter_ops/dplpdf.m: -------------------------------------------------------------------------------- 1 | function p = dplpdf(n,gamma) 2 | %DPLPDF Discrete power law probability density function. 3 | % 4 | % P = DPLPDF(N,GAMMA) returns the probabilities for a discrete 5 | % version of the power law probability density function. 
In 6 | % this case, Prob(x) ~ x^(-gamma) for x = 1:N. 7 | % 8 | % See also DGLNPDF, GENDEGDIST. 9 | % 10 | % Reference: 11 | % * T. G. Kolda, A. Pinar, T. Plantenga and C. Seshadhri. A Scalable 12 | % Generative Graph Model with Community Structure, arXiv:1302.6636, 13 | % March 2013. (http://arxiv.org/abs/1302.6636) 14 | % 15 | % Tamara G. Kolda, Ali Pinar, and others, FEASTPACK v1.1, Sandia National 16 | % Laboratories, SAND2013-4136W, http://www.sandia.gov/~tgkolda/feastpack/, 17 | % January 2014 18 | 19 | %% License 20 | % Copyright (c) 2014, Sandia National Laboratories 21 | % All rights reserved. 22 | % 23 | % Redistribution and use in source and binary forms, with or without 24 | % modification, are permitted provided that the following conditions are 25 | % met: 26 | % 27 | % # Redistributions of source code must retain the above copyright notice, 28 | % this list of conditions and the following disclaimer. 29 | % # Redistributions in binary form must reproduce the above copyright 30 | % notice, this list of conditions and the following disclaimer in the 31 | % documentation and/or other materials provided with the distribution. 32 | % 33 | % THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 34 | % IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 35 | % THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 36 | % PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 37 | % CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 38 | % EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 39 | % PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 40 | % PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 41 | % LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 42 | % NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 43 | % SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 44 | % 45 | % 46 | % Sandia National Laboratories is a multi-program laboratory managed and 47 | % operated by Sandia Corporation, a wholly owned subsidiary of Lockheed 48 | % Martin Corporation, for the U.S. Department of Energy's National Nuclear 49 | % Security Administration under contract DE-AC04-94AL85000. 50 | 51 | p = (1:n)'.^(-gamma); 52 | p = p / sum(p); 53 | -------------------------------------------------------------------------------- /GraphGenerator/models/bter_ops/dglnpdf.m: -------------------------------------------------------------------------------- 1 | function p = dglnpdf(n,alpha,beta) 2 | %DGLNPDF Discrete generalized log-normal probability density function. 3 | % 4 | % P = DGLNPDF(N,ALPHA,BETA) returns the probabilities for a discrete 5 | % version of the generalized log-normal probability density function. In 6 | % this case, Prob(x) ~ exp(-(log(x)/alpha)^beta) for x = 1:N. 7 | % 8 | % See also DPLPDF, GENDEGDIST. 9 | % 10 | % Reference: 11 | % * T. G. Kolda, A. Pinar, T. Plantenga and C. Seshadhri. A Scalable 12 | % Generative Graph Model with Community Structure, arXiv:1302.6636, 13 | % March 2013. (http://arxiv.org/abs/1302.6636) 14 | % 15 | % Tamara G. Kolda, Ali Pinar, and others, FEASTPACK v1.1, Sandia National 16 | % Laboratories, SAND2013-4136W, http://www.sandia.gov/~tgkolda/feastpack/, 17 | % January 2014 18 | 19 | %% License 20 | % Copyright (c) 2014, Sandia National Laboratories 21 | % All rights reserved. 
22 | % 23 | % Redistribution and use in source and binary forms, with or without 24 | % modification, are permitted provided that the following conditions are 25 | % met: 26 | % 27 | % # Redistributions of source code must retain the above copyright notice, 28 | % this list of conditions and the following disclaimer. 29 | % # Redistributions in binary form must reproduce the above copyright 30 | % notice, this list of conditions and the following disclaimer in the 31 | % documentation and/or other materials provided with the distribution. 32 | % 33 | % THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 34 | % IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 35 | % THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 36 | % PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 37 | % CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 38 | % EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 39 | % PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 40 | % PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 41 | % LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 42 | % NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 43 | % SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 44 | % 45 | % 46 | % Sandia National Laboratories is a multi-program laboratory managed and 47 | % operated by Sandia Corporation, a wholly owned subsidiary of Lockheed 48 | % Martin Corporation, for the U.S. Department of Energy's National Nuclear 49 | % Security Administration under contract DE-AC04-94AL85000. 50 | 51 | 52 | p = exp(-((log((1:n)'))/alpha).^beta); 53 | p = p / sum(p); 54 | -------------------------------------------------------------------------------- /GraphGenerator/models/bter_ops/ccperdegest.m: -------------------------------------------------------------------------------- 1 | function ccpdb = ccperdegest(G,bins,nsamples) 2 | %CCPERDEGEST Estimate of mean clustering coefficient per degree bin. 3 | % 4 | % CCPD = CCPERDEGEST(G,B,N) computes the per-degree-bin clustering 5 | % coefficient, i.e., CCPD(k) is the mean clustering coefficient for nodes 6 | % in degree bin k. The graph G is assumed to be undirected, unweighted, 7 | % and to contain no self edges. This is *not* checked by the code. The 8 | % vector B gives the bin boundaries, see HISTC. The computation is 9 | % approximate, using wedge sampling. 10 | % 11 | % NOTE: This is an interface to the MEX function provided by 12 | % ccperdegest_mex.c. 13 | % 14 | % See also CCPERDEG, BINDATA. 15 | % 16 | % Tamara G. Kolda, Ali Pinar, and others, FEASTPACK v1.1, Sandia National 17 | % Laboratories, SAND2013-4136W, http://www.sandia.gov/~tgkolda/feastpack/, 18 | % January 2014 19 | 20 | %% License 21 | % Copyright (c) 2014, Sandia National Laboratories 22 | % All rights reserved. 23 | % 24 | % Redistribution and use in source and binary forms, with or without 25 | % modification, are permitted provided that the following conditions are 26 | % met: 27 | % 28 | % # Redistributions of source code must retain the above copyright notice, 29 | % this list of conditions and the following disclaimer. 30 | % # Redistributions in binary form must reproduce the above copyright 31 | % notice, this list of conditions and the following disclaimer in the 32 | % documentation and/or other materials provided with the distribution. 
33 | % 34 | % THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 35 | % IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 36 | % THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 37 | % PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 38 | % CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 39 | % EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 40 | % PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 41 | % PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 42 | % LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 43 | % NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 44 | % SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 45 | % 46 | % 47 | % Sandia National Laboratories is a multi-program laboratory managed and 48 | % operated by Sandia Corporation, a wholly owned subsidiary of Lockheed 49 | % Martin Corporation, for the U.S. Department of Energy's National Nuclear 50 | % Security Administration under contract DE-AC04-94AL85000. 51 | 52 | ccpdb = ccperdegest_mex(G,bins,nsamples); 53 | 54 | -------------------------------------------------------------------------------- /GraphGenerator/evaluate/efficiency.py: -------------------------------------------------------------------------------- 1 | from GraphGenerator.metrics import speed, memory 2 | import networkx as nx 3 | import scipy.sparse as sp 4 | import torch, os, copy 5 | import numpy as np 6 | 7 | 8 | def coo_to_csp(sp_coo): 9 | num = sp_coo.shape[0] 10 | row = sp_coo.row 11 | col = sp_coo.col 12 | sp_tensor = torch.sparse.FloatTensor(torch.LongTensor(np.stack([row, col])), 13 | torch.tensor(sp_coo.data), 14 | torch.Size([num, num])) 15 | return sp_tensor 16 | 17 | 18 | def sp_normalize(adj_def, device='cpu'): 19 | """ 20 | :param adj: scipy.sparse.coo_matrix 21 | :param device: default as cpu 22 | :return: normalized_adj: 23 | """ 24 | adj_ = sp.coo_matrix(adj_def) 25 | adj_ = adj_ + sp.coo_matrix(sp.eye(adj_def.shape[0]), dtype=np.float32) 26 | rowsum = np.array(adj_.sum(axis=1)).reshape(-1) 27 | norm_unit = np.float_power(rowsum, -0.5).astype(np.float32) 28 | degree_mat_inv_sqrt = sp.diags(norm_unit) 29 | degree_mat_sqrt = copy.copy(degree_mat_inv_sqrt) 30 | # degree_mat_sqrt = degree_mat_inv_sqrt.to_dense() 31 | support = adj_.__matmul__(degree_mat_sqrt) 32 | # support = coo_to_csp(support.tocoo()) 33 | # degree_mat_inv_sqrt = coo_to_csp(degree_mat_inv_sqrt.tocoo()) 34 | adj_normalized = degree_mat_inv_sqrt.__matmul__(support) 35 | adj_normalized = coo_to_csp(adj_normalized.tocoo()) 36 | return adj_normalized 37 | 38 | 39 | @speed.time_decorator 40 | def eval_speed(func, args): 41 | pass 42 | 43 | 44 | def eval_efficiency(generator, config=None): 45 | from GraphGenerator.train import train_base as train 46 | # data_sizes = [100, int(1e+3), int(1e+4), int(1e+5), int(1e+6)] 47 | data_sizes = [20] 48 | # data_sizes = config.eval.num_nodes 49 | print("The tested graph size is: {}.".format(data_sizes)) 50 | output_data = [] 51 | for size in data_sizes: 52 | new_g = nx.watts_strogatz_graph(size, 4, 0.) 
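# NOTE: with rewiring probability 0, watts_strogatz_graph(size, 4, 0.) is a deterministic
# ring lattice in which every node has degree 4, so each benchmark graph has exactly
# 2*size edges; uncommenting the larger data_sizes above scales the same timing run up to 1e6 nodes.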
53 | new_adj = nx.adjacency_matrix(new_g) 54 | new_adj = sp.coo_matrix(new_adj) 55 | # adj_input = coo_to_csp(new_adj) 56 | print("Start (training and) inferencing graph with {} nodes...".format(size)) 57 | tmp_data = train.train_and_inference(new_g, generator, config=config) 58 | if isinstance(tmp_data, list): 59 | output_data.extend(tmp_data) 60 | else: 61 | output_data.append(tmp_data) 62 | return output_data 63 | 64 | 65 | if __name__ == '__main__': 66 | conf_name = "config/bigg.yaml" 67 | from GraphGenerator.utils.arg_utils import get_config, set_device 68 | config = get_config(conf_name) 69 | set_device(config) 70 | out = eval_efficiency("bigg", config) 71 | # -------------------------------------------------------------------------------- /GraphGenerator/models/bter_ops/edges2graph.m: -------------------------------------------------------------------------------- 1 | function G = edges2graph(E,nnodes) 2 | %EDGES2GRAPH Create an undirected, simple graph from edge list. 3 | % 4 | % G = EDGES2GRAPH(E) creates an adjaceny matrix for the graph where 5 | % E(k,1) and E(k,2) specifies the kth edge. All edges are treated as 6 | % undirected. Duplicate edges are removed. Loops are removed. 7 | % 8 | % G = EDGES2GRAPH(E,N) specifies the number of number of nodes in the 9 | % graph. 10 | % 11 | % Tamara G. Kolda, Ali Pinar, and others, FEASTPACK v1.1, Sandia National 12 | % Laboratories, SAND2013-4136W, http://www.sandia.gov/~tgkolda/feastpack/, 13 | % January 2014 14 | 15 | %% License 16 | % Copyright (c) 2014, Sandia National Laboratories 17 | % All rights reserved. 18 | % 19 | % Redistribution and use in source and binary forms, with or without 20 | % modification, are permitted provided that the following conditions are 21 | % met: 22 | % 23 | % # Redistributions of source code must retain the above copyright notice, 24 | % this list of conditions and the following disclaimer. 25 | % # Redistributions in binary form must reproduce the above copyright 26 | % notice, this list of conditions and the following disclaimer in the 27 | % documentation and/or other materials provided with the distribution. 28 | % 29 | % THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 30 | % IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 31 | % THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 32 | % PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 33 | % CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 34 | % EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 35 | % PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 36 | % PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 37 | % LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 38 | % NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 39 | % SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 40 | % 41 | % 42 | % Sandia National Laboratories is a multi-program laboratory managed and 43 | % operated by Sandia Corporation, a wholly owned subsidiary of Lockheed 44 | % Martin Corporation, for the U.S. Department of Energy's National Nuclear 45 | % Security Administration under contract DE-AC04-94AL85000. 
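% Example (a small sketch): given
%   E = [1 2; 2 3; 3 1; 1 1; 2 3];   % one duplicate edge and one self-loop
%   G = edges2graph(E, 4);
% G is the symmetric 4x4 sparse 0/1 matrix with nonzeros only at (1,2), (2,3) and
% (1,3) (and their transposes): the duplicate and the loop are dropped, and node 4
% stays isolated because N = 4 was passed explicitly.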
46 | 47 | if ~exist('nnodes','var') 48 | nnodes = max(E(:)); 49 | else 50 | % Error checking only 51 | tmp = max(E(:)); 52 | if (tmp > nnodes) 53 | fprintf('Highest index in E is %d, but N = %d', tmp, nnodes); 54 | end 55 | end 56 | 57 | ii = E(:,1); 58 | jj = E(:,2); 59 | G = spones(sparse([ii;jj],[jj;ii],1,nnodes,nnodes)); 60 | G = spdiags(zeros(nnodes,1),0,G); 61 | -------------------------------------------------------------------------------- /GraphGenerator/models/bter_ops/random_sample.m: -------------------------------------------------------------------------------- 1 | function s = random_sample(cnts, nsamples) 2 | %RANDOM_SAMPLE creates a random sample proportional to the given counts. 3 | % 4 | % S = RANDOM_SAMPLE(C) choose N = round(sum(C)) samples (with 5 | % replacement) from {1,...,length(C)} proportional to the values in C. 6 | % So, if C = [2 1 1], then we might expect S (sorted) to be [ 1 1 2 3 ]. 7 | % However, we also allow for C to be non-integral. 8 | % 9 | % Tamara G. Kolda, Ali Pinar, and others, FEASTPACK v1.1, Sandia National 10 | % Laboratories, SAND2013-4136W, http://www.sandia.gov/~tgkolda/feastpack/, 11 | % January 2014 12 | 13 | %% License 14 | % Copyright (c) 2014, Sandia National Laboratories 15 | % All rights reserved. 16 | % 17 | % Redistribution and use in source and binary forms, with or without 18 | % modification, are permitted provided that the following conditions are 19 | % met: 20 | % 21 | % # Redistributions of source code must retain the above copyright notice, 22 | % this list of conditions and the following disclaimer. 23 | % # Redistributions in binary form must reproduce the above copyright 24 | % notice, this list of conditions and the following disclaimer in the 25 | % documentation and/or other materials provided with the distribution. 26 | % 27 | % THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 28 | % IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 29 | % THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 30 | % PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 31 | % CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 32 | % EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 33 | % PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 34 | % PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 35 | % LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 36 | % NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 37 | % SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 38 | % 39 | % 40 | % Sandia National Laboratories is a multi-program laboratory managed and 41 | % operated by Sandia Corporation, a wholly owned subsidiary of Lockheed 42 | % Martin Corporation, for the U.S. Department of Energy's National Nuclear 43 | % Security Administration under contract DE-AC04-94AL85000. 44 | 45 | if ~exist('nsamples','var') 46 | nsamples = round(sum(cnts)); 47 | else 48 | cnts = cnts .* nsamples / sum(cnts); 49 | end 50 | 51 | cumdist = [0; cumsum(cnts)]; 52 | bins = cumdist / cumdist(end); 53 | 54 | testval = abs(bins(end) - 1); 55 | if testval > eps 56 | warning('Last entry of bins is not exactly 1. 
Diff = %e.', testval); 57 | end 58 | 59 | [~, s] = histc(rand(nsamples,1),bins); -------------------------------------------------------------------------------- /GraphGenerator/models/bter_ops/bter_edges2graph.m: -------------------------------------------------------------------------------- 1 | function [G,G1,G2] = bter_edges2graph(E1,E2) 2 | %BTER_EDGES2GRAPH Create a graph from edge lists. 3 | % 4 | % G = BTER_EDGES2GRAPH(E1,E2) returns a sparse adjancency matrix 5 | % corresponding to the given edge lists produced by BTER. The graph is 6 | % undirected, unweighted, and has no loops, even if E1 and E2 contain 7 | % these. 8 | % 9 | % [G,G1,G2] = BTER_EDGES2GRAPH(E1,E2) returns the graphs corresponding to 10 | % Phase 1 and Phase 2 in addition to the combined graph. 11 | % 12 | % See also BTER, EDGES2GRAPH 13 | % 14 | % Tamara G. Kolda, Ali Pinar, and others, FEASTPACK v1.1, Sandia National 15 | % Laboratories, SAND2013-4136W, http://www.sandia.gov/~tgkolda/feastpack/, 16 | % January 2014 17 | 18 | %% License 19 | % Copyright (c) 2014, Sandia National Laboratories 20 | % All rights reserved. 21 | % 22 | % Redistribution and use in source and binary forms, with or without 23 | % modification, are permitted provided that the following conditions are 24 | % met: 25 | % 26 | % # Redistributions of source code must retain the above copyright notice, 27 | % this list of conditions and the following disclaimer. 28 | % # Redistributions in binary form must reproduce the above copyright 29 | % notice, this list of conditions and the following disclaimer in the 30 | % documentation and/or other materials provided with the distribution. 31 | % 32 | % THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 33 | % IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 34 | % THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 35 | % PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 36 | % CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 37 | % EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 38 | % PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 39 | % PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 40 | % LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 41 | % NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 42 | % SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 43 | % 44 | % 45 | % Sandia National Laboratories is a multi-program laboratory managed and 46 | % operated by Sandia Corporation, a wholly owned subsidiary of Lockheed 47 | % Martin Corporation, for the U.S. Department of Energy's National Nuclear 48 | % Security Administration under contract DE-AC04-94AL85000. 49 | 50 | if isempty(E1) 51 | nnodes = max(E2(:)); 52 | elseif isempty(E2) 53 | nnodes = max(E1(:)); 54 | else 55 | nnodes = max(max(E1(:)),max(E2(:))); 56 | end 57 | 58 | if (nargout < 3) 59 | G = edges2graph([E1;E2],nnodes); 60 | else 61 | G1 = edges2graph(E1,nnodes); 62 | G2 = edges2graph(E2,nnodes); 63 | G = spones(G1+G2); 64 | G = spdiags(zeros(nnodes,1),0,G); 65 | end 66 | 67 | -------------------------------------------------------------------------------- /GraphGenerator/models/bter_ops/binstart.m: -------------------------------------------------------------------------------- 1 | function idx = binstart(i, omega, tau, idx0) 2 | %BINSTART - Specify start of bin for the specified parameters. 
3 | % 4 | % K = BINSTART(I,OMEGA,TAU,K0) returns the index of the I-th bin defined 5 | % by parameters TAU, OMEGA, and K0. The parameters TAU, OMEGA, and K0 are 6 | % optional. The default values are OMEGA=2, TAU=1, K0=1. 7 | % 8 | % The end of a bin I one less than the end of the next bin, i.e., 9 | % KEND = BINSTART(I+1,OMEGA,TAU,K0)-1. 10 | % 11 | % See also BINLOOKUP, BINDATA. 12 | % 13 | % Tamara G. Kolda, Ali Pinar, and others, FEASTPACK v1.1, Sandia National 14 | % Laboratories, SAND2013-4136W, http://www.sandia.gov/~tgkolda/feastpack/, 15 | % January 2014 16 | 17 | %% License 18 | % Copyright (c) 2014, Sandia National Laboratories 19 | % All rights reserved. 20 | % 21 | % Redistribution and use in source and binary forms, with or without 22 | % modification, are permitted provided that the following conditions are 23 | % met: 24 | % 25 | % # Redistributions of source code must retain the above copyright notice, 26 | % this list of conditions and the following disclaimer. 27 | % # Redistributions in binary form must reproduce the above copyright 28 | % notice, this list of conditions and the following disclaimer in the 29 | % documentation and/or other materials provided with the distribution. 30 | % 31 | % THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 32 | % IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 33 | % THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 34 | % PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 35 | % CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 36 | % EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 37 | % PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 38 | % PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 39 | % LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 40 | % NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 41 | % SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 42 | % 43 | % 44 | % Sandia National Laboratories is a multi-program laboratory managed and 45 | % operated by Sandia Corporation, a wholly owned subsidiary of Lockheed 46 | % Martin Corporation, for the U.S. Department of Energy's National Nuclear 47 | % Security Administration under contract DE-AC04-94AL85000. 48 | 49 | % ** 50 | if ~exist('omega','var') || isempty(omega) 51 | omega = 2; 52 | end 53 | 54 | if ~exist('tau','var') || isempty(tau) 55 | tau = 1; 56 | end 57 | 58 | if ~exist('idx0','var') || isempty(idx0) 59 | idx0 = 1; 60 | end 61 | 62 | % ** 63 | n = length(i); 64 | idx = zeros(n,1); 65 | for k = 1:n 66 | if i(k) <= tau 67 | idx(k) = i(k) + idx0 - 1; 68 | else 69 | idx(k) = ceil((omega.^(i(k)-tau)-1)/(omega-1)) + tau + idx0 - 1; 70 | end 71 | end -------------------------------------------------------------------------------- /GraphGenerator/models/bter_ops/binlookup.m: -------------------------------------------------------------------------------- 1 | function i = binlookup(idx, omega, tau, idx0) 2 | %BINLOOKUP For a given index, determine its appropriate bin. 3 | % 4 | % I = BINLOOKUP(K,OMEGA,TAU,K0) returns the bin number of index K, where 5 | % the bins are defined by paramtesr TAU, OMEGA, and K0. The parameters 6 | % TAU, OMEGA, and K0 are optional. If they are not defined or defined as 7 | % an emptyset ([]), then they take on the default values, which are 8 | % OMEGA=2, TAU=1, K0=1. 9 | % 10 | % Note: If K is a vector, than I is a vector of bins. 
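%
% Example: with the defaults OMEGA=2, TAU=1, K0=1 the bins are the index ranges
% [1], [2 3], [4 7], [8 15], ... (after the first TAU singleton bins each bin is
% OMEGA times wider than the previous one), so BINLOOKUP(5) returns 3 while
% BINSTART(3) returns 4.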
11 | % 12 | % See also BINSTART, BINDATA. 13 | % 14 | % Tamara G. Kolda, Ali Pinar, and others, FEASTPACK v1.1, Sandia National 15 | % Laboratories, SAND2013-4136W, http://www.sandia.gov/~tgkolda/feastpack/, 16 | % January 2014 17 | 18 | %% License 19 | % Copyright (c) 2014, Sandia National Laboratories 20 | % All rights reserved. 21 | % 22 | % Redistribution and use in source and binary forms, with or without 23 | % modification, are permitted provided that the following conditions are 24 | % met: 25 | % 26 | % # Redistributions of source code must retain the above copyright notice, 27 | % this list of conditions and the following disclaimer. 28 | % # Redistributions in binary form must reproduce the above copyright 29 | % notice, this list of conditions and the following disclaimer in the 30 | % documentation and/or other materials provided with the distribution. 31 | % 32 | % THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 33 | % IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 34 | % THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 35 | % PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 36 | % CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 37 | % EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 38 | % PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 39 | % PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 40 | % LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 41 | % NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 42 | % SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 43 | % 44 | % 45 | % Sandia National Laboratories is a multi-program laboratory managed and 46 | % operated by Sandia Corporation, a wholly owned subsidiary of Lockheed 47 | % Martin Corporation, for the U.S. Department of Energy's National Nuclear 48 | % Security Administration under contract DE-AC04-94AL85000. 49 | 50 | % ** 51 | if ~exist('omega','var') || isempty(omega) 52 | omega = 2; 53 | end 54 | 55 | if ~exist('tau','var') || isempty(tau) 56 | tau = 1; 57 | end 58 | 59 | if ~exist('idx0','var') || isempty(idx0) 60 | idx0 = 1; 61 | end 62 | 63 | % ** 64 | if any(idx < idx0) 65 | error('Index is smaller than the start of the first bin'); 66 | end 67 | 68 | n = length(idx); 69 | i = zeros(n,1); 70 | for k = 1:n 71 | if (idx(k)-idx0+1) < tau 72 | i(k) = idx(k)-idx0+1; 73 | else 74 | tmp = 1 + (omega-1)*(idx(k)-idx0+1 - tau); 75 | i(k) = floor(log(tmp)/log(omega)) + tau ; 76 | end 77 | end -------------------------------------------------------------------------------- /GraphGenerator/models/bigg_ops/tree_clib/include/tree_clib.h: -------------------------------------------------------------------------------- 1 | // Copyright 2020 The Google Research Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
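// The extern "C" declarations below form the flat C interface of libtree.so, the
// shared library produced by the Makefiles in this directory. The Python side of
// the BiGG model presumably loads these symbols through a thin ctypes wrapper: each
// call either registers a training graph (AddGraph, PrepareTrain) or fills integer
// index buffers that drive the batched tree computations.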
14 | 15 | #ifndef TREE_CLIB_H 16 | #define TREE_CLIB_H 17 | 18 | #include "config.h" // NOLINT 19 | 20 | extern "C" int Init(const int argc, const char **argv); 21 | 22 | extern "C" int PrepareTrain(int num_graphs, void* list_ids, 23 | void* list_start_node, void* list_col_start, 24 | void* list_col_end, int num_nodes, int new_batch); 25 | 26 | extern "C" int AddGraph(int graph_idx, int num_nodes, int num_edges, 27 | void* edge_pairs, int n_left, int n_right); 28 | 29 | extern "C" int TotalTreeNodes(); 30 | 31 | extern "C" int SetTreeEmbedIds(int depth, int lr, void* _bot_from, 32 | void* _bot_to, void* _prev_from, void* _prev_to); 33 | 34 | extern "C" int SetRowEmbedIds(int lr, int level, void* _bot_from, 35 | void* _bot_to, void* _prev_from, 36 | void* _prev_to, void* _past_from, void* _past_to); 37 | 38 | extern "C" int MaxTreeDepth(); 39 | 40 | extern "C" int NumBottomDep(int depth, int lr); 41 | 42 | extern "C" int NumPrevDep(int depth, int lr); 43 | 44 | extern "C" int NumRowBottomDep(int lr); 45 | 46 | extern "C" int NumRowPastDep(int lv, int lr); 47 | 48 | extern "C" int NumRowTopDep(int lv, int lr); 49 | 50 | extern "C" int RowSumSteps(); 51 | 52 | extern "C" int RowMergeSteps(); 53 | 54 | extern "C" int NumRowSumOut(int lr); 55 | 56 | extern "C" int NumRowSumNext(int lr); 57 | 58 | extern "C" int SetRowSumIds(int lr, void* _step_from, void* _step_to, 59 | void* _next_input, void* _next_states); 60 | 61 | extern "C" int SetRowSumInit(void* _init_idx); 62 | 63 | extern "C" int SetRowSumLast(void* _last_idx); 64 | 65 | extern "C" int HasChild(void* _has_child); 66 | 67 | extern "C" int NumCurNodes(int depth); 68 | 69 | extern "C" int GetInternalMask(int depth, void* _internal_mask); 70 | 71 | extern "C" int NumInternalNodes(int depth); 72 | 73 | extern "C" int GetChMask(int lr, int depth, void* _ch_mask); 74 | 75 | extern "C" int GetNumCh(int lr, int depth, void* _num_ch); 76 | 77 | extern "C" int SetLeftState(int depth, void* _bot_from, void* _bot_to, 78 | void* _prev_from, void* _prev_to); 79 | 80 | extern "C" int NumLeftBot(int depth); 81 | 82 | extern "C" int LeftRightSelect(int depth, void* _left_from, void* _left_to, 83 | void* _right_from, void* _right_to); 84 | 85 | extern "C" int MaxBinFeatDepth(); 86 | 87 | extern "C" int NumBinNodes(int depth); 88 | 89 | extern "C" int SetBinaryFeat(int d, void* _feat_ptr, int dev); 90 | 91 | extern "C" int GetNextStates(void* _state_idx); 92 | 93 | extern "C" int GetNumNextStates(); 94 | 95 | extern "C" int GetCurPos(void* _pos); 96 | 97 | #endif 98 | -------------------------------------------------------------------------------- /GraphGenerator/models/bter_ops/tricnt.m: -------------------------------------------------------------------------------- 1 | function [t,d,w] = tricnt(G,d,matlabbgl) 2 | %TRICNT Count number of triangles per vertex in a simple, undirected graph 3 | % 4 | % T = TRICNT(G) takes a sparse adjacency matrix G and computes the number 5 | % of triangles per vertex. Note taht there is no error checking on G. It 6 | % is up to the user to ensure that G is symmetric, has only 0/1 entries 7 | % (but *not* binary), and has no entries on the diagonal. 8 | % 9 | % T = TRICNT(G,D) takes a second argument which is the degree per vertex 10 | % and does not recalculate it. 11 | % 12 | % T = TRICNT(G,D,true) uses the clustering_coefficients from MATLAB_BGL. 13 | % This assumes that this package is installed and in the path. 14 | % 15 | % [T,D,W] = TRICNT(G) also returns the degree and number of wedges per 16 | % vertex. 
17 | % 18 | % NOTE: This is an interface to the MEX function provided by 19 | % tricnt_mex.c, unless the clustering_coefficients function from 20 | % MATLAB_BGL is used. 21 | % 22 | % See also CCPERDEG. 23 | % 24 | % Tamara G. Kolda, Ali Pinar, and others, FEASTPACK v1.1, Sandia National 25 | % Laboratories, SAND2013-4136W, http://www.sandia.gov/~tgkolda/feastpack/, 26 | % January 2014 27 | 28 | %% License 29 | % Copyright (c) 2014, Sandia National Laboratories 30 | % All rights reserved. 31 | % 32 | % Redistribution and use in source and binary forms, with or without 33 | % modification, are permitted provided that the following conditions are 34 | % met: 35 | % 36 | % # Redistributions of source code must retain the above copyright notice, 37 | % this list of conditions and the following disclaimer. 38 | % # Redistributions in binary form must reproduce the above copyright 39 | % notice, this list of conditions and the following disclaimer in the 40 | % documentation and/or other materials provided with the distribution. 41 | % 42 | % THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 43 | % IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 44 | % THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 45 | % PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 46 | % CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 47 | % EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 48 | % PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 49 | % PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 50 | % LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 51 | % NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 52 | % SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 53 | % 54 | % 55 | % Sandia National Laboratories is a multi-program laboratory managed and 56 | % operated by Sandia Corporation, a wholly owned subsidiary of Lockheed 57 | % Martin Corporation, for the U.S. Department of Energy's National Nuclear 58 | % Security Administration under contract DE-AC04-94AL85000. 
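% Example (sketch): for the triangle graph G = sparse([0 1 1; 1 0 1; 1 1 0]),
% [t,d,w] = tricnt(G) gives t = [1;1;1], d = [2;2;2] and w = d.*(d-1)/2 = [1;1;1],
% so the per-vertex clustering coefficient t./w is 1 everywhere, as expected for
% a single triangle.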
59 | 60 | 61 | if ~exist('matlabbgl','var') 62 | matlabbgl = false; 63 | end 64 | 65 | if ~exist('d','var') || isempty(d) 66 | d = full(sum(G,2)); 67 | end 68 | 69 | w = d.*(d-1)/2; 70 | 71 | if (matlabbgl) 72 | 73 | if ~exist('clustering_coefficients.m','file') 74 | error('Must install MATLAB_BGL toolbox'); 75 | end 76 | options.undirected = 1; 77 | options.unweighted = 1; 78 | cc = clustering_coefficients(G,options); 79 | t = round(w.*cc); 80 | 81 | else 82 | 83 | t = tricnt_mex(G); 84 | 85 | end 86 | 87 | -------------------------------------------------------------------------------- /GraphGenerator/models/vgae.py: -------------------------------------------------------------------------------- 1 | import torch, math 2 | from torch.nn.modules.module import Module 3 | from torch.nn.parameter import Parameter 4 | from torch.autograd import Variable 5 | import torch.nn as nn 6 | 7 | 8 | class GraphConvolution(Module): 9 | """ 10 | Simple GCN layer, similar to https://arxiv.org/abs/1609.02907 11 | """ 12 | 13 | def __init__(self, in_features, out_features, bias=True, act=lambda x: x): 14 | super(GraphConvolution, self).__init__() 15 | self.in_features = in_features 16 | self.out_features = out_features 17 | self.weight = Parameter(torch.FloatTensor(in_features, out_features)) 18 | self.act = act 19 | if bias: 20 | self.bias = Parameter(torch.FloatTensor(out_features)) 21 | else: 22 | self.register_parameter('bias', None) 23 | self.reset_parameters() 24 | 25 | def reset_parameters(self): 26 | stdv = 1. / math.sqrt(self.weight.size(1)) 27 | self.weight.data.uniform_(-stdv, stdv) 28 | if self.bias is not None: 29 | self.bias.data.uniform_(-stdv, stdv) 30 | 31 | def forward(self, input, adj): 32 | support = torch.mm(input, self.weight) 33 | output = torch.mm(adj, support) 34 | if self.bias is not None: 35 | output = output + self.bias 36 | return self.act(output) 37 | 38 | def __repr__(self): 39 | return self.__class__.__name__ + ' (' \ 40 | + str(self.in_features) + ' -> ' \ 41 | + str(self.out_features) + ')' 42 | 43 | 44 | class VGAE(nn.Module): 45 | def __init__(self, input_size, emb_size, hidden_size, act=lambda x: x, layers=2): 46 | super(VGAE, self).__init__() 47 | self.encode = GraphConvolution(input_size, hidden_size, act=act) 48 | self.medium = nn.ModuleList([GraphConvolution(hidden_size, hidden_size, act=act) for i in range(layers-2)]) 49 | self._mean = GraphConvolution(hidden_size, emb_size, act=act) 50 | self._logv = GraphConvolution(hidden_size, emb_size, act=act) 51 | self.mean = None 52 | self.logv = None 53 | 54 | def forward(self, adj, x=None, device='cuda:0'): 55 | if x is None: 56 | x = Variable(torch.rand(adj.shape[0], self.input_size, dtype=torch.float32)).to(device) 57 | support = self.encode(x, adj) 58 | for m in self.medium: 59 | support = m(support, adj) 60 | self.mean = self._mean(support, adj) 61 | self.logv = self._logv(support, adj) 62 | noise = Variable(torch.rand(self.mean.shape[0], self.mean.shape[1], dtype=torch.float32)).to(device) 63 | support = noise*torch.exp(self.logv) + self.mean 64 | score = torch.mm(support, support.T) 65 | return score 66 | 67 | 68 | class GAE(nn.Module): 69 | def __init__(self, input_size, emb_size, hidden_size, act=lambda x: x, layers=2): 70 | super(GAE, self).__init__() 71 | self.encode = GraphConvolution(input_size, hidden_size, act=act) 72 | self.medium = nn.ModuleList([GraphConvolution(hidden_size, hidden_size, act=act) for i in range(layers-2)]) 73 | self.mean = GraphConvolution(hidden_size, emb_size, act=act) 74 | 75 | def 
forward(self, adj, x=None, device='cuda:0'): 76 | if x is None: 77 | x = Variable(torch.rand(adj.shape[0], self.input_size, dtype=torch.float32)).to(device) 78 | support = self.encode(x, adj) 79 | for m in self.medium: 80 | support = m(support, adj) 81 | support = self.mean(support, adj) 82 | score = torch.mm(support, support.T) 83 | return score 84 | 85 | 86 | -------------------------------------------------------------------------------- /GraphGenerator/models/bter_ops/gendegdist.m: -------------------------------------------------------------------------------- 1 | function dd = gendegdist(n,pdf,cutoff) 2 | %GENDEGDIST Create a random degree distribution from a given PDF. 3 | % 4 | % ND = GENDEGDIST(N,PDF) creates a degree distribution on N nodes using 5 | % the discrete probability distribution function specified by PDF. The 6 | % result is a degree distribution: ND(d) = number of nodes of degree d. 7 | % 8 | % ND = GENDEGDIST(N,PDF,D0) estimates the number of nodes for d < DO as 9 | % ND(d) = PDF(d) * N. This is much faster for large N, but D0 should not 10 | % be too small or it will cause errors in the degree distribution. 11 | % 12 | % Examples 13 | % maxdeg=1e5; alpha = 2; beta = 2; pdf = dglnpdf(maxdeg, alpha, beta); 14 | % dd = gendegdist(1e7, pdf, 1e2); 15 | % loglog(dd,'b*'); 16 | % 17 | % See also DGLNPDF, DGLNCDF. 18 | % 19 | % Reference: 20 | % * T. G. Kolda, A. Pinar, T. Plantenga and C. Seshadhri. A Scalable 21 | % Generative Graph Model with Community Structure, arXiv:1302.6636, 22 | % March 2013. (http://arxiv.org/abs/1302.6636) 23 | % 24 | % Tamara G. Kolda, Ali Pinar, and others, FEASTPACK v1.1, Sandia National 25 | % Laboratories, SAND2013-4136W, http://www.sandia.gov/~tgkolda/feastpack/, 26 | % January 2014 27 | 28 | %% License 29 | % Copyright (c) 2014, Sandia National Laboratories 30 | % All rights reserved. 31 | % 32 | % Redistribution and use in source and binary forms, with or without 33 | % modification, are permitted provided that the following conditions are 34 | % met: 35 | % 36 | % # Redistributions of source code must retain the above copyright notice, 37 | % this list of conditions and the following disclaimer. 38 | % # Redistributions in binary form must reproduce the above copyright 39 | % notice, this list of conditions and the following disclaimer in the 40 | % documentation and/or other materials provided with the distribution. 41 | % 42 | % THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 43 | % IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 44 | % THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 45 | % PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 46 | % CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 47 | % EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 48 | % PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 49 | % PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 50 | % LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 51 | % NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 52 | % SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 53 | % 54 | % 55 | % Sandia National Laboratories is a multi-program laboratory managed and 56 | % operated by Sandia Corporation, a wholly owned subsidiary of Lockheed 57 | % Martin Corporation, for the U.S. 
Department of Energy's National Nuclear 58 | % Security Administration under contract DE-AC04-94AL85000. 59 | 60 | % ** 61 | if ~exist('cutoff','var') 62 | cutoff = 0; 63 | end 64 | 65 | % ** For any degree smaller than the cutoff, the PDF*n is good enough. 66 | dd1(1:cutoff,1) = round(n*pdf(1:cutoff)); 67 | n1 = sum(dd1); %<- Number of nodes "distributed" so far. 68 | 69 | % ** Do the tail by actual sampling 70 | n2 = n - n1; 71 | tailpdf = pdf(cutoff+1:end)/sum(pdf(cutoff+1:end)); 72 | tailcdf = cumsum(tailpdf); 73 | idx2 = find(tailcdf < 1, 1, 'last'); 74 | tailcdf = [0; tailcdf(1:idx2); 1]; 75 | coins = rand(n2,1); 76 | cnts = histc(coins,tailcdf); 77 | 78 | % ** Assemble second half of dd 79 | idx3 = find(cnts > 0, 1, 'last'); 80 | dd2 = cnts(1:idx3); 81 | 82 | % ** 83 | dd = [dd1;dd2]; -------------------------------------------------------------------------------- /GraphGenerator/models/bter_ops/cc_param_search.m: -------------------------------------------------------------------------------- 1 | function p1 = cc_param_search(nd,maxcc,gcc,varargin) 2 | %CC_PARAM_SEARCH Clustering coefficient parameter search 3 | % 4 | % XI = CC_PARAM_SEARCH(ND, MAXCCD, GCC) finds the parameter XI such that 5 | % the clustering coefficint profile defined by 6 | % 7 | % CCD(D) = MAXCCD * exp(-(D-1)*XI) for D >= 2, 8 | % 9 | % has the specified global clustering coefficient (GCC) and maximum 10 | % clustering coefficient (MAXCCD). 11 | % 12 | % Examples 13 | % % nd <- degree distribution 14 | % % maxccd_target <- target for maximum ccd value 15 | % % gcc_target <- target for global clustering coefficient 16 | % xi = cc_param_search(nd, maxccd_target, gcc_target); 17 | % ccd_target = [0; maxccd_target * exp(-(0:maxdeg-2)'.* xi)]; 18 | % maxdeg = find(nd>0,1,'last'); 19 | % 20 | % See also DEGDIST_PARAM_SEARCH, BTER 21 | % 22 | % Reference: 23 | % T. G. Kolda, A. Pinar, T. Plantenga and C. Seshadhri. A Scalable 24 | % Generative Graph Model with Community Structure, arXiv:1302.6636, 25 | % March 2013. (http://arxiv.org/abs/1302.6636) 26 | % 27 | % Tamara G. Kolda, Ali Pinar, and others, FEASTPACK v1.1, Sandia National 28 | % Laboratories, SAND2013-4136W, http://www.sandia.gov/~tgkolda/feastpack/, 29 | % January 2014 30 | 31 | %% License 32 | % Copyright (c) 2014, Sandia National Laboratories 33 | % All rights reserved. 34 | % 35 | % Redistribution and use in source and binary forms, with or without 36 | % modification, are permitted provided that the following conditions are 37 | % met: 38 | % 39 | % # Redistributions of source code must retain the above copyright notice, 40 | % this list of conditions and the following disclaimer. 41 | % # Redistributions in binary form must reproduce the above copyright 42 | % notice, this list of conditions and the following disclaimer in the 43 | % documentation and/or other materials provided with the distribution. 44 | % 45 | % THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 46 | % IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 47 | % THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 48 | % PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 49 | % CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 50 | % EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 51 | % PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 52 | % PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 53 | % LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 54 | % NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 55 | % SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 56 | % 57 | % 58 | % Sandia National Laboratories is a multi-program laboratory managed and 59 | % operated by Sandia Corporation, a wholly owned subsidiary of Lockheed 60 | % Martin Corporation, for the U.S. Department of Energy's National Nuclear 61 | % Security Administration under contract DE-AC04-94AL85000. 62 | 63 | 64 | params = inputParser; 65 | params.addParamValue('fminsearch_opts', optimset('TolFun', 1e-4, 'TolX', 1e-4)); 66 | params.parse(varargin{:}); 67 | options = params.Results.fminsearch_opts; 68 | 69 | fhandle = @(x) objfunc(nd, maxcc, gcc, x); 70 | [xstar,~,~] = fminsearch(fhandle, 0.5, options); 71 | p1 = xstar; 72 | 73 | function y = objfunc(nd,maxcc,gcc,xi) 74 | %OBJFUNC Compute objectiv function, as described above 75 | maxd = length(nd); 76 | ccd_mean = [0; maxcc*exp(-(0:maxd-2)'.* xi)]; 77 | nWedges = nd' .* ((1:maxd).*((1:maxd)-1)/2); 78 | gcc_xi = (nWedges*ccd_mean) / sum(nWedges); 79 | y = abs(gcc - gcc_xi); 80 | fprintf('xi = %e, target gcc = %f, current gcc = %f\n', xi, gcc, gcc_xi); 81 | 82 | 83 | -------------------------------------------------------------------------------- /GraphGenerator/train/train_netgan.py: -------------------------------------------------------------------------------- 1 | from GraphGenerator.models.netgan import * 2 | # import tensorflow as tf 3 | from GraphGenerator.utils.arg_utils import set_device 4 | import tensorflow.compat.v1 as tf 5 | import scipy.sparse as sp 6 | import numpy as np 7 | from matplotlib import pyplot as plt 8 | from sklearn.metrics import roc_auc_score, average_precision_score 9 | import time, os, pickle 10 | import networkx as nx 11 | import multiprocessing as mp 12 | 13 | 14 | def score_to_graph(_rws, _n): 15 | scores_mat = score_matrix_from_random_walks(_rws, _n).tocsr() 16 | tmp_graph = graph_from_scores(scores_mat, _A_obs.sum()) 17 | return nx.Graph(tmp_graph) 18 | 19 | 20 | def train_netgan(input_data, config): 21 | set_device(config) 22 | emb_size = config.model.embedding_dim 23 | l_rate = config.train.lr 24 | _A_obs = nx.adjacency_matrix(input_data) 25 | _A_obs = _A_obs - sp.csr_matrix(np.diag(_A_obs.diagonal())) 26 | _A_obs = _A_obs + _A_obs.T 27 | _A_obs[_A_obs > 1] = 1 28 | lcc = largest_connected_components(_A_obs) 29 | _A_obs = _A_obs[lcc, :][:, lcc] 30 | _N = _A_obs.shape[0] 31 | val_share = config.train.val_share 32 | test_share = config.train.test_share 33 | seed = config.seed 34 | train_ones, val_ones, val_zeros, test_ones, test_zeros = train_val_test_split_adjacency(_A_obs, val_share, 35 | test_share, seed, 36 | undirected=True, 37 | connected=True, 38 | asserts=True) 39 | train_graph = sp.coo_matrix((np.ones(len(train_ones)), (train_ones[:, 0], train_ones[:, 1]))).tocsr() 40 | assert (train_graph.toarray() == train_graph.toarray().T).all() 41 | rw_len = config.model.rw_len 42 | batch_size = config.train.batch_size 43 | 44 | walker = RandomWalker(train_graph, rw_len, p=1, q=1, batch_size=batch_size) 45 | 46 | walker.walk().__next__() 47 | 
netgan = NetGAN(_N, rw_len, walk_generator=walker.walk, gpu_id=0, use_gumbel=True, disc_iters=3, 48 | W_down_discriminator_size=emb_size, W_down_generator_size=emb_size, 49 | l2_penalty_generator=1e-7, l2_penalty_discriminator=5e-5, batch_size=batch_size, 50 | generator_layers=[40], discriminator_layers=[30], temp_start=5, learning_rate=l_rate) 51 | stopping_criterion = config.train.stopping_criterion 52 | 53 | assert stopping_criterion in ["val", "eo"], "Please set the desired stopping criterion." 54 | 55 | if stopping_criterion == "val": # use val criterion for early stopping 56 | stopping = None 57 | elif stopping_criterion == "eo": # use eo criterion for early stopping 58 | stopping = 0.5 # set the target edge overlap here 59 | else: 60 | stopping = None 61 | eval_iter = config.train.eval_iter 62 | display_iter = config.train.display_iter 63 | 64 | log_dict = netgan.train(A_orig=_A_obs, val_ones=val_ones, val_zeros=val_zeros, stopping=stopping, 65 | eval_every=eval_iter, plot_every=display_iter, max_patience=20, max_iters=200000) 66 | 67 | sample_many = netgan.generate_discrete(10000, reuse=True) 68 | 69 | samples = [] 70 | 71 | for _ in range(config.test.sample_num): 72 | if (_ + 1) % 1000 == 0: 73 | print(_ + 1) 74 | samples.append(sample_many.eval({netgan.tau: 0.5})) 75 | 76 | rws = np.array(samples).reshape([-1, rw_len]) 77 | pool = mp.Pool(processes=5) 78 | args_all = [(rws, _N) for i in range(config.test.num_gen)] 79 | results = [pool.apply_async(score_to_graph, args=args) for args in args_all] 80 | graphs = [p.get() for p in results] 81 | return graphs 82 | 83 | -------------------------------------------------------------------------------- /GraphGenerator/models/ws.py: -------------------------------------------------------------------------------- 1 | import pyemd, random 2 | from GraphGenerator.models.er import complete_graph 3 | from scipy.linalg import toeplitz 4 | import numpy as np 5 | import networkx as nx 6 | 7 | 8 | def watts_strogatz_graph(n, k, p): 9 | if k > n: 10 | raise nx.NetworkXError("k>n, choose smaller k or larger n") 11 | 12 | # If k == n, the graph is complete not Watts-Strogatz 13 | if k == n: 14 | return complete_graph(n) 15 | G = nx.Graph() 16 | nodes = list(range(n)) # nodes are labeled 0 to n-1 17 | # connect each node to k/2 neighbors 18 | for j in range(1, k // 2 + 1): 19 | targets = nodes[j:] + nodes[0:j] # first j nodes are now last in list 20 | G.add_edges_from(zip(nodes, targets)) 21 | # rewire edges from each node 22 | # loop over all nodes in order (label) and neighbors in order (distance) 23 | # no self loops or multiple edges allowed 24 | for j in range(1, k // 2 + 1): # outer loop is neighbors 25 | targets = nodes[j:] + nodes[0:j] # first j nodes are now last in list 26 | # inner loop in node order 27 | for u, v in zip(nodes, targets): 28 | if random.random() < p: 29 | w = random.choice(nodes) 30 | # Enforce no self-loops or multiple edges 31 | while w == u or G.has_edge(u, w): 32 | w = random.choice(nodes) 33 | if G.degree(u) >= n - 1: 34 | break # skip this rewiring 35 | else: 36 | G.remove_edge(u, v) 37 | G.add_edge(u, w) 38 | return G 39 | 40 | 41 | def wasserstein_distance(x, y, distance_scaling=1.0): 42 | support_size = max(len(x), len(y)) 43 | d_mat = toeplitz(range(support_size)).astype(np.float) 44 | distance_mat = d_mat / distance_scaling 45 | 46 | # convert histogram values x and y to float, and make them equal len 47 | x = x.astype(np.float) 48 | y = y.astype(np.float) 49 | if len(x) < len(y): 50 | x = np.hstack((x, [0.0] * 
(support_size - len(x)))) 51 | elif len(y) < len(x): 52 | y = np.hstack((y, [0.0] * (support_size - len(y)))) 53 | 54 | emd = pyemd.emd(x, y, distance_mat) 55 | return emd 56 | 57 | 58 | def degree_loss(x, n=3, real_g=None, generator='W-S', k=2): 59 | pred_g = nx.empty_graph() 60 | if generator == 'W-S': 61 | pred_g = watts_strogatz_graph(n, k, x) 62 | real_hist = np.array(nx.degree_histogram(real_g)) 63 | real_hist = real_hist / np.sum(real_hist) 64 | pred_hist = np.array(nx.degree_histogram(pred_g)) 65 | pred_hist = pred_hist / np.sum(pred_hist) 66 | loss = wasserstein_distance(real_hist, pred_hist) 67 | return loss 68 | 69 | 70 | def grid_search(x_min, x_max, x_step, n, real_g, generator, k=2, repeat=2): 71 | loss_all = [] 72 | x_list = np.arange(x_min, x_max, x_step) 73 | for x_test in x_list: 74 | tmp_loss = 0 75 | for i in range(repeat): 76 | tmp_loss += degree_loss(x_test, n=n, real_g=real_g, generator=generator, k=k) 77 | loss_all.append(tmp_loss) 78 | x_best = x_list[np.argmin(np.array(loss_all))] 79 | return x_best, min(loss_all) 80 | 81 | 82 | def generator_optimization(graph, generator='W-S'): 83 | graph_node = graph.number_of_nodes() 84 | graph_edge = graph.number_of_edges() 85 | k = round(graph_edge/graph_node) + 1 86 | p_selected = 1. 87 | print('graph with {} nodes'.format(graph_node)) 88 | n = graph_node 89 | if generator == 'W-S': 90 | #loss_all = [] 91 | #parameter_all = [] 92 | p_selected, _ = grid_search(1e-6, 1, 0.01, n, graph, generator, k, 10) 93 | return n, k, p_selected 94 | 95 | 96 | def generate_new_graph(parameters, generator, repeat=1): 97 | graph_list = [] 98 | for i in range(repeat): 99 | if generator == 'W-S': 100 | graph_list.append(watts_strogatz_graph(*parameters)) 101 | return graph_list 102 | 103 | 104 | def w_s(in_graph, config): 105 | """ 106 | W-S graph generator 107 | :param in_graph: referenced graph, type: nx.Graph 108 | :param config: configure object 109 | :return: generated graphs, type: list of nx.Graph 110 | """ 111 | parameters = generator_optimization(in_graph, config.model.name) 112 | return generate_new_graph(parameters, config.model.name, repeat=config.num_gen) 113 | -------------------------------------------------------------------------------- /GraphGenerator/models/bigg_ops/tree_clib/include/struct_util.h: -------------------------------------------------------------------------------- 1 | // Copyright 2020 The Google Research Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
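// Rough roles of the classes declared below (as far as can be read from this header):
// BitSet packs bit flags into 32-bit words (ibits = 32); GraphStruct holds one training
// graph as per-row edge lists plus a node permutation; JobCollect accumulates the
// per-level from/to index vectors that the model consumes when batching tree cells;
// and ColAutomata iterates over the column indices of a single row while its edge
// tree is being built.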
14 | 15 | #ifndef STRUCT_UTIL_H 16 | #define STRUCT_UTIL_H 17 | 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | class AdjRow; 25 | class AdjNode; 26 | 27 | const uint32_t ibits = 32; 28 | 29 | int num_ones(int n); 30 | 31 | class BitSet 32 | { 33 | public: 34 | BitSet(); 35 | BitSet(uint32_t _n_bits); 36 | BitSet left_shift(uint32_t n); 37 | BitSet or_op(BitSet& another); 38 | 39 | void set(uint32_t pos); 40 | bool get(uint32_t pos); 41 | 42 | uint32_t n_bits, n_macros; 43 | std::vector macro_bits; 44 | }; 45 | 46 | class GraphStruct 47 | { 48 | public: 49 | GraphStruct(int graph_id, int num_nodes, int num_edges, 50 | void* _edge_pairs = nullptr, int n_left = -1, int n_right = -1); 51 | 52 | void realize_nodes(int node_start, int node_end, 53 | int col_start, int col_end); 54 | GraphStruct* permute(); 55 | std::map > edge_list; 56 | std::vector active_rows; 57 | std::vector idx_map; 58 | int num_nodes, num_edges, graph_id; 59 | int node_start, node_end; 60 | }; 61 | 62 | extern std::vector graph_list; 63 | extern std::vector active_graphs; 64 | 65 | class AdjNode; 66 | 67 | class JobCollect 68 | { 69 | public: 70 | JobCollect(); 71 | void reset(); 72 | void build_row_indices(); 73 | void build_row_summary(); 74 | int add_job(AdjNode* node); 75 | void append_bool(std::vector< std::vector >& list, int depth, int val); 76 | std::vector global_job_nodes; 77 | std::vector job_position; 78 | std::vector has_ch; 79 | std::vector< std::vector > has_left, has_right, num_left, num_right; 80 | std::vector< std::vector > is_internal; 81 | std::vector n_cell_job_per_level, n_bin_job_per_level; 82 | std::vector< std::vector > bot_froms[2], bot_tos[2], prev_froms[2], prev_tos[2]; // NOLINT 83 | std::vector< std::vector > binary_feat_nodes; 84 | std::vector row_bot_froms[2], row_bot_tos[2]; 85 | std::vector< std::vector > row_top_froms[2], row_top_tos[2], row_prev_froms[2], row_prev_tos[2]; // NOLINT 86 | std::vector layer_sizes; 87 | std::vector< std::unordered_map > tree_idx_map; 88 | 89 | std::vector next_state_froms; 90 | std::vector< std::vector > bot_left_froms, bot_left_tos, next_left_froms, next_left_tos; // NOLINT 91 | std::vector< std::vector > step_inputs, step_nexts, step_froms, step_tos, step_indices; // NOLINT 92 | int max_rowsum_steps, max_tree_depth, max_row_merge_steps; 93 | }; 94 | 95 | extern JobCollect job_collect; 96 | 97 | class ColAutomata 98 | { 99 | public: 100 | ColAutomata(std::vector& indices); 101 | 102 | void add_edge(int col_idx); 103 | int next_edge(); 104 | int last_edge(); 105 | bool has_edge(int range_start, int range_end); 106 | 107 | int* indices; 108 | int pos, num_indices; 109 | }; 110 | 111 | class AdjNode; 112 | 113 | template 114 | class PtHolder 115 | { 116 | public: 117 | PtHolder(); 118 | void reset(); 119 | void clear(); 120 | 121 | template 122 | PtType* get_pt(Args&&... 
args) 123 | { 124 | PtType* ret; 125 | if (cur_pos >= pt_buff.size()) 126 | { 127 | ret = new PtType(std::forward(args)...); 128 | pt_buff.push_back(ret); 129 | } else { 130 | ret = pt_buff[cur_pos]; 131 | ret->init(std::forward(args)...); 132 | } 133 | assert(cur_pos < pt_buff.size()); 134 | cur_pos++; 135 | return ret; 136 | } 137 | 138 | std::vector pt_buff; 139 | size_t cur_pos; 140 | }; 141 | 142 | 143 | #endif 144 | -------------------------------------------------------------------------------- /GraphGenerator/models/bigg_ops/tensor_ops.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The Google Research Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | # pylint: skip-file 20 | 21 | import torch 22 | from torch.nn import Module 23 | from torch.nn.parameter import Parameter 24 | from torch.autograd import Function 25 | import numpy as np 26 | # from bigg.common.consts import t_float 27 | t_float = torch.float32 28 | 29 | 30 | class MultiIndexSelectFunc(Function): 31 | @staticmethod 32 | def forward(ctx, idx_froms, idx_tos, *mats): 33 | assert len(idx_tos) == len(idx_froms) == len(mats) 34 | cols = mats[0].shape[1] 35 | assert all([len(x.shape) == 2 for x in mats]) 36 | assert all([x.shape[1] == cols for x in mats]) 37 | 38 | num_rows = sum([len(x) for x in idx_tos]) 39 | out = mats[0].new(num_rows, cols) 40 | 41 | for i, mat in enumerate(mats): 42 | x_from = idx_froms[i] 43 | x_to = idx_tos[i] 44 | if x_from is None: 45 | out[x_to] = mat.detach() 46 | else: 47 | assert len(x_from) == len(x_to) 48 | out[x_to] = mat[x_from].detach() 49 | 50 | ctx.idx_froms = idx_froms 51 | ctx.idx_tos = idx_tos 52 | ctx.shapes = [x.shape for x in mats] 53 | return out 54 | 55 | @staticmethod 56 | def backward(ctx, grad_output): 57 | idx_froms, idx_tos = ctx.idx_froms, ctx.idx_tos 58 | 59 | list_grad_mats = [None, None] 60 | for i in range(len(idx_froms)): 61 | x_from = idx_froms[i] 62 | x_to = idx_tos[i] 63 | if x_from is None: 64 | grad_mat = grad_output[x_to].detach() 65 | else: 66 | grad_mat = grad_output.new(ctx.shapes[i]).zero_() 67 | grad_mat[x_from] = grad_output[x_to].detach() 68 | list_grad_mats.append(grad_mat) 69 | 70 | return tuple(list_grad_mats) 71 | 72 | 73 | class MultiIndexSelect(Module): 74 | def forward(self, idx_froms, idx_tos, *mats): 75 | return MultiIndexSelectFunc.apply(idx_froms, idx_tos, *mats) 76 | 77 | multi_index_select = MultiIndexSelect() 78 | 79 | def test_multi_select(): 80 | a = Parameter(torch.randn(4, 2)) 81 | b = Parameter(torch.randn(3, 2)) 82 | d = Parameter(torch.randn(5, 2)) 83 | 84 | idx_froms = [[0, 1], [1, 2], [3, 4]] 85 | idx_tos = [[4, 5], [0, 1], [2, 3]] 86 | c = multi_index_select(idx_froms, idx_tos, a, b, d) 87 | print('===a===') 88 | print(a) 89 | print('===b===') 90 | print(b) 91 | print('===d===') 92 | 
print(d) 93 | print('===c===') 94 | print(c) 95 | 96 | t = torch.sum(c) 97 | t.backward() 98 | print(a.grad) 99 | print(b.grad) 100 | print(d.grad) 101 | 102 | 103 | class PosEncoding(Module): 104 | def __init__(self, dim, device, base=10000, bias=0): 105 | super(PosEncoding, self).__init__() 106 | 107 | p = [] 108 | sft = [] 109 | for i in range(dim): 110 | b = (i - i % 2) / dim 111 | p.append(base ** -b) 112 | if i % 2: 113 | sft.append(np.pi / 2.0 + bias) 114 | else: 115 | sft.append(bias) 116 | self.device = device 117 | self.sft = torch.tensor(sft, dtype=t_float).view(1, -1).to(device) 118 | self.base = torch.tensor(p, dtype=t_float).view(1, -1).to(device) 119 | 120 | def forward(self, pos): 121 | with torch.no_grad(): 122 | if isinstance(pos, list): 123 | pos = torch.tensor(pos, dtype=t_float).to(self.device) 124 | pos = pos.view(-1, 1) 125 | x = pos / self.base + self.sft 126 | return torch.sin(x) 127 | 128 | 129 | if __name__ == '__main__': 130 | # test_multi_select() 131 | 132 | pos_enc = PosEncoding(128, 'cpu') 133 | print(pos_enc([1, 2, 3])) 134 | -------------------------------------------------------------------------------- /GraphGenerator/metrics/mmd.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import numpy as np 3 | import networkx as nx 4 | import concurrent.futures 5 | from functools import partial 6 | PRINT_TIME=False 7 | 8 | 9 | def gaussian_tv(x, y, sigma=1.0): 10 | support_size = max(len(x), len(y)) 11 | # convert histogram values x and y to float, and make them equal len 12 | x = x.astype(np.float) 13 | y = y.astype(np.float) 14 | if len(x) < len(y): 15 | x = np.hstack((x, [0.0] * (support_size - len(x)))) 16 | elif len(y) < len(x): 17 | y = np.hstack((y, [0.0] * (support_size - len(y)))) 18 | 19 | dist = np.abs(x - y).sum() / 2.0# one norm 20 | return np.exp(-dist * dist / (2 * sigma * sigma)) 21 | 22 | 23 | def kernel_parallel_unpacked(x, samples2, kernel): 24 | d = 0 25 | for s2 in samples2: 26 | d += kernel(x, s2) 27 | return d 28 | 29 | 30 | def kernel_parallel_worker(t): 31 | return kernel_parallel_unpacked(*t) 32 | 33 | 34 | def disc(samples1, samples2, kernel, is_parallel=True, *args, **kwargs): 35 | ''' Discrepancy between 2 samples ''' 36 | d = 0 37 | 38 | if not is_parallel: 39 | for s1 in samples1: 40 | for s2 in samples2: 41 | d += kernel(s1, s2, *args, **kwargs) 42 | else: 43 | # with concurrent.futures.ProcessPoolExecutor() as executor: 44 | # for dist in executor.map(kernel_parallel_worker, [ 45 | # (s1, samples2, partial(kernel, *args, **kwargs)) for s1 in samples1 46 | # ]): 47 | # d += dist 48 | 49 | with concurrent.futures.ThreadPoolExecutor() as executor: 50 | for dist in executor.map(kernel_parallel_worker, [ 51 | (s1, samples2, partial(kernel, *args, **kwargs)) for s1 in samples1 52 | ]): 53 | d += dist 54 | 55 | d /= len(samples1) * len(samples2) 56 | return d 57 | 58 | 59 | def compute_mmd(samples1, samples2, kernel, is_hist=True, *args, **kwargs): 60 | ''' MMD between two samples ''' 61 | print("--- MMD of sample1: {}, sample2:{}.---".format(len(samples1),len(samples2))) 62 | # normalize histograms into pmf 63 | if is_hist: 64 | samples1 = [s1 / np.sum(s1) for s1 in samples1] 65 | samples2 = [s2 / np.sum(s2) for s2 in samples2] 66 | # print('===============================') 67 | # print('s1: ', disc(samples1, samples1, kernel, *args, **kwargs)) 68 | # print('--------------------------') 69 | # print('s2: ', disc(samples2, samples2, kernel, *args, **kwargs)) 70 | # 
print('--------------------------') 71 | # print('cross: ', disc(samples1, samples2, kernel, *args, **kwargs)) 72 | # print('===============================') 73 | return disc(samples1, samples1, kernel, *args, **kwargs) + \ 74 | disc(samples2, samples2, kernel, *args, **kwargs) - \ 75 | 2 * disc(samples1, samples2, kernel, *args, **kwargs) 76 | 77 | 78 | def degree_worker(G): 79 | return np.array(nx.degree_histogram(G)) 80 | 81 | 82 | def degree_stats(graph_ref_list, graph_pred_list, is_parallel=True): 83 | ''' Compute the distance between the degree distributions of two unordered sets of graphs. 84 | Args: 85 | graph_ref_list, graph_target_list: two lists of networkx graphs to be evaluated 86 | ''' 87 | sample_ref = [] 88 | sample_pred = [] 89 | # in case an empty graph is generated 90 | graph_pred_list_remove_empty = [ 91 | G for G in graph_pred_list if not G.number_of_nodes() == 0 92 | ] 93 | 94 | prev = datetime.datetime.now() 95 | if is_parallel: 96 | with concurrent.futures.ThreadPoolExecutor() as executor: 97 | for deg_hist in executor.map(degree_worker, graph_ref_list): 98 | sample_ref.append(deg_hist) 99 | with concurrent.futures.ThreadPoolExecutor() as executor: 100 | for deg_hist in executor.map(degree_worker, graph_pred_list_remove_empty): 101 | sample_pred.append(deg_hist) 102 | else: 103 | for i in range(len(graph_ref_list)): 104 | degree_temp = np.array(nx.degree_histogram(graph_ref_list[i])) 105 | sample_ref.append(degree_temp) 106 | for i in range(len(graph_pred_list_remove_empty)): 107 | degree_temp = np.array(nx.degree_histogram(graph_pred_list_remove_empty[i])) 108 | sample_pred.append(degree_temp) 109 | 110 | # mmd_dist = compute_mmd(sample_ref, sample_pred, kernel=gaussian_emd) 111 | # mmd_dist = compute_mmd(sample_ref, sample_pred, kernel=emd) 112 | mmd_dist = compute_mmd(sample_ref, sample_pred, kernel=gaussian_tv, sigma=2.0) 113 | # mmd_dist = compute_mmd(sample_ref, sample_pred, kernel=gaussian) 114 | 115 | elapsed = datetime.datetime.now() - prev 116 | if PRINT_TIME: 117 | print('Time computing degree mmd: ', elapsed) 118 | return mmd_dist 119 | 120 | 121 | def print_result(metrics, graph_ref, graph_pred): 122 | output = {} 123 | if 'degree' in metrics: 124 | eval_metric = degree_stats(graph_ref, graph_pred) 125 | print('Degree: {}'.format(eval_metric)) 126 | output['degree']=eval_metric 127 | return output 128 | 129 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GraphGenerator 2 | [![CodeSize](https://img.shields.io/github/languages/code-size/xiangsheng1325/GraphGenerator?style=plastic)](https://github.com/xiangsheng1325/GraphGenerator) 3 | [![Contributor](https://img.shields.io/github/contributors/xiangsheng1325/GraphGenerator?style=plastic&color=blue)](https://github.com/xiangsheng1325/GraphGenerator/graphs/contributors) 4 | [![Activity](https://img.shields.io/github/commit-activity/m/xiangsheng1325/GraphGenerator?style=plastic)](https://github.com/xiangsheng1325/GraphGenerator/pulse) 5 | 6 | Toolkit for simulating observed graphs, generating new graphs and evaluating graph generators. 
7 | 8 | ## Installation 9 | ### Environments 10 | [![Python](https://img.shields.io/badge/Python-v3.6.8-blue?style=plastic)](https://www.python.org/) 11 | [![PyTorch](https://img.shields.io/badge/PyTorch-v1.8.1-green?style=plastic)](https://pypi.org/project/torch/) 12 | [![Tensorflow](https://img.shields.io/badge/Tensorflow-v2.4.0-blue?style=plastic)](https://pypi.org/project/tensorflow/) 13 | 14 | To use the deep-learning-based graph generators, a deep learning framework such as PyTorch or TensorFlow is required. 15 | We prefer PyTorch as the dependency. 16 | 17 | **1. Install PyTorch** 18 | ```bash 19 | pip install torch==1.8.1+cu111 torchvision==0.9.1+cu111 torchaudio==0.8.1 -f https://download.pytorch.org/whl/torch_stable.html 20 | ``` 21 | **2. Clone and install** 22 | ```bash 23 | git clone https://github.com/xiangsheng1325/GraphGenerator.git 24 | cd GraphGenerator 25 | pip install -r requirements.txt 26 | pip install -e . 27 | ``` 28 | ### Dependencies 29 | Some graph generators require specific dependencies, which are listed here: 30 | 31 | |Graph Generator|Dependencies|Graph Generator|Dependencies| 32 | |--|--|--|--| 33 | |ARVGA|Tensorflow|GraphRNN|Pytorch| 34 | |BiGG|Pytorch|MMSB|Tensorflow Edward| 35 | |BTER|MATLAB|NetGAN|Tensorflow| 36 | |CondGEN|Pytorch|SBMGNN|Tensorflow| 37 | |GRAN|Pytorch|SGAE|Pytorch| 38 | |Graphite|Pytorch/Tensorflow|VGAE|Pytorch| 39 | 40 | 41 | ### Project organization 42 | This project is modularized to make further contributions easier. 43 | Please organize this project according to the following structure: 44 | 45 | ``` 46 | GraphGenerator/ 47 | |___GraphGenerator/ # source code 48 | | |___models/ # graph generator implementations 49 | | | |___bigg/ 50 | | | | |___tree_model.py 51 | | | | |___... 52 | | | |___sbm.py 53 | | | |___... 54 | | |___metrics/ 55 | | | |___mmd.py 56 | | | |___... 57 | | |___train.py 58 | | |___... 59 | | 60 | |___setup.py 61 | | 62 | |___config/ # detailed configurations of complex models 63 | | |___graphite.yaml 64 | | |___... 65 | | 66 | |___data/ # raw data / cooked data 67 | | |___google.txt 68 | | |___... 69 | | 70 | |___exp # trained model and generated graphs 71 | | |___VGAE/ 72 | | |___... 73 | | 74 | |___... 75 | ``` 76 | 77 | ## GraphGenerator Usage 78 | Here are some examples of using this toolkit. 79 | 80 | **1. Preprocess data** 81 | 82 | We prefer to convert graph data into a unified data type. If the input data is already prepared, this step can be skipped. 83 | 84 | _Example:_ 85 | * run `python -m GraphGenerator --phase preprocessing -i google.txt -o google.graph` 86 | 87 | **2. Test the usage of graph generator** 88 | 89 | Before training the deep-learning-based graph generators, 90 | we recommend testing whether there are bugs in the model implementations. 91 | If the generator runs well, this step can be skipped. 92 | 93 | _Example:_ 94 | * run `python -m GraphGenerator --phase test -g bigg --config config/bigg.yaml` 95 | 96 | Note that some algorithms may be affected by the CUDA version. (For example, BiGG may encounter problems during testing; 97 | please refer to [this page](https://github.com/xiangsheng1325/GraphGenerator/blob/main/GraphGenerator/models/bigg_ops/tree_clib/reame.md) 98 | to find resolutions.) 99 | 100 | 101 | **3. Train and infer new graphs** 102 | 103 | Enjoy your graph simulation and graph data generation. 104 | 105 | _Example:_ 106 | * run `python -m GraphGenerator --phase train -i google.graph -g vgae --config config/vgae.yaml` 107 | 108 | **4.
Evaluate the results** 109 | 110 | Calculate the distance between two sets of graphs to evaluate the experimental results. 111 | 112 | _Example:_ 113 | * run `python -m GraphGenerator --phase evaluate -i new_google.graphs -r google.graph` 114 | 115 | # Reference 116 | Please use the following BibTeX entry to cite this work if it contributes to your publications. 117 | 118 | BibTeX: 119 | ``` 120 | @Article{Xiang2021General, 121 | author={Xiang, Sheng and Wen, Dong and Cheng, Dawei and Zhang, Ying and Qin, Lu and Qian, Zhengping and Lin, Xuemin}, 122 | title={General Graph Generators: Experiments, Analyses, and Improvements}, 123 | url={https://doi.org/10.1007/s00778-021-00701-5}, 124 | doi={10.1007/s00778-021-00701-5}, 125 | journal={The VLDB Journal}, 126 | publisher={Springer}, 127 | issn={0949-877X}, 128 | pages={1--29}, 129 | year={2021}, 130 | month={Oct}, 131 | day={07}, 132 | } 133 | ``` 134 | -------------------------------------------------------------------------------- /GraphGenerator/models/bter_ops/ccperdeg.m: -------------------------------------------------------------------------------- 1 | function [cd,gcc,info] = ccperdeg(G,varargin) 2 | %CCPERDEG Mean clustering coefficient per degree 3 | % 4 | % CD = CCPERDEG(G) computes the per-degree clustering coefficient, i.e., 5 | % CD(d) is the mean clustering coefficient for nodes of degree d. If bins 6 | % are used, CD(d) returns the clustering coefficient for the bin 7 | % containing degree d. 8 | % 9 | % [CD,GCC] = CCPERDEG(G) also returns the global clustering coefficient. 10 | % 11 | % [CD,GCC,INFO] = CCPERDEG(G) also returns additional information. 12 | % 13 | % [...] = CCPERDEG(G,'param',value) accepts parameter-value pairs: 14 | % 15 | % - 'nsamples' - Number of samples to use. Set to zero for exact 16 | % calculation. Default: 0 17 | % - 'bins' - Specify the degree bins for binned data. Default: [] 18 | % - 'tau' - Specify tau-value for binning. Default: [] 19 | % - 'omega' - Specify omega-value for binning. Default: [] 20 | % - 'matlabbgl' - Specify use of MATLAB-BGL clusteringcoefficients 21 | % function rather than included code. Default: false 22 | % 23 | % Note that the 'bins' parameter overrides the 'tau' and 'omega' 24 | % specifications. Otherwise, both 'tau' and 'omega' must be specified to 25 | % create bins. 26 | % 27 | % See also TRICNT, BINDATA. 28 | % 29 | % Tamara G. Kolda, Ali Pinar, and others, FEASTPACK v1.1, Sandia National 30 | % Laboratories, SAND2013-4136W, http://www.sandia.gov/~tgkolda/feastpack/, 31 | % January 2014 32 | 33 | %% License 34 | % Copyright (c) 2014, Sandia National Laboratories 35 | % All rights reserved. 36 | % 37 | % Redistribution and use in source and binary forms, with or without 38 | % modification, are permitted provided that the following conditions are 39 | % met: 40 | % 41 | % # Redistributions of source code must retain the above copyright notice, 42 | % this list of conditions and the following disclaimer. 43 | % # Redistributions in binary form must reproduce the above copyright 44 | % notice, this list of conditions and the following disclaimer in the 45 | % documentation and/or other materials provided with the distribution. 46 | % 47 | % THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 48 | % IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 49 | % THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 50 | % PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 51 | % CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 52 | % EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 53 | % PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 54 | % PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 55 | % LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 56 | % NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 57 | % SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 58 | % 59 | % 60 | % Sandia National Laboratories is a multi-program laboratory managed and 61 | % operated by Sandia Corporation, a wholly owned subsidiary of Lockheed 62 | % Martin Corporation, for the U.S. Department of Energy's National Nuclear 63 | % Security Administration under contract DE-AC04-94AL85000. 64 | 65 | % ** Process inputs 66 | params = inputParser; 67 | params.addParamValue('nsamples', 0); 68 | params.addParamValue('bins',[]); 69 | params.addParamValue('tau', []); 70 | params.addParamValue('omega', []); 71 | params.addParamValue('matlabbgl', false); 72 | params.parse(varargin{:}); 73 | 74 | nsamples = params.Results.nsamples; 75 | bins = params.Results.bins; 76 | tau = params.Results.tau; 77 | omega = params.Results.omega; 78 | matlabbgl = params.Results.matlabbgl; 79 | 80 | 81 | % ** Create bins 82 | d = full(sum(G,2)); 83 | maxd = max(d); 84 | 85 | if isempty(bins) 86 | if isempty(omega) || isempty(tau) 87 | bins = (1:(maxd+1))'; 88 | else 89 | nbins = binlookup(maxd+1,omega,tau); 90 | bins = binstart((1:(nbins+1))',omega,tau); 91 | end 92 | end 93 | 94 | % ** 95 | if nsamples == 0 96 | 97 | [t,d,w] = tricnt(G,d,matlabbgl); 98 | [~,binId] = histc(d,bins); 99 | tf = binId > 0; 100 | binWedges = accumarray(binId(tf),w(tf)); 101 | nbins = length(binWedges); 102 | binTriangles = accumarray(binId(tf),t(tf),[nbins 1]); 103 | cdb = binTriangles ./ max(1,binWedges); 104 | gcc = sum(t)/sum(w); 105 | 106 | else 107 | cdb = ccperdegest(G,bins,nsamples); 108 | [~,binId] = histc(d,bins); 109 | tf = binId > 0; 110 | w = d.*(d-1)/2; 111 | binWedges = accumarray(binId(tf),w(tf),size(cdb)); 112 | gcc = (binWedges'*cdb) / sum(binWedges); 113 | t = []; 114 | binTriangles = []; 115 | end 116 | 117 | [~,binId] = histc(1:maxd,bins); 118 | cd(1:maxd,1) = cdb(binId); 119 | 120 | % Shorten the bins array to be the same length as cdb 121 | idx = find(cdb > 0, 1, 'last'); 122 | cdb = cdb(1:idx); 123 | bins = bins(1:idx); 124 | 125 | 126 | % Create info 127 | info.nsamples = nsamples; 128 | info.gcc = gcc; 129 | info.bins = bins; 130 | info.cc_per_bin = cdb; 131 | info.deg_per_vertex = d; 132 | info.wedges_per_vertex = w; 133 | info.tris_per_vertex = t; 134 | info.wedges_per_bin = binWedges; 135 | info.tris_per_bin = binTriangles; 136 | -------------------------------------------------------------------------------- /GraphGenerator/models/mmsb.py: -------------------------------------------------------------------------------- 1 | """Stochastic block model.""" 2 | 3 | import argparse 4 | import os 5 | from time import time 6 | 7 | import edward as ed 8 | # import edward2 as ed 9 | import networkx as nx 10 | import numpy as np 11 | import tensorflow as tf 12 | import tensorflow_probability as tfp 13 | from tensorflow.python.ops.distributions.distributions import Bernoulli, Multinomial, Beta, Dirichlet 14 | from edward.models.point_mass import distributions_PointMass as PointMass 15 | from observations import karate 16 | from 
sklearn.metrics.cluster import adjusted_rand_score 17 | 18 | 19 | CUDA = 0 20 | # ed.set_seed(int(time())) 21 | 22 | 23 | # ed.set_seed(42) 24 | 25 | # DATA 26 | # X_data, Z_true = karate("data") 27 | 28 | def disjoint_cliques_test_graph(num_cliques, clique_size): 29 | G = nx.disjoint_union_all([nx.complete_graph(clique_size) for _ in range(num_cliques)]) 30 | return nx.to_numpy_matrix(G) 31 | 32 | 33 | def mmsb(N, K, data): 34 | # sparsity 35 | rho = 0.3 36 | # MODEL 37 | # probability of belonging to each of K blocks for each node 38 | gamma = Dirichlet(concentration=tf.ones([K])) 39 | # block connectivity 40 | Pi = Beta(concentration0=tf.ones([K, K]), concentration1=tf.ones([K, K])) 41 | # probability of belonging to each of K blocks for all nodes 42 | Z = Multinomial(total_count=1.0, probs=gamma, sample_shape=N) 43 | # adjacency 44 | X = Bernoulli(probs=(1 - rho) * tf.matmul(Z, tf.matmul(Pi, tf.transpose(Z)))) 45 | 46 | # INFERENCE (EM algorithm) 47 | qgamma = PointMass(params=tf.nn.softmax(tf.Variable(tf.random_normal([K])))) 48 | qPi = PointMass(params=tf.nn.sigmoid(tf.Variable(tf.random_normal([K, K])))) 49 | qZ = PointMass(params=tf.nn.softmax(tf.Variable(tf.random_normal([N, K])))) 50 | 51 | # qgamma = Normal(loc=tf.get_variable("qgamma/loc", [K]), 52 | # scale=tf.nn.softplus( 53 | # tf.get_variable("qgamma/scale", [K]))) 54 | # qPi = Normal(loc=tf.get_variable("qPi/loc", [K, K]), 55 | # scale=tf.nn.softplus( 56 | # tf.get_variable("qPi/scale", [K, K]))) 57 | # qZ = Normal(loc=tf.get_variable("qZ/loc", [N, K]), 58 | # scale=tf.nn.softplus( 59 | # tf.get_variable("qZ/scale", [N, K]))) 60 | 61 | # inference = ed.KLqp({gamma: qgamma, Pi: qPi, Z: qZ}, data={X: data}) 62 | inference = ed.MAP({gamma: qgamma, Pi: qPi, Z: qZ}, data={X: data}) 63 | 64 | # inference.run() 65 | n_iter = 6000 66 | inference.initialize(optimizer=tf.train.AdamOptimizer(learning_rate=0.01), n_iter=n_iter) 67 | 68 | tf.global_variables_initializer().run() 69 | 70 | for _ in range(inference.n_iter): 71 | info_dict = inference.update() 72 | inference.print_progress(info_dict) 73 | 74 | inference.finalize() 75 | print('qgamma after: ', qgamma.mean().eval()) 76 | return qZ.mean().eval(), qPi.eval() 77 | 78 | 79 | def arg_parse(): 80 | parser = argparse.ArgumentParser(description='MMSB arguments.') 81 | parser.add_argument('--dataset', dest='dataset', 82 | help='Input dataset.') 83 | parser.add_argument('--K', dest='K', type=int, 84 | help='Number of blocks.') 85 | parser.add_argument('--samples-per-G', dest='samples', type=int, 86 | help='Number of samples for every graph.') 87 | 88 | parser.set_defaults(dataset='grid', 89 | K=4, 90 | samples=1) 91 | return parser.parse_args() 92 | 93 | 94 | def graph_gen_from_blockmodel(B, Z): 95 | n_blocks = len(B) 96 | B = np.array(B) 97 | Z = np.array(Z) 98 | adj_prob = np.dot(Z, np.dot(B, np.transpose(Z))) 99 | adj = np.random.binomial(1, adj_prob * 0.3) 100 | return nx.from_numpy_matrix(adj) 101 | 102 | 103 | if __name__ == '__main__': 104 | prog_args = arg_parse() 105 | os.environ['CUDA_VISIBLE_DEVICES'] = str(CUDA) 106 | print('CUDA', CUDA) 107 | 108 | X_dataset = [] 109 | # X_data = nx.to_numpy_matrix(nx.connected_caveman_graph(4, 7)) 110 | if prog_args.dataset == 'clique_test': 111 | X_data = disjoint_cliques_test_graph(4, 7) 112 | X_dataset.append(X_data) 113 | elif prog_args.dataset == 'grid': 114 | graphs = [] 115 | for i in range(10, 20): 116 | for j in range(10, 20): 117 | graphs.append(nx.grid_2d_graph(i, j)) 118 | X_dataset = [nx.to_numpy_matrix(g) for g in graphs] 
119 | 120 | print('Number of graphs: ', len(X_dataset)) 121 | K = prog_args.K # number of clusters 122 | gen_graphs = [] 123 | for i in range(len(X_dataset)): 124 | if i % 5 == 0: 125 | print(i) 126 | X_data = X_dataset[i] 127 | N = X_data.shape[0] # number of vertices 128 | 129 | Zp, B = mmsb(N, K, X_data) 130 | # print("Block: ", B) 131 | Z_pred = Zp.argmax(axis=1) 132 | print("Result (label flip can happen):") 133 | # print("prob: ", Zp) 134 | print("Predicted") 135 | print(Z_pred) 136 | # print(Z_true) 137 | # print("Adjusted Rand Index =", adjusted_rand_score(Z_pred, Z_true)) 138 | for j in range(prog_args.samples): 139 | gen_graphs.append(graph_gen_from_blockmodel(B, Zp)) 140 | 141 | -------------------------------------------------------------------------------- /GraphGenerator/models/bigg_ops/tree_clib/src/lib/tree_util.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "config.h" // NOLINT 6 | #include "tree_util.h" // NOLINT 7 | #include "struct_util.h" // NOLINT 8 | 9 | 10 | AdjNode::AdjNode(AdjNode* parent, int row, int col_begin, int col_end, 11 | int depth) 12 | { 13 | this->init(parent, row, col_begin, col_end, depth); 14 | } 15 | 16 | AdjNode::~AdjNode() 17 | { 18 | if (this->lch != nullptr) 19 | delete this->lch; 20 | if (this->rch != nullptr) 21 | delete this->rch; 22 | } 23 | 24 | void AdjNode::init(AdjNode* parent, int row, int col_begin, int col_end, 25 | int depth) 26 | { 27 | this->lch = nullptr; 28 | this->rch = nullptr; 29 | this->parent = parent; 30 | this->row = row; 31 | this->col_begin = col_begin; 32 | this->col_end = col_end; 33 | this->depth = depth; 34 | this->mid = (col_begin + col_end) / 2; 35 | this->n_cols = col_end - col_begin; 36 | this->is_lowlevel = this->n_cols <= cfg::bits_compress; 37 | this->is_leaf = (this->n_cols <= 1); 38 | this->is_root = (this->parent == nullptr); 39 | if (is_lowlevel) 40 | this->bits_rep = BitSet(cfg::bits_compress); 41 | this->has_edge = false; 42 | this->job_idx = -1; 43 | } 44 | 45 | void AdjNode::update_bits() 46 | { 47 | if (!is_lowlevel) 48 | return; 49 | if (is_leaf) 50 | { 51 | if (has_edge) 52 | bits_rep.set(0); 53 | } else { 54 | bits_rep = lch->bits_rep.left_shift(rch->n_cols); 55 | bits_rep = bits_rep.or_op(rch->bits_rep); 56 | } 57 | } 58 | 59 | void AdjNode::split() 60 | { 61 | if (this->lch != nullptr && this->rch != nullptr) 62 | return; 63 | if (this->is_leaf) 64 | return; 65 | this->lch = node_holder.get_pt(this, row, col_begin, mid, depth + 1); 66 | this->rch = node_holder.get_pt(this, row, mid, col_end, depth + 1); 67 | } 68 | 69 | AdjRow::AdjRow(int row, int col_start, int col_end) 70 | { 71 | init(row, col_start, col_end); 72 | } 73 | 74 | AdjRow::~AdjRow() 75 | { 76 | if (this->root != nullptr) 77 | delete this->root; 78 | } 79 | 80 | void AdjRow::init(int row, int col_start, int col_end) 81 | { 82 | this->row = row; 83 | assert(!cfg::directed); 84 | int max_col = row; 85 | if (cfg::self_loop) 86 | max_col += 1; 87 | if (col_start < 0 || col_end < 0) 88 | { 89 | col_start = 0; 90 | col_end = max_col; 91 | } 92 | this->root = node_holder.get_pt(nullptr, row, col_start, col_end, 0); 93 | } 94 | 95 | 96 | void AdjRow::insert_edges(std::vector& col_indices) 97 | { 98 | auto* col_sm = new ColAutomata(col_indices); 99 | this->add_edges(this->root, col_sm); 100 | delete col_sm; 101 | } 102 | 103 | void AdjRow::add_edges(AdjNode* node, ColAutomata* col_sm) 104 | { 105 | if (node->is_root) 106 | { 107 | node->has_edge = 
col_sm->num_indices > 0; 108 | job_collect.has_ch.push_back(node->has_edge); 109 | } else { 110 | node->has_edge = true; 111 | } 112 | if (!node->has_edge) 113 | return; 114 | job_collect.append_bool(job_collect.is_internal, node->depth, 115 | !(node->is_leaf)); 116 | if (node->is_leaf) { 117 | col_sm->add_edge(node->col_begin); 118 | node->update_bits(); 119 | } else { 120 | node->split(); 121 | bool has_left = (col_sm->next_edge() < node->mid); 122 | if (has_left) 123 | this->add_edges(node->lch, col_sm); 124 | job_collect.append_bool(job_collect.has_left, node->depth, has_left); 125 | job_collect.append_bool(job_collect.num_left, node->depth, 126 | node->lch->n_cols); 127 | bool has_right = has_left ? 128 | col_sm->has_edge(node->mid, node->col_end) : true; 129 | if (has_right) 130 | this->add_edges(node->rch, col_sm); 131 | job_collect.append_bool(job_collect.has_right, node->depth, has_right); 132 | job_collect.append_bool(job_collect.num_right, node->depth, 133 | node->rch->n_cols); 134 | node->update_bits(); 135 | node->job_idx = job_collect.add_job(node); 136 | 137 | int cur_idx = (int)job_collect.has_left[node->depth].size() - 1; 138 | auto* ch = node->lch; 139 | if (ch->has_edge && !ch->is_leaf && !ch->is_lowlevel) 140 | { 141 | int pos = job_collect.job_position[ch->job_idx]; 142 | job_collect.append_bool(job_collect.next_left_froms, node->depth, 143 | pos); 144 | job_collect.append_bool(job_collect.next_left_tos, node->depth, 145 | cur_idx); 146 | } else { 147 | int bid = ch->has_edge ? 1 : 0; 148 | if (ch->has_edge && !ch->is_leaf) 149 | bid = 2 + job_collect.job_position[ch->job_idx]; 150 | job_collect.append_bool(job_collect.bot_left_froms, node->depth, 151 | bid); 152 | job_collect.append_bool(job_collect.bot_left_tos, node->depth, 153 | cur_idx); 154 | } 155 | } 156 | } 157 | 158 | 159 | PtHolder node_holder; 160 | PtHolder row_holder; 161 | -------------------------------------------------------------------------------- /GraphGenerator/__main__.py: -------------------------------------------------------------------------------- 1 | import argparse, sys, pickle, warnings, os, torch 2 | 3 | # import torch.cuda 4 | 5 | warnings.filterwarnings("ignore") 6 | from GraphGenerator.preprocessing import dataio 7 | from GraphGenerator.utils.arg_utils import get_config, set_device 8 | import pandas as pd 9 | 10 | 11 | def print_variables(vdict, name="args"): 12 | print("-----------------------------------------") 13 | print("|This is the summary of {}:".format(name)) 14 | var = vdict 15 | for i in var: 16 | if var[i] is None: 17 | continue 18 | print("|{:11}\t: {}".format(i, var[i])) 19 | print("-----------------------------------------") 20 | 21 | 22 | if __name__ == '__main__': 23 | # get arguments 24 | parser = argparse.ArgumentParser() 25 | parser.add_argument("-p", "--phase", help="Choose phase.", default="preprocessing", type=str, 26 | choices=["preprocessing", "train", "evaluate", "test"], 27 | required=True) 28 | parser.add_argument("-i", "--input", help="Path of input file. Example:```-i google.txt```", default=None) 29 | parser.add_argument("-o", "--output", help="Specify the name of output file.", default=None) 30 | parser.add_argument("-c", "--config", help="Specify the path of config file.", default=None) 31 | parser.add_argument("-g", "--generator", help="choose the generator. 
Example:```-g sbm```", default="vgae", 32 | choices=["e-r", "b-a", "w-s", "rtg", "bter", "sbm", "dcsbm", "rmat", "kronecker", 33 | "mmsb", "vgae", "graphite", "sbmgnn", "graphrnn", "gran", "bigg", "arvga", 34 | "netgan", "condgen", "sgae"]) 35 | parser.add_argument("-e", "--evaluate", help="choose the evaluating metrics.", default=None) 36 | parser.add_argument("-r", "--ref", help="Path of referenced graphs(Only required in evaluate phase)", default=None) 37 | args = parser.parse_args() 38 | print_variables(vars(args)) 39 | if args.phase == 'preprocessing': 40 | from GraphGenerator.preprocessing import utils 41 | tmp_path = args.input 42 | print("# Load edgelist...") 43 | graph = utils.edgelist_to_graph(tmp_path) 44 | graphlist = [graph] 45 | print("# Save graphlist...") 46 | if args.output is None: 47 | output_name = "{}.graphs".format(args.input) 48 | else: 49 | output_name = args.output 50 | dataio.save_data(graphlist, name=output_name) 51 | 52 | elif args.phase == 'train': 53 | config = get_config(args.config) 54 | set_device(config) 55 | from GraphGenerator.train import train_base as train 56 | print("Start loading data...") 57 | input_data = dataio.load_data(args.input) 58 | if args.config is None: 59 | args.config = "config/{}.yaml".format(args.generator) 60 | # os.environ["CUDA_VISIBLE_DEVICES"] = str(config.gpu) 61 | print("Start (training and) inferencing graph...") 62 | output_data = [] 63 | if isinstance(input_data, list): 64 | for graph in input_data: 65 | tmp_data = train.train_and_inference(graph, args.generator, config=config) 66 | if isinstance(tmp_data, list): 67 | output_data.extend(tmp_data) 68 | else: 69 | output_data.append(tmp_data) 70 | else: 71 | tmp_data = train.train_and_inference(input_data, args.generator) 72 | if isinstance(tmp_data, list): 73 | output_data.extend(tmp_data) 74 | else: 75 | output_data.append(tmp_data) 76 | print("Start saving generated graphs...") 77 | if args.output is None: 78 | output_name = "{}_to_{}.graphs".format(config.dataset.name, args.generator) 79 | else: 80 | output_name = args.output 81 | dataio.save_data(output_data, name=os.path.join(config.exp_dir, config.exp_name, output_name)) 82 | elif args.phase == 'evaluate': 83 | config = get_config(args.config) 84 | set_device(config) 85 | if args.evaluate == 'efficiency': 86 | from GraphGenerator.evaluate.efficiency import eval_efficiency 87 | print("Start evaluating the efficiency of graph generator [{}].".format(args.generator)) 88 | result = eval_efficiency(args.generator, config) 89 | elif args.evaluate == 'performance': 90 | from GraphGenerator.metrics import mmd 91 | print("Start evaluating the quality of generated graphs...") 92 | graphs_ref = dataio.load_data(args.ref) 93 | graphs_pred = dataio.load_data(args.input) 94 | result = mmd.print_result(args.evaluate, graphs_ref, graphs_pred) 95 | if args.output is None: 96 | output_name = "{}_to_{}.csv".format(args.ref, args.input) 97 | else: 98 | output_name = args.output 99 | tmp_pd = pd.DataFrame(result) 100 | tmp_pd.to_csv(output_name) 101 | elif args.phase == 'test': 102 | config = get_config(args.config) 103 | set_device(config) 104 | from GraphGenerator.test import test_generator 105 | print("Start test the package...") 106 | test_generator(args, config) 107 | print("Memory reserved: {} KiB.".format(torch.cuda.memory_reserved(config.device)//1024)) 108 | print("Test finished.") 109 | print("Done!") 110 | # sys.exit(0) 111 | -------------------------------------------------------------------------------- 
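For reference, the command-line phases above can also be driven programmatically. The following minimal sketch mirrors the `train` branch of `__main__.py`; the generator name `vgae`, the input file `google.graph`, and the output file name are illustrative assumptions (any generator/config pair from `config/` would be used the same way).

```python
# Minimal programmatic sketch of the `train` phase, following __main__.py above.
# Assumptions: config/vgae.yaml is available and google.graph was produced by
# the preprocessing phase; both names are placeholders.
from GraphGenerator.preprocessing import dataio
from GraphGenerator.utils.arg_utils import get_config, set_device
from GraphGenerator.train import train_base as train

config = get_config("config/vgae.yaml")
set_device(config)

input_data = dataio.load_data("google.graph")
graphs = input_data if isinstance(input_data, list) else [input_data]

output_data = []
for graph in graphs:
    generated = train.train_and_inference(graph, "vgae", config=config)
    output_data.extend(generated if isinstance(generated, list) else [generated])

dataio.save_data(output_data, name="google_to_vgae.graphs")
```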
/GraphGenerator/evaluate/distance.py: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # 3 | # Adapt from https://github.com/JiaxuanYou/graph-generation 4 | # 5 | ############################################################################### 6 | import pyemd 7 | import numpy as np 8 | import networkx as nx 9 | import concurrent.futures 10 | from functools import partial 11 | from scipy.linalg import toeplitz 12 | 13 | 14 | def vanilla_emd(x, y, distance_scaling=1.0): 15 | support_size = max(len(x), len(y)) 16 | d_mat = toeplitz(range(support_size)).astype(np.float) 17 | distance_mat = d_mat / distance_scaling 18 | 19 | # convert histogram values x and y to float, and make them equal len 20 | x = x.astype(np.float) 21 | y = y.astype(np.float) 22 | if len(x) < len(y): 23 | x = np.hstack((x, [0.0] * (support_size - len(x)))) 24 | elif len(y) < len(x): 25 | y = np.hstack((y, [0.0] * (support_size - len(y)))) 26 | 27 | emd = pyemd.emd(x, y, distance_mat) 28 | return emd 29 | 30 | 31 | def l2(x, y): 32 | dist = np.linalg.norm(x - y, 2) 33 | return dist 34 | 35 | 36 | def emd(x, y, sigma=1.0, distance_scaling=1.0): 37 | ''' EMD 38 | Args: 39 | x, y: 1D pmf of two distributions with the same support 40 | sigma: standard deviation 41 | ''' 42 | support_size = max(len(x), len(y)) 43 | d_mat = toeplitz(range(support_size)).astype(np.float) 44 | distance_mat = d_mat / distance_scaling 45 | 46 | # convert histogram values x and y to float, and make them equal len 47 | x = x.astype(np.float) 48 | y = y.astype(np.float) 49 | if len(x) < len(y): 50 | x = np.hstack((x, [0.0] * (support_size - len(x)))) 51 | elif len(y) < len(x): 52 | y = np.hstack((y, [0.0] * (support_size - len(y)))) 53 | 54 | return np.abs(pyemd.emd(x, y, distance_mat)) 55 | 56 | 57 | def gaussian_emd(x, y, sigma=1.0, distance_scaling=1.0): 58 | ''' Gaussian kernel with squared distance in exponential term replaced by EMD 59 | Args: 60 | x, y: 1D pmf of two distributions with the same support 61 | sigma: standard deviation 62 | ''' 63 | support_size = max(len(x), len(y)) 64 | d_mat = toeplitz(range(support_size)).astype(np.float) 65 | distance_mat = d_mat / distance_scaling 66 | 67 | # convert histogram values x and y to float, and make them equal len 68 | x = x.astype(np.float) 69 | y = y.astype(np.float) 70 | if len(x) < len(y): 71 | x = np.hstack((x, [0.0] * (support_size - len(x)))) 72 | elif len(y) < len(x): 73 | y = np.hstack((y, [0.0] * (support_size - len(y)))) 74 | 75 | emd = pyemd.emd(x, y, distance_mat) 76 | return np.exp(-emd * emd / (2 * sigma * sigma)) 77 | 78 | 79 | def gaussian(x, y, sigma=1.0): 80 | support_size = max(len(x), len(y)) 81 | # convert histogram values x and y to float, and make them equal len 82 | x = x.astype(np.float) 83 | y = y.astype(np.float) 84 | if len(x) < len(y): 85 | x = np.hstack((x, [0.0] * (support_size - len(x)))) 86 | elif len(y) < len(x): 87 | y = np.hstack((y, [0.0] * (support_size - len(y)))) 88 | 89 | dist = np.linalg.norm(x - y, 2)# two norm (二范数) 90 | return np.exp(-dist * dist / (2 * sigma * sigma)) 91 | 92 | 93 | def gaussian_tv(x, y, sigma=1.0): 94 | support_size = max(len(x), len(y)) 95 | # convert histogram values x and y to float, and make them equal len 96 | x = x.astype(np.float) 97 | y = y.astype(np.float) 98 | if len(x) < len(y): 99 | x = np.hstack((x, [0.0] * (support_size - len(x)))) 100 | elif len(y) < len(x): 101 | y = np.hstack((y, [0.0] * (support_size 
- len(y)))) 102 | 103 | dist = np.abs(x - y).sum() / 2.0# one norm 一范数 104 | return np.exp(-dist * dist / (2 * sigma * sigma)) 105 | 106 | 107 | def kernel_parallel_unpacked(x, samples2, kernel): 108 | d = 0 109 | for s2 in samples2: 110 | d += kernel(x, s2) 111 | return d 112 | 113 | 114 | def kernel_parallel_worker(t): 115 | return kernel_parallel_unpacked(*t) 116 | 117 | 118 | def disc(samples1, samples2, kernel, is_parallel=True, *args, **kwargs): 119 | ''' Discrepancy between 2 samples ''' 120 | d = 0 121 | 122 | if not is_parallel: 123 | for s1 in samples1: 124 | for s2 in samples2: 125 | d += kernel(s1, s2, *args, **kwargs) 126 | else: 127 | # with concurrent.futures.ProcessPoolExecutor() as executor: 128 | # for dist in executor.map(kernel_parallel_worker, [ 129 | # (s1, samples2, partial(kernel, *args, **kwargs)) for s1 in samples1 130 | # ]): 131 | # d += dist 132 | 133 | with concurrent.futures.ThreadPoolExecutor() as executor: 134 | for dist in executor.map(kernel_parallel_worker, [ 135 | (s1, samples2, partial(kernel, *args, **kwargs)) for s1 in samples1 136 | ]): 137 | d += dist 138 | 139 | d /= len(samples1) * len(samples2) 140 | return d 141 | 142 | 143 | def compute_mmd(samples1, samples2, kernel, is_hist=True, *args, **kwargs): 144 | ''' MMD between two samples ''' 145 | print("--- MMD of sample1: {}, sample2:{}.---".format(len(samples1),len(samples2))) 146 | # normalize histograms into pmf 147 | if is_hist: 148 | samples1 = [s1 / np.sum(s1) for s1 in samples1] 149 | samples2 = [s2 / np.sum(s2) for s2 in samples2] 150 | # print('===============================') 151 | # print('s1: ', disc(samples1, samples1, kernel, *args, **kwargs)) 152 | # print('--------------------------') 153 | # print('s2: ', disc(samples2, samples2, kernel, *args, **kwargs)) 154 | # print('--------------------------') 155 | # print('cross: ', disc(samples1, samples2, kernel, *args, **kwargs)) 156 | # print('===============================') 157 | return disc(samples1, samples1, kernel, *args, **kwargs) + \ 158 | disc(samples2, samples2, kernel, *args, **kwargs) - \ 159 | 2 * disc(samples1, samples2, kernel, *args, **kwargs) 160 | 161 | 162 | def compute_emd(samples1, samples2, kernel, is_hist=True, *args, **kwargs): 163 | ''' EMD between average of two samples ''' 164 | # normalize histograms into pmf 165 | if is_hist: 166 | samples1 = [np.mean(samples1)] 167 | samples2 = [np.mean(samples2)] 168 | # print('===============================') 169 | # print('s1: ', disc(samples1, samples1, kernel, *args, **kwargs)) 170 | # print('--------------------------') 171 | # print('s2: ', disc(samples2, samples2, kernel, *args, **kwargs)) 172 | # print('--------------------------') 173 | # print('cross: ', disc(samples1, samples2, kernel, *args, **kwargs)) 174 | # print('===============================') 175 | return disc(samples1, samples2, kernel, *args, 176 | **kwargs), [samples1[0], samples2[0]] 177 | -------------------------------------------------------------------------------- /GraphGenerator/models/graphite.py: -------------------------------------------------------------------------------- 1 | import torch, math 2 | from torch.nn.modules.module import Module 3 | from torch.nn.parameter import Parameter 4 | from torch.autograd import Variable 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | 9 | class GraphConvolution(Module): 10 | """ 11 | Simple GCN layer, similar to https://arxiv.org/abs/1609.02907 12 | """ 13 | 14 | def __init__(self, in_features, out_features, bias=True, 
act=lambda x: x): 15 | super(GraphConvolution, self).__init__() 16 | self.in_features = in_features 17 | self.out_features = out_features 18 | self.weight = Parameter(torch.FloatTensor(in_features, out_features)) 19 | self.act = act 20 | if bias: 21 | self.bias = Parameter(torch.FloatTensor(out_features)) 22 | else: 23 | self.register_parameter('bias', None) 24 | self.reset_parameters() 25 | 26 | def reset_parameters(self): 27 | stdv = 1. / math.sqrt(self.weight.size(1)) 28 | self.weight.data.uniform_(-stdv, stdv) 29 | if self.bias is not None: 30 | self.bias.data.uniform_(-stdv, stdv) 31 | 32 | def forward(self, input, adj): 33 | support = torch.mm(input, self.weight) 34 | output = torch.mm(adj, support) 35 | if self.bias is not None: 36 | output = output + self.bias 37 | return self.act(output) 38 | 39 | def __repr__(self): 40 | return self.__class__.__name__ + ' (' \ 41 | + str(self.in_features) + ' -> ' \ 42 | + str(self.out_features) + ')' 43 | 44 | 45 | class GraphiteLayer(Module): 46 | """ 47 | Simple Graphite layer, similar to https://arxiv.org/abs/1803.10459 48 | """ 49 | def __init__(self, input_dim, output_dim, bias=True, act=lambda x: x): 50 | super(GraphiteLayer, self).__init__() 51 | self.in_features = input_dim 52 | self.out_features = output_dim 53 | self.weight = Parameter(torch.FloatTensor(input_dim, output_dim)) 54 | self.act = act 55 | if bias: 56 | self.bias = Parameter(torch.FloatTensor(output_dim)) 57 | else: 58 | self.register_parameter('bias', None) 59 | self.reset_parameters() 60 | 61 | def reset_parameters(self): 62 | stdv = 1. / math.sqrt(self.weight.size(1)) 63 | self.weight.data.uniform_(-stdv, stdv) 64 | if self.bias is not None: 65 | self.bias.data.uniform_(-stdv, stdv) 66 | 67 | def forward(self, x, input1, input2): 68 | x = torch.mm(x, self.weight) 69 | if self.bias is not None: 70 | x = x + self.bias 71 | x = torch.mm(input1, torch.mm(input1.T, x))+torch.mm(input2, torch.mm(input2.T, x)) 72 | return self.act(x) 73 | 74 | 75 | class GraphiteVAE(nn.Module): 76 | def __init__(self, num_features, hidden_dim, embed_dim, decode_dim, act=F.relu, autoregressive_scalar=0.5): 77 | super(GraphiteVAE, self).__init__() 78 | self.hidden = GraphConvolution(num_features, hidden_dim, act=act) 79 | self.z_mean = GraphConvolution(hidden_dim, embed_dim, act=act) 80 | self.mean = None 81 | self.z_logv = GraphConvolution(hidden_dim, embed_dim, act=act) 82 | self.logv = None 83 | self.decode0 = GraphiteLayer(num_features, decode_dim, act=act) 84 | self.decode1 = GraphiteLayer(embed_dim, decode_dim, act=act) 85 | self.decode2 = GraphiteLayer(decode_dim, embed_dim, act=lambda x: x) 86 | self.autoregressive_scalar = autoregressive_scalar 87 | 88 | def forward(self, adj, x=None, device='cuda:0'): 89 | support = self.hidden(x, adj) 90 | self.mean = self.z_mean(support, adj) 91 | self.logv = self.z_logv(support, adj) 92 | noise = Variable(torch.rand(self.mean.shape[0], self.mean.shape[1], dtype=torch.float32)).to(device) 93 | support = noise * torch.exp(self.logv) + self.mean 94 | recon_1 = F.normalize(support, p=2, dim=1) 95 | recon_2 = torch.ones(recon_1.shape).to(device) 96 | recon_2 /= torch.sqrt(recon_2.sum(1, keepdim=True)) 97 | d = torch.mm(recon_1, torch.unsqueeze(recon_1.sum(0), 1)) + \ 98 | torch.mm(recon_2, torch.unsqueeze(recon_2.sum(0), 1)) 99 | d = d.pow(-0.5) 100 | recon_1 = recon_1*d 101 | recon_2 = recon_2*d 102 | update = self.decode1(support, recon_1, recon_2) + self.decode0(x, recon_1, recon_2) 103 | update = self.decode2(update, recon_1, recon_2) 104 | update =
(1-self.autoregressive_scalar) * support + self.autoregressive_scalar * update 105 | reconstructions = torch.mm(update, update.T) 106 | return reconstructions 107 | # return update 108 | 109 | 110 | class GraphiteAE(nn.Module): 111 | def __init__(self, num_features, hidden_dim, embed_dim, decode_dim, act=F.relu, autoregressive_scalar=0.5): 112 | super(GraphiteAE, self).__init__() 113 | self.hidden = GraphConvolution(num_features, hidden_dim, act=act) 114 | self.z_mean = GraphConvolution(hidden_dim, embed_dim, act=act) 115 | self.mean = None 116 | # self.z_logv = GraphConvolution(hidden_dim, embed_dim, act=act) 117 | # self.logv = None 118 | self.decode0 = GraphiteLayer(num_features, decode_dim, act=act) 119 | self.decode1 = GraphiteLayer(embed_dim, decode_dim, act=act) 120 | self.decode2 = GraphiteLayer(decode_dim, embed_dim, act=lambda x: x) 121 | self.autoregressive_scalar = autoregressive_scalar 122 | 123 | def forward(self, adj, x=None, device='cuda:0'): 124 | support = self.hidden(x, adj) 125 | support = self.z_mean(support, adj) 126 | # self.logv = self.z_logv(support, adj) 127 | # noise = Variable(torch.rand(self.mean.shape[0], self.mean.shape[1], dtype=torch.float32)).to(device) 128 | # support = noise * torch.exp(self.logv) + self.mean 129 | recon_1 = F.normalize(support, p=2, dim=1) 130 | recon_2 = torch.ones(recon_1.shape).to(device) 131 | recon_2 /= torch.sqrt(recon_2.sum(1, keepdim=True)) 132 | d = torch.mm(recon_1, torch.unsqueeze(recon_1.sum(0), 1)) + \ 133 | torch.mm(recon_2, torch.unsqueeze(recon_2.sum(0), 1)) 134 | d = d.pow(-0.5) 135 | recon_1 = recon_1 * d 136 | recon_2 = recon_2 * d 137 | update = self.decode1(support, recon_1, recon_2) + self.decode0(x, recon_1, recon_2) 138 | update = self.decode2(update, recon_1, recon_2) 139 | update = (1 - self.autoregressive_scalar) * support + self.autoregressive_scalar * update 140 | reconstructions = torch.mm(update, update.T) 141 | return reconstructions 142 | # return update 143 | 144 | -------------------------------------------------------------------------------- /GraphGenerator/models/bter_ops/bindata.m: -------------------------------------------------------------------------------- 1 | function [xx,yy,bins] = bindata(x,y,varargin) 2 | %BINDATA Exponential data binning. 3 | % 4 | % [XX,YY] = BINDATA(X,Y) logarithmically bins data. We assume both X and 5 | % Y are column vectors of the same length. By default, the k-th bin is 6 | % [2^k, 2^k-1]. The return values are defined as follows: XX(k) = 2^k is 7 | % the "bin label" and YY(k) = sum_i { Y(i) s.t. 2^k <= X(i) < 2^(k-1) } 8 | % is the "bin value". 9 | % 10 | % [XX,YY] = BINDATA([],Y) sets X = (1:length(Y))'. 11 | % 12 | % [XX,YY,BB] = BINDATA(...) returns the data bins, i.e., the k-th bin is 13 | % defined by [BB(k), BB(k+1)-1]. This can be useful if the meaning of XX 14 | % is modified by the parameters defined below. 15 | % 16 | % [XX,YY,BB] = BINDATA(X,Y,'param',value,...) also accepts 17 | % parameter-value pairs, as described below. 18 | % 19 | % --- Bin Definitions --- 20 | % The start of the k-th bin is given by 21 | % BB(k) = k + idx0 - 1 if k <= tau, else 22 | % BB(k) = ceil((omega.^(k-tau)-1)/(omega-1)) + tau + idx0 - 1. 23 | % 24 | % o 'omega' - Bin increase multiplier. Default: 2. 25 | % o 'tau' - Number of singleton bins. Default: 1. 26 | % o 'idx0' - Starting index to be binned. Default: 1. 27 | % 28 | % --- Binning Behavior --- 29 | % o 'bin' - Do binning? 
If false, returns X and Y unchanged unless X 30 | % was empty on input, in which case it's been reset to 31 | % (1:length(Y)). Default: true. 32 | % o 'ybinfun' - Function for the "bin value", used to combine all the 33 | % y-values in the same bin. Default: @sum. 34 | % o 'xbinfun' - Function for the "bin index". By default, XX(k)=BB(k). 35 | % If a function is specified, however, then this is used to combine 36 | % all the values in the same bin. Specifying @mean, for instance, 37 | % gives a weighted mean of the x-value as the bin index. 38 | % Default: [] (indicate to use the bin starts). 39 | % 40 | % --- Preprocessing --- 41 | % o 'prebin' - Collect values together for same x. This has the side 42 | % effect of ensuring the x values are dense, even for zero y values. 43 | % Default: false. 44 | % o 'prebinfun' - Specified function to combine values with same x. 45 | % Default: @mean. 46 | % 47 | % --- Postprocessing --- 48 | % o 'nozeros' - Remove any zero yy-values (and corresponding xx) from 49 | % the output. Default: false. 50 | % 51 | % EXAMPLES 52 | % y = [10 8 6 0 4]'; 53 | % [xx,yy] = bindata([],y) % Create 3 bins and gives total per bin. 54 | % [xx,yy] = bindata([],y,'bin',false) % Returns xx = (1:5)' and yy = y. 55 | % x = [2 3 5 5 6]'; 56 | % 57 | % See also BINLOOKUP, BINSTART. 58 | % 59 | % Tamara G. Kolda, Ali Pinar, and others, FEASTPACK v1.1, Sandia National 60 | % Laboratories, SAND2013-4136W, http://www.sandia.gov/~tgkolda/feastpack/, 61 | % January 2014 62 | 63 | %% License 64 | % Copyright (c) 2014, Sandia National Laboratories 65 | % All rights reserved. 66 | % 67 | % Redistribution and use in source and binary forms, with or without 68 | % modification, are permitted provided that the following conditions are 69 | % met: 70 | % 71 | % # Redistributions of source code must retain the above copyright notice, 72 | % this list of conditions and the following disclaimer. 73 | % # Redistributions in binary form must reproduce the above copyright 74 | % notice, this list of conditions and the following disclaimer in the 75 | % documentation and/or other materials provided with the distribution. 76 | % 77 | % THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 78 | % IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 79 | % THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 80 | % PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 81 | % CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 82 | % EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 83 | % PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 84 | % PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 85 | % LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 86 | % NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 87 | % SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 88 | % 89 | % 90 | % Sandia National Laboratories is a multi-program laboratory managed and 91 | % operated by Sandia Corporation, a wholly owned subsidiary of Lockheed 92 | % Martin Corporation, for the U.S. Department of Energy's National Nuclear 93 | % Security Administration under contract DE-AC04-94AL85000. 
94 | 95 | % ** Parse inputs 96 | params = inputParser; 97 | params.addParamValue('bin', true); 98 | params.addParamValue('prebin', false); 99 | params.addParamValue('prebinfun', @mean); 100 | params.addParamValue('omega', 2); 101 | params.addParamValue('tau', 1); 102 | params.addParamValue('idx0', 1); 103 | params.addParamValue('xbinfun', []); 104 | params.addParamValue('ybinfun', @sum); 105 | params.addParamValue('nozeros', false); 106 | params.parse(varargin{:}); 107 | 108 | binparams = {params.Results.omega, params.Results.tau, params.Results.idx0}; 109 | % ** Check and fix empty x 110 | if isempty(x) 111 | x = (1:length(y))'; 112 | end 113 | 114 | % ** Make sure both x and y are column vectors 115 | x = reshape(x,[],1); 116 | y = reshape(y,[],1); 117 | 118 | % ** Check inputs are the same length 119 | if numel(x) ~= numel(y) 120 | error('Input vectors are not the same length'); 121 | end 122 | 123 | % ** Check for no binning 124 | if ~params.Results.bin % No binning 125 | xx = x; 126 | yy = y; 127 | bins = []; 128 | return; 129 | end 130 | 131 | % ** Number of bins? 132 | nbins = binlookup(max(x), binparams{:}); 133 | 134 | % ** Pre-binning? 135 | % Pre-binning creates dense x and y arrays, with an entry for every 136 | % possible x-value. If there are multiple copies of x, then the default is 137 | % to take the mean of the associated y-values. 138 | if params.Results.prebin 139 | xmax = binstart(nbins+1, binparams{:})-1; 140 | y = accumarray(x,y,[xmax 1],params.Results.prebinfun); 141 | x = (1:xmax)'; 142 | end 143 | 144 | % ** Determine xx 145 | idx = binlookup(x, binparams{:}); 146 | if isempty(params.Results.xbinfun) 147 | xx = binstart((1:nbins)', binparams{:}); 148 | else 149 | xx = accumarray(idx, x, [], params.Results.xbinfun); 150 | end 151 | yy = accumarray(idx, y, [], params.Results.ybinfun); 152 | bins = binstart((1:(nbins+1))', binparams{:}); 153 | 154 | % ** Remove zero entries? 155 | if params.Results.nozeros 156 | tf = yy > 0; 157 | yy = yy(tf); 158 | xx = xx(tf); 159 | end -------------------------------------------------------------------------------- /GraphGenerator/evaluate/diff.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import powerlaw 3 | import networkx as nx 4 | import igraph,datetime 5 | import scipy.sparse as sp 6 | from scipy.sparse.csgraph import connected_components 7 | 8 | 9 | # eval utils 10 | def statistics_degrees(A_in): 11 | degrees = A_in.sum(axis=0) 12 | return np.max(degrees), np.min(degrees), np.mean(degrees) 13 | 14 | 15 | def statistics_LCC(A_in): 16 | unique, counts = np.unique(connected_components(A_in)[1], return_counts=True) 17 | LCC = np.where(connected_components(A_in)[1] == np.argmax(counts))[0] 18 | return LCC 19 | 20 | 21 | def statistics_wedge_count(A_in): 22 | degrees = A_in.sum(axis=0).flatten() 23 | return float(np.sum(np.array([0.5 * x * (x - 1) for x in degrees]))) 24 | 25 | 26 | def statistics_claw_count(A_in): 27 | degrees = A_in.sum(axis=0).flatten() 28 | return float(np.sum(np.array([1 / 6. 
* x * (x - 1) * (x - 2) for x in degrees]))) 29 | 30 | 31 | def statistics_triangle_count(A_in): 32 | A_graph = nx.from_numpy_matrix(A_in) 33 | triangles = nx.triangles(A_graph) 34 | t = np.sum(list(triangles.values())) / 3 35 | return int(t) 36 | 37 | 38 | def squares(g): 39 | cliques = g.cliques(min=4, max=4) 40 | result = [0] * g.vcount() 41 | for i, j, k, l in cliques: 42 | result[i] += 1 43 | result[j] += 1 44 | result[k] += 1 45 | result[l] += 1 46 | return result 47 | 48 | 49 | def statistics_square_count(A_in): 50 | A_igraph = igraph.Graph.Adjacency((A_in > 0).tolist()).as_undirected() 51 | return int(np.sum(squares(A_igraph)) / 4) 52 | 53 | 54 | def statistics_power_law_alpha(A_in): 55 | degrees = A_in.sum(axis=0).flatten() 56 | return powerlaw.Fit(degrees, xmin=max(np.min(degrees),1)).power_law.alpha 57 | 58 | 59 | def statistics_gini(A_in): 60 | n = A_in.shape[0] 61 | degrees = np.array(A_in.sum(axis=0)).flatten() 62 | degrees_sorted = np.sort(degrees) 63 | G = (2 * np.sum(np.array([i * degrees_sorted[i] for i in range(len(degrees))]))) / (n * np.sum(degrees)) - ( 64 | n + 1) / n 65 | return float(G) 66 | 67 | 68 | def statistics_edge_distribution_entropy(A_in): 69 | degrees = A_in.sum(axis=0).flatten() 70 | m = 0.5 * np.sum(np.square(A_in)) 71 | n = A_in.shape[0] 72 | 73 | H_er = 1 / np.log(n) * np.sum(-degrees / (2 * float(m)) * np.log((degrees+.0001) / (2 * float(m)))) 74 | return H_er 75 | 76 | 77 | def statistics_cluster_coefficient(A_in): 78 | G = nx.Graph(A_in) 79 | return nx.average_clustering(G) 80 | 81 | 82 | def statistics_compute_cpl(A): 83 | P = sp.csgraph.shortest_path(sp.csr_matrix(A)) 84 | return P[((1 - np.isinf(P)) * (1 - np.eye(P.shape[0]))).astype(np.bool)].mean() 85 | #return P[((1 - np.isinf(P)) * (1 - np.eye(P.shape[0]))).astype(np.bool)] 86 | 87 | 88 | def compute_graph_statistics(A_in, Z_obs=None): 89 | A = A_in.copy() 90 | 91 | assert((A == A.T).all()) 92 | A_graph = nx.from_numpy_matrix(A).to_undirected() 93 | 94 | statistics = {} 95 | 96 | d_max, d_min, d_mean = statistics_degrees(A)# 0.5s 97 | 98 | # Degree statistics 99 | statistics['deg_max'] = d_max 100 | statistics['deg_min'] = d_min 101 | statistics['deg_mean'] = d_mean 102 | 103 | # node number & edger number 104 | #statistics['node_num'] = A_graph.number_of_nodes() 105 | #statistics['edge_num'] = A_graph.number_of_edges() 106 | 107 | # largest connected component 108 | LCC = statistics_LCC(A)# 33.1s 109 | 110 | statistics['LCC'] = LCC.shape[0] 111 | # wedge count 112 | statistics['wedge_count'] = statistics_wedge_count(A)# 0.4s 113 | 114 | # claw count 115 | statistics['claw_count'] = statistics_claw_count(A)# 0.5s 116 | 117 | # triangle count 118 | statistics['triangle_count'] = statistics_triangle_count(A)# 4.7s 119 | 120 | # Square count 121 | statistics['square_count'] = statistics_square_count(A)# 41.5s 122 | 123 | # power law exponent 124 | statistics['power_law_exp'] = statistics_power_law_alpha(A)# 1.1s 125 | 126 | # gini coefficient 127 | statistics['gini'] = statistics_gini(A)# 0.5s 128 | 129 | # Relative edge distribution entropy 130 | statistics['rel_edge_distr_entropy'] = statistics_edge_distribution_entropy(A)# 3.5s 131 | 132 | # Assortativity 133 | statistics['assortativity'] = nx.degree_assortativity_coefficient(A_graph)# unknown 134 | 135 | # Clustering coefficient 136 | statistics['clustering_coefficient'] = statistics_cluster_coefficient(A)# 8.4s 137 | 138 | # Number of connected components 139 | #statistics['n_components'] = connected_components(A)[0] 140 | 141 | # if Z_obs 
is not None: 142 | # # inter- and intra-community density 143 | # intra, inter = statistics_cluster_props(A, Z_obs) 144 | # statistics['intra_community_density'] = intra 145 | # statistics['inter_community_density'] = inter 146 | 147 | statistics['cpl'] = statistics_compute_cpl(A)# 252.4s 148 | 149 | return statistics 150 | 151 | 152 | def compute_graph_statistics_short(A_in, Z_obs=None): 153 | A = A_in.copy() 154 | assert((A == A.T).all()) 155 | statistics = {} 156 | # power law exponent 157 | statistics['power_law_exp'] = statistics_power_law_alpha(A)# 1.1s 158 | # gini coefficient 159 | statistics['gini'] = statistics_gini(A)# 0.5s 160 | statistics['cpl'] = statistics_compute_cpl(A)# 252.4s 161 | return statistics 162 | 163 | 164 | def diff_graphs(graphs_ref, graphs_pred): 165 | diff_d = {} 166 | for g1 in graphs_ref: 167 | d1 = compute_graph_statistics(nx.to_numpy_array(g1)) 168 | for g2 in graphs_pred: 169 | d2 = compute_graph_statistics(nx.to_numpy_array(g2)) 170 | for k in list(d1.keys()): 171 | tmp = diff_d.get(k, 0.) 172 | diff_d[k] = tmp + round(abs(d1[k] - d2[k]), 5) 173 | sample_num = len(graphs_ref)*len(graphs_pred) 174 | for k in list(d1.keys()): 175 | tmp = diff_d.get(k, 0.) 176 | diff_d[k] = tmp/sample_num 177 | return diff_d 178 | 179 | 180 | def diff_graphs_short(graphs_ref, graphs_pred): 181 | diff_d = {} 182 | for g1 in graphs_ref: 183 | d1 = compute_graph_statistics_short(nx.to_numpy_array(g1)) 184 | for g2 in graphs_pred: 185 | d2 = compute_graph_statistics_short(nx.to_numpy_array(g2)) 186 | for k in list(d1.keys()): 187 | tmp = diff_d.get(k, 0.) 188 | diff_d[k] = tmp + round(abs(d1[k] - d2[k]), 5) 189 | sample_num = len(graphs_ref)*len(graphs_pred) 190 | for k in list(d1.keys()): 191 | tmp = diff_d.get(k, 0.) 192 | diff_d[k] = tmp/sample_num 193 | return diff_d 194 | 195 | 196 | def preprocess_graph(g): 197 | g.remove_edges_from(nx.selfloop_edges(g)) 198 | g =g.subgraph(max(nx.connected_components(g), key=len)) 199 | g = nx.convert_node_labels_to_integers(g) 200 | return g 201 | -------------------------------------------------------------------------------- /GraphGenerator/models/rtg.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import networkx as nx 3 | import itertools as it 4 | from scipy.linalg import toeplitz 5 | import pyemd 6 | import concurrent.futures 7 | import multiprocessing as mp 8 | 9 | 10 | def rtg_graph(num_edges, num_chars, beta, q, num_timestick=1, 11 | bipartite=False, self_loop=False, parallel=True): 12 | if num_chars > 26: 13 | raise ValueError('Number of characters cannot be greater than 26') 14 | 15 | all_chars = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 16 | 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'] 17 | all_chars2 = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 18 | 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'] 19 | 20 | chars = all_chars[:num_chars] + ['#'] 21 | if bipartite: 22 | chars2 = all_chars2[:num_chars] + ['$'] 23 | else: 24 | chars2 = all_chars[:num_chars] + ['#'] 25 | 26 | keyboard = create_2d_keyboard(num_chars, q, beta) 27 | edges = [] 28 | graph = nx.Graph() 29 | if parallel: 30 | with concurrent.futures.ThreadPoolExecutor() as executor: 31 | for edge in executor.map(edge_kernel, [ 32 | (chars, chars2, keyboard, 33 | bipartite, self_loop) for _ in range(num_edges) 34 | ]): 35 | edges.append(edge) 36 | else: 37 | for _ in range(num_edges): 38 | edges.append(create_edge(chars, 
chars2, keyboard, 39 | bipartite, self_loop)) 40 | graph.add_edges_from(edges) 41 | return graph 42 | 43 | 44 | def create_2d_keyboard(num_chars, q, beta): 45 | # assign unequal probabilities to the keys 46 | p = np.zeros(num_chars + 1) 47 | p_remaining = 1 - q 48 | for i in range(num_chars - 1): 49 | p[i] = np.random.rand() * p_remaining 50 | p_remaining -= p[i] 51 | p[num_chars - 1] = p_remaining 52 | # last key is the separator 53 | p[num_chars] = q 54 | 55 | # init the keyboard with independent cross-product probs 56 | keyboard = np.outer(p, p) 57 | # multiply by the imbalance factor 58 | keyboard = keyboard * beta 59 | # set diagonal to 0 60 | np.fill_diagonal(keyboard, 0) 61 | # calculate remaining probabilities for the diagonal 62 | # such that each row and column sums up to the 63 | # marginal probability 64 | remaining_diag = p - keyboard.sum(axis=0) 65 | dia_idx = np.diag_indices_from(keyboard) 66 | keyboard[dia_idx] = remaining_diag 67 | 68 | return keyboard 69 | 70 | 71 | def create_edge(chars, chars2, keyboard, bipartite, self_loop): 72 | src_finished = False 73 | dst_finished = False 74 | src = '' 75 | dst = '' 76 | char_combi = np.fromiter(it.product(chars, chars2), 77 | dtype='1str,1str') 78 | 79 | if not self_loop and not bipartite: 80 | # for the first try the key that produces a self-loop 81 | # on the delimiter is not permitted (to reduce the number 82 | # of self-loops) 83 | first_try_keyboard = np.copy(keyboard) 84 | first_try_keyboard[-1, -1] = 0 85 | first_try_keyboard = first_try_keyboard / first_try_keyboard.sum() 86 | src, dst = np.random.choice(char_combi, p=first_try_keyboard.flatten()) 87 | if src == '#': 88 | src_finished = True 89 | if dst == '#' or dst == '$': 90 | dst_finished = True 91 | 92 | while not (src_finished and dst_finished): 93 | s, d = np.random.choice(char_combi, p=keyboard.flatten()) 94 | if not src_finished: 95 | src += s 96 | if not dst_finished: 97 | dst += d 98 | if s == '#': 99 | src_finished = True 100 | if d == '#' or d == '$': 101 | dst_finished = True 102 | 103 | # if we produced a self loop but they are not allowed 104 | # we generate a new edge by running the whole function 105 | # again 106 | if ((not self_loop) and (src == dst)): 107 | return create_edge(chars, chars2, keyboard, bipartite, self_loop) 108 | else: 109 | return (src, dst) 110 | 111 | 112 | def edge_kernel(t): 113 | return create_edge(*t) 114 | 115 | 116 | def wasserstein_distance(x, y, distance_scaling=1.0): 117 | support_size = max(len(x), len(y)) 118 | d_mat = toeplitz(range(support_size)).astype(float) 119 | distance_mat = d_mat / distance_scaling 120 | 121 | # convert histogram values x and y to float, and make them equal len 122 | x = x.astype(float) 123 | y = y.astype(float) 124 | if len(x) < len(y): 125 | x = np.hstack((x, [0.0] * (support_size - len(x)))) 126 | elif len(y) < len(x): 127 | y = np.hstack((y, [0.0] * (support_size - len(y)))) 128 | 129 | emd = pyemd.emd(x, y, distance_mat) 130 | return emd 131 | 132 | 133 | def degree_loss(x, n=3, real_g=None, generator='RTG', k=2): 134 | pred_g = nx.empty_graph() 135 | if generator in ['RTG', 'rtg']: 136 | pred_g = rtg_graph(n, 26, beta=x, q=k) 137 | real_hist = np.array(nx.degree_histogram(real_g)) 138 | real_hist = real_hist / np.sum(real_hist) 139 | pred_hist = np.array(nx.degree_histogram(pred_g)) 140 | pred_hist = pred_hist / np.sum(pred_hist) 141 | loss = wasserstein_distance(real_hist, pred_hist) 142 | return loss 143 | 144 | 145 | def grid_search(x_min, x_max, x_step, n, real_g, generator, k=2,
repeat=2): 146 | loss_all = [] 147 | x_list = np.arange(x_min, x_max, x_step) 148 | for x_test in x_list: 149 | tmp_loss = 0 150 | for i in range(repeat): 151 | tmp_loss += degree_loss(x_test, n=n, real_g=real_g, generator=generator, k=k) 152 | loss_all.append(tmp_loss) 153 | x_best = x_list[np.argmin(np.array(loss_all))] 154 | return x_best, min(loss_all) 155 | 156 | 157 | def generator_optimization(graph, generator='RTG'): 158 | graph_node = graph.number_of_nodes() 159 | print('graph with {} nodes'.format(graph_node)) 160 | parameter_temp = 1 161 | if generator == 'RTG': 162 | pool = mp.Pool(processes=8) 163 | edge_num = graph.number_of_edges() 164 | args_all = [(.09, 1, .1, edge_num, graph, generator, q ** 2 / 100) for q in range(1, 10)] 165 | results = [pool.apply_async(grid_search, args=args) for args in args_all] 166 | output = [p.get() for p in results] 167 | parameter_all = [o[0] for o in output] 168 | loss_all = [o[1] for o in output] 169 | idx = np.argmin(np.array(loss_all)) 170 | parameter_temp = parameter_all[int(idx)] 171 | parameter_temp = (edge_num, 26, parameter_temp, (list(range(1, 10))[int(idx)]) ** 2 / 100) 172 | return parameter_temp 173 | 174 | 175 | def generate_new_graph(parameters, generator, repeat=1): 176 | graph_list = [] 177 | for i in range(repeat): 178 | if generator in ['rtg', 'RTG']: 179 | graph_list.append(rtg_graph(*parameters)) 180 | return graph_list 181 | 182 | 183 | def rtg(in_graph, config): 184 | """ 185 | RTG graph generator 186 | :param in_graph: referenced graph, type: nx.Graph 187 | :param config: configure object 188 | :return: generated graphs, type: list of nx.Graph 189 | """ 190 | parameters = generator_optimization(in_graph, config.model.name) 191 | return generate_new_graph(parameters, config.model.name, repeat=config.num_gen) 192 | 193 | 194 | if __name__ == '__main__': 195 | tmp = rtg_graph(5429, 20, 0.09, 0.01, 1) 196 | -------------------------------------------------------------------------------- /GraphGenerator/models/bter_ops/degdist_param_search.m: -------------------------------------------------------------------------------- 1 | function [p1,p2] = degdist_param_search(avgdeg,maxdeg,varargin) 2 | %DEGDIST_PARAM_SEARCH Find parameters for "ideal" degree distribution. 3 | % 4 | % [A,B] = DEGDIST_PARAM_SEARCH(AVG,BND) will attempt to find ideal 5 | % parameters for generating a discrete generalized log-normal 6 | % distribution with the expected average degree (AVG) and maximum degree 7 | % bound (BND) with probability less than 1e-10. 8 | % 9 | % G = DEGDIST_PARAM_SEARCH(AVG,BND,'type','dpl') is the same as above 10 | % except that it will attempt to find the ideal parameter for generating 11 | % a discrete power law distribution. 12 | % 13 | % Optional Parameters: 14 | % o 'type' - Type of degree distribution. Choices are discrete 15 | % generalized log normal ('dgln') or discrete power law ('dpl'). 16 | % o 'maxdeg_prbnd' - Ideally, the probability of a node with degree BND 17 | % (the maximum possible) is less than this bound. Default: 1e-10. 18 | % o 'fminsearch_opts' - The options passed to the function fminsearch. 19 | % Default: optimset('TolFun', 1e-4, 'TolX', 1e-4). 20 | % o 'verbose' - True to print out details of the progress of the search. 21 | % Default: true. 22 | % 23 | % See also GENDEGDIST, DGLNPDF, DPLPDF. 24 | % 25 | % Tamara G. 
Kolda, Ali Pinar, and others, FEASTPACK v1.1, Sandia National 26 | % Laboratories, SAND2013-4136W, http://www.sandia.gov/~tgkolda/feastpack/, 27 | % January 2014 28 | 29 | %% License 30 | % Copyright (c) 2014, Sandia National Laboratories 31 | % All rights reserved. 32 | % 33 | % Redistribution and use in source and binary forms, with or without 34 | % modification, are permitted provided that the following conditions are 35 | % met: 36 | % 37 | % # Redistributions of source code must retain the above copyright notice, 38 | % this list of conditions and the following disclaimer. 39 | % # Redistributions in binary form must reproduce the above copyright 40 | % notice, this list of conditions and the following disclaimer in the 41 | % documentation and/or other materials provided with the distribution. 42 | % 43 | % THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 44 | % IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 45 | % THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 46 | % PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 47 | % CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 48 | % EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 49 | % PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 50 | % PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 51 | % LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 52 | % NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 53 | % SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 54 | % 55 | % 56 | % Sandia National Laboratories is a multi-program laboratory managed and 57 | % operated by Sandia Corporation, a wholly owned subsidiary of Lockheed 58 | % Martin Corporation, for the U.S. Department of Energy's National Nuclear 59 | % Security Administration under contract DE-AC04-94AL85000. 60 | 61 | params = inputParser; 62 | params.addParamValue('maxdeg_prbnd', 1e-10); 63 | params.addParamValue('fminsearch_opts', optimset('TolFun', 1e-4, 'TolX', 1e-4)); 64 | params.addParamValue('type','dgln'); 65 | params.addParamValue('verbose',true); 66 | params.parse(varargin{:}); 67 | 68 | options = params.Results.fminsearch_opts; 69 | bnd = params.Results.maxdeg_prbnd; 70 | type = params.Results.type; 71 | verbose = params.Results.verbose; 72 | 73 | if strcmp(type,'dgln') 74 | fhandle = @(x) dglnobjfunc(x(1), x(2), maxdeg, bnd, avgdeg, verbose); 75 | [xstar,fstar,exitflag] = fminsearch(fhandle, [2 2], options); 76 | p1 = xstar(1); 77 | p2 = xstar(2); 78 | elseif strcmp(type,'dpl') 79 | fhandle = @(x) dplobjfunc(x, maxdeg, bnd, avgdeg, verbose); 80 | [xstar,fstar,exitflag] = fminsearch(fhandle, 2, options); 81 | p1 = xstar; 82 | p2 = 0; 83 | else 84 | error('Invalid type'); 85 | end 86 | 87 | if ((exitflag ~= 1) || (fstar > 0.01)) 88 | warning('Could not find ideal solution. F(X)=%e, Exit Flag = %d.\n', fstar, exitflag); 89 | end 90 | 91 | function y = dglnobjfunc(alpha,beta,maxdeg,bnd,avgdeg,verbose) 92 | %DGLNOBJFUNC Function to evaluate degree distribution 93 | % 94 | % Y = DGLNOBJFUNC(ALPHA,BETA,MEXDEG,BND,AVGDEG,BND) computes a score for 95 | % the DGLN degree distribution with MAXDEG and parameters ALPHA and 96 | % BETA. The goal is that the final degree distribution should have an 97 | % average degree of AVGDEG and the probability of obtaining the maximum 98 | % degree should be less than BND. A perfect match would have a score of 99 | % zero. 
100 | % 101 | % Y = DGLNOBJFUNC(...,VERBOSE) also indicates whether or not the function 102 | % should print anything. By default, VERBOSE = true. 103 | % 104 | %T. Kolda, November 2012. 105 | 106 | % ** Input checking 107 | if ~exist('verbose','var') 108 | verbose = true; 109 | end 110 | 111 | % ** Find maximum expected degree 112 | % We want to find x such that P(random vertex has degree > x) < bnd. 113 | p = dglnpdf(maxdeg,alpha,beta); 114 | 115 | % Penalty should grow quickly! 116 | if p(end) > bnd 117 | y1 = (exp(1+p(end)-bnd))^2 - 1; 118 | else 119 | y1 = 0; 120 | end 121 | 122 | % ** Find expected average degree 123 | a = ((1:maxdeg)*p); % Compute average degree 124 | y2 = (a-avgdeg)^2; 125 | 126 | % ** Sum the two values 127 | y = y1+y2; 128 | 129 | % ** Optional printing 130 | 131 | if verbose 132 | fprintf('alpha=%.3f, beta=%.3f, maxdeg=%d, p(maxdeg)=%e, avgdeg=%.1f, y=%.2f\n', ... 133 | alpha, beta, maxdeg, p(end), a, y); 134 | end 135 | 136 | function y = dplobjfunc(gamma,maxdeg,bnd,avgdeg,verbose) 137 | %DPLOBJFUNC Function to evaluate degree distribution 138 | % 139 | % Y = DPLOBJFUNC(GAMMA,MEXDEG,BND,AVGDEG,BND) computes a score for 140 | % the powerlaw degree distribution with MAXDEG and parameter GAMMA. The 141 | % goal is that the final degree distribution should have an average 142 | % degree of AVGDEG and the probability of obtaining the maximum degree 143 | % should be less than BND. A perfect match would have a score of zero. 144 | % 145 | % Y = DPLOBJFUNC(...,VERBOSE) also indicates whether or not the function 146 | % should print anything. By default, VERBOSE = true. 147 | % 148 | %T. G. Kolda and others, Sandia National Laboratories, November 2012. 149 | 150 | % Sandia National Laboratories is a multi-program laboratory managed and 151 | % operated by Sandia Corporation, a wholly owned subsidiary of Lockheed 152 | % Martin Corporation, for the U.S. Department of Energy's National Nuclear 153 | % Security Administration under contract DE-AC04-94AL85000. 154 | 155 | % ** Input checking 156 | if ~exist('verbose','var') 157 | verbose = true; 158 | end 159 | 160 | % ** Find maximum expected degree 161 | % We want to find x such that P(random vertex has degree > x) < bnd. 162 | p = dplpdf(maxdeg,gamma); 163 | 164 | % Penalty should grow quickly! 165 | if p(end) > bnd 166 | y1 = (exp(1+p(end)-bnd))^2 - 1; 167 | else 168 | y1 = 0; 169 | end 170 | 171 | % ** Find expected average degree 172 | a = ((1:maxdeg)*p); % Compute average degree 173 | y2 = (a-avgdeg)^2; 174 | 175 | % ** Sum the two values 176 | y = y1+y2; 177 | 178 | % ** Optional printing 179 | 180 | if verbose 181 | fprintf('gamma=%.3f, maxdeg=%d, p(maxdeg)=%e, avgdeg=%.1f, y=%.2f\n', ... 182 | gamma, maxdeg, p(end), a, y); 183 | end -------------------------------------------------------------------------------- /GraphGenerator/models/bter_ops/tricnt_mex.c: -------------------------------------------------------------------------------- 1 | /* TRICNT_MEX.C: Computes the number of triangles adjacent to each vertex. 2 | 3 | The code uses full enumeration. Each edge is assigned to its lower degree vertex, 4 | and each vertex checks wedges formed by edges assigned to itself. 5 | 6 | For computational results for this algorithm, see 7 | C. Seshadhri, A. Pinar, and T.G. Kolda, 8 | Triadic Measures on Graphs: The Power of Wedge Sampling, 9 | Proc. SIAM Data Mining, May 2013. 10 | 11 | Tamara G. 
Kolda, Ali Pinar, and others, FEASTPACK v1.1, Sandia National 12 | Laboratories, SAND2013-4136W, http://www.sandia.gov/~tgkolda/feastpack/, 13 | January 2014 14 | 15 | ** License ** 16 | Copyright (c) 2014, Sandia National Laboratories 17 | All rights reserved. 18 | 19 | Redistribution and use in source and binary forms, with or without 20 | modification, are permitted provided that the following conditions are 21 | met: 22 | 23 | 1. Redistributions of source code must retain the above copyright notice, 24 | this list of conditions and the following disclaimer. 25 | 26 | 2. Redistributions in binary form must reproduce the above copyright 27 | notice, this list of conditions and the following disclaimer in the 28 | documentation and/or other materials provided with the distribution. 29 | 30 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 31 | IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 32 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 33 | PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 34 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 35 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 36 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 37 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 38 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 39 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 40 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 41 | 42 | Sandia National Laboratories is a multi-program laboratory managed and 43 | operated by Sandia Corporation, a wholly owned subsidiary of Lockheed 44 | Martin Corporation, for the U.S. Department of Energy's National Nuclear 45 | Security Administration under contract DE-AC04-94AL85000. 46 | */ 47 | 48 | #include "mex.h" 49 | #include 50 | #include 51 | #include 52 | #include 53 | #include 54 | 55 | struct graph /* Stored in MATLAB Compressed Sparse Column format */ 56 | { 57 | int V; /* Number of vertices */ 58 | int E; /* Number of edges */ 59 | mwIndex *ptr; /* ptr[j] = start of column j within ind array */ 60 | mwIndex *ind; /* ind[ptr[j]] = row index for first nonzero in column j */ 61 | }; 62 | 63 | /* 64 | Checks if u is adjacent to v in G 65 | Returns 1 if they are adjacent; and 0 otherwise. 
66 | */ 67 | int check_pair(struct graph *G, mwIndex u, mwIndex v) 68 | { 69 | int i; 70 | for (i = G->ptr[u]; i < G->ptr[u+1]; i ++) 71 | { 72 | if (G->ind[i] == v) 73 | { 74 | return(1); 75 | } 76 | } 77 | return(0); 78 | } 79 | 80 | /* 81 | Marks on triangles formed by vertices on the list "list" and vertex r 82 | Inputs: G: graph 83 | list: list of vertices that are adjacent to r 84 | n: length of the "list" 85 | r: vertex r that is the center of wedgesbeing checked 86 | td: an array that stores the number of triangles adjacent to each vertex; 87 | the array entries are incremented with the new triagles identified 88 | 89 | Output: cnt: number of triangles found 90 | 91 | */ 92 | int mark_triangles(struct graph *G, mwIndex *list, int n, int r, double *td) 93 | { 94 | int i, j, cnt, x, y; 95 | mwIndex *ptr; 96 | 97 | ptr = G->ptr; 98 | cnt = 0; 99 | for(i = 0; i < n; i ++) 100 | { 101 | x = ptr[list[i]+1] - ptr[list[i]]; 102 | for(j = i+1; j < n; j ++) /* Check every pair of vertices on the list "list" to see if they form a triangle */ 103 | { 104 | if (x < (ptr[list[j]+1] - ptr[list[j]])) /* enables searching via the shorter list */ 105 | { 106 | y = check_pair(G, list[i], list[j]); 107 | } 108 | else 109 | { 110 | y = check_pair(G, list[j], list[i]); 111 | } 112 | if (y) /* increment the counters if a triangle is identified */ 113 | { 114 | cnt ++; 115 | td[r] ++; 116 | td[list[j]] ++; 117 | td[list[i]] ++; 118 | } 119 | } 120 | } 121 | return(cnt); 122 | } 123 | 124 | /* The main function that counts all the triangles 125 | Arguments 126 | - G: input graph [unmodified] 127 | - td: array to be filled in with the number of triangles per vertex 128 | Return value 129 | - Total number of triangles (int) 130 | 131 | */ 132 | int tri_enumerate(struct graph *G, double *td) 133 | { 134 | mwIndex i, j, N, tcnt, t, *d; 135 | mwIndex *ptr, *ptr2, *ind; 136 | 137 | N = G->V; 138 | ptr = G->ptr; 139 | ind = G->ind; 140 | 141 | /* d[i] is the degree of the ith vertex */ 142 | d = (mwIndex*) malloc( sizeof(mwIndex) * N ); 143 | 144 | /* ptr2 will be used to shorten adjacency lists, where each edge is assigned the vertex with a smaller degree */ 145 | ptr2 = (mwIndex*) malloc( sizeof(mwIndex) * (N+1) ); 146 | 147 | /* Initialize td to zero */ 148 | memset(td, 0, sizeof(double) * N); 149 | 150 | /* make degree list */ 151 | for(i = 0; i < N; i ++) 152 | { 153 | d[i] = ptr[i+1] - ptr[i]; 154 | ptr2[i] = ptr[i+1]; 155 | } 156 | 157 | /* Each vertex is assigned to its vertex with a smaller degree 158 | edges assigned to a vetex are moved towards the start of each list such that 159 | neighbors for which the edges assigned to vertex i are listed in ind[ptr[i]] to ind[ptr2[i]-1] 160 | Note that ind[ptr[i]] to ind[ptr[i]-1] stil stores all neighbors of the ith vertex 161 | */ 162 | for (i = 0; i < N; i ++) 163 | { 164 | for (j = ptr[i]; j < ptr2[i]; j ++) 165 | { 166 | if ((d[i] > d[ind[j]]) || ((d[i] == d[ind[j]]) && (i > ind[j]))) 167 | { 168 | ptr2[i] --; 169 | 170 | /* swap */ 171 | t = ind[ptr2[i]]; 172 | ind[ptr2[i]] = ind[j]; 173 | ind[j] = t; 174 | 175 | j--; 176 | } 177 | } 178 | } 179 | 180 | 181 | /* Check for triangles centered on each vertex with the edges assigned to it */ 182 | tcnt = 0; 183 | for (i = 0; i < N; i ++) 184 | { 185 | tcnt += mark_triangles(G, ind + ptr[i], ptr2[i] - ptr[i], i, td); 186 | } 187 | 188 | free(d); 189 | free(ptr2); 190 | return(tcnt); 191 | } 192 | 193 | /* ---------------------------------------------------------------------------------- 194 | This function 
provides the interface to Matlab 195 | To call this function, you need to execute in Matlab the following 196 | >> mex tricnt_mex.c -largeArrayDims 197 | 198 | The matlab function sould be called as 199 | >> t = tricnt_mex(G) 200 | 201 | G is assumed to be a sparse adjacency matrix for a simple graph. 202 | It returns a vector t, such that t[i] is the number of triangles 203 | adjacent to the ith vertex. 204 | ------------------------------------------------------------------------------------ */ 205 | void mexFunction (int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) 206 | { 207 | double *dtd; 208 | struct graph G; 209 | 210 | /* Check inputs */ 211 | if ((nrhs != 1) || (!mxIsSparse (prhs[0])) ) 212 | { 213 | mexErrMsgTxt ("expects sparse matrix"); 214 | } 215 | 216 | /* Read sparse matrix input */ 217 | G.V = mxGetN (prhs [0]); 218 | G.E = mxGetNzmax (prhs [0]); 219 | G.ind = mxGetIr (prhs[0]); 220 | G.ptr = mxGetJc (prhs[0]); 221 | 222 | /* Create array for the return argument. */ 223 | plhs[0] = mxCreateDoubleMatrix(G.V, 1, mxREAL); 224 | dtd = mxGetPr(plhs[0]); 225 | 226 | /* Compute the number of triangles for each vertex */ 227 | tri_enumerate(&G,dtd); 228 | } 229 | 230 | 231 | 232 | 233 | 234 | -------------------------------------------------------------------------------- /GraphGenerator/train/train_base.py: -------------------------------------------------------------------------------- 1 | import scipy.sparse as sp 2 | from GraphGenerator.utils.arg_utils import set_device 3 | import networkx as nx 4 | import torch.optim as optim 5 | import torch.nn.functional as F 6 | from torch.autograd import Variable 7 | from GraphGenerator.metrics.memory import get_peak_gpu_memory, flush_cached_gpu_memory 8 | import numpy as np 9 | import sys, torch, copy, datetime 10 | from GraphGenerator.evaluate.efficiency import coo_to_csp, sp_normalize 11 | 12 | 13 | def train_autoencoder_base(sp_adj, feature, config, model, optimizer): 14 | norm = sp_adj.shape[0] * sp_adj.shape[0] / float((sp_adj.shape[0] * sp_adj.shape[0] - sp_adj.sum()) * 2) 15 | pos_weight = torch.tensor(float(sp_adj.shape[0] * sp_adj.shape[0] - sp_adj.sum()) / sp_adj.sum()).to(config.device) 16 | adj_def = torch.from_numpy(sp_adj.toarray()).to(config.device) 17 | adj_normalized = sp_normalize(sp_adj, config.device) 18 | adj_normalized = Variable(adj_normalized).to(config.device) 19 | training_time = datetime.timedelta() 20 | for epoch in range(config.train.max_epochs): 21 | epoch_start = datetime.datetime.now() 22 | adj_score = model(adj_normalized, feature, device=config.device) 23 | train_loss = norm * F.binary_cross_entropy_with_logits(adj_score, adj_def, 24 | pos_weight=pos_weight) 25 | if config.model.variational: 26 | kl_div = 0.5/adj_score.size(0)*(1+2*model.logv-model.mean**2-torch.exp(model.logv)**2).sum(1).mean() 27 | train_loss -= kl_div 28 | if config.model.name == 'SBMGNN': 29 | train_loss += model.calculate_kl_div(['kl_kumar_beta', 'kl_discrete']) 30 | optimizer.zero_grad() 31 | train_loss.backward() 32 | optimizer.step() 33 | epoch_time = datetime.datetime.now() - epoch_start 34 | training_time += epoch_time 35 | print('[%03d/%d]: loss:%.4f, time per epoch:%.8s' 36 | % (epoch + 1, 37 | config.train.max_epochs, 38 | train_loss, 39 | str(epoch_time)[-12:])) 40 | print('### Training Time Consumption:%.8s' 41 | % str(training_time)[-12:]) 42 | return model 43 | 44 | 45 | def top_n_indexes(arr, n): 46 | idx = np.argpartition(arr, arr.size - n, axis=None)[-n:] 47 | width = arr.shape[1] 48 | return [divmod(i, 
width) for i in idx] 49 | 50 | 51 | def topk_adj(adj, k): 52 | if isinstance(adj, torch.Tensor): 53 | adj_ = adj.data.cpu().numpy() 54 | else: 55 | adj_ = adj 56 | assert ((adj_ == adj_.T).all()) 57 | adj_ = (adj_ - np.min(adj_)) / np.ptp(adj_) 58 | adj_ -= np.diag(np.diag(adj_)) 59 | res = np.zeros(adj.shape) 60 | tri_adj = np.triu(adj_) 61 | inds = top_n_indexes(tri_adj, int(k//2)) 62 | for ind in inds: 63 | i = ind[0] 64 | j = ind[1] 65 | res[i, j] = 1.0 66 | res[j, i] = 1.0 67 | return res 68 | 69 | 70 | def infer_autoencoder(sp_adj, feature, config, model, repeat=1): 71 | generated_graphs = [] 72 | with torch.no_grad(): 73 | adj_normalized = sp_normalize(sp_adj, config.device) 74 | adj_normalized = Variable(adj_normalized).to(config.device) 75 | for i in range(repeat): 76 | adj_score = model(adj_normalized, feature, device=config.device) 77 | adj = topk_adj(adj_score, k=sp_adj.sum()) 78 | tmp_graph = nx.from_numpy_array(adj) 79 | generated_graphs.append(tmp_graph) 80 | return generated_graphs 81 | 82 | 83 | def train_and_inference(input_data, generator, config=None, repeat=1): 84 | """ 85 | train model using input graph, and infer new graphs 86 | :param input_data: input graph(s), whose type is networkx.Graph or list of nx.Graph 87 | :param generator: name of graph generator 88 | :param config: configuration of graph generator 89 | :param repeat: number of new graphs 90 | :return: generated graphs 91 | """ 92 | # graphs = [] 93 | if generator in ['e-r', 'w-s', 'b-a', 'E-R', 'W-S', 'B-A']: 94 | import GraphGenerator.models.er as er 95 | import GraphGenerator.models.ws as ws 96 | import GraphGenerator.models.ba as ba 97 | tmp_name = generator.lower() 98 | model_name = "{}.{}".format(tmp_name.replace('-', ''), tmp_name.replace('-', '_')) 99 | graphs = eval(model_name)(input_data, config) 100 | elif generator in ['rtg', 'RTG', 'bter', 'BTER']: 101 | import GraphGenerator.models.rtg as rtg 102 | import GraphGenerator.models.bter as bter 103 | model_name = "{}.{}".format(generator, generator) 104 | graphs = eval(model_name)(input_data, config) 105 | elif generator in ['sbm', 'dcsbm']: 106 | import GraphGenerator.models.sbm as sbm 107 | graphs = sbm.generate(input_data, generator, repeat) 108 | elif generator in ['rmat', 'kronecker']: 109 | import GraphGenerator.models.kronecker as kronecker 110 | import GraphGenerator.models.rmat as rmat 111 | graphs = eval(generator).generate(input_data, config) 112 | elif generator in ['vgae', 'graphite', 'sbmgnn']: 113 | set_device(config) 114 | sp_adj = nx.adjacency_matrix(input_data).astype(np.float32) 115 | # print("Shape!", sp_adj.shape) 116 | feature = coo_to_csp(sp.diags(np.array([1. 
for i in range(sp_adj.shape[0])], 117 | dtype=np.float32)).tocoo()).to(config.device) 118 | if generator == 'vgae': 119 | import GraphGenerator.models.vgae as vgae 120 | if config.model.variational: 121 | model_name = "{}.{}".format(generator, "VGAE") 122 | else: 123 | model_name = "{}.{}".format(generator, "GAE") 124 | model = eval(model_name)(config.model.num_nodes, 125 | config.model.embedding_dim, 126 | config.model.hidden_dim, 127 | act=F.relu, 128 | layers=config.model.num_GNN_layers).to(config.device) 129 | elif generator == 'graphite': 130 | import GraphGenerator.models.graphite as graphite 131 | if config.model.variational: 132 | model_name = "{}.{}".format(generator, "GraphiteVAE") 133 | else: 134 | model_name = "{}.{}".format(generator, "GraphiteAE") 135 | model = eval(model_name)(config.model.num_nodes, 136 | config.model.hidden_dim, 137 | config.model.embedding_dim, 138 | config.model.decoding_dim, 139 | act=F.relu).to(config.device) 140 | elif generator == 'sbmgnn': 141 | import GraphGenerator.models.sbmgnn as sbmgnn 142 | model_name = "{}.{}".format(generator, 'SBMGNN') 143 | model = eval(model_name)(config.model.num_nodes, 144 | config.model.hidden, 145 | config=config).to(config.device) 146 | else: 147 | # model = None 148 | sys.exit(1) 149 | optimizer = optim.Adam(model.parameters(), lr=config.train.lr) 150 | model = train_autoencoder_base(sp_adj, feature, config, model, optimizer) 151 | tmp_memory = get_peak_gpu_memory(device=config.device) 152 | print("Peak GPU memory reserved in training process: {} MiB".format(tmp_memory//1024//1024)) 153 | flush_cached_gpu_memory() 154 | graphs = infer_autoencoder(sp_adj, feature, config, model, repeat=repeat) 155 | elif generator in ['graphrnn', 'gran', 'bigg']: 156 | import GraphGenerator.train.train_graphrnn as graphrnn 157 | import GraphGenerator.models.bigg as bigg 158 | import GraphGenerator.models.gran as gran 159 | if isinstance(input_data, nx.Graph): 160 | input_data = [input_data] 161 | trained_model = eval("{}.train_{}".format(generator, generator))(input_data, config) 162 | tmp_memory = get_peak_gpu_memory(device=config.device) 163 | print("Peak GPU memory reserved in training process: {} MiB".format(tmp_memory//1024//1024)) 164 | flush_cached_gpu_memory() 165 | graphs = eval("{}.infer_{}".format(generator, generator))(input_data, config, trained_model) 166 | else: 167 | print("Wrong generator name! 
Process exit..") 168 | sys.exit(1) 169 | return graphs 170 | 171 | -------------------------------------------------------------------------------- /GraphGenerator/models/bigg.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import sys 4 | import pickle as cp 5 | import networkx as nx 6 | import numpy as np 7 | import random 8 | from tqdm import tqdm 9 | import torch, torch.cuda 10 | import torch.optim as optim 11 | from collections import OrderedDict 12 | from GraphGenerator.utils.arg_utils import get_config, set_device 13 | from GraphGenerator.models.bigg_ops.tree_clib.tree_lib import setup_treelib, TreeLib 14 | from GraphGenerator.models.bigg_ops.tree_model import RecurTreeGen 15 | 16 | 17 | def get_node_dist(graphs): 18 | num_node_dist = np.bincount([len(gg.nodes) for gg in graphs]) 19 | num_node_dist = num_node_dist / np.sum(num_node_dist) 20 | return num_node_dist 21 | 22 | 23 | def sqrtn_forward_backward(model, 24 | graph_ids, 25 | list_node_starts, 26 | num_nodes, 27 | blksize, 28 | loss_scale, 29 | init_states=[None, None], 30 | top_grad=None, 31 | **kwargs): 32 | assert len(graph_ids) == 1 33 | if blksize < 0 or blksize > num_nodes: 34 | blksize = num_nodes 35 | 36 | prev_states = init_states 37 | cache_stages = list(range(0, num_nodes, blksize)) 38 | 39 | list_caches = [] 40 | for st_delta in cache_stages[:-1]: 41 | node_st = list_node_starts[0] + st_delta 42 | with torch.no_grad(): 43 | cur_num = num_nodes - node_st if node_st + blksize > num_nodes else blksize 44 | _, new_states = model.forward_row_summaries(graph_ids, 45 | list_node_starts=[node_st], 46 | num_nodes=cur_num, 47 | prev_rowsum_states=prev_states, 48 | **kwargs) 49 | prev_states = new_states 50 | list_caches.append(new_states) 51 | 52 | tot_ll = 0.0 53 | for i in range(len(cache_stages) - 1, -1, -1): 54 | st_delta = cache_stages[i] 55 | node_st = list_node_starts[0] + st_delta 56 | cur_num = num_nodes - node_st if node_st + blksize > num_nodes else blksize 57 | prev_states = list_caches[i - 1] if i else init_states 58 | if prev_states[0] is not None: 59 | for x in prev_states: 60 | x.requires_grad = True 61 | ll, cur_states = model.forward_train(graph_ids, 62 | list_node_starts=[node_st], 63 | num_nodes=cur_num, 64 | prev_rowsum_states=prev_states, 65 | **kwargs) 66 | tot_ll += ll.item() 67 | loss = -ll * loss_scale 68 | if top_grad is not None: 69 | torch.autograd.backward([loss, *cur_states], [None, *top_grad]) 70 | else: 71 | loss.backward() 72 | if i: 73 | top_grad = [x.grad.detach() for x in prev_states] 74 | 75 | return tot_ll, top_grad 76 | 77 | 78 | def train_bigg(train_graphs, config): 79 | # print("### Type:", type(train_graphs)) 80 | random.seed(config.seed) 81 | torch.manual_seed(config.seed) 82 | np.random.seed(config.seed) 83 | set_device(config) 84 | setup_treelib(config) 85 | for g in train_graphs: 86 | TreeLib.InsertGraph(g) 87 | max_num_nodes = max([len(gg.nodes) for gg in train_graphs]) 88 | config.model.max_num_nodes = max_num_nodes 89 | 90 | model = RecurTreeGen(config).to(config.device) 91 | if config.train.resume and os.path.isfile(config.train.resume_model_dir): 92 | print('loading from', config.train.resume_model_dir) 93 | resume_model_path = os.path.join(config.train.resume_model_dir, 94 | config.train.resume_model_name) 95 | model.load_state_dict(torch.load(resume_model_path)) 96 | 97 | optimizer = optim.Adam(model.parameters(), lr=config.train.lr, weight_decay=1e-4) 98 | indices = list(range(len(train_graphs))) 99 | 
if config.train.resume_epoch is None: 100 | config.train.resume_epoch = 0 101 | training_time = {'time_all':0., 'epochs':0,} 102 | for epoch in range(config.train.resume_epoch, config.train.max_epochs): 103 | pbar = tqdm(range(config.train.snapshot_epoch)) 104 | 105 | optimizer.zero_grad() 106 | for idx in pbar: 107 | random.shuffle(indices) 108 | batch_indices = indices[:config.train.batch_size] 109 | 110 | num_nodes = sum([len(train_graphs[i]) for i in batch_indices]) 111 | if config.model.blksize < 0 or num_nodes <= config.model.blksize: 112 | start_time = time.time() 113 | ll, _ = model.forward_train(batch_indices) 114 | loss = -ll / num_nodes 115 | loss.backward() 116 | end_time = time.time() 117 | training_time['time_all'] = training_time['time_all'] + end_time - start_time 118 | training_time['epochs'] = training_time['epochs'] + 1 119 | loss = loss.item() 120 | else: 121 | ll = 0.0 122 | for i in batch_indices: 123 | n = len(train_graphs[i]) 124 | cur_ll, _ = sqrtn_forward_backward(model, graph_ids=[i], list_node_starts=[0], 125 | num_nodes=n, blksize=config.model.blksize, loss_scale=1.0 / n) 126 | ll += cur_ll 127 | loss = -ll / num_nodes 128 | if (idx + 1) % config.train.accum_grad == 0: 129 | if config.train.grad_clip > 0: 130 | torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.train.grad_clip) 131 | optimizer.step() 132 | optimizer.zero_grad() 133 | pbar.set_description('epoch %.2f, loss: %.4f' % (epoch + (idx + 1) / config.train.snapshot_epoch, loss)) 134 | if config.train.save_snapshot: 135 | torch.save(model.state_dict(), os.path.join(config.exp_dir, config.exp_name, 'epoch-%d.ckpt' % (epoch + 1))) 136 | train_time = training_time['time_all'] / training_time['epochs'] 137 | print("Time consumption of one epoch of training BiGG is: {:.6f}".format(train_time)) 138 | return model 139 | 140 | 141 | def infer_bigg(test_graphs, config, model=None): 142 | random.seed(config.seed) 143 | torch.manual_seed(config.seed) 144 | np.random.seed(config.seed) 145 | set_device(config) 146 | setup_treelib(config) 147 | max_num_nodes = max([len(gg.nodes) for gg in test_graphs]) 148 | config.model.max_num_nodes = max_num_nodes 149 | if model is None: 150 | model = RecurTreeGen(config).to(config.device) 151 | for g in test_graphs: 152 | TreeLib.InsertGraph(g) 153 | test_model_path = os.path.join(config.test.test_model_dir, 154 | config.test.test_model_name) 155 | if config.test.load_snapshot and os.path.isfile(config.test.test_model_dir): 156 | print('loading from', config.test.test_model_dir) 157 | model.load_state_dict(torch.load(test_model_path)) 158 | 159 | # get num nodes dist 160 | num_node_dist = get_node_dist(test_graphs) 161 | gen_graphs = [] 162 | infering_time = {'time_all': 0., 'epochs': 0, } 163 | with torch.no_grad(): 164 | for _ in tqdm(range(config.test.num_test_gen)): 165 | num_nodes = np.argmax(np.random.multinomial(1, num_node_dist)) 166 | start_time = time.time() 167 | _, pred_edges, _ = model(num_nodes, display=config.test.display) 168 | for e in pred_edges: 169 | assert e[0] > e[1] 170 | pred_g = nx.Graph() 171 | pred_g.add_edges_from(pred_edges) 172 | end_time = time.time() 173 | infering_time['time_all'] = infering_time['time_all'] + end_time - start_time 174 | infering_time['epochs'] = infering_time['epochs'] + 1 175 | gen_graphs.append(pred_g) 176 | # print('saving graphs') 177 | # with open(test_model_path + '.graphs-%s' % str(config.test.greedy_frac), 'wb') as f: 178 | # cp.dump(gen_graphs, f, cp.HIGHEST_PROTOCOL) 179 | # print('evaluating') 180 | 
infer_time = infering_time['time_all'] / infering_time['epochs'] 181 | print("\nTime consumption of inferring one graph by BiGG is: {:.6f}".format(infer_time)) 182 | return gen_graphs 183 | # sys.exit(0) 184 | --------------------------------------------------------------------------------
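A minimal usage sketch (not a file in the repository) showing how the statistics helpers from GraphGenerator/evaluate/diff.py can be composed to score generated graphs against a reference graph; the two Erdős–Rényi graphs are placeholders for a real reference/generated pair, and the snippet assumes the dependency versions the code above was written against.

import networkx as nx
from GraphGenerator.evaluate.diff import preprocess_graph, diff_graphs_short

# Placeholder graphs standing in for a real reference/generated pair.
reference = preprocess_graph(nx.erdos_renyi_graph(300, 0.05, seed=0))
generated = preprocess_graph(nx.erdos_renyi_graph(300, 0.05, seed=1))

# diff_graphs_short averages |stat(reference) - stat(generated)| over all
# graph pairs, using the short statistic set (power-law exponent, Gini
# coefficient, characteristic path length) from compute_graph_statistics_short.
print(diff_graphs_short([reference], [generated]))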