├── __init__.py ├── GraphGenerator ├── __init__.py ├── models │ ├── __init__.py │ ├── bter_ops │ │ ├── tricnt_mex.mexa64 │ │ ├── tricnt_mex.mexw64 │ │ ├── ccperdegest_mex.mexw64 │ │ ├── template.m │ │ ├── LICENSE.txt │ │ ├── dplpdf.m │ │ ├── dglnpdf.m │ │ ├── ccperdegest.m │ │ ├── edges2graph.m │ │ ├── random_sample.m │ │ ├── bter_edges2graph.m │ │ ├── binstart.m │ │ ├── binlookup.m │ │ ├── tricnt.m │ │ ├── gendegdist.m │ │ ├── cc_param_search.m │ │ ├── ccperdeg.m │ │ ├── bindata.m │ │ ├── degdist_param_search.m │ │ └── tricnt_mex.c │ ├── kronecker_ops │ │ ├── kronecker_src.zip │ │ └── readme.txt │ ├── rmat.py │ ├── bigg_ops │ │ ├── __init__.py │ │ ├── tree_clib │ │ │ ├── __init__.py │ │ │ ├── reame.md │ │ │ ├── include │ │ │ │ ├── cuda_ops.h │ │ │ │ ├── config.h │ │ │ │ ├── tree_util.h │ │ │ │ ├── tree_clib.h │ │ │ │ └── struct_util.h │ │ │ ├── src │ │ │ │ └── lib │ │ │ │ │ ├── cuda_ops.cu │ │ │ │ │ ├── config.cpp │ │ │ │ │ └── tree_util.cpp │ │ │ ├── Makefile │ │ │ ├── Makefile_70 │ │ │ └── Makefile_75 │ │ └── tensor_ops.py │ ├── er.py │ ├── bter.py │ ├── ba.py │ ├── sbm.py │ ├── kronecker.py │ ├── vgae.py │ ├── ws.py │ ├── mmsb.py │ ├── graphite.py │ ├── rtg.py │ └── bigg.py ├── train │ ├── __init__.py │ ├── train_netgan.py │ └── train_base.py ├── utils │ ├── __init__.py │ ├── logger.py │ └── arg_utils.py ├── evaluate │ ├── __init__.py │ ├── efficiency.py │ ├── distance.py │ └── diff.py ├── metrics │ ├── __init__.py │ ├── speed.py │ ├── memory.py │ └── mmd.py ├── preprocessing │ ├── __init__.py │ ├── dataio.py │ └── utils.py ├── test │ ├── __init__.py │ └── test_bigg.py └── __main__.py ├── Tutorial for developer of GraphGenerator.pdf ├── requirements.txt ├── config ├── sbm.yaml ├── dcsbm.yaml ├── bter.yaml ├── b-a.yaml ├── e-r.yaml ├── w-s.yaml ├── rmat.yaml ├── kronecker.yaml ├── template.yaml ├── sbmgnn.yaml ├── graphite.yaml ├── vgae.yaml ├── netgan.yaml ├── graphrnn.yaml ├── bigg.yaml └── gran.yaml ├── LICENSE ├── setup.py ├── .gitignore └── README.md /__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /GraphGenerator/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /GraphGenerator/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /GraphGenerator/train/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /GraphGenerator/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /GraphGenerator/evaluate/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /GraphGenerator/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /GraphGenerator/preprocessing/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /Tutorial for developer of GraphGenerator.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI4DataSynth/GraphGenerator/HEAD/Tutorial for developer of GraphGenerator.pdf -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | matplotlib 2 | networkx 3 | numpy 4 | pyemd 5 | python-louvain 6 | pyyaml 7 | scipy 8 | sklearn 9 | six 10 | torch 11 | tqdm 12 | -------------------------------------------------------------------------------- /GraphGenerator/models/bter_ops/tricnt_mex.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI4DataSynth/GraphGenerator/HEAD/GraphGenerator/models/bter_ops/tricnt_mex.mexa64 -------------------------------------------------------------------------------- /GraphGenerator/models/bter_ops/tricnt_mex.mexw64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI4DataSynth/GraphGenerator/HEAD/GraphGenerator/models/bter_ops/tricnt_mex.mexw64 -------------------------------------------------------------------------------- /config/sbm.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | exp_name: SBM 3 | exp_dir: exp/SBM 4 | seed: 1234 5 | dataset: 6 | name: top10 7 | data_path: data/ 8 | model: 9 | name: SBM 10 | -------------------------------------------------------------------------------- /GraphGenerator/models/bter_ops/ccperdegest_mex.mexw64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI4DataSynth/GraphGenerator/HEAD/GraphGenerator/models/bter_ops/ccperdegest_mex.mexw64 -------------------------------------------------------------------------------- /GraphGenerator/models/kronecker_ops/kronecker_src.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AI4DataSynth/GraphGenerator/HEAD/GraphGenerator/models/kronecker_ops/kronecker_src.zip -------------------------------------------------------------------------------- /GraphGenerator/models/rmat.py: -------------------------------------------------------------------------------- 1 | import GraphGenerator.models.kronecker as kronecker 2 | 3 | 4 | def generate(*params): 5 | return kronecker.generate(*params) 6 | 7 | 8 | -------------------------------------------------------------------------------- /config/dcsbm.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | exp_name: DCSBM 3 | exp_dir: exp/DCSBM 4 | seed: 1234 5 | dataset: 6 | name: top10 7 | data_path: data/ 8 | model: 9 | name: DCSBM 10 | -------------------------------------------------------------------------------- /GraphGenerator/test/__init__.py: -------------------------------------------------------------------------------- 1 | from .test_bigg import bigg_test as bigg 2 | 3 | 4 | def test_generator(args, config): 5 | eval(args.generator)(args, config) 6 | return 7 | -------------------------------------------------------------------------------- /config/bter.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | exp_name: BTER 3 | exp_dir: exp/BTER 4 | seed: 1234 5 | 
dataset: 6 | name: top10 7 | data_path: data/ 8 | model: 9 | name: BTER 10 | num_gen: 10 11 | -------------------------------------------------------------------------------- /config/b-a.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | exp_name: B-A 3 | exp_dir: exp/B-A 4 | seed: 1234 5 | dataset: 6 | name: top10 7 | data_path: data/ 8 | model: 9 | name: B-A 10 | num_nodes: 1000 11 | num_gen: 10 12 | -------------------------------------------------------------------------------- /config/e-r.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | exp_name: E-R 3 | exp_dir: exp/E-R 4 | seed: 1234 5 | dataset: 6 | name: top10 7 | data_path: data/ 8 | model: 9 | name: E-R 10 | num_nodes: 1000 11 | num_gen: 10 12 | -------------------------------------------------------------------------------- /config/w-s.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | exp_name: W-S 3 | exp_dir: exp/W-S 4 | seed: 1234 5 | dataset: 6 | name: top10 7 | data_path: data/ 8 | model: 9 | name: W-S 10 | num_nodes: 1000 11 | num_gen: 10 12 | -------------------------------------------------------------------------------- /config/rmat.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | exp_name: RMAT 3 | exp_dir: exp/RMAT 4 | seed: 1234 5 | dataset: 6 | name: top10 7 | data_path: data/ 8 | model: 9 | name: RMAT 10 | init_mat: 0.9 0.3 0.3 0.1 11 | num_gen: 10 12 | -------------------------------------------------------------------------------- /config/kronecker.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | exp_name: Kronecker 3 | exp_dir: exp/Kronecker 4 | seed: 1234 5 | device: cpu 6 | gpu: -1 7 | dataset: 8 | name: g1000000 9 | data_path: data/ 10 | model: 11 | name: Kronecker 12 | init_mat: 0.9 0.6; 0.6 0.1 13 | num_gen: 1 14 | -------------------------------------------------------------------------------- /GraphGenerator/preprocessing/dataio.py: -------------------------------------------------------------------------------- 1 | import pickle, os, sys 2 | 3 | 4 | def load_data(path): 5 | if os.path.exists(path): 6 | graph = pickle.load(open(path, "rb")) 7 | return graph 8 | else: 9 | print("Invalid input data...") 10 | sys.exit(1) 11 | 12 | 13 | def save_data(obj, name): 14 | pickle.dump(obj, open("{}".format(name), "wb")) 15 | return 0 16 | 17 | -------------------------------------------------------------------------------- /GraphGenerator/models/kronecker_ops/readme.txt: -------------------------------------------------------------------------------- 1 | # readme before using kronecker graph generator 2 | In order to reduce space, we compressed the source codes of kronecker graph generator. 3 | Before using kronecker, we need to decompress and compile them: 4 | 5 | 1. unzip `kronecker_src.zip` 6 | 7 | ```bash 8 | cd path_to/GraphGenerator/GraphGenerator/models/kronecker_ops 9 | unzip -o -d . kronecker_src.zip 10 | ``` 11 | 12 | 2. reinstall this package 13 | ```bash 14 | pip uninstall GraphGenerator 15 | cd path_to/GraphGenerator 16 | pip install -e . 
17 | ``` 18 | 19 | -------------------------------------------------------------------------------- /config/template.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | exp_name: Template_name 3 | exp_dir: exp/Template 4 | device: cuda:0 5 | gpu: 1 6 | #device: cpu 7 | seed: 1234 8 | dataset: 9 | name: Template_data 10 | data_path: data/ 11 | model: 12 | name: Template_model 13 | num_nodes: 1000 14 | embedding_dim: 32 15 | hidden_dim: 32 16 | train: 17 | optimizer: Adam 18 | lr: 1.0e-2 19 | max_epochs: 1000 20 | display_iter: 10 21 | snapshot_epoch: 1000 22 | test: 23 | test_model_dir: exp/Template_name/Template_model_Template_data_2021-Feb-25-10-38-59_12345 24 | test_model_name: model_snapshot_0001000.pth 25 | -------------------------------------------------------------------------------- /GraphGenerator/preprocessing/utils.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | import sys 3 | 4 | 5 | def edgelist_to_graph(path): 6 | try: 7 | graph = nx.read_edgelist(path) 8 | return graph 9 | except: 10 | print("Wrong path entered! Absolute path of edgelist file expected.") 11 | sys.exit(1) 12 | 13 | 14 | def pathlist_to_graphlist(path): 15 | with open(path, "r") as f: 16 | path_list = f.readlines() 17 | path_list = [p.strip("\n") for p in path_list if p != "\n"] 18 | graph_list = [edgelist_to_graph(p) for p in path_list] 19 | return graph_list 20 | -------------------------------------------------------------------------------- /config/sbmgnn.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | exp_name: SBMGNN 3 | exp_dir: exp/SBMGNN 4 | seed: 1234 5 | gpu: 1 6 | device: cuda:1 7 | dataset: 8 | name: top10 9 | data_path: data/ 10 | features: false 11 | model: 12 | name: SBMGNN 13 | hidden: [64, 50] 14 | num_nodes: 100 15 | g_hidden: 32 16 | deep_decoder: 1 17 | dropout: 0.5 18 | alpha0: 10. 19 | temp_prior: 0.5 20 | temp_post: 1. 
21 | variational: false 22 | train: 23 | lr: 0.01 24 | max_epochs: 100 25 | weight_decay: 0.0 26 | use_k_fold: false 27 | k: 5 28 | early_stopping: 0 29 | split_idx: 0 30 | weighted_ce: 1 31 | reconstruct_x: false -------------------------------------------------------------------------------- /config/graphite.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | exp_name: Graphite 3 | exp_dir: exp/Graphite 4 | device: cuda:1 5 | gpu: 1 6 | #device: cpu 7 | seed: 1234 8 | dataset: 9 | name: top10 10 | data_path: data/ 11 | model: 12 | name: Graphite 13 | num_nodes: 10000 14 | embedding_dim: 32 15 | hidden_dim: 32 16 | decoding_dim: 32 17 | variational: false 18 | has_feature: false 19 | train: 20 | optimizer: Adam 21 | lr: 1.0e-2 22 | max_epochs: 100 23 | display_iter: 10 24 | snapshot_epoch: 1000 25 | #test: 26 | # test_model_dir: exp/VGAE/VGAE_cora_2021-Feb-25-10-38-59_12345 27 | # test_model_name: model_snapshot_0001000.pth 28 | -------------------------------------------------------------------------------- /GraphGenerator/metrics/speed.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | def time_decorator(func): 4 | def time_record(*args, **kwargs): 5 | startt = time.time() 6 | res = func(*args, **kwargs) 7 | endt = time.time() 8 | time_consumption = endt - startt 9 | print("Time Consumption of {}: {:.6f}s.".format(func.__name__, time_consumption)) 10 | return res 11 | return time_record 12 | 13 | 14 | @time_decorator 15 | def test_deco(n): 16 | for i in range(n): 17 | continue 18 | 19 | 20 | if __name__ == '__main__': 21 | n = 1048576 22 | test_deco(n) 23 | for i in range(7): 24 | n *= 2 25 | test_deco(n) -------------------------------------------------------------------------------- /GraphGenerator/models/bigg_ops/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The Google Research Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | -------------------------------------------------------------------------------- /GraphGenerator/models/bigg_ops/tree_clib/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The Google Research Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | -------------------------------------------------------------------------------- /config/vgae.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | exp_name: VGAE 3 | exp_dir: exp/VGAE 4 | device: cuda:1 5 | gpu: 1 6 | #device: cpu 7 | seed: 1234 8 | dataset: 9 | name: top10 10 | data_path: data/ 11 | model: 12 | name: VGAE 13 | num_nodes: 100000 14 | embedding_dim: 32 15 | hidden_dim: 32 16 | variational: false 17 | has_feature: false 18 | num_GNN_layers: 2 19 | train: 20 | optimizer: Adam 21 | lr: 1.0e-2 22 | max_epochs: 100 23 | display_iter: 10 24 | snapshot_epoch: 1000 25 | #test: 26 | # test_model_dir: exp/VGAE/VGAE_cora_2021-Feb-25-10-38-59_12345 27 | # test_model_name: model_snapshot_0001000.pth 28 | eval: 29 | num_nodes: [100, 1000, 10000, 100000, 1000000] 30 | graph_type: W-S # selected from [E-R, B-A, W-S, grid] -------------------------------------------------------------------------------- /config/netgan.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | exp_name: NetGAN 3 | exp_dir: exp/NetGAN 4 | device: cuda:1 5 | gpu: 1 6 | #device: cpu 7 | seed: 1234 8 | dataset: 9 | name: top10 10 | data_path: data/ 11 | model: 12 | name: NetGAN 13 | num_nodes: 1000 14 | embedding_dim: 256 15 | rw_len: 16 16 | train: 17 | optimizer: Adam 18 | lr: 1.0e-4 19 | batch_size: 128 20 | max_epochs: 200000 21 | eval_iter: 2000 22 | display_iter: 200000 23 | #display_iter: 20000 24 | snapshot_epoch: 2000 25 | val_share: 0.15 26 | test_share: 0.05 27 | stopping_criterion: val 28 | #stopping_criterion: eo # 'eo' means early stopping 29 | test: 30 | sample_num: 1000 31 | num_gen: 10 32 | test_model_dir: exp/Template_name/Template_model_Template_data_2021-Feb-25-10-38-59_12345 33 | test_model_name: model_snapshot_0001000.pth 34 | -------------------------------------------------------------------------------- /GraphGenerator/models/bigg_ops/tree_clib/reame.md: -------------------------------------------------------------------------------- 1 | # Installation on NVIDIA GeForce RTX 3090 and CUDA 11.1 2 | Using default Makefile, there is no bug when installing BiGG in this environment. 3 | 4 | # Installation on different devices and environments 5 | The installation of BiGG requires one more step, i.e., check the computing capability of your gpu. 6 | 7 | ## check the computing capability 8 | Visiting this website, we can query the corresponding computing capability: https://developer.nvidia.com/cuda-gpus 9 | 10 | ## choosing specific Makefile 11 | According to the query result, choosing specific Makefile_xx as your Makefile. 12 | 13 | For example, if the existing GPU device is GeForce RTX 2080, the `Makefile_75` or `Makefile_70` can be renamed 14 | as `Makefile` because the computing capability of 2080 Ti is `7.5`. 15 | 16 | -------------------------------------------------------------------------------- /GraphGenerator/models/bigg_ops/tree_clib/include/cuda_ops.h: -------------------------------------------------------------------------------- 1 | // Copyright 2020 The Google Research Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #ifndef CUDA_OP_H 16 | #define CUDA_OP_H 17 | 18 | #include 19 | 20 | void build_binary_mat(int n_rows, int n_ints, int n_feats, int* lens, 21 | uint32_t* bits, float* outptr); 22 | 23 | #endif 24 | -------------------------------------------------------------------------------- /config/graphrnn.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | exp_name: GraphRNN 3 | exp_dir: exp/GraphRNN 4 | device: cuda:1 5 | gpu: 1 6 | #device: cpu 7 | seed: 1234 8 | dataset: 9 | name: top10 10 | data_path: data/ 11 | num_workers: 4 12 | model: 13 | #name: GraphRNN_RNN 14 | name: GraphRNN_MLP 15 | hidden_size_rnn: 128 16 | hidden_size_rnn_output: 16 17 | embedding_size_rnn: 64 18 | embedding_size_rnn_output: 8 19 | embedding_size_output: 64 20 | num_layers: 4 21 | max_num_node: null 22 | max_prev_node: null 23 | train: 24 | optimizer: Adam 25 | lr: 3.0e-3 26 | lr_rate: 0.3 27 | milestones: [1000] 28 | epochs: 2000 29 | epochs_log: 1 30 | batch_ratio: 1 31 | batch_size: 1 32 | validate_epoch: 100 33 | validate_sample: 1 34 | save_snapshot: true 35 | snapshot_epoch: 100 36 | resume: false 37 | resume_epoch: 100 38 | save: false 39 | save_epoch_by: 200 40 | test: 41 | batch_size: 1 42 | test_model_dir: exp/Template_name/Template_model_Template_data_2021-Feb-25-10-38-59_12345 43 | test_model_name: model_snapshot_0001000.pth 44 | -------------------------------------------------------------------------------- /GraphGenerator/models/bter_ops/template.m: -------------------------------------------------------------------------------- 1 | 2 | % step1 3 | load('%##{Template Block}##%'); 4 | nnodes = size(G,1); 5 | nedges = nnz(G)/2; 6 | fprintf('Graph name: %s\n', graphname); 7 | fprintf('Number of nodes: %d\n', nnodes); 8 | fprintf('Number of edges: %d\n', nedges); 9 | 10 | % step2 11 | nd = accumarray(nonzeros(sum(G,2)),1); 12 | maxdegree = find(nd>0,1,'last'); 13 | fprintf('Maximum degree: %d\n', maxdegree); 14 | 15 | % step3 16 | [ccd,gcc] = ccperdeg(G); 17 | fprintf('Global clustering coefficient: %.2f\n', gcc); 18 | 19 | G_bter = {}; 20 | for i = 1:%##{Template Block}##%% step4 21 | fprintf('Running BTER...\n'); 22 | t1=tic; 23 | [E1,E2] = bter(nd,ccd); 24 | toc(t1) 25 | fprintf('Number of edges created by BTER: %d\n', size(E1,1) + size(E2,1));% step5 26 | fprintf('Turning edge list into adjacency matrix (including dedup)...\n'); 27 | t2=tic; 28 | tmpg_bter = bter_edges2graph(E1,E2); 29 | toc(t2); 30 | fprintf('Number of edges in dedup''d graph: %d\n', nnz(G)/2); 31 | G_bter{end+1} = tmpg_bter; 32 | end 33 | 34 | save('%##{Template Block}##%','G_bter') -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 VOIX 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation 
the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /config/bigg.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | exp_name: BiGG 3 | exp_dir: exp/BiGG 4 | device: cuda:2 5 | #device: cpu 6 | gpu: 2 7 | seed: 1234 8 | dataset: 9 | name: top10 10 | data_path: data/ 11 | directed: false 12 | self_loop: false 13 | bfs_permute: false 14 | model: 15 | name: BiGG 16 | max_num_nodes: 743 17 | embed_dim: 256 18 | num_RNN_layers: 2 19 | bits_compress: 256 20 | tree_pos_enc: false 21 | pos_enc: true 22 | pos_base: 10000 23 | greedy_frac: 0.0 24 | share_param: true 25 | blksize: -1 26 | train: 27 | batch_size: 32 28 | optimizer: Adam 29 | lr: 3.0e-4 30 | grad_clip: 5 31 | accum_grad: 1 32 | max_epochs: 100 33 | display_iter: 10 34 | save_snapshot: false 35 | snapshot_epoch: 100 36 | resume: false 37 | resume_epoch: 99 38 | resume_model_dir: exp/BiGG/BiGG_top10_2021-Mar-15-19-44-39_115041 39 | resume_model_name: epoch-100.ckpt 40 | test: 41 | num_test_gen: 10 42 | greedy_frac: 0.0 43 | display: false 44 | load_snapshot: false 45 | test_model_dir: exp/BiGG/BiGG_top10_2021-Mar-15-19-44-39_115041 46 | test_model_name: epoch-100.ckpt 47 | eval: 48 | num_nodes: [100, 1000, 10000, 100000, 1000000] 49 | graph_type: W-S # selected from [E-R, B-A, W-S, grid] -------------------------------------------------------------------------------- /GraphGenerator/test/test_bigg.py: -------------------------------------------------------------------------------- 1 | from tqdm import tqdm 2 | import torch 3 | import torch.optim as optim 4 | 5 | import numpy as np 6 | import random 7 | import networkx as nx 8 | from GraphGenerator.utils.arg_utils import get_config, set_device 9 | from GraphGenerator.models.bigg_ops.tree_clib.tree_lib import setup_treelib, TreeLib 10 | from GraphGenerator.models.bigg_ops.tree_model import RecurTreeGen 11 | 12 | 13 | def bigg_test(args, config): 14 | random.seed(config.seed) 15 | torch.manual_seed(config.seed) 16 | np.random.seed(config.seed) 17 | set_device(config) 18 | setup_treelib(config) 19 | 20 | train_graphs = [nx.barabasi_albert_graph(10, 2)] 21 | TreeLib.InsertGraph(train_graphs[0]) 22 | max_num_nodes = max([len(gg.nodes) for gg in train_graphs]) 23 | config.model.max_num_nodes = max_num_nodes 24 | 25 | model = RecurTreeGen(config).to(config.device) 26 | optimizer = optim.Adam(model.parameters(), lr=config.train.lr, weight_decay=1e-4) 27 | for i in range(2): 28 | optimizer.zero_grad() 29 | ll, _ = model.forward_train([0]) 30 | loss = -ll / max_num_nodes 31 | print('iter', i, 'loss', loss.item()) 32 | loss.backward() 33 | optimizer.step() 34 | 
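
The smoke test above can also be driven through `GraphGenerator.test.test_generator`, which resolves the generator name via `eval(args.generator)`. A minimal sketch of such a call follows; the ad-hoc `SimpleNamespace` args object is an assumption for illustration (the real CLI entry point lives in `__main__.py`), and it presumes the compiled `tree_clib` plus the GPU settings from the shipped `config/bigg.yaml`:

```python
from types import SimpleNamespace

from GraphGenerator.test import test_generator
from GraphGenerator.utils.arg_utils import get_config

# config/bigg.yaml ships with the repository; get_config also seeds the RNGs and
# snapshots the resolved config under exp/BiGG/.
config = get_config("config/bigg.yaml")

# "bigg" matches the name imported in GraphGenerator/test/__init__.py,
# so eval(args.generator) dispatches to bigg_test.
args = SimpleNamespace(generator="bigg")

test_generator(args, config)  # runs the two-iteration BiGG training smoke test
```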
-------------------------------------------------------------------------------- /GraphGenerator/utils/logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | 4 | def setup_logging(log_level, log_file, logger_name="exp_logger"): 5 | """ Setup logging """ 6 | numeric_level = getattr(logging, log_level.upper(), None) 7 | if not isinstance(numeric_level, int): 8 | raise ValueError("Invalid log level: %s" % log_level) 9 | 10 | logging.basicConfig( 11 | filename=log_file, 12 | filemode="w", 13 | format="%(levelname)-5s | %(asctime)s | File %(filename)-20s | Line %(lineno)-5d | %(message)s", 14 | datefmt="%m/%d/%Y %I:%M:%S %p", 15 | level=numeric_level) 16 | 17 | # define a Handler which writes messages to the sys.stderr 18 | console = logging.StreamHandler() 19 | console.setLevel(numeric_level) 20 | # set a format which is simpler for console use 21 | formatter = logging.Formatter( 22 | "%(levelname)-5s | %(asctime)s | %(filename)-25s | line %(lineno)-5d: %(message)s" 23 | ) 24 | # tell the handler to use this format 25 | console.setFormatter(formatter) 26 | # add the handler to the root logger 27 | logging.getLogger(logger_name).addHandler(console) 28 | 29 | return get_logger(logger_name) 30 | 31 | 32 | def get_logger(logger_name="exp_logger"): 33 | return logging.getLogger(logger_name) 34 | -------------------------------------------------------------------------------- /GraphGenerator/models/bigg_ops/tree_clib/include/config.h: -------------------------------------------------------------------------------- 1 | // Copyright 2020 The Google Research Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #ifndef cfg_H 16 | #define cfg_H 17 | 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | 25 | typedef float Dtype; 26 | 27 | struct cfg 28 | { 29 | static int max_num_nodes; 30 | static bool directed, self_loop, bfs_permute; 31 | static int bits_compress; 32 | static int dim_embed; 33 | static int gpu; 34 | static int seed; 35 | 36 | static std::default_random_engine generator; 37 | 38 | static void LoadParams(const int argc, const char** argv); 39 | 40 | static void SetRandom(); 41 | }; 42 | 43 | #endif 44 | -------------------------------------------------------------------------------- /GraphGenerator/metrics/memory.py: -------------------------------------------------------------------------------- 1 | import torch, os 2 | 3 | ## return current gpu memory cached 4 | # torch.cuda.memory_reserved() 5 | ## return peak gpu memory cached 6 | # torch.cuda.max_memory_reserved() 7 | ## reset peak gpu memory cached 8 | # torch.cuda.reset_peak_memory_stats() 9 | 10 | def get_peak_gpu_memory(device='cuda:0'): 11 | """ 12 | :return: maximum memory cached (Byte) 13 | """ 14 | return torch.cuda.max_memory_reserved(device) 15 | 16 | 17 | def flush_cached_gpu_memory(): 18 | torch.cuda.empty_cache() 19 | torch.cuda.reset_peak_memory_stats() 20 | 21 | 22 | def test_memory_usage(): 23 | flush_cached_gpu_memory() 24 | current_memory = get_peak_gpu_memory()//1024 25 | print("Current gpu memory cached: {} KiB".format(current_memory)) 26 | flush_cached_gpu_memory() 27 | a = torch.ones(3,3).cuda() 28 | print("Add a tensor to gpu.") 29 | current_memory = get_peak_gpu_memory() // 1024 30 | print("Current gpu memory cached: {} KiB".format(current_memory)) 31 | del a 32 | print("Delete a tensor from gpu.") 33 | flush_cached_gpu_memory() 34 | current_memory = get_peak_gpu_memory() // 1024 35 | print("Current gpu memory cached: {} KiB".format(current_memory)) 36 | 37 | 38 | if __name__ == '__main__': 39 | os.environ["CUDA_VISIBLE_DEVICES"] = "1" 40 | test_memory_usage() -------------------------------------------------------------------------------- /GraphGenerator/models/er.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | import itertools 3 | import math 4 | import random 5 | 6 | 7 | def empty_graph(num_nodes): 8 | g = nx.Graph() 9 | g.add_nodes_from(range(num_nodes)) 10 | return g 11 | 12 | 13 | def complete_graph(num_nodes): 14 | g = empty_graph(num_nodes) 15 | edges = itertools.combinations(range(num_nodes), 2) 16 | g.add_edges_from(edges) 17 | return g 18 | 19 | 20 | def random_graph(num_nodes, p): 21 | g = empty_graph(num_nodes) 22 | if p <= 0: 23 | return g 24 | if p >= 1: 25 | return complete_graph(num_nodes) 26 | n = num_nodes 27 | w = -1 28 | lp = math.log(1.0 - p) 29 | # Nodes in graph are from 0,n-1 (start with v as the second node index). 
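# This is the geometric-skip sampler for sparse G(n, p) graphs (Batagelj & Brandes):
# rather than testing every candidate pair, the gap to the next edge is drawn from a
# geometric distribution via int(log(1 - U) / log(1 - p)), so the expected cost is
# O(n + m) instead of O(n^2).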
30 | v = 1 31 | while v < n: 32 | lr = math.log(1.0 - random.random()) 33 | w = w + 1 + int(lr / lp) 34 | while w >= v and v < n: 35 | w = w - v 36 | v = v + 1 37 | if v < n: 38 | g.add_edge(v, w) 39 | return g 40 | 41 | 42 | def e_r(in_graph, config): 43 | """ 44 | E-R graph generator 45 | :param in_graph: referenced graph, type: nx.Graph 46 | :param config: configure object 47 | :return: generated graphs, type: list of nx.Graph 48 | """ 49 | num_edges = in_graph.number_of_edges() 50 | num_nodes = in_graph.number_of_nodes() 51 | p = num_edges/(num_nodes*(num_nodes-1)/2) 52 | out_graphs = [] 53 | for i in range(config.num_gen): 54 | out_graph = random_graph(num_nodes, p) 55 | out_graphs.append(out_graph) 56 | return out_graphs 57 | -------------------------------------------------------------------------------- /GraphGenerator/models/bigg_ops/tree_clib/src/lib/cuda_ops.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "cuda_ops.h" // NOLINT 4 | #include "cuda_runtime.h" // NOLINT 5 | 6 | 7 | 8 | __global__ void binary_build_kernel(int n_ints, int n_feats, int* lens, 9 | uint32_t* bits, float* outptr) 10 | { 11 | int row = blockIdx.x; 12 | float* feat_ptr = outptr + row * n_feats; 13 | uint32_t* cur_bits = bits + row * n_ints; 14 | int bit_start = threadIdx.x; 15 | int bit_end = lens[row]; 16 | int bit_steps = blockDim.x; 17 | for (int i = bit_start; i < bit_end; i += bit_steps) 18 | { 19 | int slot = i / 32; 20 | uint32_t pos = i % 32; 21 | uint32_t bit = cur_bits[slot] & ((uint32_t)1 << pos); 22 | feat_ptr[i] = bit ? 1 : -1; 23 | } 24 | } 25 | 26 | void build_binary_mat(int n_rows, int n_ints, int n_feats, int* lens, 27 | uint32_t* bits, float* outptr) 28 | { 29 | int* lens_gpu; 30 | uint32_t* bits_gpu; 31 | cudaError_t t = cudaMalloc(&lens_gpu, sizeof(int) * n_rows); 32 | assert(t == cudaSuccess); 33 | t = cudaMalloc(&bits_gpu, sizeof(uint32_t) * n_ints * n_rows); 34 | assert(t == cudaSuccess); 35 | 36 | cudaMemcpy(lens_gpu, lens, sizeof(int) * n_rows, cudaMemcpyHostToDevice); 37 | cudaMemcpy(bits_gpu, bits, sizeof(uint32_t) * n_rows * n_ints, 38 | cudaMemcpyHostToDevice); 39 | 40 | dim3 grid(n_rows); 41 | dim3 block(1024); 42 | binary_build_kernel<<>>(n_ints, n_feats, lens_gpu, 43 | bits_gpu, outptr); 44 | cudaFree(lens_gpu); 45 | cudaFree(bits_gpu); 46 | } 47 | -------------------------------------------------------------------------------- /config/gran.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | exp_name: GRAN 3 | exp_dir: exp/GRAN 4 | use_gpu: true 5 | device: cuda:0 6 | gpu: 0 7 | #device: cpu 8 | seed: 1234 9 | dataset: 10 | loader_name: GRANData 11 | name: top10 12 | data_path: data/ 13 | node_order: DFS 14 | num_subgraph_batch: 32 15 | num_fwd_pass: 1 16 | has_node_feat: false 17 | train_ratio: 1.0 18 | dev_ratio: 1.0 19 | is_save_split: false 20 | is_sample_subgraph: true 21 | is_overwrite_precompute: false 22 | model: 23 | name: GRANMixtureBernoulli 24 | num_mix_component: 20 25 | is_sym: true 26 | block_size: 1 27 | sample_stride: 1 28 | max_num_nodes: 1000 29 | hidden_dim: 128 30 | embedding_dim: 128 31 | num_GNN_layers: 7 32 | num_GNN_prop: 1 33 | num_canonical_order: 1 34 | dimension_reduce: true 35 | has_attention: true 36 | edge_weight: 1.0e+0 37 | train: 38 | optimizer: Adam 39 | lr: 1.0e-4 40 | lr_decay: 0.3 41 | lr_decay_epoch: [10000] 42 | num_workers: 0 43 | max_epoch: 50 44 | batch_size: 1 45 | display_iter: 10 46 | snapshot_epoch: 100 47 
| valid_epoch: 50 48 | wd: 0.0e-4 49 | save_snapshot: false 50 | momentum: 0.9 51 | shuffle: true 52 | is_resume: false 53 | resume_dir: # exp/GRAN/your_exp_folder 54 | resume_model: model_snapshot_0005000.pth 55 | test: 56 | batch_size: 1 57 | num_workers: 0 58 | num_test_gen: 2 # number of generated samples 59 | is_vis: false 60 | is_single_plot: false # visualize `num_vis` samples in a single image 61 | is_test_ER: false # test Erdos-Renyi baseline 62 | num_vis: 20 63 | vis_num_row: 5 # visualize `num_vis` samples in `vis_num_row` rows 64 | better_vis: true 65 | test_model_dir: snapshot_model 66 | test_model_name: gran_xx.pth 67 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | from setuptools.command.develop import develop 3 | import os 4 | import subprocess 5 | BASEPATH = os.path.dirname(os.path.abspath(__file__)) 6 | 7 | 8 | class CustomDevelop(develop): 9 | def run(self): 10 | original_cwd = os.getcwd() 11 | folder = os.path.join(BASEPATH, 'GraphGenerator/models/kronecker_ops') 12 | if not os.path.exists(os.path.join(folder, 'Makefile.config')): 13 | os.chdir(folder) 14 | subprocess.check_call(['unzip', '-o', '-d', '.', 'kronecker_src.zip']) 15 | folders = [ 16 | os.path.join(BASEPATH, 'GraphGenerator/models/bigg_ops/tree_clib'), 17 | os.path.join(BASEPATH, 'GraphGenerator/models/kronecker_ops/examples/kronfit') 18 | ] 19 | for folder in folders: 20 | os.chdir(folder) 21 | subprocess.check_call(['make']) 22 | folders = [ 23 | os.path.join(BASEPATH, 'GraphGenerator/evaluate'), 24 | ] 25 | for folder in folders: 26 | os.chdir(folder) 27 | subprocess.check_call(['g++', '-O2', '-std=c++11', '-o', 'orca', 'orca.cpp']) 28 | os.chdir(original_cwd) 29 | 30 | super().run() 31 | 32 | 33 | setuptools.setup( 34 | name="GraphGenerator", 35 | version="0.1", 36 | author="Sheng Xiang", 37 | author_email="xiangsheng218@gmail.com", 38 | description="Graph Generator package", 39 | long_description_content_type="text/markdown", 40 | packages=setuptools.find_packages(), 41 | classifiers=[ 42 | "Programming Language :: Python :: 3", 43 | "License :: OSI Approved :: MIT License", 44 | "Operating System :: OS Independent", 45 | ], 46 | python_requires='>=3.6', 47 | cmdclass={ 48 | 'develop': CustomDevelop 49 | } 50 | ) 51 | -------------------------------------------------------------------------------- /GraphGenerator/utils/arg_utils.py: -------------------------------------------------------------------------------- 1 | import time, os, yaml, torch, random 2 | import numpy as np 3 | from easydict import EasyDict as edict 4 | 5 | 6 | def get_config(config_file): 7 | """ Construct and snapshot hyper parameters """ 8 | # config = edict(yaml.load(open(config_file, 'r'), Loader=yaml.FullLoader)) 9 | config = edict(yaml.load(open(config_file, 'r'))) 10 | if config.seed is not None: 11 | np.random.seed(config.seed) 12 | random.seed(config.seed) 13 | # create hyper parameters 14 | config.run_id = str(os.getpid()) 15 | config.exp_name = '_'.join([ 16 | config.model.name, config.dataset.name, 17 | time.strftime('%Y-%b-%d-%H-%M-%S'), config.run_id 18 | ]) 19 | 20 | save_dir = os.path.join(config.exp_dir, config.exp_name) 21 | save_name = os.path.join(save_dir, 'config.yaml') 22 | config.save_dir = save_dir 23 | # snapshot hyperparameters 24 | mkdir(config.exp_dir) 25 | mkdir(save_dir) 26 | 27 | yaml.dump(edict2dict(config), open(save_name, 'w'), 
default_flow_style=False) 28 | 29 | return config 30 | 31 | 32 | def edict2dict(edict_obj): 33 | dict_obj = {} 34 | 35 | for key, vals in edict_obj.items(): 36 | if isinstance(vals, edict): 37 | dict_obj[key] = edict2dict(vals) 38 | else: 39 | dict_obj[key] = vals 40 | 41 | return dict_obj 42 | 43 | 44 | def mkdir(folder): 45 | if not os.path.isdir(folder): 46 | os.makedirs(folder) 47 | 48 | 49 | def set_device(config): 50 | if int(config.gpu) >= 0 and config.device.startswith('cuda:'): 51 | # os.environ["CUDA_VISIBLE_DEVICES"] = str(config.gpu) 52 | # config.device = 'cuda:0' 53 | print('use gpu indexed: {}'.format(config.gpu)) 54 | else: 55 | config.gpu = -1 56 | os.environ["CUDA_VISIBLE_DEVICES"] = "" 57 | config.device = 'cpu' 58 | print('use cpu') 59 | 60 | -------------------------------------------------------------------------------- /GraphGenerator/models/bter_ops/LICENSE.txt: -------------------------------------------------------------------------------- 1 | Tamara G. Kolda, Ali Pinar, and others, FEASTPACK v1.1, Sandia National 2 | Laboratories, SAND2013-4136W, http://www.sandia.gov/~tgkolda/feastpack/, 3 | January 2014 4 | 5 | ** License ** 6 | 7 | Copyright (c) 2014, Sandia National Laboratories 8 | All rights reserved. 9 | 10 | Redistribution and use in source and binary forms, with or without 11 | modification, are permitted provided that the following conditions are 12 | met: 13 | 14 | 1. Redistributions of source code must retain the above copyright notice, 15 | this list of conditions and the following disclaimer. 16 | 17 | 2. Redistributions in binary form must reproduce the above copyright 18 | notice, this list of conditions and the following disclaimer in the 19 | documentation and/or other materials provided with the distribution. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 22 | IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 23 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 24 | PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 25 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 26 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 27 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 28 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 29 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 30 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 31 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | 33 | **** 34 | 35 | Sandia National Laboratories is a multi-program laboratory managed and 36 | operated by Sandia Corporation, a wholly owned subsidiary of Lockheed 37 | Martin Corporation, for the U.S. Department of Energy's National Nuclear 38 | Security Administration under contract DE-AC04-94AL85000. 39 | -------------------------------------------------------------------------------- /GraphGenerator/models/bigg_ops/tree_clib/include/tree_util.h: -------------------------------------------------------------------------------- 1 | // Copyright 2020 The Google Research Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #ifndef TREE_UTIL_H 16 | #define TREE_UTIL_H 17 | 18 | #include 19 | #include 20 | #include "struct_util.h" // NOLINT 21 | 22 | class AdjNode; 23 | extern int total_job_nums; 24 | extern std::vector global_job_nodes; 25 | 26 | class AdjNode 27 | { 28 | public: 29 | AdjNode(){} 30 | AdjNode(AdjNode* parent, int row, int col_begin, int col_end, int depth); 31 | ~AdjNode(); 32 | void init(AdjNode* parent, int row, int col_begin, int col_end, int depth); 33 | void split(); 34 | void update_bits(); 35 | 36 | AdjNode *parent, *lch, *rch; 37 | int global_idx; 38 | int row, col_begin, col_end, mid; 39 | int depth, n_cols; 40 | bool is_leaf, is_root; 41 | bool has_edge, is_lowlevel; 42 | BitSet bits_rep; 43 | int job_idx; 44 | }; 45 | 46 | extern PtHolder node_holder; 47 | 48 | class AdjRow 49 | { 50 | public: 51 | AdjRow(){} 52 | AdjRow(int row, int col_start, int col_end); 53 | ~AdjRow(); 54 | void init(int row, int col_start, int col_end); 55 | 56 | void insert_edges(std::vector& col_indices); 57 | AdjNode* root; 58 | int row, max_col; 59 | 60 | private: 61 | void add_edges(AdjNode* node, ColAutomata* col_sm); 62 | }; 63 | 64 | extern PtHolder row_holder; 65 | 66 | 67 | #endif 68 | -------------------------------------------------------------------------------- /GraphGenerator/models/bter.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import networkx as nx 4 | from GraphGenerator.utils.data_utils import load_matlab_graph, save_matlab_graph 5 | 6 | 7 | def generate_matlab_mat(data_name, in_mat_path, out_mat_path, repeat=2): 8 | bter_path = "./GraphGenerator/models/bter_ops/" 9 | template_filename = os.path.join(bter_path, "template.m") 10 | with open(template_filename, "r") as r_f: 11 | template_context = r_f.read() 12 | tmp_filepath = os.path.join(bter_path, "{}.m".format(data_name)) 13 | with open(tmp_filepath, "w") as w_f: 14 | context = template_context.split("%##{Template Block}##%") 15 | w_f.write(context[0]) 16 | w_f.write(os.path.join("../../../", in_mat_path)) 17 | w_f.write(context[1]) 18 | w_f.write(str(repeat)) 19 | w_f.write(context[2]) 20 | w_f.write(os.path.join("../../../", out_mat_path)) 21 | w_f.write(context[3]) 22 | os.system("matlab -nosplash -nodesktop -r " 23 | "'cd ./GraphGenerator/models/bter_ops; {}; cd ../../..; quit'".format(data_name)) 24 | graphs = load_matlab_graph(fname=out_mat_path) 25 | # print(graphs) 26 | os.remove(tmp_filepath) 27 | return graphs 28 | 29 | 30 | def bter(input_graph, config): 31 | fname = os.path.join(config.exp_dir, config.exp_name, "{}.mat".format(config.dataset.name)) 32 | dump_name = os.path.join(config.exp_dir, config.exp_name, "bter_to_{}.mat".format(config.dataset.name)) 33 | sp_adj = nx.adjacency_matrix(input_graph) 34 | sp_adj.data = sp_adj.data.astype(np.float64) 35 | save_matlab_graph(fname, sp_adj, config.dataset.name) 36 | graphs = generate_matlab_mat(data_name=config.dataset.name, 37 | in_mat_path=fname, 38 | out_mat_path=dump_name, 39 | repeat=config.num_gen) 40 | # print(graphs) 41 | return 
[nx.Graph(graph) for graph in graphs[0].tolist()] 42 | 43 | 44 | if __name__ == '__main__': 45 | tmp_g = nx.grid_2d_graph(10, 10) 46 | save_matlab_graph("./tmp.mat", nx.adjacency_matrix(tmp_g), "tmp") 47 | -------------------------------------------------------------------------------- /GraphGenerator/models/ba.py: -------------------------------------------------------------------------------- 1 | from GraphGenerator.models.er import empty_graph 2 | import networkx as nx 3 | import numpy as np 4 | import random 5 | 6 | 7 | def _random_subset(seq, m): 8 | """ Return m unique elements from seq. 9 | 10 | This differs from random.sample which can return repeated 11 | elements if seq holds repeated elements. 12 | 13 | Note: eval('random') can be a random.Random or numpy.random.RandomState instance. 14 | """ 15 | targets = set() 16 | while len(targets) < m: 17 | x = random.choice(seq) 18 | targets.add(x) 19 | return targets 20 | 21 | 22 | def barabasi_albert_graph(n, m): 23 | if m < 1 or m >= n: 24 | raise nx.NetworkXError( 25 | f"Barabási–Albert network must have m >= 1 and m < n, m = {m}, n = {n}" 26 | ) 27 | 28 | # Add m initial nodes (m0 in barabasi-speak) 29 | G = empty_graph(m) 30 | # Target nodes for new edges 31 | targets = list(range(m)) 32 | # List of existing nodes, with nodes repeated once for each adjacent edge 33 | repeated_nodes = [] 34 | # Start adding the other n-m nodes. The first node is m. 35 | source = m 36 | while source < n: 37 | # Add edges to m nodes from the source. 38 | G.add_edges_from(zip([source] * m, targets)) 39 | # Add one node to the list for each new edge just created. 40 | repeated_nodes.extend(targets) 41 | # And the new node "source" has m edges to add to the list. 42 | repeated_nodes.extend([source] * m) 43 | # Now choose m unique nodes from the existing nodes 44 | # Pick uniformly from repeated_nodes (preferential attachment) 45 | targets = _random_subset(repeated_nodes, m) 46 | source += 1 47 | return G 48 | 49 | 50 | def b_a(in_graph, config): 51 | """ 52 | B-A graph generator 53 | :param in_graph: referenced graph, type: nx.Graph 54 | :param config: configure object 55 | :return: generated graphs, type: list of nx.Graph 56 | """ 57 | m = in_graph.number_of_edges() 58 | n = in_graph.number_of_nodes() 59 | k = int((n-np.sqrt(n**2-4*m))//2) 60 | out_graphs = [] 61 | for i in range(config.num_gen): 62 | out_graph = barabasi_albert_graph(n, k) 63 | out_graphs.append(out_graph) 64 | return out_graphs 65 | -------------------------------------------------------------------------------- /GraphGenerator/models/sbm.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import networkx as nx 3 | import numpy as np 4 | import bisect, pickle 5 | import random, argparse 6 | import community 7 | 8 | 9 | def sample_discrete(dist): 10 | # sample a discrete distribution dist with values = dist.keys() and 11 | # probabilities = dist.values() 12 | 13 | i = 0 14 | acc = 0 15 | values = {} 16 | probs = [] 17 | for e in dist: 18 | values[i] = e 19 | acc += dist[e] 20 | probs.append(acc) 21 | i += 1 22 | 23 | rand = random.random() 24 | pos = bisect.bisect(probs, rand) 25 | return values[pos] 26 | 27 | 28 | def get_parameters(G, method="sbm"): 29 | part = community.best_partition(G) 30 | M = {} 31 | for e in G.edges(): 32 | r = part[e[0]] 33 | s = part[e[1]] 34 | el = tuple(sorted([r, s])) 35 | M[el] = M.get(el, 0) + 1 36 | 37 | g = {} 38 | for k, v in part.items(): 39 | g[v] = g.get(v, []) + [k] 40 | 41 | k = G.degree() 
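# k is a networkx DegreeView, indexable by node. Below, K[c] accumulates the total
# degree of community c; for the degree-corrected variant (method != "sbm") each node's
# weight t[i] is its share of its community's total degree, while for plain SBM every
# node in a community gets the same weight 1/len(community).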
42 | K = {} 43 | for c in g: 44 | K[c] = sum([k[i] for i in g[c]]) 45 | if method != "sbm": 46 | t = dict(k) 47 | for e in t: 48 | if t[e] != 0: 49 | t[e] = float(t[e])/K[part[e]] 50 | else: 51 | t = part.copy() 52 | for c in g: 53 | node_list = g[c] 54 | prob = 1./len(node_list) 55 | for n in node_list: 56 | t[n] = prob 57 | 58 | return (t, M, g) 59 | 60 | 61 | def generate_from_parameters(t, w, g): 62 | G = nx.Graph() 63 | for i in g: 64 | G.add_nodes_from(g[i]) 65 | 66 | # generate num of edges 67 | M = w.copy() 68 | for c in M: 69 | M[c] = np.random.poisson(M[c]) 70 | 71 | # assign edges to vertices 72 | edges = [] 73 | for c in M: 74 | r = c[0] 75 | s = c[1] 76 | for i in range(M[c]): 77 | n1 = sample_discrete({j: t[j] for j in g[r]}) 78 | n2 = sample_discrete({j: t[j] for j in g[s]}) 79 | edges.append((n1, n2)) 80 | 81 | G.add_edges_from(edges) 82 | return G 83 | 84 | 85 | def generate(G, method, repeat=1): 86 | t, w, g = get_parameters(G, method) 87 | return [generate_from_parameters(t, w, g) for i in range(repeat)] 88 | 89 | 90 | -------------------------------------------------------------------------------- /GraphGenerator/models/bigg_ops/tree_clib/src/lib/config.cpp: -------------------------------------------------------------------------------- 1 | #include "config.h" // NOLINT 2 | #include 3 | #ifdef USE_GPU 4 | #include "cuda_runtime.h" // NOLINT 5 | #endif 6 | 7 | int cfg::max_num_nodes = 1000000; 8 | int cfg::bits_compress = 0; 9 | int cfg::dim_embed = 0; 10 | bool cfg::directed = false; 11 | bool cfg::self_loop = false; 12 | int cfg::gpu = -1; 13 | bool cfg::bfs_permute = false; 14 | int cfg::seed = 1; 15 | std::default_random_engine cfg::generator; 16 | 17 | void cfg::LoadParams(const int argc, const char** argv) 18 | { 19 | for (int i = 1; i < argc; i += 2) 20 | { 21 | if (strcmp(argv[i], "-max_num_nodes") == 0) 22 | max_num_nodes = atoi(argv[i + 1]); // NOLINT 23 | if (strcmp(argv[i], "-directed") == 0) 24 | directed = atoi(argv[i + 1]); // NOLINT 25 | if (strcmp(argv[i], "-self_loop") == 0) 26 | self_loop = atoi(argv[i + 1]); // NOLINT 27 | if (strcmp(argv[i], "-bits_compress") == 0) 28 | bits_compress = atoi(argv[i + 1]); // NOLINT 29 | if (strcmp(argv[i], "-embed_dim") == 0) 30 | dim_embed = atoi(argv[i + 1]); // NOLINT 31 | if (strcmp(argv[i], "-gpu") == 0) 32 | gpu = atoi(argv[i + 1]); // NOLINT 33 | if (strcmp(argv[i], "-seed") == 0) 34 | seed = atoi(argv[i + 1]); // NOLINT 35 | if (strcmp(argv[i], "-bfs_permute") == 0) 36 | bfs_permute = atoi(argv[i + 1]); // NOLINT 37 | } 38 | std::cerr << "====== begin of tree_clib configuration ======" << std::endl; 39 | std::cerr << "| bfs_permute = " << bfs_permute << std::endl; 40 | std::cerr << "| max_num_nodes = " << max_num_nodes << std::endl; 41 | std::cerr << "| bits_compress = " << bits_compress << std::endl; 42 | std::cerr << "| dim_embed = " << dim_embed << std::endl; 43 | std::cerr << "| gpu = " << gpu << std::endl; 44 | std::cerr << "| seed = " << seed << std::endl; 45 | std::cerr << "====== end of tree_clib configuration ======" << std::endl; 46 | #ifdef USE_GPU 47 | if (gpu >= 0) 48 | { 49 | cudaError_t t = cudaSetDevice(gpu); 50 | //assert(t == cudaSuccess); 51 | } 52 | #endif 53 | } 54 | 55 | void cfg::SetRandom() 56 | { 57 | std::srand(cfg::seed); 58 | cfg::generator.seed(cfg::seed); 59 | } 60 | -------------------------------------------------------------------------------- /GraphGenerator/models/bigg_ops/tree_clib/Makefile: -------------------------------------------------------------------------------- 1 
| dir_guard = @mkdir -p $(@D) 2 | FIND := find 3 | CXX := g++ 4 | 5 | CXXFLAGS += -Wall -O3 -std=c++11 6 | LDFLAGS += -lm 7 | 8 | UNAME := $(shell uname) 9 | 10 | CUDA_HOME := /usr/local/cuda 11 | NVCC := $(CUDA_HOME)/bin/nvcc 12 | USE_GPU = 1 13 | 14 | ifeq ($(UNAME), Darwin) 15 | USE_GPU = 0 16 | FOMP := 17 | else 18 | LDFLAGS += -fopenmp 19 | FOMP := -fopenmp 20 | endif 21 | 22 | ifeq ($(USE_GPU), 1) 23 | NVCCFLAGS += --default-stream per-thread 24 | LDFLAGS += -L$(CUDA_HOME)/lib64 -lcudart -lcublas -lcurand 25 | endif 26 | 27 | CUDA_ARCH := -gencode arch=compute_86,code=sm_86 28 | # -gencode arch=compute_70,code=sm_70 29 | 30 | build_root = build 31 | 32 | ifeq ($(USE_GPU), 1) 33 | include_dirs = ./include $(CUDA_HOME)/include 34 | else 35 | include_dirs = ./include 36 | endif 37 | 38 | 39 | CXXFLAGS += $(addprefix -I,$(include_dirs)) -Wno-unused-local-typedef 40 | CXXFLAGS += -fPIC 41 | cpp_files = $(shell $(FIND) src/lib -name "*.cpp" -print | rev | cut -d"/" -f1 | rev) 42 | cxx_obj_files = $(subst .cpp,.o,$(cpp_files)) 43 | obj_build_root = $(build_root)/objs 44 | objs = $(addprefix $(obj_build_root)/cxx/,$(cxx_obj_files)) 45 | 46 | 47 | ifeq ($(USE_GPU), 1) 48 | CXXFLAGS += -DUSE_GPU 49 | NVCCFLAGS += -DUSE_GPU 50 | NVCCFLAGS += $(addprefix -I,$(include_dirs)) 51 | NVCCFLAGS += -std=c++11 --use_fast_math --compiler-options '-fPIC' 52 | cu_files = $(shell $(FIND) src/lib -name "*.cu" -printf "%P\n") 53 | cu_obj_files = $(subst .cu,.o,$(cu_files)) 54 | objs += $(addprefix $(obj_build_root)/cuda/,$(cu_obj_files)) 55 | endif 56 | 57 | 58 | DEPS = $(objs:.o=.d) 59 | 60 | target = $(build_root)/dll/libtree.so 61 | target_dep = $(addsuffix .d,$(target)) 62 | 63 | .PRECIOUS: $(build_root)/lib/%.o 64 | 65 | all: $(target) 66 | 67 | $(target) : src/tree_main.cpp $(objs) 68 | $(dir_guard) 69 | $(CXX) -shared $(CXXFLAGS) -MMD -o $@ $(filter %.cpp %.o, $^) $(LDFLAGS) 70 | 71 | DEPS += $(target_dep) 72 | 73 | ifeq ($(USE_GPU), 1) 74 | $(obj_build_root)/cuda/%.o: src/lib/%.cu 75 | $(dir_guard) 76 | $(NVCC) $(NVCCFLAGS) $(CUDA_ARCH) -M $< -o ${@:.o=.d} -odir $(@D) 77 | $(NVCC) $(NVCCFLAGS) $(CUDA_ARCH) -c $< -o $@ 78 | endif 79 | 80 | $(obj_build_root)/cxx/%.o: src/lib/%.cpp 81 | $(dir_guard) 82 | $(CXX) $(CXXFLAGS) -MMD -c -o $@ $(filter %.cpp, $^) $(FOMP) 83 | 84 | clean: 85 | rm -rf $(build_root) 86 | 87 | -include $(DEPS) 88 | -------------------------------------------------------------------------------- /GraphGenerator/models/bigg_ops/tree_clib/Makefile_70: -------------------------------------------------------------------------------- 1 | dir_guard = @mkdir -p $(@D) 2 | FIND := find 3 | CXX := g++ 4 | 5 | CXXFLAGS += -Wall -O3 -std=c++11 6 | LDFLAGS += -lm 7 | 8 | UNAME := $(shell uname) 9 | 10 | CUDA_HOME := /usr/local/cuda 11 | NVCC := $(CUDA_HOME)/bin/nvcc 12 | USE_GPU = 1 13 | 14 | ifeq ($(UNAME), Darwin) 15 | USE_GPU = 0 16 | FOMP := 17 | else 18 | LDFLAGS += -fopenmp 19 | FOMP := -fopenmp 20 | endif 21 | 22 | ifeq ($(USE_GPU), 1) 23 | NVCCFLAGS += --default-stream per-thread 24 | LDFLAGS += -L$(CUDA_HOME)/lib64 -lcudart -lcublas -lcurand 25 | endif 26 | 27 | CUDA_ARCH := -gencode arch=compute_70,code=sm_70 28 | # -gencode arch=compute_70,code=sm_70 29 | 30 | build_root = build 31 | 32 | ifeq ($(USE_GPU), 1) 33 | include_dirs = ./include $(CUDA_HOME)/include 34 | else 35 | include_dirs = ./include 36 | endif 37 | 38 | 39 | CXXFLAGS += $(addprefix -I,$(include_dirs)) -Wno-unused-local-typedef 40 | CXXFLAGS += -fPIC 41 | cpp_files = $(shell $(FIND) src/lib -name "*.cpp" -print | rev | 
cut -d"/" -f1 | rev) 42 | cxx_obj_files = $(subst .cpp,.o,$(cpp_files)) 43 | obj_build_root = $(build_root)/objs 44 | objs = $(addprefix $(obj_build_root)/cxx/,$(cxx_obj_files)) 45 | 46 | 47 | ifeq ($(USE_GPU), 1) 48 | CXXFLAGS += -DUSE_GPU 49 | NVCCFLAGS += -DUSE_GPU 50 | NVCCFLAGS += $(addprefix -I,$(include_dirs)) 51 | NVCCFLAGS += -std=c++11 --use_fast_math --compiler-options '-fPIC' 52 | cu_files = $(shell $(FIND) src/lib -name "*.cu" -printf "%P\n") 53 | cu_obj_files = $(subst .cu,.o,$(cu_files)) 54 | objs += $(addprefix $(obj_build_root)/cuda/,$(cu_obj_files)) 55 | endif 56 | 57 | 58 | DEPS = $(objs:.o=.d) 59 | 60 | target = $(build_root)/dll/libtree.so 61 | target_dep = $(addsuffix .d,$(target)) 62 | 63 | .PRECIOUS: $(build_root)/lib/%.o 64 | 65 | all: $(target) 66 | 67 | $(target) : src/tree_main.cpp $(objs) 68 | $(dir_guard) 69 | $(CXX) -shared $(CXXFLAGS) -MMD -o $@ $(filter %.cpp %.o, $^) $(LDFLAGS) 70 | 71 | DEPS += $(target_dep) 72 | 73 | ifeq ($(USE_GPU), 1) 74 | $(obj_build_root)/cuda/%.o: src/lib/%.cu 75 | $(dir_guard) 76 | $(NVCC) $(NVCCFLAGS) $(CUDA_ARCH) -M $< -o ${@:.o=.d} -odir $(@D) 77 | $(NVCC) $(NVCCFLAGS) $(CUDA_ARCH) -c $< -o $@ 78 | endif 79 | 80 | $(obj_build_root)/cxx/%.o: src/lib/%.cpp 81 | $(dir_guard) 82 | $(CXX) $(CXXFLAGS) -MMD -c -o $@ $(filter %.cpp, $^) $(FOMP) 83 | 84 | clean: 85 | rm -rf $(build_root) 86 | 87 | -include $(DEPS) 88 | -------------------------------------------------------------------------------- /GraphGenerator/models/bigg_ops/tree_clib/Makefile_75: -------------------------------------------------------------------------------- 1 | dir_guard = @mkdir -p $(@D) 2 | FIND := find 3 | CXX := g++ 4 | 5 | CXXFLAGS += -Wall -O3 -std=c++11 6 | LDFLAGS += -lm 7 | 8 | UNAME := $(shell uname) 9 | 10 | CUDA_HOME := /usr/local/cuda 11 | NVCC := $(CUDA_HOME)/bin/nvcc 12 | USE_GPU = 1 13 | 14 | ifeq ($(UNAME), Darwin) 15 | USE_GPU = 0 16 | FOMP := 17 | else 18 | LDFLAGS += -fopenmp 19 | FOMP := -fopenmp 20 | endif 21 | 22 | ifeq ($(USE_GPU), 1) 23 | NVCCFLAGS += --default-stream per-thread 24 | LDFLAGS += -L$(CUDA_HOME)/lib64 -lcudart -lcublas -lcurand 25 | endif 26 | 27 | CUDA_ARCH := -gencode arch=compute_75,code=sm_75 28 | # -gencode arch=compute_70,code=sm_70 29 | 30 | build_root = build 31 | 32 | ifeq ($(USE_GPU), 1) 33 | include_dirs = ./include $(CUDA_HOME)/include 34 | else 35 | include_dirs = ./include 36 | endif 37 | 38 | 39 | CXXFLAGS += $(addprefix -I,$(include_dirs)) -Wno-unused-local-typedef 40 | CXXFLAGS += -fPIC 41 | cpp_files = $(shell $(FIND) src/lib -name "*.cpp" -print | rev | cut -d"/" -f1 | rev) 42 | cxx_obj_files = $(subst .cpp,.o,$(cpp_files)) 43 | obj_build_root = $(build_root)/objs 44 | objs = $(addprefix $(obj_build_root)/cxx/,$(cxx_obj_files)) 45 | 46 | 47 | ifeq ($(USE_GPU), 1) 48 | CXXFLAGS += -DUSE_GPU 49 | NVCCFLAGS += -DUSE_GPU 50 | NVCCFLAGS += $(addprefix -I,$(include_dirs)) 51 | NVCCFLAGS += -std=c++11 --use_fast_math --compiler-options '-fPIC' 52 | cu_files = $(shell $(FIND) src/lib -name "*.cu" -printf "%P\n") 53 | cu_obj_files = $(subst .cu,.o,$(cu_files)) 54 | objs += $(addprefix $(obj_build_root)/cuda/,$(cu_obj_files)) 55 | endif 56 | 57 | 58 | DEPS = $(objs:.o=.d) 59 | 60 | target = $(build_root)/dll/libtree.so 61 | target_dep = $(addsuffix .d,$(target)) 62 | 63 | .PRECIOUS: $(build_root)/lib/%.o 64 | 65 | all: $(target) 66 | 67 | $(target) : src/tree_main.cpp $(objs) 68 | $(dir_guard) 69 | $(CXX) -shared $(CXXFLAGS) -MMD -o $@ $(filter %.cpp %.o, $^) $(LDFLAGS) 70 | 71 | DEPS += $(target_dep) 72 | 73 | 
ifeq ($(USE_GPU), 1) 74 | $(obj_build_root)/cuda/%.o: src/lib/%.cu 75 | $(dir_guard) 76 | $(NVCC) $(NVCCFLAGS) $(CUDA_ARCH) -M $< -o ${@:.o=.d} -odir $(@D) 77 | $(NVCC) $(NVCCFLAGS) $(CUDA_ARCH) -c $< -o $@ 78 | endif 79 | 80 | $(obj_build_root)/cxx/%.o: src/lib/%.cpp 81 | $(dir_guard) 82 | $(CXX) $(CXXFLAGS) -MMD -c -o $@ $(filter %.cpp, $^) $(FOMP) 83 | 84 | clean: 85 | rm -rf $(build_root) 86 | 87 | -include $(DEPS) 88 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # pycharm files 2 | .idea/ 3 | .idea 4 | 5 | # Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | wheels/ 27 | pip-wheel-metadata/ 28 | share/python-wheels/ 29 | *.egg-info/ 30 | .installed.cfg 31 | *.egg 32 | MANIFEST 33 | 34 | # PyInstaller 35 | # Usually these files are written by a python script from a template 36 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 37 | *.manifest 38 | *.spec 39 | 40 | # Installer logs 41 | pip-log.txt 42 | pip-delete-this-directory.txt 43 | 44 | # Unit test / coverage reports 45 | htmlcov/ 46 | .tox/ 47 | .nox/ 48 | .coverage 49 | .coverage.* 50 | .cache 51 | nosetests.xml 52 | coverage.xml 53 | *.cover 54 | *.py,cover 55 | .hypothesis/ 56 | .pytest_cache/ 57 | 58 | # Translations 59 | *.mo 60 | *.pot 61 | 62 | # Django stuff: 63 | *.log 64 | local_settings.py 65 | db.sqlite3 66 | db.sqlite3-journal 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_build/ 77 | 78 | # PyBuilder 79 | target/ 80 | 81 | # Jupyter Notebook 82 | .ipynb_checkpoints 83 | 84 | # IPython 85 | profile_default/ 86 | ipython_config.py 87 | 88 | # pyenv 89 | .python-version 90 | 91 | # pipenv 92 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 93 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 94 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 95 | # install all needed dependencies. 96 | #Pipfile.lock 97 | 98 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 99 | __pypackages__/ 100 | 101 | # Celery stuff 102 | celerybeat-schedule 103 | celerybeat.pid 104 | 105 | # SageMath parsed files 106 | *.sage.py 107 | 108 | # Environments 109 | .env 110 | .venv 111 | env/ 112 | venv/ 113 | ENV/ 114 | env.bak/ 115 | venv.bak/ 116 | 117 | # Spyder project settings 118 | .spyderproject 119 | .spyproject 120 | 121 | # Rope project settings 122 | .ropeproject 123 | 124 | # mkdocs documentation 125 | /site 126 | 127 | # mypy 128 | .mypy_cache/ 129 | .dmypy.json 130 | dmypy.json 131 | 132 | # Pyre type checker 133 | .pyre/ 134 | /.idea/ 135 | -------------------------------------------------------------------------------- /GraphGenerator/models/kronecker.py: -------------------------------------------------------------------------------- 1 | import GraphGenerator.utils.data_utils as data_utils 2 | import subprocess 3 | import numpy as np 4 | import networkx as nx 5 | 6 | 7 | def str_to_float(str): 8 | a, b = str.split('\n')[1:3] 9 | a = a.strip().split() 10 | b = b.strip().split() 11 | return np.array([[float(a[0]), float(a[-1])], [float(b[0]), float(b[-1])]]) 12 | 13 | 14 | def krongen(init_mat, k): 15 | """ 16 | Kronecker graph generator. 17 | :param init_mat: initiator, default as a 2*2 shaped matrix 18 | :param k: iterations, default as int(log2(nodes)) 19 | :return: generated graph with type of 'nx.classes.graph.Graph' 20 | """ 21 | tmp = np.sum(init_mat) 22 | edge_num = int(tmp**k) 23 | og = nx.Graph() 24 | choice = ['00', '01', '10', '11'] 25 | prob = init_mat/tmp 26 | # time complexity is O(k*E) < O(N**2) 27 | for i in range(edge_num): 28 | x, y = 0, 0 29 | tmp_rand = np.random.choice(choice, k, True, prob.flatten()) 30 | for j, m_axis in enumerate(tmp_rand): 31 | add = 2**j 32 | x += int(m_axis[0])*add 33 | y += int(m_axis[1])*add 34 | og.add_edge(x, y) 35 | return og 36 | 37 | 38 | def generate(input_graph, config): 39 | sparse_adj = nx.adjacency_matrix(input_graph) 40 | k = int(np.log2(sparse_adj.shape[0])) + 1 41 | init_mat = np.array([[.5625, .1875], [.1875, .0625]]) 42 | if config.model.name == 'Kronecker': 43 | tmp_name = "./data/cit_{}.txt".format(config.dataset.name) 44 | data_utils.adj_to_edgelist(sparse_adj, tmp_name) 45 | sp_output = subprocess.check_output( 46 | args=["./GraphGenerator/models/kronecker_ops/examples/kronfit/kronfit", 47 | "-i:{}".format(tmp_name), 48 | '-m:"{}"'.format(config.model.init_mat), 49 | "-o:./{}/{}/{}_to_kronfit.log".format(config.exp_dir, config.exp_name, config.dataset.name), 50 | "-gi:100", "-n0:2"] 51 | ) 52 | utf_output = sp_output.decode('utf8').strip() 53 | START_STR = "PARAMS" 54 | output = utf_output[utf_output.find(START_STR):] 55 | init_mat = str_to_float(output) 56 | # dump_graphs(args.dataset, 'kronecker', init_mat, k) 57 | if config.model.name == 'RMAT': 58 | edge_num = sparse_adj.sum()/2. 59 | tmp = np.float_power(edge_num, 1/k) 60 | init_mat = init_mat*tmp 61 | print(init_mat) 62 | # dump_graphs(args.dataset, 'rmat', init_mat, k) 63 | return [krongen(init_mat, k) for i in range(config.num_gen)] 64 | -------------------------------------------------------------------------------- /GraphGenerator/models/bter_ops/dplpdf.m: -------------------------------------------------------------------------------- 1 | function p = dplpdf(n,gamma) 2 | %DPLPDF Discrete power law probability density function. 3 | % 4 | % P = DPLPDF(N,GAMMA) returns the probabilities for a discrete 5 | % version of the power law probability density function. 
In 6 | % this case, Prob(x) ~ x^(-gamma) for x = 1:N. 7 | % 8 | % See also DGLNPDF, GENDEGDIST. 9 | % 10 | % Reference: 11 | % * T. G. Kolda, A. Pinar, T. Plantenga and C. Seshadhri. A Scalable 12 | % Generative Graph Model with Community Structure, arXiv:1302.6636, 13 | % March 2013. (http://arxiv.org/abs/1302.6636) 14 | % 15 | % Tamara G. Kolda, Ali Pinar, and others, FEASTPACK v1.1, Sandia National 16 | % Laboratories, SAND2013-4136W, http://www.sandia.gov/~tgkolda/feastpack/, 17 | % January 2014 18 | 19 | %% License 20 | % Copyright (c) 2014, Sandia National Laboratories 21 | % All rights reserved. 22 | % 23 | % Redistribution and use in source and binary forms, with or without 24 | % modification, are permitted provided that the following conditions are 25 | % met: 26 | % 27 | % # Redistributions of source code must retain the above copyright notice, 28 | % this list of conditions and the following disclaimer. 29 | % # Redistributions in binary form must reproduce the above copyright 30 | % notice, this list of conditions and the following disclaimer in the 31 | % documentation and/or other materials provided with the distribution. 32 | % 33 | % THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 34 | % IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 35 | % THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 36 | % PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 37 | % CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 38 | % EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 39 | % PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 40 | % PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 41 | % LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 42 | % NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 43 | % SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 44 | % 45 | % 46 | % Sandia National Laboratories is a multi-program laboratory managed and 47 | % operated by Sandia Corporation, a wholly owned subsidiary of Lockheed 48 | % Martin Corporation, for the U.S. Department of Energy's National Nuclear 49 | % Security Administration under contract DE-AC04-94AL85000. 50 | 51 | p = (1:n)'.^(-gamma); 52 | p = p / sum(p); 53 | -------------------------------------------------------------------------------- /GraphGenerator/models/bter_ops/dglnpdf.m: -------------------------------------------------------------------------------- 1 | function p = dglnpdf(n,alpha,beta) 2 | %DGLNPDF Discrete generalized log-normal probability density function. 3 | % 4 | % P = DGLNPDF(N,ALPHA,BETA) returns the probabilities for a discrete 5 | % version of the generalized log-normal probability density function. In 6 | % this case, Prob(x) ~ exp(-(log(x)/alpha)^beta) for x = 1:N. 7 | % 8 | % See also DPLPDF, GENDEGDIST. 9 | % 10 | % Reference: 11 | % * T. G. Kolda, A. Pinar, T. Plantenga and C. Seshadhri. A Scalable 12 | % Generative Graph Model with Community Structure, arXiv:1302.6636, 13 | % March 2013. (http://arxiv.org/abs/1302.6636) 14 | % 15 | % Tamara G. Kolda, Ali Pinar, and others, FEASTPACK v1.1, Sandia National 16 | % Laboratories, SAND2013-4136W, http://www.sandia.gov/~tgkolda/feastpack/, 17 | % January 2014 18 | 19 | %% License 20 | % Copyright (c) 2014, Sandia National Laboratories 21 | % All rights reserved. 
22 | % 23 | % Redistribution and use in source and binary forms, with or without 24 | % modification, are permitted provided that the following conditions are 25 | % met: 26 | % 27 | % # Redistributions of source code must retain the above copyright notice, 28 | % this list of conditions and the following disclaimer. 29 | % # Redistributions in binary form must reproduce the above copyright 30 | % notice, this list of conditions and the following disclaimer in the 31 | % documentation and/or other materials provided with the distribution. 32 | % 33 | % THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 34 | % IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 35 | % THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 36 | % PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 37 | % CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 38 | % EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 39 | % PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 40 | % PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 41 | % LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 42 | % NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 43 | % SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 44 | % 45 | % 46 | % Sandia National Laboratories is a multi-program laboratory managed and 47 | % operated by Sandia Corporation, a wholly owned subsidiary of Lockheed 48 | % Martin Corporation, for the U.S. Department of Energy's National Nuclear 49 | % Security Administration under contract DE-AC04-94AL85000. 50 | 51 | 52 | p = exp(-((log((1:n)'))/alpha).^beta); 53 | p = p / sum(p); 54 | -------------------------------------------------------------------------------- /GraphGenerator/models/bter_ops/ccperdegest.m: -------------------------------------------------------------------------------- 1 | function ccpdb = ccperdegest(G,bins,nsamples) 2 | %CCPERDEGEST Estimate of mean clustering coefficient per degree bin. 3 | % 4 | % CCPD = CCPERDEGEST(G,B,N) computes the per-degree-bin clustering 5 | % coefficient, i.e., CCPD(k) is the mean clustering coefficient for nodes 6 | % in degree bin k. The graph G is assumed to be undirected, unweighted, 7 | % and to contain no self edges. This is *not* checked by the code. The 8 | % vector B gives the bin boundaries, see HISTC. The computation is 9 | % approximate, using wedge sampling. 10 | % 11 | % NOTE: This is an interface to the MEX function provided by 12 | % ccperdegest_mex.c. 13 | % 14 | % See also CCPERDEG, BINDATA. 15 | % 16 | % Tamara G. Kolda, Ali Pinar, and others, FEASTPACK v1.1, Sandia National 17 | % Laboratories, SAND2013-4136W, http://www.sandia.gov/~tgkolda/feastpack/, 18 | % January 2014 19 | 20 | %% License 21 | % Copyright (c) 2014, Sandia National Laboratories 22 | % All rights reserved. 23 | % 24 | % Redistribution and use in source and binary forms, with or without 25 | % modification, are permitted provided that the following conditions are 26 | % met: 27 | % 28 | % # Redistributions of source code must retain the above copyright notice, 29 | % this list of conditions and the following disclaimer. 30 | % # Redistributions in binary form must reproduce the above copyright 31 | % notice, this list of conditions and the following disclaimer in the 32 | % documentation and/or other materials provided with the distribution. 
33 | % 34 | % THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 35 | % IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 36 | % THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 37 | % PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 38 | % CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 39 | % EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 40 | % PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 41 | % PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 42 | % LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 43 | % NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 44 | % SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 45 | % 46 | % 47 | % Sandia National Laboratories is a multi-program laboratory managed and 48 | % operated by Sandia Corporation, a wholly owned subsidiary of Lockheed 49 | % Martin Corporation, for the U.S. Department of Energy's National Nuclear 50 | % Security Administration under contract DE-AC04-94AL85000. 51 | 52 | ccpdb = ccperdegest_mex(G,bins,nsamples); 53 | 54 | -------------------------------------------------------------------------------- /GraphGenerator/evaluate/efficiency.py: -------------------------------------------------------------------------------- 1 | from GraphGenerator.metrics import speed, memory 2 | import networkx as nx 3 | import scipy.sparse as sp 4 | import torch, os, copy 5 | import numpy as np 6 | 7 | 8 | def coo_to_csp(sp_coo): 9 | num = sp_coo.shape[0] 10 | row = sp_coo.row 11 | col = sp_coo.col 12 | sp_tensor = torch.sparse.FloatTensor(torch.LongTensor(np.stack([row, col])), 13 | torch.tensor(sp_coo.data), 14 | torch.Size([num, num])) 15 | return sp_tensor 16 | 17 | 18 | def sp_normalize(adj_def, device='cpu'): 19 | """ 20 | :param adj: scipy.sparse.coo_matrix 21 | :param device: default as cpu 22 | :return: normalized_adj: 23 | """ 24 | adj_ = sp.coo_matrix(adj_def) 25 | adj_ = adj_ + sp.coo_matrix(sp.eye(adj_def.shape[0]), dtype=np.float32) 26 | rowsum = np.array(adj_.sum(axis=1)).reshape(-1) 27 | norm_unit = np.float_power(rowsum, -0.5).astype(np.float32) 28 | degree_mat_inv_sqrt = sp.diags(norm_unit) 29 | degree_mat_sqrt = copy.copy(degree_mat_inv_sqrt) 30 | # degree_mat_sqrt = degree_mat_inv_sqrt.to_dense() 31 | support = adj_.__matmul__(degree_mat_sqrt) 32 | # support = coo_to_csp(support.tocoo()) 33 | # degree_mat_inv_sqrt = coo_to_csp(degree_mat_inv_sqrt.tocoo()) 34 | adj_normalized = degree_mat_inv_sqrt.__matmul__(support) 35 | adj_normalized = coo_to_csp(adj_normalized.tocoo()) 36 | return adj_normalized 37 | 38 | 39 | @speed.time_decorator 40 | def eval_speed(func, args): 41 | pass 42 | 43 | 44 | def eval_efficiency(generator, config=None): 45 | from GraphGenerator.train import train_base as train 46 | # data_sizes = [100, int(1e+3), int(1e+4), int(1e+5), int(1e+6)] 47 | data_sizes = [20] 48 | # data_sizes = config.eval.num_nodes 49 | print("The tested graph size is: {}.".format(data_sizes)) 50 | output_data = [] 51 | for size in data_sizes: 52 | new_g = nx.watts_strogatz_graph(size, 4, 0.) 
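# NOTE: with rewiring probability 0, watts_strogatz_graph(size, 4, 0.) is a deterministic
# ring lattice in which every node has degree 4, so each benchmark graph has exactly
# 2*size edges; uncommenting the larger data_sizes above scales the same timing run up to 1e6 nodes.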
53 | new_adj = nx.adjacency_matrix(new_g) 54 | new_adj = sp.coo_matrix(new_adj) 55 | # adj_input = coo_to_csp(new_adj) 56 | print("Start (training and) inferencing graph with {} nodes...".format(size)) 57 | tmp_data = train.train_and_inference(new_g, generator, config=config) 58 | if isinstance(tmp_data, list): 59 | output_data.extend(tmp_data) 60 | else: 61 | output_data.append(tmp_data) 62 | return output_data 63 | 64 | 65 | if __name__ == '__main__': 66 | conf_name = "config/bigg.yaml" 67 | from GraphGenerator.utils.arg_utils import get_config, set_device 68 | config = get_config(conf_name) 69 | set_device(config) 70 | out = eval_efficiency("bigg", config) 71 | # -------------------------------------------------------------------------------- /GraphGenerator/models/bter_ops/edges2graph.m: -------------------------------------------------------------------------------- 1 | function G = edges2graph(E,nnodes) 2 | %EDGES2GRAPH Create an undirected, simple graph from edge list. 3 | % 4 | % G = EDGES2GRAPH(E) creates an adjaceny matrix for the graph where 5 | % E(k,1) and E(k,2) specifies the kth edge. All edges are treated as 6 | % undirected. Duplicate edges are removed. Loops are removed. 7 | % 8 | % G = EDGES2GRAPH(E,N) specifies the number of number of nodes in the 9 | % graph. 10 | % 11 | % Tamara G. Kolda, Ali Pinar, and others, FEASTPACK v1.1, Sandia National 12 | % Laboratories, SAND2013-4136W, http://www.sandia.gov/~tgkolda/feastpack/, 13 | % January 2014 14 | 15 | %% License 16 | % Copyright (c) 2014, Sandia National Laboratories 17 | % All rights reserved. 18 | % 19 | % Redistribution and use in source and binary forms, with or without 20 | % modification, are permitted provided that the following conditions are 21 | % met: 22 | % 23 | % # Redistributions of source code must retain the above copyright notice, 24 | % this list of conditions and the following disclaimer. 25 | % # Redistributions in binary form must reproduce the above copyright 26 | % notice, this list of conditions and the following disclaimer in the 27 | % documentation and/or other materials provided with the distribution. 28 | % 29 | % THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 30 | % IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 31 | % THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 32 | % PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 33 | % CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 34 | % EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 35 | % PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 36 | % PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 37 | % LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 38 | % NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 39 | % SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 40 | % 41 | % 42 | % Sandia National Laboratories is a multi-program laboratory managed and 43 | % operated by Sandia Corporation, a wholly owned subsidiary of Lockheed 44 | % Martin Corporation, for the U.S. Department of Energy's National Nuclear 45 | % Security Administration under contract DE-AC04-94AL85000. 
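% Example (a small sketch): given
%   E = [1 2; 2 3; 3 1; 1 1; 2 3];   % one duplicate edge and one self-loop
%   G = edges2graph(E, 4);
% G is the symmetric 4x4 sparse 0/1 matrix with nonzeros only at (1,2), (2,3) and
% (1,3) (and their transposes): the duplicate and the loop are dropped, and node 4
% stays isolated because N = 4 was passed explicitly.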
46 | 47 | if ~exist('nnodes','var') 48 | nnodes = max(E(:)); 49 | else 50 | % Error checking only 51 | tmp = max(E(:)); 52 | if (tmp > nnodes) 53 | fprintf('Highest index in E is %d, but N = %d', tmp, nnodes); 54 | end 55 | end 56 | 57 | ii = E(:,1); 58 | jj = E(:,2); 59 | G = spones(sparse([ii;jj],[jj;ii],1,nnodes,nnodes)); 60 | G = spdiags(zeros(nnodes,1),0,G); 61 | -------------------------------------------------------------------------------- /GraphGenerator/models/bter_ops/random_sample.m: -------------------------------------------------------------------------------- 1 | function s = random_sample(cnts, nsamples) 2 | %RANDOM_SAMPLE creates a random sample proportional to the given counts. 3 | % 4 | % S = RANDOM_SAMPLE(C) choose N = round(sum(C)) samples (with 5 | % replacement) from {1,...,length(C)} proportional to the values in C. 6 | % So, if C = [2 1 1], then we might expect S (sorted) to be [ 1 1 2 3 ]. 7 | % However, we also allow for C to be non-integral. 8 | % 9 | % Tamara G. Kolda, Ali Pinar, and others, FEASTPACK v1.1, Sandia National 10 | % Laboratories, SAND2013-4136W, http://www.sandia.gov/~tgkolda/feastpack/, 11 | % January 2014 12 | 13 | %% License 14 | % Copyright (c) 2014, Sandia National Laboratories 15 | % All rights reserved. 16 | % 17 | % Redistribution and use in source and binary forms, with or without 18 | % modification, are permitted provided that the following conditions are 19 | % met: 20 | % 21 | % # Redistributions of source code must retain the above copyright notice, 22 | % this list of conditions and the following disclaimer. 23 | % # Redistributions in binary form must reproduce the above copyright 24 | % notice, this list of conditions and the following disclaimer in the 25 | % documentation and/or other materials provided with the distribution. 26 | % 27 | % THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 28 | % IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 29 | % THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 30 | % PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 31 | % CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 32 | % EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 33 | % PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 34 | % PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 35 | % LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 36 | % NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 37 | % SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 38 | % 39 | % 40 | % Sandia National Laboratories is a multi-program laboratory managed and 41 | % operated by Sandia Corporation, a wholly owned subsidiary of Lockheed 42 | % Martin Corporation, for the U.S. Department of Energy's National Nuclear 43 | % Security Administration under contract DE-AC04-94AL85000. 44 | 45 | if ~exist('nsamples','var') 46 | nsamples = round(sum(cnts)); 47 | else 48 | cnts = cnts .* nsamples / sum(cnts); 49 | end 50 | 51 | cumdist = [0; cumsum(cnts)]; 52 | bins = cumdist / cumdist(end); 53 | 54 | testval = abs(bins(end) - 1); 55 | if testval > eps 56 | warning('Last entry of bins is not exactly 1. 
Diff = %e.', testval); 57 | end 58 | 59 | [~, s] = histc(rand(nsamples,1),bins); -------------------------------------------------------------------------------- /GraphGenerator/models/bter_ops/bter_edges2graph.m: -------------------------------------------------------------------------------- 1 | function [G,G1,G2] = bter_edges2graph(E1,E2) 2 | %BTER_EDGES2GRAPH Create a graph from edge lists. 3 | % 4 | % G = BTER_EDGES2GRAPH(E1,E2) returns a sparse adjancency matrix 5 | % corresponding to the given edge lists produced by BTER. The graph is 6 | % undirected, unweighted, and has no loops, even if E1 and E2 contain 7 | % these. 8 | % 9 | % [G,G1,G2] = BTER_EDGES2GRAPH(E1,E2) returns the graphs corresponding to 10 | % Phase 1 and Phase 2 in addition to the combined graph. 11 | % 12 | % See also BTER, EDGES2GRAPH 13 | % 14 | % Tamara G. Kolda, Ali Pinar, and others, FEASTPACK v1.1, Sandia National 15 | % Laboratories, SAND2013-4136W, http://www.sandia.gov/~tgkolda/feastpack/, 16 | % January 2014 17 | 18 | %% License 19 | % Copyright (c) 2014, Sandia National Laboratories 20 | % All rights reserved. 21 | % 22 | % Redistribution and use in source and binary forms, with or without 23 | % modification, are permitted provided that the following conditions are 24 | % met: 25 | % 26 | % # Redistributions of source code must retain the above copyright notice, 27 | % this list of conditions and the following disclaimer. 28 | % # Redistributions in binary form must reproduce the above copyright 29 | % notice, this list of conditions and the following disclaimer in the 30 | % documentation and/or other materials provided with the distribution. 31 | % 32 | % THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 33 | % IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 34 | % THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 35 | % PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 36 | % CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 37 | % EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 38 | % PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 39 | % PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 40 | % LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 41 | % NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 42 | % SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 43 | % 44 | % 45 | % Sandia National Laboratories is a multi-program laboratory managed and 46 | % operated by Sandia Corporation, a wholly owned subsidiary of Lockheed 47 | % Martin Corporation, for the U.S. Department of Energy's National Nuclear 48 | % Security Administration under contract DE-AC04-94AL85000. 49 | 50 | if isempty(E1) 51 | nnodes = max(E2(:)); 52 | elseif isempty(E2) 53 | nnodes = max(E1(:)); 54 | else 55 | nnodes = max(max(E1(:)),max(E2(:))); 56 | end 57 | 58 | if (nargout < 3) 59 | G = edges2graph([E1;E2],nnodes); 60 | else 61 | G1 = edges2graph(E1,nnodes); 62 | G2 = edges2graph(E2,nnodes); 63 | G = spones(G1+G2); 64 | G = spdiags(zeros(nnodes,1),0,G); 65 | end 66 | 67 | -------------------------------------------------------------------------------- /GraphGenerator/models/bter_ops/binstart.m: -------------------------------------------------------------------------------- 1 | function idx = binstart(i, omega, tau, idx0) 2 | %BINSTART - Specify start of bin for the specified parameters. 
3 | % 4 | % K = BINSTART(I,OMEGA,TAU,K0) returns the index of the I-th bin defined 5 | % by parameters TAU, OMEGA, and K0. The parameters TAU, OMEGA, and K0 are 6 | % optional. The default values are OMEGA=2, TAU=1, K0=1. 7 | % 8 | % The end of a bin I one less than the end of the next bin, i.e., 9 | % KEND = BINSTART(I+1,OMEGA,TAU,K0)-1. 10 | % 11 | % See also BINLOOKUP, BINDATA. 12 | % 13 | % Tamara G. Kolda, Ali Pinar, and others, FEASTPACK v1.1, Sandia National 14 | % Laboratories, SAND2013-4136W, http://www.sandia.gov/~tgkolda/feastpack/, 15 | % January 2014 16 | 17 | %% License 18 | % Copyright (c) 2014, Sandia National Laboratories 19 | % All rights reserved. 20 | % 21 | % Redistribution and use in source and binary forms, with or without 22 | % modification, are permitted provided that the following conditions are 23 | % met: 24 | % 25 | % # Redistributions of source code must retain the above copyright notice, 26 | % this list of conditions and the following disclaimer. 27 | % # Redistributions in binary form must reproduce the above copyright 28 | % notice, this list of conditions and the following disclaimer in the 29 | % documentation and/or other materials provided with the distribution. 30 | % 31 | % THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 32 | % IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 33 | % THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 34 | % PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 35 | % CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 36 | % EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 37 | % PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 38 | % PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 39 | % LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 40 | % NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 41 | % SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 42 | % 43 | % 44 | % Sandia National Laboratories is a multi-program laboratory managed and 45 | % operated by Sandia Corporation, a wholly owned subsidiary of Lockheed 46 | % Martin Corporation, for the U.S. Department of Energy's National Nuclear 47 | % Security Administration under contract DE-AC04-94AL85000. 48 | 49 | % ** 50 | if ~exist('omega','var') || isempty(omega) 51 | omega = 2; 52 | end 53 | 54 | if ~exist('tau','var') || isempty(tau) 55 | tau = 1; 56 | end 57 | 58 | if ~exist('idx0','var') || isempty(idx0) 59 | idx0 = 1; 60 | end 61 | 62 | % ** 63 | n = length(i); 64 | idx = zeros(n,1); 65 | for k = 1:n 66 | if i(k) <= tau 67 | idx(k) = i(k) + idx0 - 1; 68 | else 69 | idx(k) = ceil((omega.^(i(k)-tau)-1)/(omega-1)) + tau + idx0 - 1; 70 | end 71 | end -------------------------------------------------------------------------------- /GraphGenerator/models/bter_ops/binlookup.m: -------------------------------------------------------------------------------- 1 | function i = binlookup(idx, omega, tau, idx0) 2 | %BINLOOKUP For a given index, determine its appropriate bin. 3 | % 4 | % I = BINLOOKUP(K,OMEGA,TAU,K0) returns the bin number of index K, where 5 | % the bins are defined by paramtesr TAU, OMEGA, and K0. The parameters 6 | % TAU, OMEGA, and K0 are optional. If they are not defined or defined as 7 | % an emptyset ([]), then they take on the default values, which are 8 | % OMEGA=2, TAU=1, K0=1. 9 | % 10 | % Note: If K is a vector, than I is a vector of bins. 
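%
% Example: with the defaults OMEGA=2, TAU=1, K0=1 the bins are the index ranges
% [1], [2 3], [4 7], [8 15], ... (after the first TAU singleton bins each bin is
% OMEGA times wider than the previous one), so BINLOOKUP(5) returns 3 while
% BINSTART(3) returns 4.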
11 | % 12 | % See also BINSTART, BINDATA. 13 | % 14 | % Tamara G. Kolda, Ali Pinar, and others, FEASTPACK v1.1, Sandia National 15 | % Laboratories, SAND2013-4136W, http://www.sandia.gov/~tgkolda/feastpack/, 16 | % January 2014 17 | 18 | %% License 19 | % Copyright (c) 2014, Sandia National Laboratories 20 | % All rights reserved. 21 | % 22 | % Redistribution and use in source and binary forms, with or without 23 | % modification, are permitted provided that the following conditions are 24 | % met: 25 | % 26 | % # Redistributions of source code must retain the above copyright notice, 27 | % this list of conditions and the following disclaimer. 28 | % # Redistributions in binary form must reproduce the above copyright 29 | % notice, this list of conditions and the following disclaimer in the 30 | % documentation and/or other materials provided with the distribution. 31 | % 32 | % THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 33 | % IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 34 | % THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 35 | % PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 36 | % CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 37 | % EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 38 | % PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 39 | % PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 40 | % LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 41 | % NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 42 | % SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 43 | % 44 | % 45 | % Sandia National Laboratories is a multi-program laboratory managed and 46 | % operated by Sandia Corporation, a wholly owned subsidiary of Lockheed 47 | % Martin Corporation, for the U.S. Department of Energy's National Nuclear 48 | % Security Administration under contract DE-AC04-94AL85000. 49 | 50 | % ** 51 | if ~exist('omega','var') || isempty(omega) 52 | omega = 2; 53 | end 54 | 55 | if ~exist('tau','var') || isempty(tau) 56 | tau = 1; 57 | end 58 | 59 | if ~exist('idx0','var') || isempty(idx0) 60 | idx0 = 1; 61 | end 62 | 63 | % ** 64 | if any(idx < idx0) 65 | error('Index is smaller than the start of the first bin'); 66 | end 67 | 68 | n = length(idx); 69 | i = zeros(n,1); 70 | for k = 1:n 71 | if (idx(k)-idx0+1) < tau 72 | i(k) = idx(k)-idx0+1; 73 | else 74 | tmp = 1 + (omega-1)*(idx(k)-idx0+1 - tau); 75 | i(k) = floor(log(tmp)/log(omega)) + tau ; 76 | end 77 | end -------------------------------------------------------------------------------- /GraphGenerator/models/bigg_ops/tree_clib/include/tree_clib.h: -------------------------------------------------------------------------------- 1 | // Copyright 2020 The Google Research Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
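// The extern "C" declarations below form the flat C interface of libtree.so, the
// shared library produced by the Makefiles in this directory. The Python side of
// the BiGG model presumably loads these symbols through a thin ctypes wrapper: each
// call either registers a training graph (AddGraph, PrepareTrain) or fills integer
// index buffers that drive the batched tree computations.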
14 | 15 | #ifndef TREE_CLIB_H 16 | #define TREE_CLIB_H 17 | 18 | #include "config.h" // NOLINT 19 | 20 | extern "C" int Init(const int argc, const char **argv); 21 | 22 | extern "C" int PrepareTrain(int num_graphs, void* list_ids, 23 | void* list_start_node, void* list_col_start, 24 | void* list_col_end, int num_nodes, int new_batch); 25 | 26 | extern "C" int AddGraph(int graph_idx, int num_nodes, int num_edges, 27 | void* edge_pairs, int n_left, int n_right); 28 | 29 | extern "C" int TotalTreeNodes(); 30 | 31 | extern "C" int SetTreeEmbedIds(int depth, int lr, void* _bot_from, 32 | void* _bot_to, void* _prev_from, void* _prev_to); 33 | 34 | extern "C" int SetRowEmbedIds(int lr, int level, void* _bot_from, 35 | void* _bot_to, void* _prev_from, 36 | void* _prev_to, void* _past_from, void* _past_to); 37 | 38 | extern "C" int MaxTreeDepth(); 39 | 40 | extern "C" int NumBottomDep(int depth, int lr); 41 | 42 | extern "C" int NumPrevDep(int depth, int lr); 43 | 44 | extern "C" int NumRowBottomDep(int lr); 45 | 46 | extern "C" int NumRowPastDep(int lv, int lr); 47 | 48 | extern "C" int NumRowTopDep(int lv, int lr); 49 | 50 | extern "C" int RowSumSteps(); 51 | 52 | extern "C" int RowMergeSteps(); 53 | 54 | extern "C" int NumRowSumOut(int lr); 55 | 56 | extern "C" int NumRowSumNext(int lr); 57 | 58 | extern "C" int SetRowSumIds(int lr, void* _step_from, void* _step_to, 59 | void* _next_input, void* _next_states); 60 | 61 | extern "C" int SetRowSumInit(void* _init_idx); 62 | 63 | extern "C" int SetRowSumLast(void* _last_idx); 64 | 65 | extern "C" int HasChild(void* _has_child); 66 | 67 | extern "C" int NumCurNodes(int depth); 68 | 69 | extern "C" int GetInternalMask(int depth, void* _internal_mask); 70 | 71 | extern "C" int NumInternalNodes(int depth); 72 | 73 | extern "C" int GetChMask(int lr, int depth, void* _ch_mask); 74 | 75 | extern "C" int GetNumCh(int lr, int depth, void* _num_ch); 76 | 77 | extern "C" int SetLeftState(int depth, void* _bot_from, void* _bot_to, 78 | void* _prev_from, void* _prev_to); 79 | 80 | extern "C" int NumLeftBot(int depth); 81 | 82 | extern "C" int LeftRightSelect(int depth, void* _left_from, void* _left_to, 83 | void* _right_from, void* _right_to); 84 | 85 | extern "C" int MaxBinFeatDepth(); 86 | 87 | extern "C" int NumBinNodes(int depth); 88 | 89 | extern "C" int SetBinaryFeat(int d, void* _feat_ptr, int dev); 90 | 91 | extern "C" int GetNextStates(void* _state_idx); 92 | 93 | extern "C" int GetNumNextStates(); 94 | 95 | extern "C" int GetCurPos(void* _pos); 96 | 97 | #endif 98 | -------------------------------------------------------------------------------- /GraphGenerator/models/bter_ops/tricnt.m: -------------------------------------------------------------------------------- 1 | function [t,d,w] = tricnt(G,d,matlabbgl) 2 | %TRICNT Count number of triangles per vertex in a simple, undirected graph 3 | % 4 | % T = TRICNT(G) takes a sparse adjacency matrix G and computes the number 5 | % of triangles per vertex. Note taht there is no error checking on G. It 6 | % is up to the user to ensure that G is symmetric, has only 0/1 entries 7 | % (but *not* binary), and has no entries on the diagonal. 8 | % 9 | % T = TRICNT(G,D) takes a second argument which is the degree per vertex 10 | % and does not recalculate it. 11 | % 12 | % T = TRICNT(G,D,true) uses the clustering_coefficients from MATLAB_BGL. 13 | % This assumes that this package is installed and in the path. 14 | % 15 | % [T,D,W] = TRICNT(G) also returns the degree and number of wedges per 16 | % vertex. 
17 | % 18 | % NOTE: This is an interface to the MEX function provided by 19 | % tricnt_mex.c, unless the clustering_coefficients function from 20 | % MATLAB_BGL is used. 21 | % 22 | % See also CCPERDEG. 23 | % 24 | % Tamara G. Kolda, Ali Pinar, and others, FEASTPACK v1.1, Sandia National 25 | % Laboratories, SAND2013-4136W, http://www.sandia.gov/~tgkolda/feastpack/, 26 | % January 2014 27 | 28 | %% License 29 | % Copyright (c) 2014, Sandia National Laboratories 30 | % All rights reserved. 31 | % 32 | % Redistribution and use in source and binary forms, with or without 33 | % modification, are permitted provided that the following conditions are 34 | % met: 35 | % 36 | % # Redistributions of source code must retain the above copyright notice, 37 | % this list of conditions and the following disclaimer. 38 | % # Redistributions in binary form must reproduce the above copyright 39 | % notice, this list of conditions and the following disclaimer in the 40 | % documentation and/or other materials provided with the distribution. 41 | % 42 | % THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 43 | % IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 44 | % THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 45 | % PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 46 | % CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 47 | % EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 48 | % PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 49 | % PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 50 | % LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 51 | % NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 52 | % SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 53 | % 54 | % 55 | % Sandia National Laboratories is a multi-program laboratory managed and 56 | % operated by Sandia Corporation, a wholly owned subsidiary of Lockheed 57 | % Martin Corporation, for the U.S. Department of Energy's National Nuclear 58 | % Security Administration under contract DE-AC04-94AL85000. 
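% Example (sketch): for the triangle graph G = sparse([0 1 1; 1 0 1; 1 1 0]),
% [t,d,w] = tricnt(G) gives t = [1;1;1], d = [2;2;2] and w = d.*(d-1)/2 = [1;1;1],
% so the per-vertex clustering coefficient t./w is 1 everywhere, as expected for
% a single triangle.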
59 | 60 | 61 | if ~exist('matlabbgl','var') 62 | matlabbgl = false; 63 | end 64 | 65 | if ~exist('d','var') || isempty(d) 66 | d = full(sum(G,2)); 67 | end 68 | 69 | w = d.*(d-1)/2; 70 | 71 | if (matlabbgl) 72 | 73 | if ~exist('clustering_coefficients.m','file') 74 | error('Must install MATLAB_BGL toolbox'); 75 | end 76 | options.undirected = 1; 77 | options.unweighted = 1; 78 | cc = clustering_coefficients(G,options); 79 | t = round(w.*cc); 80 | 81 | else 82 | 83 | t = tricnt_mex(G); 84 | 85 | end 86 | 87 | -------------------------------------------------------------------------------- /GraphGenerator/models/vgae.py: -------------------------------------------------------------------------------- 1 | import torch, math 2 | from torch.nn.modules.module import Module 3 | from torch.nn.parameter import Parameter 4 | from torch.autograd import Variable 5 | import torch.nn as nn 6 | 7 | 8 | class GraphConvolution(Module): 9 | """ 10 | Simple GCN layer, similar to https://arxiv.org/abs/1609.02907 11 | """ 12 | 13 | def __init__(self, in_features, out_features, bias=True, act=lambda x: x): 14 | super(GraphConvolution, self).__init__() 15 | self.in_features = in_features 16 | self.out_features = out_features 17 | self.weight = Parameter(torch.FloatTensor(in_features, out_features)) 18 | self.act = act 19 | if bias: 20 | self.bias = Parameter(torch.FloatTensor(out_features)) 21 | else: 22 | self.register_parameter('bias', None) 23 | self.reset_parameters() 24 | 25 | def reset_parameters(self): 26 | stdv = 1. / math.sqrt(self.weight.size(1)) 27 | self.weight.data.uniform_(-stdv, stdv) 28 | if self.bias is not None: 29 | self.bias.data.uniform_(-stdv, stdv) 30 | 31 | def forward(self, input, adj): 32 | support = torch.mm(input, self.weight) 33 | output = torch.mm(adj, support) 34 | if self.bias is not None: 35 | output = output + self.bias 36 | return self.act(output) 37 | 38 | def __repr__(self): 39 | return self.__class__.__name__ + ' (' \ 40 | + str(self.in_features) + ' -> ' \ 41 | + str(self.out_features) + ')' 42 | 43 | 44 | class VGAE(nn.Module): 45 | def __init__(self, input_size, emb_size, hidden_size, act=lambda x: x, layers=2): 46 | super(VGAE, self).__init__() 47 | self.encode = GraphConvolution(input_size, hidden_size, act=act) 48 | self.medium = nn.ModuleList([GraphConvolution(hidden_size, hidden_size, act=act) for i in range(layers-2)]) 49 | self._mean = GraphConvolution(hidden_size, emb_size, act=act) 50 | self._logv = GraphConvolution(hidden_size, emb_size, act=act) 51 | self.mean = None 52 | self.logv = None 53 | 54 | def forward(self, adj, x=None, device='cuda:0'): 55 | if x is None: 56 | x = Variable(torch.rand(adj.shape[0], self.input_size, dtype=torch.float32)).to(device) 57 | support = self.encode(x, adj) 58 | for m in self.medium: 59 | support = m(support, adj) 60 | self.mean = self._mean(support, adj) 61 | self.logv = self._logv(support, adj) 62 | noise = Variable(torch.rand(self.mean.shape[0], self.mean.shape[1], dtype=torch.float32)).to(device) 63 | support = noise*torch.exp(self.logv) + self.mean 64 | score = torch.mm(support, support.T) 65 | return score 66 | 67 | 68 | class GAE(nn.Module): 69 | def __init__(self, input_size, emb_size, hidden_size, act=lambda x: x, layers=2): 70 | super(GAE, self).__init__() 71 | self.encode = GraphConvolution(input_size, hidden_size, act=act) 72 | self.medium = nn.ModuleList([GraphConvolution(hidden_size, hidden_size, act=act) for i in range(layers-2)]) 73 | self.mean = GraphConvolution(hidden_size, emb_size, act=act) 74 | 75 | def 
forward(self, adj, x=None, device='cuda:0'): 76 | if x is None: 77 | x = Variable(torch.rand(adj.shape[0], self.input_size, dtype=torch.float32)).to(device) 78 | support = self.encode(x, adj) 79 | for m in self.medium: 80 | support = m(support, adj) 81 | support = self.mean(support, adj) 82 | score = torch.mm(support, support.T) 83 | return score 84 | 85 | 86 | -------------------------------------------------------------------------------- /GraphGenerator/models/bter_ops/gendegdist.m: -------------------------------------------------------------------------------- 1 | function dd = gendegdist(n,pdf,cutoff) 2 | %GENDEGDIST Create a random degree distribution from a given PDF. 3 | % 4 | % ND = GENDEGDIST(N,PDF) creates a degree distribution on N nodes using 5 | % the discrete probability distribution function specified by PDF. The 6 | % result is a degree distribution: ND(d) = number of nodes of degree d. 7 | % 8 | % ND = GENDEGDIST(N,PDF,D0) estimates the number of nodes for d < DO as 9 | % ND(d) = PDF(d) * N. This is much faster for large N, but D0 should not 10 | % be too small or it will cause errors in the degree distribution. 11 | % 12 | % Examples 13 | % maxdeg=1e5; alpha = 2; beta = 2; pdf = dglnpdf(maxdeg, alpha, beta); 14 | % dd = gendegdist(1e7, pdf, 1e2); 15 | % loglog(dd,'b*'); 16 | % 17 | % See also DGLNPDF, DGLNCDF. 18 | % 19 | % Reference: 20 | % * T. G. Kolda, A. Pinar, T. Plantenga and C. Seshadhri. A Scalable 21 | % Generative Graph Model with Community Structure, arXiv:1302.6636, 22 | % March 2013. (http://arxiv.org/abs/1302.6636) 23 | % 24 | % Tamara G. Kolda, Ali Pinar, and others, FEASTPACK v1.1, Sandia National 25 | % Laboratories, SAND2013-4136W, http://www.sandia.gov/~tgkolda/feastpack/, 26 | % January 2014 27 | 28 | %% License 29 | % Copyright (c) 2014, Sandia National Laboratories 30 | % All rights reserved. 31 | % 32 | % Redistribution and use in source and binary forms, with or without 33 | % modification, are permitted provided that the following conditions are 34 | % met: 35 | % 36 | % # Redistributions of source code must retain the above copyright notice, 37 | % this list of conditions and the following disclaimer. 38 | % # Redistributions in binary form must reproduce the above copyright 39 | % notice, this list of conditions and the following disclaimer in the 40 | % documentation and/or other materials provided with the distribution. 41 | % 42 | % THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 43 | % IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 44 | % THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 45 | % PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 46 | % CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 47 | % EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 48 | % PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 49 | % PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 50 | % LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 51 | % NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 52 | % SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 53 | % 54 | % 55 | % Sandia National Laboratories is a multi-program laboratory managed and 56 | % operated by Sandia Corporation, a wholly owned subsidiary of Lockheed 57 | % Martin Corporation, for the U.S. 
Department of Energy's National Nuclear 58 | % Security Administration under contract DE-AC04-94AL85000. 59 | 60 | % ** 61 | if ~exist('cutoff','var') 62 | cutoff = 0; 63 | end 64 | 65 | % ** For any degree smaller than the cutoff, the PDF*n is good enough. 66 | dd1(1:cutoff,1) = round(n*pdf(1:cutoff)); 67 | n1 = sum(dd1); %<- Number of nodes "distributed" so far. 68 | 69 | % ** Do the tail by actual sampling 70 | n2 = n - n1; 71 | tailpdf = pdf(cutoff+1:end)/sum(pdf(cutoff+1:end)); 72 | tailcdf = cumsum(tailpdf); 73 | idx2 = find(tailcdf < 1, 1, 'last'); 74 | tailcdf = [0; tailcdf(1:idx2); 1]; 75 | coins = rand(n2,1); 76 | cnts = histc(coins,tailcdf); 77 | 78 | % ** Assemble second half of dd 79 | idx3 = find(cnts > 0, 1, 'last'); 80 | dd2 = cnts(1:idx3); 81 | 82 | % ** 83 | dd = [dd1;dd2]; -------------------------------------------------------------------------------- /GraphGenerator/models/bter_ops/cc_param_search.m: -------------------------------------------------------------------------------- 1 | function p1 = cc_param_search(nd,maxcc,gcc,varargin) 2 | %CC_PARAM_SEARCH Clustering coefficient parameter search 3 | % 4 | % XI = CC_PARAM_SEARCH(ND, MAXCCD, GCC) finds the parameter XI such that 5 | % the clustering coefficint profile defined by 6 | % 7 | % CCD(D) = MAXCCD * exp(-(D-1)*XI) for D >= 2, 8 | % 9 | % has the specified global clustering coefficient (GCC) and maximum 10 | % clustering coefficient (MAXCCD). 11 | % 12 | % Examples 13 | % % nd <- degree distribution 14 | % % maxccd_target <- target for maximum ccd value 15 | % % gcc_target <- target for global clustering coefficient 16 | % xi = cc_param_search(nd, maxccd_target, gcc_target); 17 | % ccd_target = [0; maxccd_target * exp(-(0:maxdeg-2)'.* xi)]; 18 | % maxdeg = find(nd>0,1,'last'); 19 | % 20 | % See also DEGDIST_PARAM_SEARCH, BTER 21 | % 22 | % Reference: 23 | % T. G. Kolda, A. Pinar, T. Plantenga and C. Seshadhri. A Scalable 24 | % Generative Graph Model with Community Structure, arXiv:1302.6636, 25 | % March 2013. (http://arxiv.org/abs/1302.6636) 26 | % 27 | % Tamara G. Kolda, Ali Pinar, and others, FEASTPACK v1.1, Sandia National 28 | % Laboratories, SAND2013-4136W, http://www.sandia.gov/~tgkolda/feastpack/, 29 | % January 2014 30 | 31 | %% License 32 | % Copyright (c) 2014, Sandia National Laboratories 33 | % All rights reserved. 34 | % 35 | % Redistribution and use in source and binary forms, with or without 36 | % modification, are permitted provided that the following conditions are 37 | % met: 38 | % 39 | % # Redistributions of source code must retain the above copyright notice, 40 | % this list of conditions and the following disclaimer. 41 | % # Redistributions in binary form must reproduce the above copyright 42 | % notice, this list of conditions and the following disclaimer in the 43 | % documentation and/or other materials provided with the distribution. 44 | % 45 | % THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 46 | % IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 47 | % THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 48 | % PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 49 | % CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 50 | % EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 51 | % PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 52 | % PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 53 | % LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 54 | % NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 55 | % SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 56 | % 57 | % 58 | % Sandia National Laboratories is a multi-program laboratory managed and 59 | % operated by Sandia Corporation, a wholly owned subsidiary of Lockheed 60 | % Martin Corporation, for the U.S. Department of Energy's National Nuclear 61 | % Security Administration under contract DE-AC04-94AL85000. 62 | 63 | 64 | params = inputParser; 65 | params.addParamValue('fminsearch_opts', optimset('TolFun', 1e-4, 'TolX', 1e-4)); 66 | params.parse(varargin{:}); 67 | options = params.Results.fminsearch_opts; 68 | 69 | fhandle = @(x) objfunc(nd, maxcc, gcc, x); 70 | [xstar,~,~] = fminsearch(fhandle, 0.5, options); 71 | p1 = xstar; 72 | 73 | function y = objfunc(nd,maxcc,gcc,xi) 74 | %OBJFUNC Compute objectiv function, as described above 75 | maxd = length(nd); 76 | ccd_mean = [0; maxcc*exp(-(0:maxd-2)'.* xi)]; 77 | nWedges = nd' .* ((1:maxd).*((1:maxd)-1)/2); 78 | gcc_xi = (nWedges*ccd_mean) / sum(nWedges); 79 | y = abs(gcc - gcc_xi); 80 | fprintf('xi = %e, target gcc = %f, current gcc = %f\n', xi, gcc, gcc_xi); 81 | 82 | 83 | -------------------------------------------------------------------------------- /GraphGenerator/train/train_netgan.py: -------------------------------------------------------------------------------- 1 | from GraphGenerator.models.netgan import * 2 | # import tensorflow as tf 3 | from GraphGenerator.utils.arg_utils import set_device 4 | import tensorflow.compat.v1 as tf 5 | import scipy.sparse as sp 6 | import numpy as np 7 | from matplotlib import pyplot as plt 8 | from sklearn.metrics import roc_auc_score, average_precision_score 9 | import time, os, pickle 10 | import networkx as nx 11 | import multiprocessing as mp 12 | 13 | 14 | def score_to_graph(_rws, _n): 15 | scores_mat = score_matrix_from_random_walks(_rws, _n).tocsr() 16 | tmp_graph = graph_from_scores(scores_mat, _A_obs.sum()) 17 | return nx.Graph(tmp_graph) 18 | 19 | 20 | def train_netgan(input_data, config): 21 | set_device(config) 22 | emb_size = config.model.embedding_dim 23 | l_rate = config.train.lr 24 | _A_obs = nx.adjacency_matrix(input_data) 25 | _A_obs = _A_obs - sp.csr_matrix(np.diag(_A_obs.diagonal())) 26 | _A_obs = _A_obs + _A_obs.T 27 | _A_obs[_A_obs > 1] = 1 28 | lcc = largest_connected_components(_A_obs) 29 | _A_obs = _A_obs[lcc, :][:, lcc] 30 | _N = _A_obs.shape[0] 31 | val_share = config.train.val_share 32 | test_share = config.train.test_share 33 | seed = config.seed 34 | train_ones, val_ones, val_zeros, test_ones, test_zeros = train_val_test_split_adjacency(_A_obs, val_share, 35 | test_share, seed, 36 | undirected=True, 37 | connected=True, 38 | asserts=True) 39 | train_graph = sp.coo_matrix((np.ones(len(train_ones)), (train_ones[:, 0], train_ones[:, 1]))).tocsr() 40 | assert (train_graph.toarray() == train_graph.toarray().T).all() 41 | rw_len = config.model.rw_len 42 | batch_size = config.train.batch_size 43 | 44 | walker = RandomWalker(train_graph, rw_len, p=1, q=1, batch_size=batch_size) 45 | 46 | walker.walk().__next__() 47 | 
netgan = NetGAN(_N, rw_len, walk_generator=walker.walk, gpu_id=0, use_gumbel=True, disc_iters=3, 48 | W_down_discriminator_size=emb_size, W_down_generator_size=emb_size, 49 | l2_penalty_generator=1e-7, l2_penalty_discriminator=5e-5, batch_size=batch_size, 50 | generator_layers=[40], discriminator_layers=[30], temp_start=5, learning_rate=l_rate) 51 | stopping_criterion = config.train.stopping_criterion 52 | 53 | assert stopping_criterion in ["val", "eo"], "Please set the desired stopping criterion." 54 | 55 | if stopping_criterion == "val": # use val criterion for early stopping 56 | stopping = None 57 | elif stopping_criterion == "eo": # use eo criterion for early stopping 58 | stopping = 0.5 # set the target edge overlap here 59 | else: 60 | stopping = None 61 | eval_iter = config.train.eval_iter 62 | display_iter = config.train.display_iter 63 | 64 | log_dict = netgan.train(A_orig=_A_obs, val_ones=val_ones, val_zeros=val_zeros, stopping=stopping, 65 | eval_every=eval_iter, plot_every=display_iter, max_patience=20, max_iters=200000) 66 | 67 | sample_many = netgan.generate_discrete(10000, reuse=True) 68 | 69 | samples = [] 70 | 71 | for _ in range(config.test.sample_num): 72 | if (_ + 1) % 1000 == 0: 73 | print(_ + 1) 74 | samples.append(sample_many.eval({netgan.tau: 0.5})) 75 | 76 | rws = np.array(samples).reshape([-1, rw_len]) 77 | pool = mp.Pool(processes=5) 78 | args_all = [(rws, _N) for i in range(config.test.num_gen)] 79 | results = [pool.apply_async(score_to_graph, args=args) for args in args_all] 80 | graphs = [p.get() for p in results] 81 | return graphs 82 | 83 | -------------------------------------------------------------------------------- /GraphGenerator/models/ws.py: -------------------------------------------------------------------------------- 1 | import pyemd, random 2 | from GraphGenerator.models.er import complete_graph 3 | from scipy.linalg import toeplitz 4 | import numpy as np 5 | import networkx as nx 6 | 7 | 8 | def watts_strogatz_graph(n, k, p): 9 | if k > n: 10 | raise nx.NetworkXError("k>n, choose smaller k or larger n") 11 | 12 | # If k == n, the graph is complete not Watts-Strogatz 13 | if k == n: 14 | return complete_graph(n) 15 | G = nx.Graph() 16 | nodes = list(range(n)) # nodes are labeled 0 to n-1 17 | # connect each node to k/2 neighbors 18 | for j in range(1, k // 2 + 1): 19 | targets = nodes[j:] + nodes[0:j] # first j nodes are now last in list 20 | G.add_edges_from(zip(nodes, targets)) 21 | # rewire edges from each node 22 | # loop over all nodes in order (label) and neighbors in order (distance) 23 | # no self loops or multiple edges allowed 24 | for j in range(1, k // 2 + 1): # outer loop is neighbors 25 | targets = nodes[j:] + nodes[0:j] # first j nodes are now last in list 26 | # inner loop in node order 27 | for u, v in zip(nodes, targets): 28 | if random.random() < p: 29 | w = random.choice(nodes) 30 | # Enforce no self-loops or multiple edges 31 | while w == u or G.has_edge(u, w): 32 | w = random.choice(nodes) 33 | if G.degree(u) >= n - 1: 34 | break # skip this rewiring 35 | else: 36 | G.remove_edge(u, v) 37 | G.add_edge(u, w) 38 | return G 39 | 40 | 41 | def wasserstein_distance(x, y, distance_scaling=1.0): 42 | support_size = max(len(x), len(y)) 43 | d_mat = toeplitz(range(support_size)).astype(np.float) 44 | distance_mat = d_mat / distance_scaling 45 | 46 | # convert histogram values x and y to float, and make them equal len 47 | x = x.astype(np.float) 48 | y = y.astype(np.float) 49 | if len(x) < len(y): 50 | x = np.hstack((x, [0.0] * 
(support_size - len(x)))) 51 | elif len(y) < len(x): 52 | y = np.hstack((y, [0.0] * (support_size - len(y)))) 53 | 54 | emd = pyemd.emd(x, y, distance_mat) 55 | return emd 56 | 57 | 58 | def degree_loss(x, n=3, real_g=None, generator='W-S', k=2): 59 | pred_g = nx.empty_graph() 60 | if generator == 'W-S': 61 | pred_g = watts_strogatz_graph(n, k, x) 62 | real_hist = np.array(nx.degree_histogram(real_g)) 63 | real_hist = real_hist / np.sum(real_hist) 64 | pred_hist = np.array(nx.degree_histogram(pred_g)) 65 | pred_hist = pred_hist / np.sum(pred_hist) 66 | loss = wasserstein_distance(real_hist, pred_hist) 67 | return loss 68 | 69 | 70 | def grid_search(x_min, x_max, x_step, n, real_g, generator, k=2, repeat=2): 71 | loss_all = [] 72 | x_list = np.arange(x_min, x_max, x_step) 73 | for x_test in x_list: 74 | tmp_loss = 0 75 | for i in range(repeat): 76 | tmp_loss += degree_loss(x_test, n=n, real_g=real_g, generator=generator, k=k) 77 | loss_all.append(tmp_loss) 78 | x_best = x_list[np.argmin(np.array(loss_all))] 79 | return x_best, min(loss_all) 80 | 81 | 82 | def generator_optimization(graph, generator='W-S'): 83 | graph_node = graph.number_of_nodes() 84 | graph_edge = graph.number_of_edges() 85 | k = round(graph_edge/graph_node) + 1 86 | p_selected = 1. 87 | print('graph with {} nodes'.format(graph_node)) 88 | n = graph_node 89 | if generator == 'W-S': 90 | #loss_all = [] 91 | #parameter_all = [] 92 | p_selected, _ = grid_search(1e-6, 1, 0.01, n, graph, generator, k, 10) 93 | return n, k, p_selected 94 | 95 | 96 | def generate_new_graph(parameters, generator, repeat=1): 97 | graph_list = [] 98 | for i in range(repeat): 99 | if generator == 'W-S': 100 | graph_list.append(watts_strogatz_graph(*parameters)) 101 | return graph_list 102 | 103 | 104 | def w_s(in_graph, config): 105 | """ 106 | W-S graph generator 107 | :param in_graph: referenced graph, type: nx.Graph 108 | :param config: configure object 109 | :return: generated graphs, type: list of nx.Graph 110 | """ 111 | parameters = generator_optimization(in_graph, config.model.name) 112 | return generate_new_graph(parameters, config.model.name, repeat=config.num_gen) 113 | -------------------------------------------------------------------------------- /GraphGenerator/models/bigg_ops/tree_clib/include/struct_util.h: -------------------------------------------------------------------------------- 1 | // Copyright 2020 The Google Research Authors. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
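// Rough roles of the classes declared below (as far as can be read from this header):
// BitSet packs bit flags into 32-bit words (ibits = 32); GraphStruct holds one training
// graph as per-row edge lists plus a node permutation; JobCollect accumulates the
// per-level from/to index vectors that the model consumes when batching tree cells;
// and ColAutomata iterates over the column indices of a single row while its edge
// tree is being built.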
14 | 15 | #ifndef STRUCT_UTIL_H 16 | #define STRUCT_UTIL_H 17 | 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | class AdjRow; 25 | class AdjNode; 26 | 27 | const uint32_t ibits = 32; 28 | 29 | int num_ones(int n); 30 | 31 | class BitSet 32 | { 33 | public: 34 | BitSet(); 35 | BitSet(uint32_t _n_bits); 36 | BitSet left_shift(uint32_t n); 37 | BitSet or_op(BitSet& another); 38 | 39 | void set(uint32_t pos); 40 | bool get(uint32_t pos); 41 | 42 | uint32_t n_bits, n_macros; 43 | std::vector macro_bits; 44 | }; 45 | 46 | class GraphStruct 47 | { 48 | public: 49 | GraphStruct(int graph_id, int num_nodes, int num_edges, 50 | void* _edge_pairs = nullptr, int n_left = -1, int n_right = -1); 51 | 52 | void realize_nodes(int node_start, int node_end, 53 | int col_start, int col_end); 54 | GraphStruct* permute(); 55 | std::map > edge_list; 56 | std::vector active_rows; 57 | std::vector idx_map; 58 | int num_nodes, num_edges, graph_id; 59 | int node_start, node_end; 60 | }; 61 | 62 | extern std::vector graph_list; 63 | extern std::vector active_graphs; 64 | 65 | class AdjNode; 66 | 67 | class JobCollect 68 | { 69 | public: 70 | JobCollect(); 71 | void reset(); 72 | void build_row_indices(); 73 | void build_row_summary(); 74 | int add_job(AdjNode* node); 75 | void append_bool(std::vector< std::vector >& list, int depth, int val); 76 | std::vector global_job_nodes; 77 | std::vector job_position; 78 | std::vector has_ch; 79 | std::vector< std::vector > has_left, has_right, num_left, num_right; 80 | std::vector< std::vector > is_internal; 81 | std::vector n_cell_job_per_level, n_bin_job_per_level; 82 | std::vector< std::vector > bot_froms[2], bot_tos[2], prev_froms[2], prev_tos[2]; // NOLINT 83 | std::vector< std::vector > binary_feat_nodes; 84 | std::vector row_bot_froms[2], row_bot_tos[2]; 85 | std::vector< std::vector > row_top_froms[2], row_top_tos[2], row_prev_froms[2], row_prev_tos[2]; // NOLINT 86 | std::vector layer_sizes; 87 | std::vector< std::unordered_map > tree_idx_map; 88 | 89 | std::vector next_state_froms; 90 | std::vector< std::vector > bot_left_froms, bot_left_tos, next_left_froms, next_left_tos; // NOLINT 91 | std::vector< std::vector > step_inputs, step_nexts, step_froms, step_tos, step_indices; // NOLINT 92 | int max_rowsum_steps, max_tree_depth, max_row_merge_steps; 93 | }; 94 | 95 | extern JobCollect job_collect; 96 | 97 | class ColAutomata 98 | { 99 | public: 100 | ColAutomata(std::vector& indices); 101 | 102 | void add_edge(int col_idx); 103 | int next_edge(); 104 | int last_edge(); 105 | bool has_edge(int range_start, int range_end); 106 | 107 | int* indices; 108 | int pos, num_indices; 109 | }; 110 | 111 | class AdjNode; 112 | 113 | template 114 | class PtHolder 115 | { 116 | public: 117 | PtHolder(); 118 | void reset(); 119 | void clear(); 120 | 121 | template 122 | PtType* get_pt(Args&&... 
args) 123 | { 124 | PtType* ret; 125 | if (cur_pos >= pt_buff.size()) 126 | { 127 | ret = new PtType(std::forward(args)...); 128 | pt_buff.push_back(ret); 129 | } else { 130 | ret = pt_buff[cur_pos]; 131 | ret->init(std::forward(args)...); 132 | } 133 | assert(cur_pos < pt_buff.size()); 134 | cur_pos++; 135 | return ret; 136 | } 137 | 138 | std::vector pt_buff; 139 | size_t cur_pos; 140 | }; 141 | 142 | 143 | #endif 144 | -------------------------------------------------------------------------------- /GraphGenerator/models/bigg_ops/tensor_ops.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2020 The Google Research Authors. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from __future__ import absolute_import 17 | from __future__ import division 18 | from __future__ import print_function 19 | # pylint: skip-file 20 | 21 | import torch 22 | from torch.nn import Module 23 | from torch.nn.parameter import Parameter 24 | from torch.autograd import Function 25 | import numpy as np 26 | # from bigg.common.consts import t_float 27 | t_float = torch.float32 28 | 29 | 30 | class MultiIndexSelectFunc(Function): 31 | @staticmethod 32 | def forward(ctx, idx_froms, idx_tos, *mats): 33 | assert len(idx_tos) == len(idx_froms) == len(mats) 34 | cols = mats[0].shape[1] 35 | assert all([len(x.shape) == 2 for x in mats]) 36 | assert all([x.shape[1] == cols for x in mats]) 37 | 38 | num_rows = sum([len(x) for x in idx_tos]) 39 | out = mats[0].new(num_rows, cols) 40 | 41 | for i, mat in enumerate(mats): 42 | x_from = idx_froms[i] 43 | x_to = idx_tos[i] 44 | if x_from is None: 45 | out[x_to] = mat.detach() 46 | else: 47 | assert len(x_from) == len(x_to) 48 | out[x_to] = mat[x_from].detach() 49 | 50 | ctx.idx_froms = idx_froms 51 | ctx.idx_tos = idx_tos 52 | ctx.shapes = [x.shape for x in mats] 53 | return out 54 | 55 | @staticmethod 56 | def backward(ctx, grad_output): 57 | idx_froms, idx_tos = ctx.idx_froms, ctx.idx_tos 58 | 59 | list_grad_mats = [None, None] 60 | for i in range(len(idx_froms)): 61 | x_from = idx_froms[i] 62 | x_to = idx_tos[i] 63 | if x_from is None: 64 | grad_mat = grad_output[x_to].detach() 65 | else: 66 | grad_mat = grad_output.new(ctx.shapes[i]).zero_() 67 | grad_mat[x_from] = grad_output[x_to].detach() 68 | list_grad_mats.append(grad_mat) 69 | 70 | return tuple(list_grad_mats) 71 | 72 | 73 | class MultiIndexSelect(Module): 74 | def forward(self, idx_froms, idx_tos, *mats): 75 | return MultiIndexSelectFunc.apply(idx_froms, idx_tos, *mats) 76 | 77 | multi_index_select = MultiIndexSelect() 78 | 79 | def test_multi_select(): 80 | a = Parameter(torch.randn(4, 2)) 81 | b = Parameter(torch.randn(3, 2)) 82 | d = Parameter(torch.randn(5, 2)) 83 | 84 | idx_froms = [[0, 1], [1, 2], [3, 4]] 85 | idx_tos = [[4, 5], [0, 1], [2, 3]] 86 | c = multi_index_select(idx_froms, idx_tos, a, b, d) 87 | print('===a===') 88 | print(a) 89 | print('===b===') 90 | print(b) 91 | print('===d===') 92 | 
print(d) 93 | print('===c===') 94 | print(c) 95 | 96 | t = torch.sum(c) 97 | t.backward() 98 | print(a.grad) 99 | print(b.grad) 100 | print(d.grad) 101 | 102 | 103 | class PosEncoding(Module): 104 | def __init__(self, dim, device, base=10000, bias=0): 105 | super(PosEncoding, self).__init__() 106 | 107 | p = [] 108 | sft = [] 109 | for i in range(dim): 110 | b = (i - i % 2) / dim 111 | p.append(base ** -b) 112 | if i % 2: 113 | sft.append(np.pi / 2.0 + bias) 114 | else: 115 | sft.append(bias) 116 | self.device = device 117 | self.sft = torch.tensor(sft, dtype=t_float).view(1, -1).to(device) 118 | self.base = torch.tensor(p, dtype=t_float).view(1, -1).to(device) 119 | 120 | def forward(self, pos): 121 | with torch.no_grad(): 122 | if isinstance(pos, list): 123 | pos = torch.tensor(pos, dtype=t_float).to(self.device) 124 | pos = pos.view(-1, 1) 125 | x = pos / self.base + self.sft 126 | return torch.sin(x) 127 | 128 | 129 | if __name__ == '__main__': 130 | # test_multi_select() 131 | 132 | pos_enc = PosEncoding(128, 'cpu') 133 | print(pos_enc([1, 2, 3])) 134 | -------------------------------------------------------------------------------- /GraphGenerator/metrics/mmd.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import numpy as np 3 | import networkx as nx 4 | import concurrent.futures 5 | from functools import partial 6 | PRINT_TIME=False 7 | 8 | 9 | def gaussian_tv(x, y, sigma=1.0): 10 | support_size = max(len(x), len(y)) 11 | # convert histogram values x and y to float, and make them equal len 12 | x = x.astype(np.float) 13 | y = y.astype(np.float) 14 | if len(x) < len(y): 15 | x = np.hstack((x, [0.0] * (support_size - len(x)))) 16 | elif len(y) < len(x): 17 | y = np.hstack((y, [0.0] * (support_size - len(y)))) 18 | 19 | dist = np.abs(x - y).sum() / 2.0# one norm 20 | return np.exp(-dist * dist / (2 * sigma * sigma)) 21 | 22 | 23 | def kernel_parallel_unpacked(x, samples2, kernel): 24 | d = 0 25 | for s2 in samples2: 26 | d += kernel(x, s2) 27 | return d 28 | 29 | 30 | def kernel_parallel_worker(t): 31 | return kernel_parallel_unpacked(*t) 32 | 33 | 34 | def disc(samples1, samples2, kernel, is_parallel=True, *args, **kwargs): 35 | ''' Discrepancy between 2 samples ''' 36 | d = 0 37 | 38 | if not is_parallel: 39 | for s1 in samples1: 40 | for s2 in samples2: 41 | d += kernel(s1, s2, *args, **kwargs) 42 | else: 43 | # with concurrent.futures.ProcessPoolExecutor() as executor: 44 | # for dist in executor.map(kernel_parallel_worker, [ 45 | # (s1, samples2, partial(kernel, *args, **kwargs)) for s1 in samples1 46 | # ]): 47 | # d += dist 48 | 49 | with concurrent.futures.ThreadPoolExecutor() as executor: 50 | for dist in executor.map(kernel_parallel_worker, [ 51 | (s1, samples2, partial(kernel, *args, **kwargs)) for s1 in samples1 52 | ]): 53 | d += dist 54 | 55 | d /= len(samples1) * len(samples2) 56 | return d 57 | 58 | 59 | def compute_mmd(samples1, samples2, kernel, is_hist=True, *args, **kwargs): 60 | ''' MMD between two samples ''' 61 | print("--- MMD of sample1: {}, sample2:{}.---".format(len(samples1),len(samples2))) 62 | # normalize histograms into pmf 63 | if is_hist: 64 | samples1 = [s1 / np.sum(s1) for s1 in samples1] 65 | samples2 = [s2 / np.sum(s2) for s2 in samples2] 66 | # print('===============================') 67 | # print('s1: ', disc(samples1, samples1, kernel, *args, **kwargs)) 68 | # print('--------------------------') 69 | # print('s2: ', disc(samples2, samples2, kernel, *args, **kwargs)) 70 | # 
print('--------------------------') 71 | # print('cross: ', disc(samples1, samples2, kernel, *args, **kwargs)) 72 | # print('===============================') 73 | return disc(samples1, samples1, kernel, *args, **kwargs) + \ 74 | disc(samples2, samples2, kernel, *args, **kwargs) - \ 75 | 2 * disc(samples1, samples2, kernel, *args, **kwargs) 76 | 77 | 78 | def degree_worker(G): 79 | return np.array(nx.degree_histogram(G)) 80 | 81 | 82 | def degree_stats(graph_ref_list, graph_pred_list, is_parallel=True): 83 | ''' Compute the distance between the degree distributions of two unordered sets of graphs. 84 | Args: 85 | graph_ref_list, graph_target_list: two lists of networkx graphs to be evaluated 86 | ''' 87 | sample_ref = [] 88 | sample_pred = [] 89 | # in case an empty graph is generated 90 | graph_pred_list_remove_empty = [ 91 | G for G in graph_pred_list if not G.number_of_nodes() == 0 92 | ] 93 | 94 | prev = datetime.datetime.now() 95 | if is_parallel: 96 | with concurrent.futures.ThreadPoolExecutor() as executor: 97 | for deg_hist in executor.map(degree_worker, graph_ref_list): 98 | sample_ref.append(deg_hist) 99 | with concurrent.futures.ThreadPoolExecutor() as executor: 100 | for deg_hist in executor.map(degree_worker, graph_pred_list_remove_empty): 101 | sample_pred.append(deg_hist) 102 | else: 103 | for i in range(len(graph_ref_list)): 104 | degree_temp = np.array(nx.degree_histogram(graph_ref_list[i])) 105 | sample_ref.append(degree_temp) 106 | for i in range(len(graph_pred_list_remove_empty)): 107 | degree_temp = np.array(nx.degree_histogram(graph_pred_list_remove_empty[i])) 108 | sample_pred.append(degree_temp) 109 | 110 | # mmd_dist = compute_mmd(sample_ref, sample_pred, kernel=gaussian_emd) 111 | # mmd_dist = compute_mmd(sample_ref, sample_pred, kernel=emd) 112 | mmd_dist = compute_mmd(sample_ref, sample_pred, kernel=gaussian_tv, sigma=2.0) 113 | # mmd_dist = compute_mmd(sample_ref, sample_pred, kernel=gaussian) 114 | 115 | elapsed = datetime.datetime.now() - prev 116 | if PRINT_TIME: 117 | print('Time computing degree mmd: ', elapsed) 118 | return mmd_dist 119 | 120 | 121 | def print_result(metrics, graph_ref, graph_pred): 122 | output = {} 123 | if 'degree' in metrics: 124 | eval_metric = degree_stats(graph_ref, graph_pred) 125 | print('Degree: {}'.format(eval_metric)) 126 | output['degree']=eval_metric 127 | return output 128 | 129 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GraphGenerator 2 | [![CodeSize](https://img.shields.io/github/languages/code-size/xiangsheng1325/GraphGenerator?style=plastic)](https://github.com/xiangsheng1325/GraphGenerator) 3 | [![Contributor](https://img.shields.io/github/contributors/xiangsheng1325/GraphGenerator?style=plastic&color=blue)](https://github.com/xiangsheng1325/GraphGenerator/graphs/contributors) 4 | [![Activity](https://img.shields.io/github/commit-activity/m/xiangsheng1325/GraphGenerator?style=plastic)](https://github.com/xiangsheng1325/GraphGenerator/pulse) 5 | 6 | Toolkit for simulating observed graphs, generating new graphs and evaluating graph generators. 
7 | 8 | ## Installation 9 | ### Environments 10 | [![Python](https://img.shields.io/badge/Python-v3.6.8-blue?style=plastic)](https://www.python.org/) 11 | [![PyTorch](https://img.shields.io/badge/PyTorch-v1.8.1-green?style=plastic)](https://pypi.org/project/torch/) 12 | [![Tensorflow](https://img.shields.io/badge/Tensorflow-v2.4.0-blue?style=plastic)](https://pypi.org/project/tensorflow/) 13 | 14 | To use the deep-learning-based graph generators, a deep learning framework such as PyTorch or TensorFlow is required. 15 | We prefer PyTorch as the dependency. 16 | 17 | **1. Install PyTorch** 18 | ```bash 19 | pip install torch==1.8.1+cu111 torchvision==0.9.1+cu111 torchaudio==0.8.1 -f https://download.pytorch.org/whl/torch_stable.html 20 | ``` 21 | **2. Clone and install** 22 | ```bash 23 | git clone https://github.com/xiangsheng1325/GraphGenerator.git 24 | cd GraphGenerator 25 | pip install -r requirements.txt 26 | pip install -e . 27 | ``` 28 | ### Dependencies 29 | Some graph generators require specific dependencies, which are listed here: 30 | 31 | |Graph Generator|Dependencies|Graph Generator|Dependencies| 32 | |--|--|--|--| 33 | |ARVGA|Tensorflow|GraphRNN|Pytorch| 34 | |BiGG|Pytorch|MMSB|Tensorflow Edward| 35 | |BTER|MATLAB|NetGAN|Tensorflow| 36 | |CondGEN|Pytorch|SBMGNN|Tensorflow| 37 | |GRAN|Pytorch|SGAE|Pytorch| 38 | |Graphite|Pytorch/Tensorflow|VGAE|Pytorch| 39 | 40 | 41 | ### Project organization 42 | This project is modularized to make further contributions easier. 43 | Please organize this project according to the following structure: 44 | 45 | ``` 46 | GraphGenerator/ 47 | |___GraphGenerator/ # source code 48 | | |___models/ # graph generator implementations 49 | | | |___bigg/ 50 | | | | |___tree_model.py 51 | | | | |___... 52 | | | |___sbm.py 53 | | | |___... 54 | | |___metrics/ 55 | | | |___mmd.py 56 | | | |___... 57 | | |___train.py 58 | | |___... 59 | | 60 | |___setup.py 61 | | 62 | |___config/ # detailed configurations of complex models 63 | | |___graphite.yaml 64 | | |___... 65 | | 66 | |___data/ # raw data / cooked data 67 | | |___google.txt 68 | | |___... 69 | | 70 | |___exp # trained model and generated graphs 71 | | |___VGAE/ 72 | | |___... 73 | | 74 | |___... 75 | ``` 76 | 77 | ## GraphGenerator Usage 78 | Here are some examples of using this toolkit. 79 | 80 | **1. Preprocess data** 81 | 82 | We prefer to convert graph data into a unified data type. If the input data is already prepared, this step can be skipped. 83 | 84 | _Example:_ 85 | * run `python -m GraphGenerator --phase preprocessing -i google.txt -o google.graph` 86 | 87 | **2. Test the usage of graph generator** 88 | 89 | Before training the deep-learning-based graph generators, 90 | we recommend testing whether there are bugs in the model implementations. 91 | If the generator runs well, this step can be skipped. 92 | 93 | _Example:_ 94 | * run `python -m GraphGenerator --phase test -g bigg --config config/bigg.yaml` 95 | 96 | Note that some algorithms may be affected by the CUDA version. (For example, BiGG may encounter problems during testing; 97 | please refer to [this page](https://github.com/xiangsheng1325/GraphGenerator/blob/main/GraphGenerator/models/bigg_ops/tree_clib/reame.md) 98 | to find resolutions.) 99 | 100 | 101 | **3. Train and infer new graphs** 102 | 103 | Enjoy your graph simulation and graph data generation. 104 | 105 | _Example:_ 106 | * run `python -m GraphGenerator --phase train -i google.graph -g vgae --config config/vgae.yaml` 107 | 108 | **4.
Evaluate the results** 109 | 110 | Calculate the distance between two sets of graphs to evaluate the experimental results. 111 | 112 | _Example:_ 113 | * run `python -m GraphGenerator --phase evaluate -i new_google.graphs -r google.graph` 114 | 115 | # Reference 116 | Please use the following BibTeX entry to cite this work if it contributes to your publications. 117 | 118 | BibTeX: 119 | ``` 120 | @Article{Xiang2021General, 121 | author={Xiang, Sheng and Wen, Dong and Cheng, Dawei and Zhang, Ying and Qin, Lu and Qian, Zhengping and Lin, Xuemin}, 122 | title={General Graph Generators: Experiments, Analyses, and Improvements}, 123 | url={https://doi.org/10.1007/s00778-021-00701-5}, 124 | doi={10.1007/s00778-021-00701-5}, 125 | journal={The VLDB Journal}, 126 | publisher={Springer}, 127 | issn={0949-877X}, 128 | pages={1--29}, 129 | year={2021}, 130 | month={Oct}, 131 | day={07}, 132 | } 133 | ``` 134 | -------------------------------------------------------------------------------- /GraphGenerator/models/bter_ops/ccperdeg.m: -------------------------------------------------------------------------------- 1 | function [cd,gcc,info] = ccperdeg(G,varargin) 2 | %CCPERDEG Mean clustering coefficient per degree 3 | % 4 | % CD = CCPERDEG(G) computes the per-degree clustering coefficient, i.e., 5 | % CD(d) is the mean clustering coefficient for nodes of degree d. If bins 6 | % are used, CD(d) returns the clustering coefficient for the bin 7 | % containing degree d. 8 | % 9 | % [CD,GCC] = CCPERDEG(G) also returns the global clustering coefficient. 10 | % 11 | % [CD,GCC,INFO] = CCPERDEG(G) also returns additional information. 12 | % 13 | % [...] = CCPERDEG(G,'param',value) accepts parameter-value pairs: 14 | % 15 | % - 'nsamples' - Number of samples to use. Set to zero for exact 16 | % calculation. Default: 0 17 | % - 'bins' - Specify the degree bins for binned data. Default: [] 18 | % - 'tau' - Specify tau-value for binning. Default: [] 19 | % - 'omega' - Specify omega-value for binning. Default: [] 20 | % - 'matlabbgl' - Specify use of MATLAB-BGL clusteringcoefficients 21 | % function rather than included code. Default: false 22 | % 23 | % Note that the 'bins' parameter overrides the 'tau' and 'omega' 24 | % specifications. Otherwise, both 'tau' and 'omega' must be specified to 25 | % create bins. 26 | % 27 | % See also TRICNT, BINDATA. 28 | % 29 | % Tamara G. Kolda, Ali Pinar, and others, FEASTPACK v1.1, Sandia National 30 | % Laboratories, SAND2013-4136W, http://www.sandia.gov/~tgkolda/feastpack/, 31 | % January 2014 32 | 33 | %% License 34 | % Copyright (c) 2014, Sandia National Laboratories 35 | % All rights reserved. 36 | % 37 | % Redistribution and use in source and binary forms, with or without 38 | % modification, are permitted provided that the following conditions are 39 | % met: 40 | % 41 | % # Redistributions of source code must retain the above copyright notice, 42 | % this list of conditions and the following disclaimer. 43 | % # Redistributions in binary form must reproduce the above copyright 44 | % notice, this list of conditions and the following disclaimer in the 45 | % documentation and/or other materials provided with the distribution. 46 | % 47 | % THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 48 | % IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 49 | % THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 50 | % PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 51 | % CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 52 | % EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 53 | % PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 54 | % PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 55 | % LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 56 | % NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 57 | % SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 58 | % 59 | % 60 | % Sandia National Laboratories is a multi-program laboratory managed and 61 | % operated by Sandia Corporation, a wholly owned subsidiary of Lockheed 62 | % Martin Corporation, for the U.S. Department of Energy's National Nuclear 63 | % Security Administration under contract DE-AC04-94AL85000. 64 | 65 | % ** Process inputs 66 | params = inputParser; 67 | params.addParamValue('nsamples', 0); 68 | params.addParamValue('bins',[]); 69 | params.addParamValue('tau', []); 70 | params.addParamValue('omega', []); 71 | params.addParamValue('matlabbgl', false); 72 | params.parse(varargin{:}); 73 | 74 | nsamples = params.Results.nsamples; 75 | bins = params.Results.bins; 76 | tau = params.Results.tau; 77 | omega = params.Results.omega; 78 | matlabbgl = params.Results.matlabbgl; 79 | 80 | 81 | % ** Create bins 82 | d = full(sum(G,2)); 83 | maxd = max(d); 84 | 85 | if isempty(bins) 86 | if isempty(omega) || isempty(tau) 87 | bins = (1:(maxd+1))'; 88 | else 89 | nbins = binlookup(maxd+1,omega,tau); 90 | bins = binstart((1:(nbins+1))',omega,tau); 91 | end 92 | end 93 | 94 | % ** 95 | if nsamples == 0 96 | 97 | [t,d,w] = tricnt(G,d,matlabbgl); 98 | [~,binId] = histc(d,bins); 99 | tf = binId > 0; 100 | binWedges = accumarray(binId(tf),w(tf)); 101 | nbins = length(binWedges); 102 | binTriangles = accumarray(binId(tf),t(tf),[nbins 1]); 103 | cdb = binTriangles ./ max(1,binWedges); 104 | gcc = sum(t)/sum(w); 105 | 106 | else 107 | cdb = ccperdegest(G,bins,nsamples); 108 | [~,binId] = histc(d,bins); 109 | tf = binId > 0; 110 | w = d.*(d-1)/2; 111 | binWedges = accumarray(binId(tf),w(tf),size(cdb)); 112 | gcc = (binWedges'*cdb) / sum(binWedges); 113 | t = []; 114 | binTriangles = []; 115 | end 116 | 117 | [~,binId] = histc(1:maxd,bins); 118 | cd(1:maxd,1) = cdb(binId); 119 | 120 | % Shorten the bins array to be the same length as cdb 121 | idx = find(cdb > 0, 1, 'last'); 122 | cdb = cdb(1:idx); 123 | bins = bins(1:idx); 124 | 125 | 126 | % Create info 127 | info.nsamples = nsamples; 128 | info.gcc = gcc; 129 | info.bins = bins; 130 | info.cc_per_bin = cdb; 131 | info.deg_per_vertex = d; 132 | info.wedges_per_vertex = w; 133 | info.tris_per_vertex = t; 134 | info.wedges_per_bin = binWedges; 135 | info.tris_per_bin = binTriangles; 136 | -------------------------------------------------------------------------------- /GraphGenerator/models/mmsb.py: -------------------------------------------------------------------------------- 1 | """Stochastic block model.""" 2 | 3 | import argparse 4 | import os 5 | from time import time 6 | 7 | import edward as ed 8 | # import edward2 as ed 9 | import networkx as nx 10 | import numpy as np 11 | import tensorflow as tf 12 | import tensorflow_probability as tfp 13 | from tensorflow.python.ops.distributions.distributions import Bernoulli, Multinomial, Beta, Dirichlet 14 | from edward.models.point_mass import distributions_PointMass as PointMass 15 | from observations import karate 16 | from 
sklearn.metrics.cluster import adjusted_rand_score 17 | 18 | 19 | CUDA = 0 20 | # ed.set_seed(int(time())) 21 | 22 | 23 | # ed.set_seed(42) 24 | 25 | # DATA 26 | # X_data, Z_true = karate("data") 27 | 28 | def disjoint_cliques_test_graph(num_cliques, clique_size): 29 | G = nx.disjoint_union_all([nx.complete_graph(clique_size) for _ in range(num_cliques)]) 30 | return nx.to_numpy_matrix(G) 31 | 32 | 33 | def mmsb(N, K, data): 34 | # sparsity 35 | rho = 0.3 36 | # MODEL 37 | # probability of belonging to each of K blocks for each node 38 | gamma = Dirichlet(concentration=tf.ones([K])) 39 | # block connectivity 40 | Pi = Beta(concentration0=tf.ones([K, K]), concentration1=tf.ones([K, K])) 41 | # probability of belonging to each of K blocks for all nodes 42 | Z = Multinomial(total_count=1.0, probs=gamma, sample_shape=N) 43 | # adjacency 44 | X = Bernoulli(probs=(1 - rho) * tf.matmul(Z, tf.matmul(Pi, tf.transpose(Z)))) 45 | 46 | # INFERENCE (EM algorithm) 47 | qgamma = PointMass(params=tf.nn.softmax(tf.Variable(tf.random_normal([K])))) 48 | qPi = PointMass(params=tf.nn.sigmoid(tf.Variable(tf.random_normal([K, K])))) 49 | qZ = PointMass(params=tf.nn.softmax(tf.Variable(tf.random_normal([N, K])))) 50 | 51 | # qgamma = Normal(loc=tf.get_variable("qgamma/loc", [K]), 52 | # scale=tf.nn.softplus( 53 | # tf.get_variable("qgamma/scale", [K]))) 54 | # qPi = Normal(loc=tf.get_variable("qPi/loc", [K, K]), 55 | # scale=tf.nn.softplus( 56 | # tf.get_variable("qPi/scale", [K, K]))) 57 | # qZ = Normal(loc=tf.get_variable("qZ/loc", [N, K]), 58 | # scale=tf.nn.softplus( 59 | # tf.get_variable("qZ/scale", [N, K]))) 60 | 61 | # inference = ed.KLqp({gamma: qgamma, Pi: qPi, Z: qZ}, data={X: data}) 62 | inference = ed.MAP({gamma: qgamma, Pi: qPi, Z: qZ}, data={X: data}) 63 | 64 | # inference.run() 65 | n_iter = 6000 66 | inference.initialize(optimizer=tf.train.AdamOptimizer(learning_rate=0.01), n_iter=n_iter) 67 | 68 | tf.global_variables_initializer().run() 69 | 70 | for _ in range(inference.n_iter): 71 | info_dict = inference.update() 72 | inference.print_progress(info_dict) 73 | 74 | inference.finalize() 75 | print('qgamma after: ', qgamma.mean().eval()) 76 | return qZ.mean().eval(), qPi.eval() 77 | 78 | 79 | def arg_parse(): 80 | parser = argparse.ArgumentParser(description='MMSB arguments.') 81 | parser.add_argument('--dataset', dest='dataset', 82 | help='Input dataset.') 83 | parser.add_argument('--K', dest='K', type=int, 84 | help='Number of blocks.') 85 | parser.add_argument('--samples-per-G', dest='samples', type=int, 86 | help='Number of samples for every graph.') 87 | 88 | parser.set_defaults(dataset='grid', 89 | K=4, 90 | samples=1) 91 | return parser.parse_args() 92 | 93 | 94 | def graph_gen_from_blockmodel(B, Z): 95 | n_blocks = len(B) 96 | B = np.array(B) 97 | Z = np.array(Z) 98 | adj_prob = np.dot(Z, np.dot(B, np.transpose(Z))) 99 | adj = np.random.binomial(1, adj_prob * 0.3) 100 | return nx.from_numpy_matrix(adj) 101 | 102 | 103 | if __name__ == '__main__': 104 | prog_args = arg_parse() 105 | os.environ['CUDA_VISIBLE_DEVICES'] = str(CUDA) 106 | print('CUDA', CUDA) 107 | 108 | X_dataset = [] 109 | # X_data = nx.to_numpy_matrix(nx.connected_caveman_graph(4, 7)) 110 | if prog_args.dataset == 'clique_test': 111 | X_data = disjoint_cliques_test_graph(4, 7) 112 | X_dataset.append(X_data) 113 | elif prog_args.dataset == 'grid': 114 | graphs = [] 115 | for i in range(10, 20): 116 | for j in range(10, 20): 117 | graphs.append(nx.grid_2d_graph(i, j)) 118 | X_dataset = [nx.to_numpy_matrix(g) for g in graphs] 
119 | 120 | print('Number of graphs: ', len(X_dataset)) 121 | K = prog_args.K # number of clusters 122 | gen_graphs = [] 123 | for i in range(len(X_dataset)): 124 | if i % 5 == 0: 125 | print(i) 126 | X_data = X_dataset[i] 127 | N = X_data.shape[0] # number of vertices 128 | 129 | Zp, B = mmsb(N, K, X_data) 130 | # print("Block: ", B) 131 | Z_pred = Zp.argmax(axis=1) 132 | print("Result (label flip can happen):") 133 | # print("prob: ", Zp) 134 | print("Predicted") 135 | print(Z_pred) 136 | # print(Z_true) 137 | # print("Adjusted Rand Index =", adjusted_rand_score(Z_pred, Z_true)) 138 | for j in range(prog_args.samples): 139 | gen_graphs.append(graph_gen_from_blockmodel(B, Zp)) 140 | 141 | -------------------------------------------------------------------------------- /GraphGenerator/models/bigg_ops/tree_clib/src/lib/tree_util.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "config.h" // NOLINT 6 | #include "tree_util.h" // NOLINT 7 | #include "struct_util.h" // NOLINT 8 | 9 | 10 | AdjNode::AdjNode(AdjNode* parent, int row, int col_begin, int col_end, 11 | int depth) 12 | { 13 | this->init(parent, row, col_begin, col_end, depth); 14 | } 15 | 16 | AdjNode::~AdjNode() 17 | { 18 | if (this->lch != nullptr) 19 | delete this->lch; 20 | if (this->rch != nullptr) 21 | delete this->rch; 22 | } 23 | 24 | void AdjNode::init(AdjNode* parent, int row, int col_begin, int col_end, 25 | int depth) 26 | { 27 | this->lch = nullptr; 28 | this->rch = nullptr; 29 | this->parent = parent; 30 | this->row = row; 31 | this->col_begin = col_begin; 32 | this->col_end = col_end; 33 | this->depth = depth; 34 | this->mid = (col_begin + col_end) / 2; 35 | this->n_cols = col_end - col_begin; 36 | this->is_lowlevel = this->n_cols <= cfg::bits_compress; 37 | this->is_leaf = (this->n_cols <= 1); 38 | this->is_root = (this->parent == nullptr); 39 | if (is_lowlevel) 40 | this->bits_rep = BitSet(cfg::bits_compress); 41 | this->has_edge = false; 42 | this->job_idx = -1; 43 | } 44 | 45 | void AdjNode::update_bits() 46 | { 47 | if (!is_lowlevel) 48 | return; 49 | if (is_leaf) 50 | { 51 | if (has_edge) 52 | bits_rep.set(0); 53 | } else { 54 | bits_rep = lch->bits_rep.left_shift(rch->n_cols); 55 | bits_rep = bits_rep.or_op(rch->bits_rep); 56 | } 57 | } 58 | 59 | void AdjNode::split() 60 | { 61 | if (this->lch != nullptr && this->rch != nullptr) 62 | return; 63 | if (this->is_leaf) 64 | return; 65 | this->lch = node_holder.get_pt(this, row, col_begin, mid, depth + 1); 66 | this->rch = node_holder.get_pt(this, row, mid, col_end, depth + 1); 67 | } 68 | 69 | AdjRow::AdjRow(int row, int col_start, int col_end) 70 | { 71 | init(row, col_start, col_end); 72 | } 73 | 74 | AdjRow::~AdjRow() 75 | { 76 | if (this->root != nullptr) 77 | delete this->root; 78 | } 79 | 80 | void AdjRow::init(int row, int col_start, int col_end) 81 | { 82 | this->row = row; 83 | assert(!cfg::directed); 84 | int max_col = row; 85 | if (cfg::self_loop) 86 | max_col += 1; 87 | if (col_start < 0 || col_end < 0) 88 | { 89 | col_start = 0; 90 | col_end = max_col; 91 | } 92 | this->root = node_holder.get_pt(nullptr, row, col_start, col_end, 0); 93 | } 94 | 95 | 96 | void AdjRow::insert_edges(std::vector& col_indices) 97 | { 98 | auto* col_sm = new ColAutomata(col_indices); 99 | this->add_edges(this->root, col_sm); 100 | delete col_sm; 101 | } 102 | 103 | void AdjRow::add_edges(AdjNode* node, ColAutomata* col_sm) 104 | { 105 | if (node->is_root) 106 | { 107 | node->has_edge = 
col_sm->num_indices > 0; 108 | job_collect.has_ch.push_back(node->has_edge); 109 | } else { 110 | node->has_edge = true; 111 | } 112 | if (!node->has_edge) 113 | return; 114 | job_collect.append_bool(job_collect.is_internal, node->depth, 115 | !(node->is_leaf)); 116 | if (node->is_leaf) { 117 | col_sm->add_edge(node->col_begin); 118 | node->update_bits(); 119 | } else { 120 | node->split(); 121 | bool has_left = (col_sm->next_edge() < node->mid); 122 | if (has_left) 123 | this->add_edges(node->lch, col_sm); 124 | job_collect.append_bool(job_collect.has_left, node->depth, has_left); 125 | job_collect.append_bool(job_collect.num_left, node->depth, 126 | node->lch->n_cols); 127 | bool has_right = has_left ? 128 | col_sm->has_edge(node->mid, node->col_end) : true; 129 | if (has_right) 130 | this->add_edges(node->rch, col_sm); 131 | job_collect.append_bool(job_collect.has_right, node->depth, has_right); 132 | job_collect.append_bool(job_collect.num_right, node->depth, 133 | node->rch->n_cols); 134 | node->update_bits(); 135 | node->job_idx = job_collect.add_job(node); 136 | 137 | int cur_idx = (int)job_collect.has_left[node->depth].size() - 1; 138 | auto* ch = node->lch; 139 | if (ch->has_edge && !ch->is_leaf && !ch->is_lowlevel) 140 | { 141 | int pos = job_collect.job_position[ch->job_idx]; 142 | job_collect.append_bool(job_collect.next_left_froms, node->depth, 143 | pos); 144 | job_collect.append_bool(job_collect.next_left_tos, node->depth, 145 | cur_idx); 146 | } else { 147 | int bid = ch->has_edge ? 1 : 0; 148 | if (ch->has_edge && !ch->is_leaf) 149 | bid = 2 + job_collect.job_position[ch->job_idx]; 150 | job_collect.append_bool(job_collect.bot_left_froms, node->depth, 151 | bid); 152 | job_collect.append_bool(job_collect.bot_left_tos, node->depth, 153 | cur_idx); 154 | } 155 | } 156 | } 157 | 158 | 159 | PtHolder node_holder; 160 | PtHolder row_holder; 161 | -------------------------------------------------------------------------------- /GraphGenerator/__main__.py: -------------------------------------------------------------------------------- 1 | import argparse, sys, pickle, warnings, os, torch 2 | 3 | # import torch.cuda 4 | 5 | warnings.filterwarnings("ignore") 6 | from GraphGenerator.preprocessing import dataio 7 | from GraphGenerator.utils.arg_utils import get_config, set_device 8 | import pandas as pd 9 | 10 | 11 | def print_variables(vdict, name="args"): 12 | print("-----------------------------------------") 13 | print("|This is the summary of {}:".format(name)) 14 | var = vdict 15 | for i in var: 16 | if var[i] is None: 17 | continue 18 | print("|{:11}\t: {}".format(i, var[i])) 19 | print("-----------------------------------------") 20 | 21 | 22 | if __name__ == '__main__': 23 | # get arguments 24 | parser = argparse.ArgumentParser() 25 | parser.add_argument("-p", "--phase", help="Choose phase.", default="preprocessing", type=str, 26 | choices=["preprocessing", "train", "evaluate", "test"], 27 | required=True) 28 | parser.add_argument("-i", "--input", help="Path of input file. Example:```-i google.txt```", default=None) 29 | parser.add_argument("-o", "--output", help="Specify the name of output file.", default=None) 30 | parser.add_argument("-c", "--config", help="Specify the path of config file.", default=None) 31 | parser.add_argument("-g", "--generator", help="choose the generator. 
Example:```-g sbm```", default="vgae", 32 | choices=["e-r", "b-a", "w-s", "rtg", "bter", "sbm", "dcsbm", "rmat", "kronecker", 33 | "mmsb", "vgae", "graphite", "sbmgnn", "graphrnn", "gran", "bigg", "arvga", 34 | "netgan", "condgen", "sgae"]) 35 | parser.add_argument("-e", "--evaluate", help="choose the evaluating metrics.", default=None) 36 | parser.add_argument("-r", "--ref", help="Path of referenced graphs(Only required in evaluate phase)", default=None) 37 | args = parser.parse_args() 38 | print_variables(vars(args)) 39 | if args.phase == 'preprocessing': 40 | from GraphGenerator.preprocessing import utils 41 | tmp_path = args.input 42 | print("# Load edgelist...") 43 | graph = utils.edgelist_to_graph(tmp_path) 44 | graphlist = [graph] 45 | print("# Save graphlist...") 46 | if args.output is None: 47 | output_name = "{}.graphs".format(args.input) 48 | else: 49 | output_name = args.output 50 | dataio.save_data(graphlist, name=output_name) 51 | 52 | elif args.phase == 'train': 53 | config = get_config(args.config) 54 | set_device(config) 55 | from GraphGenerator.train import train_base as train 56 | print("Start loading data...") 57 | input_data = dataio.load_data(args.input) 58 | if args.config is None: 59 | args.config = "config/{}.yaml".format(args.generator) 60 | # os.environ["CUDA_VISIBLE_DEVICES"] = str(config.gpu) 61 | print("Start (training and) inferencing graph...") 62 | output_data = [] 63 | if isinstance(input_data, list): 64 | for graph in input_data: 65 | tmp_data = train.train_and_inference(graph, args.generator, config=config) 66 | if isinstance(tmp_data, list): 67 | output_data.extend(tmp_data) 68 | else: 69 | output_data.append(tmp_data) 70 | else: 71 | tmp_data = train.train_and_inference(input_data, args.generator) 72 | if isinstance(tmp_data, list): 73 | output_data.extend(tmp_data) 74 | else: 75 | output_data.append(tmp_data) 76 | print("Start saving generated graphs...") 77 | if args.output is None: 78 | output_name = "{}_to_{}.graphs".format(config.dataset.name, args.generator) 79 | else: 80 | output_name = args.output 81 | dataio.save_data(output_data, name=os.path.join(config.exp_dir, config.exp_name, output_name)) 82 | elif args.phase == 'evaluate': 83 | config = get_config(args.config) 84 | set_device(config) 85 | if args.evaluate == 'efficiency': 86 | from GraphGenerator.evaluate.efficiency import eval_efficiency 87 | print("Start evaluating the efficiency of graph generator [{}].".format(args.generator)) 88 | result = eval_efficiency(args.generator, config) 89 | elif args.evaluate == 'performance': 90 | from GraphGenerator.metrics import mmd 91 | print("Start evaluating the quality of generated graphs...") 92 | graphs_ref = dataio.load_data(args.ref) 93 | graphs_pred = dataio.load_data(args.input) 94 | result = mmd.print_result(args.evaluate, graphs_ref, graphs_pred) 95 | if args.output is None: 96 | output_name = "{}_to_{}.csv".format(args.ref, args.input) 97 | else: 98 | output_name = args.output 99 | tmp_pd = pd.DataFrame(result) 100 | tmp_pd.to_csv(output_name) 101 | elif args.phase == 'test': 102 | config = get_config(args.config) 103 | set_device(config) 104 | from GraphGenerator.test import test_generator 105 | print("Start test the package...") 106 | test_generator(args, config) 107 | print("Memory reserved: {} KiB.".format(torch.cuda.memory_reserved(config.device)//1024)) 108 | print("Test finished.") 109 | print("Done!") 110 | # sys.exit(0) 111 | -------------------------------------------------------------------------------- 
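For reference, the command-line phases above can also be driven programmatically. The following minimal sketch mirrors the `train` branch of `__main__.py`; the generator name `vgae`, the input file `google.graph`, and the output file name are illustrative assumptions (any generator/config pair from `config/` would be used the same way).

```python
# Minimal programmatic sketch of the `train` phase, following __main__.py above.
# Assumptions: config/vgae.yaml is available and google.graph was produced by
# the preprocessing phase; both names are placeholders.
from GraphGenerator.preprocessing import dataio
from GraphGenerator.utils.arg_utils import get_config, set_device
from GraphGenerator.train import train_base as train

config = get_config("config/vgae.yaml")
set_device(config)

input_data = dataio.load_data("google.graph")
graphs = input_data if isinstance(input_data, list) else [input_data]

output_data = []
for graph in graphs:
    generated = train.train_and_inference(graph, "vgae", config=config)
    output_data.extend(generated if isinstance(generated, list) else [generated])

dataio.save_data(output_data, name="google_to_vgae.graphs")
```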
/GraphGenerator/evaluate/distance.py: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # 3 | # Adapt from https://github.com/JiaxuanYou/graph-generation 4 | # 5 | ############################################################################### 6 | import pyemd 7 | import numpy as np 8 | import networkx as nx 9 | import concurrent.futures 10 | from functools import partial 11 | from scipy.linalg import toeplitz 12 | 13 | 14 | def vanilla_emd(x, y, distance_scaling=1.0): 15 | support_size = max(len(x), len(y)) 16 | d_mat = toeplitz(range(support_size)).astype(np.float) 17 | distance_mat = d_mat / distance_scaling 18 | 19 | # convert histogram values x and y to float, and make them equal len 20 | x = x.astype(np.float) 21 | y = y.astype(np.float) 22 | if len(x) < len(y): 23 | x = np.hstack((x, [0.0] * (support_size - len(x)))) 24 | elif len(y) < len(x): 25 | y = np.hstack((y, [0.0] * (support_size - len(y)))) 26 | 27 | emd = pyemd.emd(x, y, distance_mat) 28 | return emd 29 | 30 | 31 | def l2(x, y): 32 | dist = np.linalg.norm(x - y, 2) 33 | return dist 34 | 35 | 36 | def emd(x, y, sigma=1.0, distance_scaling=1.0): 37 | ''' EMD 38 | Args: 39 | x, y: 1D pmf of two distributions with the same support 40 | sigma: standard deviation 41 | ''' 42 | support_size = max(len(x), len(y)) 43 | d_mat = toeplitz(range(support_size)).astype(np.float) 44 | distance_mat = d_mat / distance_scaling 45 | 46 | # convert histogram values x and y to float, and make them equal len 47 | x = x.astype(np.float) 48 | y = y.astype(np.float) 49 | if len(x) < len(y): 50 | x = np.hstack((x, [0.0] * (support_size - len(x)))) 51 | elif len(y) < len(x): 52 | y = np.hstack((y, [0.0] * (support_size - len(y)))) 53 | 54 | return np.abs(pyemd.emd(x, y, distance_mat)) 55 | 56 | 57 | def gaussian_emd(x, y, sigma=1.0, distance_scaling=1.0): 58 | ''' Gaussian kernel with squared distance in exponential term replaced by EMD 59 | Args: 60 | x, y: 1D pmf of two distributions with the same support 61 | sigma: standard deviation 62 | ''' 63 | support_size = max(len(x), len(y)) 64 | d_mat = toeplitz(range(support_size)).astype(np.float) 65 | distance_mat = d_mat / distance_scaling 66 | 67 | # convert histogram values x and y to float, and make them equal len 68 | x = x.astype(np.float) 69 | y = y.astype(np.float) 70 | if len(x) < len(y): 71 | x = np.hstack((x, [0.0] * (support_size - len(x)))) 72 | elif len(y) < len(x): 73 | y = np.hstack((y, [0.0] * (support_size - len(y)))) 74 | 75 | emd = pyemd.emd(x, y, distance_mat) 76 | return np.exp(-emd * emd / (2 * sigma * sigma)) 77 | 78 | 79 | def gaussian(x, y, sigma=1.0): 80 | support_size = max(len(x), len(y)) 81 | # convert histogram values x and y to float, and make them equal len 82 | x = x.astype(np.float) 83 | y = y.astype(np.float) 84 | if len(x) < len(y): 85 | x = np.hstack((x, [0.0] * (support_size - len(x)))) 86 | elif len(y) < len(x): 87 | y = np.hstack((y, [0.0] * (support_size - len(y)))) 88 | 89 | dist = np.linalg.norm(x - y, 2)# two norm (二范数) 90 | return np.exp(-dist * dist / (2 * sigma * sigma)) 91 | 92 | 93 | def gaussian_tv(x, y, sigma=1.0): 94 | support_size = max(len(x), len(y)) 95 | # convert histogram values x and y to float, and make them equal len 96 | x = x.astype(np.float) 97 | y = y.astype(np.float) 98 | if len(x) < len(y): 99 | x = np.hstack((x, [0.0] * (support_size - len(x)))) 100 | elif len(y) < len(x): 101 | y = np.hstack((y, [0.0] * (support_size 
- len(y)))) 102 | 103 | dist = np.abs(x - y).sum() / 2.0# one norm 一范数 104 | return np.exp(-dist * dist / (2 * sigma * sigma)) 105 | 106 | 107 | def kernel_parallel_unpacked(x, samples2, kernel): 108 | d = 0 109 | for s2 in samples2: 110 | d += kernel(x, s2) 111 | return d 112 | 113 | 114 | def kernel_parallel_worker(t): 115 | return kernel_parallel_unpacked(*t) 116 | 117 | 118 | def disc(samples1, samples2, kernel, is_parallel=True, *args, **kwargs): 119 | ''' Discrepancy between 2 samples ''' 120 | d = 0 121 | 122 | if not is_parallel: 123 | for s1 in samples1: 124 | for s2 in samples2: 125 | d += kernel(s1, s2, *args, **kwargs) 126 | else: 127 | # with concurrent.futures.ProcessPoolExecutor() as executor: 128 | # for dist in executor.map(kernel_parallel_worker, [ 129 | # (s1, samples2, partial(kernel, *args, **kwargs)) for s1 in samples1 130 | # ]): 131 | # d += dist 132 | 133 | with concurrent.futures.ThreadPoolExecutor() as executor: 134 | for dist in executor.map(kernel_parallel_worker, [ 135 | (s1, samples2, partial(kernel, *args, **kwargs)) for s1 in samples1 136 | ]): 137 | d += dist 138 | 139 | d /= len(samples1) * len(samples2) 140 | return d 141 | 142 | 143 | def compute_mmd(samples1, samples2, kernel, is_hist=True, *args, **kwargs): 144 | ''' MMD between two samples ''' 145 | print("--- MMD of sample1: {}, sample2:{}.---".format(len(samples1),len(samples2))) 146 | # normalize histograms into pmf 147 | if is_hist: 148 | samples1 = [s1 / np.sum(s1) for s1 in samples1] 149 | samples2 = [s2 / np.sum(s2) for s2 in samples2] 150 | # print('===============================') 151 | # print('s1: ', disc(samples1, samples1, kernel, *args, **kwargs)) 152 | # print('--------------------------') 153 | # print('s2: ', disc(samples2, samples2, kernel, *args, **kwargs)) 154 | # print('--------------------------') 155 | # print('cross: ', disc(samples1, samples2, kernel, *args, **kwargs)) 156 | # print('===============================') 157 | return disc(samples1, samples1, kernel, *args, **kwargs) + \ 158 | disc(samples2, samples2, kernel, *args, **kwargs) - \ 159 | 2 * disc(samples1, samples2, kernel, *args, **kwargs) 160 | 161 | 162 | def compute_emd(samples1, samples2, kernel, is_hist=True, *args, **kwargs): 163 | ''' EMD between average of two samples ''' 164 | # normalize histograms into pmf 165 | if is_hist: 166 | samples1 = [np.mean(samples1)] 167 | samples2 = [np.mean(samples2)] 168 | # print('===============================') 169 | # print('s1: ', disc(samples1, samples1, kernel, *args, **kwargs)) 170 | # print('--------------------------') 171 | # print('s2: ', disc(samples2, samples2, kernel, *args, **kwargs)) 172 | # print('--------------------------') 173 | # print('cross: ', disc(samples1, samples2, kernel, *args, **kwargs)) 174 | # print('===============================') 175 | return disc(samples1, samples2, kernel, *args, 176 | **kwargs), [samples1[0], samples2[0]] 177 | -------------------------------------------------------------------------------- /GraphGenerator/models/graphite.py: -------------------------------------------------------------------------------- 1 | import torch, math 2 | from torch.nn.modules.module import Module 3 | from torch.nn.parameter import Parameter 4 | from torch.autograd import Variable 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | 9 | class GraphConvolution(Module): 10 | """ 11 | Simple GCN layer, similar to https://arxiv.org/abs/1609.02907 12 | """ 13 | 14 | def __init__(self, in_features, out_features, bias=True, 
act=lambda x: x): 15 | super(GraphConvolution, self).__init__() 16 | self.in_features = in_features 17 | self.out_features = out_features 18 | self.weight = Parameter(torch.FloatTensor(in_features, out_features)) 19 | self.act = act 20 | if bias: 21 | self.bias = Parameter(torch.FloatTensor(out_features)) 22 | else: 23 | self.register_parameter('bias', None) 24 | self.reset_parameters() 25 | 26 | def reset_parameters(self): 27 | stdv = 1. / math.sqrt(self.weight.size(1)) 28 | self.weight.data.uniform_(-stdv, stdv) 29 | if self.bias is not None: 30 | self.bias.data.uniform_(-stdv, stdv) 31 | 32 | def forward(self, input, adj): 33 | support = torch.mm(input, self.weight) 34 | output = torch.mm(adj, support) 35 | if self.bias is not None: 36 | output = output + self.bias 37 | return self.act(output) 38 | 39 | def __repr__(self): 40 | return self.__class__.__name__ + ' (' \ 41 | + str(self.in_features) + ' -> ' \ 42 | + str(self.out_features) + ')' 43 | 44 | 45 | class GraphiteLayer(Module): 46 | """ 47 | Simple Graphite layer, similar to https://arxiv.org/abs/1803.10459 48 | """ 49 | def __init__(self, input_dim, output_dim, bias=True, act=lambda x: x): 50 | super(GraphiteLayer, self).__init__() 51 | self.in_features = input_dim 52 | self.out_features = output_dim 53 | self.weight = Parameter(torch.FloatTensor(input_dim, output_dim)) 54 | self.act = act 55 | if bias: 56 | self.bias = Parameter(torch.FloatTensor(output_dim)) 57 | else: 58 | self.register_parameter('bias', None) 59 | self.reset_parameters() 60 | 61 | def reset_parameters(self): 62 | stdv = 1. / math.sqrt(self.weight.size(1)) 63 | self.weight.data.uniform_(-stdv, stdv) 64 | if self.bias is not None: 65 | self.bias.data.uniform_(-stdv, stdv) 66 | 67 | def forward(self, x, input1, input2): 68 | x = torch.mm(x, self.weight) 69 | if self.bias is not None: 70 | x = x + self.bias 71 | x = torch.mm(input1, torch.mm(input1.T, x))+torch.mm(input2, torch.mm(input2.T, x)) 72 | return self.act(x) 73 | 74 | 75 | class GraphiteVAE(nn.Module): 76 | def __init__(self, num_features, hidden_dim, embed_dim, decode_dim, act=F.relu, autoregressive_scalar=0.5): 77 | super(GraphiteVAE, self).__init__() 78 | self.hidden = GraphConvolution(num_features, hidden_dim, act=act) 79 | self.z_mean = GraphConvolution(hidden_dim, embed_dim, act=act) 80 | self.mean = None 81 | self.z_logv = GraphConvolution(hidden_dim, embed_dim, act=act) 82 | self.logv = None 83 | self.decode0 = GraphiteLayer(num_features, decode_dim, act=act) 84 | self.decode1 = GraphiteLayer(embed_dim, decode_dim, act=act) 85 | self.decode2 = GraphiteLayer(decode_dim, embed_dim, act=lambda x: x) 86 | self.autoregressive_scalar = autoregressive_scalar 87 | 88 | def forward(self, adj, x=None, device='cuda:0'): 89 | support = self.hidden(x, adj) 90 | self.mean = self.z_mean(support, adj) 91 | self.logv = self.z_logv(support, adj) 92 | noise = Variable(torch.rand(self.mean.shape[0], self.mean.shape[1], dtype=torch.float32)).to(device) 93 | support = noise * torch.exp(self.logv) + self.mean 94 | recon_1 = F.normalize(support, p=2, dim=1) 95 | recon_2 = torch.ones(recon_1.shape).to(device) 96 | recon_2 /= torch.sqrt(recon_2.sum(1, keepdim=True)) 97 | d = torch.mm(recon_1, torch.unsqueeze(recon_1.sum(0), 1)) + \ 98 | torch.mm(recon_2, torch.unsqueeze(recon_2.sum(0), 1)) 99 | d = d.pow(-0.5) 100 | recon_1 = recon_1*d 101 | recon_2 = recon_2*d 102 | update = self.decode1(support, recon_1, recon_2) + self.decode0(x, recon_1, recon_2) 103 | update = self.decode2(update, recon_1, recon_2) 104 | update =
(1-self.autoregressive_scalar) * support + self.autoregressive_scalar * update 105 | reconstructions = torch.mm(update, update.T) 106 | return reconstructions 107 | # return update 108 | 109 | 110 | class GraphiteAE(nn.Module): 111 | def __init__(self, num_features, hidden_dim, embed_dim, decode_dim, act=F.relu, autoregressive_scalar=0.5): 112 | super(GraphiteAE, self).__init__() 113 | self.hidden = GraphConvolution(num_features, hidden_dim, act=act) 114 | self.z_mean = GraphConvolution(hidden_dim, embed_dim, act=act) 115 | self.mean = None 116 | # self.z_logv = GraphConvolution(hidden_dim, embed_dim, act=act) 117 | # self.logv = None 118 | self.decode0 = GraphiteLayer(num_features, decode_dim, act=act) 119 | self.decode1 = GraphiteLayer(embed_dim, decode_dim, act=act) 120 | self.decode2 = GraphiteLayer(decode_dim, embed_dim, act=lambda x: x) 121 | self.autoregressive_scalar = autoregressive_scalar 122 | 123 | def forward(self, adj, x=None, device='cuda:0'): 124 | support = self.hidden(x, adj) 125 | support = self.z_mean(support, adj) 126 | # self.logv = self.z_logv(support, adj) 127 | # noise = Variable(torch.rand(self.mean.shape[0], self.mean.shape[1], dtype=torch.float32)).to(device) 128 | # support = noise * torch.exp(self.logv) + self.mean 129 | recon_1 = F.normalize(support, p=2, dim=1) 130 | recon_2 = torch.ones(recon_1.shape).to(device) 131 | recon_2 /= torch.sqrt(recon_2.sum(1, keepdim=True)) 132 | d = torch.mm(recon_1, torch.unsqueeze(recon_1.sum(0), 1)) + \ 133 | torch.mm(recon_2, torch.unsqueeze(recon_2.sum(0), 1)) 134 | d = d.pow(-0.5) 135 | recon_1 = recon_1 * d 136 | recon_2 = recon_2 * d 137 | update = self.decode1(support, recon_1, recon_2) + self.decode0(x, recon_1, recon_2) 138 | update = self.decode2(update, recon_1, recon_2) 139 | update = (1 - self.autoregressive_scalar) * support + self.autoregressive_scalar * update 140 | reconstructions = torch.mm(update, update.T) 141 | return reconstructions 142 | # return update 143 | 144 | -------------------------------------------------------------------------------- /GraphGenerator/models/bter_ops/bindata.m: -------------------------------------------------------------------------------- 1 | function [xx,yy,bins] = bindata(x,y,varargin) 2 | %BINDATA Exponential data binning. 3 | % 4 | % [XX,YY] = BINDATA(X,Y) logarithmically bins data. We assume both X and 5 | % Y are column vectors of the same length. By default, the k-th bin is 6 | % [2^k, 2^k-1]. The return values are defined as follows: XX(k) = 2^k is 7 | % the "bin label" and YY(k) = sum_i { Y(i) s.t. 2^k <= X(i) < 2^(k-1) } 8 | % is the "bin value". 9 | % 10 | % [XX,YY] = BINDATA([],Y) sets X = (1:length(Y))'. 11 | % 12 | % [XX,YY,BB] = BINDATA(...) returns the data bins, i.e., the k-th bin is 13 | % defined by [BB(k), BB(k+1)-1]. This can be useful if the meaning of XX 14 | % is modified by the parameters defined below. 15 | % 16 | % [XX,YY,BB] = BINDATA(X,Y,'param',value,...) also accepts 17 | % parameter-value pairs, as described below. 18 | % 19 | % --- Bin Definitions --- 20 | % The start of the k-th bin is given by 21 | % BB(k) = k + idx0 - 1 if k <= tau, else 22 | % BB(k) = ceil((omega.^(k-tau)-1)/(omega-1)) + tau + idx0 - 1. 23 | % 24 | % o 'omega' - Bin increase multiplier. Default: 2. 25 | % o 'tau' - Number of singleton bins. Default: 1. 26 | % o 'idx0' - Starting index to be binned. Default: 1. 27 | % 28 | % --- Binning Behavior --- 29 | % o 'bin' - Do binning? 
If false, returns X and Y unchanged unless X 30 | % was empty on input, in which case it's been reset to 31 | % (1:length(Y)). Default: true. 32 | % o 'ybinfun' - Function for the "bin value", used to combine all the 33 | % y-values in the same bin. Default: @sum. 34 | % o 'xbinfun' - Function for the "bin index". By default, XX(k)=BB(k). 35 | % If a function is specified, however, then this is used to combine 36 | % all the values in the same bin. Specifying @mean, for instance, 37 | % gives a weighted mean of the x-value as the bin index. 38 | % Default: [] (indicate to use the bin starts). 39 | % 40 | % --- Preprocessing --- 41 | % o 'prebin' - Collect values together for same x. This has the side 42 | % effect of ensuring the x values are dense, even for zero y values. 43 | % Default: false. 44 | % o 'prebinfun' - Specified function to combine values with same x. 45 | % Default: @mean. 46 | % 47 | % --- Postprocessing --- 48 | % o 'nozeros' - Remove any zero yy-values (and corresponding xx) from 49 | % the output. Default: false. 50 | % 51 | % EXAMPLES 52 | % y = [10 8 6 0 4]'; 53 | % [xx,yy] = bindata([],y) % Create 3 bins and gives total per bin. 54 | % [xx,yy] = bindata([],y,'bin',false) % Returns xx = (1:5)' and yy = y. 55 | % x = [2 3 5 5 6]'; 56 | % 57 | % See also BINLOOKUP, BINSTART. 58 | % 59 | % Tamara G. Kolda, Ali Pinar, and others, FEASTPACK v1.1, Sandia National 60 | % Laboratories, SAND2013-4136W, http://www.sandia.gov/~tgkolda/feastpack/, 61 | % January 2014 62 | 63 | %% License 64 | % Copyright (c) 2014, Sandia National Laboratories 65 | % All rights reserved. 66 | % 67 | % Redistribution and use in source and binary forms, with or without 68 | % modification, are permitted provided that the following conditions are 69 | % met: 70 | % 71 | % # Redistributions of source code must retain the above copyright notice, 72 | % this list of conditions and the following disclaimer. 73 | % # Redistributions in binary form must reproduce the above copyright 74 | % notice, this list of conditions and the following disclaimer in the 75 | % documentation and/or other materials provided with the distribution. 76 | % 77 | % THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 78 | % IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 79 | % THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 80 | % PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 81 | % CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 82 | % EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 83 | % PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 84 | % PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 85 | % LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 86 | % NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 87 | % SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 88 | % 89 | % 90 | % Sandia National Laboratories is a multi-program laboratory managed and 91 | % operated by Sandia Corporation, a wholly owned subsidiary of Lockheed 92 | % Martin Corporation, for the U.S. Department of Energy's National Nuclear 93 | % Security Administration under contract DE-AC04-94AL85000. 
94 | 95 | % ** Parse inputs 96 | params = inputParser; 97 | params.addParamValue('bin', true); 98 | params.addParamValue('prebin', false); 99 | params.addParamValue('prebinfun', @mean); 100 | params.addParamValue('omega', 2); 101 | params.addParamValue('tau', 1); 102 | params.addParamValue('idx0', 1); 103 | params.addParamValue('xbinfun', []); 104 | params.addParamValue('ybinfun', @sum); 105 | params.addParamValue('nozeros', false); 106 | params.parse(varargin{:}); 107 | 108 | binparams = {params.Results.omega, params.Results.tau, params.Results.idx0}; 109 | % ** Check and fix empty x 110 | if isempty(x) 111 | x = (1:length(y))'; 112 | end 113 | 114 | % ** Make sure both x and y are column vectors 115 | x = reshape(x,[],1); 116 | y = reshape(y,[],1); 117 | 118 | % ** Check inputs are the same length 119 | if numel(x) ~= numel(y) 120 | error('Input vectors are not the same length'); 121 | end 122 | 123 | % ** Check for no binning 124 | if ~params.Results.bin % No binning 125 | xx = x; 126 | yy = y; 127 | bins = []; 128 | return; 129 | end 130 | 131 | % ** Number of bins? 132 | nbins = binlookup(max(x), binparams{:}); 133 | 134 | % ** Pre-binning? 135 | % Pre-binning creates dense x and y arrays, with an entry for every 136 | % possible x-value. If there are multiple copies of x, then the default is 137 | % to take the mean of the associated y-values. 138 | if params.Results.prebin 139 | xmax = binstart(nbins+1, binparams{:})-1; 140 | y = accumarray(x,y,[xmax 1],params.Results.prebinfun); 141 | x = (1:xmax)'; 142 | end 143 | 144 | % ** Determine xx 145 | idx = binlookup(x, binparams{:}); 146 | if isempty(params.Results.xbinfun) 147 | xx = binstart((1:nbins)', binparams{:}); 148 | else 149 | xx = accumarray(idx, x, [], params.Results.xbinfun); 150 | end 151 | yy = accumarray(idx, y, [], params.Results.ybinfun); 152 | bins = binstart((1:(nbins+1))', binparams{:}); 153 | 154 | % ** Remove zero entries? 155 | if params.Results.nozeros 156 | tf = yy > 0; 157 | yy = yy(tf); 158 | xx = xx(tf); 159 | end -------------------------------------------------------------------------------- /GraphGenerator/evaluate/diff.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import powerlaw 3 | import networkx as nx 4 | import igraph,datetime 5 | import scipy.sparse as sp 6 | from scipy.sparse.csgraph import connected_components 7 | 8 | 9 | # eval utils 10 | def statistics_degrees(A_in): 11 | degrees = A_in.sum(axis=0) 12 | return np.max(degrees), np.min(degrees), np.mean(degrees) 13 | 14 | 15 | def statistics_LCC(A_in): 16 | unique, counts = np.unique(connected_components(A_in)[1], return_counts=True) 17 | LCC = np.where(connected_components(A_in)[1] == np.argmax(counts))[0] 18 | return LCC 19 | 20 | 21 | def statistics_wedge_count(A_in): 22 | degrees = A_in.sum(axis=0).flatten() 23 | return float(np.sum(np.array([0.5 * x * (x - 1) for x in degrees]))) 24 | 25 | 26 | def statistics_claw_count(A_in): 27 | degrees = A_in.sum(axis=0).flatten() 28 | return float(np.sum(np.array([1 / 6. 
* x * (x - 1) * (x - 2) for x in degrees]))) 29 | 30 | 31 | def statistics_triangle_count(A_in): 32 | A_graph = nx.from_numpy_matrix(A_in) 33 | triangles = nx.triangles(A_graph) 34 | t = np.sum(list(triangles.values())) / 3 35 | return int(t) 36 | 37 | 38 | def squares(g): 39 | cliques = g.cliques(min=4, max=4) 40 | result = [0] * g.vcount() 41 | for i, j, k, l in cliques: 42 | result[i] += 1 43 | result[j] += 1 44 | result[k] += 1 45 | result[l] += 1 46 | return result 47 | 48 | 49 | def statistics_square_count(A_in): 50 | A_igraph = igraph.Graph.Adjacency((A_in > 0).tolist()).as_undirected() 51 | return int(np.sum(squares(A_igraph)) / 4) 52 | 53 | 54 | def statistics_power_law_alpha(A_in): 55 | degrees = A_in.sum(axis=0).flatten() 56 | return powerlaw.Fit(degrees, xmin=max(np.min(degrees),1)).power_law.alpha 57 | 58 | 59 | def statistics_gini(A_in): 60 | n = A_in.shape[0] 61 | degrees = np.array(A_in.sum(axis=0)).flatten() 62 | degrees_sorted = np.sort(degrees) 63 | G = (2 * np.sum(np.array([i * degrees_sorted[i] for i in range(len(degrees))]))) / (n * np.sum(degrees)) - ( 64 | n + 1) / n 65 | return float(G) 66 | 67 | 68 | def statistics_edge_distribution_entropy(A_in): 69 | degrees = A_in.sum(axis=0).flatten() 70 | m = 0.5 * np.sum(np.square(A_in)) 71 | n = A_in.shape[0] 72 | 73 | H_er = 1 / np.log(n) * np.sum(-degrees / (2 * float(m)) * np.log((degrees+.0001) / (2 * float(m)))) 74 | return H_er 75 | 76 | 77 | def statistics_cluster_coefficient(A_in): 78 | G = nx.Graph(A_in) 79 | return nx.average_clustering(G) 80 | 81 | 82 | def statistics_compute_cpl(A): 83 | P = sp.csgraph.shortest_path(sp.csr_matrix(A)) 84 | return P[((1 - np.isinf(P)) * (1 - np.eye(P.shape[0]))).astype(np.bool)].mean() 85 | #return P[((1 - np.isinf(P)) * (1 - np.eye(P.shape[0]))).astype(np.bool)] 86 | 87 | 88 | def compute_graph_statistics(A_in, Z_obs=None): 89 | A = A_in.copy() 90 | 91 | assert((A == A.T).all()) 92 | A_graph = nx.from_numpy_matrix(A).to_undirected() 93 | 94 | statistics = {} 95 | 96 | d_max, d_min, d_mean = statistics_degrees(A)# 0.5s 97 | 98 | # Degree statistics 99 | statistics['deg_max'] = d_max 100 | statistics['deg_min'] = d_min 101 | statistics['deg_mean'] = d_mean 102 | 103 | # node number & edger number 104 | #statistics['node_num'] = A_graph.number_of_nodes() 105 | #statistics['edge_num'] = A_graph.number_of_edges() 106 | 107 | # largest connected component 108 | LCC = statistics_LCC(A)# 33.1s 109 | 110 | statistics['LCC'] = LCC.shape[0] 111 | # wedge count 112 | statistics['wedge_count'] = statistics_wedge_count(A)# 0.4s 113 | 114 | # claw count 115 | statistics['claw_count'] = statistics_claw_count(A)# 0.5s 116 | 117 | # triangle count 118 | statistics['triangle_count'] = statistics_triangle_count(A)# 4.7s 119 | 120 | # Square count 121 | statistics['square_count'] = statistics_square_count(A)# 41.5s 122 | 123 | # power law exponent 124 | statistics['power_law_exp'] = statistics_power_law_alpha(A)# 1.1s 125 | 126 | # gini coefficient 127 | statistics['gini'] = statistics_gini(A)# 0.5s 128 | 129 | # Relative edge distribution entropy 130 | statistics['rel_edge_distr_entropy'] = statistics_edge_distribution_entropy(A)# 3.5s 131 | 132 | # Assortativity 133 | statistics['assortativity'] = nx.degree_assortativity_coefficient(A_graph)# unknown 134 | 135 | # Clustering coefficient 136 | statistics['clustering_coefficient'] = statistics_cluster_coefficient(A)# 8.4s 137 | 138 | # Number of connected components 139 | #statistics['n_components'] = connected_components(A)[0] 140 | 141 | # if Z_obs 
is not None: 142 | # # inter- and intra-community density 143 | # intra, inter = statistics_cluster_props(A, Z_obs) 144 | # statistics['intra_community_density'] = intra 145 | # statistics['inter_community_density'] = inter 146 | 147 | statistics['cpl'] = statistics_compute_cpl(A)# 252.4s 148 | 149 | return statistics 150 | 151 | 152 | def compute_graph_statistics_short(A_in, Z_obs=None): 153 | A = A_in.copy() 154 | assert((A == A.T).all()) 155 | statistics = {} 156 | # power law exponent 157 | statistics['power_law_exp'] = statistics_power_law_alpha(A)# 1.1s 158 | # gini coefficient 159 | statistics['gini'] = statistics_gini(A)# 0.5s 160 | statistics['cpl'] = statistics_compute_cpl(A)# 252.4s 161 | return statistics 162 | 163 | 164 | def diff_graphs(graphs_ref, graphs_pred): 165 | diff_d = {} 166 | for g1 in graphs_ref: 167 | d1 = compute_graph_statistics(nx.to_numpy_array(g1)) 168 | for g2 in graphs_pred: 169 | d2 = compute_graph_statistics(nx.to_numpy_array(g2)) 170 | for k in list(d1.keys()): 171 | tmp = diff_d.get(k, 0.) 172 | diff_d[k] = tmp + round(abs(d1[k] - d2[k]), 5) 173 | sample_num = len(graphs_ref)*len(graphs_pred) 174 | for k in list(d1.keys()): 175 | tmp = diff_d.get(k, 0.) 176 | diff_d[k] = tmp/sample_num 177 | return diff_d 178 | 179 | 180 | def diff_graphs_short(graphs_ref, graphs_pred): 181 | diff_d = {} 182 | for g1 in graphs_ref: 183 | d1 = compute_graph_statistics_short(nx.to_numpy_array(g1)) 184 | for g2 in graphs_pred: 185 | d2 = compute_graph_statistics_short(nx.to_numpy_array(g2)) 186 | for k in list(d1.keys()): 187 | tmp = diff_d.get(k, 0.) 188 | diff_d[k] = tmp + round(abs(d1[k] - d2[k]), 5) 189 | sample_num = len(graphs_ref)*len(graphs_pred) 190 | for k in list(d1.keys()): 191 | tmp = diff_d.get(k, 0.) 192 | diff_d[k] = tmp/sample_num 193 | return diff_d 194 | 195 | 196 | def preprocess_graph(g): 197 | g.remove_edges_from(nx.selfloop_edges(g)) 198 | g =g.subgraph(max(nx.connected_components(g), key=len)) 199 | g = nx.convert_node_labels_to_integers(g) 200 | return g 201 | -------------------------------------------------------------------------------- /GraphGenerator/models/rtg.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import networkx as nx 3 | import itertools as it 4 | from scipy.linalg import toeplitz 5 | import pyemd 6 | import concurrent.futures 7 | import multiprocessing as mp 8 | 9 | 10 | def rtg_graph(num_edges, num_chars, beta, q, num_timestick=1, 11 | bipartite=False, self_loop=False, parallel=True): 12 | if num_chars > 26: 13 | raise ValueError('Number of characters cannot be greater than 26') 14 | 15 | all_chars = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 16 | 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'] 17 | all_chars2 = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 18 | 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'] 19 | 20 | chars = all_chars[:num_chars] + ['#'] 21 | if bipartite: 22 | chars2 = all_chars2[:num_chars] + ['$'] 23 | else: 24 | chars2 = all_chars[:num_chars] + ['#'] 25 | 26 | keyboard = create_2d_keyboard(num_chars, q, beta) 27 | edges = [] 28 | graph = nx.Graph() 29 | if parallel: 30 | with concurrent.futures.ThreadPoolExecutor() as executor: 31 | for edge in executor.map(edge_kernel, [ 32 | (chars, chars2, keyboard, 33 | bipartite, self_loop) for _ in range(num_edges) 34 | ]): 35 | edges.append(edge) 36 | else: 37 | for _ in range(num_edges): 38 | edges.append(create_edge(chars, 
chars2, keyboard, 39 | bipartite, self_loop)) 40 | graph.add_edges_from(edges) 41 | return graph 42 | 43 | 44 | def create_2d_keyboard(num_chars, q, beta): 45 | # assign unequal probabilities to the keys 46 | p = np.zeros(num_chars + 1) 47 | p_remaining = 1 - q 48 | for i in range(num_chars - 1): 49 | p[i] = np.random.rand() * p_remaining 50 | p_remaining -= p[i] 51 | p[num_chars - 1] = p_remaining 52 | # last key is the separator 53 | p[num_chars] = q 54 | 55 | # init the keyboard with independent cross-product probs 56 | keyboard = np.outer(p, p) 57 | # multiply by the imbalance factor 58 | keyboard = keyboard * beta 59 | # set diagonal to 0 60 | np.fill_diagonal(keyboard, 0) 61 | # calculate remaining probabilities for the diagonal 62 | # such that each row and column sums up to the 63 | # marginal probability 64 | remaining_diag = p - keyboard.sum(axis=0) 65 | dia_idx = np.diag_indices_from(keyboard) 66 | keyboard[dia_idx] = remaining_diag 67 | 68 | return keyboard 69 | 70 | 71 | def create_edge(chars, chars2, keyboard, bipartite, self_loop): 72 | src_finished = False 73 | dst_finished = False 74 | src = '' 75 | dst = '' 76 | char_combi = np.fromiter(it.product(chars, chars2), 77 | dtype='1str,1str') 78 | 79 | if not self_loop and not bipartite: 80 | # for the first try the key that produces a self-loop 81 | # on the delimiter is not permitted (to reduce the number 82 | # of self-loops) 83 | first_try_keyboard = np.copy(keyboard) 84 | first_try_keyboard[-1, -1] = 0 85 | first_try_keyboard = first_try_keyboard / first_try_keyboard.sum() 86 | src, dst = np.random.choice(char_combi, p=first_try_keyboard.flatten()) 87 | if src == '#': 88 | src_finished = True 89 | if dst == '#' or dst == '$': 90 | dst_finished = True 91 | 92 | while not (src_finished and dst_finished): 93 | s, d = np.random.choice(char_combi, p=keyboard.flatten()) 94 | if not src_finished: 95 | src += s 96 | if not dst_finished: 97 | dst += d 98 | if s == '#': 99 | src_finished = True 100 | if d == '#' or d == '$': 101 | dst_finished = True 102 | 103 | # if we produced a self loop but they are not allowed 104 | # we generate a new edge by running the whole function 105 | # again 106 | if ((not self_loop) and (src == dst)): 107 | return create_edge(chars, chars2, keyboard, bipartite, self_loop) 108 | else: 109 | return (src, dst) 110 | 111 | 112 | def edge_kernel(t): 113 | return create_edge(*t) 114 | 115 | 116 | def wasserstein_distance(x, y, distance_scaling=1.0): 117 | support_size = max(len(x), len(y)) 118 | d_mat = toeplitz(range(support_size)).astype(float) 119 | distance_mat = d_mat / distance_scaling 120 | 121 | # convert histogram values x and y to float, and make them equal len 122 | x = x.astype(float) 123 | y = y.astype(float) 124 | if len(x) < len(y): 125 | x = np.hstack((x, [0.0] * (support_size - len(x)))) 126 | elif len(y) < len(x): 127 | y = np.hstack((y, [0.0] * (support_size - len(y)))) 128 | 129 | emd = pyemd.emd(x, y, distance_mat) 130 | return emd 131 | 132 | 133 | def degree_loss(x, n=3, real_g=None, generator='RTG', k=2): 134 | pred_g = nx.empty_graph() 135 | if generator in ['RTG', 'rtg']: 136 | pred_g = rtg_graph(n, 26, beta=x, q=k) 137 | real_hist = np.array(nx.degree_histogram(real_g)) 138 | real_hist = real_hist / np.sum(real_hist) 139 | pred_hist = np.array(nx.degree_histogram(pred_g)) 140 | pred_hist = pred_hist / np.sum(pred_hist) 141 | loss = wasserstein_distance(real_hist, pred_hist) 142 | return loss 143 | 144 | 145 | def grid_search(x_min, x_max, x_step, n, real_g, generator, k=2,
repeat=2): 146 | loss_all = [] 147 | x_list = np.arange(x_min, x_max, x_step) 148 | for x_test in x_list: 149 | tmp_loss = 0 150 | for i in range(repeat): 151 | tmp_loss += degree_loss(x_test, n=n, real_g=real_g, generator=generator, k=k) 152 | loss_all.append(tmp_loss) 153 | x_best = x_list[np.argmin(np.array(loss_all))] 154 | return x_best, min(loss_all) 155 | 156 | 157 | def generator_optimization(graph, generator='RTG'): 158 | graph_node = graph.number_of_nodes() 159 | print('graph with {} nodes'.format(graph_node)) 160 | parameter_temp = 1 161 | if generator == 'RTG': 162 | pool = mp.Pool(processes=8) 163 | edge_num = graph.number_of_edges() 164 | args_all = [(.09, 1, .1, edge_num, graph, generator, q ** 2 / 100) for q in range(1, 10)] 165 | results = [pool.apply_async(grid_search, args=args) for args in args_all] 166 | output = [p.get() for p in results] 167 | parameter_all = [o[0] for o in output] 168 | loss_all = [o[1] for o in output] 169 | idx = np.argmin(np.array(loss_all)) 170 | parameter_temp = parameter_all[int(idx)] 171 | parameter_temp = (edge_num, 26, parameter_temp, (list(range(1, 10))[int(idx)]) ** 2 / 100) 172 | return parameter_temp 173 | 174 | 175 | def generate_new_graph(parameters, generator, repeat=1): 176 | graph_list = [] 177 | for i in range(repeat): 178 | if generator in ['rtg', 'RTG']: 179 | graph_list.append(rtg_graph(*parameters)) 180 | return graph_list 181 | 182 | 183 | def rtg(in_graph, config): 184 | """ 185 | RTG graph generator 186 | :param in_graph: referenced graph, type: nx.Graph 187 | :param config: configure object 188 | :return: generated graphs, type: list of nx.Graph 189 | """ 190 | parameters = generator_optimization(in_graph, config.model.name) 191 | return generate_new_graph(parameters, config.model.name, repeat=config.num_gen) 192 | 193 | 194 | if __name__ == '__main__': 195 | tmp = rtg_graph(5429, 20, 0.09, 0.01, 1) 196 | -------------------------------------------------------------------------------- /GraphGenerator/models/bter_ops/degdist_param_search.m: -------------------------------------------------------------------------------- 1 | function [p1,p2] = degdist_param_search(avgdeg,maxdeg,varargin) 2 | %DEGDIST_PARAM_SEARCH Find parameters for "ideal" degree distribution. 3 | % 4 | % [A,B] = DEGDIST_PARAM_SEARCH(AVG,BND) will attempt to find ideal 5 | % parameters for generating a discrete generalized log-normal 6 | % distribution with the expected average degree (AVG) and maximum degree 7 | % bound (BND) with probability less than 1e-10. 8 | % 9 | % G = DEGDIST_PARAM_SEARCH(AVG,BND,'type','dpl') is the same as above 10 | % except that it will attempt to find the ideal parameter for generating 11 | % a discrete power law distribution. 12 | % 13 | % Optional Parameters: 14 | % o 'type' - Type of degree distribution. Choices are discrete 15 | % generalized log normal ('dgln') or discrete power law ('dpl'). 16 | % o 'maxdeg_prbnd' - Ideally, the probability of a node with degree BND 17 | % (the maximum possible) is less than this bound. Default: 1e-10. 18 | % o 'fminsearch_opts' - The options passed to the function fminsearch. 19 | % Default: optimset('TolFun', 1e-4, 'TolX', 1e-4). 20 | % o 'verbose' - True to print out details of the progress of the search. 21 | % Default: true. 22 | % 23 | % See also GENDEGDIST, DGLNPDF, DPLPDF. 24 | % 25 | % Tamara G. 
Kolda, Ali Pinar, and others, FEASTPACK v1.1, Sandia National 26 | % Laboratories, SAND2013-4136W, http://www.sandia.gov/~tgkolda/feastpack/, 27 | % January 2014 28 | 29 | %% License 30 | % Copyright (c) 2014, Sandia National Laboratories 31 | % All rights reserved. 32 | % 33 | % Redistribution and use in source and binary forms, with or without 34 | % modification, are permitted provided that the following conditions are 35 | % met: 36 | % 37 | % # Redistributions of source code must retain the above copyright notice, 38 | % this list of conditions and the following disclaimer. 39 | % # Redistributions in binary form must reproduce the above copyright 40 | % notice, this list of conditions and the following disclaimer in the 41 | % documentation and/or other materials provided with the distribution. 42 | % 43 | % THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 44 | % IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 45 | % THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 46 | % PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 47 | % CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 48 | % EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 49 | % PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 50 | % PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 51 | % LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 52 | % NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 53 | % SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 54 | % 55 | % 56 | % Sandia National Laboratories is a multi-program laboratory managed and 57 | % operated by Sandia Corporation, a wholly owned subsidiary of Lockheed 58 | % Martin Corporation, for the U.S. Department of Energy's National Nuclear 59 | % Security Administration under contract DE-AC04-94AL85000. 60 | 61 | params = inputParser; 62 | params.addParamValue('maxdeg_prbnd', 1e-10); 63 | params.addParamValue('fminsearch_opts', optimset('TolFun', 1e-4, 'TolX', 1e-4)); 64 | params.addParamValue('type','dgln'); 65 | params.addParamValue('verbose',true); 66 | params.parse(varargin{:}); 67 | 68 | options = params.Results.fminsearch_opts; 69 | bnd = params.Results.maxdeg_prbnd; 70 | type = params.Results.type; 71 | verbose = params.Results.verbose; 72 | 73 | if strcmp(type,'dgln') 74 | fhandle = @(x) dglnobjfunc(x(1), x(2), maxdeg, bnd, avgdeg, verbose); 75 | [xstar,fstar,exitflag] = fminsearch(fhandle, [2 2], options); 76 | p1 = xstar(1); 77 | p2 = xstar(2); 78 | elseif strcmp(type,'dpl') 79 | fhandle = @(x) dplobjfunc(x, maxdeg, bnd, avgdeg, verbose); 80 | [xstar,fstar,exitflag] = fminsearch(fhandle, 2, options); 81 | p1 = xstar; 82 | p2 = 0; 83 | else 84 | error('Invalid type'); 85 | end 86 | 87 | if ((exitflag ~= 1) || (fstar > 0.01)) 88 | warning('Could not find ideal solution. F(X)=%e, Exit Flag = %d.\n', fstar, exitflag); 89 | end 90 | 91 | function y = dglnobjfunc(alpha,beta,maxdeg,bnd,avgdeg,verbose) 92 | %DGLNOBJFUNC Function to evaluate degree distribution 93 | % 94 | % Y = DGLNOBJFUNC(ALPHA,BETA,MEXDEG,BND,AVGDEG,BND) computes a score for 95 | % the DGLN degree distribution with MAXDEG and parameters ALPHA and 96 | % BETA. The goal is that the final degree distribution should have an 97 | % average degree of AVGDEG and the probability of obtaining the maximum 98 | % degree should be less than BND. A perfect match would have a score of 99 | % zero. 
100 | % 101 | % Y = DGLNOBJFUNC(...,VERBOSE) also indicates whether or not the function 102 | % should print anything. By default, VERBOSE = true. 103 | % 104 | %T. Kolda, November 2012. 105 | 106 | % ** Input checking 107 | if ~exist('verbose','var') 108 | verbose = true; 109 | end 110 | 111 | % ** Find maximum expected degree 112 | % We want to find x such that P(random vertex has degree > x) < bnd. 113 | p = dglnpdf(maxdeg,alpha,beta); 114 | 115 | % Penalty should grow quickly! 116 | if p(end) > bnd 117 | y1 = (exp(1+p(end)-bnd))^2 - 1; 118 | else 119 | y1 = 0; 120 | end 121 | 122 | % ** Find expected average degree 123 | a = ((1:maxdeg)*p); % Compute average degree 124 | y2 = (a-avgdeg)^2; 125 | 126 | % ** Sum the two values 127 | y = y1+y2; 128 | 129 | % ** Optional printing 130 | 131 | if verbose 132 | fprintf('alpha=%.3f, beta=%.3f, maxdeg=%d, p(maxdeg)=%e, avgdeg=%.1f, y=%.2f\n', ... 133 | alpha, beta, maxdeg, p(end), a, y); 134 | end 135 | 136 | function y = dplobjfunc(gamma,maxdeg,bnd,avgdeg,verbose) 137 | %DPLOBJFUNC Function to evaluate degree distribution 138 | % 139 | % Y = DPLOBJFUNC(GAMMA,MEXDEG,BND,AVGDEG,BND) computes a score for 140 | % the powerlaw degree distribution with MAXDEG and parameter GAMMA. The 141 | % goal is that the final degree distribution should have an average 142 | % degree of AVGDEG and the probability of obtaining the maximum degree 143 | % should be less than BND. A perfect match would have a score of zero. 144 | % 145 | % Y = DPLOBJFUNC(...,VERBOSE) also indicates whether or not the function 146 | % should print anything. By default, VERBOSE = true. 147 | % 148 | %T. G. Kolda and others, Sandia National Laboratories, November 2012. 149 | 150 | % Sandia National Laboratories is a multi-program laboratory managed and 151 | % operated by Sandia Corporation, a wholly owned subsidiary of Lockheed 152 | % Martin Corporation, for the U.S. Department of Energy's National Nuclear 153 | % Security Administration under contract DE-AC04-94AL85000. 154 | 155 | % ** Input checking 156 | if ~exist('verbose','var') 157 | verbose = true; 158 | end 159 | 160 | % ** Find maximum expected degree 161 | % We want to find x such that P(random vertex has degree > x) < bnd. 162 | p = dplpdf(maxdeg,gamma); 163 | 164 | % Penalty should grow quickly! 165 | if p(end) > bnd 166 | y1 = (exp(1+p(end)-bnd))^2 - 1; 167 | else 168 | y1 = 0; 169 | end 170 | 171 | % ** Find expected average degree 172 | a = ((1:maxdeg)*p); % Compute average degree 173 | y2 = (a-avgdeg)^2; 174 | 175 | % ** Sum the two values 176 | y = y1+y2; 177 | 178 | % ** Optional printing 179 | 180 | if verbose 181 | fprintf('gamma=%.3f, maxdeg=%d, p(maxdeg)=%e, avgdeg=%.1f, y=%.2f\n', ... 182 | gamma, maxdeg, p(end), a, y); 183 | end -------------------------------------------------------------------------------- /GraphGenerator/models/bter_ops/tricnt_mex.c: -------------------------------------------------------------------------------- 1 | /* TRICNT_MEX.C: Computes the number of triangles adjacent to each vertex. 2 | 3 | The code uses full enumeration. Each edge is assigned to its lower degree vertex, 4 | and each vertex checks wedges formed by edges assigned to itself. 5 | 6 | For computational results for this algorithm, see 7 | C. Seshadhri, A. Pinar, and T.G. Kolda, 8 | Triadic Measures on Graphs: The Power of Wedge Sampling, 9 | Proc. SIAM Data Mining, May 2013. 10 | 11 | Tamara G. 
Kolda, Ali Pinar, and others, FEASTPACK v1.1, Sandia National 12 | Laboratories, SAND2013-4136W, http://www.sandia.gov/~tgkolda/feastpack/, 13 | January 2014 14 | 15 | ** License ** 16 | Copyright (c) 2014, Sandia National Laboratories 17 | All rights reserved. 18 | 19 | Redistribution and use in source and binary forms, with or without 20 | modification, are permitted provided that the following conditions are 21 | met: 22 | 23 | 1. Redistributions of source code must retain the above copyright notice, 24 | this list of conditions and the following disclaimer. 25 | 26 | 2. Redistributions in binary form must reproduce the above copyright 27 | notice, this list of conditions and the following disclaimer in the 28 | documentation and/or other materials provided with the distribution. 29 | 30 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 31 | IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 32 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 33 | PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 34 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 35 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 36 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 37 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 38 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 39 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 40 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 41 | 42 | Sandia National Laboratories is a multi-program laboratory managed and 43 | operated by Sandia Corporation, a wholly owned subsidiary of Lockheed 44 | Martin Corporation, for the U.S. Department of Energy's National Nuclear 45 | Security Administration under contract DE-AC04-94AL85000. 46 | */ 47 | 48 | #include "mex.h" 49 | #include 50 | #include 51 | #include 52 | #include 53 | #include 54 | 55 | struct graph /* Stored in MATLAB Compressed Sparse Column format */ 56 | { 57 | int V; /* Number of vertices */ 58 | int E; /* Number of edges */ 59 | mwIndex *ptr; /* ptr[j] = start of column j within ind array */ 60 | mwIndex *ind; /* ind[ptr[j]] = row index for first nonzero in column j */ 61 | }; 62 | 63 | /* 64 | Checks if u is adjacent to v in G 65 | Returns 1 if they are adjacent; and 0 otherwise. 
66 | */ 67 | int check_pair(struct graph *G, mwIndex u, mwIndex v) 68 | { 69 | int i; 70 | for (i = G->ptr[u]; i < G->ptr[u+1]; i ++) 71 | { 72 | if (G->ind[i] == v) 73 | { 74 | return(1); 75 | } 76 | } 77 | return(0); 78 | } 79 | 80 | /* 81 | Marks on triangles formed by vertices on the list "list" and vertex r 82 | Inputs: G: graph 83 | list: list of vertices that are adjacent to r 84 | n: length of the "list" 85 | r: vertex r that is the center of wedgesbeing checked 86 | td: an array that stores the number of triangles adjacent to each vertex; 87 | the array entries are incremented with the new triagles identified 88 | 89 | Output: cnt: number of triangles found 90 | 91 | */ 92 | int mark_triangles(struct graph *G, mwIndex *list, int n, int r, double *td) 93 | { 94 | int i, j, cnt, x, y; 95 | mwIndex *ptr; 96 | 97 | ptr = G->ptr; 98 | cnt = 0; 99 | for(i = 0; i < n; i ++) 100 | { 101 | x = ptr[list[i]+1] - ptr[list[i]]; 102 | for(j = i+1; j < n; j ++) /* Check every pair of vertices on the list "list" to see if they form a triangle */ 103 | { 104 | if (x < (ptr[list[j]+1] - ptr[list[j]])) /* enables searching via the shorter list */ 105 | { 106 | y = check_pair(G, list[i], list[j]); 107 | } 108 | else 109 | { 110 | y = check_pair(G, list[j], list[i]); 111 | } 112 | if (y) /* increment the counters if a triangle is identified */ 113 | { 114 | cnt ++; 115 | td[r] ++; 116 | td[list[j]] ++; 117 | td[list[i]] ++; 118 | } 119 | } 120 | } 121 | return(cnt); 122 | } 123 | 124 | /* The main function that counts all the triangles 125 | Arguments 126 | - G: input graph [unmodified] 127 | - td: array to be filled in with the number of triangles per vertex 128 | Return value 129 | - Total number of triangles (int) 130 | 131 | */ 132 | int tri_enumerate(struct graph *G, double *td) 133 | { 134 | mwIndex i, j, N, tcnt, t, *d; 135 | mwIndex *ptr, *ptr2, *ind; 136 | 137 | N = G->V; 138 | ptr = G->ptr; 139 | ind = G->ind; 140 | 141 | /* d[i] is the degree of the ith vertex */ 142 | d = (mwIndex*) malloc( sizeof(mwIndex) * N ); 143 | 144 | /* ptr2 will be used to shorten adjacency lists, where each edge is assigned the vertex with a smaller degree */ 145 | ptr2 = (mwIndex*) malloc( sizeof(mwIndex) * (N+1) ); 146 | 147 | /* Initialize td to zero */ 148 | memset(td, 0, sizeof(double) * N); 149 | 150 | /* make degree list */ 151 | for(i = 0; i < N; i ++) 152 | { 153 | d[i] = ptr[i+1] - ptr[i]; 154 | ptr2[i] = ptr[i+1]; 155 | } 156 | 157 | /* Each vertex is assigned to its vertex with a smaller degree 158 | edges assigned to a vetex are moved towards the start of each list such that 159 | neighbors for which the edges assigned to vertex i are listed in ind[ptr[i]] to ind[ptr2[i]-1] 160 | Note that ind[ptr[i]] to ind[ptr[i]-1] stil stores all neighbors of the ith vertex 161 | */ 162 | for (i = 0; i < N; i ++) 163 | { 164 | for (j = ptr[i]; j < ptr2[i]; j ++) 165 | { 166 | if ((d[i] > d[ind[j]]) || ((d[i] == d[ind[j]]) && (i > ind[j]))) 167 | { 168 | ptr2[i] --; 169 | 170 | /* swap */ 171 | t = ind[ptr2[i]]; 172 | ind[ptr2[i]] = ind[j]; 173 | ind[j] = t; 174 | 175 | j--; 176 | } 177 | } 178 | } 179 | 180 | 181 | /* Check for triangles centered on each vertex with the edges assigned to it */ 182 | tcnt = 0; 183 | for (i = 0; i < N; i ++) 184 | { 185 | tcnt += mark_triangles(G, ind + ptr[i], ptr2[i] - ptr[i], i, td); 186 | } 187 | 188 | free(d); 189 | free(ptr2); 190 | return(tcnt); 191 | } 192 | 193 | /* ---------------------------------------------------------------------------------- 194 | This function 
provides the interface to Matlab 195 | To call this function, you need to execute in Matlab the following 196 | >> mex tricnt_mex.c -largeArrayDims 197 | 198 | The matlab function sould be called as 199 | >> t = tricnt_mex(G) 200 | 201 | G is assumed to be a sparse adjacency matrix for a simple graph. 202 | It returns a vector t, such that t[i] is the number of triangles 203 | adjacent to the ith vertex. 204 | ------------------------------------------------------------------------------------ */ 205 | void mexFunction (int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) 206 | { 207 | double *dtd; 208 | struct graph G; 209 | 210 | /* Check inputs */ 211 | if ((nrhs != 1) || (!mxIsSparse (prhs[0])) ) 212 | { 213 | mexErrMsgTxt ("expects sparse matrix"); 214 | } 215 | 216 | /* Read sparse matrix input */ 217 | G.V = mxGetN (prhs [0]); 218 | G.E = mxGetNzmax (prhs [0]); 219 | G.ind = mxGetIr (prhs[0]); 220 | G.ptr = mxGetJc (prhs[0]); 221 | 222 | /* Create array for the return argument. */ 223 | plhs[0] = mxCreateDoubleMatrix(G.V, 1, mxREAL); 224 | dtd = mxGetPr(plhs[0]); 225 | 226 | /* Compute the number of triangles for each vertex */ 227 | tri_enumerate(&G,dtd); 228 | } 229 | 230 | 231 | 232 | 233 | 234 | -------------------------------------------------------------------------------- /GraphGenerator/train/train_base.py: -------------------------------------------------------------------------------- 1 | import scipy.sparse as sp 2 | from GraphGenerator.utils.arg_utils import set_device 3 | import networkx as nx 4 | import torch.optim as optim 5 | import torch.nn.functional as F 6 | from torch.autograd import Variable 7 | from GraphGenerator.metrics.memory import get_peak_gpu_memory, flush_cached_gpu_memory 8 | import numpy as np 9 | import sys, torch, copy, datetime 10 | from GraphGenerator.evaluate.efficiency import coo_to_csp, sp_normalize 11 | 12 | 13 | def train_autoencoder_base(sp_adj, feature, config, model, optimizer): 14 | norm = sp_adj.shape[0] * sp_adj.shape[0] / float((sp_adj.shape[0] * sp_adj.shape[0] - sp_adj.sum()) * 2) 15 | pos_weight = torch.tensor(float(sp_adj.shape[0] * sp_adj.shape[0] - sp_adj.sum()) / sp_adj.sum()).to(config.device) 16 | adj_def = torch.from_numpy(sp_adj.toarray()).to(config.device) 17 | adj_normalized = sp_normalize(sp_adj, config.device) 18 | adj_normalized = Variable(adj_normalized).to(config.device) 19 | training_time = datetime.timedelta() 20 | for epoch in range(config.train.max_epochs): 21 | epoch_start = datetime.datetime.now() 22 | adj_score = model(adj_normalized, feature, device=config.device) 23 | train_loss = norm * F.binary_cross_entropy_with_logits(adj_score, adj_def, 24 | pos_weight=pos_weight) 25 | if config.model.variational: 26 | kl_div = 0.5/adj_score.size(0)*(1+2*model.logv-model.mean**2-torch.exp(model.logv)**2).sum(1).mean() 27 | train_loss -= kl_div 28 | if config.model.name == 'SBMGNN': 29 | train_loss += model.calculate_kl_div(['kl_kumar_beta', 'kl_discrete']) 30 | optimizer.zero_grad() 31 | train_loss.backward() 32 | optimizer.step() 33 | epoch_time = datetime.datetime.now() - epoch_start 34 | training_time += epoch_time 35 | print('[%03d/%d]: loss:%.4f, time per epoch:%.8s' 36 | % (epoch + 1, 37 | config.train.max_epochs, 38 | train_loss, 39 | str(epoch_time)[-12:])) 40 | print('### Training Time Consumption:%.8s' 41 | % str(training_time)[-12:]) 42 | return model 43 | 44 | 45 | def top_n_indexes(arr, n): 46 | idx = np.argpartition(arr, arr.size - n, axis=None)[-n:] 47 | width = arr.shape[1] 48 | return [divmod(i, 
width) for i in idx] 49 | 50 | 51 | def topk_adj(adj, k): 52 | if isinstance(adj, torch.Tensor): 53 | adj_ = adj.data.cpu().numpy() 54 | else: 55 | adj_ = adj 56 | assert ((adj_ == adj_.T).all()) 57 | adj_ = (adj_ - np.min(adj_)) / np.ptp(adj_) 58 | adj_ -= np.diag(np.diag(adj_)) 59 | res = np.zeros(adj.shape) 60 | tri_adj = np.triu(adj_) 61 | inds = top_n_indexes(tri_adj, int(k//2)) 62 | for ind in inds: 63 | i = ind[0] 64 | j = ind[1] 65 | res[i, j] = 1.0 66 | res[j, i] = 1.0 67 | return res 68 | 69 | 70 | def infer_autoencoder(sp_adj, feature, config, model, repeat=1): 71 | generated_graphs = [] 72 | with torch.no_grad(): 73 | adj_normalized = sp_normalize(sp_adj, config.device) 74 | adj_normalized = Variable(adj_normalized).to(config.device) 75 | for i in range(repeat): 76 | adj_score = model(adj_normalized, feature, device=config.device) 77 | adj = topk_adj(adj_score, k=sp_adj.sum()) 78 | tmp_graph = nx.from_numpy_array(adj) 79 | generated_graphs.append(tmp_graph) 80 | return generated_graphs 81 | 82 | 83 | def train_and_inference(input_data, generator, config=None, repeat=1): 84 | """ 85 | train model using input graph, and infer new graphs 86 | :param input_data: input graph(s), whose type is networkx.Graph or list of nx.Graph 87 | :param generator: name of graph generator 88 | :param config: configuration of graph generator 89 | :param repeat: number of new graphs 90 | :return: generated graphs 91 | """ 92 | # graphs = [] 93 | if generator in ['e-r', 'w-s', 'b-a', 'E-R', 'W-S', 'B-A']: 94 | import GraphGenerator.models.er as er 95 | import GraphGenerator.models.ws as ws 96 | import GraphGenerator.models.ba as ba 97 | tmp_name = generator.lower() 98 | model_name = "{}.{}".format(tmp_name.replace('-', ''), tmp_name.replace('-', '_')) 99 | graphs = eval(model_name)(input_data, config) 100 | elif generator in ['rtg', 'RTG', 'bter', 'BTER']: 101 | import GraphGenerator.models.rtg as rtg 102 | import GraphGenerator.models.bter as bter 103 | model_name = "{}.{}".format(generator, generator) 104 | graphs = eval(model_name)(input_data, config) 105 | elif generator in ['sbm', 'dcsbm']: 106 | import GraphGenerator.models.sbm as sbm 107 | graphs = sbm.generate(input_data, generator, repeat) 108 | elif generator in ['rmat', 'kronecker']: 109 | import GraphGenerator.models.kronecker as kronecker 110 | import GraphGenerator.models.rmat as rmat 111 | graphs = eval(generator).generate(input_data, config) 112 | elif generator in ['vgae', 'graphite', 'sbmgnn']: 113 | set_device(config) 114 | sp_adj = nx.adjacency_matrix(input_data).astype(np.float32) 115 | # print("Shape!", sp_adj.shape) 116 | feature = coo_to_csp(sp.diags(np.array([1. 
for i in range(sp_adj.shape[0])], 117 | dtype=np.float32)).tocoo()).to(config.device) 118 | if generator == 'vgae': 119 | import GraphGenerator.models.vgae as vgae 120 | if config.model.variational: 121 | model_name = "{}.{}".format(generator, "VGAE") 122 | else: 123 | model_name = "{}.{}".format(generator, "GAE") 124 | model = eval(model_name)(config.model.num_nodes, 125 | config.model.embedding_dim, 126 | config.model.hidden_dim, 127 | act=F.relu, 128 | layers=config.model.num_GNN_layers).to(config.device) 129 | elif generator == 'graphite': 130 | import GraphGenerator.models.graphite as graphite 131 | if config.model.variational: 132 | model_name = "{}.{}".format(generator, "GraphiteVAE") 133 | else: 134 | model_name = "{}.{}".format(generator, "GraphiteAE") 135 | model = eval(model_name)(config.model.num_nodes, 136 | config.model.hidden_dim, 137 | config.model.embedding_dim, 138 | config.model.decoding_dim, 139 | act=F.relu).to(config.device) 140 | elif generator == 'sbmgnn': 141 | import GraphGenerator.models.sbmgnn as sbmgnn 142 | model_name = "{}.{}".format(generator, 'SBMGNN') 143 | model = eval(model_name)(config.model.num_nodes, 144 | config.model.hidden, 145 | config=config).to(config.device) 146 | else: 147 | # model = None 148 | sys.exit(1) 149 | optimizer = optim.Adam(model.parameters(), lr=config.train.lr) 150 | model = train_autoencoder_base(sp_adj, feature, config, model, optimizer) 151 | tmp_memory = get_peak_gpu_memory(device=config.device) 152 | print("Peak GPU memory reserved in training process: {} MiB".format(tmp_memory//1024//1024)) 153 | flush_cached_gpu_memory() 154 | graphs = infer_autoencoder(sp_adj, feature, config, model, repeat=repeat) 155 | elif generator in ['graphrnn', 'gran', 'bigg']: 156 | import GraphGenerator.train.train_graphrnn as graphrnn 157 | import GraphGenerator.models.bigg as bigg 158 | import GraphGenerator.models.gran as gran 159 | if isinstance(input_data, nx.Graph): 160 | input_data = [input_data] 161 | trained_model = eval("{}.train_{}".format(generator, generator))(input_data, config) 162 | tmp_memory = get_peak_gpu_memory(device=config.device) 163 | print("Peak GPU memory reserved in training process: {} MiB".format(tmp_memory//1024//1024)) 164 | flush_cached_gpu_memory() 165 | graphs = eval("{}.infer_{}".format(generator, generator))(input_data, config, trained_model) 166 | else: 167 | print("Wrong generator name! 
Process exit..") 168 | sys.exit(1) 169 | return graphs 170 | 171 | -------------------------------------------------------------------------------- /GraphGenerator/models/bigg.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import sys 4 | import pickle as cp 5 | import networkx as nx 6 | import numpy as np 7 | import random 8 | from tqdm import tqdm 9 | import torch, torch.cuda 10 | import torch.optim as optim 11 | from collections import OrderedDict 12 | from GraphGenerator.utils.arg_utils import get_config, set_device 13 | from GraphGenerator.models.bigg_ops.tree_clib.tree_lib import setup_treelib, TreeLib 14 | from GraphGenerator.models.bigg_ops.tree_model import RecurTreeGen 15 | 16 | 17 | def get_node_dist(graphs): 18 | num_node_dist = np.bincount([len(gg.nodes) for gg in graphs]) 19 | num_node_dist = num_node_dist / np.sum(num_node_dist) 20 | return num_node_dist 21 | 22 | 23 | def sqrtn_forward_backward(model, 24 | graph_ids, 25 | list_node_starts, 26 | num_nodes, 27 | blksize, 28 | loss_scale, 29 | init_states=[None, None], 30 | top_grad=None, 31 | **kwargs): 32 | assert len(graph_ids) == 1 33 | if blksize < 0 or blksize > num_nodes: 34 | blksize = num_nodes 35 | 36 | prev_states = init_states 37 | cache_stages = list(range(0, num_nodes, blksize)) 38 | 39 | list_caches = [] 40 | for st_delta in cache_stages[:-1]: 41 | node_st = list_node_starts[0] + st_delta 42 | with torch.no_grad(): 43 | cur_num = num_nodes - node_st if node_st + blksize > num_nodes else blksize 44 | _, new_states = model.forward_row_summaries(graph_ids, 45 | list_node_starts=[node_st], 46 | num_nodes=cur_num, 47 | prev_rowsum_states=prev_states, 48 | **kwargs) 49 | prev_states = new_states 50 | list_caches.append(new_states) 51 | 52 | tot_ll = 0.0 53 | for i in range(len(cache_stages) - 1, -1, -1): 54 | st_delta = cache_stages[i] 55 | node_st = list_node_starts[0] + st_delta 56 | cur_num = num_nodes - node_st if node_st + blksize > num_nodes else blksize 57 | prev_states = list_caches[i - 1] if i else init_states 58 | if prev_states[0] is not None: 59 | for x in prev_states: 60 | x.requires_grad = True 61 | ll, cur_states = model.forward_train(graph_ids, 62 | list_node_starts=[node_st], 63 | num_nodes=cur_num, 64 | prev_rowsum_states=prev_states, 65 | **kwargs) 66 | tot_ll += ll.item() 67 | loss = -ll * loss_scale 68 | if top_grad is not None: 69 | torch.autograd.backward([loss, *cur_states], [None, *top_grad]) 70 | else: 71 | loss.backward() 72 | if i: 73 | top_grad = [x.grad.detach() for x in prev_states] 74 | 75 | return tot_ll, top_grad 76 | 77 | 78 | def train_bigg(train_graphs, config): 79 | # print("### Type:", type(train_graphs)) 80 | random.seed(config.seed) 81 | torch.manual_seed(config.seed) 82 | np.random.seed(config.seed) 83 | set_device(config) 84 | setup_treelib(config) 85 | for g in train_graphs: 86 | TreeLib.InsertGraph(g) 87 | max_num_nodes = max([len(gg.nodes) for gg in train_graphs]) 88 | config.model.max_num_nodes = max_num_nodes 89 | 90 | model = RecurTreeGen(config).to(config.device) 91 | if config.train.resume and os.path.isfile(config.train.resume_model_dir): 92 | print('loading from', config.train.resume_model_dir) 93 | resume_model_path = os.path.join(config.train.resume_model_dir, 94 | config.train.resume_model_name) 95 | model.load_state_dict(torch.load(resume_model_path)) 96 | 97 | optimizer = optim.Adam(model.parameters(), lr=config.train.lr, weight_decay=1e-4) 98 | indices = list(range(len(train_graphs))) 99 | 
if config.train.resume_epoch is None: 100 | config.train.resume_epoch = 0 101 | training_time = {'time_all':0., 'epochs':0,} 102 | for epoch in range(config.train.resume_epoch, config.train.max_epochs): 103 | pbar = tqdm(range(config.train.snapshot_epoch)) 104 | 105 | optimizer.zero_grad() 106 | for idx in pbar: 107 | random.shuffle(indices) 108 | batch_indices = indices[:config.train.batch_size] 109 | 110 | num_nodes = sum([len(train_graphs[i]) for i in batch_indices]) 111 | if config.model.blksize < 0 or num_nodes <= config.model.blksize: 112 | start_time = time.time() 113 | ll, _ = model.forward_train(batch_indices) 114 | loss = -ll / num_nodes 115 | loss.backward() 116 | end_time = time.time() 117 | training_time['time_all'] = training_time['time_all'] + end_time - start_time 118 | training_time['epochs'] = training_time['epochs'] + 1 119 | loss = loss.item() 120 | else: 121 | ll = 0.0 122 | for i in batch_indices: 123 | n = len(train_graphs[i]) 124 | cur_ll, _ = sqrtn_forward_backward(model, graph_ids=[i], list_node_starts=[0], 125 | num_nodes=n, blksize=config.model.blksize, loss_scale=1.0 / n) 126 | ll += cur_ll 127 | loss = -ll / num_nodes 128 | if (idx + 1) % config.train.accum_grad == 0: 129 | if config.train.grad_clip > 0: 130 | torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.train.grad_clip) 131 | optimizer.step() 132 | optimizer.zero_grad() 133 | pbar.set_description('epoch %.2f, loss: %.4f' % (epoch + (idx + 1) / config.train.snapshot_epoch, loss)) 134 | if config.train.save_snapshot: 135 | torch.save(model.state_dict(), os.path.join(config.exp_dir, config.exp_name, 'epoch-%d.ckpt' % (epoch + 1))) 136 | train_time = training_time['time_all'] / training_time['epochs'] 137 | print("Time consumption of one epoch of training BiGG is: {:.6f}".format(train_time)) 138 | return model 139 | 140 | 141 | def infer_bigg(test_graphs, config, model=None): 142 | random.seed(config.seed) 143 | torch.manual_seed(config.seed) 144 | np.random.seed(config.seed) 145 | set_device(config) 146 | setup_treelib(config) 147 | max_num_nodes = max([len(gg.nodes) for gg in test_graphs]) 148 | config.model.max_num_nodes = max_num_nodes 149 | if model is None: 150 | model = RecurTreeGen(config).to(config.device) 151 | for g in test_graphs: 152 | TreeLib.InsertGraph(g) 153 | test_model_path = os.path.join(config.test.test_model_dir, 154 | config.test.test_model_name) 155 | if config.test.load_snapshot and os.path.isfile(config.test.test_model_dir): 156 | print('loading from', config.test.test_model_dir) 157 | model.load_state_dict(torch.load(test_model_path)) 158 | 159 | # get num nodes dist 160 | num_node_dist = get_node_dist(test_graphs) 161 | gen_graphs = [] 162 | infering_time = {'time_all': 0., 'epochs': 0, } 163 | with torch.no_grad(): 164 | for _ in tqdm(range(config.test.num_test_gen)): 165 | num_nodes = np.argmax(np.random.multinomial(1, num_node_dist)) 166 | start_time = time.time() 167 | _, pred_edges, _ = model(num_nodes, display=config.test.display) 168 | for e in pred_edges: 169 | assert e[0] > e[1] 170 | pred_g = nx.Graph() 171 | pred_g.add_edges_from(pred_edges) 172 | end_time = time.time() 173 | infering_time['time_all'] = infering_time['time_all'] + end_time - start_time 174 | infering_time['epochs'] = infering_time['epochs'] + 1 175 | gen_graphs.append(pred_g) 176 | # print('saving graphs') 177 | # with open(test_model_path + '.graphs-%s' % str(config.test.greedy_frac), 'wb') as f: 178 | # cp.dump(gen_graphs, f, cp.HIGHEST_PROTOCOL) 179 | # print('evaluating') 180 | 
infer_time = infering_time['time_all'] / infering_time['epochs'] 181 | print("\nTime consumption of inferring one graph by BiGG is: {:.6f}".format(infer_time)) 182 | return gen_graphs 183 | # sys.exit(0) 184 | --------------------------------------------------------------------------------
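A minimal usage sketch (not a file in the repository) showing how the statistics helpers from GraphGenerator/evaluate/diff.py can be composed to score generated graphs against a reference graph; the two Erdős–Rényi graphs are placeholders for a real reference/generated pair, and the snippet assumes the dependency versions the code above was written against.

import networkx as nx
from GraphGenerator.evaluate.diff import preprocess_graph, diff_graphs_short

# Placeholder graphs standing in for a real reference/generated pair.
reference = preprocess_graph(nx.erdos_renyi_graph(300, 0.05, seed=0))
generated = preprocess_graph(nx.erdos_renyi_graph(300, 0.05, seed=1))

# diff_graphs_short averages |stat(reference) - stat(generated)| over all
# graph pairs, using the short statistic set (power-law exponent, Gini
# coefficient, characteristic path length) from compute_graph_statistics_short.
print(diff_graphs_short([reference], [generated]))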