├── sparsenet
│   ├── __init__.py
│   ├── test
│   │   ├── __init__.py
│   │   └── gen_test.py
│   ├── util
│   │   ├── __init__.py
│   │   ├── sys_util.py
│   │   ├── args_util.py
│   │   ├── torch_util.py
│   │   ├── sample.py
│   │   ├── name_util.py
│   │   ├── train_util.py
│   │   ├── model_util.py
│   │   ├── cut_util.py
│   │   ├── pyg_util.py
│   │   ├── gsp_util.py
│   │   ├── loss_util.py
│   │   ├── graph_util.py
│   │   ├── pygsp_util.py
│   │   └── util.py
│   ├── evaluation
│   │   ├── __init__.py
│   │   └── graph-coarsening
│   │       ├── graph_coarsening
│   │       │   ├── version.py
│   │       │   ├── __init__.py
│   │       │   ├── graph_utils.py
│   │       │   └── graph_lib.py
│   │       ├── setup.py
│   │       ├── examples
│   │       │   ├── coarsening_methods.py
│   │       │   ├── experiment_approximation.py
│   │       │   └── coarsening_demo.py
│   │       └── README.md
│   ├── model
│   │   ├── __init__.py
│   │   ├── example.py
│   │   ├── loss.py
│   │   ├── model.py
│   │   └── eval.py
│   └── install.sh
├── README.md
├── install.sh
└── requirements.txt
/sparsenet/__init__.py:
--------------------------------------------------------------------------------
1 | # Created at 2020-04-16
2 | # Summary:
--------------------------------------------------------------------------------
/sparsenet/test/__init__.py:
--------------------------------------------------------------------------------
1 | # Created at 2020-05-17
2 | # Summary:
--------------------------------------------------------------------------------
/sparsenet/util/__init__.py:
--------------------------------------------------------------------------------
1 | # Created at 2020-04-16
2 | # Summary:
--------------------------------------------------------------------------------
/sparsenet/evaluation/__init__.py:
--------------------------------------------------------------------------------
1 | # Created at 2020-05-08
2 | # Summary:
--------------------------------------------------------------------------------
/sparsenet/model/__init__.py:
--------------------------------------------------------------------------------
1 | # Created at 2020-04-10
2 | # Summary:
3 |
4 |
--------------------------------------------------------------------------------
/sparsenet/evaluation/graph-coarsening/graph_coarsening/version.py:
--------------------------------------------------------------------------------
1 | __version__ = "1.2"
2 |
--------------------------------------------------------------------------------
/sparsenet/evaluation/graph-coarsening/graph_coarsening/__init__.py:
--------------------------------------------------------------------------------
1 | from . import graph_lib, graph_utils
2 | from .coarsening_utils import coarsen, coarsening_quality, plot_coarsening
3 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # sparsifier
2 |
3 | ## Install
4 |
5 | * python 3.7.6
6 | * torch 1.4.0
7 | * pytorch geometric 1.5.0
8 | * networkx 2.4
9 |
10 | ## Test
11 | Download the processed data [here](https://drive.google.com/drive/folders/1WMYebXwU7bVRWTW33BIx-sqAZV3UFFfX?usp=sharing)
12 | and set the data directory in sparsenet/util/dir_util.py accordingly.
13 | 
14 | Run ```python sparsenet/test/gen_test.py```. The output should be similar to output.md.
15 |
--------------------------------------------------------------------------------
/sparsenet/install.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | # https://github.com/rusty1s/pytorch_geometric
4 | pip install torch-scatter==latest+cu101 -f https://pytorch-geometric.com/whl/torch-1.4.0.html
5 | pip install torch-sparse==latest+cu101 -f https://pytorch-geometric.com/whl/torch-1.4.0.html
6 | pip install torch-cluster==latest+cu101 -f https://pytorch-geometric.com/whl/torch-1.4.0.html
7 | pip install torch-spline-conv==latest+cu101 -f https://pytorch-geometric.com/whl/torch-1.4.0.html
8 | pip install torch-geometric==1.5.0
--------------------------------------------------------------------------------
/sparsenet/util/sys_util.py:
--------------------------------------------------------------------------------
1 | # Created at 2020-06-21
2 | # Summary: set thread number
3 |
4 | import os
5 | n=2
6 | os.environ['MKL_NUM_THREADS'] = str(n)
7 | os.environ['OMP_NUM_THREADS'] = str(n)
8 | os.environ['OPENBLAS_NUM_THREADS'] = str(n)
9 | os.environ['MKL_NUM_THREADS'] = str(n)
10 | os.environ['VECLIB_MAXIMUM_THREADS'] = str(n)
11 | os.environ['NUMEXPR_NUM_THREADS'] = str(n)
12 | import torch
13 | torch.set_num_threads(n) # always import this first
14 | status = f'{n}'
15 | print(f'thread status {__file__}: {status}')
16 |
17 | # status=None
--------------------------------------------------------------------------------
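sys_util.py works purely through import-time side effects, so it only matters if it runs before any numpy/torch-heavy module. A minimal usage sketch (assuming the sparsenet package is on the PYTHONPATH):

```python
# Hedged sketch: import sys_util before torch-heavy modules so the
# MKL/OMP/OpenBLAS thread-count environment variables are set in time.
import sparsenet.util.sys_util  # noqa: F401  (imported for side effects only)
import torch

print(torch.get_num_threads())  # reports the value configured in sys_util (2)
```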
/install.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | # https://github.com/rusty1s/pytorch_geometric
4 | # pip install torch-scatter==latest+cu101 -f https://pytorch-geometric.com/whl/torch-1.4.0.html
5 | # pip install torch-sparse==latest+cu101 -f https://pytorch-geometric.com/whl/torch-1.4.0.html
6 | # pip install torch-cluster==latest+cu101 -f https://pytorch-geometric.com/whl/torch-1.4.0.html
7 | # pip install torch-spline-conv==latest+cu101 -f https://pytorch-geometric.com/whl/torch-1.4.0.html
8 | # pip install torch-geometric
9 |
10 | TORCH='1.4.0'
11 | CUDA='cpu'
12 | # pip install torch-scatter -f https://data.pyg.org/whl/torch-${TORCH}+${CUDA}.html
13 | pip install torch-sparse -f https://data.pyg.org/whl/torch-${TORCH}+${CUDA}.html
14 | # pip install torch-geometric
--------------------------------------------------------------------------------
/sparsenet/evaluation/graph-coarsening/setup.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | from setuptools import setup, find_packages
4 |
5 | install_requires = [
6 | "numpy",
7 | "scipy",
8 | "pygsp",
9 | "matplotlib",
10 | "sortedcontainers",
11 | ]
12 |
13 | version_py = os.path.join(os.path.dirname(__file__), "graph_coarsening", "version.py")
14 | version = open(version_py).read().strip().split("=")[-1].replace('"', "").strip()
15 |
16 | readme = open("README.md").read()
17 |
18 | setup(
19 | name="graph_coarsening",
20 | version=version,
21 | description="graph_coarsening",
22 | author="Andreas Loukas",
23 | author_email="andreas.loukas@epfl.ch",
24 | packages=find_packages(),
25 | license="Apache License 2.0",
26 | install_requires=install_requires,
27 | long_description=readme,
28 | long_description_content_type="text/markdown",
29 | url="https://github.com/loukasa/graph-coarsening",
30 | download_url="https://github.com/loukasa/graph-coarsening/archive/v{}.tar.gz".format(
31 | version
32 | ),
33 | keywords=["big-data", "networks",],
34 | classifiers=[
35 | "Development Status :: 4 - Beta",
36 | "Environment :: Console",
37 | "Framework :: Jupyter",
38 | "Intended Audience :: Developers",
39 | "Intended Audience :: Science/Research",
40 | "Natural Language :: English",
41 | "Operating System :: MacOS :: MacOS X",
42 | "Operating System :: Microsoft :: Windows",
43 | "Operating System :: POSIX :: Linux",
44 | "Programming Language :: Python :: 3",
45 | "Programming Language :: Python :: 3.5",
46 | "Programming Language :: Python :: 3.6",
47 | "Programming Language :: Python :: 3.7",
48 | ],
49 | )
50 |
--------------------------------------------------------------------------------
/sparsenet/util/args_util.py:
--------------------------------------------------------------------------------
1 | # Created at 2021-03-13
2 | # Summary: argparse related
3 | from warnings import warn
4 |
5 | import numpy as np
6 |
7 |
8 | class argsparser():
9 | def __init__(self, args):
10 | if args.lap in ['None', 'none']:
11 | args.lap = None
12 | self.args = args
13 |
14 | def set_indices(self):
15 | args = self.args
16 |
17 | train_indices = [int(item) for item in args.train_indices.split(',') if len(item) != 0]
18 | test_indices = [int(item) for item in args.test_indices.split(',') if len(item) != 0]
19 | val_indices = np.random.choice(test_indices, 5, replace=False).tolist() if len(test_indices) > 10 else []
20 | test_indices = [idx for idx in test_indices if idx not in val_indices]
21 |
22 | if len(val_indices) == len(test_indices) == 0: # for datasets with single graph
23 | test_indices = train_indices
24 | val_indices = train_indices
25 |
26 | # todo: better handling
27 | if len(val_indices) == 0:
28 | assert len(train_indices) > 1
29 | if len(train_indices) < 5:
30 | n_sample = 1
31 | else:
32 | n_sample = 5 if len(train_indices) < 15 else 10
33 | val_indices = np.random.choice(train_indices, n_sample, replace=False).tolist()
34 | train_indices = [idx for idx in train_indices if idx not in val_indices]
35 |
36 | # todo: handle this case more elegantly
37 | if args.dataset == 'coauthors': # handle coauthors
38 | args.n_epoch = 20
39 | train_indices = [0]
40 | test_indices = [1]
41 | val_indices = [0]
42 |
43 | print(f'train_indices: {train_indices}.\n '
44 | f'val_indices: {val_indices}. \n '
45 | f'test_indices: {test_indices}.')
46 | self.args = args
47 | return train_indices, val_indices, test_indices
48 |
49 | def set_model_name(self):
50 | args = self.args
51 | model_name = 'checkpoint-best-eigen-ratio.pkl' if args.valeigen else 'checkpoint-best-improve-ratio.pkl'
52 | return model_name
53 |
--------------------------------------------------------------------------------
/sparsenet/evaluation/graph-coarsening/examples/coarsening_methods.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding: utf-8
3 |
4 | # The script shows the effect of different coarsening methods on a toy example.
5 | #
6 | # The code accompanies paper [Graph reduction with spectral and cut guarantees](http://www.jmlr.org/papers/volume20/18-680/18-680.pdf) by Andreas Loukas published at JMLR/2019 ([bibtex](http://www.jmlr.org/papers/v20/18-680.bib)).
7 | #
8 | # This work was kindly supported by the Swiss National Science Foundation (grant number PZ00P2 179981).
9 | #
10 | # 15 May 2020
11 | #
12 | # [Andreas Loukas](https://andreasloukas.blog)
13 | #
14 | # [](https://zenodo.org/badge/latestdoi/175851068)
15 | #
16 | # Released under the Apache license 2.0
17 |
18 | # In[1]:
19 |
20 |
21 | get_ipython().system('pip install networkx')
22 |
23 |
24 | # In[1]:
25 |
26 |
27 | get_ipython().run_line_magic('load_ext', 'autoreload')
28 | get_ipython().run_line_magic('autoreload', '2')
29 | get_ipython().run_line_magic('matplotlib', 'inline')
30 |
31 |
32 | # In[2]:
33 |
34 |
35 | import numpy as np
36 | import scipy as sp
37 |
38 | import matplotlib
39 | import matplotlib.pylab as plt
40 | from mpl_toolkits.mplot3d import Axes3D
41 |
42 | import networkx as nx
43 | import pygsp as gsp
44 | gsp.plotting.BACKEND = 'matplotlib'
45 |
46 |
47 | # In[3]:
48 |
49 |
50 | from graph_coarsening.coarsening_utils import *
51 | import graph_coarsening.graph_utils
52 | import graph_coarsening.graph_lib
53 |
54 |
55 | # Load the graph
56 |
57 | # In[4]:
58 |
59 |
60 | N = 600 # number of nodes
61 |
62 |
63 | # In[5]:
64 |
65 |
66 | G = graph_coarsening.graph_lib.real(N, 'yeast')
67 |
68 |
69 | # Coarsen it with different methods
70 |
71 | # In[6]:
72 |
73 |
74 | r = 0.6 # coarsening ratio
75 | methods = ['variation_neighborhoods', 'variation_edges', 'variation_cliques',
76 | 'heavy_edge', 'algebraic_JC', 'affinity_GS', 'kron']
77 |
78 |
79 | # In[7]:
80 |
81 |
82 | for method in methods:
83 |
84 | C, Gc, Call, Gall = coarsen(G, r=r, method=method)
85 | plot_coarsening(Gall, Call, title=method, size=2);
86 |
87 |
88 |
89 | # In[ ]:
90 |
91 |
92 |
93 |
94 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | pymongo
2 | sacred
3 | ase==3.19.1
4 | attrs==19.3.0
5 | backcall==0.1.0
6 | bleach==3.1.4
7 | brotli==1.0.7
8 | chardet==3.0.4
9 | click==7.1.2
10 | colorama==0.4.3
11 | cvxpy==1.0.31
12 | cycler==0.10.0
13 | dash==1.11.0
14 | dash-core-components==1.9.1
15 | dash-html-components==1.0.3
16 | dash-renderer==1.4.0
17 | dash-table==4.6.2
18 | decorator==4.4.2
19 | defusedxml==0.6.0
20 | deprecated==1.2.13
21 | dill==0.3.1.1
22 | dionysus==2.0.6
23 | docopt==0.6.2
24 | ecos==2.0.7.post1
25 | entrypoints==0.3
26 | flask==1.1.2
27 | flask-compress==1.5.0
28 | future==0.18.2
29 | gitdb==4.0.5
30 | gitpython==3.1.2
31 | googledrivedownloader==0.4
32 | h5py==2.10.0
33 | idna==2.9
34 | imageio==2.8.0
35 | importlib-metadata==1.6.0
36 | ipykernel==5.2.0
37 | ipython==7.13.0
38 | ipython-genutils==0.2.0
39 | ipywidgets==7.5.1
40 | isodate==0.6.0
41 | itsdangerous==1.1.0
42 | jedi==0.16.0
43 | jinja2==2.11.1
44 | joblib==0.14.1
45 | jsonpickle==1.4.1
46 | jsonschema==3.2.0
47 | jupyter==1.0.0
48 | jupyter-client==6.1.2
49 | jupyter-console==6.1.0
50 | jupyter-core==4.6.3
51 | kiwisolver==1.2.0
52 | llvmlite==0.31.0
53 | lmdb==0.98
54 | markupsafe==1.1.1
55 | matplotlib==3.2.1
56 | memory-profiler==0.57.0
57 | mendeleev==0.6.0
58 | mistune==0.8.4
59 | monty==3.0.2
60 | mpmath==1.1.0
61 | multiprocess==0.70.9
62 | munch==2.5.0
63 | nbconvert==5.6.1
64 | nbformat==5.0.5
65 | networkx==2.4
66 | nglview==2.7.5
67 | notebook==6.0.3
68 | numba==0.48.0
69 | numpy==1.18.2
70 | osqp==0.6.1
71 | packaging==20.4
72 | palettable==3.3.0
73 | pandas==1.0.3
74 | pandocfilters==1.4.2
75 | parso==0.6.2
76 | pexpect==4.8.0
77 | pickleshare==0.7.5
78 | pillow==7.1.1
79 | plotly==4.6.0
80 | plyfile==0.7.2
81 | prometheus-client==0.7.1
82 | prompt-toolkit==3.0.5
83 | protobuf==3.11.3
84 | psutil==5.7.0
85 | ptyprocess==0.6.0
86 | py-cpuinfo==5.0.0
87 | pydispatcher==2.0.5
88 | pyfiglet==0.8.post1
89 | pygments==2.6.1
90 | pygsp==0.5.1
91 | pymatgen==2020.4.2
92 | pymongo==3.10.1
93 | pynvml==8.0.4
94 | pyparsing==2.4.7
95 | pyrsistent==0.16.0
96 | python-dateutil==2.8.1
97 | # python-graphviz==0.13.2
98 | pytz==2019.3
99 | pywavelets==1.1.1
100 | pyyaml==5.3.1
101 | pyzmq==19.0.0
102 | qtconsole==4.7.2
103 | qtpy==1.9.0
104 | rdflib==4.2.2
105 | requests==2.23.0
106 | retrying==1.3.3
107 | ruamel-yaml==0.16.10
108 | ruamel-yaml-clib==0.2.0
109 | sacred==0.8.1
110 | schedule==0.6.0
111 | scikit-image==0.16.2
112 | scikit-learn==0.22.2.post1
113 | scipy==1.4.1
114 | scs==2.1.2
115 | send2trash==1.5.0
116 | six==1.14.0
117 | sklearn==0.0
118 | skorch==0.8.0
119 | smmap==3.0.4
120 | sortedcontainers==2.1.0
121 | spglib==1.14.1.post0
122 | sqlalchemy==1.3.16
123 | sympy==1.5.1
124 | tabulate==0.8.7
125 | tbb==2020.0.133
126 | tensorboardx==2.0
127 | termcolor==1.1.0
128 | terminado==0.8.3
129 | testpath==0.4.4
130 | torch==1.4.0
131 | torch-cluster==1.5.4
132 | torch-geometric==1.5.0
133 | torch-scatter==2.0.4
134 | torch-sparse==0.6.1
135 | torch-spline-conv==1.2.0
136 | torchsummary==1.5.1
137 | torchvision==0.5.0
138 | torchviz==0.0.1
139 | tornado==6.0.4
140 | tqdm==4.45.0
141 | traitlets==4.3.3
142 | umap-learn==0.4.1
143 | urllib3==1.25.8
144 | vtk==8.1.2
145 | wcwidth==0.1.9
146 | webencodings==0.5.1
147 | werkzeug==1.0.1
148 | widgetsnbextension==3.5.1
149 | wrapt==1.12.1
150 | yacs==0.1.8
151 | zipp==3.1.0
--------------------------------------------------------------------------------
/sparsenet/evaluation/graph-coarsening/graph_coarsening/graph_utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pygsp as gsp
3 |
4 | def to_networkx(G):
5 | import networkx as nx
6 | return nx.from_scipy_sparse_matrix(G.W)
7 |
8 | def get_neighbors(G, i):
9 | return G.A[i,:].indices
10 | # return np.arange(G.N)[np.array((G.W[i,:] > 0).todense())[0]]
11 |
12 | def get_giant_component(G):
13 |
14 | from scipy.sparse import csgraph
15 |
16 | [ncomp, labels] = csgraph.connected_components(G.W, directed=False, return_labels=True)
17 |
18 | W_g = np.array((0,0))
19 | coords_g = np.array((0,2))
20 | keep = np.array(0)
21 |
22 | for i in range(0,ncomp):
23 |
24 | idx = np.where(labels!=i)
25 | idx = idx[0]
26 |
27 | if G.N-len(idx) > W_g.shape[0]:
28 | W_g = G.W.toarray()
29 | W_g = np.delete(W_g, idx, axis=0)
30 | W_g = np.delete(W_g, idx, axis=1)
31 | if hasattr(G, 'coords'):
32 | coords_g = np.delete(G.coords, idx, axis=0)
33 | keep = np.delete(np.arange(G.N), idx)
34 |
35 | if not hasattr(G, 'coords'):
36 | # print(W_g.shape)
37 | G_g = gsp.graphs.Graph(W=W_g)
38 | else:
39 | G_g = gsp.graphs.Graph(W=W_g, coords=coords_g)
40 |
41 |
42 | return (G_g, keep)
43 |
44 |
45 | def get_S(G):
46 | """
47 | Construct the N x |E| gradient matrix S
48 | """
49 | # the edge set
50 | edges = G.get_edge_list()
51 | weights = np.array(edges[2])
52 | edges = np.array(edges[0:2])
53 | M = edges.shape[1]
54 |
55 | # Construct the N x |E| gradient matrix S
56 | S = np.zeros((G.N,M))
57 | for e in np.arange(M):
58 | S[edges[0,e], e] = np.sqrt(weights[e])
59 | S[edges[1,e], e] = -np.sqrt(weights[e])
60 |
61 | return S
62 |
63 | # Compare the spectrum of L and Lc
64 | def eig(A, order='ascend'):
65 |
66 | # eigenvalue decomposition
67 | [l,X] = np.linalg.eigh(A)
68 |
69 | # reordering indices
70 | idx = l.argsort()
71 | if order == 'descend':
72 | idx = idx[::-1]
73 |
74 | # reordering
75 | l = np.real(l[idx])
76 | X = X[:, idx]
77 | return (X,np.real(l))
78 |
79 | def zero_diag(A):
80 |
81 | import scipy as sp
82 |
83 | if sp.sparse.issparse(A):
84 | return A - sp.sparse.dia_matrix((A.diagonal()[sp.newaxis, :], [0]), shape=(A.shape[0], A.shape[1]))
85 | else:
86 | D = A.diagonal()
87 | return A - np.diag(D)
88 |
89 | def is_symmetric(As):
90 | """Check if a sparse matrix is symmetric
91 |
92 | Parameters
93 | ----------
94 | As : array or sparse matrix
95 | A square matrix.
96 |
97 | Returns
98 | -------
99 | check : bool
100 | The check result.
101 |
102 | """
103 | from scipy import sparse
104 |
105 | if As.shape[0] != As.shape[1]:
106 | return False
107 |
108 | if not isinstance(As, sparse.coo_matrix):
109 | As = sparse.coo_matrix(As)
110 |
111 | r, c, v = As.row, As.col, As.data
112 | tril_no_diag = r > c
113 | triu_no_diag = c > r
114 |
115 | if triu_no_diag.sum() != tril_no_diag.sum():
116 | return False
117 |
118 | rl = r[tril_no_diag]
119 | cl = c[tril_no_diag]
120 | vl = v[tril_no_diag]
121 | ru = r[triu_no_diag]
122 | cu = c[triu_no_diag]
123 | vu = v[triu_no_diag]
124 |
125 | sortl = np.lexsort((cl, rl))
126 | sortu = np.lexsort((ru, cu))
127 | vl = vl[sortl]
128 | vu = vu[sortu]
129 |
130 | check = np.allclose(vl, vu)
131 |
132 | return check
133 |
--------------------------------------------------------------------------------
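Since get_S builds the N x |E| gradient matrix with +sqrt(w_e) and -sqrt(w_e) entries at the two endpoints of each edge, S S^T reproduces the combinatorial Laplacian. A small sanity-check sketch, assuming pygsp is installed (the Sensor toy graph is an arbitrary choice):

```python
import numpy as np
import pygsp as gsp
from graph_coarsening.graph_utils import get_S

G = gsp.graphs.Sensor(64, seed=0)   # any small connected weighted graph works
S = get_S(G)                        # N x |E| gradient matrix
# Each edge e=(i,j) contributes +sqrt(w_e) to row i and -sqrt(w_e) to row j,
# so S @ S.T equals the combinatorial Laplacian of G.
assert np.allclose(S @ S.T, G.L.toarray())
```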
/sparsenet/evaluation/graph-coarsening/README.md:
--------------------------------------------------------------------------------
1 | # graph-coarsening package
2 |
3 | Multilevel graph coarsening algorithm with spectral and cut guarantees.
4 |
5 | The code accompanies paper [Graph reduction with spectral and cut guarantees](http://www.jmlr.org/papers/volume20/18-680/18-680.pdf) by Andreas Loukas published at JMLR/2019.
6 |
7 | In addition to the introduced [**variation**](http://www.jmlr.org/papers/volume20/18-680/18-680.pdf) methods, the code provides implementations of [**heavy-edge matching**](http://proceedings.mlr.press/v80/loukas18a.html), [**algebraic distance**](https://epubs.siam.org/doi/abs/10.1137/100791142?casa_token=tReVSPG0pBIAAAAA:P3BxPcyiSNkuxP5mOz8s9I7CN1tFQaMUTjyVHvb7PphqsGDy91ybcmAmECTYOeN2l-ErcpXuuA), [**affinity**](https://epubs.siam.org/doi/abs/10.1137/110843563?mobileUi=0), and [**Kron reduction**](http://motion.me.ucsb.edu/pdf/2011d-db.pdf) (adapted from [pygsp](https://pygsp.readthedocs.io/en/stable)).
8 |
9 | ## Paper abstract
10 | Can one reduce the size of a graph without significantly altering its basic properties? The graph reduction problem is hereby approached from the perspective of restricted spectral approximation, a modification of the spectral similarity measure used for graph sparsification. This choice is motivated by the observation that restricted approximation carries strong spectral and cut guarantees, and that it implies approximation results for unsupervised learning problems relying on spectral embeddings. The article then focuses on coarsening - the most common type of graph reduction. Sufficient conditions are derived for a small graph to approximate a larger one in the sense of restricted approximation. These findings give rise to algorithms that, compared to both standard and advanced graph reduction methods, find coarse graphs of improved quality, often by a large margin, without sacrificing speed.
11 |
12 | ## Contents
13 |
14 | There are five python notebooks included under `examples`:
15 |
16 | * `coarsening_demo.ipynb` demonstrates how the code can be used with a toy example (see also [blogpost](https://andreasloukas.blog/2018/11/05/multilevel-graph-coarsening-with-spectral-and-cut-guarantees/)).
17 | * `coarsening_methods.ipynb` shows the effect of different coarsening methods on a toy example.
18 | * `experiment_approximation.ipynb` reproduces the results of Section 5.1.
19 | * `experiment_spectrum.ipynb` reproduces the results of Section 5.2.
20 | * `experiment_scalability.ipynb` reproduces the results of Section 5.3.
21 |
22 | Since I have not fixed the random seed, some small variance should be expected in the experiment output.
23 |
24 | ## Installation instructions:
25 |
26 | ```
27 | git clone git@github.com:loukasa/graph-coarsening.git
28 | cd graph-coarsening
29 | pip install .
30 | ```
31 |
32 | Dependencies: pygsp, matplotlib, numpy, scipy, sortedcontainers
33 | Optional dependency: networkx
34 |
35 | ## Citation
36 |
37 | If you use this code, please cite:
38 | ```
39 | @article{JMLR:v20:18-680,
40 | author = {Andreas Loukas},
41 | title = {Graph Reduction with Spectral and Cut Guarantees},
42 | journal = {Journal of Machine Learning Research},
43 | year = {2019},
44 | volume = {20},
45 | number = {116},
46 | pages = {1-42},
47 | url = {http://jmlr.org/papers/v20/18-680.html}
48 | }
49 | ```
50 |
51 | ## Acknowledgements
52 |
53 | This work was kindly supported by the Swiss National Science Foundation (grant number PZ00P2 179981). I would like to thank [Scott Gigante](https://cbb.yale.edu/people/scott-gigante) for helping package the code.
54 |
55 | 15 May 2020
56 |
57 | [Andreas Loukas](https://andreasloukas.blog)
58 |
59 | [](https://zenodo.org/badge/latestdoi/175851068)
60 |
61 | Released under the Apache license 2.0
62 |
--------------------------------------------------------------------------------
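For quick reference, a minimal usage sketch of the package, following examples/coarsening_methods.py above (the Sensor toy graph and the 0.6 ratio are arbitrary choices):

```python
import pygsp as gsp
from graph_coarsening.coarsening_utils import coarsen, plot_coarsening

G = gsp.graphs.Sensor(256, seed=42)   # any connected pygsp graph with coordinates
C, Gc, Call, Gall = coarsen(G, r=0.6, method='variation_neighborhoods')
print(G.N, '->', Gc.N)                # roughly a 60% reduction in node count
plot_coarsening(Gall, Call, title='variation_neighborhoods', size=2)
```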
/sparsenet/util/torch_util.py:
--------------------------------------------------------------------------------
1 | # Created at 2020-06-13
2 | # Summary: torch related functions. Mainly implemented some sparse matrix operations for pytorch==1.4.0.
3 |
4 | from time import time
5 |
6 | import networkx as nx
7 | import numpy as np
8 | import scipy as sp
9 | import torch
10 | from deprecated import deprecated
11 | from scipy.sparse import csr_matrix, coo_matrix
12 |
13 | from sparsenet.util.util import summary, tonp, pf
14 |
15 |
16 | def sparse_tensor2_sparse_numpyarray(sparse_tensor):
17 | """
18 | :param sparse_tensor: a COO torch.sparse.FloatTensor
19 | :return: a scipy.sparse.coo_matrix
20 | """
21 | if sparse_tensor.device.type == 'cuda':
22 | sparse_tensor = sparse_tensor.to('cpu')
23 |
24 | values = sparse_tensor._values().numpy()
25 | indices = sparse_tensor._indices()
26 | rows, cols = indices[0, :].numpy(), indices[1, :].numpy()
27 | size = sparse_tensor.size()
28 | scipy_sparse_mat = coo_matrix((values, (rows, cols)), shape=size, dtype=np.float)
29 | return scipy_sparse_mat
30 |
31 |
32 | def sparse_matrix2sparse_tensor(ret, dev='cpu'):
33 | # coo sparse matrix to sparse tensor
34 | # https://bit.ly/30DI2u8
35 | values = ret.data
36 | indices = np.vstack((ret.row, ret.col))
37 | i = torch.LongTensor(indices)
38 | v = torch.FloatTensor(values)
39 | shape = ret.shape
40 | return torch.sparse.FloatTensor(i, v, torch.Size(shape)).to(dev)
41 |
42 |
43 | def sparse_mm(L, Q):
44 | """
45 | :param L: a sparse tensor
46 | :param Q: a sparse diagonal tensor
47 | :return: Q.L.Q
48 | """
49 | dev = L.device
50 |     if dev.type == 'cuda':
51 | L = L.to('cpu')
52 | Q = Q.to('cpu')
53 |
54 | L = sparse_tensor2_sparse_numpyarray(L) # csr_matrix(L)
55 | Q = sparse_tensor2_sparse_numpyarray(Q) # csr_matrix(Q)
56 |
57 | ret = coo_matrix(Q.dot(L.dot(Q))) # coo matrix sparse
58 | return sparse_matrix2sparse_tensor(ret, dev=dev)
59 |
60 |
61 | def sparse_mm2(P, D1, D2):
62 | """
63 | :param P: a sparse tensor of (n, N)
64 | :param D1: a sparse diagonal tensor of (N, N)
65 | :param D2: a sparse diagonal tensor of (n, n)
66 | :return: D1.P.D2 also a sparse tensor
67 | """
68 |
69 | dev = P.device
70 |     if dev.type == 'cuda':
71 | P, D1, D2 = P.to('cpu'), D1.to('cpu'), D2.to('cpu')
72 | P = sparse_tensor2_sparse_numpyarray(P)
73 | D1 = sparse_tensor2_sparse_numpyarray(D1)
74 | D2 = sparse_tensor2_sparse_numpyarray(D2)
75 | try:
76 | ret = coo_matrix(D2.dot(P.dot(D1)))
77 | except ValueError:
78 | summary(P.todense(), 'P')
79 | summary(D1.todense(), 'D1')
80 | summary(D2.todense(), 'D2')
81 | exit()
82 | return sparse_matrix2sparse_tensor(ret, dev=dev)
83 |
84 |
85 | @deprecated('To be removed')
86 | def mm(n=10):
87 | g = nx.random_geometric_graph(n, 0.1)
88 | L = nx.laplacian_matrix(g).todense()
89 | L = torch.Tensor(L)
90 | Q = torch.diag(torch.rand(n))
91 |
92 | summary(L, 'L')
93 | summary(Q, 'Q')
94 |
95 | # method 1
96 | t0 = time()
97 | ret1 = sp.sparse.csr_matrix(L).dot(sp.sparse.csr_matrix(Q))
98 | ret1 = sp.sparse.csr_matrix(Q).dot(ret1)
99 | summary(ret1, 'ret1')
100 | t1 = time()
101 | print(f'method 1: {pf(t1 - t0, 2)}')
102 |
103 | # ret 2
104 | ret2 = tonp(Q).dot(tonp(L).dot(tonp(Q)))
105 | summary(ret2, 'ret2')
106 | t2 = time()
107 | print(f'method 2: {pf(t2 - t1, 2)}')
108 |
109 | assert (ret2 - ret1 == 0).all()
110 | # summary(tonp(tonp(ret2) - tonp(ret1.todense())), 'ret2-ret1')
111 |
112 |
113 | if __name__ == '__main__':
114 | mm()
115 | exit()
116 | n = 50 # 000
117 | g = nx.random_geometric_graph(n, 0.01)
118 | L = nx.laplacian_matrix(g)
119 | L = torch.Tensor(L)
120 | print(L)
121 | exit()
122 |
123 | Q = torch.diag(torch.rand(n))
124 |
125 | L, Q = L.to_sparse(), Q.to_sparse()
126 | ret = sparse_mm(L, Q)
127 | summary(ret, 'ret')
128 |
129 | exit()
130 | mm(n=1000)
131 |
--------------------------------------------------------------------------------
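A minimal sketch of calling sparse_mm directly, outside the test code in __main__ above (assuming the sparsenet package is importable):

```python
import networkx as nx
import numpy as np
import torch
from sparsenet.util.torch_util import sparse_mm

g = nx.path_graph(5)
L = torch.tensor(np.asarray(nx.laplacian_matrix(g).todense()), dtype=torch.float).to_sparse()
Q = torch.diag(torch.rand(5)).to_sparse()
ret = sparse_mm(L, Q)   # sparse tensor holding Q.L.Q
print(ret.to_dense())
```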
/sparsenet/util/sample.py:
--------------------------------------------------------------------------------
1 | # used for baseline (BL) graph coarsen method
2 |
3 | import networkx as nx
4 | import numpy as np
5 | import torch
6 | import torch_geometric
7 | from torch_geometric.utils import to_networkx
8 |
9 | from sparsenet.util.util import summary, fix_seed, random_pygeo_graph, timefunc
10 |
11 | INFINITY = 1e8
12 |
13 |
14 | @timefunc
15 | def sample_N2Nlandmarks(G, N, weight_key='edge_weight', reproducible=True):
16 | '''
17 | Node to nearest landmarks sampling.
18 |     Select a number of landmarks; every other node is then collapsed onto its nearest landmark.
19 | :param G: The input networkx Graph or pygeo graph. Required to be CONNECTED. The input graph is by default
20 | DIRECTED.
21 | :param N: Number of nodes (to be sampled) in the sampled graph.
22 |     :param weight_key: The key name (in the edge-attribute dictionary) for the weight information.
23 |     :return: The sampled graph G_prime and the correspondence dictionary Assignment. The sampled graph is relabeled
24 |         to (0, ..., N-1). Assignment is a dict mapping each new node id (0 to N-1) to the set of original nodes collapsed into it.
25 | '''
26 | if reproducible: fix_seed()
27 |
28 | if isinstance(G, torch_geometric.data.data.Data):
29 | G = to_networkx(G, edge_attrs=[weight_key])
30 |
31 |     assert (nx.is_directed(G) and nx.is_strongly_connected(
32 |         G)), f'Input graph must be connected. {nx.number_strongly_connected_components(G)}' \
33 |              f' components detected, with sizes {[len(c) for c in nx.strongly_connected_components(G)]}'
34 | V_length = G.number_of_nodes()
35 | assert (V_length >= N), f'graph has fewer nodes than input sample size {N}'
36 | V = list(G.nodes)
37 | assert (isinstance(V[0], int)), 'the node id should be integers'
38 | landmarks = [V[i] for i in np.random.choice(V_length, N, replace=False).tolist()]
39 | nearest_neighbor = {x: x for x in V}
40 | shortest_path_distance = {x: INFINITY for x in V}
41 | for landmark in landmarks:
42 | shortest_path_lengths = nx.single_source_shortest_path_length(G, landmark)
43 | for key, value in shortest_path_lengths.items():
44 | if value < shortest_path_distance[key]:
45 | shortest_path_distance[key] = value
46 | nearest_neighbor[key] = landmark
47 |
48 | # new ids for those landmarks are 0-N-1 in G', build a new sparsified graph G' here
49 | G_prime = nx.Graph()
50 | G_prime.add_nodes_from([i for i in range(N)])
51 | Assignment, map_landmarkGid2Gpid = {}, {}
52 | for i, id in enumerate(landmarks):
53 | map_landmarkGid2Gpid[id] = i
54 | for key, value in nearest_neighbor.items():
55 | id = map_landmarkGid2Gpid[value]
56 | Assignment[id] = [key] if id not in Assignment else Assignment[id] + [key]
57 | for key, value in Assignment.items():
58 | Assignment[key] = set(value)
59 |
60 | # build edge in the sparsified graph
61 | g_prime_edges = {}
62 | for u, v, feature in G.edges.data():
63 | i, j, weight = map_landmarkGid2Gpid[nearest_neighbor[u]], map_landmarkGid2Gpid[
64 | nearest_neighbor[v]], feature.get(weight_key, 1)
65 | if i != j:
66 | if i > j:
67 | i, j = j, i
68 | g_prime_edges[(i, j)] = weight if (i, j) not in g_prime_edges else g_prime_edges[(i, j)] + weight
69 |
70 | # divided by 2 to make sure in the limit (no compression), the resulting graph is the same as original graph
71 | g_prime_edges = [(i, j, weight / 2.0) for (i, j), weight in g_prime_edges.items()]
72 | G_prime.add_weighted_edges_from(g_prime_edges, weight=weight_key)
73 | # todo: shall we make G_prime undirected?
74 | return G_prime, Assignment
75 |
76 |
77 | if __name__ == '__main__':
78 | fix_seed()
79 | n_node, n_edge, n_sample = 320, 5000, 100
80 | nfeat_dim = 42
81 | efeat_dim = 20
82 | G = random_pygeo_graph(n_node, nfeat_dim, n_edge, efeat_dim, device='cpu')
83 | G.edge_weight = torch.rand(G.edge_index.size(1), device=G.edge_index.device)
84 | summary(G, 'G')
85 |
86 | G_prime, Assignment = sample_N2Nlandmarks(G, n_sample, weight_key='edge_weight')
87 | print(nx.info(G_prime))
88 |
--------------------------------------------------------------------------------
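sample_N2Nlandmarks also works on a plain networkx graph, provided it is directed, strongly connected, and has integer node ids (see the assertions above). A small sketch, assuming the sparsenet package is importable (the Watts-Strogatz toy graph is an arbitrary choice):

```python
import networkx as nx
from sparsenet.util.sample import sample_N2Nlandmarks

# Undirected connected toy graph, converted to a DiGraph so that it is
# strongly connected, with a unit edge weight under the expected key.
G = nx.connected_watts_strogatz_graph(100, 4, 0.1, seed=0).to_directed()
nx.set_edge_attributes(G, 1.0, 'edge_weight')

G_prime, assignment = sample_N2Nlandmarks(G, 10, weight_key='edge_weight')
print(G_prime.number_of_nodes())                 # 10 landmark nodes
print(sum(len(s) for s in assignment.values()))  # all 100 original nodes assigned
```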
/sparsenet/util/name_util.py:
--------------------------------------------------------------------------------
1 | # Created at 2020-06-06
2 | # Summary: some global names
3 |
4 | import os
5 | import os.path as osp
6 |
7 | from sparsenet.util.util import fig_dir, make_dir, model_dir
8 |
9 | loukas_datasets = ['minnesota', 'airfoil', 'yeast', 'bunny']
10 | methods = ['variation_edges', 'variation_neighborhoods', 'algebraic_JC', 'heavy_edge', 'affinity_GS']
11 | syn_graphs = ['random_geo', 'random_er', 'ws', 'ba', 'shape']
12 |
13 | big_ego_graphs = ['PubMed', 'Coauthor-CS', 'Coauthor-physics', 'Amazon-photo', 'Amazon-computers', 'yelp', 'reddit',
14 | 'flickr']
15 | ego_graphs = big_ego_graphs + ['CiteSeer']
16 |
17 |
18 | def set_figname(args, name='subgraph'):
19 | """ used to set dir where figure is saved"""
20 | dir = os.path.join(fig_dir(), args.dataset,
21 | f'ratio_{args.ratio}',
22 | f'method_{args.method}',
23 | f'n_epoch_{args.n_epoch}',
24 | f'n_bottomk_{args.n_bottomk}',
25 | f'lap_{args.lap}',
26 | '')
27 | dir = dir.replace('_', '-')
28 | make_dir(dir)
29 | name = name.replace('_', '-')
30 | return dir + name + '.pdf'
31 |
32 |
33 | def set_model_dir(args, train_indices, val_indices, test_indices):
34 | """ used to set dir where model is saved """
35 | OUT_PATH = os.path.join(model_dir(), args.dataset,
36 | f'ratio_{args.ratio}',
37 | f'strategy_{args.strategy}',
38 | f'method_{args.method}',
39 | f'train_{len(train_indices)}',
40 | f'val_{len(val_indices)}',
41 | f'test_{len(test_indices)}',
42 | f'loss_{args.loss}',
43 | f'n_epoch_{args.n_epoch}',
44 | f'n_bottomk_{args.n_bottomk}',
45 | f'lap_{args.lap}',
46 | f'bs_{args.bs}',
47 | f'lr_{args.lr}',
48 | f'ini_{args.ini}',
49 | # f'correction_{args.correction}'
50 | '')
51 | if args.dataset in ['coauthor-cs', 'coauthor-physics', 'flickr', 'pubmeds']:
52 | OUT_PATH = os.path.join(OUT_PATH, f'w_len_{args.w_len}', '')
53 | make_dir(OUT_PATH)
54 | return OUT_PATH
55 |
56 |
57 | def set_coarsening_graph_dir(args):
58 | coarse_dir = osp.join(model_dir(), '..', 'coarse_graph')
59 |
60 | if args.strategy == 'loukas':
61 | dir = osp.join(coarse_dir,
62 | 'loukas',
63 | args.dataset,
64 | f'ratio_{args.ratio}',
65 | f'method_{args.method}',
66 | f'n_bottomk_{args.n_bottomk}',
67 | f'cur_idx_{args.cur_idx}',
68 | '')
69 |
70 | elif args.strategy == 'DK':
71 | dir = osp.join(coarse_dir,
72 | 'DK',
73 | args.dataset,
74 | f'ratio_{args.ratio}',
75 | f'cur_idx_{args.cur_idx}',
76 | '')
77 |
78 | else:
79 | raise NotImplementedError
80 |
81 | if args.dataset in ['coauthor-cs', 'coauthor-physics', 'flickr', 'pubmeds']:
82 | dir = os.path.join(dir, f'w_len_{args.w_len}', '')
83 |
84 | make_dir(dir)
85 | return dir
86 |
87 |
88 | def set_eigenvec_dir(args):
89 | eig_dir = osp.join(model_dir(), '..', 'eigenvec')
90 | assert args.dataset in ['coauthor-cs', 'coauthor-physics', 'flickr', 'pubmeds']
91 | dir = osp.join(eig_dir,
92 | args.strategy,
93 | args.dataset,
94 | f'ratio_{args.ratio}',
95 | f'method_{args.method}',
96 | f'n_bottomk_{args.n_bottomk}',
97 | f'cur_idx_{args.cur_idx}',
98 | f'w_len_{args.w_len}',
99 | '')
100 | make_dir(dir)
101 | return dir
102 |
--------------------------------------------------------------------------------
/sparsenet/util/train_util.py:
--------------------------------------------------------------------------------
1 | # Created at 2020-05-18
2 | # Summary: util functions to monitor training
3 |
4 | import functools
5 |
6 | import numpy as np
7 | import torch
8 | import torch.nn.functional as F
9 |
10 | from sparsenet.util.util import banner, timefunc, summary
11 |
12 |
13 | @timefunc
14 | def check_laplacian(L, step, eps=1e-8):
15 |     """ Check whether the laplacian stays symmetric during training,
16 |     and check that there is no nan in the laplacian.
17 |     :param L: output of get_laplacian_mat. torch.sparse.tensor
18 |     :param step: iteration number
19 |     :param eps: maximum difference allowed for two floats to be considered equal
20 | """
21 |
22 | # check if there is nan in the tensor
23 | Ltypes = (torch.sparse.FloatTensor, torch.cuda.sparse.FloatTensor)
24 | assert isinstance(L, Ltypes), 'Input laplacian is not sparse tensor'
25 | nan_check = torch.isnan(L._values())
26 | nan_cnt = nan_check.nonzero().shape[0]
27 | if nan_cnt != 0:
28 | u, v = L._indices()[:, nan_check.nonzero()[0]]
29 | u, v = u.item(), v.item()
30 | exit(f'Laplacian at step {step} has {nan_cnt} nan values, e.g., L({u}, {v}) = Nan.')
31 |
32 | # dont want to convert to dense. manual implement.
33 | indices, values, sym_check = L._indices(), L._values(), {}
34 | for i in range(indices.shape[1]):
35 | u, v = indices[:, i]
36 | u, v = u.item(), v.item()
37 | sym_check[(u, v)] = values[i].item()
38 | for i in range(indices.shape[1]):
39 | u, v = indices[:, i]
40 | u, v = u.item(), v.item()
41 | if (v, u) not in sym_check and abs(sym_check[(u, v)]) > eps:
42 | exit(f'Laplacian at step {step} is not symmetric... on ({u}, {v}), with L({u}, {v})={sym_check[(u, v)]}'
43 | f' but L({v}, {u})=0.')
44 | if abs(sym_check[(u, v)] - sym_check[(v, u)]) > eps:
45 | exit(f'Laplacian at step {step} is not symmetric... on ({u}, {v}), with L({u}, {v})={sym_check[(u, v)]}'
46 | f' but L({v}, {u})={sym_check[(v, u)]}.')
47 |
48 | print(f'Laplacian at step {step} is normal!')
49 |
50 |
51 | class monitor():
52 | def __init__(self):
53 | pass
54 |
55 | @staticmethod
56 | def data_monitor(train_data, sub, args):
57 | banner('Train data')
58 | for (k, v) in train_data[:args.bs]:
59 | print(k, v, sub.g_sml.edge_index[:, k])
60 | print()
61 |
62 | @staticmethod
63 | def train_data_monitor(train_data, args):
64 | banner('Train_data first check')
65 | for i, (k, v) in enumerate(train_data[:args.bs]):
66 | if i > 5: break
67 | print(k, v)
68 |
69 | def train_monitor(self, pred, edge_weight_sml):
70 | summary(pred, 'pred')
71 | summary(edge_weight_sml, 'edge_weight_sml')
72 |
73 |
74 | def no_grad_func(func):
75 | @functools.wraps(func)
76 | def new_func(*args, **kwargs):
77 | with torch.no_grad():
78 | return func(*args, **kwargs)
79 |
80 | return new_func
81 |
82 |
83 | @no_grad_func
84 | def monitor_param_saturation(model):
85 | monitors = {}
86 | for name, p in model.named_parameters():
87 | p = F.sigmoid(p)
88 | sat = 1 - (p - (p > 0.5).float()).abs()
89 | monitors['sat/' + name] = sat
90 | return monitors
91 |
92 |
93 | if __name__ == '__main__':
94 | # banner('This one is sym and has no nan!')
95 | # i = torch.LongTensor([[0, 1, 2], [0, 1, 2]])
96 | # v = torch.FloatTensor([3, 4, 5])
97 | # s1 = torch.sparse.FloatTensor(i, v, torch.Size([3, 3]))
98 | # check_laplacian(s1, 1)
99 |
100 | # banner('This one is not symmetric!')
101 | # i = torch.LongTensor([[0, 1, 1],
102 | # [2, 0, 2]])
103 | # v = torch.FloatTensor([3, 4, 5])
104 | # s2 = torch.sparse.FloatTensor(i, v, torch.Size([2, 3]))
105 | # check_laplacian(s2, 1)
106 |
107 | banner('This one has nan value!')
108 | i = torch.LongTensor([[0, 1, 2], [0, 1, 2]])
109 | v = torch.FloatTensor([3, 4, np.nan])
110 | s3 = torch.sparse.FloatTensor(i, v, torch.Size([3, 3]))
111 | check_laplacian(s3, 1)
112 |
--------------------------------------------------------------------------------
/sparsenet/util/model_util.py:
--------------------------------------------------------------------------------
1 | # Created at 2020-05-06
2 | # Summary: utils for sparsenet.
3 |
4 | import numpy as np
5 | import torch
6 |
7 | from sparsenet.model.eval import trainer, tester # train, set_train_data
8 | from sparsenet.util.util import banner, red, pf
9 |
10 |
11 | class ModelEvaluator():
12 | def __init__(self, model, dataset_loader, dev, optimizer):
13 | self.dev = dev
14 | self.optimizer = optimizer
15 | self.model = model
16 | self.dataset_loader = dataset_loader
17 |
18 | def set_modelpath(self, path):
19 | self.modelpath = path
20 |
21 | def train(self, idx, TR, model, args):
22 | """ train the model for one graph """
23 | TR.set_train_data(args, self.dataset_loader)
24 | TR.train(model, self.optimizer, args, verbose=False)
25 | TR.delete_train_data(idx)
26 | return model
27 |
28 | def validate(self, idx, val_indices, TE, model, args):
29 | val_score = self.val_score
30 | val_score[idx] = {'n_gen': [], 'impr_ratio': [], 'eigen_ratio': []}
31 | for idx_ in val_indices:
32 | args.test_idx = idx_
33 | args.cur_idx = idx_
34 | TE.set_test_data(args, self.dataset_loader)
35 | n_gen, impr_ratio, eigen_ratio = TE.eval(model, args, verbose=False)
36 |
37 | val_score[idx]['n_gen'].append(n_gen)
38 | val_score[idx]['impr_ratio'].append(impr_ratio)
39 | val_score[idx]['eigen_ratio'].append(eigen_ratio)
40 |
41 | banner(f'{args.dataset}: finish validating graph {val_indices}.')
42 |
43 | cur_impr_ratio = np.mean(val_score[idx]['impr_ratio'])
44 | cur_eigen_ratio = np.mean(val_score[idx]['eigen_ratio'])
45 | print(cur_eigen_ratio, self.best_eigen_ratio)
46 | self.val_score[idx] = val_score[idx]
47 | return cur_impr_ratio, cur_eigen_ratio
48 |
49 | def save(self, idx, model, mode='eigen-ratio'):
50 | """ save model for training graph idx """
51 | assert mode in ['eigen-ratio', 'improve-ratio']
52 | f = f'checkpoint-best-{mode}.pkl'
53 |
54 | if mode == 'eigen-ratio':
55 | torch.save(model.state_dict(), self.modelpath + f)
56 | print(red(f'Save model for train idx {idx}. Best-eigen-ratio is {pf(self.best_eigen_ratio, 2)}.'))
57 |         elif mode == 'improve-ratio':
58 | torch.save(model.state_dict(), self.modelpath + f)
59 | print(red(f'Save model for train idx {idx}. Best-improve-ratio is {pf(self.best_impr_ratio, 2)}.'))
60 |
61 | def find_best_model(self, model, train_indices, val_indices, args):
62 | """ save the best model on validation dataset """
63 |
64 | self.TR = trainer(dev=self.dev)
65 | self.TE = tester(dev=self.dev)
66 |
67 | self.val_score = {}
68 | self.best_n_gen = -1e10
69 | self.best_impr_ratio = -1e30
70 | self.best_eigen_ratio = -1e30
71 | self.train_indices = train_indices
72 | self.val_indices = val_indices
73 |
74 | for idx in self.train_indices:
75 | args.train_idx = idx
76 | args.cur_idx = idx
77 |
78 | model = self.train(idx, self.TR, model, args)
79 | cur_impr_ratio, cur_eigen_ratio = self.validate(idx, val_indices, self.TE, model, args)
80 |
81 | # save the model if it works well on val data
82 | if cur_eigen_ratio > self.best_eigen_ratio:
83 | self.best_eigen_ratio = cur_eigen_ratio
84 | self.save(idx, model, mode='eigen-ratio')
85 |
86 | if cur_impr_ratio > self.best_impr_ratio:
87 | self.best_impr_ratio = cur_impr_ratio
88 | self.save(idx, model, mode='improve-ratio')
89 | return model, args
90 |
91 | def test_model(self, model, test_indices, AP, args):
92 | model_name = AP.set_model_name()
93 |
94 | model.load_state_dict(torch.load(self.modelpath + model_name))
95 |
96 | for idx_ in test_indices:
97 | args.test_idx = idx_
98 | args.cur_idx = idx_
99 | self.TE.set_test_data(args, self.dataset_loader)
100 | self.TE.eval(model, args, verbose=False)
101 | banner(f'{args.dataset}: finish testing graph {idx_}.')
102 |
103 |
104 | if __name__ == '__main__':
105 | pass
106 |
--------------------------------------------------------------------------------
/sparsenet/evaluation/graph-coarsening/examples/experiment_approximation.py:
--------------------------------------------------------------------------------
1 | # Created at 2020-06-02
2 | # Summary:
3 |
4 | from graph_coarsening.coarsening_utils import *
5 | import graph_coarsening.graph_lib as graph_lib
6 | import graph_coarsening.graph_utils as graph_utils
7 |
8 | import numpy as np
9 | import scipy as sp
10 | from scipy import io
11 | from scipy.linalg import circulant
12 | import time
13 | import os
14 |
15 | import matplotlib
16 | import matplotlib.pylab as plt
17 | from mpl_toolkits.mplot3d import Axes3D
18 |
19 | import pygsp as gsp
20 | gsp.plotting.BACKEND = 'matplotlib'
21 |
22 | # Parameters
23 | graphs = ['yeast','minnesota', 'bunny', 'airfoil']
24 | methods = ['heavy_edge', 'variation_edges', 'variation_neighborhoods', 'algebraic_JC', 'affinity_GS', 'kron']
25 | K_all = np.array([10,40], dtype=np.int32)
26 | r_all = np.linspace(0.1, 0.9, 17, dtype=np.float32)
27 |
28 | print('k: ', K_all, '\nr: ', r_all)
29 |
30 | rerun_all = False
31 | rewrite_results = False
32 | if rerun_all:
33 |
34 | algorithm = 'greedy'
35 | max_levels = 20
36 | n_methods = len(methods)
37 | n_graphs = len(graphs)
38 |
39 | for graphIdx, graph in enumerate(graphs):
40 |
41 | N = 4000 # this is only an upper bound (the actual size depends on the graph)
42 | G = graph_lib.real(N, graph)
43 | N = G.N
44 | if N < 100: continue
45 |
46 | # precompute spectrum needed for metrics
47 | if K_all[-1] > N / 2:
48 | [Uk, lk] = eig(G.L)
49 | else:
50 | offset = 2 * max(G.dw)
51 | T = offset * sp.sparse.eye(G.N, format='csc') - G.L
52 | lk, Uk = sp.sparse.linalg.eigsh(T, k=K_all[-1], which='LM', tol=1e-6)
53 | lk = (offset - lk)[::-1]
54 | Uk = Uk[:, ::-1]
55 |
56 | subspace = np.zeros((n_methods, len(K_all), len(r_all)))
57 | failed = np.zeros((n_methods, len(K_all), len(r_all)))
58 | ratio = np.zeros((n_methods, len(K_all), len(r_all)))
59 |
60 | for KIdx, K in enumerate(K_all):
61 |
62 | print('{} {}| K:{:2.0f}'.format(graph, N, K))
63 |
64 | for rIdx, r in enumerate(r_all):
65 |
66 | n_target = int(np.floor(N * (1 - r)))
67 | if K > n_target:
68 | print('Warning: K={}>n_target={}. skipping'.format(K, n_target))
69 | continue # K = n_target
70 |
71 | for methodIdx, method in enumerate(methods):
72 |
73 | # algorithm is not deterministic: run a few times
74 | if method == 'kron':
75 | if KIdx == 0:
76 | n_iterations = 2
77 | n_failed = 0
78 | r_min = 1.0
79 | for iteration in range(n_iterations):
80 |
81 | Gc, iG = kron_coarsening(G, r=r, m=None)
82 | metrics = kron_quality(iG, Gc, kmax=K_all[-1], Uk=Uk[:, :K_all[-1]], lk=lk[:K_all[-1]])
83 |
84 | if metrics['failed']:
85 | n_failed += 1
86 | else:
87 | r_min = min(r_min, metrics['r'])
88 | for iKIdx, iK in enumerate(K_all):
89 | subspace[methodIdx, iKIdx, rIdx] += metrics['error_subspace'][iK - 1]
90 |
91 | subspace[methodIdx, :, rIdx] /= (n_iterations - n_failed)
92 | failed[methodIdx, :, rIdx] = 1 if (r_min < r - 0.05) else 0
93 | ratio[methodIdx, :, rIdx] = r_min
94 |
95 | if np.abs(r_min - r) > 0.02: print(
96 | 'Warning: ratio={} instead of {} for {}'.format(r_min, r, method))
97 |
98 | else:
99 | C, Gc, Call, Gall = coarsen(G, K=K, r=r, max_levels=max_levels, method=method,
100 | algorithm=algorithm, Uk=Uk[:, :K], lk=lk[:K])
101 | metrics = coarsening_quality(G, C, kmax=K, Uk=Uk[:, :K], lk=lk[:K])
102 |
103 | subspace[methodIdx, KIdx, rIdx] = metrics['error_subspace'][-1]
104 | failed[methodIdx, KIdx, rIdx] = 1 if (metrics['r'] < r - 0.05) else 0
105 | ratio[methodIdx, KIdx, rIdx] = metrics['r']
106 |
107 | if np.abs(metrics['r'] - r) > 0.02:
108 | print('Warning: ratio={} instead of {} for {}'.format(metrics['r'], r, method))
109 |
110 | if rewrite_results:
111 | filepath = os.path.join('..', 'results', 'experiment_approximation_' + graph + '.npz')
112 | print('.. saving to "' + filepath + '"')
113 | np.savez(filepath, methods=methods, K_all=K_all, r_all=r_all, subspace=subspace, failed=failed)
114 |
115 | print('done!')
116 |
--------------------------------------------------------------------------------
/sparsenet/evaluation/graph-coarsening/graph_coarsening/graph_lib.py:
--------------------------------------------------------------------------------
1 | import os
2 | import tempfile
3 | import zipfile
4 | from urllib import request
5 |
6 | import numpy as np
7 | import scipy as sp
8 | from pygsp import graphs
9 | from scipy import sparse
10 |
11 | from . import graph_utils
12 |
13 | _YEAST_URL = "http://nrvis.com/download/data/bio/bio-yeast.zip"
14 | _MOZILLA_HEADERS = [("User-Agent", "Mozilla/5.0")]
15 |
16 |
17 | def download_yeast():
18 | r"""
19 | A convenience method for loading a network of protein-to-protein interactions in budding yeast.
20 |
21 | http://networkrepository.com/bio-yeast.php
22 | """
23 | with tempfile.TemporaryDirectory() as tempdir:
24 | zip_filename = os.path.join(tempdir, "bio-yeast.zip")
25 | with open(zip_filename, "wb") as zip_handle:
26 | opener = request.build_opener()
27 | opener.addheaders = _MOZILLA_HEADERS
28 | request.install_opener(opener)
29 | with request.urlopen(_YEAST_URL) as url_handle:
30 | zip_handle.write(url_handle.read())
31 | with zipfile.ZipFile(zip_filename) as zip_handle:
32 | zip_handle.extractall(tempdir)
33 | mtx_filename = os.path.join(tempdir, "bio-yeast.mtx")
34 | with open(mtx_filename, "r") as mtx_handle:
35 | _ = next(mtx_handle) # header
36 | n_rows, n_cols, _ = next(mtx_handle).split(" ")
37 | E = np.loadtxt(mtx_handle)
38 | E = E.astype(int) - 1
39 | W = sparse.lil_matrix((int(n_rows), int(n_cols)))
40 | W[(E[:, 0], E[:, 1])] = 1
41 | W = W.tocsr()
42 | W += W.T
43 | return W
44 |
45 |
46 | def real(N, graph_name, connected=True):
47 | r"""
48 | A convenience method for loading toy graphs that have been collected from the internet.
49 |
50 | Parameters:
51 | ----------
52 | N : int
53 | The number of nodes. Set N=-1 to return the entire graph.
54 |
55 | graph_name : a string
56 | Use to select which graph is returned. Choices include
57 | * airfoil
58 | Graph from airflow simulation
59 | http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.50.9217&rep=rep1&type=pdf
60 | http://networkrepository.com/airfoil1.php
61 | * yeast
62 | Network of protein-to-protein interactions in budding yeast.
63 | http://networkrepository.com/bio-yeast.php
64 | * minnesota
65 | Minnesota road network.
66 | I am using the version provided by the PyGSP software package (initially taken from the MatlabBGL library.)
67 | * bunny
68 | The Stanford bunny is a computer graphics 3D test model developed by Greg Turk and Marc Levoy in 1994 at Stanford University
69 | I am using the version provided by the PyGSP software package.
70 | connected : Boolean
71 | Set to True if only the giant component is to be returned.
72 | """
73 |
74 | directory = os.path.join(
75 | os.path.dirname(os.path.dirname(graph_utils.__file__)), "data"
76 | )
77 |
78 | tries = 0
79 | while True:
80 | tries = tries + 1
81 |
82 | if graph_name == "airfoil":
83 | G = graphs.Airfoil()
84 | G = graphs.Graph(W=G.W[0:N, 0:N], coords=G.coords[0:N, :])
85 |
86 | elif graph_name == "yeast":
87 | W = download_yeast()
88 | G = graphs.Graph(W=W[0:N, 0:N])
89 |
90 | elif graph_name == "minnesota":
91 | G = graphs.Minnesota()
92 | W = G.W.astype(np.float)
93 | G = graphs.Graph(W=W[0:N, 0:N], coords=G.coords[0:N, :])
94 |
95 | elif graph_name == "bunny":
96 | G = graphs.Bunny()
97 | W = G.W.astype(np.float)
98 | G = graphs.Graph(W=W[0:N, 0:N], coords=G.coords[0:N, :])
99 |
100 | if connected == False or G.is_connected():
101 | break
102 | if tries > 1:
103 | print("WARNING: Disconnected graph. Using the giant component.")
104 | G, _ = graph_utils.get_giant_component(G)
105 | break
106 |
107 | if not hasattr(G, 'coords'):
108 | try:
109 | import networkx as nx
110 | graph = nx.from_scipy_sparse_matrix(G.W)
111 | pos = nx.nx_agraph.graphviz_layout(graph, prog='neato')
112 | G.set_coordinates(np.array(list(pos.values())))
113 | except ImportError:
114 | G.set_coordinates()
115 |
116 | return G
117 |
118 |
119 | def models(N, graph_name, connected=True, default_params=False, k=12, sigma=0.5):
120 | tries = 0
121 | while True:
122 | tries = tries + 1
123 | if graph_name == "regular":
124 | if default_params:
125 | k = 10
126 | offsets = []
127 | for i in range(1, int(k / 2) + 1):
128 | offsets.append(i)
129 | offsets.append(-(N - i))
130 |
131 | offsets = np.array(offsets)
132 | vals = np.ones_like(offsets)
133 | W = sp.sparse.diags(
134 | vals, offsets, shape=(N, N), format="csc", dtype=np.float
135 | )
136 | W = (W + W.T) / 2
137 | G = graphs.Graph(W=W)
138 |
139 | else:
140 |             print("ERROR: unknown model")
141 | return
142 |
143 | if connected == False or G.is_connected():
144 | break
145 | if tries > 1:
146 |             print("WARNING: disconnected graph. Using the giant component.")
147 |             G, _ = graph_utils.get_giant_component(G)
148 | break
149 | return G
150 |
151 |
152 | if __name__ == '__main__':
153 | g = real(-1, 'yeast')
154 | print(g)
155 |
--------------------------------------------------------------------------------
/sparsenet/util/cut_util.py:
--------------------------------------------------------------------------------
1 | # Created at 2020-06-11
2 | # Summary: cut, conductance related
3 |
4 | from typing import Optional
5 |
6 | import networkx as nx
7 | import numpy as np
8 | import torch
9 | from torch_geometric.utils import degree, to_networkx
10 |
11 | from sparsenet.util.util import summary, timefunc, fix_seed, pf, random_pygeo_graph
12 |
13 |
14 | def normalized_cut(edge_index, edge_attr, num_nodes: Optional[int] = None):
15 | r"""Computes the normalized cut :math:`\mathbf{e}_{i,j} \cdot
16 | \left( \frac{1}{\deg(i)} + \frac{1}{\deg(j)} \right)` of a weighted graph
17 | given by edge indices and edge attributes.
18 |
19 | Args:
20 | edge_index (LongTensor): The edge indices.
21 | edge_attr (Tensor): Edge weights or multi-dimensional edge features.
22 | num_nodes (int, optional): The number of nodes, *i.e.*
23 | :obj:`max_val + 1` of :attr:`edge_index`. (default: :obj:`None`)
24 |
25 | :rtype: :class:`Tensor`
26 | """
27 |
28 | row, col = edge_index[0], edge_index[1]
29 | deg = 1. / degree(col, num_nodes, edge_attr.dtype)
30 | deg = deg[row] + deg[col]
31 | cut = edge_attr * deg
32 | return cut
33 |
34 |
35 | def _set(row, s, dev='cuda'):
36 | # t0 = time()
37 | i = torch.nonzero(row[..., None] == s)[:, 0]
38 | # t1 = time()
39 | # print(pf(t1-t0, 2))
40 | row_s = torch.zeros(row.size())
41 | row_s[i] = 1
42 | return row_s.type(torch.int8).to(dev)
43 |
44 |
45 | @timefunc
46 | def pyG_conductance(edge_index, edge_attr, s, t=None, dev='cuda', verbose=False):
47 | """
48 | :param edge_index:
49 | :param edge_attr:
50 | :param s: a list or a tensor
51 | :param t: a list or a tensor
52 | :return: conductance (tensor)
53 | """
54 |
55 |     _t = t  # remember whether an explicit t was given (used below)
56 |     if t is None:
57 |         tmp = torch.unique(edge_index).tolist()
58 |         t = list(set(tmp) - set(s))
59 |
60 | s, t = torch.tensor(s).to(dev), torch.tensor(t).to(dev)
61 | edge_index, edge_attr = edge_index.to(dev), edge_attr.to(dev)
62 | row, col = edge_index[0], edge_index[1]
63 | del edge_index
64 |
65 | # row_s = torch.sum(row[..., None] == s, axis=1) # memory intensive
66 | row_s = _set(row, s, dev=dev)
67 | # col_s = torch.sum(col[..., None] == s, axis=1)
68 | col_s = _set(col, s, dev=dev)
69 | # summary(row_s - row_s_, 'row_s - row_s_')
70 | # summary(col_s - col_s_, 'col_s - col_s_')
71 |
72 | vol_s = torch.sum(torch.mul(edge_attr, row_s + col_s))
73 |
74 | # row_t = torch.sum(row[..., None] == t, axis=1)
75 | row_t = _set(row, t, dev=dev) if _t is not None else (1 - row_s).to(dev)
76 | # col_t = torch.sum(col[..., None] == t, axis=1)
77 | col_t = _set(col, t, dev=dev) if _t is not None else (1 - col_s).to(dev)
78 | vol_t = torch.sum(torch.mul(edge_attr, row_t + col_t))
79 |
80 | indices = torch.nonzero((row_s & col_t) | (row_t & col_s))
81 | cut = torch.sum(edge_attr[indices])
82 |
83 | # print(f'cut: {cut}. vol_s: {vol_s}. vol_t: {vol_t}')
84 | if verbose:
85 | print(f'cut: {cut}. vol_s: {vol_s}. vol_t: {vol_t}, conductance: {cut / max(1, min(vol_s, vol_t))}')
86 | return cut / max(1, min(vol_s, vol_t)) # make sure it's at least 1. This is needed for large reduction ratio.
87 |
88 |
89 | import argparse
90 |
91 | parser = argparse.ArgumentParser(description='Baseline for graph sparsification')
92 | parser.add_argument('--dataset', type=str, default='ws', help='dataset for egographs')
93 | parser.add_argument('--sample', action='store_true')
94 |
95 | if __name__ == '__main__':
96 | args = parser.parse_args()
97 | fix_seed()
98 | n_node, n_edge = 320, 5000
99 | N = 1
100 | idx = 0
101 |
102 | # kwargs = {'dataset': args.dataset, 'hop': -1, 'size': 50, 's_low': -1, 's_high': -1, 'sample': 'rw'}
103 | # pyGs = EgoGraphs(**kwargs)
104 | # pyGs = syth_graphs(type=args.dataset, n=2, size=1000) #
105 | pyG = random_pygeo_graph(n_node, 1, n_edge, 1)
106 | pyGs = [pyG] * 5
107 |
108 | for pyG in pyGs[:1]:
109 | # pyG = pyGs[idx]
110 | for _ in range(5):
111 | print(pyG)
112 | pyG.edge_weight = pyG.edge_weight * 1
113 | # summary(pyG, 'pyG')
114 | nxG = to_networkx(pyG, edge_attrs=['edge_weight'],
115 | to_undirected=True) # important: directed/non-directed makes difference for cuts
116 |
117 | pyG_cut = normalized_cut(pyG.edge_index, pyG.edge_weight, pyG.num_nodes, )
118 | s = np.random.choice(range(pyG.num_nodes), int(pyG.num_nodes / 2.0), replace=False).tolist()
119 | if args.sample:
120 | s, t = s[:len(s) // 2], s[len(s) // 2:]
121 | else:
122 | s, t = s, None
123 |
124 | summary(np.array(s), 's')
125 | c = pyG_conductance(pyG.edge_index, pyG.edge_weight, s=s, t=None, verbose=True, dev='cuda')
126 |
127 | nxcut = nx.cut_size(nxG, s, T=t, weight='edge_weight')
128 | volume_S = nx.algorithms.volume(nxG, s, weight='edge_weight')
129 | c_ = nx.conductance(nxG, s, T=t, weight='edge_weight')
130 | print(nxcut, volume_S, pf(c, 3))
131 | print()
132 |
133 | assert c == c_, f'c: {c}. c_: {c_}'
134 | exit()
135 |
136 | nx_cut, nx_conductance = [], []
137 | for u, v in nxG.edges:
138 | cut = nx.normalized_cut_size(nxG, [u], [v], weight='edge_weight')
139 | conductance = nx.conductance(nxG, [u], [v], weight='edge_weight')
140 | conductance_ = pyG_conductance(pyG.edge_index, pyG.edge_weight, [u], [v], )
141 | assert conductance == conductance_, f'nx: {conductance}. pyG: {conductance_}'
142 |
143 | nx_cut.append(cut)
144 | nx_conductance.append(conductance)
145 |
146 | summary(np.array(nx_conductance), 'nx_conductance')
147 |
148 | exit()
149 | summary(pyG_cut.numpy(), 'pyG_cut')
150 | summary(np.array(nx_cut), 'nx_cut')
151 |
--------------------------------------------------------------------------------
/sparsenet/util/pyg_util.py:
--------------------------------------------------------------------------------
1 | # Created at 2020-06-07
2 | # Summary:
3 | import os
4 | n=2
5 | os.environ['MKL_NUM_THREADS'] = str(n)
6 | os.environ['OMP_NUM_THREADS'] = str(n)
7 | os.environ['OPENBLAS_NUM_THREADS'] = str(n)
8 | os.environ['MKL_NUM_THREADS'] = str(n)
9 | os.environ['VECLIB_MAXIMUM_THREADS'] = str(n)
10 | os.environ['NUMEXPR_NUM_THREADS'] = str(n)
11 | import torch
12 | torch.set_num_threads(n) # always import this first
13 | status = f'{n}'
14 | print(f'thread status {__file__}: {status}')
15 |
16 | import os
17 | import os.path as osp
18 |
19 | import torch
20 | from torch_geometric.data import InMemoryDataset
21 |
22 | from sparsenet.util.data import EgoGraphs, precompute_eig, shape_data, syth_graphs, loukas_data
23 | from sparsenet.util.name_util import big_ego_graphs, syn_graphs, loukas_datasets
24 | from sparsenet.util.util import summary, \
25 | random_pygeo_graph
26 |
27 |
28 | class NonEgoGraphs(InMemoryDataset):
29 |     """ similar to EgoGraphs, but for other graphs (synthetic graphs + Loukas's datasets) """
30 |
31 | def __init__(self, dataset=None, transform=None, pre_transform=None):
32 | """ kwargs for function egographs """
33 | data_dir = os.path.join(osp.dirname(osp.realpath(__file__)), '..', 'data')
34 | nonegograph_dir = os.path.join(data_dir, 'nonegographs')
35 | self.dataset = dataset
36 | root = os.path.join(nonegograph_dir, self.dataset)
37 |
38 | super(NonEgoGraphs, self).__init__(root, transform, pre_transform)
39 | print(self.processed_paths[0])
40 | self.data, self.slices = torch.load(self.processed_paths[0])
41 |
42 | @property
43 | def raw_file_names(self):
44 | return ['some_file_1']
45 |
46 | @property
47 | def processed_file_names(self):
48 | return self.dataset # dict2name(self.egograph_kwargs)
49 |
50 | def download(self):
51 | pass
52 |
53 | def _select_datasets(self):
54 | dataset = self.dataset
55 | if dataset == 'shape':
56 | datasets = shape_data(50, _k=10)
57 | elif dataset == 'faust':
58 | datasets = shape_data(50, _k=10, name='FAUST')
59 | elif dataset == 'random_geo':
60 | datasets = syth_graphs(n=50, size=700, type='geo') # random_geo(n=10, size=512)
61 | elif dataset == 'random_er':
62 | datasets = syth_graphs(n=50, size=512, type='er') # random_er(n=10, size=512)
63 | elif dataset in ['sbm', 'ws', 'ba']:
64 | datasets = syth_graphs(n=50, size=512, type=dataset)
65 | elif dataset in ['yeast', 'airfoil', 'bunny', 'minnesota']:
66 | datasets = loukas_data(name=dataset)
67 |         else:
68 |             raise NotImplementedError(f'dataset {dataset} is not supported')
69 |
70 | return datasets
71 |
72 | def process(self):
73 | # Read data into huge `Data` list.
74 |
75 | data_list = self._select_datasets()
76 |
77 | if self.pre_filter is not None:
78 | data_list = [data for data in data_list if self.pre_filter(data)]
79 |
80 | if self.pre_transform is not None:
81 | data_list = [self.pre_transform(data) for data in data_list]
82 |
83 | data, slices = self.collate(data_list)
84 | torch.save((data, slices), self.processed_paths[0])
85 |
86 | import argparse
87 | parser = argparse.ArgumentParser(description='Baseline for graph sparsification')
88 | parser.add_argument('--dataset', type=str, default='Coauthor-CS', help='dataset for egographs')
89 | parser.add_argument('--lap', type=str, default='None')
90 | parser.add_argument('--n_vec', type=int, default=100)
91 | parser.add_argument('--w_len', type=int, default=5000)
92 |
93 |
94 | if __name__ == '__main__':
95 | def main():
96 | args = parser.parse_args()
97 | for dataset in [args.dataset]: # big_ego_graphs: # ['CiteSeer','PubMed', 'wiki-vote']:
98 | for size in [50]: # [20]:
99 | # kwargs = {"hop": -1, "size": size, "dataset": dataset, 's_low': -1, 's_high': -1, 'sample': 'rw',
100 | # 'n_vec':args.n_vec, 'w_len':args.w_len, 'include_self': False}
101 | kwargs = {'dataset': 'flickr', 'hop': -1, 'size': 1, 's_low': -1, 's_high': -1,
102 | 'sample': 'rw', 'n_vec': 500, 'w_len': 15000, 'include_self': True}
103 |
104 | data = EgoGraphs(**kwargs)
105 | for d in data:
106 | print(d)
107 | # print(data[0])
108 | # for idx, g in enumerate(data):
109 | # g = clip_feat(g, args, dim=52)
110 | # if idx < 5: print(g)
111 | # new_data = [clip_feat(g, args, dim=52) for g in data]
112 | del data
113 | print('hello')
114 |
115 | main()
116 | exit()
117 |
118 | g = syth_graphs(1, size=1000)
119 | summary(g)
120 | exit()
121 | for dataset in ['faust']: # syn_graphs + loukas_datasets:
122 | data = NonEgoGraphs(dataset=dataset)
123 | for i, d in enumerate(data):
124 | summary(d, i, highlight=True)
125 |
126 | exit()
127 |
128 |
129 | # dir = os.path.join(data_dir, 'wiki-vote')
130 | for dataset in [args.dataset]: # big_ego_graphs: # ['CiteSeer','PubMed', 'wiki-vote']:
131 | for hop in [2,]: # [3,4,5]: # [3, 4, 5, 6]: # [2,3,4,5,6]:
132 | for size in [20]: # [20]:
133 | s_low = 5000 if dataset in big_ego_graphs else 200
134 | s_high = 10000 if dataset in big_ego_graphs else 5000
135 | kwargs = {"hop": hop, "size": size, "dataset": dataset, 's_low': s_low, 's_high': s_high}
136 | data = EgoGraphs(**kwargs)
137 | print(data[0])
138 | # continue
139 |
140 | if False: # dataset == 'wiki-vote' and hop==3 and size==10:
141 | print(data)
142 | print(data[0]['None_vals'][:5])
143 | for g in data:
144 | summary(g)
145 | exit()
146 |
147 |
148 | for dataset in syn_graphs + loukas_datasets:
149 | data = NonEgoGraphs(dataset=dataset)
150 | summary(data[0], dataset, highlight=True)
151 | # data_cmp = syth_graphs(n=20, size=512, type='ws')
152 | # print(data_cmp[5])
153 |
154 | exit()
155 | g = random_pygeo_graph(1000, 1, 20000, 1)
156 |     summary(g, 'before')
157 | g = precompute_eig(g)
158 | summary(g, 'after')
159 | exit()
160 |
--------------------------------------------------------------------------------
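
For reference, here is a minimal sketch of the `InMemoryDataset` pattern that `NonEgoGraphs` follows: build a list of `Data` objects in `process()`, collate them, save to `processed_paths[0]`, and reload in `__init__`. The `ToyGraphs` class, its two tiny graphs, and the `/tmp` root are made up for illustration.

```python
import torch
from torch_geometric.data import Data, InMemoryDataset

class ToyGraphs(InMemoryDataset):
    def __init__(self, root, transform=None, pre_transform=None):
        super(ToyGraphs, self).__init__(root, transform, pre_transform)
        self.data, self.slices = torch.load(self.processed_paths[0])

    @property
    def raw_file_names(self):
        return []  # nothing to download

    @property
    def processed_file_names(self):
        return ['toy.pt']

    def download(self):
        pass

    def process(self):
        # two tiny graphs: a 2-path and a triangle
        data_list = [
            Data(edge_index=torch.tensor([[0, 1], [1, 0]]), num_nodes=2),
            Data(edge_index=torch.tensor([[0, 1, 2], [1, 2, 0]]), num_nodes=3),
        ]
        data, slices = self.collate(data_list)
        torch.save((data, slices), self.processed_paths[0])

if __name__ == '__main__':
    ds = ToyGraphs(root='/tmp/toy_graphs')
    print(len(ds), ds[0])
```
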
/sparsenet/model/example.py:
--------------------------------------------------------------------------------
1 | # Created at 2020-04-17
2 | # Summary: a simple example to illustrate data pipeline
3 |
4 | import os
5 |
6 | from sparsenet.util.model_util import ModelEvaluator
7 |
8 | n = 2
9 | os.environ['MKL_NUM_THREADS'] = str(n)
10 | os.environ['OMP_NUM_THREADS'] = str(n)
11 | os.environ['OPENBLAS_NUM_THREADS'] = str(n)
12 | os.environ['MKL_NUM_THREADS'] = str(n)
13 | os.environ['VECLIB_MAXIMUM_THREADS'] = str(n)
14 | os.environ['NUMEXPR_NUM_THREADS'] = str(n)
15 | import torch
16 |
17 | torch.set_num_threads(n) # always import this first
18 |
19 | import argparse
20 | import logging
21 | import sys
22 |
23 | from sparsenet.model.eval import tester # train, set_train_data
24 | from sparsenet.model.model import GNN_graphpred
25 | from sparsenet.util.data import data_loader
26 | from sparsenet.util.name_util import set_model_dir
27 | from sparsenet.util.util import fix_seed
28 | from sparsenet.util.args_util import argsparser
29 |
30 | parser = argparse.ArgumentParser(description='Graph edge sparsification')
31 |
32 | # model
33 | parser.add_argument('--n_layer', type=int, default=3, help='number of layers')
34 | parser.add_argument('--emb_dim', type=int, default=50, help='embedding dimension')
35 | parser.add_argument('--ratio', type=float, default=0.5, help='reduction ratio')
36 | parser.add_argument('--n_vec', type=int, default=100, help='number of random vectors')
37 | parser.add_argument('--force_pos', action='store_true', help='Force the output of GNN to be positive')
38 | parser.add_argument('--dataset', type=str, default='ws', help='the name of dataset')
39 | parser.add_argument('--w_len', type=int, default=5000, help='walk length')
40 |
41 | # optim
42 | parser.add_argument('--device', type=str, default='cuda', help='')
43 | parser.add_argument('--n_epoch', type=int, default=50, help='')
44 | parser.add_argument('--bs', type=int, default=600, help='batch size')
45 | parser.add_argument('--lr', type=float, default=1e-3, help='learning rate')
46 | parser.add_argument('--n_bottomk', type=int, default=40, help='number of bottom-k eigenvectors')
47 |
48 | # debug
49 | parser.add_argument('--lap', type=str, default='none', help='Laplacian type',
50 | choices=[None, 'sym', 'rw', 'none', 'None'])
51 | parser.add_argument('--debug', action='store_true', help='debug. Smaller graph')
52 | parser.add_argument('--tbx', action='store_true', help='write to tensorboardx.')
53 | parser.add_argument('--inv', action='store_true', help='use inverse Laplacian for loss')
54 | parser.add_argument('--viz', action='store_true', help='visualization of weights of sparsified graph. Save to dir.')
55 | parser.add_argument('--show', action='store_true', help='Show the figure.')
56 | parser.add_argument('--ini', action='store_true', help='initialize the output of the gnn to be near the weights of g_sml')
57 | parser.add_argument('--testonly', action='store_true', help='Skip the training. Only test.')
58 | parser.add_argument('--valeigen', action='store_true', help='Use eigen_ratio as metric to select model')
59 | parser.add_argument('--cacheeig', action='store_true', help='save and load cached eigenvector of coarse graph')
60 | parser.add_argument('--mlp', action='store_true', help='use a mlp baseline')
61 |
62 | # parser.add_argument('--verbose', action='store_true', help='control the info level for real graph')
63 | parser.add_argument('--train_idx', type=int, default=0, help='train index of the shape data. Do not change.')
64 | parser.add_argument('--test_idx', type=int, default=0, help='test index of the shape data. Do not change.')
65 | parser.add_argument('--cur_idx', type=int, default=-1, help='Current index. Used for saving coarsened graphs.')
66 | parser.add_argument('--lap_check', action='store_true', help='check that the Laplacian is well-formed during training')
67 | parser.add_argument('--n_cycle', type=int, default=1, help='number of cycles')
68 | parser.add_argument('--trial', type=int, default=0, help='trial. Act like random seed')
69 | parser.add_argument('--seed', type=int, default=1, help='random seed')
70 | parser.add_argument('--loss', type=str, default='quadratic', help='type of loss',
71 | choices=['quadratic', 'rayleigh'])
72 | parser.add_argument('--offset', type=int, default=0, help='number of offset eigenvector')
73 |
74 | parser.add_argument('--correction', action='store_true', help='Apply Laplacian correction')
75 | parser.add_argument('--dynamic', action='store_true', help='Dynamic projection')
76 | parser.add_argument('--loukas_quality', action='store_true', help="Compute the coarsening quality of Loukas's method")
77 | parser.add_argument('--log', type=str, default='debug', help='{info, debug}')
78 | parser.add_argument('--train_indices', type=str, default='0,',
79 | help='train indices of the dataset') # https://bit.ly/3dtJtPn
80 | parser.add_argument('--test_indices', type=str, default='0,', help='test indices of the dataset')
81 | parser.add_argument('--strategy', type=str, default='loukas', help='coarsening strategy', choices=['DK', 'loukas'])
82 | parser.add_argument('--method', type=str, default='variation_edges', help='Loukas methods',
83 | choices=['variation_neighborhoods', 'variation_edges', 'variation_cliques',
84 | 'heavy_edge', 'algebraic_JC', 'affinity_GS', 'kron', 'variation_neighborhood',
85 | 'DK_method'])
86 |
87 | if __name__ == '__main__':
88 | AP = argsparser(parser.parse_args())
89 | args = AP.args
90 | dev = args.device
91 | fix_seed(seed=args.seed)
92 |
93 | dataset_loader = data_loader(args, dataset=args.dataset)
94 | train_indices, val_indices, test_indices = AP.set_indices()
95 |
96 | nfeat_dim, efeat_dim, out_dim = 5, 1, 1
97 | model = GNN_graphpred(args.n_layer, args.emb_dim, nfeat_dim, efeat_dim, out_dim,
98 | force_pos=args.force_pos, mlp=args.mlp).to(dev)
99 | optimizer = torch.optim.Adam(model.parameters(), args.lr)
100 | logging.basicConfig(level=getattr(logging, args.log.upper()),
101 | handlers=[logging.StreamHandler(sys.stdout)])
102 |
103 | TE = tester(dev=dev)
104 | ################################################################
105 | ME = ModelEvaluator(model, dataset_loader, dev, optimizer)
106 | ME.set_modelpath(set_model_dir(args, train_indices, val_indices, test_indices))
107 | model, args = ME.find_best_model(model, train_indices, val_indices, args)
108 | ME.test_model(model, test_indices, AP, args)
109 | ################################################################
110 |
--------------------------------------------------------------------------------
/sparsenet/evaluation/graph-coarsening/examples/coarsening_demo.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding: utf-8
3 |
4 | # The script demonstrates how the code can be used with a toy example (see also [this blogpost](https://andreasloukas.blog/2018/11/05/multilevel-graph-coarsening-with-spectral-and-cut-guarantees/)).
5 | #
6 | # The code accompanies paper [Graph reduction with spectral and cut guarantees](http://www.jmlr.org/papers/volume20/18-680/18-680.pdf) by Andreas Loukas published at JMLR/2019 ([bibtex](http://www.jmlr.org/papers/v20/18-680.bib)).
7 | #
8 | # This work was kindly supported by the Swiss National Science Foundation (grant number PZ00P2 179981).
9 | #
10 | # 15 March 2019
11 | #
12 | # [Andreas Loukas](https://andreasloukas.blog)
13 | #
14 | # [![DOI](https://zenodo.org/badge/175851068.svg)](https://zenodo.org/badge/latestdoi/175851068)
15 | #
16 | # Released under the Apache license 2.0
17 |
18 | # In[1]:
19 |
20 |
21 | # get_ipython().system('pip install networkx')
22 | #
23 | #
24 | # # In[1]:
25 | #
26 | #
27 | # get_ipython().run_line_magic('load_ext', 'autoreload')
28 | # get_ipython().run_line_magic('autoreload', '2')
29 | # get_ipython().run_line_magic('matplotlib', 'inline')
30 | #
31 | #
32 | # # In[2]:
33 | #
34 | #
35 | # from IPython.core.display import display, HTML
36 | # display(HTML(""))
37 |
38 |
39 | # In[3]:
40 |
41 |
42 | from graph_coarsening.coarsening_utils import *
43 | import graph_coarsening.graph_utils
44 |
45 | import numpy as np
46 | import scipy as sp
47 |
48 | import matplotlib
49 | import matplotlib.pylab as plt
50 | from mpl_toolkits.mplot3d import Axes3D
51 |
52 | import networkx as nx
53 | import pygsp as gsp
54 | from pygsp import graphs
55 |
56 | from sparsenet.util.util import summary, tonp, np2set
57 |
58 | gsp.plotting.BACKEND = 'matplotlib'
59 |
60 |
61 | # ### Construct the graph ##
62 |
63 | # In[4]:
64 |
65 |
66 | N = 400
67 |
68 | G = graphs.BarabasiAlbert(N)
69 | if not hasattr(G, 'coords'):
70 | try:
71 | graph = nx.from_scipy_sparse_matrix(G.W)
72 | pos = nx.nx_agraph.graphviz_layout(graph, prog='neato')
73 | G.set_coordinates(np.array(list(pos.values())))
74 | except ImportError:
75 | G.set_coordinates()
76 | G.compute_fourier_basis() # this is for convenience (not really needed by coarsening)
77 |
78 | N = G.N
79 | L = G.L.toarray()
80 | S = graph_coarsening.graph_utils.get_S(G).T
81 |
82 | plt.spy(G.W, markersize=0.2);
83 |
84 |
85 | # ### Do some coarsening
86 | #
87 | # * Possible methods supported are: 'variation_edges', 'variation_neighborhood', 'algebraic_JC', 'heavy_edge', 'affinity_GS'
88 | # * $r = 1 - n/N$ is the dimensionality reduction ratio
89 | # * $k$ is the size of the subspace we are interested in
90 | # * $k_{max}$ should be set larger than or equal to $k$. It allows us to measure the quality of coarsening over subspaces larger than $k$.
91 |
92 | # In[5]:
93 |
94 |
95 | method = 'variation_edges' # 'variation_neighborhood'
96 |
97 | # Parameters
98 | r = 0.6  # the extent of dimensionality reduction (r=0 means no reduction)
99 | k = 5
100 | kmax = int(3*k)
101 |
102 | import scipy
103 | from signor.viz.matrix import viz_matrix
104 | import collections
105 |
106 | C, Gc, Call, Gall = coarsen(G, K=k, r=r, method=method)
107 | print('projection check:',sp.sparse.linalg.norm( ((C.T).dot(C))**2 - ((C.T).dot(C)) , ord='fro'))
108 | P = C.power(2)
109 | print(type(C))
110 | assert isinstance(C, scipy.sparse.csc.csc_matrix)
111 | P = tonp(P)
112 | n1, n2 = P.shape
113 | ret = P.dot(np.ones((n2, )))
114 | print(collections.Counter(ret))
115 | print(np2set(ret))
116 | summary(ret, 'check')
117 |
118 |
119 | exit()
120 |
121 | summary(C, 'Coarsening matrix')
122 | summary(Call, 'a list of coarsening matrix')
123 |
124 | exit()
125 | for i, m in enumerate(Call):
126 | viz_matrix(tonp(m), f'proj matrix {i}')
127 |
128 | metrics = coarsening_quality(G, C, kmax=kmax)
129 | n = Gc.N
130 | summary(metrics, 'metrics')
131 | exit()
132 |
133 | print('{:16} | r: {:1.4}, nedges: {}, levels: {}, epsilon: {:1.4}'.format(method, metrics['r'], metrics['m'], len(Call), metrics['error_subspace'][k-1]))
134 |
135 |
136 | # ### Visualize the sequence of coarsening levels
137 | #
138 | # * $c+1$ graphs are plotted in total. The leftmost is the original graph and the rightmost is the final coarse graph.
139 | # * Colors are used to indicate the size of each contraction set $C$:
140 | # * green is for $|C|=2$ blue is for $|C|=3$, red is for $|C|=4$, and yellow for $|C|>4$
141 | #
142 |
143 | # In[6]:
144 |
145 |
146 | plot_coarsening(Gall, Call, size=5, alpha=0.6, title=method);
147 |
148 |
149 | # ### Various metrics for coarsening quality
150 | #
151 | # * $\epsilon$ is the restricted similarity constant such that, for every $x \in span(U_k)$ we have $$(1 - \epsilon) x^\top L x \leq x_c^\top L_c x_c \leq (1+\epsilon) x^\top L x $$
152 | # * the eigenvalue error is defined (for every $i = 1, \ldots, k, \ldots, k_{max}$) as $\frac{\lambda_i - \tilde{\lambda}_i}{\lambda_i}$
153 | # * the angle matrix contains the angles between the eigenvectors of $L$ (y-axis) and the lifted eigenvectors of $L_c$. The closer to counter-diagonal it is, the better.
154 |
155 | # In[7]:
156 |
157 |
158 | size = 2.04; fig, axes = plt.subplots(1, 3, figsize=(4*size*3, 3*size)); lineWidth = 1
159 |
160 | axes[0].plot(np.arange(kmax), np.abs(metrics['error_subspace']), 'or-')
161 | axes[0].set_xlabel('$k$'); axes[0].set_ylabel('$\epsilon$')
162 | axes[0].plot( [k, k], [0, max(metrics['error_subspace'])], ':k')
163 |
164 | axes[1].boxplot(np.abs(metrics['error_eigenvalue']))
165 | axes[1].set_ylabel('relative eigenvalue error')
166 |
167 | axes[2].imshow(abs(metrics['angle_matrix'][:,0:kmax]) )
168 | axes[2].plot( [k, k], [0, kmax], ':w')
169 | axes[2].plot( [0, kmax], [k, k], ':w')
170 | axes[2].plot( [0, N], [n-1, n-1], ':w')
171 | axes[2].set_xlim([0, kmax-1])
172 | axes[2].set_ylim([0, kmax-1])
173 | axes[2].set_xlabel('Lc eigenvectors lifted'); axes[2].set_ylabel('L eigenvectors');
174 |
175 |
176 | # ### Coarsen and lift a vector
177 |
178 | # In[8]:
179 |
180 |
181 | size = 2.04; fig, axes = plt.subplots(1, 4, figsize=(4*size*4, 3*size)); lineWidth = 1
182 |
183 | # a random smooth signal
184 | x = G.U[:,:k] @ np.random.randn(k,1)
185 | x = x / np.linalg.norm(x)
186 | G.plot_signal(x, ax=axes[0], plot_name='signal')
187 |
188 | # coarsen it
189 | xc = coarsen_vector(x, C)
190 | Gc.plot_signal(xc, ax=axes[1], plot_name='coarsened signal')
191 |
192 | # lift it
193 | xp = lift_vector(xc, C)
194 | G.plot_signal(xp, ax=axes[2], plot_name='lifted signal')
195 |
196 | # difference
197 | G.plot_signal(np.abs(x-xp), ax=axes[3], plot_name='|x - xp|')
198 |
199 | print('signal error: {}'.format(np.linalg.norm(x - xp)))
200 |
201 |
202 | # In[ ]:
203 |
204 |
205 |
206 |
207 |
208 | # In[ ]:
209 |
210 |
211 |
212 |
213 |
--------------------------------------------------------------------------------
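
The "projection check" printed in the demo above verifies that $\Pi = C^\top C$ is an orthogonal projection. A small numpy-only sketch of the same identity, with an explicit coarsening matrix whose rows are $1/\sqrt{|S_i|}$ on each contraction set $S_i$ (the 5-node example below is made up):

```python
import numpy as np

# contract {0, 1} -> coarse node 0 and {2, 3, 4} -> coarse node 1
C = np.zeros((2, 5))
C[0, [0, 1]] = 1 / np.sqrt(2)
C[1, [2, 3, 4]] = 1 / np.sqrt(3)

Pi = C.T @ C                                     # candidate projection
print(np.allclose(C @ C.T, np.eye(2)))           # True: rows of C are orthonormal
print(np.linalg.norm(Pi @ Pi - Pi, ord='fro'))   # ~0: Pi is idempotent, i.e. a projection
```
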
/sparsenet/model/loss.py:
--------------------------------------------------------------------------------
1 | # Created at 2020-04-17
2 | # Summary: implement a loss function based on || x.T * L * x - x'.T * L_{sparse} * x' ||
3 |
4 | import math
5 | from functools import partial
6 |
7 | import numpy as np
8 | import torch
9 | import torch_geometric
10 | from scipy.sparse import csc_matrix
11 | from torch_geometric.utils import get_laplacian, to_networkx, from_networkx
12 |
13 | # convert the assignment to the projection mat, so we don't need to do it every time we compute the loss.
14 | # n, r the size of L and L_sparse.
15 | from sparsenet.util.sample import sample_N2Nlandmarks
16 | from sparsenet.util.util import random_pygeo_graph, summary, fix_seed, banner, timefunc, pf
17 |
18 | fix_seed()
19 |
20 | tf = partial(timefunc, threshold=-1)
21 |
22 |
23 | @timefunc
24 | def get_projection_mat(n, r, Assignment):
25 | '''
26 | :param n: Size of original graph
27 | :param r: Size of sampled graph
28 | :param Assignment: The correspondence matrix returned from sample_N2Nlandmarks.
29 |     :return: The projection matrix of size (r, n).
30 | '''
31 | P = np.zeros((r, n))
32 | for key, value in Assignment.items():
33 | s = len(value)
34 | assert s != 0
35 | for v in value:
36 | P[key][v] = 1 / s # important
37 | return torch.FloatTensor(P)
38 |
39 |
40 | def get_sparse_projection_mat(n, r, Assignment):
41 | '''
42 | :param n: Size of original graph
43 | :param r: Size of sampled graph
44 | :param Assignment: The correspondence matrix returned from sample_N2Nlandmarks.
45 | :return: The projection matrix of size (r, n).
46 | '''
47 | index, val = [], []
48 | for key, value in Assignment.items():
49 | s = len(value)
50 | assert s != 0
51 | for v in value:
52 | index.append([key, v])
53 | val = val + [1 / s] * s
54 | i, v = torch.tensor(index).T, torch.tensor(val)
55 | return torch.sparse.FloatTensor(i, v, torch.Size([r, n]))
56 |
57 |
58 | def get_sparse_C(n, r, Assignment):
59 | '''
60 | :param n: Size of original graph
61 | :param r: Size of sampled graph
62 |     :param Assignment: The correspondence returned from sample_N2Nlandmarks (note: not really a matrix but a dict).
63 |     Keys are nodes of the small graph; values are the sets of nodes in the big graph contracted to them.
64 | :return: The sparse c matrix (csc) of size (r, n).
65 | '''
66 | row, col, val = [], [], []
67 | for key, value in Assignment.items():
68 | s = len(value)
69 | assert s != 0
70 | row.extend([key] * s)
71 | col.extend(list(value))
72 |         val = val + [1 / np.sqrt(s)] * s  # the major difference from get_sparse_projection_mat: 1/sqrt(s) instead of 1/s
73 |
74 | row, col = np.array(row), np.array(col)
75 | data = np.array(val)
76 | return csc_matrix((data, (row, col)), shape=(r, n))
77 | # return torch.sparse.FloatTensor(i, v, torch.Size([r, n]))
78 |
79 |
80 | def random_vec_loss(L, L_sparse, Projection, device='cpu', num_vec=None, debug=False):
81 | '''
82 | :param L: L is a n*n sparse Tensor.
83 | :param L_sparse: a r*r sparse Tensor
84 | :param Projection: The projection tensor (r * n)
85 | :param device: run on cpu or gpu
86 | :param num_vec: num of random vectors sampled for computing loss
87 | :param debug: debug mode. Will get removed later.
88 |     :return: The loss sum(|X.T L X - (Proj X).T L_sparse (Proj X)|), where X is the matrix of stacked random vectors.
89 | '''
90 |
91 |     # todo: add more variety of random vectors (low freq / high freq)
92 | # todo: need to test for large L, the speed difference on cpu vs. gpu
93 |
94 | L = L.to(device)
95 | L_sparse = L_sparse.to(device)
96 |
97 | if debug:
98 | print('L', L)
99 | print('L_sparse', L_sparse)
100 |
101 | n = (Projection.shape[1])
102 | if num_vec == None:
103 | num_vec = max(1, int(math.log(n)))
104 |
105 | X = torch.rand(n, num_vec) - 0.5
106 | Projection = Projection.to(device)
107 |
108 | X = X / ((X ** 2).sum(0, keepdim=True)).sqrt()
109 | X = X.to(device)
110 |
111 | X_prime = torch.mm(Projection, X)
112 | quadL = torch.mm(X.t(), torch.sparse.mm(L, X))
113 | qualL_sparse = torch.mm(X_prime.t(), torch.sparse.mm(L_sparse, X_prime))
114 | loss = torch.sum(torch.abs(quadL - qualL_sparse)) # important: this is wrong!
115 | return loss
116 |
117 |
118 | # @tf
119 | def get_laplacian_mat(edge_index, edge_weight, num_node, normalization='sym'): # todo: change back
120 | """ return a laplacian (torch.sparse.tensor)"""
121 | edge_index, edge_weight = get_laplacian(edge_index, edge_weight,
122 | normalization=normalization) # see https://bit.ly/3c70FJK for format
123 | return torch.sparse.FloatTensor(edge_index, edge_weight, torch.Size([num_node, num_node]))
124 |
125 |
126 | @tf
127 | def energy_loss(L1, L2, assignment, device='cuda', test=False,
128 | n_measure=1, num_vec=None):
129 | """
130 |     :param L1: Laplacian of the big graph (or a pygeo graph when test=True)
131 |     :param L2: Laplacian of the smaller graph (or a pygeo graph when test=True)
132 |     :param assignment: a dict where keys are nodes of the smaller graph and values are sets of nodes in the larger graph
133 |     :param n_measure: number of times the loss is measured
134 |     :return: the random-vector loss, or a 'mean±std' string when n_measure > 1
135 | """
136 |
137 | if test:
138 |         g1, g2 = L1, L2  # in test mode the inputs are pygeo graphs, not Laplacians
139 |         assert isinstance(g1, torch_geometric.data.data.Data) and isinstance(g2, torch_geometric.data.data.Data)
140 |
141 | L1 = get_laplacian_mat(g1.edge_index, g1.edge_weight, g1.num_nodes)
142 | L2 = get_laplacian_mat(g2.edge_index, g2.edge_weight, g2.num_nodes)
143 |
144 | assert isinstance(L1, (torch.sparse.FloatTensor, torch.cuda.sparse.FloatTensor)), summary(L1, 'L1')
145 | assert isinstance(L2, (torch.sparse.FloatTensor, torch.cuda.sparse.FloatTensor)), summary(L2, 'L2')
146 |
147 | Projection = get_projection_mat(L1.shape[0], L2.shape[0], assignment)
148 |
149 | if n_measure == 1:
150 | loss = random_vec_loss(L1, L2, Projection, device=device, num_vec=num_vec)
151 | return loss
152 | else:
153 | losses = []
154 | for _ in range(n_measure):
155 | loss = random_vec_loss(L1, L2, Projection, device=device, num_vec=num_vec)
156 | losses.append(np.float(loss))
157 | mean, std = np.mean(losses), np.std(losses)
158 | return f'{pf(mean, 2)}±{pf(std, 2)}'
159 |
160 |
161 | if __name__ == '__main__':
162 | # undirected 4-path
163 | banner('random_vec_loss test')
164 | L = get_laplacian_mat(torch.LongTensor([[0, 1, 2, 1, 2, 3], [1, 2, 3, 0, 1, 2]]),
165 | torch.FloatTensor([1., 1., 1., 1., 1., 1.]), 4)
166 | # undirected 2-path (link)
167 | L_sparse = get_laplacian_mat(torch.LongTensor([[0, 1], [1, 0]]), torch.FloatTensor([1., 1.]), 2)
168 | Projection = get_projection_mat(L.shape[0], L_sparse.shape[0], {0: set([0, 1]), 1: set([2, 3])})
169 |
170 | losses = []
171 | for _ in range(1000):
172 | loss = random_vec_loss(L, L_sparse, Projection)
173 | losses.append(loss)
174 | summary(np.array(losses), 'losses')
175 | exit()
176 |
177 | banner('sample_N2Nlandmarks test')
178 | n_node, n_edge = 10, 40
179 | node_feat_dim, edge_feat_dim = 1, 1
180 | n_node_small, n_edge_small = 5, 20
181 |
182 | g1 = random_pygeo_graph(n_node, node_feat_dim, n_edge, edge_feat_dim)
183 | g1.edge_weight = 1.1 * torch.ones(n_edge)
184 |
185 | g2, assignment = sample_N2Nlandmarks(to_networkx(g1), n_node_small, weight_key='edge_weight')
186 | g2 = from_networkx(g2)
187 | g2.edge_weight = g2.edge_weight.type(torch.float)
188 |
189 | summary(g1, 'g1')
190 | summary(g2, 'g2')
191 | # exit()
192 |
193 | loss = energy_loss(g1, g2, assignment, device='cpu', test=True)
194 | print(loss)
195 |
196 | exit()
197 |
198 | print(loss)
199 |
--------------------------------------------------------------------------------
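
A hand-checkable numpy sketch of the per-vector quantity that `random_vec_loss` measures, using the same 4-path / 2-path pair and assignment {0: {0, 1}, 1: {2, 3}} as the `__main__` test above. For readability it uses combinatorial Laplacians (the test itself uses `get_laplacian_mat`'s default normalization='sym'); the projection entries 1/|S_i| follow `get_projection_mat`.

```python
import numpy as np

L = np.array([[ 1, -1,  0,  0],
              [-1,  2, -1,  0],
              [ 0, -1,  2, -1],
              [ 0,  0, -1,  1]], dtype=float)   # combinatorial Laplacian of the 4-path
L_sparse = np.array([[ 1, -1],
                     [-1,  1]], dtype=float)    # combinatorial Laplacian of the 2-path
P = np.array([[0.5, 0.5, 0.0, 0.0],
              [0.0, 0.0, 0.5, 0.5]])            # {0,1}->0, {2,3}->1, entries 1/|S_i|

x = np.random.randn(4)
x = x / np.linalg.norm(x)                       # unit-norm random test vector
xc = P @ x                                      # its projection onto the small graph
print(abs(x @ L @ x - xc @ L_sparse @ xc))      # the per-vector quadratic-form gap
```
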
/sparsenet/util/gsp_util.py:
--------------------------------------------------------------------------------
1 | # Created at 2020-06-02
2 | # Summary: pygsp util
3 |
4 | import collections
5 | from time import time
6 | from warnings import warn
7 |
8 | import networkx as nx
9 | import scipy
10 | import torch
11 | import torch_geometric
12 | from graph_coarsening.coarsening_utils import *
13 | from pygsp import graphs
14 | from torch_geometric.data.data import Data
15 | from torch_geometric.utils import from_scipy_sparse_matrix
16 |
17 | from sparsenet.util.util import summary, tonp, np2set, timefunc, red, pyg2gsp, update_dict
18 |
19 |
20 | def assert_proj_matrix(C):
21 | proj_error = sp.sparse.linalg.norm(((C.T).dot(C)) ** 2 - ((C.T).dot(C)), ord='fro')
22 | assert proj_error < 1e-5, f'proj error {proj_error} larger than {1e-5}.'
23 |
24 |
25 | def ba_graph(N=400):
26 | # return a gsp graph
27 | G = graphs.BarabasiAlbert(N)
28 | if not hasattr(G, 'coords'):
29 | try:
30 | graph = nx.from_scipy_sparse_matrix(G.W)
31 | pos = nx.nx_agraph.graphviz_layout(graph, prog='neato')
32 | G.set_coordinates(np.array(list(pos.values())))
33 | except ImportError:
34 | G.set_coordinates()
35 | G.compute_fourier_basis() # this is for convenience (not really needed by coarsening)
36 | return G
37 |
38 |
39 | class gsp2pyg(object):
40 | def __init__(self, g, **loukas_args):
41 | assert isinstance(g, torch_geometric.data.data.Data)
42 | self.origin_pyg = g
43 | self.gspG = self.pyg2gsp(g)
44 | self.pyg = self.gsp2pyg(self.gspG)
45 | self.loukas_method(**loukas_args)
46 | self.pyg_sml = self.gsp2pyg(self.gspG_sml)
47 | self.assignment = self.process()
48 | self._set_pos()
49 |
50 | def _set_pos(self):
51 | """ set pos for smaller pyg graph """
52 |         # todo: what is this for?
53 |
54 | if 'pos' not in self.origin_pyg.keys:
55 | return
56 |
57 | n = self.pyg_sml.num_nodes
58 | d = self.origin_pyg.pos.size(1)
59 | pos = torch.zeros((n, d))
60 | for k, v in self.assignment.items():
61 | v = list(v)
62 | pos[k, :] = torch.mean(self.origin_pyg.pos[v], 0)
63 | self.pyg_sml.pos = pos
64 |
65 | def pyg2gsp(self, g):
66 | return pyg2gsp(g)
67 |
68 | def gsp2pyg(self, g):
69 | """ works only for g with uniform weights """
70 | from sparsenet.util.data import input_check
71 | edge_index, edge_weight = from_scipy_sparse_matrix(g.W)
72 | edge_weight = edge_weight.type(torch.FloatTensor)
73 |
74 | summary(edge_weight, 'edge_weight in gsp2pyg', highlight=True)
75 | pyG = Data(edge_index=edge_index, edge_weight=edge_weight,
76 | edge_attr=torch.flatten(edge_weight)) # important: set edge_attr to be edge_weight
77 | pyG_check = input_check(pyG, size_check=False, eig=False) # need to comment out for Cora
78 | try:
79 | assert g.N == pyG_check.num_nodes
80 | return pyG_check
81 | except AssertionError:
82 | print(f'AssertionError! gsp Graph size is {g.N} but pyG size is {pyG_check.num_nodes}. '
83 | f'{red("Return pyG instead of pyG_check.")}')
84 | return pyG
85 |
86 | @timefunc
87 | def loukas_method(self, **kwargs):
88 | """ api to call loukas's code.
89 |         modified from Loukas's code.
90 | This function provides a common interface for coarsening algorithms that contract subgraphs
91 |
92 | Parameters
93 | ----------
94 | G : pygsp Graph
95 | K : int
96 | The size of the subspace we are interested in preserving.
97 | r : float between (0,1)
98 | The desired reduction defined as 1 - n/N.
99 | method : String
100 | ['variation_neighborhoods', 'variation_edges', 'variation_cliques', 'heavy_edge', 'algebraic_JC', 'affinity_GS', 'kron']
101 |
102 | Returns
103 | -------
104 | C : np.array of size n x N
105 | The coarsening matrix.
106 | Gc : pygsp Graph
107 | The smaller graph.
108 | Call : list of np.arrays
109 | Coarsening matrices for each level
110 | Gall : list of (n_levels+1) pygsp Graphs
111 | All graphs involved in the multilevel coarsening
112 |
113 | Example
114 | -------
115 | C, Gc, Call, Gall = coarsen(G, K=10, r=0.8)
116 | """
117 |
118 | t0 = time()
119 | default_kwargs = {'K': 40, 'r': 0.5, 'method': 'variation_edges', 'max_levels': 20}
120 | loukas_quality = kwargs.get('loukas_quality', False)
121 | kwargs.pop('loukas_quality', None)
122 | kwargs = update_dict(kwargs, default_kwargs)
123 | print(f'{red("kwargs for coarsen function")}: {kwargs}\n')
124 | G = self.gspG
125 | K = kwargs['K']
126 |
127 | # precompute spectrum needed for metrics
128 | if loukas_quality:
129 | if False: # K_all[-1] > N / 2:
130 | pass # [Uk, lk] = eig(G.L)
131 | else:
132 | offset = 2 * max(G.dw)
133 | T = offset * sp.sparse.eye(G.N, format='csc') - G.L
134 | lk, Uk = sp.sparse.linalg.eigsh(T, k=K, which='LM', tol=1e-6)
135 | lk = (offset - lk)[::-1]
136 | Uk = Uk[:, ::-1]
137 | kwargs['Uk'] = Uk
138 | kwargs['lk'] = lk
139 | t1 = time()
140 |
141 | C, Gc, Call, Gall = coarsen(self.gspG, **kwargs)
142 |
143 | if loukas_quality:
144 | metrics = coarsening_quality(G, C, kmax=K, Uk=Uk[:, :K], lk=lk[:K])
145 | for k in metrics:
146 | summary(metrics[k], k, highlight=True)
147 | else:
148 | print(red('No coarsening_quality.'))
149 |
150 | _check_loukas(self, G, Gc, C)
151 | t2 = time()
152 |
153 | P = C.power(2)
154 | self.P = P # save memory
155 | self.gspG_sml = Gc
156 | self.C = C
157 | t3 = time()
158 |
159 | print(f'Compute Eigenvalue: {int(t1 - t0)}')
160 | print(f'Coarsen + Metric: {int(t2 - t1)}')
161 | print(f'Misc: {int(t3 - t2)}')
162 |
163 | def process(self):
164 | # convert coarsening matrix to assignment / projection to make life easy
165 | sml_idx, big_idx = self.P.nonzero()
166 | sml_idx, big_idx = sml_idx.astype(int), big_idx.astype(int)
167 |
168 | n = len(sml_idx)
169 | assignment = {}
170 | for i in range(n):
171 | assignment[sml_idx[i]] = set()
172 |
173 | for i in range(n):
174 | k, v = sml_idx[i], big_idx[i]
175 | assignment[k].add(v)
176 | del self.P
177 | return assignment
178 |
179 | def _check_loukas(self, G, Gc, C):
180 | # verify matrix. change to scipy multiplication.
181 |
182 | Q = C.dot(np.ones((C.shape[1], 1))).reshape(-1)
183 | Q = scipy.sparse.diags([Q], [0])
184 | QC = Q.dot(C)
185 | Lc_Q = QC.dot((G.L).dot(QC.T))
186 | diff = Lc_Q - Gc.L
187 | if np.max(diff) > 0.1:
188 | warn('Lc_Q - Gc.L is not close enough.')
189 | del Lc_Q
190 |
191 |
192 | if __name__ == '__main__':
193 | from sparsenet.util.util import random_pygeo_graph
194 |
195 | pyg = random_pygeo_graph(100, 1, 4000, 1)
196 |
197 | converter = gsp2pyg(pyg, loukas_quality=False)
198 | gsp = converter.gspG
199 | pyG1 = converter.gsp2pyg(gsp)
200 | summary(pyG1, 'pyG1 ')
201 |
202 | exit()
203 | pyg = random_pygeo_graph(10, 1, 40, 1)
204 | converter = gsp2pyg(pyg)
205 | g_sml, assignment = converter.pyg_sml, converter.assignment
206 | summary(g_sml, 'g_sml')
207 | print(assignment)
208 | exit()
209 |
210 | G = ba_graph(400)
211 | method = 'variation_edges' # 'variation_neighborhood'
212 |     r = 0.6  # the extent of dimensionality reduction (r=0 means no reduction)
213 | k = 5
214 | kmax = int(3 * k)
215 |
216 | C, Gc, Call, Gall = coarsen(G, K=k, r=r, method=method)
217 | assert_proj_matrix(C)
218 | print(type(C))
219 | P = C.power(2)
220 |
221 | assert isinstance(P, scipy.sparse.csc.csc_matrix)
222 | P = tonp(P)
223 | n1, n2 = C.shape
224 | ret = P.dot(np.ones((n2,)))
225 | print(collections.Counter(ret))
226 | print(np2set(ret))
227 | summary(ret, 'check')
228 |
--------------------------------------------------------------------------------
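
A minimal scipy-only sketch of what `gsp2pyg.process()` does above: square the coarsening matrix entrywise and read its nonzero pattern as a coarse-node → original-nodes assignment. The explicit 2×5 matrix below is made up for illustration.

```python
import numpy as np
from scipy.sparse import csc_matrix

C = csc_matrix(np.array([[1/np.sqrt(2), 1/np.sqrt(2), 0,            0,            0           ],
                         [0,            0,            1/np.sqrt(3), 1/np.sqrt(3), 1/np.sqrt(3)]]))
P = C.power(2)                       # entries become 1/|S_i| on the contraction sets
sml_idx, big_idx = P.nonzero()       # (coarse node, original node) pairs

assignment = {}
for k, v in zip(sml_idx.astype(int), big_idx.astype(int)):
    assignment.setdefault(k, set()).add(v)
print(assignment)                    # {0: {0, 1}, 1: {2, 3, 4}}
```
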
/sparsenet/test/gen_test.py:
--------------------------------------------------------------------------------
1 | # Created at 2020-05-27
2 | # Summary: test generalization
3 |
4 | import os
5 |
6 | from sparsenet.util.util import runcmd
7 | from sparsenet.util.name_util import big_ego_graphs
8 | from sparsenet.util.dir_util import PYTHON
9 | python = PYTHON
10 | warn = False
11 | warn_cmd = ' -W ignore ' if not warn else ''
12 |
13 |
14 | class tester:
15 | def __init__(self):
16 | self.loukas_datasets = ['minnesota', 'airfoil', 'yeast', 'bunny']
17 | self.syn_datasets = ['ws', 'random_geo', 'shape', 'sbm', 'random_er', ] # ego_facebook
18 | self.file = 'sparsenet/model/example.py '
19 | self.methods = ['affinity_GS', 'algebraic_JC', 'heavy_edge', 'variation_edges', 'variation_neighborhoods',
20 | ] # 'heavy_edge' 'affinity_GS', 'kron'
21 | self.method = ['variation_neighborhood'] # it's best in most cases
22 |
23 | self.args = ' --lap none ' \
24 | f' --train_idx 0 --test_idx 0 --n_bottomk 40 --force_pos ' \
25 | f'--n_cycle 100 --seed 0 ' \
26 | f'--train_indices 0 --test_indices ,'
27 |
28 | self.cmd = f'{python} {warn_cmd} {self.file} {self.args} '
29 |
30 | def viz_test(self):
31 | train_indices = '0,1,2,3,4,5,6' # '10,11,' # '0,1,2,3,4,'
32 | test_indices = '13,14,15,16,17,18,' #'5,6,8,9,10,11,12,13,14,15,15,17,18,19'
33 | for data in \
34 | ['faust']:
35 | for method in self.method: # [1e-4, 1e-5, 1e-6]:
36 | special_args = f'--n_epoch 20 --lap None --loukas_quality --bs 600 --lr 1e-3 --ini --viz '
37 | cmd = f'{self.cmd} --dataset {data} --ratio 0.5 --strategy loukas ' \
38 | f'--method {method} --train_indices {train_indices} --test_indices {test_indices} {special_args} '
39 | runcmd(cmd)
40 |
41 | def generalization(self):
42 | train_indices = \
43 | '2,3,4,5,6,7,8,9,10,11,12,13,14,'
44 | # '9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,'
45 | #'2,3,4,5,6,7,8,9,10,11,12,13,14,'
46 | # '0,1,2,3,4,'
47 | # '2,3,4,5,6,7,8,9,'
48 | # '0'
49 | # '1,2,3,4,5,'
50 | # '2,3,4,5,6,7,8,9' \
51 | test_indices = \
52 | '0'
53 | # '5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,'
54 | # '0'
55 | # '0,1,2,3,4,5,6,7,8,9,10'
56 | # '6,7,8,9,10,11,12,13,14,15,16,17,18,19'
57 | for data in \
58 | ['coauthor-cs']:
59 | # self.syn_datasets:
60 | # ['random_er']:
61 | # ['random_geo', ]:
62 | # self.loukas_datasets:
63 |
64 | for method in self.method: # [1e-4, 1e-5, 1e-6]:
65 | special_args = f'--bs 600 --lr 1e-3 --n_epoch 20 --lap sym --device cuda ' \
66 | f'--loss quadratic --n_bottomk 500 --correction --ini --valeigen --w_len 5000 --offset 100 '
67 | cmd = f'{self.cmd} --dataset {data} --ratio 0.3 --strategy loukas ' \
68 | f'--method {method} --train_indices {train_indices} --test_indices {test_indices} {special_args} '
69 | # cmd = cmd.replace('--strategy loukas ', '--strategy DK ')
70 | runcmd(cmd)
71 |
72 | def metric_test(self):
73 | args = ' --loukas_quality '
74 |
75 | file = 'sparsenet/evaluation/metric.py '
76 | for data in self.loukas_datasets[:1]:
77 | for ratio in [.5]:
78 | for method in self.methods: # ['variation_neighborhoods']:
79 | cmd = f'{python} {warn_cmd} {file} {args} --dataset {data} ' \
80 | f'--strategy DK --ratio {ratio} --method {method}'
81 | runcmd(cmd)
82 |
83 | def loukas_quality_test(self):
84 |         """ test the effect of using vs. not using the loukas_quality argument. """
85 | train_indices = '0,'
86 | test_indices = ','
87 | for data in ['bunny']:
88 | for method in ['variation_edges']: # self.methods:
89 | for ratio in [0.3, 0.5, 0.7]:
90 | special_args = f'--bs 600 --n_epoch 50 --device cuda ' # --loukas_quality
91 | cmd = f'{self.cmd} --dataset {data} --ratio {ratio} --strategy loukas --correction ' \
92 | f'--method {method} --train_indices {train_indices} --test_indices {test_indices} {special_args} ' # --ini
93 | runcmd(cmd)
94 |
95 | def feature_test(self):
96 | train_indices = '0'
97 | test_indices = ','
98 | for data in \
99 | ['shape']:
100 |
101 | for method in self.methods:
102 | special_args = f'--bs 600 --n_epoch 50 '
103 | cmd = f'{self.cmd} --dataset {data} --ratio 0.5 --strategy loukas --device cpu ' \
104 | f'--method {method} --train_indices {train_indices} --test_indices {test_indices} {special_args}'
105 | # cmd = cmd.replace('--strategy loukas ', '--strategy DK ')
106 | runcmd(cmd)
107 |
108 | def fit_test(self):
109 | """ test all loukas's datasets """
110 | datasets = ['bunny']# [ 'airfoil', 'yeast', 'bunny']
111 | train_indices = '0,,'
112 | methods = [
113 |             'variation_neighborhoods']  # ['heavy_edge', 'variation_edges', 'algebraic_JC', 'affinity_GS'] # important: exclude kron
114 |
115 | for data in datasets:
116 | for ratio in [.5]:
117 | for method in methods:
118 | special_args = ''# '--lr 1e-4 --bs 6000' if data == 'bunny' else '' # large bs for bunny
119 | cmd = f'{self.cmd} --dataset {data} ' \
120 | f'--strategy loukas --ratio {ratio} --method {method} --train_indices {train_indices} --correction --ini {special_args} '
121 | runcmd(cmd)
122 |
123 | def otherloss_test(self):
124 | """ test all loukas's datasets """
125 | datasets = ['ws', ]
126 | train_indices = '0,1,2,3,'# '0,1,2,3,4,'
127 | test_indices = '5,6,7,8,9,10,11,' # '5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,' # '10,11,12,13,14,15,16,17,18,19,' # '5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,' # '5,6,7,8,9,10,11,12,13,14,15,16'
128 | methods = self.method # [ ] # ['heavy_edge', 'variation_edges', 'algebraic_JC', 'affinity_GS']
129 |
130 | for data in datasets:
131 | for ratio in [.7]:
132 | for method in methods: # ['affinity_GS']:
133 | self.cmd = self.cmd.replace('--n_bottomk 40 ', '--n_bottomk 40 ')
134 | # self.cmd = self.cmd.replace('--lap none ', '--lap sym ')
135 | cmd = f'{self.cmd} --dataset {data} ' f' --n_epoch 20 --device cpu --w_len 5000 ' \
136 |                       f'--ratio {ratio} --method {method} --loss quadratic --dynamic --ini --valeigen ' \
137 | f'--train_indices {train_indices} --test_indices {test_indices} --n_layer 3 --emb_dim 50 '
138 | # cmd += ' --strategy DK '
139 | # cmd = cmd.replace(method, 'DK_method')
140 | runcmd(cmd)
141 |
142 | def debug_test(self):
143 | args = ' --n_epoch 50 --lap none ' \
144 | f' --train_idx 0 --test_idx 0 --n_bottomk 40 --force_pos ' \
145 | f'--n_cycle 100 --device cuda --seed 0 ' \
146 | f'--train_indices 0, --test_indices ,'
147 | kwargs = {'dataset': 'random_geo', 'n_bottomk': 40, 'ratio': 0.7, 'seed': 0, 'method': 'variation_edges'}
148 | # {'dataset': 'ws', 'n_bottomk': 40, 'ratio': 0.7, 'seed': 0, 'method': 'variation_edges'}
149 |
150 | cmd = f'{python} {warn_cmd} {self.file} {args} --dataset {kwargs["dataset"]} ' \
151 | f'--strategy loukas --ratio {kwargs["ratio"]} --method {kwargs["method"]} --seed {kwargs["seed"]}'
152 | runcmd(cmd)
153 |
154 | def local_var_nbr_test(self):
155 | args = ' --n_epoch 50 --lap none ' \
156 | f' --train_idx 0 --test_idx 0 --n_bottomk 40 --force_pos ' \
157 | f'--n_cycle 1 --device cuda --seed 0 ' \
158 | f'--train_indices 0, --test_indices ,'
159 |
160 | for data in ['bunny']: #self.loukas_datasets:
161 | for ratio in [.5]:
162 | for method in ['variation_neighborhoods']:
163 | special_args = '--lr 1e-4 --bs 6000 --ini ' if data == 'bunny' else '' # large bs for bunny
164 | cmd = f'{python} {warn_cmd} {self.file} {args} --dataset {data} ' \
165 | f'--strategy loukas --ratio {ratio} --method {method} {special_args}'
166 | runcmd(cmd)
167 |
168 |
169 | if __name__ == '__main__':
170 | # tester().feature_test()
171 | # tester().local_var_nbr_test()
172 | # tester().loukas_quality_test()
173 | # tester().generalization()
174 | # tester().viz_test()
175 | tester().otherloss_test()
176 | # tester().metric_test()
177 | # tester().fit_test()
178 |
179 | exit()
180 | for data in \
181 | ['minnesota', 'bunny', 'airfoil', 'yeast']:
182 | # ['random_er', 'random_geo']: #
183 |
184 | cmd = f'{python} {warn_cmd} sparsenet/model/example.py --bs 600 --n_epoch 30 --lap none ' \
185 | f' --train_idx 0 --test_idx 0 --dataset {data} --n_bottomk 40 --ratio 0.5 --force_pos ' \
186 | f'--n_cycle 100 --device cuda --seed 0' # # --lap_check
187 | print(cmd)
188 | os.system(cmd)
189 |
--------------------------------------------------------------------------------
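
Since the test commands above are assembled by concatenating f-string fragments, the trailing space kept after each flag group (for example after `--valeigen ` in `otherloss_test`) is what keeps adjacent fragments from fusing. A tiny plain-Python illustration of the failure mode (the flag values below are just for show):

```python
# Adjacent string fragments concatenate with no separator,
# so a fragment that ends without a space glues two flags together.
a = '--loss quadratic --dynamic --ini --valeigen'   # no trailing space
b = '--train_indices 0,1,2,'
print(a + b)          # '... --ini --valeigen--train_indices 0,1,2,'  -> argparse would reject this
print(a + ' ' + b)    # '... --ini --valeigen --train_indices 0,1,2,' -> what the script should build
```
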
/sparsenet/model/model.py:
--------------------------------------------------------------------------------
1 | # Created at 2020-04-10
2 | # Summary: graph encoders
3 |
4 | import argparse
5 |
6 | import torch
7 | import torch.nn.functional as F
8 | from torch_geometric.data import DataLoader
9 | from torch_geometric.nn import Set2Set, MessagePassing, global_add_pool, global_mean_pool, global_max_pool, \
10 | GlobalAttention
11 | from torch_geometric.utils import add_self_loops
12 |
13 | from sparsenet.util.util import summary, fix_seed, random_pygeo_graph
14 |
15 |
16 | class GINConv(MessagePassing):
17 | """
18 | Extension of GIN aggregation to incorporate edge information by concatenation.
19 |
20 | Args:
21 | emb_dim (int): dimensionality of embeddings for nodes and edges.
22 | embed_input (bool): whether to embed input or not.
23 |
24 |
25 | See https://arxiv.org/abs/1810.00826
26 | """
27 |
28 | def __init__(self, edge_feat_dim, emb_dim, aggr="add"):
29 | super(GINConv, self).__init__()
30 | # multi-layer perceptron
31 | self.edge_feat_dim = edge_feat_dim
32 | self.mlp = torch.nn.Sequential(torch.nn.Linear(emb_dim, 2 * emb_dim), torch.nn.ReLU(),
33 | torch.nn.Linear(2 * emb_dim, emb_dim))
34 | self.edge_embedding = torch.nn.Linear(self.edge_feat_dim, emb_dim) # torch.nn.Embedding(num_bond_type, emb_dim)
35 |
36 | torch.nn.init.xavier_uniform_(self.edge_embedding.weight.data)
37 | self.aggr = aggr
38 |
39 | def forward(self, x, edge_index, edge_attr):
40 | # add self loops in the edge space
41 | edge_index, _ = add_self_loops(edge_index, num_nodes=x.size(0))
42 |
43 | self_loop_attr = torch.zeros(x.size(0), self.edge_feat_dim)
44 | self_loop_attr = self_loop_attr.to(edge_attr.device).to(edge_attr.dtype) # LongTensor of shape [32, 1]
45 |
46 | edge_attr_ = torch.cat((edge_attr, self_loop_attr), dim=0)
47 | edge_attr_ = edge_attr_.type(torch.FloatTensor).to(edge_attr.device)
48 | # summary(edge_attr, 'edge_attr after adding self loop')
49 |
50 | edge_embeddings = self.edge_embedding(edge_attr_)
51 |
52 | return self.propagate(edge_index, size=[x.size(0), x.size(0)], x=x, edge_attr=edge_embeddings)
53 |
54 | def message(self, x_j, edge_attr):
55 | return x_j + edge_attr
56 |
57 | def update(self, aggr_out):
58 | return self.mlp(aggr_out)
59 |
60 |
61 | class GNN(torch.nn.Module):
62 | """
63 | Args:
64 | num_layer (int): the number of GNN layers
65 | emb_dim (int): dimensionality of embeddings
66 | JK (str): last, concat, max or sum.
67 | max_pool_layer (int): the layer from which we use max pool rather than add pool for neighbor aggregation
68 | drop_ratio (float): dropout rate
69 | gnn_type: gin, gcn, graphsage, gat
70 |
71 | Output:
72 | node representations
73 |
74 | """
75 |
76 | def __init__(self, num_layer, emb_dim, node_feat_dim, edge_feat_dim, JK="last", drop_ratio=0, gnn_type="gin"):
77 | super(GNN, self).__init__()
78 | self.num_layer = num_layer
79 | self.drop_ratio = drop_ratio
80 | self.JK = JK
81 | self.node_feat_dim = node_feat_dim
82 | self.edge_feat_dim = edge_feat_dim
83 |
84 | if self.num_layer < 2:
85 | print("Warning: Number of GNN layers must be greater than 1.")
86 |
87 | ########
88 | self.x_embedding0 = torch.nn.Linear(self.node_feat_dim, emb_dim)
89 |
90 | ###List of MLPs
91 | self.gnns = torch.nn.ModuleList()
92 | for layer in range(num_layer):
93 | if gnn_type == "gin":
94 | self.gnns.append(GINConv(self.edge_feat_dim, emb_dim, aggr="add"))
95 |             else:
96 |                 raise NotImplementedError(f'gnn_type {gnn_type} is not supported')
97 |
98 | ### List of batchnorms
99 | self.batch_norms = torch.nn.ModuleList()
100 | for layer in range(num_layer):
101 | self.batch_norms.append(torch.nn.BatchNorm1d(emb_dim))
102 |
103 | # def forward(self, x, edge_index, edge_attr):
104 | def forward(self, *argv):
105 | if len(argv) == 3:
106 | x, edge_index, edge_attr = argv[0], argv[1], argv[2]
107 | elif len(argv) == 1:
108 | data = argv[0]
109 | x, edge_index, edge_attr = data.x, data.edge_index, data.edge_attr
110 | else:
111 | raise ValueError("unmatched number of arguments.")
112 |
113 | x = self.x_embedding0(x) # self.x_embedding1(x[:, 0]) + self.x_embedding2(x[:, 1])
114 |
115 | h_list = [x]
116 | for layer in range(self.num_layer):
117 | h = self.gnns[layer](h_list[layer], edge_index, edge_attr)
118 | h = self.batch_norms[layer](h)
119 | # h = F.dropout(F.relu(h), self.drop_ratio, training = self.training)
120 | if layer == self.num_layer - 1:
121 | # remove relu for the last layer
122 | h = F.dropout(h, self.drop_ratio, training=self.training)
123 | else:
124 | h = F.dropout(F.relu(h), self.drop_ratio, training=self.training)
125 | h_list.append(h)
126 |
127 | ### Different implementations of Jk-concat
128 | if self.JK == "concat":
129 | node_representation = torch.cat(h_list, dim=1)
130 | elif self.JK == "last":
131 | node_representation = h_list[-1]
132 | elif self.JK == "max":
133 | h_list = [h.unsqueeze_(0) for h in h_list]
134 | node_representation = torch.max(torch.cat(h_list, dim=0), dim=0)[0]
135 | elif self.JK == "sum":
136 | h_list = [h.unsqueeze_(0) for h in h_list]
137 |             node_representation = torch.sum(torch.cat(h_list, dim=0), dim=0)  # note: unlike torch.max, torch.sum returns a tensor, not a (values, indices) pair
138 |
139 | return node_representation
140 |
141 |
142 | class GNN_graphpred(torch.nn.Module):
143 | """
144 | Extension of GIN to incorporate edge information by concatenation.
145 |
146 | Args:
147 | num_layer (int): the number of GNN layers
148 | emb_dim (int): dimensionality of embeddings
149 | num_tasks (int): number of tasks in multi-task learning scenario
150 | drop_ratio (float): dropout rate
151 | JK (str): last, concat, max or sum.
152 | graph_pooling (str): sum, mean, max, attention, set2set
153 | gnn_type: gin, gcn, graphsage, gat
154 |
155 | See https://arxiv.org/abs/1810.00826
156 | JK-net: https://arxiv.org/abs/1806.03536
157 | """
158 |
159 | # @profile
160 | def __init__(self, num_layer, emb_dim, node_feat_dim, edge_feat_dim, num_tasks, JK="last", drop_ratio=0,
161 | graph_pooling="mean", gnn_type="gin", force_pos=False, mlp=False):
162 | """
163 |
164 | :param num_layer:
165 | :param emb_dim:
166 | :param node_feat_dim:
167 | :param edge_feat_dim:
168 | :param num_tasks:
169 | :param JK:
170 | :param drop_ratio:
171 | :param graph_pooling:
172 | :param gnn_type:
173 |         :param force_pos: force positive output. If True, add a non-linear layer at the end.
174 | """
175 | super(GNN_graphpred, self).__init__()
176 | self.num_layer = num_layer
177 | self.drop_ratio = drop_ratio
178 | self.JK = JK
179 | self.emb_dim = emb_dim
180 | self.num_tasks = num_tasks
181 | self.edge_feat_dim = edge_feat_dim
182 | self.force_pos = force_pos
183 | self.mlp = mlp
184 |
185 | if self.num_layer < 2:
186 | print("Warning: Number of GNN layers must be greater than 1.")
187 |
188 | self.gnn = GNN(num_layer, emb_dim, node_feat_dim, edge_feat_dim, JK, drop_ratio, gnn_type=gnn_type)
189 |
190 | # Different kind of graph pooling
191 | if graph_pooling == "sum":
192 | self.pool = global_add_pool
193 | elif graph_pooling == "mean":
194 | self.pool = global_mean_pool
195 | elif graph_pooling == "max":
196 | self.pool = global_max_pool
197 | elif graph_pooling == "attention":
198 | if self.JK == "concat":
199 | self.pool = GlobalAttention(gate_nn=torch.nn.Linear((self.num_layer + 1) * emb_dim, 1))
200 | else:
201 | self.pool = GlobalAttention(gate_nn=torch.nn.Linear(emb_dim, 1))
202 | elif graph_pooling[:-1] == "set2set":
203 | set2set_iter = int(graph_pooling[-1])
204 | if self.JK == "concat":
205 | self.pool = Set2Set((self.num_layer + 1) * emb_dim, set2set_iter)
206 | else:
207 | self.pool = Set2Set(emb_dim, set2set_iter)
208 | else:
209 | raise ValueError("Invalid graph pooling type.")
210 |
211 | # For graph-level binary classification
212 | if graph_pooling[:-1] == "set2set":
213 | self.mult = 2
214 | else:
215 | self.mult = 1
216 |
217 | if self.JK == "concat":
218 | self.graph_pred_linear = torch.nn.Linear(self.mult * (self.num_layer + 1) * self.emb_dim, self.num_tasks)
219 | else:
220 | self.graph_pred_linear = torch.nn.Linear(self.mult * self.emb_dim, self.num_tasks)
221 |
222 | if self.mlp:
223 | self.graph_pred_linear = torch.nn.Sequential(
224 | torch.nn.Linear(self.mult * self.emb_dim, self.mult * self.emb_dim),
225 | torch.nn.ReLU(),
226 | torch.nn.Linear(self.mult * self.emb_dim, self.mult * self.emb_dim),
227 | torch.nn.ReLU(),
228 | torch.nn.Linear(self.mult * self.emb_dim, self.mult * self.emb_dim),
229 | torch.nn.ReLU(),
230 | torch.nn.Linear(self.mult * self.emb_dim, self.mult * self.emb_dim),
231 | torch.nn.ReLU(),
232 | torch.nn.Linear(self.mult * self.emb_dim, self.num_tasks))
233 |
234 | def from_pretrained(self, model_file):
235 | # self.gnn = GNN(self.num_layer, self.emb_dim, JK = self.JK, drop_ratio = self.drop_ratio) # important
236 | self.gnn.load_state_dict(torch.load(model_file))
237 |
238 | # @timefunc
239 | def forward(self, *argv, ini=False):
240 | if len(argv) == 4:
241 | x, edge_index, edge_attr, batch = argv[0], argv[1], argv[2], argv[3]
242 | elif len(argv) == 1:
243 | data = argv[0]
244 | x, edge_index, edge_attr, batch = data.x, data.edge_index, data.edge_attr, data.batch
245 |
246 | else:
247 | raise ValueError("unmatched number of arguments.")
248 |
249 | node_representation = self.gnn(x, edge_index, edge_attr)
250 | rep = self.graph_pred_linear(self.pool(node_representation, batch))
251 | if ini and len(argv) == 1:
252 | ini_tsr = torch.stack([data.ini] * rep.size(1), dim=1)
253 |
254 | if self.force_pos:
255 | if ini:
256 |                 # important: new version, not yet tested. Does it work well for amazons?
257 | return torch.nn.ReLU()(rep + ini_tsr) + 1
258 | # return 0.5 * rep + torch.nn.ReLU()(ini_tsr) # + torch.zeros(rep.size()).to(rep.device)
259 | else:
260 | return 1 + torch.nn.ReLU()(rep) # important: add 1 by default. not sure it's the best.
261 | else:
262 | return rep
263 |
264 |
265 | parser = argparse.ArgumentParser(description='PyTorch implementation of pre-training of graph neural networks')
266 | parser.add_argument('--gnn_type', type=str, default='gin', help='')
267 |
268 | if __name__ == "__main__":
269 | fix_seed()
270 | edge_feat_dim = 1
271 | node_feat_dim = 5
272 | n_node, n_edge = 320, 5000
273 | n_layer = 3
274 | emb_dim, out_dim = 50, 18
275 |
276 | model = GNN_graphpred(n_layer, emb_dim, node_feat_dim, edge_feat_dim, out_dim, mlp=False)
277 |
278 | g1 = random_pygeo_graph(n_node, node_feat_dim, n_edge, edge_feat_dim, device='cpu')
279 | g2 = random_pygeo_graph(n_node + 10, node_feat_dim, n_edge + 10, edge_feat_dim, device='cpu')
280 | summary(g1, 'g1')
281 | loader = DataLoader([g1] * 16 + [g2] * 16, batch_size=8, shuffle=True, num_workers=0)
282 | for batch in loader:
283 | pred = model(batch)
284 | summary(pred, 'pred')
285 |
--------------------------------------------------------------------------------
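
A tiny torch-only check of what the `force_pos` head above does when `ini` is off: predictions go through `1 + ReLU(.)`, so every predicted edge weight ends up at least 1. The raw outputs below are made up.

```python
import torch

rep = torch.tensor([[-2.0], [0.0], [3.5]])   # raw per-edge outputs of graph_pred_linear
out = 1 + torch.nn.ReLU()(rep)               # the force_pos branch (without --ini)
print(out)                                   # tensor([[1.0000], [1.0000], [4.5000]])
```
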
/sparsenet/model/eval.py:
--------------------------------------------------------------------------------
1 | # Created at 2020-05-27
2 | # Summary: add train and eval for model training. Extracted from example.py. Contains import abstraction.
3 |
4 | import logging
5 | from copy import deepcopy
6 | from time import time
7 |
8 | import torch
9 | from torch.autograd import Variable as V
10 |
11 | from sparsenet.model.loss import get_laplacian_mat
12 | from sparsenet.util.data import set_loader
13 | from sparsenet.util.loss_util import loss_manager
14 | from sparsenet.util.torch_util import sparse_mm
15 | from sparsenet.util.util import summary, pf, red, banner, fix_seed, timefunc as tf
16 |
17 | fix_seed()
18 |
19 |
20 | def apply_gnn(batch, model, dev, verbose=False, ini=False):
21 | """
22 | :param batch:
23 | :param model:
24 | :return:
25 | """
26 | indices_batch, graph_batch = batch
27 | if verbose:
28 | summary(graph_batch, 'graph_batch', highlight=True)
29 |
30 | _bs = len(indices_batch[0])
31 | indices_batch = torch.stack(indices_batch, dim=0).t().contiguous().view((2 * _bs)) # https://bit.ly/2ARazSd
32 | indices_batch, graph_batch = indices_batch.to(dev), graph_batch.to(dev)
33 | pred = model(graph_batch, ini=ini) # tensor of size (bs, out_dim)
34 | return pred, indices_batch
35 |
36 |
37 | @tf
38 | def correction(LM, L2, args):
39 | if args.strategy == 'loukas' and args.correction:
40 | # remark: not ideal but a reasonable workaround. Memory intensive.
41 | # L2_correction = torch.sparse.mm(torch.sparse.mm(LM.invQ, L2.to_dense()).to_sparse(),
42 | # LM.invQ.to_dense())
43 | # L2_correction = L2_correction.to_sparse()
44 | # remark: has small difference with current version
45 | L2_correction = sparse_mm(L2, LM.invQ)
46 | else:
47 | L2_correction = L2
48 | return L2_correction
49 |
50 |
51 | class tester(object):
52 |     def __init__(self, name='default', comment='evaluation', dev='cuda'):
53 | self.test_data = {}
54 | self.test_data_comb = {}
55 | self.original_graph = {}
56 | self.sparse_graph = {}
57 | self.dev = dev
58 | self.name = name
59 | self.comment = comment
60 |
61 | def set_test_data(self, args, data_loader, verbose=False):
62 |         """ set the data for evaluation """
63 | self.rayleigh_flag = True if args.loss == 'rayleigh' else False
64 | if args.test_idx in self.test_data.keys():
65 | print(f'Test graph {args.test_idx} has been processed. Skip.')
66 | return
67 |
68 | g, _ = data_loader.load(args, mode='test')
69 | self.original_graph[args.test_idx] = g
70 |
71 | test_loader, sub, n_sml = set_loader(g, args.bs, shuffle=True, args=args)
72 |
73 | L1 = sub.L(g, normalization=args.lap)
74 | L2_ = sub.baseline0(normalization=args.lap)
75 |
76 | L1_comb = sub.L(g, normalization=None) if args.dynamic else None
77 | L2_comb = sub.baseline0(normalization=None) if args.dynamic else None
78 | self.L1_comb = L1_comb
79 | self.test_data_comb[args.test_idx] = L2_comb
80 |
81 | self.L2_ = L2_
82 | L2_trival = sub.trivial_L(sub.g_sml)
83 |
84 | LM = loss_manager(signal='bottomk', device=self.dev)
85 |
86 | try:
87 | LM.set_precomute_x(g, args, k=args.n_bottomk - args.offset)
88 | except:
89 | LM.set_x(L1, g.num_nodes, args.n_bottomk - args.offset, which='SM')
90 |
91 | LM.set_C(sub.C)
92 | LM.set_s(g.num_nodes, k=args.n_bottomk)
93 |
94 | params = {'inv': args.inv, 'dynamic': args.dynamic, 'rayleigh': self.rayleigh_flag, }
95 | bl_loss, bl_ratio_loss = LM.quaratic_loss(L1, L2_, sub.assignment, comb=(L1_comb, L2_comb), **params)
96 | trival_loss, _ = LM.quaratic_loss(L1, L2_trival, sub.assignment, comb=(L1_comb, L2_trival), **params) # todo: look at trivial loss
97 | L2_correction = correction(LM, L2_, args)
98 |
99 | bl_eigloss = LM.eigen_loss(L1, L2_correction, args.n_bottomk - args.offset, args=args, g1=g) if args.valeigen else torch.tensor(-1)
100 |
101 | edge_weight_sml_buffer = deepcopy(sub.g_sml.edge_weight).to(self.dev)
102 | edge_index_sml = sub.g_sml.edge_index.to(self.dev)
103 | test_data = g, test_loader, edge_weight_sml_buffer, sub, L1, \
104 | bl_loss, bl_ratio_loss, trival_loss, \
105 | bl_eigloss, n_sml, edge_index_sml, LM
106 | self.test_data[args.test_idx] = test_data
107 |
108 | if verbose:
109 | summary(L1, 'L1')
110 | summary(L2_, 'L2_')
111 | print(f'Baseline 0 loss: {red(bl_loss)}')
112 |
113 | banner(f'Finish setting {args.dataset} graph {args.test_idx}', compact=True, ch='-')
114 |
115 | def delete_test_data(self, idx):
116 | del self.test_data[idx]
117 |
118 | @tf
119 | def eval(self, model, args, verbose=False):
120 |
121 | t0 = time()
122 | model.eval()
123 |
124 | g, test_loader, edge_weight_sml_buffer, sub, L1, bl_loss, \
125 | bl_ratio_loss, trival_loss, bl_eigloss, n_sml, edge_index_sml, LM = \
126 | self.test_data[args.test_idx]
127 |
128 | L2_ini_comb = self.test_data_comb[args.test_idx] # get_laplacian_mat(edge_index_sml, edge_weight_sml_buffer, n_sml, normalization=None) if args.dynamic else None
129 |
130 | L1_comb = sub.L(g, normalization=None) if args.dynamic else None
131 | comb = (L1_comb, L2_ini_comb)
132 |
133 | for step_, batch in enumerate(test_loader):
134 | pred, indices_batch = apply_gnn(batch, model, self.dev, ini=args.ini)
135 | edge_weight_sml = V(edge_weight_sml_buffer)
136 | edge_weight_sml[indices_batch] = pred.view(-1).repeat_interleave(2)
137 | # L2 = get_laplacian_mat(edge_index_sml, edge_weight_sml, n_sml, normalization=args.lap)
138 |
139 | if verbose:
140 | summary(pred, f'test: pred at step {step_}')
141 | summary(edge_weight_sml, f'test: edge_weight_sml at {step_}')
142 | summary(indices_batch, f'test: indices_batch at {step_}')
143 | print()
144 |
145 | L2 = get_laplacian_mat(edge_index_sml, edge_weight_sml, n_sml, normalization=args.lap)
146 | L2_correction = correction(LM, L2, args)
147 |
148 | loss, ratio_loss = LM.quaratic_loss(L1, L2, sub.assignment, inv=args.inv,
149 | rayleigh=self.rayleigh_flag,
150 | verbose=True)
151 |             # expensive, so only calculate when needed
152 | eigloss = LM.eigen_loss(L1, L2_correction, args.n_bottomk - args.offset, args=args,
153 | g1=g) if args.valeigen else torch.tensor(-1) # todo: change back
154 |
155 | t1 = time()
156 | msg = 'Generalize!' if loss < min(bl_loss, trival_loss) else ''
157 | nsig = 3
158 | logging.info(' ' * 12 +
159 | f'Graph-{args.dataset}: {args.test_idx}. '
160 | f'{red("Test-Val")}({pf(t1 - t0, 1)}): {pf(loss, nsig)}({pf(ratio_loss, nsig)}) / '
161 | f'{pf(bl_loss, nsig)}({pf(bl_ratio_loss, nsig)}) / {pf(trival_loss)}. {red(msg)}. '
162 | f'Eigenloss: {pf(eigloss, nsig)}. '
163 | f'Bl_Eigenloss: {pf(bl_eigloss, nsig)}.')
164 |
165 | n_gen = 1 if msg == 'Generalize!' else 0
166 | impr_ratio = min(bl_loss, trival_loss) / loss
167 | eigen_ratio = (bl_eigloss - eigloss) / bl_eigloss
168 | return n_gen, impr_ratio.item(), eigen_ratio.item()
169 |
170 |
171 | class trainer(object):
172 |
173 | def __init__(self, name='default', comment='test tensorboard', dev='cuda'):
174 |         self.n_graph = 0  # number of graphs that have been processed
175 | self.train_data = {}
176 | self.train_data_comb = {}
177 | self.dev = dev
178 | self.name = name
179 | self.comment = comment
180 | self.original_graph = {}
181 |
182 | def set_train_data(self, args, data_loader):
183 | """ quite similar with set_test_data """
184 |         self.rayleigh_flag = (args.loss == 'rayleigh')
185 |
186 | if args.train_idx in self.train_data.keys():
187 | print(f'Train graph {args.train_idx} has been processed. Skip.')
188 | return
189 |
190 | g, _ = data_loader.load(args, mode='train')
191 | self.original_graph[args.train_idx] = g
192 | train_loader, sub, n_sml = set_loader(g, args.bs, shuffle=True, args=args)
193 |
194 | L1 = sub.L(g, normalization=args.lap)
195 | L1_comb = sub.L(g, normalization=None) if args.dynamic else None
196 | g_sml, assignment = sub.g_sml, sub.assignment
197 | edge_index_sml = g_sml.edge_index.to(self.dev)
198 | L2_ = sub.baseline0(normalization=args.lap)
199 | L2_comb = sub.baseline0(normalization=None) if args.dynamic else None
200 | self.train_data_comb[args.train_idx] = (L1_comb, L2_comb)
201 |
202 | self.L2_ = L2_
203 | L_trivial = sub.trivial_L(g_sml) # todo: look at trivial loss tomorrow
204 |
205 | summary(L1, 'L1')
206 | summary(L2_, 'L2_baseline0')
207 |
208 | LM = loss_manager(signal='bottomk', device=self.dev)
209 |         # test vectors are slightly different when adding the loukas_quality argument.
210 |         # Not sure why, but the change seems very minor.
211 | try:
212 | LM.set_precomute_x(g, args, k=args.n_bottomk)
213 |         except Exception:  # fall back to recomputing the eigenvectors
214 | LM.set_x(L1, g.num_nodes, args.n_bottomk, which='SM')
215 | LM.set_C(sub.C)
216 | LM.set_s(g.num_nodes, k=args.n_bottomk)
217 |
218 | bl_loss, bl_ratio = LM.quaratic_loss(L1, L2_, sub.assignment, inv=args.inv, rayleigh=self.rayleigh_flag,
219 | dynamic=args.dynamic, comb=(L1_comb, L2_comb))
220 | trivial_loss, trivial_ratio = LM.quaratic_loss(L1, L_trivial, sub.assignment, inv=args.inv,
221 | rayleigh=self.rayleigh_flag, dynamic=args.dynamic,
222 | comb=(L1_comb, L_trivial))
223 | L2_correction = correction(LM, L2_, args)
224 |         skip_flag = g.num_nodes > 1e3
225 | bl_eigen_loss = LM.eigen_loss(L1, L2_correction, args.n_bottomk, args=args, g1=g, skip=skip_flag)
226 |
227 | edge_weight_sml_buffer = deepcopy(sub.g_sml.edge_weight).to(self.dev)
228 | train_data = g, train_loader, edge_weight_sml_buffer, \
229 | sub, L1, bl_loss, bl_ratio, \
230 | trivial_loss, trivial_ratio, \
231 | n_sml, edge_index_sml, LM, bl_eigen_loss
232 |
233 | assert args.train_idx not in self.train_data.keys(), \
234 | f'Overwrite self.train_data for key {args.train_idx}. Check carefully!'
235 |
236 | self.train_data[args.train_idx] = train_data
237 | return
238 |
239 | def delete_train_data(self, idx):
240 | del self.train_data[idx]
241 |
242 | @tf
243 | def train(self, model, optimizer, args, verbose=False):
244 | g, train_loader, edge_weight_sml_buffer, sub, L1, bl_loss, bl_ratio, trivial_loss, \
245 | trivial_ratio, n_sml, edge_index_sml, LM, bl_eigen_loss = self.train_data[args.train_idx]
246 |
247 | L2_ini = get_laplacian_mat(edge_index_sml, edge_weight_sml_buffer, n_sml, normalization=args.lap)
248 | L1_comb, L2_ini_comb = self.train_data_comb[args.train_idx]
249 | summary(L1_comb, 'Train: L1_comb', highlight=True)
250 | summary(L2_ini_comb, 'Train: L2_ini_comb', highlight=True)
251 | loss_ini, _ = LM.quaratic_loss(L1, L2_ini, sub.assignment, verbose=False, inv=args.inv, dynamic=args.dynamic,
252 | comb=(L1_comb, L2_ini_comb))
253 |
254 | logging.info(f'Initial quaratic loss: {red(pf(loss_ini, 3))}.')
255 | for n_iter in range(1, args.n_epoch + 1):
256 | t0 = time()
257 |
258 | for step, batch in enumerate(train_loader):
259 | model.train()
260 | pred, indices_batch = apply_gnn(batch, model, self.dev, ini=args.ini)
261 | edge_weight_sml = V(edge_weight_sml_buffer)
262 | edge_weight_sml[indices_batch] = pred.view(-1).repeat_interleave(2)
263 |
264 | L2 = get_laplacian_mat(edge_index_sml, edge_weight_sml, n_sml, normalization=args.lap)
265 | comb = (L1_comb, L2_ini_comb)
266 |
267 | loss, ratio = LM.quaratic_loss(L1, L2, sub.assignment, verbose=False, inv=args.inv,
268 | rayleigh=self.rayleigh_flag)
269 |
270 | optimizer.zero_grad()
271 | loss.backward(retain_graph=False) # https://bit.ly/2LbZNaR
272 | optimizer.step()
273 |
274 | L2_correction = correction(LM, L2, args)
275 |             skip_flag = g.num_nodes > 1e2
276 | eigen_loss = LM.eigen_loss(L1, L2_correction, args.n_bottomk, args=args, g1=g, skip=skip_flag)
277 |
278 | space = '\n' if verbose else ''
279 | n_sig = 3
280 | logging.info(f'{args.dataset}-Idx {args.train_idx}-Epoch: {n_iter}. '
281 | f'Train({pf(time() - t0)}): {pf(loss, n_sig)}({pf(ratio, n_sig)})'
282 | f' / {pf(bl_loss, n_sig)}({pf(bl_ratio, n_sig)}) / {pf(trivial_loss, n_sig)}. '
283 | f'Eigenloss: {pf(eigen_loss, n_sig)}. {space}'
284 | f'Bl_Eigenloss: {pf(bl_eigen_loss, n_sig)}')
285 |
286 | banner(f'Finish training {args.dataset} {args.train_idx} for {args.n_epoch} epochs.')
287 | self.n_graph += 1
288 |
--------------------------------------------------------------------------------
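For orientation before the next file: the train/eval loops above share one pattern — the GNN predicts new weights for the coarse graph's edges, the coarse Laplacian is rebuilt from those weights, and a quadratic-form mismatch against the original Laplacian is minimized. The sketch below reproduces that pattern with dense toy tensors and a recent PyTorch; the toy graphs, the two-layer MLP, the per-edge features, and the one-hot projection are illustrative stand-ins, not the repository's model or data.

```python
# Minimal sketch of the learn-edge-weights loop, on dense toy Laplacians.
# Everything named here (toy graphs, MLP, features, projection) is illustrative only.
import torch

def laplacian(edge_index, edge_weight, n):
    # combinatorial Laplacian L = D - W from an undirected edge list (one column per edge)
    W = torch.zeros(n, n)
    W[edge_index[0], edge_index[1]] = edge_weight
    W[edge_index[1], edge_index[0]] = edge_weight
    return torch.diag(W.sum(dim=1)) - W

n, m = 6, 3
e1 = torch.tensor([[0, 1, 2, 3, 4, 0], [1, 2, 3, 4, 5, 5]])   # original graph (a 6-cycle)
w1 = torch.ones(e1.shape[1])
e2 = torch.tensor([[0, 1, 0], [1, 2, 2]])                      # coarse graph (a triangle)
assign = torch.tensor([0, 0, 1, 1, 2, 2])                      # original node -> coarse node
P = torch.eye(m)[assign].t()                                    # (m, n) projection, x' = P x

L1 = laplacian(e1, w1, n)
x = torch.linalg.eigh(L1).eigenvectors[:, :3]                   # bottom-3 eigenvectors as test signals

mlp = torch.nn.Sequential(torch.nn.Linear(1, 16), torch.nn.ReLU(), torch.nn.Linear(16, 1))
opt = torch.optim.Adam(mlp.parameters(), lr=1e-2)
feat = torch.ones(e2.shape[1], 1)                               # hypothetical per-edge feature

for _ in range(200):
    w2 = torch.nn.functional.softplus(mlp(feat)).view(-1)       # predicted positive edge weights
    L2 = laplacian(e2, w2, m)
    xp = P @ x
    quad1 = torch.diagonal(x.t() @ L1 @ x)                      # x^T L1 x per signal
    quad2 = torch.diagonal(xp.t() @ L2 @ xp)                    # (Px)^T L2 (Px) per signal
    loss = (quad1 - quad2).abs().mean()
    opt.zero_grad()
    loss.backward()
    opt.step()

print(float(loss))
```

In the repository the Laplacians are sparse, the projection comes from the coarsening assignment, and the test signals are precomputed bottom-k eigenvectors; the dense version above only makes the gradient path through the edge weights explicit.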
/sparsenet/util/loss_util.py:
--------------------------------------------------------------------------------
1 | # Created at 2020-05-20
2 | # Summary: Implement a class so that one can get all sorts of random vecs.
3 |
4 | from copy import deepcopy
5 |
6 | import numpy as np
7 | import scipy as sp
8 | import torch
9 | from torch.sparse import mm as smm
10 |
11 | from deprecated import deprecated
12 | from scipy.sparse import coo_matrix
13 | from scipy.sparse.linalg import eigs, eigsh
14 |
15 | from sparsenet.model.loss import get_sparse_projection_mat
16 | from sparsenet.util.cut_util import pyG_conductance
17 | from sparsenet.util.torch_util import sparse_mm2, sparse_matrix2sparse_tensor
18 | from sparsenet.util.util import timefunc as tf, fix_seed, summary, random_laplacian, pf, tonp, red, dic2tsr
19 |
20 | fix_seed()
21 |
22 |
23 | class vec_generator(object):
24 | def __init__(self):
25 | pass
26 |
27 | def _normalize(self, X):
28 | """
29 | :param X: Input vec mat.
30 | :return: Normalized vec mat.
31 | """
32 | return X / ((X ** 2).sum(0, keepdim=True)).sqrt()
33 |
34 | def _sparse_tensor2_sparse_numpyarray(self, sparse_tensor):
35 | """
36 | :param sparse_tensor: a COO torch.sparse.FloatTensor
37 | :return: a scipy.sparse.coo_matrix
38 | """
39 | if sparse_tensor.device.type == 'cuda':
40 | sparse_tensor = sparse_tensor.to('cpu')
41 |
42 | values = sparse_tensor._values().numpy()
43 | indices = sparse_tensor._indices()
44 | rows, cols = indices[0, :].numpy(), indices[1, :].numpy()
45 | size = sparse_tensor.size()
46 | scipy_sparse_mat = coo_matrix((values, (rows, cols)), shape=size, dtype=np.float)
47 | return scipy_sparse_mat
48 |
49 | def _laplacian2adjacency(self, laplacian):
50 | """
51 | :param laplacian: Input laplacian mat.
52 |         :return: the adjacency matrix: remove diagonal elements and negate off-diagonal elements to make them positive.
53 | """
54 | values, indices = laplacian._values(), laplacian._indices()
55 | mask = [False if (u == v) else True for _, (u, v) in enumerate(indices.t().tolist())]
56 | new_values, new_indices = -values[mask], indices[:, mask]
57 | return torch.sparse.FloatTensor(new_indices, new_values, laplacian.size())
58 |
59 | def random_vec(self, N, num_vec, normalized=True, reproducible=False):
60 | """
61 | :param N: Dimension of the vec
62 | :param num_vec: Number of random vec
63 |         :param normalized: If True, the L2 norm of the returned vectors will be 1.
64 | :param reproducible: if reproducible=True, then the random seeds are fixed.
65 | :return: A N * num_vec random vec tensor.
66 | """
67 | if reproducible:
68 | fix_seed()
69 | X = torch.rand(N, num_vec) - 0.5
70 | if normalized:
71 | X = self._normalize(X)
72 | return X
73 |
74 | @tf
75 | def bottomk_vec(self, laplacian, k, which='SM', val=False):
76 | """
77 | :param laplacian: The input laplacian matrix, should be a sparse tensor.
78 |         :param k: The number of bottom (smallest) eigenvectors.
79 |         :param which: LM/SM: largest/smallest magnitude, LR/SR: largest/smallest real value.
80 |                       For more details see scipy.sparse.linalg.eigs.
81 |         :return: the bottom k eigenvectors, as an N * k tensor. All vectors are automatically normalized.
82 | """
83 | assert isinstance(laplacian, (torch.sparse.FloatTensor, torch.cuda.sparse.FloatTensor)), \
84 | f'input laplacian must be sparse tensor. Got {type(laplacian)}'
85 |
86 | # we need to convert the sparse tensor to scipy sparse mat, so that we can apply
87 | # the functions scipy.sparse.linalg.eigs() which should be faster than other methods.
88 | scipy_lap = self._sparse_tensor2_sparse_numpyarray(laplacian)
89 | M, N = scipy_lap.shape
90 | assert (M == N and k < N - 1), f'Input laplacian must be a square matrix. ' \
91 | f'To use scipy method, {k} (#eigvecs) < {N - 1} (size of laplacian - 1).'
92 |
93 | try:
94 | vals, vecs = eigsh(scipy_lap, k=k, which=which, tol=1e-3)
95 | vecs = torch.FloatTensor(vecs.real)
96 | except sp.sparse.linalg.eigen.arpack.ArpackNoConvergence:
97 | print(red('Eigsh failed. Try computing with eigs'))
98 | vals, vecs = eigs(scipy_lap, k=k, which=which, tol=0)
99 | vecs = torch.FloatTensor(vecs.real)
100 |         except Exception:
101 |             exit(f'Convergence Error in bottomk_vec when computing {k} eigenvectors.')  # shape dataset has this problem
102 |
103 | vecs = self._normalize(vecs) # no effect
104 | if val:
105 | return vals
106 | else:
107 | return vecs
108 |
109 | def random_projected_vec(self, laplacian, num_vec, power_method_iter=5, reproducible=False):
110 | """
111 | :param laplacian: The laplacian matrix, used to generate the adjacency mat for power method.
112 | :param num_vec: Number of starting random vectors.
113 | :param reproducible: fix random seed?
114 | :param power_method_iter: How many times we apply f(i+1) = Af(i)/|Af(i)|
115 |         :return: num_vec vectors of length N, as an N * num_vec matrix. The power method is applied to each vector
116 |                  power_method_iter times.
117 | """
118 | assert isinstance(laplacian,
119 | (torch.sparse.FloatTensor, torch.cuda.sparse.FloatTensor)), 'Input laplacian must be' \
120 | 'sparse tensor.'
121 | size = laplacian.size()
122 | assert (size[0] == size[1]), 'Input laplacian must be a square mat.'
123 | vectors = self.random_vec(size[0], num_vec, normalized=True, reproducible=reproducible)
124 | print('Original vecs:', vectors)
125 | adjacency = self._laplacian2adjacency(laplacian)
126 | for i in range(power_method_iter):
127 | vectors = self._normalize(smm(adjacency, vectors))
128 | return vectors
129 |
130 |
131 | class loss_manager(object):
132 | def __init__(self, signal='bottomk', device='cuda'):
133 | assert signal in ['bottomk', 'random', 'random_proj'], f'signal {signal} is not implemented!'
134 | self.gen = vec_generator()
135 | if signal == 'bottomk':
136 | method = 'bottomk_vec'
137 | elif signal == 'random':
138 | method = 'random_vec'
139 | elif signal == 'random_proj':
140 | method = 'random_projected_vec'
141 | else:
142 |             raise NotImplementedError
143 |
144 | self.method = method
145 | self.dev = device
146 | self.L1 = None
147 | self.vals_L1 = None
148 | self.inv_asgmt = None
149 | self.Projection = None
150 | self.D1 = None
151 |
152 | @tf
153 | def set_C(self, C=None):
154 |         # Cache the coarsening matrix C, pi = C^T C (used for the Rayleigh quotient), and invQ = diag(1 / row sums of C) (used by correction()).
155 | if C is not None:
156 | self.C = C # csc_matrix of shape (n, N)
157 | self.pi = sparse_matrix2sparse_tensor(self.C.T.dot(self.C).tocoo(),
158 | dev=self.dev) # mainly used for rayleigh quotient
159 | tmp = self.C.dot(np.ones((self.C.shape[1], 1))).reshape(-1)
160 | assert np.min(tmp) > 0, f'min of tmp is {np.min(tmp)}'
161 | # self.Q = np.diag(tmp)
162 | # self.invQ = np.diag(1/tmp)
163 |
164 | n = self.C.shape[0]
165 | diag_indices = [list(range(n))] * 2
166 | i = torch.LongTensor(diag_indices)
167 | v = torch.FloatTensor(1.0 / tmp)
168 | self.invQ = torch.sparse.FloatTensor(i, v, torch.Size([n, n])).to(self.dev)
169 |
170 | def set_precomute_x(self, g, args, k=40, v=False):
171 | key = f'{args.lap}_vecs'
172 | self.x = g[key][:, :k].to(self.dev)
173 | if v:
174 | summary(self.x, red(f'precomputed test vector {key}'))
175 |
176 | def set_x(self, *args, **kwargs):
177 | print(red('Recompute eigenvector'))
178 | self.x = getattr(self.gen, self.method)(*args, **kwargs).to(self.dev)
179 | summary(self.x, 'test vector')
180 |
181 | @tf
182 | def set_s(self, n, k=40):
183 | """ set a random set of nodes for condunctance.
184 | Generate a list (len k) of random nodes in ORIGINAL graph as test subset
185 | """
186 | self.s_list = []
187 | self.s_list_tsr = []
188 | for _ in range(k):
189 | _size = np.random.choice(range(int(n / 4.0), int(n / 2.0)))
190 | s = np.random.choice(range(n), size=_size, replace=False).tolist()
191 | self.s_list.append(s)
192 | self.s_list_tsr.append(torch.tensor(s))
193 |
194 | ############ condunctance_loss related ############
195 | @tf
196 | def _build_inv_asgnment(self, assgnment):
197 | if self.inv_asgmt is None:
198 | self.inv_asgmt = {v: key for (key, value) in assgnment.items() for v in
199 | value} # key is the nodes in large graph.
200 | self.inv_asgmt_tsr = dic2tsr(self.inv_asgmt, dev=self.dev)
201 |
202 | @tf
203 | def get_s_prime(self, s):
204 | """ assume self.inv_asgmt is built.
205 | From s generate s_prime. used for condunctance_loss.
206 | """
207 | s = s.to(self.dev)
208 | if isinstance(s, torch.Tensor):
209 | s_prime = torch.index_select(self.inv_asgmt_tsr, 0, s)
210 | s_prime = torch.unique(s_prime) # remove duplicates
211 | elif isinstance(s, list):
212 | s_prime = [self.inv_asgmt[s_] for s_ in s]
213 | s_prime = list(set(s_prime))
214 | else:
215 | summary(s, 's')
216 | raise NotImplementedError(f's is {type(s)}')
217 |
218 | return s_prime
219 |
220 | @tf
221 | def condunctance_loss(self, g1, g2, assgnment, verbose=False):
222 | """
223 | todo: slow for shape dataset: 1.2s each batch
224 | :param g1: edge_index1, edge_attr1 for original graph
225 | :param g2: edge_index2, edge_attr2 for smaller graph
226 | :param assgnment: dict
227 | :return:
228 | """
229 |
230 | edge_index1, edge_attr1 = g1
231 | edge_index2, edge_attr2 = g2
232 | self._build_inv_asgnment(assgnment)
233 | loss = 0
234 | for i, s in enumerate(self.s_list_tsr):
235 |
236 | cond1 = pyG_conductance(edge_index1, edge_attr1, s.tolist(), t=None, dev=self.dev)
237 | s_prime = self.get_s_prime(s)
238 | cond2 = pyG_conductance(edge_index2, edge_attr2, s_prime.tolist(), t=None, dev=self.dev)
239 | loss += torch.abs(cond1 - cond2)
240 |
241 | if verbose:
242 | print(f's: {len(s)}. s_prime: {len(s_prime)}')
243 | summary(np.array(s), 's')
244 | summary(edge_index1, 'edge_index1')
245 | summary(edge_attr1, 'edge_attr1')
246 | print(red(f'cond1-{i}: {pf(cond1, 2)}. cond2-{i}: {pf(cond2, 2)}'))
247 |
248 | return loss / len(self.s_list)
249 |
250 | ############ quadratic_loss related ###############
251 | @deprecated(reason="to be refactord")
252 | def _set_d(self, L, power=0.5):
253 | """ from sparse tensor L to degree matrix """
254 | # todo: speed up. 3
255 | dev = L.device
256 | n = L.shape[0]
257 | idx = torch.LongTensor([[i, i] for i in range(n)]).T.to(dev)
258 | diag = torch.diag(L.to_dense())
259 | diag = diag ** (power)
260 | deg = torch.sparse.FloatTensor(idx, diag, torch.Size([n, n]))
261 | return deg
262 |
263 | def quaratic_loss(self, L1, L2, assignment, verbose=False, inv=False,
264 | rayleigh=False, dynamic=False,
265 | comb=(None, None)):
266 | """
267 |         modified from random_vec_loss.
268 | :param L1: Laplace of original graph
269 | :param L2: Laplace of smaller graph
270 |         :param assignment: node assignment used to build the projection matrix
271 |         :param inv: inverse Laplacian. (Not Really Working)
272 |         :param rayleigh: normalize x (Rayleigh-quotient style)
273 |         :param dynamic: dynamic projection. Update the projection at runtime.
274 |         :param comb: combinatorial L1, L2. Only used for the normalized Laplacian.
275 | :return loss, ratio
276 | """
277 | L1, L2 = L1.to(self.dev), L2.to(self.dev)
278 | if self.Projection is None:
279 | self.Projection = get_sparse_projection_mat(L1.shape[0], L2.shape[0], assignment).to(self.dev) # sparse tensor
280 | Projection = self.Projection
281 | else:
282 | Projection = self.Projection
283 |
284 | if dynamic:
285 | L1_comb, L2_comb = comb
286 | assert L1_comb is not None
287 | if self.D1 is None:
288 | self.D1 = self._set_d(L1_comb, power=-0.5)
289 | D1 = self.D1
290 | else:
291 | D1 = self.D1
292 | D2 = self._set_d(L2_comb, power=0.5)
293 | Projection = sparse_mm2(Projection, D1, D2)
294 |
295 | X_prime = smm(Projection, self.x)
296 |
297 | if inv:
298 | raise NotImplementedError
299 | else:
300 | quadL1 = torch.mm(self.x.t(), smm(L1, self.x))
301 |             quadL2 = torch.mm(X_prime.t(), smm(L2, X_prime))
302 |
303 |         diff = torch.abs(torch.diag(quadL1 - quadL2))
304 | if rayleigh:
305 | assert self.pi is not None
306 | denominator = torch.diag(torch.mm(self.x.t(), smm(self.pi, self.x))) # (n_bottomk,)
307 | diff = diff / denominator
308 |
309 | loss = torch.mean(diff)
310 |         ratio = torch.sum(torch.diag(quadL2)) / torch.sum(torch.diag(quadL1))
311 | ratio = torch.abs(torch.log(ratio))
312 | if verbose:
313 | bad_indices = tonp((diff / loss > 1).nonzero())
314 | print(bad_indices.reshape(-1))
315 | return loss, ratio
316 |
317 | @tf
318 | def eigen_loss(self, L1, L2, k, args=None, g1=None, skip=False):
319 | """ compare the first k eigen difference; L1 is larger than L2
320 | :param args
321 | :param g1: used for retrive precomputed spectrum
322 | """
323 | if skip: return -1
324 |
325 | # get eigenvalues of L1
326 | if self.vals_L1 is None:
327 | # compute eigenvals only once
328 | self.L1 = L1 # doesn't seem to be useful any more
329 | key = str(args.lap) + '_vals'
330 | vals_L1 = g1[key][:k].numpy()
331 | vals_L1 = deepcopy(vals_L1) # if not deepcopy, g1 None_vals[0] will get modified
332 | self.vals_L1 = vals_L1
333 | else:
334 | vals_L1 = self.vals_L1
335 |
336 | # get eigenvalues of L2
337 | if args.cacheeig:
338 | raise NotImplementedError
339 | else:
340 | vals_L2 = self.gen.bottomk_vec(L2, k, which='SM', val=True).real
341 |
342 | # compute the eigenvalues error
343 | vals_L1 = vals_L1[:len(vals_L2)] # in case vals_L1 and vals_L2 are of different length
344 |         bad_indices = np.nonzero(vals_L1 < 1e-5)[0]  # indices of (near-)zero eigenvalues
345 | if len(bad_indices) > 1:
346 | print(red(f'There are {len(bad_indices)} nearly zero eigenvalues.'))
347 | err = np.abs(vals_L1 - vals_L2) / (vals_L1 + 1e-15)
348 | err[0] = 0
349 | err[bad_indices] = 0
350 | return np.mean(err)
351 |
352 |
353 | if __name__ == '__main__':
354 |
355 | LM = loss_manager(signal='bottomk')
356 |
357 | # exit()
358 | gen = vec_generator()
359 | # print(gen.random_vec(N=3, num_vec=2,reproducible=True))
360 | # print(gen.random_vec(N=3, num_vec=2,reproducible=True))
361 |
362 | i = torch.LongTensor([[0, 1, 2, 3], [0, 1, 2, 3]])
363 | v = torch.FloatTensor([0, 1, 2, 3])
364 | sparse_mat = torch.sparse.FloatTensor(i, v, torch.Size((4, 4)))
365 | LM = loss_manager()
366 | eigloss = LM.eigen_loss(sparse_mat, sparse_mat, 2, g1=None)
367 | print(eigloss)
368 | bottomk_vec = gen.bottomk_vec(laplacian=sparse_mat, k=2)
369 |
370 | for i in range(2):
371 | summary(bottomk_vec[:, i], f'bottomk_vec[:, {i}]')
372 | exit()
373 |
374 | # i = torch.LongTensor([[0, 1, 2, 3, 1, 2, 0, 3, 1, 3],[0, 1, 2, 3, 2, 1, 3, 0, 3, 1]])
375 | # v = torch.FloatTensor([1, 2, 1, 2, -1, -1, -1, -1, -1, -1])
376 | projected_vec = gen.random_projected_vec(laplacian, 5, 2, reproducible=True)
377 | summary(projected_vec, 'projected_vec')
378 |
379 | projected_vec = gen.random_projected_vec(laplacian, 5, 2, reproducible=True)
380 | print('Projected vecs', projected_vec)
381 | exit()
382 |
383 | n = 100
384 | i, v = random_laplacian(n)
385 | summary(i, 'i')
386 | summary(v, 'v')
387 | laplacian = torch.sparse.FloatTensor(i, v, torch.Size((n, n)))
388 |
389 | for _ in range(5):
390 | eigenvec = gen.bottomk_vec(laplacian, 2)
391 | summary(eigenvec, 'eigenvec')
392 | exit()
393 |
--------------------------------------------------------------------------------
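Before moving on, a standalone sketch of the two spectral quantities loss_util.py relies on: bottom-k eigenvalues via scipy.sparse.linalg.eigsh (as in vec_generator.bottomk_vec) and a relative eigenvalue error in the spirit of loss_manager.eigen_loss. The path graphs and the 50/25 size pair below are hypothetical stand-ins for an original graph and its coarsened version.

```python
# Sketch only: bottom-k eigenvalues via eigsh and a relative eigenvalue error,
# mirroring vec_generator.bottomk_vec and loss_manager.eigen_loss.
import numpy as np
import scipy.sparse as sp
from scipy.sparse.linalg import eigsh

def path_laplacian(n):
    # combinatorial Laplacian of an n-node path graph, as a sparse CSC matrix
    A = sp.diags([np.ones(n - 1), np.ones(n - 1)], [1, -1])
    return (sp.diags(np.asarray(A.sum(axis=1)).ravel()) - A).tocsc()

k = 5
vals1 = np.sort(eigsh(path_laplacian(50), k=k, which='SM', tol=1e-3, return_eigenvectors=False))
vals2 = np.sort(eigsh(path_laplacian(25), k=k, which='SM', tol=1e-3, return_eigenvectors=False))

# relative error, skipping (near-)zero eigenvalues as eigen_loss does
err = np.abs(vals1 - vals2) / (vals1 + 1e-15)
err[vals1 < 1e-5] = 0
print(err.mean())
```

In the repository the second Laplacian carries the learned weights and everything lives in torch sparse tensors; the point here is only the error definition, including the masking of (near-)zero eigenvalues.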
/sparsenet/util/graph_util.py:
--------------------------------------------------------------------------------
1 | # Created at 2020-04-16
2 | # Summary: util functions
3 |
4 | import shutil
5 | from warnings import warn
6 |
7 | import numpy as np
8 | import torch
9 | import torch_geometric
10 | from memory_profiler import profile
11 | from torch.autograd import Variable
12 | from torch_geometric.data import Data
13 | from torch_geometric.transforms import LocalDegreeProfile
14 | from torch_geometric.utils import from_networkx, subgraph
15 |
16 | from sparsenet.model.loss import get_laplacian_mat, get_sparse_C
17 | from sparsenet.util.gsp_util import gsp2pyg
18 | from sparsenet.util.name_util import set_coarsening_graph_dir
19 | from sparsenet.util.sample import sample_N2Nlandmarks
20 | from sparsenet.util.util import timefunc, banner, summary, random_edge_index, fix_seed, red, make_dir
21 |
22 | INFINITY = 1e8
23 |
24 |
25 | @timefunc
26 | def get_bipartite(G1, G2, crossing_edge):
27 | '''
28 | :param G1: graph 1
29 | :param G2: graph 2
30 | :param crossing_edge: crossing edges between those two subgraphs of G
31 |     :return: A bipartite graph G1 <-> G2 (nodes(G1) + nodes(G2) + crossing edges), in torch_geometric format
32 | '''
33 | xedge_index, xedge_attr = crossing_edge
34 | final_x = torch.cat((G1.x, G2.x), 0)
35 | final_node_index = torch.cat((G1.node_index, G2.node_index), 0)
36 | return Data(x=final_x, edge_attr=xedge_attr, edge_index=xedge_index, node_index=final_node_index)
37 |
38 |
39 | @timefunc
40 | def get_merged_subgraph(G1, G2, crossing_edge):
41 | '''
42 | :param G1: pygeo graph G1
43 | :param G2: pygeo graph G2
44 | :param crossing_edge: (edge_index, edge_attr)
45 | :return: Merge x, edge_attr, edge_index, and node_index in G1, G2, and crossing edge
46 | '''
47 | xedge_index, xedge_attr = crossing_edge
48 | final_edge_index = torch.cat((G1.edge_index, G2.edge_index, xedge_index), 1)
49 | final_edge_attr = torch.cat((G1.edge_attr, G2.edge_attr, xedge_attr), 0)
50 | final_x = torch.cat((G1.x, G2.x), 0)
51 | final_node_index = torch.cat((G1.node_index, G2.node_index), 0)
52 | return Data(x=final_x, edge_attr=final_edge_attr, edge_index=final_edge_index, node_index=final_node_index)
53 |
54 |
55 | @profile
56 | class GraphPair(object):
57 | def __init__(self, G, g_sml, Assignment):
58 | """
59 | :param G: Original graph G, in pygeo format
60 |         :param g_sml: sampled graph g_sml, in pygeo format; the node indices of both G and g_sml MUST start from 0.
61 | :param Assignment: assignment
62 | """
63 | assert (isinstance(G, torch_geometric.data.data.Data)
64 | and isinstance(g_sml, torch_geometric.data.data.Data)), f'G is {type(G)}. g_sml is {type(g_sml)}'
65 | self.G = G
66 | if 'edge_attr' not in G.keys:
67 | # todo: discuss with DK
68 |             warn('edge_attr does not exist. Will create 1-dim edge attr (all ones)')
69 | G.edge_attr = torch.ones((G.num_edges, 1))
70 | banner('modified G')
71 | summary(G)
72 |
73 | self.G_prime = g_sml
74 | self.assignment = Assignment
75 | assert (self.__check_indexes()), 'Input graphs must have node_index starting from 0.'
76 | tensor_imap, inverse_assignment = {}, {}
77 | for i, (uprime, vprime) in enumerate(g_sml.edge_index.t().tolist()):
78 | if uprime > vprime:
79 | tensor_imap[(vprime, uprime)] = [i] if (vprime, uprime) not in tensor_imap.keys() \
80 | else tensor_imap[(vprime, uprime)] + [i]
81 | else:
82 | tensor_imap[(uprime, vprime)] = [i] if (uprime, vprime) not in tensor_imap.keys() \
83 | else tensor_imap[(uprime, vprime)] + [i]
84 | for key, value in Assignment.items():
85 | for v in value:
86 | inverse_assignment[v] = key
87 | self.tensor_imap = tensor_imap
88 | self.inverse_assignment = inverse_assignment # map index of big graph to small graph
89 |
90 | def __check_indexes(self):
91 | gflag, gprimeflag = False, False
92 |         for (u, v) in self.G.edge_index.t().tolist():
93 | if u == 0 or v == 0:
94 | gflag = True
95 | break
96 |         for (uprime, vprime) in self.G_prime.edge_index.t().tolist():
97 | if uprime == 0 or vprime == 0:
98 | gprimeflag = True
99 | break
100 | return gflag and gprimeflag
101 |
102 | @timefunc
103 | def construct(self):
104 | """
105 |         This function computes the partition of the original graph G based on landmarks. It also
106 |         computes the crossing_edges and the indices in the edge_index tensor of every edge (u, v) in G_prime.
107 | :return: void.
108 | """
109 | N = len(self.assignment.keys())
110 | G_edge_attrs, G_edge_indices = self.G.edge_attr.tolist(), self.G.edge_index.t().tolist()
111 | G_x = self.G.x.tolist()
112 | print('N:', N, 'x:', len(G_x), 'edge_attr:', len(G_edge_attrs), 'edge_indices:', len(G_edge_indices))
113 | # subgraphs: list of subgraph: {'edge_index':[], 'edge_attr':[], 'x':[]}
114 | # crossing_edges: dictionary key (uprime, vprime) an edge from G_prime; crossing_edges[(up, vp)] = list of
115 | # [e=(u, v), e_attr], where e is an edge in G, and e_attr is its corresponding attr.
116 | subgraphs, crossing_edges = [{'edge_index': [], 'edge_attr': [], 'x': []} for i in range(N)], {}
117 | for i, (u, v) in enumerate(G_edge_indices):
118 | uprime, vprime = self.inverse_assignment[u], self.inverse_assignment[v]
119 | if uprime == vprime: # add into subgraph gs[uprime]
120 | subgraphs[uprime]['edge_index'].append((u, v))
121 | subgraphs[uprime]['edge_attr'].append(G_edge_attrs[i])
122 | else: # add into crossing edges [(up, vp)]
123 | uprime, vprime = (vprime, uprime) if uprime > vprime else (uprime, vprime)
124 | crossing_edges[(uprime, vprime)] = [[(u, v), G_edge_attrs[i]]] if (uprime, vprime) not in crossing_edges \
125 | else crossing_edges[(uprime, vprime)] + [[(u, v), G_edge_attrs[i]]]
126 |
127 | for i, nx in enumerate(G_x):
128 | xprime = self.inverse_assignment[i]
129 | subgraphs[xprime]['x'].append(i)
130 | self.subgraphs = subgraphs
131 | self.crossing_edges = crossing_edges
132 |
133 | def __get_subgraph(self, uprime):
134 | """
135 | :param uprime: the id of a landmark in G_prime
136 | :return: the corresponding subgraph in original graph G (in pygeo format).
137 | """
138 | _x = torch.FloatTensor(self.subgraphs[uprime]['x'])
139 | _edge_index = torch.LongTensor(self.subgraphs[uprime]['edge_index']).t()
140 | _edge_attr = torch.FloatTensor(self.subgraphs[uprime]['edge_attr'])
141 | _node_index = torch.LongTensor(list(self.assignment[uprime]))
142 | return Data(x=_x, edge_index=_edge_index, edge_attr=_edge_attr, node_index=_node_index)
143 |
144 | def __get_crossing_edges(self, uprime, vprime):
145 | """
146 | :param uprime: landmark uprime in G_prime
147 | :param vprime: landmark vprime in G_prime
148 | :return: the crossing edges between subgraphs G_u, G_v (assigned to u_prime & v_prime) in G,
149 | return None if (uprime, vprime) is not in self.crossing_edges
150 | """
151 |         assert (uprime < vprime) and (uprime, vprime) in self.crossing_edges, f'({uprime}, {vprime}) is not in crossing ' \
152 | f'edges dictionary.'
153 | data = self.crossing_edges[(uprime, vprime)]
154 | _edge_index = [e[0] for e in data]
155 | _edge_attr = [e[1] for e in data]
156 | return (torch.LongTensor(_edge_index).t(), torch.FloatTensor(_edge_attr))
157 |
158 | def __get_tensor_indices(self, uprime, vprime):
159 | """
160 | :param uprime: landmark uprime in G_prime
161 | :param vprime: landmark vprime in G_prime
162 | :return: the indices of edge (uprime, vprime) in tensor edge_index in g_sml.
163 | """
164 | assert (uprime < vprime and (uprime, vprime) in self.tensor_imap)
165 | return tuple(self.tensor_imap[(uprime, vprime)])
166 |
167 | def get_data(self, edge):
168 | """
169 | :param edge: (u, v) from the sampled graph.
170 | :return: subgraphs G1, G2 corresponding to landmark u, v in edge. Crossing edges between G1, G2
171 | and the indices of (u, v), (v, u) in tensor edge_index of g_sml.
172 | """
173 | uprime, vprime = edge
174 | uprime, vprime = (vprime, uprime) if uprime > vprime else (uprime, vprime)
175 | G1, G2 = self.__get_subgraph(uprime), self.__get_subgraph(vprime)
176 | if G1.num_nodes == 1:
177 | warn(f'edge {edge}: output subgraph G1 is a singleton!')
178 | if G2.num_nodes == 1:
179 | warn(f'edge {edge}: output subgraph G2 is a singleton!')
180 | crossing_edges = self.__get_crossing_edges(uprime, vprime)
181 | tensor_indices = self.__get_tensor_indices(uprime, vprime)
182 | return G1, G2, crossing_edges, tensor_indices
183 |
184 |
185 | @profile
186 | class subgraphs(object):
187 | def __init__(self, g, assertion=False, args=None):
188 | """
189 |         :param g: pyG graph with edge_weight
190 | :param assertion: assert the edge index is right.
191 | """
192 |
193 | self.g = g
194 | self.C = None
195 | self.args = args
196 | assert 'edge_weight' in g.keys
197 | assert isinstance(g, Data)
198 |
199 | self.__load_coarsening_graphs(args, recompute=False)
200 |
201 | self.graph_pair = GraphPair(self.g, self.g_sml, self.assignment)
202 | self.graph_pair.construct()
203 |         info_dict = self.__construct_dict()  # avoid shadowing the built-in dict
204 | del self.graph_pair
205 |
206 | self.edges, self.inv_edges, self.double_edges = [], [], []
207 |         for (idx1, idx2) in info_dict.keys():
208 | self.edges.append(idx1)
209 | self.inv_edges.append(idx2)
210 | self.double_edges.append((idx1, idx2)) # [idx1, idx2]
211 | if assertion: self.__assert(idx1, idx2)
212 |
213 | new_info = {}
214 |         for k, v in info_dict.items():
215 | # k is of form [8, 9], v is of form (G1, G2, (crossing_edge_index, crossing_edge_attr), ini)
216 | new_info[k[0]] = v
217 | self.info = new_info
218 | del new_info
219 |
220 | def __assert(self, idx1, idx2):
221 | set_e1 = set(self.g_sml.edge_index[:, idx1].numpy()) # {0,, 13}
222 | set_e2 = set(self.g_sml.edge_index[:, idx2].numpy()) # {13, 0}
223 | assert set_e1 == set_e2, f'{idx1} edge is {set_e1}. {idx2} edge is {set_e2}'
224 |
225 | def __load_coarsening_graphs(self, args, recompute=False):
226 | dir = set_coarsening_graph_dir(args)
227 | if recompute:
228 | shutil.rmtree(dir)
229 | make_dir(dir)
230 |
231 | if args.strategy == 'DK':
232 | n_sml = int(self.g.num_nodes * (1 - args.ratio))
233 | try:
234 | self.assignment = torch.load(f'{dir}assignment.pt')
235 | self.g_sml = torch.load(f'{dir}g_sml.pt')
236 | print(f'load g_sml, assignment from \n {red(dir)}')
237 | except FileNotFoundError:
238 | g_sml, assignment = sample_N2Nlandmarks(self.g, n_sml, weight_key='edge_weight')
239 | self.g_sml = from_networkx(g_sml)
240 | self.assignment = assignment
241 |
242 | # save g_sml, assignment
243 | torch.save(self.assignment, f'{dir}assignment.pt')
244 | torch.save(self.g_sml, f'{dir}g_sml.pt')
245 | print(f'save at g_sml, assignment at \n {red(dir)}')
246 |
247 | # todo: add a function to convert self.assignment to C
248 | self.C = get_sparse_C(self.g.num_nodes, n_sml, self.assignment)
249 | elif args.strategy == 'loukas':
250 | try:
251 | self.assignment = torch.load(f'{dir}assignment.pt')
252 | self.g_sml = torch.load(f'{dir}g_sml.pt')
253 | self.C = torch.load(f'{dir}C.pt')
254 | print(f'load g_sml, assignment, and C from \n {red(dir)}')
255 | except (FileNotFoundError, TypeError):
256 | loukas_kwargs = {'r': args.ratio, 'method': args.method,
257 | 'loukas_quality': args.loukas_quality,
258 | 'K': args.n_bottomk}
259 | converter = gsp2pyg(self.g, **loukas_kwargs)
260 | g_sml, assignment = converter.pyg_sml, converter.assignment
261 | self.g_sml = g_sml
262 | self.C = converter.C
263 | self.assignment = assignment
264 |
265 | # save g_sml, C, assignment
266 | torch.save(self.assignment, f'{dir}assignment.pt')
267 | torch.save(self.g_sml, f'{dir}g_sml.pt')
268 | torch.save(self.C, f'{dir}C.pt')
269 | print(f'save at g_sml, assignment, and C at \n {red(dir)}')
270 | else:
271 | raise NotImplementedError
272 |
273 | @timefunc
274 | def __construct_dict(self):
275 | """ construct a dict.
276 | modified from get_original_subgraphs where __map_back_from_edges is replaced by graph_pair.get_data,
277 | which makes it faster
278 |
279 |         :return: a dict mapping edge index pairs (i, j) to (G1, G2, (crossing_edge_index, crossing_edge_attr), ini)
280 | """
281 | assert (isinstance(self.g, torch_geometric.data.data.Data) and isinstance(self.g_sml,
282 | torch_geometric.data.data.Data))
283 | imap, ret = {}, {}
284 | for i, (u, v) in enumerate(self.g_sml.edge_index.t().tolist()):
285 | if u > v:
286 | # map node index (u, v) to edge index (i, j)
287 | imap[(v, u)] = [i] if (v, u) not in imap.keys() else imap[(v, u)] + [i]
288 | else:
289 | imap[(u, v)] = [i] if (u, v) not in imap.keys() else imap[(u, v)] + [i]
290 | for _, (u, v) in enumerate(self.g_sml.edge_index.t().tolist()):
291 | if u < v:
292 | (i, j) = tuple(imap[(u, v)])
293 | G1, G2, crossing_edges, _ = self.graph_pair.get_data(
294 | (u, v)) # __map_back_from_edge(G, (u, v), assignment)
295 | ini = self.g_sml.edge_weight[[i, j]] # used to initialize gnn output. assert ini[0]==ini[1]
296 | ret[(i, j)] = (G1, G2, crossing_edges, ini)
297 | return ret
298 |
299 | @timefunc
300 | def get_subgraphs(self, verbose=False):
301 | """ the main fucntions that is called
302 | return a list of pyG graph corresponding to each edge in G'
303 | """
304 |
305 | subgraphs_list = []
306 |
307 | for e in self.edges:
308 | G1, G2, crossing_edges, ini = self.info[e]
309 | pyG = get_merged_subgraph(G1, G2, crossing_edges)
310 | indices = pyG.node_index.numpy().ravel().tolist()
311 |
312 | try:
313 | new_edge_index, new_edge_attr = subgraph(indices, pyG.edge_index, pyG.edge_attr, relabel_nodes=True)
314 | except IndexError:
315 |                 warn('Index Error. Filtering out isolated nodes.')
316 | _edge_indices = pyG.edge_index.numpy().ravel().tolist()
317 | indices = [idx for idx in indices if idx in _edge_indices]
318 | new_edge_index, new_edge_attr = subgraph(indices, pyG.edge_index, pyG.edge_attr, relabel_nodes=True)
319 |
320 | new_pyG = Data(edge_index=new_edge_index, edge_attr=new_edge_attr, ini=torch.ones(1) * ini[0])
321 | new_pyG.x = None
322 | new_pyG = LocalDegreeProfile()(new_pyG)
323 | new_pyG.x = Variable(new_pyG.x)
324 | new_pyG.x = torch.nn.functional.normalize(new_pyG.x, dim=0)
325 | subgraphs_list += [new_pyG]
326 |
327 | del self.info
328 | if verbose:
329 | for idx, v in enumerate(subgraphs_list):
330 | summary(v, f'{idx}-subgraph')
331 |
332 | print(f'{len(subgraphs_list)} Subgraph Stats:')
333 | nodes_stats = [g_.num_nodes for g_ in subgraphs_list]
334 | edges_stats = [g_.num_edges for g_ in subgraphs_list]
335 | summary(np.array(nodes_stats), 'node_stats')
336 | summary(np.array(edges_stats), 'edge_stats')
337 |
338 | return subgraphs_list
339 |
340 | def get_bipartitle_graphs(self):
341 |         # todo: similar to get_subgraphs but for bipartite graphs. 0.
342 | raise NotImplementedError
343 |
344 | def baseline0(self, normalization):
345 | """
346 | return the laplacian of baseline 0, which is the Laplacian of G' without learning
347 | Summary of g_sml in baseline0 (torch_geometric.data.data.Data):
348 | edge_index LongTensor [2, 9476] 796.55(mean) 0.0(min) 1688.0(max) 770.0(median) 492.89(std) 1689.0(unique)
349 | edge_weight FloatTensor [9476] 2.11(mean) 1.0(min) 10.0(max) 2.0(median) 1.12(std) 10.0(unique)
350 | """
351 | g_sml = self.g_sml # all index should be contiguous
352 | L = get_laplacian_mat(g_sml.edge_index, g_sml.edge_weight, g_sml.num_nodes, normalization=normalization)
353 | return L
354 |
355 | def L(self, g, normalization):
356 | # todo: add check num_nodes
357 | L = get_laplacian_mat(g.edge_index, g.edge_weight, g.num_nodes, normalization=normalization)
358 | return L
359 |
360 | def trivial_L(self, g):
361 | """ trival Laplacian for standarded Laplacian"""
362 | L = get_laplacian_mat(g.edge_index, torch.zeros(g.edge_weight.size()), g.num_nodes, normalization=None)
363 | return L
364 |
365 |
366 | if __name__ == '__main__':
367 | # edge = random_edge_index(n_edge=200, n_node=20)
368 | fix_seed()
369 |
370 | n_node, n_edge = 3200, 40000
371 | node_dim = 1
372 | edge_feat_dim = 1
373 | n_node_sml = 200
374 |
375 | g = Data(x=torch.rand(n_node, node_dim),
376 | edge_index=random_edge_index(n_edge, n_node),
377 | edge_attr=torch.rand(n_edge, edge_feat_dim))
378 | g.edge_weight = torch.ones(n_edge) * 1.1
379 | summary(g, 'original_graph')
380 |
381 | # n_sml = 200
382 | # banner('Test subgraphs')
383 | # all_graphs = subgraphs(g, n_sml).get_subgraphs(verbose=False)
384 |
385 | banner('Test sample Landmark')
386 | g_sml, assignment = sample_N2Nlandmarks(g, n_node_sml, weight_key='edge_weight')
387 | print(g_sml.edges.data())
388 | g_sml_pyG = from_networkx(g_sml)
389 |
390 | banner('Get original subgraphs test')
391 | graph_pair = GraphPair(g, g_sml_pyG, assignment)
392 | graph_pair.construct()
393 |
394 | edge_indexes = g_sml_pyG.edge_index.t().tolist()
395 |
396 | for i, edge in enumerate(edge_indexes):
397 | print(edge)
398 | G1, G2, crossing_edge, tensor_indices = graph_pair.get_data(edge)
399 | if i > 4: exit()
400 | continue
401 |
402 | # When G1 or G2 is a single node, summary() will cause error.
403 | # summary(G1, 'G1')
404 | print(G2)
405 | # exit()
406 | # summary(G2, 'G2')
407 | print(crossing_edge[0].shape, crossing_edge[1].shape)
408 | print(tensor_indices)
409 |
410 | # edge_indexes = g_sml_pyG.edge_index.numpy()
411 | # dict = get_original_subgraphs(g, g_sml_pyG, assignment)
412 | # for i, j in dict.keys():
413 | # print('edge_index_pair ({}, {}):'.format(i, j), edge_indexes[:, i], edge_indexes[:, j])
414 | #
415 | # banner('g_sml_nx')
416 | # summary(g_sml_pyG, 'g_sml_pyG')
417 | # edges = [e for e in g_sml.edges]
418 | # summary(torch.Tensor(np.array(edges)), 'g_sml_nx')
419 | #
420 | # banner('DK\'s test')
421 | # print('Select edge:', edges[0])
422 | # G1, G2, crossing_edges = __map_back_from_edge(g, edges[0], assignment)
423 | # summary(G1, 'G1')
424 | # summary(G2, 'G2')
425 | # print('Crossing edge size:', crossing_edges[0].shape)
426 | #
427 | # summary(get_bipartite(G1, G2, crossing_edges), 'bipartitle')
428 | # summary(get_merged_subgraph(G1, G2, crossing_edges), 'merged_subgraph')
429 |
--------------------------------------------------------------------------------
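The bookkeeping that graph_util.py does around a coarsening — splitting each original edge into either an intra-cluster edge (kept in a per-landmark subgraph) or a crossing edge between two clusters — can be summarized in a few lines of plain Python. The toy edge list and assignment below are hypothetical; in the repository the assignment comes from sample_N2Nlandmarks or Loukas coarsening, and GraphPair keeps the crossing edges themselves rather than only an aggregated weight, since the GNN consumes the merged subgraph around each coarse edge.

```python
# Illustrative sketch of splitting a graph under a node -> cluster assignment,
# in the spirit of GraphPair.construct. The toy graph and assignment are made up.
from collections import defaultdict

edges = [(0, 1, 1.0), (1, 2, 2.0), (2, 3, 1.0), (3, 4, 0.5), (4, 5, 1.0), (0, 5, 1.5)]
assignment = {0: 0, 1: 0, 2: 1, 3: 1, 4: 2, 5: 2}   # original node -> coarse node

internal = defaultdict(list)          # cluster -> edges kept inside that cluster
coarse_weight = defaultdict(float)    # coarse edge -> accumulated crossing weight

for u, v, w in edges:
    cu, cv = assignment[u], assignment[v]
    if cu == cv:
        internal[cu].append((u, v, w))
    else:
        coarse_weight[tuple(sorted((cu, cv)))] += w

print(dict(coarse_weight))   # {(0, 1): 2.0, (1, 2): 0.5, (0, 2): 1.5}
print(dict(internal))
```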
/sparsenet/util/pygsp_util.py:
--------------------------------------------------------------------------------
1 | # Created at 2020-05-31
2 | # Summary: in order to use to_networkx
3 | # https://github.com/epfl-lts2/pygsp/blob/c8687ee3619741010a7b81ee298085da8299a0bd/pygsp/graphs/_io.py#L8
4 |
5 | # -*- coding: utf-8 -*-
6 |
7 | import os
8 |
9 | import numpy as np
10 |
11 |
12 | def _import_networkx():
13 | try:
14 | import networkx as nx
15 | except Exception as e:
16 | raise ImportError('Cannot import networkx. Use graph-tool or try to '
17 | 'install it with pip (or conda) install networkx. '
18 | 'Original exception: {}'.format(e))
19 | return nx
20 |
21 |
22 | def _import_graphtool():
23 | try:
24 | import graph_tool as gt
25 | except Exception as e:
26 | raise ImportError('Cannot import graph-tool. Use networkx or try to '
27 | 'install it. Original exception: {}'.format(e))
28 | return gt
29 |
30 |
31 | class IOMixIn(object):
32 |
33 | def _break_signals(self):
34 | r"""Break N-dimensional signals into N 1D signals."""
35 | for name in list(self.signals.keys()):
36 | if self.signals[name].ndim == 2:
37 | for i, signal_1d in enumerate(self.signals[name].T):
38 | self.signals[name + '_' + str(i)] = signal_1d
39 | del self.signals[name]
40 |
41 | def _join_signals(self):
42 | r"""Join N 1D signals into one N-dimensional signal."""
43 | joined = dict()
44 | for name in self.signals:
45 | name_base = name.rsplit('_', 1)[0]
46 | names = joined.get(name_base, list())
47 | names.append(name)
48 | joined[name_base] = names
49 | for name_base, names in joined.items():
50 | if len(names) > 1:
51 | names = sorted(names) # ensure dim ordering (_0, _1, etc.)
52 | signal_nd = np.stack([self.signals[n] for n in names], axis=1)
53 | self.signals[name_base] = signal_nd
54 | for name in names:
55 | del self.signals[name]
56 |
57 | def to_networkx(self):
58 | r"""Export the graph to NetworkX.
59 |
60 | Edge weights are stored as an edge attribute,
61 | under the name "weight".
62 |
63 | Signals are stored as node attributes,
64 | under their name in the :attr:`signals` dictionary.
65 | `N`-dimensional signals are broken into `N` 1-dimensional signals.
66 | They will eventually be joined back together on import.
67 |
68 | Returns
69 | -------
70 | graph : :class:`networkx.Graph`
71 | A NetworkX graph object.
72 |
73 | See Also
74 | --------
75 | to_graphtool : export to graph-tool
76 | save : save to a file
77 |
78 | Examples
79 | --------
80 | >>> import networkx as nx
81 | >>> from matplotlib import pyplot as plt
82 | >>> graph = graphs.Path(4, directed=True)
83 | >>> graph.set_signal(np.full(4, 2.3), 'signal')
84 | >>> graph = graph.to_networkx()
85 | >>> print(nx.info(graph))
86 | Name: Path
87 | Type: DiGraph
88 | Number of nodes: 4
89 | Number of edges: 3
90 | Average in degree: 0.7500
91 | Average out degree: 0.7500
92 | >>> nx.is_directed(graph)
93 | True
94 | >>> graph.nodes()
95 | NodeView((0, 1, 2, 3))
96 | >>> graph.edges()
97 | OutEdgeView([(0, 1), (1, 2), (2, 3)])
98 | >>> graph.nodes()[2]
99 | {'signal': 2.3}
100 | >>> graph.edges()[(0, 1)]
101 | {'weight': 1.0}
102 | >>> # nx.draw(graph, with_labels=True)
103 |
104 | Another common goal is to use NetworkX to compute some properties to be
105 | be imported back in the PyGSP as signals.
106 |
107 | >>> import networkx as nx
108 | >>> from matplotlib import pyplot as plt
109 | >>> graph = graphs.Sensor(100, seed=42)
110 | >>> graph.set_signal(graph.coords, 'coords')
111 | >>> graph = graph.to_networkx()
112 | >>> betweenness = nx.betweenness_centrality(graph, weight='weight')
113 | >>> nx.set_node_attributes(graph, betweenness, 'betweenness')
114 | >>> graph = graphs.Graph.from_networkx(graph)
115 | >>> graph.compute_fourier_basis()
116 | >>> graph.set_coordinates(graph.signals['coords'])
117 | >>> fig, axes = plt.subplots(1, 2)
118 | >>> _ = graph.plot(graph.signals['betweenness'], ax=axes[0])
119 | >>> _ = axes[1].plot(graph.e, graph.gft(graph.signals['betweenness']))
120 |
121 | """
122 | nx = _import_networkx()
123 |
124 | def convert(number):
125 | # NetworkX accepts arbitrary python objects as attributes, but:
126 | # * the GEXF writer does not accept any NumPy types (on signals),
127 | # * the GraphML writer does not accept NumPy ints.
128 | if issubclass(number.dtype.type, (np.integer, np.bool_)):
129 | return int(number)
130 | else:
131 | return float(number)
132 |
133 | def edges():
134 | for source, target, weight in zip(*self.get_edge_list()):
135 | yield int(source), int(target), {'weight': convert(weight)}
136 |
137 | def nodes():
138 | for vertex in range(self.n_vertices):
139 | signals = {name: convert(signal[vertex])
140 | for name, signal in self.signals.items()}
141 | yield vertex, signals
142 |
143 | # self._break_signals()
144 | graph = nx.DiGraph() if self.is_directed() else nx.Graph()
145 | graph.add_nodes_from(nodes())
146 | graph.add_edges_from(edges())
147 | graph.name = self.__class__.__name__
148 | return graph
149 |
150 | def to_graphtool(self):
151 | r"""Export the graph to graph-tool.
152 |
153 | Edge weights are stored as an edge property map,
154 | under the name "weight".
155 |
156 | Signals are stored as vertex property maps,
157 | under their name in the :attr:`signals` dictionary.
158 | `N`-dimensional signals are broken into `N` 1-dimensional signals.
159 | They will eventually be joined back together on import.
160 |
161 | Returns
162 | -------
163 | graph : :class:`graph_tool.Graph`
164 | A graph-tool graph object.
165 |
166 | See Also
167 | --------
168 | to_networkx : export to NetworkX
169 | save : save to a file
170 |
171 | Examples
172 | --------
173 | >>> import graph_tool as gt
174 | >>> import graph_tool.draw
175 | >>> from matplotlib import pyplot as plt
176 | >>> graph = graphs.Path(4, directed=True)
177 | >>> graph.set_signal(np.full(4, 2.3), 'signal')
178 | >>> graph = graph.to_graphtool()
179 | >>> graph.is_directed()
180 | True
181 | >>> graph.vertex_properties['signal'][2]
182 | 2.3
183 | >>> graph.edge_properties['weight'][(0, 1)]
184 | 1.0
185 | >>> # gt.draw.graph_draw(graph, vertex_text=graph.vertex_index)
186 |
187 | Another common goal is to use graph-tool to compute some properties to
188 | be imported back in the PyGSP as signals.
189 |
190 | >>> import graph_tool as gt
191 | >>> import graph_tool.centrality
192 | >>> from matplotlib import pyplot as plt
193 | >>> graph = graphs.Sensor(100, seed=42)
194 | >>> graph.set_signal(graph.coords, 'coords')
195 | >>> graph = graph.to_graphtool()
196 | >>> vprop, eprop = gt.centrality.betweenness(
197 | ... graph, weight=graph.edge_properties['weight'])
198 | >>> graph.vertex_properties['betweenness'] = vprop
199 | >>> graph = graphs.Graph.from_graphtool(graph)
200 | >>> graph.compute_fourier_basis()
201 | >>> graph.set_coordinates(graph.signals['coords'])
202 | >>> fig, axes = plt.subplots(1, 2)
203 | >>> _ = graph.plot(graph.signals['betweenness'], ax=axes[0])
204 | >>> _ = axes[1].plot(graph.e, graph.gft(graph.signals['betweenness']))
205 |
206 | """
207 |
208 | # See gt.value_types() for the list of accepted types.
209 | # See the definition of _type_alias() for a list of aliases.
210 | # Mapping from https://docs.scipy.org/doc/numpy/user/basics.types.html.
211 | convert = {
212 | np.bool_: 'bool',
213 | np.int8: 'int8_t',
214 | np.int16: 'int16_t',
215 | np.int32: 'int32_t',
216 | np.int64: 'int64_t',
217 | np.short: 'short',
218 | np.intc: 'int',
219 | np.uintc: 'unsigned int',
220 | np.long: 'long',
221 | np.longlong: 'long long',
222 | np.uint: 'unsigned long',
223 | np.single: 'float',
224 | np.double: 'double',
225 | np.longdouble: 'long double',
226 | }
227 |
228 | gt = _import_graphtool()
229 | graph = gt.Graph(directed=self.is_directed())
230 |
231 | sources, targets, weights = self.get_edge_list()
232 | graph.add_edge_list(np.asarray((sources, targets)).T)
233 | try:
234 | dtype = convert[weights.dtype.type]
235 | except KeyError:
236 | raise TypeError("Type {} of the edge weights is not supported."
237 | .format(weights.dtype))
238 | prop = graph.new_edge_property(dtype)
239 | prop.get_array()[:] = weights
240 | graph.edge_properties['weight'] = prop
241 |
242 | self._break_signals()
243 | for name, signal in self.signals.items():
244 | try:
245 | dtype = convert[signal.dtype.type]
246 | except KeyError:
247 | raise TypeError("Type {} of signal {} is not supported."
248 | .format(signal.dtype, name))
249 | prop = graph.new_vertex_property(dtype)
250 | prop.get_array()[:] = signal
251 | graph.vertex_properties[name] = prop
252 |
253 | return graph
254 |
255 | @classmethod
256 | def from_networkx(cls, graph, weight='weight'):
257 | r"""Import a graph from NetworkX.
258 |
259 | Edge weights are retrieved as an edge attribute,
260 | under the name specified by the ``weight`` parameter.
261 |
262 | Signals are retrieved from node attributes,
263 | and stored in the :attr:`signals` dictionary under the attribute name.
264 | `N`-dimensional signals that were broken during export are joined.
265 |
266 | Parameters
267 | ----------
268 | graph : :class:`networkx.Graph`
269 | A NetworkX graph object.
270 | weight : string or None, optional
271 | The edge attribute that holds the numerical values used as the edge
272 | weights. All edge weights are set to 1 if None, or not found.
273 |
274 | Returns
275 | -------
276 | graph : :class:`~pygsp.graphs.Graph`
277 | A PyGSP graph object.
278 |
279 | Notes
280 | -----
281 |
282 | The nodes are ordered according to :meth:`networkx.Graph.nodes`.
283 |
284 | In NetworkX, node attributes need not be set for every node.
285 | If a node attribute is not set for a node, a NaN is assigned to the
286 | corresponding signal for that node.
287 |
288 | If the graph is a :class:`networkx.MultiGraph`, multiedges are
289 | aggregated by summation.
290 |
291 | See Also
292 | --------
293 | from_graphtool : import from graph-tool
294 | load : load from a file
295 |
296 | Examples
297 | --------
298 | >>> import networkx as nx
299 | >>> graph = nx.Graph()
300 | >>> graph.add_edge(1, 2, weight=0.2)
301 | >>> graph.add_edge(2, 3, weight=0.9)
302 | >>> graph.add_node(4, sig=3.1416)
303 | >>> graph.nodes()
304 | NodeView((1, 2, 3, 4))
305 | >>> graph = graphs.Graph.from_networkx(graph)
306 | >>> graph.W.toarray()
307 | array([[0. , 0.2, 0. , 0. ],
308 | [0.2, 0. , 0.9, 0. ],
309 | [0. , 0.9, 0. , 0. ],
310 | [0. , 0. , 0. , 0. ]])
311 | >>> graph.signals
312 | {'sig': array([ nan, nan, nan, 3.1416])}
313 |
314 | """
315 | nx = _import_networkx()
316 | from .graph import Graph
317 |
318 | adjacency = nx.to_scipy_sparse_matrix(graph, weight=weight)
319 | graph_pg = Graph(adjacency)
320 |
321 | for i, node in enumerate(graph.nodes()):
322 | for name in graph.nodes[node].keys():
323 | try:
324 | signal = graph_pg.signals[name]
325 | except KeyError:
326 | signal = np.full(graph_pg.n_vertices, np.nan)
327 | graph_pg.set_signal(signal, name)
328 | try:
329 | signal[i] = graph.nodes[node][name]
330 | except KeyError:
331 | pass # attribute not set for node
332 |
333 | graph_pg._join_signals()
334 | return graph_pg
335 |
336 |
337 | @classmethod
338 | def load(cls, path, fmt=None, backend=None):
339 | r"""Load a graph from a file.
340 |
341 | Edge weights are retrieved as an edge attribute named "weight".
342 |
343 | Signals are retrieved from node attributes,
344 | and stored in the :attr:`signals` dictionary under the attribute name.
345 | `N`-dimensional signals that were broken during export are joined.
346 |
347 | Parameters
348 | ----------
349 | path : string
350 | Path to the file from which to load the graph.
351 | fmt : {'graphml', 'gml', 'gexf', None}, optional
352 | Format in which the graph is saved.
353 | Guessed from the filename extension if None.
354 | backend : {'networkx', 'graph-tool', None}, optional
355 | Library used to load the graph. Automatically chosen if None.
356 |
357 | Returns
358 | -------
359 | graph : :class:`Graph`
360 | The loaded graph.
361 |
362 | See Also
363 | --------
364 | save : save a graph to a file
365 | from_networkx : load with NetworkX then import in the PyGSP
366 | from_graphtool : load with graph-tool then import in the PyGSP
367 |
368 | Notes
369 | -----
370 |
371 | A lossless round-trip is only guaranteed if the graph (and its signals)
372 | is saved and loaded with the same backend.
373 |
374 | Loading from other formats is possible by loading in NetworkX or
375 | graph-tool, and importing to the PyGSP.
376 | The proposed formats are however tested for faithful round-trips.
377 |
378 | Examples
379 | --------
380 | >>> graph = graphs.Logo()
381 | >>> graph.save('logo.graphml')
382 | >>> graph = graphs.Graph.load('logo.graphml')
383 | >>> import os
384 | >>> os.remove('logo.graphml')
385 |
386 | """
387 |
388 | if fmt is None:
389 | fmt = os.path.splitext(path)[1][1:]
390 | if fmt not in ['graphml', 'gml', 'gexf']:
391 | raise ValueError('Unsupported format {}.'.format(fmt))
392 |
393 | def load_networkx(path, fmt):
394 | nx = _import_networkx()
395 | load = getattr(nx, 'read_' + fmt)
396 | graph = load(path)
397 | return cls.from_networkx(graph)
398 |
399 | def load_graphtool(path, fmt):
400 | gt = _import_graphtool()
401 | graph = gt.load_graph(path, fmt=fmt)
402 | return cls.from_graphtool(graph)
403 |
404 | if backend == 'networkx':
405 | return load_networkx(path, fmt)
406 | elif backend == 'graph-tool':
407 | return load_graphtool(path, fmt)
408 | elif backend is None:
409 | try:
410 | return load_networkx(path, fmt)
411 | except ImportError:
412 | try:
413 | return load_graphtool(path, fmt)
414 | except ImportError:
415 | raise ImportError('Cannot import networkx nor graph-tool.')
416 | else:
417 | raise ValueError('Unknown backend {}.'.format(backend))
418 |
419 | def save(self, path, fmt=None, backend=None):
420 | r"""Save the graph to a file.
421 |
422 | Edge weights are stored as an edge attribute,
423 | under the name "weight".
424 |
425 | Signals are stored as node attributes,
426 | under their name in the :attr:`signals` dictionary.
427 | `N`-dimensional signals are broken into `N` 1-dimensional signals.
428 | They will eventually be joined back together on import.
429 |
430 | Supported formats are:
431 |
432 | * GraphML_, a comprehensive XML format.
433 |           `Wikipedia <https://en.wikipedia.org/wiki/GraphML>`_.
434 | Supported by NetworkX_, graph-tool_, NetworKit_, igraph_, Gephi_,
435 | Cytoscape_, SocNetV_.
436 | * GML_ (Graph Modelling Language), a simple non-XML format.
437 |           `Wikipedia <https://en.wikipedia.org/wiki/Graph_Modelling_Language>`_.
438 | Supported by NetworkX_, graph-tool_, NetworKit_, igraph_, Gephi_,
439 | Cytoscape_, SocNetV_, Tulip_.
440 | * GEXF_ (Graph Exchange XML Format), Gephi's XML format.
441 | Supported by NetworkX_, NetworKit_, Gephi_, Tulip_, ngraph_.
442 |
443 | If unsure, we recommend GraphML_.
444 |
445 | .. _GraphML: http://graphml.graphdrawing.org
446 | .. _GML: https://web.archive.org/web/20190303094704/http://www.fim.uni-passau.de:80/fileadmin/files/lehrstuhl/brandenburg/projekte/gml/gml-technical-report.pdf
447 | .. _GEXF: https://gephi.org/gexf/format
448 | .. _NetworkX: https://networkx.github.io
449 | .. _graph-tool: https://graph-tool.skewed.de
450 | .. _NetworKit: https://networkit.github.io
451 | .. _igraph: https://igraph.org
452 | .. _ngraph: https://github.com/anvaka/ngraph
453 | .. _Gephi: https://gephi.org
454 | .. _Cytoscape: https://cytoscape.org
455 | .. _SocNetV: https://socnetv.org
456 | .. _Tulip: http://tulip.labri.fr
457 |
458 | Parameters
459 | ----------
460 | path : string
461 | Path to the file where the graph is to be saved.
462 | fmt : {'graphml', 'gml', 'gexf', None}, optional
463 | Format in which to save the graph.
464 | Guessed from the filename extension if None.
465 | backend : {'networkx', 'graph-tool', None}, optional
466 | Library used to load the graph. Automatically chosen if None.
467 |
468 | See Also
469 | --------
470 | load : load a graph from a file
471 | to_networkx : export as a NetworkX graph, and save with NetworkX
472 | to_graphtool : export as a graph-tool graph, and save with graph-tool
473 |
474 | Notes
475 | -----
476 |
477 | A lossless round-trip is only guaranteed if the graph (and its signals)
478 | is saved and loaded with the same backend.
479 |
480 | Saving in other formats is possible by exporting to NetworkX or
481 | graph-tool, and using their respective saving functionality.
482 | The proposed formats are however tested for faithful round-trips.
483 |
484 | Edge weights and signal values are rounded at the sixth decimal when
485 | saving in ``fmt='gml'`` with ``backend='graph-tool'``.
486 |
487 | Examples
488 | --------
489 | >>> graph = graphs.Logo()
490 | >>> graph.save('logo.graphml')
491 | >>> graph = graphs.Graph.load('logo.graphml')
492 | >>> import os
493 | >>> os.remove('logo.graphml')
494 |
495 | """
496 |
497 | if fmt is None:
498 | fmt = os.path.splitext(path)[1][1:]
499 | if fmt not in ['graphml', 'gml', 'gexf']:
500 | raise ValueError('Unsupported format {}.'.format(fmt))
501 |
502 | def save_networkx(graph, path, fmt):
503 | nx = _import_networkx()
504 | graph = graph.to_networkx()
505 | save = getattr(nx, 'write_' + fmt)
506 | save(graph, path)
507 |
508 | def save_graphtool(graph, path, fmt):
509 | graph = graph.to_graphtool()
510 | graph.save(path, fmt=fmt)
511 |
512 | if backend == 'networkx':
513 | save_networkx(self, path, fmt)
514 | elif backend == 'graph-tool':
515 | save_graphtool(self, path, fmt)
516 | elif backend is None:
517 | try:
518 | save_networkx(self, path, fmt)
519 | except ImportError:
520 | try:
521 | save_graphtool(self, path, fmt)
522 | except ImportError:
523 | raise ImportError('Cannot import networkx nor graph-tool.')
524 | else:
525 | raise ValueError('Unknown backend {}.'.format(backend))
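# Usage sketch (illustrative, assuming networkx is installed; the file name and format are
# arbitrary): round-tripping a graph with an explicit format and backend.
#
# >>> graph = graphs.Logo()
# >>> graph.save('logo.gml', fmt='gml', backend='networkx')
# >>> graph = graphs.Graph.load('logo.gml', fmt='gml', backend='networkx')
# >>> import os
# >>> os.remove('logo.gml')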
--------------------------------------------------------------------------------
/sparsenet/util/util.py:
--------------------------------------------------------------------------------
1 | # Created at 2020-04-16
2 | # Summary: util functions
3 | import collections
4 | import math
5 | import os
6 | import random
7 | import sys
8 | import time
9 | from functools import partial
10 | from itertools import chain
11 | from warnings import warn
12 |
13 | import matplotlib.pyplot as plt
14 | import networkx as nx
15 | import numpy as np
16 | import pandas as pd
17 | import torch
18 | import torch_geometric
19 | from colorama import init
20 | from pygsp import graphs
21 | from scipy.sparse import coo_matrix
22 | from termcolor import colored
23 | from torch_geometric.data import Data
24 | from torch_geometric.utils import subgraph, get_laplacian, from_networkx, to_networkx
25 |
26 | nan1 = 0.12345
27 | init()
28 |
29 |
30 | def timefunc(method, threshold=1):  # decorator: print the runtime of method when it takes at least threshold seconds
31 | def timed(*args, **kw):
32 | ts = time.time()
33 | result = method(*args, **kw)
34 | te = time.time()
35 | if 'log_time' in kw:
36 | name = kw.get('log_name', method.__name__.upper())
37 | kw['log_time'][name] = int((te - ts) * 1000)
38 | else:
39 | if int(te - ts) >= threshold:
40 | print(f'{method.__name__}: {pf(te - ts, precision=1)}s')
41 | return result
42 |
43 | return timed
44 |
45 |
46 | tf = partial(timefunc, threshold=1)
47 |
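# Usage sketch (illustrative): `timefunc` / `tf` are decorators; a decorated call that takes at
# least `threshold` seconds gets its runtime printed. `slow_op` is a hypothetical function.
#
# @tf
# def slow_op(n):
#     time.sleep(1.2)
#     return n * 2
#
# slow_op(3)  # prints something like "slow_op: 1.2s"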
48 |
49 | def stats(x, precision=2, verbose=True, var_name='None'):
50 |     """
51 |     print the stats of x (list, np.ndarray, or torch.Tensor)
52 | 
53 |     :param x: array-like to summarize
54 |     :param precision: number of decimals kept by pf
55 |     :param verbose: if True, also print a formatted line with the stats
56 |     :return: [mean, min, max, median, std], each formatted with pf
57 |     """
58 | if isinstance(x, torch.Tensor): x = tonp(x)
59 | assert isinstance(x, (list, np.ndarray)), 'stats only take list or numpy array'
60 |
61 | ave_ = np.mean(x)
62 | median_ = np.median(x)
63 | max_ = np.max(x)
64 | min_ = np.min(x)
65 | std_ = np.std(x)
66 | pf_ = partial(pf, precision=precision)
67 |
68 | if verbose:
69 | ave_, min_, max_, median_, std_ = list(map(pf_, [ave_, min_, max_, median_, std_]))
70 | line = '{:>25}: {:>8}(mean) {:>8}(min) {:>8}(max) {:>8}(median) {:>8}(std)'.format(var_name, ave_, min_, max_,
71 | median_, std_)
72 | print(line)
73 |
74 | return list(map(pf_, [ave_, min_, max_, median_, std_]))
75 |
76 |
77 | def viz_graph(g, node_size=5, edge_width=1, node_color='b', color_bar=False, show=False):
78 | # g = nx.random_geometric_graph(100, 0.125)
79 | pos = nx.spring_layout(g)
80 | nx.draw(g, pos, node_color=node_color, node_size=node_size, with_labels=False, width=edge_width)
81 | if color_bar:
82 | # https://stackoverflow.com/questions/26739248/how-to-add-a-simple-colorbar-to-a-network-graph-plot-in-python
83 | sm = plt.cm.ScalarMappable(norm=plt.Normalize(vmin=min(node_color), vmax=max(node_color)))
84 | sm._A = []
85 | plt.colorbar(sm)
86 | if show: plt.show()
87 |
88 |
89 | def largest_cc(g):
90 |     assert isinstance(g, Data)
91 | g = to_networkx(g).to_undirected()
92 | subgraphs = [g.subgraph(c).copy() for c in sorted(nx.connected_components(g), key=len, reverse=True)]
93 | largest_cc = subgraphs[0]
94 | g = from_networkx(largest_cc)
95 | return g
96 |
97 |
98 | def num_comp(g):
99 | assert isinstance(g, Data)
100 | g_nx = to_networkx(g).to_undirected()
101 |     n_component = nx.number_connected_components(g_nx)
102 | 
103 |     comp_size = [len(c) for c in nx.connected_components(g_nx)]
104 |     comp_size = sorted(comp_size, reverse=True)
105 |     if n_component > 1:
106 |         if n_component < 10:
107 |             print(comp_size)
108 |         else:
109 |             print(f'Sizes of the 10 largest components: {comp_size[:10]}')
110 | 
111 |     # assert n_component == 1, f'number of components is {n_component}'
112 |     return n_component
113 |
114 |
115 | def random_pygeo_graph(n_node, node_feat_dim, n_edge, edge_feat_dim, device='cpu', viz=False):
116 | """ random DIRECTED pyG graph """
117 | g = Data(x=torch.rand(n_node, node_feat_dim),
118 | edge_index=random_edge_index(n_edge, n_node),
119 | edge_attr=torch.rand(n_edge, edge_feat_dim).type(torch.LongTensor),
120 | edge_weight=torch.ones(n_edge))
121 |
122 | g_nx = to_networkx(g).to_undirected()
123 |     n_component = nx.number_connected_components(g_nx)
124 |     if n_component > 1 and viz: viz_graph(g_nx, show=True)
125 |     assert n_component == 1, f'number of components is {n_component}'
126 | g = g.to(device)
127 | return g
128 |
129 |
130 | def maybe_edge_weight(g):
131 | """ used for get_laplacian.
132 |     edge_weigher updates the edge weights, which are saved in the g.edge_weight attribute;
133 |     get_laplacian then retrieves the latest g.edge_weight to compute the loss.
134 | """
135 | assert isinstance(g, torch_geometric.data.data.Data)
136 | try:
137 | return g.edge_weight
138 | except AttributeError:
139 | warn('Use default edge weight')
140 | return None
141 |
142 |
143 | def random_edge_index(n_edge=200, n_node=20):
144 | """ generate random edge tensor of shape (2, n_edge) """
145 | assert n_edge % 2 == 0
146 | assert n_edge <= n_node * (n_node - 1), f'n_edge: {n_edge}; n_node: {n_node}'
147 | edges = []
148 | for i in range(n_edge // 2):
149 | a, b = np.random.choice(n_node, 2, replace=False).tolist()
150 | while (a, b) in edges:
151 | a, b = np.random.choice(n_node, 2, replace=False).tolist()
152 | edges.append((a, b))
153 | edges.append((b, a))
154 | edges = list(edges)
155 | edges = torch.LongTensor(np.array(edges).T)
156 | return edges
157 |
158 |
159 | def random_edge_weight(n_edges):
160 |     """
161 |     :param n_edges: total number of directed edges (an even int, i.e. edge_index.shape[1] from random_edge_index)
162 |     :return: tensor of shape (n_edges,), assigning the same weight to both directions of each edge
163 |     """
164 | weights = []
165 | assert n_edges % 2 == 0
166 | for i in range(n_edges // 2):
167 | w = np.random.random()
168 | weights.append(w)
169 | weights.append(w)
170 | return torch.Tensor(weights)
171 |
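# Usage sketch (illustrative): combining the two helpers above into a small undirected,
# weighted pyG graph; the sizes are arbitrary.
#
# edge_index = random_edge_index(n_edge=40, n_node=10)  # shape (2, 40); both directions of each edge
# edge_weight = random_edge_weight(40)                  # shape (40,); one weight per undirected edge
# g = Data(x=torch.rand(10, 3), edge_index=edge_index, edge_weight=edge_weight)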
172 |
173 | def unit_vector(vector):
174 | """ Returns the unit vector of the vector. """
175 | return vector / np.linalg.norm(vector)
176 |
177 |
178 | def angle_between(v1, v2):
179 | """ Returns the angle in radians between vectors 'v1' and 'v2'::
180 | https://bit.ly/2YHzUYK
181 |
182 | >>> angle_between((1, 0, 0), (0, 1, 0))
183 | 1.5707963267948966
184 | >>> angle_between((1, 0, 0), (1, 0, 0))
185 | 0.0
186 | >>> angle_between((1, 0, 0), (-1, 0, 0))
187 | 3.141592653589793
188 | """
189 | v1_u = unit_vector(v1)
190 | v2_u = unit_vector(v2)
191 | return np.arccos(np.clip(np.dot(v1_u, v2_u), -1.0, 1.0))
192 |
193 |
194 | def add_range(r1, r2):
195 | concatenated = chain(r1, r2)
196 | return concatenated
197 |
198 |
199 | def fix_seed(seed=1):
200 | random.seed(seed)
201 | np.random.seed(seed)
202 | torch.manual_seed(seed)
203 | torch.cuda.manual_seed_all(seed)
204 |
205 | torch.backends.cudnn.benchmark = False
206 | torch.backends.cudnn.deterministic = True
207 |
208 |
209 | def banner(text='', ch='=', length=140, compact=False):
210 | """ http://bit.ly/2vfTDCr
211 | print a banner
212 | """
213 | spaced_text = ' %s ' % text
214 | banner = spaced_text.center(length, ch)
215 | print(banner)
216 | if not compact:
217 | print()
218 |
219 |
220 | def pf(nbr, precision=1):
221 | """ precision format """
222 | # assert type(nbr)==float
223 | if isinstance(nbr, torch.Tensor):
224 |         nbr = float(nbr)
225 |
226 | if math.isnan(nbr):
227 | return 'nan'
228 | elif math.isinf(nbr):
229 | return 'inf'
230 | else:
231 | return round(nbr * (10 ** precision)) / (10 ** precision)
232 |
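# Usage sketch (illustrative): pf keeps the requested number of decimals and maps nan / inf to strings.
#
# pf(3.14159, precision=2)    # -> 3.14
# pf(torch.tensor(0.56789))   # -> 0.6
# pf(float('nan'))            # -> 'nan'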
233 |
234 | def set_thread(n=1):
235 |     """ limit the number of CPU threads used by the BLAS / OpenMP / numexpr backends and by torch.
236 |     Note: the environment variables only take effect if set before numpy / BLAS is first loaded.
237 |     """
238 |     os.environ['OMP_NUM_THREADS'] = str(n)
239 |     os.environ['OPENBLAS_NUM_THREADS'] = str(n)
240 |     os.environ['MKL_NUM_THREADS'] = str(n)
241 |     os.environ['VECLIB_MAXIMUM_THREADS'] = str(n)
242 |     os.environ['NUMEXPR_NUM_THREADS'] = str(n)
243 | 
244 |     torch.set_num_threads(n)
245 |
246 |
247 | @timefunc
248 | def tonp(tsr):  # convert np.ndarray / np.matrix / scipy sparse / torch.Tensor to a dense np.ndarray
249 | if isinstance(tsr, np.ndarray):
250 | return tsr
251 | elif isinstance(tsr, np.matrix):
252 | return np.array(tsr)
253 | elif isinstance(tsr, scipy.sparse.csc.csc_matrix):
254 | return np.array(tsr.todense())
255 |
256 | assert isinstance(tsr, torch.Tensor)
257 | tsr = tsr.cpu()
258 | assert isinstance(tsr, torch.Tensor)
259 |
260 | try:
261 | arr = tsr.numpy()
262 | except TypeError:
263 | arr = tsr.detach().to_dense().numpy()
264 | except:
265 | arr = tsr.detach().numpy()
266 |
267 | assert isinstance(arr, np.ndarray)
268 | return arr
269 |
270 |
271 | def nan_ratio(x):
272 | """ http://bit.ly/2PL7yaP
273 | """
274 | assert isinstance(x, np.ndarray)
275 | try:
276 | return np.count_nonzero(np.isnan(x)) / x.size
277 | except TypeError:
278 | return '-1 (TypeError)'
279 |
280 |
281 | import scipy
282 |
283 |
284 | def np2set(x):
285 | assert isinstance(x, np.ndarray)
286 | return set(np.unique(x))
287 |
288 |
289 | @timefunc
290 | def summary(x, name='x', terminate=False, skip=False, delimiter=None,
291 |             precision=3, exit=False, highlight=False):
292 |     """ print a one-line summary of x; dispatches on the type of x (array, tensor, pyG Data, dict, DataFrame, ...) """
293 | if highlight:
294 | name = red(name)
295 |
296 | if skip:
297 | print('', end='')
298 | return ''
299 |
300 | if isinstance(x, list):
301 | print(f'{name}: a list of length {len(x)}')
302 |
303 | if len(x) < 6:
304 | for _x in x:
305 | summary(_x)
306 |
307 | elif isinstance(x, scipy.sparse.csc.csc_matrix):
308 | min_, max_ = x.min(), x.max()
309 | mean_ = x.mean()
310 |
311 | std1 = np.std(tonp(x))
312 | x_copy = x.copy()
313 | x_copy.data **= 2
314 |         std2 = np.sqrt(max(x_copy.mean() - (x.mean() ** 2), 0))  # sqrt of E[x^2] - E[x]^2, so std1 and std2 agree
315 | pf_ = partial(pf, precision=precision)
316 | mean_, min_, max_, std1, std2 = list(map(pf_, [mean_, min_, max_, std1, std2]))
317 |
318 | line0 = '{:>10}: csc_matrix ({}) of shape {:>8}'.format(name, str(x.dtype), str(x.shape))
319 | line0 = line0 + ' ' * max(5, (45 - len(line0)))
320 | # line0 += 'Nan ratio: {:>8}.'.format(nan_ratio(x_))
321 | line1 = ' {:>8}(mean) {:>8}(min) {:>8}(max) {:>8}(std1) {:>8}(std2) {:>8}(unique) ' \
322 | .format(mean_, min_, max_, std1, std2, -1)
323 | line = line0 + line1
324 | print(line)
325 |
326 | elif isinstance(x, (np.ndarray,)):
327 | if x.size > 232960 * 10:
328 | return
329 | x_ = tonp(x)
330 | ave_ = np.mean(x_)
331 | median_ = np.median(x_)
332 | max_ = np.max(x_)
333 | min_ = np.min(x_)
334 | std_ = np.std(x_)
335 | unique_ = len(np.unique(x_))
336 | pf_ = partial(pf, precision=precision)
337 | ave_, min_, max_, median_, std_, unique_ = list(map(pf_, [ave_, min_, max_, median_, std_, unique_]))
338 |
339 | line0 = '{:>10}: array ({}) of shape {:>8}'.format(name, str(x.dtype), str(x.shape))
340 | line0 = line0 + ' ' * max(5, (45 - len(line0)))
341 | line0 += 'Nan ratio: {:>8}.'.format(nan_ratio(x_))
342 | line1 = ' {:>8}(mean) {:>8}(min) {:>8}(max) {:>8}(median) {:>8}(std) {:>8}(unique) '.format(ave_, min_, max_,
343 | median_, std_,
344 | unique_)
345 | line = line0 + line1
346 | if np2set(x_) <= set([-1, 0, 1]):
347 | ratio1 = np.sum(x_ == 1) / float(x_.size)
348 | ratio0 = np.sum(x_ == 0) / float(x_.size)
349 | line += '|| {:>8}(1 ratio) {:>8}(0 ratio)'.format(pf(ratio1, 3), pf(ratio0, 3))
350 |
351 | if nan1 in x_:
352 | nan_cnt = np.sum(x_ == nan1)
353 | line += f'nan_cnt {nan_cnt}'
354 |
355 | # f'{name}: array of shape {x.shape}.'
356 | print(line)
357 | # print(f'{name}: a np.array of shape {x.shape}. nan ratio: {nan_ratio(x)}. ' + line)
358 |
359 | elif isinstance(x, (torch.Tensor)):
360 | if x.numel() > 232965 * 10:
361 | return
362 | x_ = tonp(x)
363 | if len(x_) == 0:
364 | print(f'{name}: zero length np.array')
365 | else:
366 | ave_ = np.mean(x_)
367 | median_ = np.median(x_)
368 | max_ = np.max(x_)
369 | min_ = np.min(x_)
370 | std_ = np.std(x_)
371 | unique_ = len(np.unique(x_))
372 |
373 | pf_ = partial(pf, precision=2)
374 | ave_, min_, max_, median_, std_, unique_ = list(map(pf_, [ave_, min_, max_, median_, std_, unique_]))
375 | line = '{:>8}(mean) {:>8}(min) {:>8}(max) {:>8}(median) {:>8}(std) {:>8}(unique)'.format(ave_, min_, max_,
376 | median_, std_,
377 | unique_)
378 |
379 | print(
380 | '{:20}'.format(name) + '{:20}'.format(str(x.data.type())[6:]) + '{:15}'.format(
381 | str(x.size())[11:-1]) + line)
382 | # print(line)
383 | # print(f'{name}: a Tensor ({x.data.type()}) of shape {x.size()}')
384 |
385 | elif isinstance(x, tuple):
386 | print(f'{name}: a tuple of shape {len(x)}')
387 | if len(x) < 6:
388 | for ele in x:
389 | summary(ele, name='ele')
390 |
391 | elif isinstance(x, (dict, collections.defaultdict)):
392 | print(f'summarize a dict {name} of len {len(x)}')
393 | for k, v in x.items():
394 | # print(f'key is {k}')
395 | summary(v, name=k)
396 |
397 | elif isinstance(x, torch_geometric.data.data.Data):
398 | try:
399 | summary_pygeo(x, name=name)
400 |         except Exception as e:
401 |             raise Exception('Check pytorch geometric install.') from e
402 |
403 | elif isinstance(x, pd.DataFrame):
404 | from collections import OrderedDict
405 |
406 | dataType_dict = OrderedDict(x.dtypes)
407 | banner(text=f'start summarize a df ({name}) of shape {x.shape}', ch='-')
408 | print('df info')
409 | print(x.info())
410 | print('\n')
411 |
412 | print('head of df:')
413 | # print(tabulate(x, headers='firstrow'))
414 | print(x.head())
415 | print('\n')
416 |
417 | try:
418 | print('continuous feats of Dataframe:')
419 | cont_x = x.describe().T
420 | print(cont_x)
421 | print(cont_x.shape)
422 | print('\n')
423 | except ValueError:
424 | print('x.describe().T raise ValueError')
425 |
426 | try:
427 | print('non-cont\' feats (object type) of Dataframe:')
428 | non_cont = x.describe(include=[object]).T
429 | print(non_cont)
430 | print(non_cont.shape)
431 | except ValueError:
432 | print('x.describe(include=[object]).T raise ValueError')
433 |
434 | banner(text=f'finish summarize a df ({name}) of shape {x.shape}', ch='-')
435 |
436 | elif isinstance(x, (int, float)):
437 |         print(f'{name}(int/float): {x}')
438 |
439 | elif isinstance(x, str):
440 | print(f'{name}(str): {x}')
441 |
442 | else:
443 | print(f'{x}: \t\t {type(x)}')
444 | if terminate:
445 |             sys.exit(f'NotImplementedError for input {type(x)}')  # the exit parameter shadows the builtin exit
446 | else:
447 | pass
448 |
449 | if delimiter is not None:
450 | assert isinstance(delimiter, str)
451 | print(delimiter)
452 |
453 | if exit:
454 | sys.exit()
455 |
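# Usage sketch (illustrative): `summary` prints one descriptive line per object and recurses into
# lists, tuples and dicts.
#
# summary(np.random.rand(100, 3), name='feats')
# summary(torch.rand(10), name='weights')
# summary({'x': np.ones(5), 'note': 'hello'}, name='bundle')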
456 |
457 | def dict2name(d):
458 | """
459 | :param d: {'n_epoch': 300, 'bs': 32, 'n_data': 10, 'scheduler': True}
460 | :return: bs_32_n_data_10_n_epoch_300_scheduler_True
461 | """
462 | assert isinstance(d, dict)
463 | keys = list(d.keys())
464 | keys.sort()
465 | name = ''
466 | for k in keys:
467 | name += f'{k}_{d[k]}_'
468 | return name[:-1]
469 |
470 |
471 | def update_dict(d1, d2):
472 | # use d1 to update d2, return updated d2.
473 |     # keys of d1 have to be a subset of the keys of d2.
474 | assert isinstance(d1, dict)
475 | assert isinstance(d2, dict)
476 |     assert set(d1.keys()) <= set(d2.keys()), 'Keys of d1 have to be a subset of the keys of d2.'
477 | for k, v in d1.items():
478 | d2[k] = v
479 | return d2
480 |
481 |
482 | def hasany(s, s_list):
483 | """
484 | :param s: a string
485 | :param s_list: a list of str
486 | :return:
487 | """
488 | return any(ele in s for ele in s_list)
489 |
490 |
491 | def slicestr(s, f=None, t=None):
492 | """
493 | :param s: a string
494 | :param f: from
495 | :param t: to
496 | :return:
497 | """
498 | from_idx = s.index(f)
499 | to_idx = s.index(t)
500 | return s[from_idx:to_idx]
501 |
502 |
503 | def summary_pygeo(data, stat=False, precision=2, name=None):
504 | assert isinstance(data, torch_geometric.data.data.Data)
505 | print(f'Summary of {name} (torch_geometric.data.data.Data):')
506 |
507 | for k, v in data:
508 | print(' ', sep=' ', end=' ')
509 | if isinstance(v, torch.Tensor):
510 | if v.ndim == 1:
511 | summary(v, name=k, precision=precision)
512 | else:
513 | if v.size()[1] != 0:
514 | summary(v, name=k, precision=precision)
515 | else:
516 | warn(f'Empty edge index: {v}')
517 | elif isinstance(v, str):
518 | summary(v, k)
519 | else:
520 |             raise NotImplementedError(f'cannot summarize attribute {k} of type {type(v)}')
521 |
522 | if stat:
523 | for k, v in data:
524 | stats(v, var_name=k)
525 |
526 |
527 | def subset_graph(g, indices, relabel_nodes=None):
528 | """
529 |     :param g: pyG graph whose node indices are contiguous
530 | :param indices:
531 | :param relabel_nodes: if true, relabel nodes of the subgraph
532 | :return:
533 | """
534 | if isinstance(indices, torch.Tensor): indices = indices.tolist()
535 | if isinstance(indices, set): indices = list(indices)
536 |
537 | assert isinstance(indices, list)
538 | assert isinstance(g, torch_geometric.data.data.Data)
539 |
540 | sub_edge_index, sub_edge_attr = subgraph(indices, g.edge_index, g.edge_attr, relabel_nodes=relabel_nodes)
541 | g_subindices = torch.tensor(indices)
542 | g_subx = g.x.index_select(0, g_subindices)
543 | g_sub = Data(x=g_subx, edge_index=sub_edge_index, edge_attr=sub_edge_attr, node_index=g_subindices)
544 | return g_sub
545 |
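# Usage sketch (illustrative): induced subgraph on a few nodes of a random graph, with the
# kept nodes relabelled to 0..2.
#
# g = random_pygeo_graph(n_node=20, node_feat_dim=1, n_edge=60, edge_feat_dim=1)
# g_sub = subset_graph(g, [0, 1, 2], relabel_nodes=True)
# summary(g_sub, 'g_sub')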
546 |
547 | def assert_nonan(x):
548 | res = torch.isnan(x)
549 | assert (res == False).all(), 'contains Nan'
550 |
551 |
552 | def make_dir(dir):
553 | # has side effect
554 |
555 |     if dir is None:
556 | return
557 |
558 | if not os.path.exists(dir):
559 | os.makedirs(dir)
560 |
561 |
562 | def args_print(args, one_line=False):
563 | """ pretty print cmd with lots of args
564 | """
565 | for i in range(20):
566 | args = args.replace(' ', ' ')
567 |
568 | arglis = args.split(' ')
569 | new_arglist = []
570 | for i, token in enumerate(arglis):
571 | if '--' in token:
572 | token = '\n' + token
573 | elif token in ['-u', 'nohup']:
574 | pass
575 | elif '.py' in token:
576 | pass
577 | elif 'python' in token:
578 | pass
579 | else:
580 | space = (30 - len(arglis[i - 1])) * ' '
581 | token = space + token # '{:>35}'.format(token) #
582 | new_arglist.append(token)
583 |
584 | newargs = ' '.join(new_arglist) + '\n'
585 |
586 | if not one_line:
587 | print(newargs)
588 | else:
589 | newargs = one_liner(newargs)
590 | print(newargs)
591 |
592 |
593 | def one_liner(cmd):
594 | """ convert cmd that takes many lines into just one line """
595 | assert isinstance(cmd, str)
596 | cmd = cmd.replace('\n', '')
597 | for _ in range(10):
598 |         cmd = cmd.replace('  ', ' ')  # collapse repeated spaces
599 | return cmd
600 |
601 |
602 | def sig_dir():
603 | from sparsenet.util.dir_util import DIR
604 | return DIR
605 |
606 |
607 | def fig_dir():
608 | return f'{sig_dir()}sparsenet/paper/tex/Figs/'
609 |
610 |
611 | def tb_dir():
612 |     return f'{sig_dir()}result/tensorboardx/'  # sig_dir(), not tb_dir(), to avoid infinite recursion
613 |
614 |
615 | def model_dir():
616 | dir = f'{sig_dir()}result/model/'
617 | make_dir(dir)
618 | return dir
619 |
620 |
621 | def red(x):
622 | return colored(x, "red")
623 |
624 |
625 | def tex_dir():
626 | tex_dir = f'{sig_dir()}sparsenet/paper/tex/iclr_table/'
627 | make_dir(tex_dir)
628 | return tex_dir
629 |
630 |
631 | def random_laplacian(n):
632 | from torch_geometric.utils.random import erdos_renyi_graph
633 | edge_index = erdos_renyi_graph(n, 0.1)
634 | i, v = get_laplacian(edge_index, None, normalization=None)
635 | return i, v
636 |
637 |
638 | def runcmd(cmd, print_only=False):
639 | cmd = cmd.replace('--', ' --')
640 | banner('Execution of following cmds:', compact=True)
641 | if len(cmd) > 50 and '--' in cmd:
642 | args_print(cmd)
643 | else:
644 | print(cmd)
645 |
646 | if not print_only:
647 | os.system(cmd)
648 |
649 | if len(cmd) > 50 and '--' in cmd:
650 | args_print(cmd)
651 |
652 |
653 | def sparse_tensor2_sparse_numpyarray(sparse_tensor):
654 | """
655 | :param sparse_tensor: a COO torch.sparse.FloatTensor
656 | :return: a scipy.sparse.coo_matrix
657 | """
658 | if sparse_tensor.device.type == 'cuda':
659 | sparse_tensor = sparse_tensor.to('cpu')
660 |
661 | values = sparse_tensor._values().numpy()
662 | indices = sparse_tensor._indices()
663 | rows, cols = indices[0, :].numpy(), indices[1, :].numpy()
664 | size = sparse_tensor.size()
665 |     scipy_sparse_mat = coo_matrix((values, (rows, cols)), shape=size, dtype=np.float64)
666 | return scipy_sparse_mat
667 |
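# Usage sketch (illustrative): converting a small COO torch sparse tensor to a scipy coo_matrix.
#
# idx = torch.tensor([[0, 1], [1, 0]])
# val = torch.tensor([1.0, 1.0])
# st = torch.sparse_coo_tensor(idx, val, (2, 2))
# sparse_tensor2_sparse_numpyarray(st)  # scipy.sparse.coo_matrix of shape (2, 2)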
668 |
669 | def pyg2gsp(g):
670 | """
671 |     convert a pyG graph to a pygsp graph.
672 |     discard node/edge features from the pyG graph and keep only the weighted topology.
673 | """
674 | assert isinstance(g, torch_geometric.data.Data)
675 | edge_indices, edge_weight = tonp(g.edge_index), tonp(g.edge_weight)
676 | row, col = edge_indices[0, :], edge_indices[1, :]
677 |
678 | # memory efficient
679 | n = g.num_nodes
680 | W = scipy.sparse.csr_matrix((edge_weight, (row, col)), shape=(n, n))
681 | gspG = graphs.Graph(W)
682 | return gspG
683 |
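# Usage sketch (illustrative): converting a random pyG graph (which carries an edge_weight
# attribute) into a pygsp graph, e.g. to compute its combinatorial Laplacian.
#
# g = random_pygeo_graph(n_node=20, node_feat_dim=1, n_edge=60, edge_feat_dim=1)
# gspG = pyg2gsp(g)
# gspG.compute_laplacian('combinatorial')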
684 |
685 | def dic2tsr(d, dev='cuda'):
686 |     """ given a dict whose keys are the consecutive integers 0..N-1 and whose values are numbers,
687 |     convert it into a tensor of size N whose entry k is d[k].
688 |     """
689 |     N = len(d)
690 |     assert N == max(d.keys()) + 1, f'keys ({N}) are not consecutive. Max key is {max(d.keys())}'
691 | tsr = [0] * N
692 | for k in d:
693 | tsr[k] = d[k]
694 | return torch.tensor(tsr).to(dev)
695 |
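# Usage sketch (illustrative): keys must be exactly the integers 0..N-1.
#
# dic2tsr({0: 5, 1: 7, 2: 9}, dev='cpu')  # -> tensor([5, 7, 9])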
696 |
697 | if __name__ == '__main__':
698 | from scipy.sparse import csc_matrix
699 |
700 | n = 400
701 | x = csc_matrix((n, n), dtype=np.int8)
702 | print(x.mean())
703 | print(x.max())
704 | print(x.min())
705 |
706 | std1 = np.std(tonp(x))
707 | x_copy = x.copy()
708 | x_copy.data **= 2
709 |     std2 = np.sqrt(max(x_copy.mean() - x.mean() ** 2, 0))
710 | print(std1, std2)
711 | # summary(x, 'x')
712 |
713 | exit()
714 | # edge = random_edge_index(n_edge=200, n_node=20)
715 | from sparsenet.util.sample import sample_N2Nlandmarks
716 | from sparsenet.util.graph_util import __map_back_from_edge, get_bipartite
717 |
718 | n_node, n_edge = 320, 1000
719 | node_dim = 1
720 | edge_feat_dim = 1
721 |
722 | g = Data(x=torch.rand(n_node, node_dim),
723 | edge_index=random_edge_index(n_edge, n_node),
724 | edge_attr=torch.rand(n_edge, edge_feat_dim).type(torch.LongTensor))
725 | summary(g, 'original_graph')
726 |
727 | G = to_networkx(g)
728 | G_prime, Assignment = sample_N2Nlandmarks(G, 10)
729 | print(G_prime.edges.data())
730 | edges = [e for e in G_prime.edges]
731 | print('Select edge:', edges[0])
732 | G1, G2, crossing_edges = __map_back_from_edge(g, edges[0], Assignment)
733 | summary(G1, 'G1')
734 | summary(G2, 'G2')
735 | print('Crossing edge size:', crossing_edges[0].shape)
736 | summary(get_bipartite(G1, G2, crossing_edges))
737 |
--------------------------------------------------------------------------------