├── sparsenet ├── __init__.py ├── test │ ├── __init__.py │ └── gen_test.py ├── util │ ├── __init__.py │ ├── sys_util.py │ ├── args_util.py │ ├── torch_util.py │ ├── sample.py │ ├── name_util.py │ ├── train_util.py │ ├── model_util.py │ ├── cut_util.py │ ├── pyg_util.py │ ├── gsp_util.py │ ├── loss_util.py │ ├── graph_util.py │ ├── pygsp_util.py │ └── util.py ├── evaluation │ ├── __init__.py │ └── graph-coarsening │ │ ├── graph_coarsening │ │ ├── version.py │ │ ├── __init__.py │ │ ├── graph_utils.py │ │ └── graph_lib.py │ │ ├── setup.py │ │ ├── examples │ │ ├── coarsening_methods.py │ │ ├── experiment_approximation.py │ │ └── coarsening_demo.py │ │ └── README.md ├── model │ ├── __init__.py │ ├── example.py │ ├── loss.py │ ├── model.py │ └── eval.py └── install.sh ├── README.md ├── install.sh └── requirements.txt /sparsenet/__init__.py: -------------------------------------------------------------------------------- 1 | # Created at 2020-04-16 2 | # Summary: -------------------------------------------------------------------------------- /sparsenet/test/__init__.py: -------------------------------------------------------------------------------- 1 | # Created at 2020-05-17 2 | # Summary: -------------------------------------------------------------------------------- /sparsenet/util/__init__.py: -------------------------------------------------------------------------------- 1 | # Created at 2020-04-16 2 | # Summary: -------------------------------------------------------------------------------- /sparsenet/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | # Created at 2020-05-08 2 | # Summary: -------------------------------------------------------------------------------- /sparsenet/model/__init__.py: -------------------------------------------------------------------------------- 1 | # Created at 2020-04-10 2 | # Summary: 3 | 4 | -------------------------------------------------------------------------------- /sparsenet/evaluation/graph-coarsening/graph_coarsening/version.py: -------------------------------------------------------------------------------- 1 | __version__ = "1.2" 2 | -------------------------------------------------------------------------------- /sparsenet/evaluation/graph-coarsening/graph_coarsening/__init__.py: -------------------------------------------------------------------------------- 1 | from . import graph_lib, graph_utils 2 | from .coarsening_utils import coarsen, coarsening_quality, plot_coarsening 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # sparsifier 2 | 3 | ## Install 4 | 5 | * python 3.7.6 6 | * torch 1.4.0 7 | * pytorch geometric 1.5.0 8 | * networkx 2.4 9 | 10 | ## Test 11 | Download the processed data [here](https://drive.google.com/drive/folders/1WMYebXwU7bVRWTW33BIx-sqAZV3UFFfX?usp=sharing) 12 | and set up the data directory in sparsenet/util/dir_util.py accordingly. 13 | 14 | Run ```python sparsenet/test/gen_test.py```. The output should be similar to output.md.
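`dir_util.py` itself is not included in this snapshot, so the snippet below is only a hypothetical sketch of the kind of path setting it is expected to provide (the variable name and location are placeholders, not the actual file contents):

```python
# Hypothetical sketch only -- point this at wherever the downloaded data was unpacked.
import os.path as osp

DATA_DIR = osp.expanduser('~/data/sparsenet')  # root folder of the processed data
```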
15 | -------------------------------------------------------------------------------- /sparsenet/install.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # https://github.com/rusty1s/pytorch_geometric 4 | pip install torch-scatter==latest+cu101 -f https://pytorch-geometric.com/whl/torch-1.4.0.html 5 | pip install torch-sparse==latest+cu101 -f https://pytorch-geometric.com/whl/torch-1.4.0.html 6 | pip install torch-cluster==latest+cu101 -f https://pytorch-geometric.com/whl/torch-1.4.0.html 7 | pip install torch-spline-conv==latest+cu101 -f https://pytorch-geometric.com/whl/torch-1.4.0.html 8 | pip install torch-geometric==1.5.0 -------------------------------------------------------------------------------- /sparsenet/util/sys_util.py: -------------------------------------------------------------------------------- 1 | # Created at 2020-06-21 2 | # Summary: set thread number 3 | 4 | import os 5 | n=2 6 | os.environ['MKL_NUM_THREADS'] = str(n) 7 | os.environ['OMP_NUM_THREADS'] = str(n) 8 | os.environ['OPENBLAS_NUM_THREADS'] = str(n) 9 | os.environ['MKL_NUM_THREADS'] = str(n) 10 | os.environ['VECLIB_MAXIMUM_THREADS'] = str(n) 11 | os.environ['NUMEXPR_NUM_THREADS'] = str(n) 12 | import torch 13 | torch.set_num_threads(n) # always import this first 14 | status = f'{n}' 15 | print(f'thread status {__file__}: {status}') 16 | 17 | # status=None -------------------------------------------------------------------------------- /install.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # https://github.com/rusty1s/pytorch_geometric 4 | # pip install torch-scatter==latest+cu101 -f https://pytorch-geometric.com/whl/torch-1.4.0.html 5 | # pip install torch-sparse==latest+cu101 -f https://pytorch-geometric.com/whl/torch-1.4.0.html 6 | # pip install torch-cluster==latest+cu101 -f https://pytorch-geometric.com/whl/torch-1.4.0.html 7 | # pip install torch-spline-conv==latest+cu101 -f https://pytorch-geometric.com/whl/torch-1.4.0.html 8 | # pip install torch-geometric 9 | 10 | TORCH='1.4.0' 11 | CUDA='cpu' 12 | # pip install torch-scatter -f https://data.pyg.org/whl/torch-${TORCH}+${CUDA}.html 13 | pip install torch-sparse -f https://data.pyg.org/whl/torch-${TORCH}+${CUDA}.html 14 | # pip install torch-geometric -------------------------------------------------------------------------------- /sparsenet/evaluation/graph-coarsening/setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from setuptools import setup, find_packages 4 | 5 | install_requires = [ 6 | "numpy", 7 | "scipy", 8 | "pygsp", 9 | "matplotlib", 10 | "sortedcontainers", 11 | ] 12 | 13 | version_py = os.path.join(os.path.dirname(__file__), "graph_coarsening", "version.py") 14 | version = open(version_py).read().strip().split("=")[-1].replace('"', "").strip() 15 | 16 | readme = open("README.md").read() 17 | 18 | setup( 19 | name="graph_coarsening", 20 | version=version, 21 | description="graph_coarsening", 22 | author="Andreas Loukas", 23 | author_email="andreas.loukas@epfl.ch", 24 | packages=find_packages(), 25 | license="Apache License 2.0", 26 | install_requires=install_requires, 27 | long_description=readme, 28 | long_description_content_type="text/markdown", 29 | url="https://github.com/loukasa/graph-coarsening", 30 | download_url="https://github.com/loukasa/graph-coarsening/archive/v{}.tar.gz".format( 31 | version 32 | ), 33 | 
keywords=["big-data", "networks",], 34 | classifiers=[ 35 | "Development Status :: 4 - Beta", 36 | "Environment :: Console", 37 | "Framework :: Jupyter", 38 | "Intended Audience :: Developers", 39 | "Intended Audience :: Science/Research", 40 | "Natural Language :: English", 41 | "Operating System :: MacOS :: MacOS X", 42 | "Operating System :: Microsoft :: Windows", 43 | "Operating System :: POSIX :: Linux", 44 | "Programming Language :: Python :: 3", 45 | "Programming Language :: Python :: 3.5", 46 | "Programming Language :: Python :: 3.6", 47 | "Programming Language :: Python :: 3.7", 48 | ], 49 | ) 50 | -------------------------------------------------------------------------------- /sparsenet/util/args_util.py: -------------------------------------------------------------------------------- 1 | # Created at 2021-03-13 2 | # Summary: argparse related 3 | from warnings import warn 4 | 5 | import numpy as np 6 | 7 | 8 | class argsparser(): 9 | def __init__(self, args): 10 | if args.lap in ['None', 'none']: 11 | args.lap = None 12 | self.args = args 13 | 14 | def set_indices(self): 15 | args = self.args 16 | 17 | train_indices = [int(item) for item in args.train_indices.split(',') if len(item) != 0] 18 | test_indices = [int(item) for item in args.test_indices.split(',') if len(item) != 0] 19 | val_indices = np.random.choice(test_indices, 5, replace=False).tolist() if len(test_indices) > 10 else [] 20 | test_indices = [idx for idx in test_indices if idx not in val_indices] 21 | 22 | if len(val_indices) == len(test_indices) == 0: # for datasets with single graph 23 | test_indices = train_indices 24 | val_indices = train_indices 25 | 26 | # todo: better handling 27 | if len(val_indices) == 0: 28 | assert len(train_indices) > 1 29 | if len(train_indices) < 5: 30 | n_sample = 1 31 | else: 32 | n_sample = 5 if len(train_indices) < 15 else 10 33 | val_indices = np.random.choice(train_indices, n_sample, replace=False).tolist() 34 | train_indices = [idx for idx in train_indices if idx not in val_indices] 35 | 36 | # todo: handle this case more elegantly 37 | if args.dataset == 'coauthors': # handle coauthors 38 | args.n_epoch = 20 39 | train_indices = [0] 40 | test_indices = [1] 41 | val_indices = [0] 42 | 43 | print(f'train_indices: {train_indices}.\n ' 44 | f'val_indices: {val_indices}. \n ' 45 | f'test_indices: {test_indices}.') 46 | self.args = args 47 | return train_indices, val_indices, test_indices 48 | 49 | def set_model_name(self): 50 | args = self.args 51 | model_name = 'checkpoint-best-eigen-ratio.pkl' if args.valeigen else 'checkpoint-best-improve-ratio.pkl' 52 | return model_name 53 | -------------------------------------------------------------------------------- /sparsenet/evaluation/graph-coarsening/examples/coarsening_methods.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | # The script shows the effect of different coarsening methods on a toy example. 5 | # 6 | # The code accompanies paper [Graph reduction with spectral and cut guarantees](http://www.jmlr.org/papers/volume20/18-680/18-680.pdf) by Andreas Loukas published at JMLR/2019 ([bibtex](http://www.jmlr.org/papers/v20/18-680.bib)). 7 | # 8 | # This work was kindly supported by the Swiss National Science Foundation (grant number PZ00P2 179981). 
9 | # 10 | # 15 May 2020 11 | # 12 | # [Andreas Loukas](https://andreasloukas.blog) 13 | # 14 | # [![DOI](https://zenodo.org/badge/175851068.svg)](https://zenodo.org/badge/latestdoi/175851068) 15 | # 16 | # Released under the Apache license 2.0 17 | 18 | # In[1]: 19 | 20 | 21 | get_ipython().system('pip install networkx') 22 | 23 | 24 | # In[1]: 25 | 26 | 27 | get_ipython().run_line_magic('load_ext', 'autoreload') 28 | get_ipython().run_line_magic('autoreload', '2') 29 | get_ipython().run_line_magic('matplotlib', 'inline') 30 | 31 | 32 | # In[2]: 33 | 34 | 35 | import numpy as np 36 | import scipy as sp 37 | 38 | import matplotlib 39 | import matplotlib.pylab as plt 40 | from mpl_toolkits.mplot3d import Axes3D 41 | 42 | import networkx as nx 43 | import pygsp as gsp 44 | gsp.plotting.BACKEND = 'matplotlib' 45 | 46 | 47 | # In[3]: 48 | 49 | 50 | from graph_coarsening.coarsening_utils import * 51 | import graph_coarsening.graph_utils 52 | import graph_coarsening.graph_lib 53 | 54 | 55 | # Load the graph 56 | 57 | # In[4]: 58 | 59 | 60 | N = 600 # number of nodes 61 | 62 | 63 | # In[5]: 64 | 65 | 66 | G = graph_coarsening.graph_lib.real(N, 'yeast') 67 | 68 | 69 | # Coarsen it with different methods 70 | 71 | # In[6]: 72 | 73 | 74 | r = 0.6 # coarsening ratio 75 | methods = ['variation_neighborhoods', 'variation_edges', 'variation_cliques', 76 | 'heavy_edge', 'algebraic_JC', 'affinity_GS', 'kron'] 77 | 78 | 79 | # In[7]: 80 | 81 | 82 | for method in methods: 83 | 84 | C, Gc, Call, Gall = coarsen(G, r=r, method=method) 85 | plot_coarsening(Gall, Call, title=method, size=2); 86 | 87 | 88 | 89 | # In[ ]: 90 | 91 | 92 | 93 | 94 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pymongo 2 | sacred 3 | ase==3.19.1 4 | attrs==19.3.0 5 | backcall==0.1.0 6 | bleach==3.1.4 7 | brotli==1.0.7 8 | chardet==3.0.4 9 | click==7.1.2 10 | colorama==0.4.3 11 | cvxpy==1.0.31 12 | cycler==0.10.0 13 | dash==1.11.0 14 | dash-core-components==1.9.1 15 | dash-html-components==1.0.3 16 | dash-renderer==1.4.0 17 | dash-table==4.6.2 18 | decorator==4.4.2 19 | defusedxml==0.6.0 20 | deprecated==1.2.13 21 | dill==0.3.1.1 22 | dionysus==2.0.6 23 | docopt==0.6.2 24 | ecos==2.0.7.post1 25 | entrypoints==0.3 26 | flask==1.1.2 27 | flask-compress==1.5.0 28 | future==0.18.2 29 | gitdb==4.0.5 30 | gitpython==3.1.2 31 | googledrivedownloader==0.4 32 | h5py==2.10.0 33 | idna==2.9 34 | imageio==2.8.0 35 | importlib-metadata==1.6.0 36 | ipykernel==5.2.0 37 | ipython==7.13.0 38 | ipython-genutils==0.2.0 39 | ipywidgets==7.5.1 40 | isodate==0.6.0 41 | itsdangerous==1.1.0 42 | jedi==0.16.0 43 | jinja2==2.11.1 44 | joblib==0.14.1 45 | jsonpickle==1.4.1 46 | jsonschema==3.2.0 47 | jupyter==1.0.0 48 | jupyter-client==6.1.2 49 | jupyter-console==6.1.0 50 | jupyter-core==4.6.3 51 | kiwisolver==1.2.0 52 | llvmlite==0.31.0 53 | lmdb==0.98 54 | markupsafe==1.1.1 55 | matplotlib==3.2.1 56 | memory-profiler==0.57.0 57 | mendeleev==0.6.0 58 | mistune==0.8.4 59 | monty==3.0.2 60 | mpmath==1.1.0 61 | multiprocess==0.70.9 62 | munch==2.5.0 63 | nbconvert==5.6.1 64 | nbformat==5.0.5 65 | networkx==2.4 66 | nglview==2.7.5 67 | notebook==6.0.3 68 | numba==0.48.0 69 | numpy==1.18.2 70 | osqp==0.6.1 71 | packaging==20.4 72 | palettable==3.3.0 73 | pandas==1.0.3 74 | pandocfilters==1.4.2 75 | parso==0.6.2 76 | pexpect==4.8.0 77 | pickleshare==0.7.5 78 | pillow==7.1.1 79 | plotly==4.6.0 80 | plyfile==0.7.2 81 | 
prometheus-client==0.7.1 82 | prompt-toolkit==3.0.5 83 | protobuf==3.11.3 84 | psutil==5.7.0 85 | ptyprocess==0.6.0 86 | py-cpuinfo==5.0.0 87 | pydispatcher==2.0.5 88 | pyfiglet==0.8.post1 89 | pygments==2.6.1 90 | pygsp==0.5.1 91 | pymatgen==2020.4.2 92 | pymongo==3.10.1 93 | pynvml==8.0.4 94 | pyparsing==2.4.7 95 | pyrsistent==0.16.0 96 | python-dateutil==2.8.1 97 | # python-graphviz==0.13.2 98 | pytz==2019.3 99 | pywavelets==1.1.1 100 | pyyaml==5.3.1 101 | pyzmq==19.0.0 102 | qtconsole==4.7.2 103 | qtpy==1.9.0 104 | rdflib==4.2.2 105 | requests==2.23.0 106 | retrying==1.3.3 107 | ruamel-yaml==0.16.10 108 | ruamel-yaml-clib==0.2.0 109 | sacred==0.8.1 110 | schedule==0.6.0 111 | scikit-image==0.16.2 112 | scikit-learn==0.22.2.post1 113 | scipy==1.4.1 114 | scs==2.1.2 115 | send2trash==1.5.0 116 | six==1.14.0 117 | sklearn==0.0 118 | skorch==0.8.0 119 | smmap==3.0.4 120 | sortedcontainers==2.1.0 121 | spglib==1.14.1.post0 122 | sqlalchemy==1.3.16 123 | sympy==1.5.1 124 | tabulate==0.8.7 125 | tbb==2020.0.133 126 | tensorboardx==2.0 127 | termcolor==1.1.0 128 | terminado==0.8.3 129 | testpath==0.4.4 130 | torch==1.4.0 131 | torch-cluster==1.5.4 132 | torch-geometric==1.5.0 133 | torch-scatter==2.0.4 134 | torch-sparse==0.6.1 135 | torch-spline-conv==1.2.0 136 | torchsummary==1.5.1 137 | torchvision==0.5.0 138 | torchviz==0.0.1 139 | tornado==6.0.4 140 | tqdm==4.45.0 141 | traitlets==4.3.3 142 | umap-learn==0.4.1 143 | urllib3==1.25.8 144 | vtk==8.1.2 145 | wcwidth==0.1.9 146 | webencodings==0.5.1 147 | werkzeug==1.0.1 148 | widgetsnbextension==3.5.1 149 | wrapt==1.12.1 150 | yacs==0.1.8 151 | zipp==3.1.0 -------------------------------------------------------------------------------- /sparsenet/evaluation/graph-coarsening/graph_coarsening/graph_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pygsp as gsp 3 | 4 | def to_networkx(): 5 | import networkx as nx 6 | return nx.from_scipy_sparse_matrix(G.W) 7 | 8 | def get_neighbors(G, i): 9 | return G.A[i,:].indices 10 | # return np.arange(G.N)[np.array((G.W[i,:] > 0).todense())[0]] 11 | 12 | def get_giant_component(G): 13 | 14 | from scipy.sparse import csgraph 15 | 16 | [ncomp, labels] = csgraph.connected_components(G.W, directed=False, return_labels=True) 17 | 18 | W_g = np.array((0,0)) 19 | coords_g = np.array((0,2)) 20 | keep = np.array(0) 21 | 22 | for i in range(0,ncomp): 23 | 24 | idx = np.where(labels!=i) 25 | idx = idx[0] 26 | 27 | if G.N-len(idx) > W_g.shape[0]: 28 | W_g = G.W.toarray() 29 | W_g = np.delete(W_g, idx, axis=0) 30 | W_g = np.delete(W_g, idx, axis=1) 31 | if hasattr(G, 'coords'): 32 | coords_g = np.delete(G.coords, idx, axis=0) 33 | keep = np.delete(np.arange(G.N), idx) 34 | 35 | if not hasattr(G, 'coords'): 36 | # print(W_g.shape) 37 | G_g = gsp.graphs.Graph(W=W_g) 38 | else: 39 | G_g = gsp.graphs.Graph(W=W_g, coords=coords_g) 40 | 41 | 42 | return (G_g, keep) 43 | 44 | 45 | def get_S(G): 46 | """ 47 | Construct the N x |E| gradient matrix S 48 | """ 49 | # the edge set 50 | edges = G.get_edge_list() 51 | weights = np.array(edges[2]) 52 | edges = np.array(edges[0:2]) 53 | M = edges.shape[1] 54 | 55 | # Construct the N x |E| gradient matrix S 56 | S = np.zeros((G.N,M)) 57 | for e in np.arange(M): 58 | S[edges[0,e], e] = np.sqrt(weights[e]) 59 | S[edges[1,e], e] = -np.sqrt(weights[e]) 60 | 61 | return S 62 | 63 | # Compare the spectum of L and Lc 64 | def eig(A, order='ascend'): 65 | 66 | # eigenvalue decomposition 67 | [l,X] = np.linalg.eigh(A) 68 
| 69 | # reordering indices 70 | idx = l.argsort() 71 | if order == 'descend': 72 | idx = idx[::-1] 73 | 74 | # reordering 75 | l = np.real(l[idx]) 76 | X = X[:, idx] 77 | return (X,np.real(l)) 78 | 79 | def zero_diag(A): 80 | 81 | import scipy as sp 82 | 83 | if sp.sparse.issparse(A): 84 | return A - sp.sparse.dia_matrix((A.diagonal()[sp.newaxis, :], [0]), shape=(A.shape[0], A.shape[1])) 85 | else: 86 | D = A.diagonal() 87 | return A - np.diag(D) 88 | 89 | def is_symmetric(As): 90 | """Check if a sparse matrix is symmetric 91 | 92 | Parameters 93 | ---------- 94 | As : array or sparse matrix 95 | A square matrix. 96 | 97 | Returns 98 | ------- 99 | check : bool 100 | The check result. 101 | 102 | """ 103 | from scipy import sparse 104 | 105 | if As.shape[0] != As.shape[1]: 106 | return False 107 | 108 | if not isinstance(As, sparse.coo_matrix): 109 | As = sparse.coo_matrix(As) 110 | 111 | r, c, v = As.row, As.col, As.data 112 | tril_no_diag = r > c 113 | triu_no_diag = c > r 114 | 115 | if triu_no_diag.sum() != tril_no_diag.sum(): 116 | return False 117 | 118 | rl = r[tril_no_diag] 119 | cl = c[tril_no_diag] 120 | vl = v[tril_no_diag] 121 | ru = r[triu_no_diag] 122 | cu = c[triu_no_diag] 123 | vu = v[triu_no_diag] 124 | 125 | sortl = np.lexsort((cl, rl)) 126 | sortu = np.lexsort((ru, cu)) 127 | vl = vl[sortl] 128 | vu = vu[sortu] 129 | 130 | check = np.allclose(vl, vu) 131 | 132 | return check 133 | -------------------------------------------------------------------------------- /sparsenet/evaluation/graph-coarsening/README.md: -------------------------------------------------------------------------------- 1 | # graph-coarsening package 2 | 3 | Multilevel graph coarsening algorithm with spectral and cut guarantees. 4 | 5 | The code accompanies paper [Graph reduction with spectral and cut guarantees](http://www.jmlr.org/papers/volume20/18-680/18-680.pdf) by Andreas Loukas published at JMLR/2019. 6 | 7 | In addition to the introduced [**variation**](http://www.jmlr.org/papers/volume20/18-680/18-680.pdf) methods, the code provides implementations of [**heavy-edge matching**](http://proceedings.mlr.press/v80/loukas18a.html), [**algebraic distance**](https://epubs.siam.org/doi/abs/10.1137/100791142?casa_token=tReVSPG0pBIAAAAA:P3BxPcyiSNkuxP5mOz8s9I7CN1tFQaMUTjyVHvb7PphqsGDy91ybcmAmECTYOeN2l-ErcpXuuA), [**affinity**](https://epubs.siam.org/doi/abs/10.1137/110843563?mobileUi=0), and [**Kron reduction**](http://motion.me.ucsb.edu/pdf/2011d-db.pdf) (adapted from [pygsp](https://pygsp.readthedocs.io/en/stable)). 8 | 9 | ## Paper abstract 10 | Can one reduce the size of a graph without significantly altering its basic properties? The graph reduction problem is hereby approached from the perspective of restricted spectral approximation, a modification of the spectral similarity measure used for graph sparsification. This choice is motivated by the observation that restricted approximation carries strong spectral and cut guarantees, and that it implies approximation results for unsupervised learning problems relying on spectral embeddings. The article then focuses on coarsening - the most common type of graph reduction. Sufficient conditions are derived for a small graph to approximate a larger one in the sense of restricted approximation. These findings give rise to algorithms that, compared to both standard and advanced graph reduction methods, find coarse graphs of improved quality, often by a large margin, without sacrificing speed. 
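In code, the coarsening described above boils down to a single call; a minimal sketch adapted from `examples/coarsening_methods.py` in this repository (the graph, coarsening ratio `r`, and method are just example choices):

```python
import graph_coarsening.graph_lib as graph_lib
from graph_coarsening.coarsening_utils import coarsen, plot_coarsening

G = graph_lib.real(600, 'yeast')  # load a small test graph
r = 0.6                           # coarsening ratio
C, Gc, Call, Gall = coarsen(G, r=r, method='variation_neighborhoods')
plot_coarsening(Gall, Call, title='variation_neighborhoods', size=2)
```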
11 | 12 | ## Contents 13 | 14 | There are five python notebooks included under `examples`: 15 | 16 | * `coarsening_demo.ipynb` demonstrates how the code can be used with a toy example (see also [blogpost](https://andreasloukas.blog/2018/11/05/multilevel-graph-coarsening-with-spectral-and-cut-guarantees/)). 17 | * `coarsening_methods.ipynb` shows the effect of different coarsening methods on a toy example. 18 | * `experiment_approximation.ipynb` reproduces the results of Section 5.1. 19 | * `experiment_spectrum.ipynb` reproduces the results of Section 5.2. 20 | * `experiment_scalability.ipynb` reproduces the results of Section 5.3. 21 | 22 | Since I have not fixed the random seed, some small variance should be expected in the experiment output. 23 | 24 | ## Installation instructions: 25 | 26 | ``` 27 | git clone git@github.com:loukasa/graph-coarsening.git 28 | cd graph-coarsening 29 | pip install . 30 | ``` 31 | 32 | Dependencies: pygsp, matplotlib, numpy, scipy, sortedcontainers 33 | Optional dependency: networkx 34 | 35 | ## Citation 36 | 37 | If you use this code, please cite: 38 | ``` 39 | @article{JMLR:v20:18-680, 40 | author = {Andreas Loukas}, 41 | title = {Graph Reduction with Spectral and Cut Guarantees}, 42 | journal = {Journal of Machine Learning Research}, 43 | year = {2019}, 44 | volume = {20}, 45 | number = {116}, 46 | pages = {1-42}, 47 | url = {http://jmlr.org/papers/v20/18-680.html} 48 | } 49 | ``` 50 | 51 | ## Acknowledgements 52 | 53 | This work was kindly supported by the Swiss National Science Foundation (grant number PZ00P2 179981). I would like to thank [Scott Gigante](https://cbb.yale.edu/people/scott-gigante) for helping package the code. 54 | 55 | 15 May 2020 56 | 57 | [Andreas Loukas](https://andreasloukas.blog) 58 | 59 | [![DOI](https://zenodo.org/badge/175851068.svg)](https://zenodo.org/badge/latestdoi/175851068) 60 | 61 | Released under the Apache license 2.0 62 | -------------------------------------------------------------------------------- /sparsenet/util/torch_util.py: -------------------------------------------------------------------------------- 1 | # Created at 2020-06-13 2 | # Summary: torch related functions. Mainly implemented some sparse matrix operations for pytorch==1.4.0. 
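# Overview: sparse_tensor2_sparse_numpyarray and sparse_matrix2sparse_tensor convert between
# torch COO sparse tensors and scipy COO matrices; sparse_mm and sparse_mm2 use them to form
# sandwich products such as Q.L.Q between a sparse Laplacian and diagonal matrices without
# densifying the operands.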
3 | 4 | from time import time 5 | 6 | import networkx as nx 7 | import numpy as np 8 | import scipy as sp 9 | import torch 10 | from deprecated import deprecated 11 | from scipy.sparse import csr_matrix, coo_matrix 12 | 13 | from sparsenet.util.util import summary, tonp, pf 14 | 15 | 16 | def sparse_tensor2_sparse_numpyarray(sparse_tensor): 17 | """ 18 | :param sparse_tensor: a COO torch.sparse.FloatTensor 19 | :return: a scipy.sparse.coo_matrix 20 | """ 21 | if sparse_tensor.device.type == 'cuda': 22 | sparse_tensor = sparse_tensor.to('cpu') 23 | 24 | values = sparse_tensor._values().numpy() 25 | indices = sparse_tensor._indices() 26 | rows, cols = indices[0, :].numpy(), indices[1, :].numpy() 27 | size = sparse_tensor.size() 28 | scipy_sparse_mat = coo_matrix((values, (rows, cols)), shape=size, dtype=np.float) 29 | return scipy_sparse_mat 30 | 31 | 32 | def sparse_matrix2sparse_tensor(ret, dev='cpu'): 33 | # coo sparse matrix to sparse tensor 34 | # https://bit.ly/30DI2u8 35 | values = ret.data 36 | indices = np.vstack((ret.row, ret.col)) 37 | i = torch.LongTensor(indices) 38 | v = torch.FloatTensor(values) 39 | shape = ret.shape 40 | return torch.sparse.FloatTensor(i, v, torch.Size(shape)).to(dev) 41 | 42 | 43 | def sparse_mm(L, Q): 44 | """ 45 | :param L: a sparse tensor 46 | :param Q: a sparse diagonal tensor 47 | :return: Q.L.Q 48 | """ 49 | dev = L.device 50 | if dev == 'cuda': 51 | L = L.to('cpu') 52 | Q = Q.to('cpu') 53 | 54 | L = sparse_tensor2_sparse_numpyarray(L) # csr_matrix(L) 55 | Q = sparse_tensor2_sparse_numpyarray(Q) # csr_matrix(Q) 56 | 57 | ret = coo_matrix(Q.dot(L.dot(Q))) # coo matrix sparse 58 | return sparse_matrix2sparse_tensor(ret, dev=dev) 59 | 60 | 61 | def sparse_mm2(P, D1, D2): 62 | """ 63 | :param P: a sparse tensor of (n, N) 64 | :param D1: a sparse diagonal tensor of (N, N) 65 | :param D2: a sparse diagonal tensor of (n, n) 66 | :return: D1.P.D2 also a sparse tensor 67 | """ 68 | 69 | dev = P.device 70 | if dev == 'cuda': 71 | P, D1, D2 = P.to('cpu'), D1.to('cpu'), D2.to('cpu') 72 | P = sparse_tensor2_sparse_numpyarray(P) 73 | D1 = sparse_tensor2_sparse_numpyarray(D1) 74 | D2 = sparse_tensor2_sparse_numpyarray(D2) 75 | try: 76 | ret = coo_matrix(D2.dot(P.dot(D1))) 77 | except ValueError: 78 | summary(P.todense(), 'P') 79 | summary(D1.todense(), 'D1') 80 | summary(D2.todense(), 'D2') 81 | exit() 82 | return sparse_matrix2sparse_tensor(ret, dev=dev) 83 | 84 | 85 | @deprecated('To be removed') 86 | def mm(n=10): 87 | g = nx.random_geometric_graph(n, 0.1) 88 | L = nx.laplacian_matrix(g).todense() 89 | L = torch.Tensor(L) 90 | Q = torch.diag(torch.rand(n)) 91 | 92 | summary(L, 'L') 93 | summary(Q, 'Q') 94 | 95 | # method 1 96 | t0 = time() 97 | ret1 = sp.sparse.csr_matrix(L).dot(sp.sparse.csr_matrix(Q)) 98 | ret1 = sp.sparse.csr_matrix(Q).dot(ret1) 99 | summary(ret1, 'ret1') 100 | t1 = time() 101 | print(f'method 1: {pf(t1 - t0, 2)}') 102 | 103 | # ret 2 104 | ret2 = tonp(Q).dot(tonp(L).dot(tonp(Q))) 105 | summary(ret2, 'ret2') 106 | t2 = time() 107 | print(f'method 2: {pf(t2 - t1, 2)}') 108 | 109 | assert (ret2 - ret1 == 0).all() 110 | # summary(tonp(tonp(ret2) - tonp(ret1.todense())), 'ret2-ret1') 111 | 112 | 113 | if __name__ == '__main__': 114 | mm() 115 | exit() 116 | n = 50 # 000 117 | g = nx.random_geometric_graph(n, 0.01) 118 | L = nx.laplacian_matrix(g) 119 | L = torch.Tensor(L) 120 | print(L) 121 | exit() 122 | 123 | Q = torch.diag(torch.rand(n)) 124 | 125 | L, Q = L.to_sparse(), Q.to_sparse() 126 | ret = sparse_mm(L, Q) 127 | summary(ret, 'ret') 128 | 
129 | exit() 130 | mm(n=1000) 131 | -------------------------------------------------------------------------------- /sparsenet/util/sample.py: -------------------------------------------------------------------------------- 1 | # used for baseline (BL) graph coarsen method 2 | 3 | import networkx as nx 4 | import numpy as np 5 | import torch 6 | import torch_geometric 7 | from torch_geometric.utils import to_networkx 8 | 9 | from sparsenet.util.util import summary, fix_seed, random_pygeo_graph, timefunc 10 | 11 | INFINITY = 1e8 12 | 13 | 14 | @timefunc 15 | def sample_N2Nlandmarks(G, N, weight_key='edge_weight', reproducible=True): 16 | ''' 17 | Node to nearest landmarks sampling. 18 | Selected a number of landmarks, then every node is collapsed to its nearest landmark 19 | :param G: The input networkx Graph or pygeo graph. Required to be CONNECTED. The input graph is by default 20 | DIRECTED. 21 | :param N: Number of nodes (to be sampled) in the sampled graph. 22 | :param weight_key: The key name(in the dictionary) for the weight information. 23 | :return: The sampled graph G_prime, and the correspondence dictionary Assignment. The sampled graph is relabeled 24 | to (0 - N-1). The assigment is the a dict where key is 0-num_nodes_sml and value is a set 25 | ''' 26 | if reproducible: fix_seed() 27 | 28 | if isinstance(G, torch_geometric.data.data.Data): 29 | G = to_networkx(G, edge_attrs=[weight_key]) 30 | 31 | assert (nx.is_directed(G) and nx.is_strongly_connected( 32 | G)), f'Input graph must be connected. {nx.number_strongly_connected_components(G)}' \ 33 | ' components detected, with sizes {[len(c) for c in nx.strongly_connected_components(G)]}' 34 | V_length = G.number_of_nodes() 35 | assert (V_length >= N), f'graph has fewer nodes than input sample size {N}' 36 | V = list(G.nodes) 37 | assert (isinstance(V[0], int)), 'the node id should be integers' 38 | landmarks = [V[i] for i in np.random.choice(V_length, N, replace=False).tolist()] 39 | nearest_neighbor = {x: x for x in V} 40 | shortest_path_distance = {x: INFINITY for x in V} 41 | for landmark in landmarks: 42 | shortest_path_lengths = nx.single_source_shortest_path_length(G, landmark) 43 | for key, value in shortest_path_lengths.items(): 44 | if value < shortest_path_distance[key]: 45 | shortest_path_distance[key] = value 46 | nearest_neighbor[key] = landmark 47 | 48 | # new ids for those landmarks are 0-N-1 in G', build a new sparsified graph G' here 49 | G_prime = nx.Graph() 50 | G_prime.add_nodes_from([i for i in range(N)]) 51 | Assignment, map_landmarkGid2Gpid = {}, {} 52 | for i, id in enumerate(landmarks): 53 | map_landmarkGid2Gpid[id] = i 54 | for key, value in nearest_neighbor.items(): 55 | id = map_landmarkGid2Gpid[value] 56 | Assignment[id] = [key] if id not in Assignment else Assignment[id] + [key] 57 | for key, value in Assignment.items(): 58 | Assignment[key] = set(value) 59 | 60 | # build edge in the sparsified graph 61 | g_prime_edges = {} 62 | for u, v, feature in G.edges.data(): 63 | i, j, weight = map_landmarkGid2Gpid[nearest_neighbor[u]], map_landmarkGid2Gpid[ 64 | nearest_neighbor[v]], feature.get(weight_key, 1) 65 | if i != j: 66 | if i > j: 67 | i, j = j, i 68 | g_prime_edges[(i, j)] = weight if (i, j) not in g_prime_edges else g_prime_edges[(i, j)] + weight 69 | 70 | # divided by 2 to make sure in the limit (no compression), the resulting graph is the same as original graph 71 | g_prime_edges = [(i, j, weight / 2.0) for (i, j), weight in g_prime_edges.items()] 72 | 
G_prime.add_weighted_edges_from(g_prime_edges, weight=weight_key) 73 | # todo: shall we make G_prime undirected? 74 | return G_prime, Assignment 75 | 76 | 77 | if __name__ == '__main__': 78 | fix_seed() 79 | n_node, n_edge, n_sample = 320, 5000, 100 80 | nfeat_dim = 42 81 | efeat_dim = 20 82 | G = random_pygeo_graph(n_node, nfeat_dim, n_edge, efeat_dim, device='cpu') 83 | G.edge_weight = torch.rand(G.edge_index.size(1), device=G.edge_index.device) 84 | summary(G, 'G') 85 | 86 | G_prime, Assignment = sample_N2Nlandmarks(G, n_sample, weight_key='edge_weight') 87 | print(nx.info(G_prime)) 88 | -------------------------------------------------------------------------------- /sparsenet/util/name_util.py: -------------------------------------------------------------------------------- 1 | # Created at 2020-06-06 2 | # Summary: some global names 3 | 4 | import os 5 | import os.path as osp 6 | 7 | from sparsenet.util.util import fig_dir, make_dir, model_dir 8 | 9 | loukas_datasets = ['minnesota', 'airfoil', 'yeast', 'bunny'] 10 | methods = ['variation_edges', 'variation_neighborhoods', 'algebraic_JC', 'heavy_edge' 'affinity_GS'] 11 | syn_graphs = ['random_geo', 'random_er', 'ws', 'ba', 'shape'] 12 | 13 | big_ego_graphs = ['PubMed', 'Coauthor-CS', 'Coauthor-physics', 'Amazon-photo', 'Amazon-computers', 'yelp', 'reddit', 14 | 'flickr'] 15 | ego_graphs = big_ego_graphs + ['CiteSeer'] 16 | 17 | 18 | def set_figname(args, name='subgraph'): 19 | """ used to set dir where figure is saved""" 20 | dir = os.path.join(fig_dir(), args.dataset, 21 | f'ratio_{args.ratio}', 22 | f'method_{args.method}', 23 | f'n_epoch_{args.n_epoch}', 24 | f'n_bottomk_{args.n_bottomk}', 25 | f'lap_{args.lap}', 26 | '') 27 | dir = dir.replace('_', '-') 28 | make_dir(dir) 29 | name = name.replace('_', '-') 30 | return dir + name + '.pdf' 31 | 32 | 33 | def set_model_dir(args, train_indices, val_indices, test_indices): 34 | """ used to set dir where model is saved """ 35 | OUT_PATH = os.path.join(model_dir(), args.dataset, 36 | f'ratio_{args.ratio}', 37 | f'strategy_{args.strategy}', 38 | f'method_{args.method}', 39 | f'train_{len(train_indices)}', 40 | f'val_{len(val_indices)}', 41 | f'test_{len(test_indices)}', 42 | f'loss_{args.loss}', 43 | f'n_epoch_{args.n_epoch}', 44 | f'n_bottomk_{args.n_bottomk}', 45 | f'lap_{args.lap}', 46 | f'bs_{args.bs}', 47 | f'lr_{args.lr}', 48 | f'ini_{args.ini}', 49 | # f'correction_{args.correction}' 50 | '') 51 | if args.dataset in ['coauthor-cs', 'coauthor-physics', 'flickr', 'pubmeds']: 52 | OUT_PATH = os.path.join(OUT_PATH, f'w_len_{args.w_len}', '') 53 | make_dir(OUT_PATH) 54 | return OUT_PATH 55 | 56 | 57 | def set_coarsening_graph_dir(args): 58 | coarse_dir = osp.join(model_dir(), '..', 'coarse_graph') 59 | 60 | if args.strategy == 'loukas': 61 | dir = osp.join(coarse_dir, 62 | 'loukas', 63 | args.dataset, 64 | f'ratio_{args.ratio}', 65 | f'method_{args.method}', 66 | f'n_bottomk_{args.n_bottomk}', 67 | f'cur_idx_{args.cur_idx}', 68 | '') 69 | 70 | elif args.strategy == 'DK': 71 | dir = osp.join(coarse_dir, 72 | 'DK', 73 | args.dataset, 74 | f'ratio_{args.ratio}', 75 | f'cur_idx_{args.cur_idx}', 76 | '') 77 | 78 | else: 79 | raise NotImplementedError 80 | 81 | if args.dataset in ['coauthor-cs', 'coauthor-physics', 'flickr', 'pubmeds']: 82 | dir = os.path.join(dir, f'w_len_{args.w_len}', '') 83 | 84 | make_dir(dir) 85 | return dir 86 | 87 | 88 | def set_eigenvec_dir(args): 89 | eig_dir = osp.join(model_dir(), '..', 'eigenvec') 90 | assert args.dataset in ['coauthor-cs', 'coauthor-physics', 
'flickr', 'pubmeds'] 91 | dir = osp.join(eig_dir, 92 | args.strategy, 93 | args.dataset, 94 | f'ratio_{args.ratio}', 95 | f'method_{args.method}', 96 | f'n_bottomk_{args.n_bottomk}', 97 | f'cur_idx_{args.cur_idx}', 98 | f'w_len_{args.w_len}', 99 | '') 100 | make_dir(dir) 101 | return dir 102 | -------------------------------------------------------------------------------- /sparsenet/util/train_util.py: -------------------------------------------------------------------------------- 1 | # Created at 2020-05-18 2 | # Summary: util functions to monitor training 3 | 4 | import functools 5 | 6 | import numpy as np 7 | import torch 8 | import torch.nn.functional as F 9 | 10 | from sparsenet.util.util import banner, timefunc, summary 11 | 12 | 13 | @timefunc 14 | def check_laplacian(L, step, eps=1e-8): 15 | """ check the whether laplacian is symmetric during training. 16 | check there is no nan in laplacian 17 | :param L: output of get_laplacian_mat. torch.sparse.tensor 18 | :param step: iteration number 19 | :param eps: difference allowed for two float number considered as the same 20 | """ 21 | 22 | # check if there is nan in the tensor 23 | Ltypes = (torch.sparse.FloatTensor, torch.cuda.sparse.FloatTensor) 24 | assert isinstance(L, Ltypes), 'Input laplacian is not sparse tensor' 25 | nan_check = torch.isnan(L._values()) 26 | nan_cnt = nan_check.nonzero().shape[0] 27 | if nan_cnt != 0: 28 | u, v = L._indices()[:, nan_check.nonzero()[0]] 29 | u, v = u.item(), v.item() 30 | exit(f'Laplacian at step {step} has {nan_cnt} nan values, e.g., L({u}, {v}) = Nan.') 31 | 32 | # dont want to convert to dense. manual implement. 33 | indices, values, sym_check = L._indices(), L._values(), {} 34 | for i in range(indices.shape[1]): 35 | u, v = indices[:, i] 36 | u, v = u.item(), v.item() 37 | sym_check[(u, v)] = values[i].item() 38 | for i in range(indices.shape[1]): 39 | u, v = indices[:, i] 40 | u, v = u.item(), v.item() 41 | if (v, u) not in sym_check and abs(sym_check[(u, v)]) > eps: 42 | exit(f'Laplacian at step {step} is not symmetric... on ({u}, {v}), with L({u}, {v})={sym_check[(u, v)]}' 43 | f' but L({v}, {u})=0.') 44 | if abs(sym_check[(u, v)] - sym_check[(v, u)]) > eps: 45 | exit(f'Laplacian at step {step} is not symmetric... 
on ({u}, {v}), with L({u}, {v})={sym_check[(u, v)]}' 46 | f' but L({v}, {u})={sym_check[(v, u)]}.') 47 | 48 | print(f'Laplacian at step {step} is normal!') 49 | 50 | 51 | class monitor(): 52 | def __init__(self): 53 | pass 54 | 55 | @staticmethod 56 | def data_monitor(train_data, sub, args): 57 | banner('Train data') 58 | for (k, v) in train_data[:args.bs]: 59 | print(k, v, sub.g_sml.edge_index[:, k]) 60 | print() 61 | 62 | @staticmethod 63 | def train_data_monitor(train_data, args): 64 | banner('Train_data first check') 65 | for i, (k, v) in enumerate(train_data[:args.bs]): 66 | if i > 5: break 67 | print(k, v) 68 | 69 | def train_monitor(self, pred, edge_weight_sml): 70 | summary(pred, 'pred') 71 | summary(edge_weight_sml, 'edge_weight_sml') 72 | 73 | 74 | def no_grad_func(func): 75 | @functools.wraps(func) 76 | def new_func(*args, **kwargs): 77 | with torch.no_grad(): 78 | return func(*args, **kwargs) 79 | 80 | return new_func 81 | 82 | 83 | @no_grad_func 84 | def monitor_param_saturation(model): 85 | monitors = {} 86 | for name, p in model.named_parameters(): 87 | p = F.sigmoid(p) 88 | sat = 1 - (p - (p > 0.5).float()).abs() 89 | monitors['sat/' + name] = sat 90 | return monitors 91 | 92 | 93 | if __name__ == '__main__': 94 | # banner('This one is sym and has no nan!') 95 | # i = torch.LongTensor([[0, 1, 2], [0, 1, 2]]) 96 | # v = torch.FloatTensor([3, 4, 5]) 97 | # s1 = torch.sparse.FloatTensor(i, v, torch.Size([3, 3])) 98 | # check_laplacian(s1, 1) 99 | 100 | # banner('This one is not symmetric!') 101 | # i = torch.LongTensor([[0, 1, 1], 102 | # [2, 0, 2]]) 103 | # v = torch.FloatTensor([3, 4, 5]) 104 | # s2 = torch.sparse.FloatTensor(i, v, torch.Size([2, 3])) 105 | # check_laplacian(s2, 1) 106 | 107 | banner('This one has nan value!') 108 | i = torch.LongTensor([[0, 1, 2], [0, 1, 2]]) 109 | v = torch.FloatTensor([3, 4, np.nan]) 110 | s3 = torch.sparse.FloatTensor(i, v, torch.Size([3, 3])) 111 | check_laplacian(s3, 1) 112 | -------------------------------------------------------------------------------- /sparsenet/util/model_util.py: -------------------------------------------------------------------------------- 1 | # Created at 2020-05-06 2 | # Summary: utils for sparsenet. 
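# Overview: ModelEvaluator wraps the train / validate / test loop. find_best_model trains on
# each graph in train_indices, scores the model on the validation graphs, and keeps the
# checkpoints with the best eigen-ratio and improve-ratio ('checkpoint-best-*.pkl');
# test_model reloads the chosen checkpoint and evaluates it on the test graphs.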
3 | 4 | import numpy as np 5 | import torch 6 | 7 | from sparsenet.model.eval import trainer, tester # train, set_train_data 8 | from sparsenet.util.util import banner, red, pf 9 | 10 | 11 | class ModelEvaluator(): 12 | def __init__(self, model, dataset_loader, dev, optimizer): 13 | self.dev = dev 14 | self.optimizer = optimizer 15 | self.model = model 16 | self.dataset_loader = dataset_loader 17 | 18 | def set_modelpath(self, path): 19 | self.modelpath = path 20 | 21 | def train(self, idx, TR, model, args): 22 | """ train the model for one graph """ 23 | TR.set_train_data(args, self.dataset_loader) 24 | TR.train(model, self.optimizer, args, verbose=False) 25 | TR.delete_train_data(idx) 26 | return model 27 | 28 | def validate(self, idx, val_indices, TE, model, args): 29 | val_score = self.val_score 30 | val_score[idx] = {'n_gen': [], 'impr_ratio': [], 'eigen_ratio': []} 31 | for idx_ in val_indices: 32 | args.test_idx = idx_ 33 | args.cur_idx = idx_ 34 | TE.set_test_data(args, self.dataset_loader) 35 | n_gen, impr_ratio, eigen_ratio = TE.eval(model, args, verbose=False) 36 | 37 | val_score[idx]['n_gen'].append(n_gen) 38 | val_score[idx]['impr_ratio'].append(impr_ratio) 39 | val_score[idx]['eigen_ratio'].append(eigen_ratio) 40 | 41 | banner(f'{args.dataset}: finish validating graph {val_indices}.') 42 | 43 | cur_impr_ratio = np.mean(val_score[idx]['impr_ratio']) 44 | cur_eigen_ratio = np.mean(val_score[idx]['eigen_ratio']) 45 | print(cur_eigen_ratio, self.best_eigen_ratio) 46 | self.val_score[idx] = val_score[idx] 47 | return cur_impr_ratio, cur_eigen_ratio 48 | 49 | def save(self, idx, model, mode='eigen-ratio'): 50 | """ save model for training graph idx """ 51 | assert mode in ['eigen-ratio', 'improve-ratio'] 52 | f = f'checkpoint-best-{mode}.pkl' 53 | 54 | if mode == 'eigen-ratio': 55 | torch.save(model.state_dict(), self.modelpath + f) 56 | print(red(f'Save model for train idx {idx}. Best-eigen-ratio is {pf(self.best_eigen_ratio, 2)}.')) 57 | elif mode == 'improve-ratio': 58 | torch.save(model.state_dict(), self.modelpath + f) 59 | print(red(f'Save model for train idx {idx}. 
Best-improve-ratio is {pf(self.best_impr_ratio, 2)}.')) 60 | 61 | def find_best_model(self, model, train_indices, val_indices, args): 62 | """ save the best model on validation dataset """ 63 | 64 | self.TR = trainer(dev=self.dev) 65 | self.TE = tester(dev=self.dev) 66 | 67 | self.val_score = {} 68 | self.best_n_gen = -1e10 69 | self.best_impr_ratio = -1e30 70 | self.best_eigen_ratio = -1e30 71 | self.train_indices = train_indices 72 | self.val_indices = val_indices 73 | 74 | for idx in self.train_indices: 75 | args.train_idx = idx 76 | args.cur_idx = idx 77 | 78 | model = self.train(idx, self.TR, model, args) 79 | cur_impr_ratio, cur_eigen_ratio = self.validate(idx, val_indices, self.TE, model, args) 80 | 81 | # save the model if it works well on val data 82 | if cur_eigen_ratio > self.best_eigen_ratio: 83 | self.best_eigen_ratio = cur_eigen_ratio 84 | self.save(idx, model, mode='eigen-ratio') 85 | 86 | if cur_impr_ratio > self.best_impr_ratio: 87 | self.best_impr_ratio = cur_impr_ratio 88 | self.save(idx, model, mode='improve-ratio') 89 | return model, args 90 | 91 | def test_model(self, model, test_indices, AP, args): 92 | model_name = AP.set_model_name() 93 | 94 | model.load_state_dict(torch.load(self.modelpath + model_name)) 95 | 96 | for idx_ in test_indices: 97 | args.test_idx = idx_ 98 | args.cur_idx = idx_ 99 | self.TE.set_test_data(args, self.dataset_loader) 100 | self.TE.eval(model, args, verbose=False) 101 | banner(f'{args.dataset}: finish testing graph {idx_}.') 102 | 103 | 104 | if __name__ == '__main__': 105 | pass 106 | -------------------------------------------------------------------------------- /sparsenet/evaluation/graph-coarsening/examples/experiment_approximation.py: -------------------------------------------------------------------------------- 1 | # Created at 2020-06-02 2 | # Summary: 3 | 4 | from graph_coarsening.coarsening_utils import * 5 | import graph_coarsening.graph_lib as graph_lib 6 | import graph_coarsening.graph_utils as graph_utils 7 | 8 | import numpy as np 9 | import scipy as sp 10 | from scipy import io 11 | from scipy.linalg import circulant 12 | import time 13 | import os 14 | 15 | import matplotlib 16 | import matplotlib.pylab as plt 17 | from mpl_toolkits.mplot3d import Axes3D 18 | 19 | import pygsp as gsp 20 | gsp.plotting.BACKEND = 'matplotlib' 21 | 22 | # Parameters 23 | graphs = ['yeast','minnesota', 'bunny', 'airfoil'] 24 | methods = ['heavy_edge', 'variation_edges', 'variation_neighborhoods', 'algebraic_JC', 'affinity_GS', 'kron'] 25 | K_all = np.array([10,40], dtype=np.int32) 26 | r_all = np.linspace(0.1, 0.9, 17, dtype=np.float32) 27 | 28 | print('k: ', K_all, '\nr: ', r_all) 29 | 30 | rerun_all = False 31 | rewrite_results = False 32 | if rerun_all: 33 | 34 | algorithm = 'greedy' 35 | max_levels = 20 36 | n_methods = len(methods) 37 | n_graphs = len(graphs) 38 | 39 | for graphIdx, graph in enumerate(graphs): 40 | 41 | N = 4000 # this is only an upper bound (the actual size depends on the graph) 42 | G = graph_lib.real(N, graph) 43 | N = G.N 44 | if N < 100: continue 45 | 46 | # precompute spectrum needed for metrics 47 | if K_all[-1] > N / 2: 48 | [Uk, lk] = eig(G.L) 49 | else: 50 | offset = 2 * max(G.dw) 51 | T = offset * sp.sparse.eye(G.N, format='csc') - G.L 52 | lk, Uk = sp.sparse.linalg.eigsh(T, k=K_all[-1], which='LM', tol=1e-6) 53 | lk = (offset - lk)[::-1] 54 | Uk = Uk[:, ::-1] 55 | 56 | subspace = np.zeros((n_methods, len(K_all), len(r_all))) 57 | failed = np.zeros((n_methods, len(K_all), len(r_all))) 58 | ratio = 
np.zeros((n_methods, len(K_all), len(r_all))) 59 | 60 | for KIdx, K in enumerate(K_all): 61 | 62 | print('{} {}| K:{:2.0f}'.format(graph, N, K)) 63 | 64 | for rIdx, r in enumerate(r_all): 65 | 66 | n_target = int(np.floor(N * (1 - r))) 67 | if K > n_target: 68 | print('Warning: K={}>n_target={}. skipping'.format(K, n_target)) 69 | continue # K = n_target 70 | 71 | for methodIdx, method in enumerate(methods): 72 | 73 | # algorithm is not deterministic: run a few times 74 | if method == 'kron': 75 | if KIdx == 0: 76 | n_iterations = 2 77 | n_failed = 0 78 | r_min = 1.0 79 | for iteration in range(n_iterations): 80 | 81 | Gc, iG = kron_coarsening(G, r=r, m=None) 82 | metrics = kron_quality(iG, Gc, kmax=K_all[-1], Uk=Uk[:, :K_all[-1]], lk=lk[:K_all[-1]]) 83 | 84 | if metrics['failed']: 85 | n_failed += 1 86 | else: 87 | r_min = min(r_min, metrics['r']) 88 | for iKIdx, iK in enumerate(K_all): 89 | subspace[methodIdx, iKIdx, rIdx] += metrics['error_subspace'][iK - 1] 90 | 91 | subspace[methodIdx, :, rIdx] /= (n_iterations - n_failed) 92 | failed[methodIdx, :, rIdx] = 1 if (r_min < r - 0.05) else 0 93 | ratio[methodIdx, :, rIdx] = r_min 94 | 95 | if np.abs(r_min - r) > 0.02: print( 96 | 'Warning: ratio={} instead of {} for {}'.format(r_min, r, method)) 97 | 98 | else: 99 | C, Gc, Call, Gall = coarsen(G, K=K, r=r, max_levels=max_levels, method=method, 100 | algorithm=algorithm, Uk=Uk[:, :K], lk=lk[:K]) 101 | metrics = coarsening_quality(G, C, kmax=K, Uk=Uk[:, :K], lk=lk[:K]) 102 | 103 | subspace[methodIdx, KIdx, rIdx] = metrics['error_subspace'][-1] 104 | failed[methodIdx, KIdx, rIdx] = 1 if (metrics['r'] < r - 0.05) else 0 105 | ratio[methodIdx, KIdx, rIdx] = metrics['r'] 106 | 107 | if np.abs(metrics['r'] - r) > 0.02: 108 | print('Warning: ratio={} instead of {} for {}'.format(metrics['r'], r, method)) 109 | 110 | if rewrite_results: 111 | filepath = os.path.join('..', 'results', 'experiment_approximation_' + graph + '.npz') 112 | print('.. saving to "' + filepath + '"') 113 | np.savez(filepath, methods=methods, K_all=K_all, r_all=r_all, subspace=subspace, failed=failed) 114 | 115 | print('done!') 116 | -------------------------------------------------------------------------------- /sparsenet/evaluation/graph-coarsening/graph_coarsening/graph_lib.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tempfile 3 | import zipfile 4 | from urllib import request 5 | 6 | import numpy as np 7 | import scipy as sp 8 | from pygsp import graphs 9 | from scipy import sparse 10 | 11 | from . import graph_utils 12 | 13 | _YEAST_URL = "http://nrvis.com/download/data/bio/bio-yeast.zip" 14 | _MOZILLA_HEADERS = [("User-Agent", "Mozilla/5.0")] 15 | 16 | 17 | def download_yeast(): 18 | r""" 19 | A convenience method for loading a network of protein-to-protein interactions in budding yeast. 
20 | 21 | http://networkrepository.com/bio-yeast.php 22 | """ 23 | with tempfile.TemporaryDirectory() as tempdir: 24 | zip_filename = os.path.join(tempdir, "bio-yeast.zip") 25 | with open(zip_filename, "wb") as zip_handle: 26 | opener = request.build_opener() 27 | opener.addheaders = _MOZILLA_HEADERS 28 | request.install_opener(opener) 29 | with request.urlopen(_YEAST_URL) as url_handle: 30 | zip_handle.write(url_handle.read()) 31 | with zipfile.ZipFile(zip_filename) as zip_handle: 32 | zip_handle.extractall(tempdir) 33 | mtx_filename = os.path.join(tempdir, "bio-yeast.mtx") 34 | with open(mtx_filename, "r") as mtx_handle: 35 | _ = next(mtx_handle) # header 36 | n_rows, n_cols, _ = next(mtx_handle).split(" ") 37 | E = np.loadtxt(mtx_handle) 38 | E = E.astype(int) - 1 39 | W = sparse.lil_matrix((int(n_rows), int(n_cols))) 40 | W[(E[:, 0], E[:, 1])] = 1 41 | W = W.tocsr() 42 | W += W.T 43 | return W 44 | 45 | 46 | def real(N, graph_name, connected=True): 47 | r""" 48 | A convenience method for loading toy graphs that have been collected from the internet. 49 | 50 | Parameters: 51 | ---------- 52 | N : int 53 | The number of nodes. Set N=-1 to return the entire graph. 54 | 55 | graph_name : a string 56 | Use to select which graph is returned. Choices include 57 | * airfoil 58 | Graph from airflow simulation 59 | http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.50.9217&rep=rep1&type=pdf 60 | http://networkrepository.com/airfoil1.php 61 | * yeast 62 | Network of protein-to-protein interactions in budding yeast. 63 | http://networkrepository.com/bio-yeast.php 64 | * minnesota 65 | Minnesota road network. 66 | I am using the version provided by the PyGSP software package (initially taken from the MatlabBGL library.) 67 | * bunny 68 | The Stanford bunny is a computer graphics 3D test model developed by Greg Turk and Marc Levoy in 1994 at Stanford University 69 | I am using the version provided by the PyGSP software package. 70 | connected : Boolean 71 | Set to True if only the giant component is to be returned. 72 | """ 73 | 74 | directory = os.path.join( 75 | os.path.dirname(os.path.dirname(graph_utils.__file__)), "data" 76 | ) 77 | 78 | tries = 0 79 | while True: 80 | tries = tries + 1 81 | 82 | if graph_name == "airfoil": 83 | G = graphs.Airfoil() 84 | G = graphs.Graph(W=G.W[0:N, 0:N], coords=G.coords[0:N, :]) 85 | 86 | elif graph_name == "yeast": 87 | W = download_yeast() 88 | G = graphs.Graph(W=W[0:N, 0:N]) 89 | 90 | elif graph_name == "minnesota": 91 | G = graphs.Minnesota() 92 | W = G.W.astype(np.float) 93 | G = graphs.Graph(W=W[0:N, 0:N], coords=G.coords[0:N, :]) 94 | 95 | elif graph_name == "bunny": 96 | G = graphs.Bunny() 97 | W = G.W.astype(np.float) 98 | G = graphs.Graph(W=W[0:N, 0:N], coords=G.coords[0:N, :]) 99 | 100 | if connected == False or G.is_connected(): 101 | break 102 | if tries > 1: 103 | print("WARNING: Disconnected graph. 
Using the giant component.") 104 | G, _ = graph_utils.get_giant_component(G) 105 | break 106 | 107 | if not hasattr(G, 'coords'): 108 | try: 109 | import networkx as nx 110 | graph = nx.from_scipy_sparse_matrix(G.W) 111 | pos = nx.nx_agraph.graphviz_layout(graph, prog='neato') 112 | G.set_coordinates(np.array(list(pos.values()))) 113 | except ImportError: 114 | G.set_coordinates() 115 | 116 | return G 117 | 118 | 119 | def models(N, graph_name, connected=True, default_params=False, k=12, sigma=0.5): 120 | tries = 0 121 | while True: 122 | tries = tries + 1 123 | if graph_name == "regular": 124 | if default_params: 125 | k = 10 126 | offsets = [] 127 | for i in range(1, int(k / 2) + 1): 128 | offsets.append(i) 129 | offsets.append(-(N - i)) 130 | 131 | offsets = np.array(offsets) 132 | vals = np.ones_like(offsets) 133 | W = sp.sparse.diags( 134 | vals, offsets, shape=(N, N), format="csc", dtype=np.float 135 | ) 136 | W = (W + W.T) / 2 137 | G = graphs.Graph(W=W) 138 | 139 | else: 140 | print("ERROR: uknown model") 141 | return 142 | 143 | if connected == False or G.is_connected(): 144 | break 145 | if tries > 1: 146 | print("WARNING: disconnected graph.. trying to use the giant component") 147 | G = graph_utils.get_giant_component(G) 148 | break 149 | return G 150 | 151 | 152 | if __name__ == '__main__': 153 | g = real(-1, 'yeast') 154 | print(g) 155 | -------------------------------------------------------------------------------- /sparsenet/util/cut_util.py: -------------------------------------------------------------------------------- 1 | # Created at 2020-06-11 2 | # Summary: cut, conductance related 3 | 4 | from typing import Optional 5 | 6 | import networkx as nx 7 | import numpy as np 8 | import torch 9 | from torch_geometric.utils import degree, to_networkx 10 | 11 | from sparsenet.util.util import summary, timefunc, fix_seed, pf, random_pygeo_graph 12 | 13 | 14 | def normalized_cut(edge_index, edge_attr, num_nodes: Optional[int] = None): 15 | r"""Computes the normalized cut :math:`\mathbf{e}_{i,j} \cdot 16 | \left( \frac{1}{\deg(i)} + \frac{1}{\deg(j)} \right)` of a weighted graph 17 | given by edge indices and edge attributes. 18 | 19 | Args: 20 | edge_index (LongTensor): The edge indices. 21 | edge_attr (Tensor): Edge weights or multi-dimensional edge features. 22 | num_nodes (int, optional): The number of nodes, *i.e.* 23 | :obj:`max_val + 1` of :attr:`edge_index`. (default: :obj:`None`) 24 | 25 | :rtype: :class:`Tensor` 26 | """ 27 | 28 | row, col = edge_index[0], edge_index[1] 29 | deg = 1. 
/ degree(col, num_nodes, edge_attr.dtype) 30 | deg = deg[row] + deg[col] 31 | cut = edge_attr * deg 32 | return cut 33 | 34 | 35 | def _set(row, s, dev='cuda'): 36 | # t0 = time() 37 | i = torch.nonzero(row[..., None] == s)[:, 0] 38 | # t1 = time() 39 | # print(pf(t1-t0, 2)) 40 | row_s = torch.zeros(row.size()) 41 | row_s[i] = 1 42 | return row_s.type(torch.int8).to(dev) 43 | 44 | 45 | @timefunc 46 | def pyG_conductance(edge_index, edge_attr, s, t=None, dev='cuda', verbose=False): 47 | """ 48 | :param edge_index: 49 | :param edge_attr: 50 | :param s: a list or a tensor 51 | :param t: a list or a tensor 52 | :return: conductance (tensor) 53 | """ 54 | 55 | if t is None: 56 | _t = None 57 | tmp = torch.unique(edge_index).tolist() 58 | t = list(set(tmp) - set(s)) 59 | 60 | s, t = torch.tensor(s).to(dev), torch.tensor(t).to(dev) 61 | edge_index, edge_attr = edge_index.to(dev), edge_attr.to(dev) 62 | row, col = edge_index[0], edge_index[1] 63 | del edge_index 64 | 65 | # row_s = torch.sum(row[..., None] == s, axis=1) # memory intensive 66 | row_s = _set(row, s, dev=dev) 67 | # col_s = torch.sum(col[..., None] == s, axis=1) 68 | col_s = _set(col, s, dev=dev) 69 | # summary(row_s - row_s_, 'row_s - row_s_') 70 | # summary(col_s - col_s_, 'col_s - col_s_') 71 | 72 | vol_s = torch.sum(torch.mul(edge_attr, row_s + col_s)) 73 | 74 | # row_t = torch.sum(row[..., None] == t, axis=1) 75 | row_t = _set(row, t, dev=dev) if _t is not None else (1 - row_s).to(dev) 76 | # col_t = torch.sum(col[..., None] == t, axis=1) 77 | col_t = _set(col, t, dev=dev) if _t is not None else (1 - col_s).to(dev) 78 | vol_t = torch.sum(torch.mul(edge_attr, row_t + col_t)) 79 | 80 | indices = torch.nonzero((row_s & col_t) | (row_t & col_s)) 81 | cut = torch.sum(edge_attr[indices]) 82 | 83 | # print(f'cut: {cut}. vol_s: {vol_s}. vol_t: {vol_t}') 84 | if verbose: 85 | print(f'cut: {cut}. vol_s: {vol_s}. vol_t: {vol_t}, conductance: {cut / max(1, min(vol_s, vol_t))}') 86 | return cut / max(1, min(vol_s, vol_t)) # make sure it's at least 1. This is needed for large reduction ratio. 
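# Worked example of the quantity pyG_conductance returns: for an unweighted 4-cycle
# 0-1-2-3-0 and S = {0, 1}, the cut between S and its complement is 2 (edges (1, 2) and
# (3, 0)), vol(S) = vol(complement) = 4, so the conductance is 2 / min(4, 4) = 0.5. The
# max(1, .) guard above only changes the result when the smaller volume drops below 1.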
87 | 88 | 89 | import argparse 90 | 91 | parser = argparse.ArgumentParser(description='Baseline for graph sparsification') 92 | parser.add_argument('--dataset', type=str, default='ws', help='dataset for egographs') 93 | parser.add_argument('--sample', action='store_true') 94 | 95 | if __name__ == '__main__': 96 | args = parser.parse_args() 97 | fix_seed() 98 | n_node, n_edge = 320, 5000 99 | N = 1 100 | idx = 0 101 | 102 | # kwargs = {'dataset': args.dataset, 'hop': -1, 'size': 50, 's_low': -1, 's_high': -1, 'sample': 'rw'} 103 | # pyGs = EgoGraphs(**kwargs) 104 | # pyGs = syth_graphs(type=args.dataset, n=2, size=1000) # 105 | pyG = random_pygeo_graph(n_node, 1, n_edge, 1) 106 | pyGs = [pyG] * 5 107 | 108 | for pyG in pyGs[:1]: 109 | # pyG = pyGs[idx] 110 | for _ in range(5): 111 | print(pyG) 112 | pyG.edge_weight = pyG.edge_weight * 1 113 | # summary(pyG, 'pyG') 114 | nxG = to_networkx(pyG, edge_attrs=['edge_weight'], 115 | to_undirected=True) # important: directed/non-directed makes difference for cuts 116 | 117 | pyG_cut = normalized_cut(pyG.edge_index, pyG.edge_weight, pyG.num_nodes, ) 118 | s = np.random.choice(range(pyG.num_nodes), int(pyG.num_nodes / 2.0), replace=False).tolist() 119 | if args.sample: 120 | s, t = s[:len(s) // 2], s[len(s) // 2:] 121 | else: 122 | s, t = s, None 123 | 124 | summary(np.array(s), 's') 125 | c = pyG_conductance(pyG.edge_index, pyG.edge_weight, s=s, t=None, verbose=True, dev='cuda') 126 | 127 | nxcut = nx.cut_size(nxG, s, T=t, weight='edge_weight') 128 | volume_S = nx.algorithms.volume(nxG, s, weight='edge_weight') 129 | c_ = nx.conductance(nxG, s, T=t, weight='edge_weight') 130 | print(nxcut, volume_S, pf(c, 3)) 131 | print() 132 | 133 | assert c == c_, f'c: {c}. c_: {c_}' 134 | exit() 135 | 136 | nx_cut, nx_conductance = [], [] 137 | for u, v in nxG.edges: 138 | cut = nx.normalized_cut_size(nxG, [u], [v], weight='edge_weight') 139 | conductance = nx.conductance(nxG, [u], [v], weight='edge_weight') 140 | conductance_ = pyG_conductance(pyG.edge_index, pyG.edge_weight, [u], [v], ) 141 | assert conductance == conductance_, f'nx: {conductance}. 
pyG: {conductance_}' 142 | 143 | nx_cut.append(cut) 144 | nx_conductance.append(conductance) 145 | 146 | summary(np.array(nx_conductance), 'nx_conductance') 147 | 148 | exit() 149 | summary(pyG_cut.numpy(), 'pyG_cut') 150 | summary(np.array(nx_cut), 'nx_cut') 151 | -------------------------------------------------------------------------------- /sparsenet/util/pyg_util.py: -------------------------------------------------------------------------------- 1 | # Created at 2020-06-07 2 | # Summary: 3 | import os 4 | n=2 5 | os.environ['MKL_NUM_THREADS'] = str(n) 6 | os.environ['OMP_NUM_THREADS'] = str(n) 7 | os.environ['OPENBLAS_NUM_THREADS'] = str(n) 8 | os.environ['MKL_NUM_THREADS'] = str(n) 9 | os.environ['VECLIB_MAXIMUM_THREADS'] = str(n) 10 | os.environ['NUMEXPR_NUM_THREADS'] = str(n) 11 | import torch 12 | torch.set_num_threads(n) # always import this first 13 | status = f'{n}' 14 | print(f'thread status {__file__}: {status}') 15 | 16 | import os 17 | import os.path as osp 18 | 19 | import torch 20 | from torch_geometric.data import InMemoryDataset 21 | 22 | from sparsenet.util.data import EgoGraphs, precompute_eig, shape_data, syth_graphs, loukas_data 23 | from sparsenet.util.name_util import big_ego_graphs, syn_graphs, loukas_datasets 24 | from sparsenet.util.util import summary, \ 25 | random_pygeo_graph 26 | 27 | 28 | class NonEgoGraphs(InMemoryDataset): 29 | """ similar to EgoGraphs, but for other graphs (sythetic graphs + Loukas's dataset ) """ 30 | 31 | def __init__(self, dataset=None, transform=None, pre_transform=None): 32 | """ kwargs for function egographs """ 33 | data_dir = os.path.join(osp.dirname(osp.realpath(__file__)), '..', 'data') 34 | nonegograph_dir = os.path.join(data_dir, 'nonegographs') 35 | self.dataset = dataset 36 | root = os.path.join(nonegograph_dir, self.dataset) 37 | 38 | super(NonEgoGraphs, self).__init__(root, transform, pre_transform) 39 | print(self.processed_paths[0]) 40 | self.data, self.slices = torch.load(self.processed_paths[0]) 41 | 42 | @property 43 | def raw_file_names(self): 44 | return ['some_file_1'] 45 | 46 | @property 47 | def processed_file_names(self): 48 | return self.dataset # dict2name(self.egograph_kwargs) 49 | 50 | def download(self): 51 | pass 52 | 53 | def _select_datasets(self): 54 | dataset = self.dataset 55 | if dataset == 'shape': 56 | datasets = shape_data(50, _k=10) 57 | elif dataset == 'faust': 58 | datasets = shape_data(50, _k=10, name='FAUST') 59 | elif dataset == 'random_geo': 60 | datasets = syth_graphs(n=50, size=700, type='geo') # random_geo(n=10, size=512) 61 | elif dataset == 'random_er': 62 | datasets = syth_graphs(n=50, size=512, type='er') # random_er(n=10, size=512) 63 | elif dataset in ['sbm', 'ws', 'ba']: 64 | datasets = syth_graphs(n=50, size=512, type=dataset) 65 | elif dataset in ['yeast', 'airfoil', 'bunny', 'minnesota']: 66 | datasets = loukas_data(name=dataset) 67 | else: 68 | NotImplementedError 69 | 70 | return datasets 71 | 72 | def process(self): 73 | # Read data into huge `Data` list. 
74 | 75 | data_list = self._select_datasets() 76 | 77 | if self.pre_filter is not None: 78 | data_list = [data for data in data_list if self.pre_filter(data)] 79 | 80 | if self.pre_transform is not None: 81 | data_list = [self.pre_transform(data) for data in data_list] 82 | 83 | data, slices = self.collate(data_list) 84 | torch.save((data, slices), self.processed_paths[0]) 85 | 86 | import argparse 87 | parser = argparse.ArgumentParser(description='Baseline for graph sparsification') 88 | parser.add_argument('--dataset', type=str, default='Coauthor-CS', help='dataset for egographs') 89 | parser.add_argument('--lap', type=str, default='None') 90 | parser.add_argument('--n_vec', type=int, default=100) 91 | parser.add_argument('--w_len', type=int, default=5000) 92 | 93 | 94 | if __name__ == '__main__': 95 | def main(): 96 | args = parser.parse_args() 97 | for dataset in [args.dataset]: # big_ego_graphs: # ['CiteSeer','PubMed', 'wiki-vote']: 98 | for size in [50]: # [20]: 99 | # kwargs = {"hop": -1, "size": size, "dataset": dataset, 's_low': -1, 's_high': -1, 'sample': 'rw', 100 | # 'n_vec':args.n_vec, 'w_len':args.w_len, 'include_self': False} 101 | kwargs = {'dataset': 'flickr', 'hop': -1, 'size': 1, 's_low': -1, 's_high': -1, 102 | 'sample': 'rw', 'n_vec': 500, 'w_len': 15000, 'include_self': True} 103 | 104 | data = EgoGraphs(**kwargs) 105 | for d in data: 106 | print(d) 107 | # print(data[0]) 108 | # for idx, g in enumerate(data): 109 | # g = clip_feat(g, args, dim=52) 110 | # if idx < 5: print(g) 111 | # new_data = [clip_feat(g, args, dim=52) for g in data] 112 | del data 113 | print('hello') 114 | 115 | main() 116 | exit() 117 | 118 | g = syth_graphs(1, size=1000) 119 | summary(g) 120 | exit() 121 | for dataset in ['faust']: # syn_graphs + loukas_datasets: 122 | data = NonEgoGraphs(dataset=dataset) 123 | for i, d in enumerate(data): 124 | summary(d, i, highlight=True) 125 | 126 | exit() 127 | 128 | 129 | # dir = os.path.join(data_dir, 'wiki-vote') 130 | for dataset in [args.dataset]: # big_ego_graphs: # ['CiteSeer','PubMed', 'wiki-vote']: 131 | for hop in [2,]: # [3,4,5]: # [3, 4, 5, 6]: # [2,3,4,5,6]: 132 | for size in [20]: # [20]: 133 | s_low = 5000 if dataset in big_ego_graphs else 200 134 | s_high = 10000 if dataset in big_ego_graphs else 5000 135 | kwargs = {"hop": hop, "size": size, "dataset": dataset, 's_low': s_low, 's_high': s_high} 136 | data = EgoGraphs(**kwargs) 137 | print(data[0]) 138 | # continue 139 | 140 | if False: # dataset == 'wiki-vote' and hop==3 and size==10: 141 | print(data) 142 | print(data[0]['None_vals'][:5]) 143 | for g in data: 144 | summary(g) 145 | exit() 146 | 147 | 148 | for dataset in syn_graphs + loukas_datasets: 149 | data = NonEgoGraphs(dataset=dataset) 150 | summary(data[0], dataset, highlight=True) 151 | # data_cmp = syth_graphs(n=20, size=512, type='ws') 152 | # print(data_cmp[5]) 153 | 154 | exit() 155 | g = random_pygeo_graph(1000, 1, 20000, 1) 156 | summary(g, 'beofre') 157 | g = precompute_eig(g) 158 | summary(g, 'after') 159 | exit() 160 | -------------------------------------------------------------------------------- /sparsenet/model/example.py: -------------------------------------------------------------------------------- 1 | # Created at 2020-04-17 2 | # Summary: a simple example to illustrate data pipeline 3 | 4 | import os 5 | 6 | from sparsenet.util.model_util import ModelEvaluator 7 | 8 | n = 2 9 | os.environ['MKL_NUM_THREADS'] = str(n) 10 | os.environ['OMP_NUM_THREADS'] = str(n) 11 | os.environ['OPENBLAS_NUM_THREADS'] = str(n) 12 
| os.environ['MKL_NUM_THREADS'] = str(n) 13 | os.environ['VECLIB_MAXIMUM_THREADS'] = str(n) 14 | os.environ['NUMEXPR_NUM_THREADS'] = str(n) 15 | import torch 16 | 17 | torch.set_num_threads(n) # always import this first 18 | 19 | import argparse 20 | import logging 21 | import sys 22 | 23 | from sparsenet.model.eval import tester # train, set_train_data 24 | from sparsenet.model.model import GNN_graphpred 25 | from sparsenet.util.data import data_loader 26 | from sparsenet.util.name_util import set_model_dir 27 | from sparsenet.util.util import fix_seed 28 | from sparsenet.util.args_util import argsparser 29 | 30 | parser = argparse.ArgumentParser(description='Graph edge sparsification') 31 | 32 | # model 33 | parser.add_argument('--n_layer', type=int, default=3, help='number of layer') 34 | parser.add_argument('--emb_dim', type=int, default=50, help='embedding dimension') 35 | parser.add_argument('--ratio', type=float, default=0.5, help='reduction ratio') 36 | parser.add_argument('--n_vec', type=int, default=100, help='number of random vector') 37 | parser.add_argument('--force_pos', action='store_true', help='Force the output of GNN to be positive') 38 | parser.add_argument('--dataset', type=str, default='ws', help='the name of dataset') 39 | parser.add_argument('--w_len', type=int, default=5000, help='walk length') 40 | 41 | # optim 42 | parser.add_argument('--device', type=str, default='cuda', help='') 43 | parser.add_argument('--n_epoch', type=int, default=50, help='') 44 | parser.add_argument('--bs', type=int, default=600, help='batch size') 45 | parser.add_argument('--lr', type=float, default=1e-3, help='learning rate') 46 | parser.add_argument('--n_bottomk', type=int, default=40, help='Number of Bottom K eigenvector') 47 | 48 | # debug 49 | parser.add_argument('--lap', type=str, default='none', help='Laplacian type', 50 | choices=[None, 'sym', 'rw', 'none', 'None']) 51 | parser.add_argument('--debug', action='store_true', help='debug. Smaller graph') 52 | parser.add_argument('--tbx', action='store_true', help='write to tensorboardx.') 53 | parser.add_argument('--inv', action='store_true', help='use inverse Laplacian for loss') 54 | parser.add_argument('--viz', action='store_true', help='visualization of weights of sparsified graph. Save to dir.') 55 | parser.add_argument('--show', action='store_true', help='Show the figure.') 56 | parser.add_argument('--ini', action='store_true', help='initialilze the output of gnn to be near the weights of g_sml') 57 | parser.add_argument('--testonly', action='store_true', help='Skip the training. Only test.') 58 | parser.add_argument('--valeigen', action='store_true', help='Use eigen_ratio as metric to select model') 59 | parser.add_argument('--cacheeig', action='store_true', help='save and load cached eigenvector of coarse graph') 60 | parser.add_argument('--mlp', action='store_true', help='use a mlp baseline') 61 | 62 | # parser.add_argument('--verbose', action='store_true', help='control the info level for real graph') 63 | parser.add_argument('--train_idx', type=int, default=0, help='train index of the shape data. Do not change.') 64 | parser.add_argument('--test_idx', type=int, default=0, help='test index of the shape data. Do not change.') 65 | parser.add_argument('--cur_idx', type=int, default=-1, help='Current index. 
used for save coarsening graphs') 66 | parser.add_argument('--lap_check', action='store_true', help='check the laplacian is normal during training') 67 | parser.add_argument('--n_cycle', type=int, default=1, help='number of cycles') 68 | parser.add_argument('--trial', type=int, default=0, help='trial. Act like random seed') 69 | parser.add_argument('--seed', type=int, default=1, help='random seed') 70 | parser.add_argument('--loss', type=str, default='quadratic', help='quadratic loss', 71 | choices=['quadratic', 'rayleigh']) 72 | parser.add_argument('--offset', type=int, default=0, help='number of offset eigenvector') 73 | 74 | parser.add_argument('--correction', action='store_true', help='Apply Laplacian correction') 75 | parser.add_argument('--dynamic', action='store_true', help='Dynamic projection') 76 | parser.add_argument('--loukas_quality', action='store_true', help='Compute the coarsening quality of loukas method') 77 | parser.add_argument('--log', type=str, default='debug', help='{info, debug}') 78 | parser.add_argument('--train_indices', type=str, default='0,', 79 | help='train indices of the dataset') # https://bit.ly/3dtJtPn 80 | parser.add_argument('--test_indices', type=str, default='0,', help='test indices of the dataset') 81 | parser.add_argument('--strategy', type=str, default='loukas', help='coarsening strategy', choices=['DK', 'loukas']) 82 | parser.add_argument('--method', type=str, default='variation_edges', help='Loukas methods', 83 | choices=['variation_neighborhoods', 'variation_edges', 'variation_cliques', 84 | 'heavy_edge', 'algebraic_JC', 'affinity_GS', 'kron', 'variation_neighborhood', 85 | 'DK_method']) 86 | 87 | if __name__ == '__main__': 88 | AP = argsparser(parser.parse_args()) 89 | args = AP.args 90 | dev = args.device 91 | fix_seed(seed=args.seed) 92 | 93 | dataset_loader = data_loader(args, dataset=args.dataset) 94 | train_indices, val_indices, test_indices = AP.set_indices() 95 | 96 | nfeat_dim, efeat_dim, out_dim = 5, 1, 1 97 | model = GNN_graphpred(args.n_layer, args.emb_dim, nfeat_dim, efeat_dim, out_dim, 98 | force_pos=args.force_pos, mlp=args.mlp).to(dev) 99 | optimizer = torch.optim.Adam(model.parameters(), args.lr) 100 | logging.basicConfig(level=getattr(logging, args.log.upper()), 101 | handlers=[logging.StreamHandler(sys.stdout)]) 102 | 103 | TE = tester(dev=dev) 104 | ################################################################ 105 | ME = ModelEvaluator(model, dataset_loader, dev, optimizer) 106 | ME.set_modelpath(set_model_dir(args, train_indices, val_indices, test_indices)) 107 | model, args = ME.find_best_model(model, train_indices, val_indices, args) 108 | ME.test_model(model, test_indices, AP, args) 109 | ################################################################ 110 | -------------------------------------------------------------------------------- /sparsenet/evaluation/graph-coarsening/examples/coarsening_demo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | # The script demonstrates how the code can be used with a toy example (see also [this blogpost](https://andreasloukas.blog/2018/11/05/multilevel-graph-coarsening-with-spectral-and-cut-guarantees/)). 5 | # 6 | # The code accompanies paper [Graph reduction with spectral and cut guarantees](http://www.jmlr.org/papers/volume20/18-680/18-680.pdf) by Andreas Loukas published at JMLR/2019 ([bibtex](http://www.jmlr.org/papers/v20/18-680.bib)). 
7 | # 8 | # This work was kindly supported by the Swiss National Science Foundation (grant number PZ00P2 179981). 9 | # 10 | # 15 March 2019 11 | # 12 | # [Andreas Loukas](https://andreasloukas.blog) 13 | # 14 | # [![DOI](https://zenodo.org/badge/175851068.svg)](https://zenodo.org/badge/latestdoi/175851068) 15 | # 16 | # Released under the Apache license 2.0 17 | 18 | # In[1]: 19 | 20 | 21 | # get_ipython().system('pip install networkx') 22 | # 23 | # 24 | # # In[1]: 25 | # 26 | # 27 | # get_ipython().run_line_magic('load_ext', 'autoreload') 28 | # get_ipython().run_line_magic('autoreload', '2') 29 | # get_ipython().run_line_magic('matplotlib', 'inline') 30 | # 31 | # 32 | # # In[2]: 33 | # 34 | # 35 | # from IPython.core.display import display, HTML 36 | # display(HTML("")) 37 | 38 | 39 | # In[3]: 40 | 41 | 42 | from graph_coarsening.coarsening_utils import * 43 | import graph_coarsening.graph_utils 44 | 45 | import numpy as np 46 | import scipy as sp 47 | 48 | import matplotlib 49 | import matplotlib.pylab as plt 50 | from mpl_toolkits.mplot3d import Axes3D 51 | 52 | import networkx as nx 53 | import pygsp as gsp 54 | from pygsp import graphs 55 | 56 | from sparsenet.util.util import summary, tonp, np2set 57 | 58 | gsp.plotting.BACKEND = 'matplotlib' 59 | 60 | 61 | # ### Construct the graph ## 62 | 63 | # In[4]: 64 | 65 | 66 | N = 400 67 | 68 | G = graphs.BarabasiAlbert(N) 69 | if not hasattr(G, 'coords'): 70 | try: 71 | graph = nx.from_scipy_sparse_matrix(G.W) 72 | pos = nx.nx_agraph.graphviz_layout(graph, prog='neato') 73 | G.set_coordinates(np.array(list(pos.values()))) 74 | except ImportError: 75 | G.set_coordinates() 76 | G.compute_fourier_basis() # this is for convenience (not really needed by coarsening) 77 | 78 | N = G.N 79 | L = G.L.toarray() 80 | S = graph_coarsening.graph_utils.get_S(G).T 81 | 82 | plt.spy(G.W, markersize=0.2); 83 | 84 | 85 | # ### Do some coarsening 86 | # 87 | # * Possible methods supported are: 'variation_edges', 'variation_neighborhood', 'algebraic_JC', 'heavy_edge', 'affinity_GS' 88 | # * $r = 1 - n/N$ is the dimensionality reduction ratio 89 | # * $k$ is the size of the subspace we are interested in 90 | # * $k_{max}$ should be set to be larger or equal to $k$. It allows us to measure the quality of coarsening over subspaces larger than $k$. 
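# (Added note, hedged) Quick arithmetic check of the ratio definition used
# below: since r = 1 - n/N, running with N = 400 and r = 0.6 should leave a
# coarse graph with roughly n = (1 - r) * N = 160 nodes.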
91 | 92 | # In[5]: 93 | 94 | 95 | method = 'variation_edges' # 'variation_neighborhood' 96 | 97 | # Parameters 98 | r = 0.6 # the extend of dimensionality reduction (r=0 means no reduction) 99 | k = 5 100 | kmax = int(3*k) 101 | 102 | import scipy 103 | from signor.viz.matrix import viz_matrix 104 | import collections 105 | 106 | C, Gc, Call, Gall = coarsen(G, K=k, r=r, method=method) 107 | print('projection check:',sp.sparse.linalg.norm( ((C.T).dot(C))**2 - ((C.T).dot(C)) , ord='fro')) 108 | P = C.power(2) 109 | print(type(C)) 110 | assert isinstance(C, scipy.sparse.csc.csc_matrix) 111 | P = tonp(P) 112 | n1, n2 = P.shape 113 | ret = P.dot(np.ones((n2, ))) 114 | print(collections.Counter(ret)) 115 | print(np2set(ret)) 116 | summary(ret, 'check') 117 | 118 | 119 | exit() 120 | 121 | summary(C, 'Coarsening matrix') 122 | summary(Call, 'a list of coarsening matrix') 123 | 124 | exit() 125 | for i, m in enumerate(Call): 126 | viz_matrix(tonp(m), f'proj matrix {i}') 127 | 128 | metrics = coarsening_quality(G, C, kmax=kmax) 129 | n = Gc.N 130 | summary(metrics, 'metrics') 131 | exit() 132 | 133 | print('{:16} | r: {:1.4}, nedges: {}, levels: {}, epsilon: {:1.4}'.format(method, metrics['r'], metrics['m'], len(Call), metrics['error_subspace'][k-1])) 134 | 135 | 136 | # ### Visualize the sequence of coarsening levels 137 | # 138 | # * $c+1$ graphs are plotted in total. The leftmost is the original graph and the rightmost is the final coarse graph. 139 | # * Colors are used to indicate the size of each contraction set $C$: 140 | # * green is for $|C|=2$ blue is for $|C|=3$, red is for $|C|=4$, and yellow for $|C|>4$ 141 | # 142 | 143 | # In[6]: 144 | 145 | 146 | plot_coarsening(Gall, Call, size=5, alpha=0.6, title=method); 147 | 148 | 149 | # ### Various metrics for coarsening quality 150 | # 151 | # * $\epsilon$ is the restricted similarity constant such that, for every $x \in span(U_k)$ we have $$(1 - \epsilon) x^\top L x \leq x_c^\top L_c x_c \leq (1+\epsilon) x^\top L x $$ 152 | # * the eigenvalue error is defined (for every $i = 1, \ldots, k, \ldots, kmax$ as $\frac{\lambda_i - \tilde{\lambda}_i}{\lambda_i}$ 153 | # * the angle matrix contains the angles between the eigenvectors of $L$ (y-axis) and the lifted eigenvectors of $L_c$. The closer to counter-diagonal it is, the better. 
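# (Added illustration, hedged) The quantities described above are stored in the
# `metrics` dict returned by coarsening_quality(); the keys used here are the
# same ones the plotting code below relies on, nothing new is assumed beyond that.
eps_k = np.abs(metrics['error_subspace'])[k - 1]   # restricted similarity constant for span(U_k)
eig_err = np.abs(metrics['error_eigenvalue'])      # relative eigenvalue errors
print('epsilon at k: {:1.4}, worst relative eigenvalue error: {:1.4}'.format(eps_k, eig_err.max()))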
154 | 155 | # In[7]: 156 | 157 | 158 | size = 2.04; fig, axes = plt.subplots(1, 3, figsize=(4*size*3, 3*size)); lineWidth = 1 159 | 160 | axes[0].plot(np.arange(kmax), np.abs(metrics['error_subspace']), 'or-') 161 | axes[0].set_xlabel('$k$'); axes[0].set_ylabel('$\epsilon$') 162 | axes[0].plot( [k, k], [0, max(metrics['error_subspace'])], ':k') 163 | 164 | axes[1].boxplot(np.abs(metrics['error_eigenvalue'])) 165 | axes[1].set_ylabel('relative eigenvalue error') 166 | 167 | axes[2].imshow(abs(metrics['angle_matrix'][:,0:kmax]) ) 168 | axes[2].plot( [k, k], [0, kmax], ':w') 169 | axes[2].plot( [0, kmax], [k, k], ':w') 170 | axes[2].plot( [0, N], [n-1, n-1], ':w') 171 | axes[2].set_xlim([0, kmax-1]) 172 | axes[2].set_ylim([0, kmax-1]) 173 | axes[2].set_xlabel('Lc eigenvectors lifted'); axes[2].set_ylabel('L eigenvectors'); 174 | 175 | 176 | # ### Coarsen and lift a vector 177 | 178 | # In[8]: 179 | 180 | 181 | size = 2.04; fig, axes = plt.subplots(1, 4, figsize=(4*size*4, 3*size)); lineWidth = 1 182 | 183 | # a random smooth signal 184 | x = G.U[:,:k] @ np.random.randn(k,1) 185 | x = x / np.linalg.norm(x) 186 | G.plot_signal(x, ax=axes[0], plot_name='signal') 187 | 188 | # coarsen it 189 | xc = coarsen_vector(x, C) 190 | Gc.plot_signal(xc, ax=axes[1], plot_name='coarsened signal') 191 | 192 | # lift it 193 | xp = lift_vector(xc, C) 194 | G.plot_signal(xp, ax=axes[2], plot_name='lifted signal') 195 | 196 | # difference 197 | G.plot_signal(np.abs(x-xp), ax=axes[3], plot_name='|x - xp|') 198 | 199 | print('signal error: {}'.format(np.linalg.norm(x - xp))) 200 | 201 | 202 | # In[ ]: 203 | 204 | 205 | 206 | 207 | 208 | # In[ ]: 209 | 210 | 211 | 212 | 213 | -------------------------------------------------------------------------------- /sparsenet/model/loss.py: -------------------------------------------------------------------------------- 1 | # Created at 2020-04-17 2 | # Summary: implement a loss function based on || x.T * L * x - x'.T * L_{sparse} * x' || 3 | 4 | import math 5 | from functools import partial 6 | 7 | import numpy as np 8 | import torch 9 | import torch_geometric 10 | from scipy.sparse import csc_matrix 11 | from torch_geometric.utils import get_laplacian, to_networkx, from_networkx 12 | 13 | # convert the assignment to the projection mat, so we don't need to do it every time when we compute loss. 14 | # n, r the size of L and L_sparse. 15 | from sparsenet.util.sample import sample_N2Nlandmarks 16 | from sparsenet.util.util import random_pygeo_graph, summary, fix_seed, banner, timefunc, pf 17 | 18 | fix_seed() 19 | 20 | tf = partial(timefunc, threshold=-1) 21 | 22 | 23 | @timefunc 24 | def get_projection_mat(n, r, Assignment): 25 | ''' 26 | :param n: Size of original graph 27 | :param r: Size of sampled graph 28 | :param Assignment: The correspondence matrix returned from sample_N2Nlandmarks. 29 | :return: The projection matrix of (r, n). 30 | ''' 31 | P = np.zeros((r, n)) 32 | for key, value in Assignment.items(): 33 | s = len(value) 34 | assert s != 0 35 | for v in value: 36 | P[key][v] = 1 / s # important 37 | return torch.FloatTensor(P) 38 | 39 | 40 | def get_sparse_projection_mat(n, r, Assignment): 41 | ''' 42 | :param n: Size of original graph 43 | :param r: Size of sampled graph 44 | :param Assignment: The correspondence matrix returned from sample_N2Nlandmarks. 45 | :return: The projection matrix of size (r, n). 
46 | ''' 47 | index, val = [], [] 48 | for key, value in Assignment.items(): 49 | s = len(value) 50 | assert s != 0 51 | for v in value: 52 | index.append([key, v]) 53 | val = val + [1 / s] * s 54 | i, v = torch.tensor(index).T, torch.tensor(val) 55 | return torch.sparse.FloatTensor(i, v, torch.Size([r, n])) 56 | 57 | 58 | def get_sparse_C(n, r, Assignment): 59 | ''' 60 | :param n: Size of original graph 61 | :param r: Size of sampled graph 62 | :param Assignment: The correspondence matrix returned from sample_N2Nlandmarks. (# todo: not really matrix but a dict) 63 | key is the node for small graph, value is the set of nodes in big graph contracted to the smaller graph 64 | :return: The sparse c matrix (csc) of size (r, n). 65 | ''' 66 | row, col, val = [], [], [] 67 | for key, value in Assignment.items(): 68 | s = len(value) 69 | assert s != 0 70 | row.extend([key] * s) 71 | col.extend(list(value)) 72 | val = val + [1 / np.sqrt(s)] * s # the major differeence 73 | 74 | row, col = np.array(row), np.array(col) 75 | data = np.array(val) 76 | return csc_matrix((data, (row, col)), shape=(r, n)) 77 | # return torch.sparse.FloatTensor(i, v, torch.Size([r, n])) 78 | 79 | 80 | def random_vec_loss(L, L_sparse, Projection, device='cpu', num_vec=None, debug=False): 81 | ''' 82 | :param L: L is a n*n sparse Tensor. 83 | :param L_sparse: a r*r sparse Tensor 84 | :param Projection: The projection tensor (r * n) 85 | :param device: run on cpu or gpu 86 | :param num_vec: num of random vectors sampled for computing loss 87 | :param debug: debug mode. Will get removed later. 88 | :return: The loss X.T L X - X.T Proj.T L_sparse Proj X, where X is the mat of concating random vecs. 89 | ''' 90 | 91 | # todo: add more variety of random vector (loss freq/high freq) 92 | # todo: need to test for large L, the speed difference on cpu vs. gpu 93 | 94 | L = L.to(device) 95 | L_sparse = L_sparse.to(device) 96 | 97 | if debug: 98 | print('L', L) 99 | print('L_sparse', L_sparse) 100 | 101 | n = (Projection.shape[1]) 102 | if num_vec == None: 103 | num_vec = max(1, int(math.log(n))) 104 | 105 | X = torch.rand(n, num_vec) - 0.5 106 | Projection = Projection.to(device) 107 | 108 | X = X / ((X ** 2).sum(0, keepdim=True)).sqrt() 109 | X = X.to(device) 110 | 111 | X_prime = torch.mm(Projection, X) 112 | quadL = torch.mm(X.t(), torch.sparse.mm(L, X)) 113 | qualL_sparse = torch.mm(X_prime.t(), torch.sparse.mm(L_sparse, X_prime)) 114 | loss = torch.sum(torch.abs(quadL - qualL_sparse)) # important: this is wrong! 
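    # (Added note, hedged) quadL and qualL_sparse are num_vec x num_vec matrices;
    # the per-vector quadratic forms x_i^T L x_i sit on their diagonals, so summing
    # the absolute difference over *all* entries also penalizes the cross terms
    # x_i^T L x_j (i != j), which is presumably what the "this is wrong" remark
    # above refers to.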
115 | return loss 116 | 117 | 118 | # @tf 119 | def get_laplacian_mat(edge_index, edge_weight, num_node, normalization='sym'): # todo: change back 120 | """ return a laplacian (torch.sparse.tensor)""" 121 | edge_index, edge_weight = get_laplacian(edge_index, edge_weight, 122 | normalization=normalization) # see https://bit.ly/3c70FJK for format 123 | return torch.sparse.FloatTensor(edge_index, edge_weight, torch.Size([num_node, num_node])) 124 | 125 | 126 | @tf 127 | def energy_loss(L1, L2, assignment, device='cuda', test=False, 128 | n_measure=1, num_vec=None): 129 | """ 130 | :param g1: pygeo graph 131 | :param g2: pygeo graph (smaller) 132 | :param assignment: a dict where key is the node in smaller graph and value is the nodes in larger graph 133 | :param n_measure 134 | :return: 135 | """ 136 | 137 | if test: 138 | assert isinstance(g1, torch_geometric.data.data.Data) 139 | assert isinstance(g2, torch_geometric.data.data.Data) 140 | 141 | L1 = get_laplacian_mat(g1.edge_index, g1.edge_weight, g1.num_nodes) 142 | L2 = get_laplacian_mat(g2.edge_index, g2.edge_weight, g2.num_nodes) 143 | 144 | assert isinstance(L1, (torch.sparse.FloatTensor, torch.cuda.sparse.FloatTensor)), summary(L1, 'L1') 145 | assert isinstance(L2, (torch.sparse.FloatTensor, torch.cuda.sparse.FloatTensor)), summary(L2, 'L2') 146 | 147 | Projection = get_projection_mat(L1.shape[0], L2.shape[0], assignment) 148 | 149 | if n_measure == 1: 150 | loss = random_vec_loss(L1, L2, Projection, device=device, num_vec=num_vec) 151 | return loss 152 | else: 153 | losses = [] 154 | for _ in range(n_measure): 155 | loss = random_vec_loss(L1, L2, Projection, device=device, num_vec=num_vec) 156 | losses.append(np.float(loss)) 157 | mean, std = np.mean(losses), np.std(losses) 158 | return f'{pf(mean, 2)}±{pf(std, 2)}' 159 | 160 | 161 | if __name__ == '__main__': 162 | # undirected 4-path 163 | banner('random_vec_loss test') 164 | L = get_laplacian_mat(torch.LongTensor([[0, 1, 2, 1, 2, 3], [1, 2, 3, 0, 1, 2]]), 165 | torch.FloatTensor([1., 1., 1., 1., 1., 1.]), 4) 166 | # undirected 2-path (link) 167 | L_sparse = get_laplacian_mat(torch.LongTensor([[0, 1], [1, 0]]), torch.FloatTensor([1., 1.]), 2) 168 | Projection = get_projection_mat(L.shape[0], L_sparse.shape[0], {0: set([0, 1]), 1: set([2, 3])}) 169 | 170 | losses = [] 171 | for _ in range(1000): 172 | loss = random_vec_loss(L, L_sparse, Projection) 173 | losses.append(loss) 174 | summary(np.array(losses), 'losses') 175 | exit() 176 | 177 | banner('sample_N2Nlandmarks test') 178 | n_node, n_edge = 10, 40 179 | node_feat_dim, edge_feat_dim = 1, 1 180 | n_node_small, n_edge_small = 5, 20 181 | 182 | g1 = random_pygeo_graph(n_node, node_feat_dim, n_edge, edge_feat_dim) 183 | g1.edge_weight = 1.1 * torch.ones(n_edge) 184 | 185 | g2, assignment = sample_N2Nlandmarks(to_networkx(g1), n_node_small, weight_key='edge_weight') 186 | g2 = from_networkx(g2) 187 | g2.edge_weight = g2.edge_weight.type(torch.float) 188 | 189 | summary(g1, 'g1') 190 | summary(g2, 'g2') 191 | # exit() 192 | 193 | loss = energy_loss(g1, g2, assignment, device='cpu', test=True) 194 | print(loss) 195 | 196 | exit() 197 | 198 | print(loss) 199 | -------------------------------------------------------------------------------- /sparsenet/util/gsp_util.py: -------------------------------------------------------------------------------- 1 | # Created at 2020-06-02 2 | # Summary: pygsp util 3 | 4 | import collections 5 | from time import time 6 | from warnings import warn 7 | 8 | import networkx as nx 9 | import scipy 10 | 
import torch 11 | import torch_geometric 12 | from graph_coarsening.coarsening_utils import * 13 | from pygsp import graphs 14 | from torch_geometric.data.data import Data 15 | from torch_geometric.utils import from_scipy_sparse_matrix 16 | 17 | from sparsenet.util.util import summary, tonp, np2set, timefunc, red, pyg2gsp, update_dict 18 | 19 | 20 | def assert_proj_matrix(C): 21 | proj_error = sp.sparse.linalg.norm(((C.T).dot(C)) ** 2 - ((C.T).dot(C)), ord='fro') 22 | assert proj_error < 1e-5, f'proj error {proj_error} larger than {1e-5}.' 23 | 24 | 25 | def ba_graph(N=400): 26 | # return a gsp graph 27 | G = graphs.BarabasiAlbert(N) 28 | if not hasattr(G, 'coords'): 29 | try: 30 | graph = nx.from_scipy_sparse_matrix(G.W) 31 | pos = nx.nx_agraph.graphviz_layout(graph, prog='neato') 32 | G.set_coordinates(np.array(list(pos.values()))) 33 | except ImportError: 34 | G.set_coordinates() 35 | G.compute_fourier_basis() # this is for convenience (not really needed by coarsening) 36 | return G 37 | 38 | 39 | class gsp2pyg(object): 40 | def __init__(self, g, **loukas_args): 41 | assert isinstance(g, torch_geometric.data.data.Data) 42 | self.origin_pyg = g 43 | self.gspG = self.pyg2gsp(g) 44 | self.pyg = self.gsp2pyg(self.gspG) 45 | self.loukas_method(**loukas_args) 46 | self.pyg_sml = self.gsp2pyg(self.gspG_sml) 47 | self.assignment = self.process() 48 | self._set_pos() 49 | 50 | def _set_pos(self): 51 | """ set pos for smaller pyg graph """ 52 | # todo: this is for what? 53 | 54 | if 'pos' not in self.origin_pyg.keys: 55 | return 56 | 57 | n = self.pyg_sml.num_nodes 58 | d = self.origin_pyg.pos.size(1) 59 | pos = torch.zeros((n, d)) 60 | for k, v in self.assignment.items(): 61 | v = list(v) 62 | pos[k, :] = torch.mean(self.origin_pyg.pos[v], 0) 63 | self.pyg_sml.pos = pos 64 | 65 | def pyg2gsp(self, g): 66 | return pyg2gsp(g) 67 | 68 | def gsp2pyg(self, g): 69 | """ works only for g with uniform weights """ 70 | from sparsenet.util.data import input_check 71 | edge_index, edge_weight = from_scipy_sparse_matrix(g.W) 72 | edge_weight = edge_weight.type(torch.FloatTensor) 73 | 74 | summary(edge_weight, 'edge_weight in gsp2pyg', highlight=True) 75 | pyG = Data(edge_index=edge_index, edge_weight=edge_weight, 76 | edge_attr=torch.flatten(edge_weight)) # important: set edge_attr to be edge_weight 77 | pyG_check = input_check(pyG, size_check=False, eig=False) # need to comment out for Cora 78 | try: 79 | assert g.N == pyG_check.num_nodes 80 | return pyG_check 81 | except AssertionError: 82 | print(f'AssertionError! gsp Graph size is {g.N} but pyG size is {pyG_check.num_nodes}. ' 83 | f'{red("Return pyG instead of pyG_check.")}') 84 | return pyG 85 | 86 | @timefunc 87 | def loukas_method(self, **kwargs): 88 | """ api to call loukas's code. 89 | modified from looukas's code. 90 | This function provides a common interface for coarsening algorithms that contract subgraphs 91 | 92 | Parameters 93 | ---------- 94 | G : pygsp Graph 95 | K : int 96 | The size of the subspace we are interested in preserving. 97 | r : float between (0,1) 98 | The desired reduction defined as 1 - n/N. 99 | method : String 100 | ['variation_neighborhoods', 'variation_edges', 'variation_cliques', 'heavy_edge', 'algebraic_JC', 'affinity_GS', 'kron'] 101 | 102 | Returns 103 | ------- 104 | C : np.array of size n x N 105 | The coarsening matrix. 106 | Gc : pygsp Graph 107 | The smaller graph. 
108 | Call : list of np.arrays 109 | Coarsening matrices for each level 110 | Gall : list of (n_levels+1) pygsp Graphs 111 | All graphs involved in the multilevel coarsening 112 | 113 | Example 114 | ------- 115 | C, Gc, Call, Gall = coarsen(G, K=10, r=0.8) 116 | """ 117 | 118 | t0 = time() 119 | default_kwargs = {'K': 40, 'r': 0.5, 'method': 'variation_edges', 'max_levels': 20} 120 | loukas_quality = kwargs.get('loukas_quality', False) 121 | kwargs.pop('loukas_quality', None) 122 | kwargs = update_dict(kwargs, default_kwargs) 123 | print(f'{red("kwargs for coarsen function")}: {kwargs}\n') 124 | G = self.gspG 125 | K = kwargs['K'] 126 | 127 | # precompute spectrum needed for metrics 128 | if loukas_quality: 129 | if False: # K_all[-1] > N / 2: 130 | pass # [Uk, lk] = eig(G.L) 131 | else: 132 | offset = 2 * max(G.dw) 133 | T = offset * sp.sparse.eye(G.N, format='csc') - G.L 134 | lk, Uk = sp.sparse.linalg.eigsh(T, k=K, which='LM', tol=1e-6) 135 | lk = (offset - lk)[::-1] 136 | Uk = Uk[:, ::-1] 137 | kwargs['Uk'] = Uk 138 | kwargs['lk'] = lk 139 | t1 = time() 140 | 141 | C, Gc, Call, Gall = coarsen(self.gspG, **kwargs) 142 | 143 | if loukas_quality: 144 | metrics = coarsening_quality(G, C, kmax=K, Uk=Uk[:, :K], lk=lk[:K]) 145 | for k in metrics: 146 | summary(metrics[k], k, highlight=True) 147 | else: 148 | print(red('No coarsening_quality.')) 149 | 150 | _check_loukas(self, G, Gc, C) 151 | t2 = time() 152 | 153 | P = C.power(2) 154 | self.P = P # save memory 155 | self.gspG_sml = Gc 156 | self.C = C 157 | t3 = time() 158 | 159 | print(f'Compute Eigenvalue: {int(t1 - t0)}') 160 | print(f'Coarsen + Metric: {int(t2 - t1)}') 161 | print(f'Misc: {int(t3 - t2)}') 162 | 163 | def process(self): 164 | # convert coarsening matrix to assignment / projection to make life easy 165 | sml_idx, big_idx = self.P.nonzero() 166 | sml_idx, big_idx = sml_idx.astype(int), big_idx.astype(int) 167 | 168 | n = len(sml_idx) 169 | assignment = {} 170 | for i in range(n): 171 | assignment[sml_idx[i]] = set() 172 | 173 | for i in range(n): 174 | k, v = sml_idx[i], big_idx[i] 175 | assignment[k].add(v) 176 | del self.P 177 | return assignment 178 | 179 | def _check_loukas(self, G, Gc, C): 180 | # verify matrix. change to scipy multiplication. 
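        # (Added note, hedged) Reading of the check below: the rows of C carry
        # 1/sqrt(|cluster|) entries, so Q = diag(C @ 1) rescales each row of C into
        # a 0/1 membership indicator. Aggregating the original Laplacian with that
        # indicator, QC @ G.L @ QC.T, should reproduce the coarse Laplacian Gc.L up
        # to small numerical error, which is what the warning guards against.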
181 | 182 | Q = C.dot(np.ones((C.shape[1], 1))).reshape(-1) 183 | Q = scipy.sparse.diags([Q], [0]) 184 | QC = Q.dot(C) 185 | Lc_Q = QC.dot((G.L).dot(QC.T)) 186 | diff = Lc_Q - Gc.L 187 | if np.max(diff) > 0.1: 188 | warn('Lc_Q - Gc.L is not close enough.') 189 | del Lc_Q 190 | 191 | 192 | if __name__ == '__main__': 193 | from sparsenet.util.util import random_pygeo_graph 194 | 195 | pyg = random_pygeo_graph(100, 1, 4000, 1) 196 | 197 | converter = gsp2pyg(pyg, loukas_quality=False) 198 | gsp = converter.gspG 199 | pyG1 = converter.gsp2pyg(gsp) 200 | summary(pyG1, 'pyG1 ') 201 | 202 | exit() 203 | pyg = random_pygeo_graph(10, 1, 40, 1) 204 | converter = gsp2pyg(pyg) 205 | g_sml, assignment = converter.pyg_sml, converter.assignment 206 | summary(g_sml, 'g_sml') 207 | print(assignment) 208 | exit() 209 | 210 | G = ba_graph(400) 211 | method = 'variation_edges' # 'variation_neighborhood' 212 | r = 0.6 # the extend of dimensionality reduction (r=0 means no reduction) 213 | k = 5 214 | kmax = int(3 * k) 215 | 216 | C, Gc, Call, Gall = coarsen(G, K=k, r=r, method=method) 217 | assert_proj_matrix(C) 218 | print(type(C)) 219 | P = C.power(2) 220 | 221 | assert isinstance(P, scipy.sparse.csc.csc_matrix) 222 | P = tonp(P) 223 | n1, n2 = C.shape 224 | ret = P.dot(np.ones((n2,))) 225 | print(collections.Counter(ret)) 226 | print(np2set(ret)) 227 | summary(ret, 'check') 228 | -------------------------------------------------------------------------------- /sparsenet/test/gen_test.py: -------------------------------------------------------------------------------- 1 | # Created at 2020-05-27 2 | # Summary: test generalization 3 | 4 | import os 5 | 6 | from sparsenet.util.util import runcmd 7 | from sparsenet.util.name_util import big_ego_graphs 8 | from sparsenet.util.dir_util import PYTHON 9 | python = PYTHON 10 | warn = False 11 | warn_cmd = ' -W ignore ' if not warn else '' 12 | 13 | 14 | class tester: 15 | def __init__(self): 16 | self.loukas_datasets = ['minnesota', 'airfoil', 'yeast', 'bunny'] 17 | self.syn_datasets = ['ws', 'random_geo', 'shape', 'sbm', 'random_er', ] # ego_facebook 18 | self.file = 'sparsenet/model/example.py ' 19 | self.methods = ['affinity_GS', 'algebraic_JC', 'heavy_edge', 'variation_edges', 'variation_neighborhoods', 20 | ] # 'heavy_edge' 'affinity_GS', 'kron' 21 | self.method = ['variation_neighborhood'] # it's best in most cases 22 | 23 | self.args = ' --lap none ' \ 24 | f' --train_idx 0 --test_idx 0 --n_bottomk 40 --force_pos ' \ 25 | f'--n_cycle 100 --seed 0 ' \ 26 | f'--train_indices 0 --test_indices ,' 27 | 28 | self.cmd = f'{python} {warn_cmd} {self.file} {self.args} ' 29 | 30 | def viz_test(self): 31 | train_indices = '0,1,2,3,4,5,6' # '10,11,' # '0,1,2,3,4,' 32 | test_indices = '13,14,15,16,17,18,' #'5,6,8,9,10,11,12,13,14,15,15,17,18,19' 33 | for data in \ 34 | ['faust']: 35 | for method in self.method: # [1e-4, 1e-5, 1e-6]: 36 | special_args = f'--n_epoch 20 --lap None --loukas_quality --bs 600 --lr 1e-3 --ini --viz ' 37 | cmd = f'{self.cmd} --dataset {data} --ratio 0.5 --strategy loukas ' \ 38 | f'--method {method} --train_indices {train_indices} --test_indices {test_indices} {special_args} ' 39 | runcmd(cmd) 40 | 41 | def generalization(self): 42 | train_indices = \ 43 | '2,3,4,5,6,7,8,9,10,11,12,13,14,' 44 | # '9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,' 45 | #'2,3,4,5,6,7,8,9,10,11,12,13,14,' 46 | # '0,1,2,3,4,' 47 | # '2,3,4,5,6,7,8,9,' 48 | # '0' 49 | # '1,2,3,4,5,' 50 | # '2,3,4,5,6,7,8,9' \ 51 | test_indices = \ 52 | '0' 53 | # 
'5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,' 54 | # '0' 55 | # '0,1,2,3,4,5,6,7,8,9,10' 56 | # '6,7,8,9,10,11,12,13,14,15,16,17,18,19' 57 | for data in \ 58 | ['coauthor-cs']: 59 | # self.syn_datasets: 60 | # ['random_er']: 61 | # ['random_geo', ]: 62 | # self.loukas_datasets: 63 | 64 | for method in self.method: # [1e-4, 1e-5, 1e-6]: 65 | special_args = f'--bs 600 --lr 1e-3 --n_epoch 20 --lap sym --device cuda ' \ 66 | f'--loss quadratic --n_bottomk 500 --correction --ini --valeigen --w_len 5000 --offset 100 ' 67 | cmd = f'{self.cmd} --dataset {data} --ratio 0.3 --strategy loukas ' \ 68 | f'--method {method} --train_indices {train_indices} --test_indices {test_indices} {special_args} ' 69 | # cmd = cmd.replace('--strategy loukas ', '--strategy DK ') 70 | runcmd(cmd) 71 | 72 | def metric_test(self): 73 | args = ' --loukas_quality ' 74 | 75 | file = 'sparsenet/evaluation/metric.py ' 76 | for data in self.loukas_datasets[:1]: 77 | for ratio in [.5]: 78 | for method in self.methods: # ['variation_neighborhoods']: 79 | cmd = f'{python} {warn_cmd} {file} {args} --dataset {data} ' \ 80 | f'--strategy DK --ratio {ratio} --method {method}' 81 | runcmd(cmd) 82 | 83 | def loukas_quality_test(self): 84 | """ test the effect of using with not using argument loukas_quality. """ 85 | train_indices = '0,' 86 | test_indices = ',' 87 | for data in ['bunny']: 88 | for method in ['variation_edges']: # self.methods: 89 | for ratio in [0.3, 0.5, 0.7]: 90 | special_args = f'--bs 600 --n_epoch 50 --device cuda ' # --loukas_quality 91 | cmd = f'{self.cmd} --dataset {data} --ratio {ratio} --strategy loukas --correction ' \ 92 | f'--method {method} --train_indices {train_indices} --test_indices {test_indices} {special_args} ' # --ini 93 | runcmd(cmd) 94 | 95 | def feature_test(self): 96 | train_indices = '0' 97 | test_indices = ',' 98 | for data in \ 99 | ['shape']: 100 | 101 | for method in self.methods: 102 | special_args = f'--bs 600 --n_epoch 50 ' 103 | cmd = f'{self.cmd} --dataset {data} --ratio 0.5 --strategy loukas --device cpu ' \ 104 | f'--method {method} --train_indices {train_indices} --test_indices {test_indices} {special_args}' 105 | # cmd = cmd.replace('--strategy loukas ', '--strategy DK ') 106 | runcmd(cmd) 107 | 108 | def fit_test(self): 109 | """ test all loukas's datasets """ 110 | datasets = ['bunny']# [ 'airfoil', 'yeast', 'bunny'] 111 | train_indices = '0,,' 112 | methods = [ 113 | 'variation_neighborhoods'] # ['heavy_edge', 'variation_edges', 'algebraic_JC', 'affinity_GS'] # important: exclude and kron 114 | 115 | for data in datasets: 116 | for ratio in [.5]: 117 | for method in methods: 118 | special_args = ''# '--lr 1e-4 --bs 6000' if data == 'bunny' else '' # large bs for bunny 119 | cmd = f'{self.cmd} --dataset {data} ' \ 120 | f'--strategy loukas --ratio {ratio} --method {method} --train_indices {train_indices} --correction --ini {special_args} ' 121 | runcmd(cmd) 122 | 123 | def otherloss_test(self): 124 | """ test all loukas's datasets """ 125 | datasets = ['ws', ] 126 | train_indices = '0,1,2,3,'# '0,1,2,3,4,' 127 | test_indices = '5,6,7,8,9,10,11,' # '5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,' # '10,11,12,13,14,15,16,17,18,19,' # '5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,' # '5,6,7,8,9,10,11,12,13,14,15,16' 128 | methods = self.method # [ ] # ['heavy_edge', 'variation_edges', 'algebraic_JC', 'affinity_GS'] 129 | 130 | for data in datasets: 131 | for ratio in [.7]: 132 | for method in methods: # ['affinity_GS']: 133 | self.cmd = 
self.cmd.replace('--n_bottomk 40 ', '--n_bottomk 40 ') 134 | # self.cmd = self.cmd.replace('--lap none ', '--lap sym ') 135 | cmd = f'{self.cmd} --dataset {data} ' f' --n_epoch 20 --device cpu --w_len 5000 ' \ 136 | f'--ratio {ratio} --method {method} --loss quadratic --dynamic --ini --valeigen' \ 137 | f'--train_indices {train_indices} --test_indices {test_indices} --n_layer 3 --emb_dim 50 ' 138 | # cmd += ' --strategy DK ' 139 | # cmd = cmd.replace(method, 'DK_method') 140 | runcmd(cmd) 141 | 142 | def debug_test(self): 143 | args = ' --n_epoch 50 --lap none ' \ 144 | f' --train_idx 0 --test_idx 0 --n_bottomk 40 --force_pos ' \ 145 | f'--n_cycle 100 --device cuda --seed 0 ' \ 146 | f'--train_indices 0, --test_indices ,' 147 | kwargs = {'dataset': 'random_geo', 'n_bottomk': 40, 'ratio': 0.7, 'seed': 0, 'method': 'variation_edges'} 148 | # {'dataset': 'ws', 'n_bottomk': 40, 'ratio': 0.7, 'seed': 0, 'method': 'variation_edges'} 149 | 150 | cmd = f'{python} {warn_cmd} {self.file} {args} --dataset {kwargs["dataset"]} ' \ 151 | f'--strategy loukas --ratio {kwargs["ratio"]} --method {kwargs["method"]} --seed {kwargs["seed"]}' 152 | runcmd(cmd) 153 | 154 | def local_var_nbr_test(self): 155 | args = ' --n_epoch 50 --lap none ' \ 156 | f' --train_idx 0 --test_idx 0 --n_bottomk 40 --force_pos ' \ 157 | f'--n_cycle 1 --device cuda --seed 0 ' \ 158 | f'--train_indices 0, --test_indices ,' 159 | 160 | for data in ['bunny']: #self.loukas_datasets: 161 | for ratio in [.5]: 162 | for method in ['variation_neighborhoods']: 163 | special_args = '--lr 1e-4 --bs 6000 --ini ' if data == 'bunny' else '' # large bs for bunny 164 | cmd = f'{python} {warn_cmd} {self.file} {args} --dataset {data} ' \ 165 | f'--strategy loukas --ratio {ratio} --method {method} {special_args}' 166 | runcmd(cmd) 167 | 168 | 169 | if __name__ == '__main__': 170 | # tester().feature_test() 171 | # tester().local_var_nbr_test() 172 | # tester().loukas_quality_test() 173 | # tester().generalization() 174 | # tester().viz_test() 175 | tester().otherloss_test() 176 | # tester().metric_test() 177 | # tester().fit_test() 178 | 179 | exit() 180 | for data in \ 181 | ['minnesota', 'bunny', 'airfoil', 'yeast']: 182 | # ['random_er', 'random_geo']: # 183 | 184 | cmd = f'{python} {warn_cmd} sparsenet/model/example.py --bs 600 --n_epoch 30 --lap none ' \ 185 | f' --train_idx 0 --test_idx 0 --dataset {data} --n_bottomk 40 --ratio 0.5 --force_pos ' \ 186 | f'--n_cycle 100 --device cuda --seed 0' # # --lap_check 187 | print(cmd) 188 | os.system(cmd) 189 | -------------------------------------------------------------------------------- /sparsenet/model/model.py: -------------------------------------------------------------------------------- 1 | # Created at 2020-04-10 2 | # Summary: graph encoders 3 | 4 | import argparse 5 | 6 | import torch 7 | import torch.nn.functional as F 8 | from torch_geometric.data import DataLoader 9 | from torch_geometric.nn import Set2Set, MessagePassing, global_add_pool, global_mean_pool, global_max_pool, \ 10 | GlobalAttention 11 | from torch_geometric.utils import add_self_loops 12 | 13 | from sparsenet.util.util import summary, fix_seed, random_pygeo_graph 14 | 15 | 16 | class GINConv(MessagePassing): 17 | """ 18 | Extension of GIN aggregation to incorporate edge information by concatenation. 19 | 20 | Args: 21 | emb_dim (int): dimensionality of embeddings for nodes and edges. 22 | embed_input (bool): whether to embed input or not. 
23 | 24 | 25 | See https://arxiv.org/abs/1810.00826 26 | """ 27 | 28 | def __init__(self, edge_feat_dim, emb_dim, aggr="add"): 29 | super(GINConv, self).__init__() 30 | # multi-layer perceptron 31 | self.edge_feat_dim = edge_feat_dim 32 | self.mlp = torch.nn.Sequential(torch.nn.Linear(emb_dim, 2 * emb_dim), torch.nn.ReLU(), 33 | torch.nn.Linear(2 * emb_dim, emb_dim)) 34 | self.edge_embedding = torch.nn.Linear(self.edge_feat_dim, emb_dim) # torch.nn.Embedding(num_bond_type, emb_dim) 35 | 36 | torch.nn.init.xavier_uniform_(self.edge_embedding.weight.data) 37 | self.aggr = aggr 38 | 39 | def forward(self, x, edge_index, edge_attr): 40 | # add self loops in the edge space 41 | edge_index, _ = add_self_loops(edge_index, num_nodes=x.size(0)) 42 | 43 | self_loop_attr = torch.zeros(x.size(0), self.edge_feat_dim) 44 | self_loop_attr = self_loop_attr.to(edge_attr.device).to(edge_attr.dtype) # LongTensor of shape [32, 1] 45 | 46 | edge_attr_ = torch.cat((edge_attr, self_loop_attr), dim=0) 47 | edge_attr_ = edge_attr_.type(torch.FloatTensor).to(edge_attr.device) 48 | # summary(edge_attr, 'edge_attr after adding self loop') 49 | 50 | edge_embeddings = self.edge_embedding(edge_attr_) 51 | 52 | return self.propagate(edge_index, size=[x.size(0), x.size(0)], x=x, edge_attr=edge_embeddings) 53 | 54 | def message(self, x_j, edge_attr): 55 | return x_j + edge_attr 56 | 57 | def update(self, aggr_out): 58 | return self.mlp(aggr_out) 59 | 60 | 61 | class GNN(torch.nn.Module): 62 | """ 63 | Args: 64 | num_layer (int): the number of GNN layers 65 | emb_dim (int): dimensionality of embeddings 66 | JK (str): last, concat, max or sum. 67 | max_pool_layer (int): the layer from which we use max pool rather than add pool for neighbor aggregation 68 | drop_ratio (float): dropout rate 69 | gnn_type: gin, gcn, graphsage, gat 70 | 71 | Output: 72 | node representations 73 | 74 | """ 75 | 76 | def __init__(self, num_layer, emb_dim, node_feat_dim, edge_feat_dim, JK="last", drop_ratio=0, gnn_type="gin"): 77 | super(GNN, self).__init__() 78 | self.num_layer = num_layer 79 | self.drop_ratio = drop_ratio 80 | self.JK = JK 81 | self.node_feat_dim = node_feat_dim 82 | self.edge_feat_dim = edge_feat_dim 83 | 84 | if self.num_layer < 2: 85 | print("Warning: Number of GNN layers must be greater than 1.") 86 | 87 | ######## 88 | self.x_embedding0 = torch.nn.Linear(self.node_feat_dim, emb_dim) 89 | 90 | ###List of MLPs 91 | self.gnns = torch.nn.ModuleList() 92 | for layer in range(num_layer): 93 | if gnn_type == "gin": 94 | self.gnns.append(GINConv(self.edge_feat_dim, emb_dim, aggr="add")) 95 | else: 96 | NotImplementedError 97 | 98 | ### List of batchnorms 99 | self.batch_norms = torch.nn.ModuleList() 100 | for layer in range(num_layer): 101 | self.batch_norms.append(torch.nn.BatchNorm1d(emb_dim)) 102 | 103 | # def forward(self, x, edge_index, edge_attr): 104 | def forward(self, *argv): 105 | if len(argv) == 3: 106 | x, edge_index, edge_attr = argv[0], argv[1], argv[2] 107 | elif len(argv) == 1: 108 | data = argv[0] 109 | x, edge_index, edge_attr = data.x, data.edge_index, data.edge_attr 110 | else: 111 | raise ValueError("unmatched number of arguments.") 112 | 113 | x = self.x_embedding0(x) # self.x_embedding1(x[:, 0]) + self.x_embedding2(x[:, 1]) 114 | 115 | h_list = [x] 116 | for layer in range(self.num_layer): 117 | h = self.gnns[layer](h_list[layer], edge_index, edge_attr) 118 | h = self.batch_norms[layer](h) 119 | # h = F.dropout(F.relu(h), self.drop_ratio, training = self.training) 120 | if layer == self.num_layer - 1: 121 | # 
remove relu for the last layer 122 | h = F.dropout(h, self.drop_ratio, training=self.training) 123 | else: 124 | h = F.dropout(F.relu(h), self.drop_ratio, training=self.training) 125 | h_list.append(h) 126 | 127 | ### Different implementations of Jk-concat 128 | if self.JK == "concat": 129 | node_representation = torch.cat(h_list, dim=1) 130 | elif self.JK == "last": 131 | node_representation = h_list[-1] 132 | elif self.JK == "max": 133 | h_list = [h.unsqueeze_(0) for h in h_list] 134 | node_representation = torch.max(torch.cat(h_list, dim=0), dim=0)[0] 135 | elif self.JK == "sum": 136 | h_list = [h.unsqueeze_(0) for h in h_list] 137 | node_representation = torch.sum(torch.cat(h_list, dim=0), dim=0)[0] 138 | 139 | return node_representation 140 | 141 | 142 | class GNN_graphpred(torch.nn.Module): 143 | """ 144 | Extension of GIN to incorporate edge information by concatenation. 145 | 146 | Args: 147 | num_layer (int): the number of GNN layers 148 | emb_dim (int): dimensionality of embeddings 149 | num_tasks (int): number of tasks in multi-task learning scenario 150 | drop_ratio (float): dropout rate 151 | JK (str): last, concat, max or sum. 152 | graph_pooling (str): sum, mean, max, attention, set2set 153 | gnn_type: gin, gcn, graphsage, gat 154 | 155 | See https://arxiv.org/abs/1810.00826 156 | JK-net: https://arxiv.org/abs/1806.03536 157 | """ 158 | 159 | # @profile 160 | def __init__(self, num_layer, emb_dim, node_feat_dim, edge_feat_dim, num_tasks, JK="last", drop_ratio=0, 161 | graph_pooling="mean", gnn_type="gin", force_pos=False, mlp=False): 162 | """ 163 | 164 | :param num_layer: 165 | :param emb_dim: 166 | :param node_feat_dim: 167 | :param edge_feat_dim: 168 | :param num_tasks: 169 | :param JK: 170 | :param drop_ratio: 171 | :param graph_pooling: 172 | :param gnn_type: 173 | :param force_pos: force postive. If true, add non-linear layer in the end. 
174 | """ 175 | super(GNN_graphpred, self).__init__() 176 | self.num_layer = num_layer 177 | self.drop_ratio = drop_ratio 178 | self.JK = JK 179 | self.emb_dim = emb_dim 180 | self.num_tasks = num_tasks 181 | self.edge_feat_dim = edge_feat_dim 182 | self.force_pos = force_pos 183 | self.mlp = mlp 184 | 185 | if self.num_layer < 2: 186 | print("Warning: Number of GNN layers must be greater than 1.") 187 | 188 | self.gnn = GNN(num_layer, emb_dim, node_feat_dim, edge_feat_dim, JK, drop_ratio, gnn_type=gnn_type) 189 | 190 | # Different kind of graph pooling 191 | if graph_pooling == "sum": 192 | self.pool = global_add_pool 193 | elif graph_pooling == "mean": 194 | self.pool = global_mean_pool 195 | elif graph_pooling == "max": 196 | self.pool = global_max_pool 197 | elif graph_pooling == "attention": 198 | if self.JK == "concat": 199 | self.pool = GlobalAttention(gate_nn=torch.nn.Linear((self.num_layer + 1) * emb_dim, 1)) 200 | else: 201 | self.pool = GlobalAttention(gate_nn=torch.nn.Linear(emb_dim, 1)) 202 | elif graph_pooling[:-1] == "set2set": 203 | set2set_iter = int(graph_pooling[-1]) 204 | if self.JK == "concat": 205 | self.pool = Set2Set((self.num_layer + 1) * emb_dim, set2set_iter) 206 | else: 207 | self.pool = Set2Set(emb_dim, set2set_iter) 208 | else: 209 | raise ValueError("Invalid graph pooling type.") 210 | 211 | # For graph-level binary classification 212 | if graph_pooling[:-1] == "set2set": 213 | self.mult = 2 214 | else: 215 | self.mult = 1 216 | 217 | if self.JK == "concat": 218 | self.graph_pred_linear = torch.nn.Linear(self.mult * (self.num_layer + 1) * self.emb_dim, self.num_tasks) 219 | else: 220 | self.graph_pred_linear = torch.nn.Linear(self.mult * self.emb_dim, self.num_tasks) 221 | 222 | if self.mlp: 223 | self.graph_pred_linear = torch.nn.Sequential( 224 | torch.nn.Linear(self.mult * self.emb_dim, self.mult * self.emb_dim), 225 | torch.nn.ReLU(), 226 | torch.nn.Linear(self.mult * self.emb_dim, self.mult * self.emb_dim), 227 | torch.nn.ReLU(), 228 | torch.nn.Linear(self.mult * self.emb_dim, self.mult * self.emb_dim), 229 | torch.nn.ReLU(), 230 | torch.nn.Linear(self.mult * self.emb_dim, self.mult * self.emb_dim), 231 | torch.nn.ReLU(), 232 | torch.nn.Linear(self.mult * self.emb_dim, self.num_tasks)) 233 | 234 | def from_pretrained(self, model_file): 235 | # self.gnn = GNN(self.num_layer, self.emb_dim, JK = self.JK, drop_ratio = self.drop_ratio) # important 236 | self.gnn.load_state_dict(torch.load(model_file)) 237 | 238 | # @timefunc 239 | def forward(self, *argv, ini=False): 240 | if len(argv) == 4: 241 | x, edge_index, edge_attr, batch = argv[0], argv[1], argv[2], argv[3] 242 | elif len(argv) == 1: 243 | data = argv[0] 244 | x, edge_index, edge_attr, batch = data.x, data.edge_index, data.edge_attr, data.batch 245 | 246 | else: 247 | raise ValueError("unmatched number of arguments.") 248 | 249 | node_representation = self.gnn(x, edge_index, edge_attr) 250 | rep = self.graph_pred_linear(self.pool(node_representation, batch)) 251 | if ini and len(argv) == 1: 252 | ini_tsr = torch.stack([data.ini] * rep.size(1), dim=1) 253 | 254 | if self.force_pos: 255 | if ini: 256 | # important: new version. has not tested it. Does it works well for amazons? 257 | return torch.nn.ReLU()(rep + ini_tsr) + 1 258 | # return 0.5 * rep + torch.nn.ReLU()(ini_tsr) # + torch.zeros(rep.size()).to(rep.device) 259 | else: 260 | return 1 + torch.nn.ReLU()(rep) # important: add 1 by default. not sure it's the best. 
261 | else: 262 | return rep 263 | 264 | 265 | parser = argparse.ArgumentParser(description='PyTorch implementation of pre-training of graph neural networks') 266 | parser.add_argument('--gnn_type', type=str, default='gin', help='') 267 | 268 | if __name__ == "__main__": 269 | fix_seed() 270 | edge_feat_dim = 1 271 | node_feat_dim = 5 272 | n_node, n_edge = 320, 5000 273 | n_layer = 3 274 | emb_dim, out_dim = 50, 18 275 | 276 | model = GNN_graphpred(n_layer, emb_dim, node_feat_dim, edge_feat_dim, out_dim, mlp=False) 277 | 278 | g1 = random_pygeo_graph(n_node, node_feat_dim, n_edge, edge_feat_dim, device='cpu') 279 | g2 = random_pygeo_graph(n_node + 10, node_feat_dim, n_edge + 10, edge_feat_dim, device='cpu') 280 | summary(g1, 'g1') 281 | loader = DataLoader([g1] * 16 + [g2] * 16, batch_size=8, shuffle=True, num_workers=0) 282 | for batch in loader: 283 | pred = model(batch) 284 | summary(pred, 'pred') 285 | -------------------------------------------------------------------------------- /sparsenet/model/eval.py: -------------------------------------------------------------------------------- 1 | # Created at 2020-05-27 2 | # Summary: add train and eval for model training. Extracted from example.py. Contains import abstraction. 3 | 4 | import logging 5 | from copy import deepcopy 6 | from time import time 7 | 8 | import torch 9 | from torch.autograd import Variable as V 10 | 11 | from sparsenet.model.loss import get_laplacian_mat 12 | from sparsenet.util.data import set_loader 13 | from sparsenet.util.loss_util import loss_manager 14 | from sparsenet.util.torch_util import sparse_mm 15 | from sparsenet.util.util import summary, pf, red, banner, fix_seed, timefunc as tf 16 | 17 | fix_seed() 18 | 19 | 20 | def apply_gnn(batch, model, dev, verbose=False, ini=False): 21 | """ 22 | :param batch: 23 | :param model: 24 | :return: 25 | """ 26 | indices_batch, graph_batch = batch 27 | if verbose: 28 | summary(graph_batch, 'graph_batch', highlight=True) 29 | 30 | _bs = len(indices_batch[0]) 31 | indices_batch = torch.stack(indices_batch, dim=0).t().contiguous().view((2 * _bs)) # https://bit.ly/2ARazSd 32 | indices_batch, graph_batch = indices_batch.to(dev), graph_batch.to(dev) 33 | pred = model(graph_batch, ini=ini) # tensor of size (bs, out_dim) 34 | return pred, indices_batch 35 | 36 | 37 | @tf 38 | def correction(LM, L2, args): 39 | if args.strategy == 'loukas' and args.correction: 40 | # remark: not ideal but a reasonable workaround. Memory intensive. 41 | # L2_correction = torch.sparse.mm(torch.sparse.mm(LM.invQ, L2.to_dense()).to_sparse(), 42 | # LM.invQ.to_dense()) 43 | # L2_correction = L2_correction.to_sparse() 44 | # remark: has small difference with current version 45 | L2_correction = sparse_mm(L2, LM.invQ) 46 | else: 47 | L2_correction = L2 48 | return L2_correction 49 | 50 | 51 | class tester(object): 52 | def __init__(self, name='default', comment='evalulation', dev='cuda'): 53 | self.test_data = {} 54 | self.test_data_comb = {} 55 | self.original_graph = {} 56 | self.sparse_graph = {} 57 | self.dev = dev 58 | self.name = name 59 | self.comment = comment 60 | 61 | def set_test_data(self, args, data_loader, verbose=False): 62 | """ set the data for evalulation """ 63 | self.rayleigh_flag = True if args.loss == 'rayleigh' else False 64 | if args.test_idx in self.test_data.keys(): 65 | print(f'Test graph {args.test_idx} has been processed. 
Skip.') 66 | return 67 | 68 | g, _ = data_loader.load(args, mode='test') 69 | self.original_graph[args.test_idx] = g 70 | 71 | test_loader, sub, n_sml = set_loader(g, args.bs, shuffle=True, args=args) 72 | 73 | L1 = sub.L(g, normalization=args.lap) 74 | L2_ = sub.baseline0(normalization=args.lap) 75 | 76 | L1_comb = sub.L(g, normalization=None) if args.dynamic else None 77 | L2_comb = sub.baseline0(normalization=None) if args.dynamic else None 78 | self.L1_comb = L1_comb 79 | self.test_data_comb[args.test_idx] = L2_comb 80 | 81 | self.L2_ = L2_ 82 | L2_trival = sub.trivial_L(sub.g_sml) 83 | 84 | LM = loss_manager(signal='bottomk', device=self.dev) 85 | 86 | try: 87 | LM.set_precomute_x(g, args, k=args.n_bottomk - args.offset) 88 | except: 89 | LM.set_x(L1, g.num_nodes, args.n_bottomk - args.offset, which='SM') 90 | 91 | LM.set_C(sub.C) 92 | LM.set_s(g.num_nodes, k=args.n_bottomk) 93 | 94 | params = {'inv': args.inv, 'dynamic': args.dynamic, 'rayleigh': self.rayleigh_flag, } 95 | bl_loss, bl_ratio_loss = LM.quaratic_loss(L1, L2_, sub.assignment, comb=(L1_comb, L2_comb), **params) 96 | trival_loss, _ = LM.quaratic_loss(L1, L2_trival, sub.assignment, comb=(L1_comb, L2_trival), **params) # todo: look at trivial loss 97 | L2_correction = correction(LM, L2_, args) 98 | 99 | bl_eigloss = LM.eigen_loss(L1, L2_correction, args.n_bottomk - args.offset, args=args, g1=g) if args.valeigen else torch.tensor(-1) 100 | 101 | edge_weight_sml_buffer = deepcopy(sub.g_sml.edge_weight).to(self.dev) 102 | edge_index_sml = sub.g_sml.edge_index.to(self.dev) 103 | test_data = g, test_loader, edge_weight_sml_buffer, sub, L1, \ 104 | bl_loss, bl_ratio_loss, trival_loss, \ 105 | bl_eigloss, n_sml, edge_index_sml, LM 106 | self.test_data[args.test_idx] = test_data 107 | 108 | if verbose: 109 | summary(L1, 'L1') 110 | summary(L2_, 'L2_') 111 | print(f'Baseline 0 loss: {red(bl_loss)}') 112 | 113 | banner(f'Finish setting {args.dataset} graph {args.test_idx}', compact=True, ch='-') 114 | 115 | def delete_test_data(self, idx): 116 | del self.test_data[idx] 117 | 118 | @tf 119 | def eval(self, model, args, verbose=False): 120 | 121 | t0 = time() 122 | model.eval() 123 | 124 | g, test_loader, edge_weight_sml_buffer, sub, L1, bl_loss, \ 125 | bl_ratio_loss, trival_loss, bl_eigloss, n_sml, edge_index_sml, LM = \ 126 | self.test_data[args.test_idx] 127 | 128 | L2_ini_comb = self.test_data_comb[args.test_idx] # get_laplacian_mat(edge_index_sml, edge_weight_sml_buffer, n_sml, normalization=None) if args.dynamic else None 129 | 130 | L1_comb = sub.L(g, normalization=None) if args.dynamic else None 131 | comb = (L1_comb, L2_ini_comb) 132 | 133 | for step_, batch in enumerate(test_loader): 134 | pred, indices_batch = apply_gnn(batch, model, self.dev, ini=args.ini) 135 | edge_weight_sml = V(edge_weight_sml_buffer) 136 | edge_weight_sml[indices_batch] = pred.view(-1).repeat_interleave(2) 137 | # L2 = get_laplacian_mat(edge_index_sml, edge_weight_sml, n_sml, normalization=args.lap) 138 | 139 | if verbose: 140 | summary(pred, f'test: pred at step {step_}') 141 | summary(edge_weight_sml, f'test: edge_weight_sml at {step_}') 142 | summary(indices_batch, f'test: indices_batch at {step_}') 143 | print() 144 | 145 | L2 = get_laplacian_mat(edge_index_sml, edge_weight_sml, n_sml, normalization=args.lap) 146 | L2_correction = correction(LM, L2, args) 147 | 148 | loss, ratio_loss = LM.quaratic_loss(L1, L2, sub.assignment, inv=args.inv, 149 | rayleigh=self.rayleigh_flag, 150 | verbose=True) 151 | # expansive, so only calculate when needed 152 | 
eigloss = LM.eigen_loss(L1, L2_correction, args.n_bottomk - args.offset, args=args, 153 | g1=g) if args.valeigen else torch.tensor(-1) # todo: change back 154 | 155 | t1 = time() 156 | msg = 'Generalize!' if loss < min(bl_loss, trival_loss) else '' 157 | nsig = 3 158 | logging.info(' ' * 12 + 159 | f'Graph-{args.dataset}: {args.test_idx}. ' 160 | f'{red("Test-Val")}({pf(t1 - t0, 1)}): {pf(loss, nsig)}({pf(ratio_loss, nsig)}) / ' 161 | f'{pf(bl_loss, nsig)}({pf(bl_ratio_loss, nsig)}) / {pf(trival_loss)}. {red(msg)}. ' 162 | f'Eigenloss: {pf(eigloss, nsig)}. ' 163 | f'Bl_Eigenloss: {pf(bl_eigloss, nsig)}.') 164 | 165 | n_gen = 1 if msg == 'Generalize!' else 0 166 | impr_ratio = min(bl_loss, trival_loss) / loss 167 | eigen_ratio = (bl_eigloss - eigloss) / bl_eigloss 168 | return n_gen, impr_ratio.item(), eigen_ratio.item() 169 | 170 | 171 | class trainer(object): 172 | 173 | def __init__(self, name='default', comment='test tensorboard', dev='cuda'): 174 | self.n_graph = 0 # number of graphs that has been processed 175 | self.train_data = {} 176 | self.train_data_comb = {} 177 | self.dev = dev 178 | self.name = name 179 | self.comment = comment 180 | self.original_graph = {} 181 | 182 | def set_train_data(self, args, data_loader): 183 | """ quite similar with set_test_data """ 184 | self.rayleigh_flag = True if args.loss == 'rayleigh' else False 185 | 186 | if args.train_idx in self.train_data.keys(): 187 | print(f'Train graph {args.train_idx} has been processed. Skip.') 188 | return 189 | 190 | g, _ = data_loader.load(args, mode='train') 191 | self.original_graph[args.train_idx] = g 192 | train_loader, sub, n_sml = set_loader(g, args.bs, shuffle=True, args=args) 193 | 194 | L1 = sub.L(g, normalization=args.lap) 195 | L1_comb = sub.L(g, normalization=None) if args.dynamic else None 196 | g_sml, assignment = sub.g_sml, sub.assignment 197 | edge_index_sml = g_sml.edge_index.to(self.dev) 198 | L2_ = sub.baseline0(normalization=args.lap) 199 | L2_comb = sub.baseline0(normalization=None) if args.dynamic else None 200 | self.train_data_comb[args.train_idx] = (L1_comb, L2_comb) 201 | 202 | self.L2_ = L2_ 203 | L_trivial = sub.trivial_L(g_sml) # todo: look at trivial loss tomorrow 204 | 205 | summary(L1, 'L1') 206 | summary(L2_, 'L2_baseline0') 207 | 208 | LM = loss_manager(signal='bottomk', device=self.dev) 209 | # test vector as slightly different when adding loukas_quality argument. 210 | # Not sure why but seems the change is very minor. 
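        # (Added note, hedged) The try/except below first attempts to reuse
        # eigenvectors precomputed and stored on the graph object; if they are
        # missing, it falls back to computing the bottom-k ('SM') spectrum of L1
        # on the fly via LM.set_x.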
211 | try: 212 | LM.set_precomute_x(g, args, k=args.n_bottomk) 213 | except: 214 | LM.set_x(L1, g.num_nodes, args.n_bottomk, which='SM') 215 | LM.set_C(sub.C) 216 | LM.set_s(g.num_nodes, k=args.n_bottomk) 217 | 218 | bl_loss, bl_ratio = LM.quaratic_loss(L1, L2_, sub.assignment, inv=args.inv, rayleigh=self.rayleigh_flag, 219 | dynamic=args.dynamic, comb=(L1_comb, L2_comb)) 220 | trivial_loss, trivial_ratio = LM.quaratic_loss(L1, L_trivial, sub.assignment, inv=args.inv, 221 | rayleigh=self.rayleigh_flag, dynamic=args.dynamic, 222 | comb=(L1_comb, L_trivial)) 223 | L2_correction = correction(LM, L2_, args) 224 | skip_flag = True if g.num_nodes > 1e3 else False 225 | bl_eigen_loss = LM.eigen_loss(L1, L2_correction, args.n_bottomk, args=args, g1=g, skip=skip_flag) 226 | 227 | edge_weight_sml_buffer = deepcopy(sub.g_sml.edge_weight).to(self.dev) 228 | train_data = g, train_loader, edge_weight_sml_buffer, \ 229 | sub, L1, bl_loss, bl_ratio, \ 230 | trivial_loss, trivial_ratio, \ 231 | n_sml, edge_index_sml, LM, bl_eigen_loss 232 | 233 | assert args.train_idx not in self.train_data.keys(), \ 234 | f'Overwrite self.train_data for key {args.train_idx}. Check carefully!' 235 | 236 | self.train_data[args.train_idx] = train_data 237 | return 238 | 239 | def delete_train_data(self, idx): 240 | del self.train_data[idx] 241 | 242 | @tf 243 | def train(self, model, optimizer, args, verbose=False): 244 | g, train_loader, edge_weight_sml_buffer, sub, L1, bl_loss, bl_ratio, trivial_loss, \ 245 | trivial_ratio, n_sml, edge_index_sml, LM, bl_eigen_loss = self.train_data[args.train_idx] 246 | 247 | L2_ini = get_laplacian_mat(edge_index_sml, edge_weight_sml_buffer, n_sml, normalization=args.lap) 248 | L1_comb, L2_ini_comb = self.train_data_comb[args.train_idx] 249 | summary(L1_comb, 'Train: L1_comb', highlight=True) 250 | summary(L2_ini_comb, 'Train: L2_ini_comb', highlight=True) 251 | loss_ini, _ = LM.quaratic_loss(L1, L2_ini, sub.assignment, verbose=False, inv=args.inv, dynamic=args.dynamic, 252 | comb=(L1_comb, L2_ini_comb)) 253 | 254 | logging.info(f'Initial quaratic loss: {red(pf(loss_ini, 3))}.') 255 | for n_iter in range(1, args.n_epoch + 1): 256 | t0 = time() 257 | 258 | for step, batch in enumerate(train_loader): 259 | model.train() 260 | pred, indices_batch = apply_gnn(batch, model, self.dev, ini=args.ini) 261 | edge_weight_sml = V(edge_weight_sml_buffer) 262 | edge_weight_sml[indices_batch] = pred.view(-1).repeat_interleave(2) 263 | 264 | L2 = get_laplacian_mat(edge_index_sml, edge_weight_sml, n_sml, normalization=args.lap) 265 | comb = (L1_comb, L2_ini_comb) 266 | 267 | loss, ratio = LM.quaratic_loss(L1, L2, sub.assignment, verbose=False, inv=args.inv, 268 | rayleigh=self.rayleigh_flag) 269 | 270 | optimizer.zero_grad() 271 | loss.backward(retain_graph=False) # https://bit.ly/2LbZNaR 272 | optimizer.step() 273 | 274 | L2_correction = correction(LM, L2, args) 275 | skip_flag = True if g.num_nodes > 1e2 else False 276 | eigen_loss = LM.eigen_loss(L1, L2_correction, args.n_bottomk, args=args, g1=g, skip=skip_flag) 277 | 278 | space = '\n' if verbose else '' 279 | n_sig = 3 280 | logging.info(f'{args.dataset}-Idx {args.train_idx}-Epoch: {n_iter}. ' 281 | f'Train({pf(time() - t0)}): {pf(loss, n_sig)}({pf(ratio, n_sig)})' 282 | f' / {pf(bl_loss, n_sig)}({pf(bl_ratio, n_sig)}) / {pf(trivial_loss, n_sig)}. ' 283 | f'Eigenloss: {pf(eigen_loss, n_sig)}. 
{space}' 284 | f'Bl_Eigenloss: {pf(bl_eigen_loss, n_sig)}') 285 | 286 | banner(f'Finish training {args.dataset} {args.train_idx} for {args.n_epoch} epochs.') 287 | self.n_graph += 1 288 | -------------------------------------------------------------------------------- /sparsenet/util/loss_util.py: -------------------------------------------------------------------------------- 1 | # Created at 2020-05-20 2 | # Summary: Implement a class so that one can get all sort of random vecs. 3 | 4 | from copy import deepcopy 5 | 6 | import numpy as np 7 | import scipy as sp 8 | import torch 9 | from torch.sparse import mm as smm 10 | 11 | from deprecated import deprecated 12 | from scipy.sparse import coo_matrix 13 | from scipy.sparse.linalg import eigs, eigsh 14 | 15 | from sparsenet.model.loss import get_sparse_projection_mat 16 | from sparsenet.util.cut_util import pyG_conductance 17 | from sparsenet.util.torch_util import sparse_mm2, sparse_matrix2sparse_tensor 18 | from sparsenet.util.util import timefunc as tf, fix_seed, summary, random_laplacian, pf, tonp, red, dic2tsr 19 | 20 | fix_seed() 21 | 22 | 23 | class vec_generator(object): 24 | def __init__(self): 25 | pass 26 | 27 | def _normalize(self, X): 28 | """ 29 | :param X: Input vec mat. 30 | :return: Normalized vec mat. 31 | """ 32 | return X / ((X ** 2).sum(0, keepdim=True)).sqrt() 33 | 34 | def _sparse_tensor2_sparse_numpyarray(self, sparse_tensor): 35 | """ 36 | :param sparse_tensor: a COO torch.sparse.FloatTensor 37 | :return: a scipy.sparse.coo_matrix 38 | """ 39 | if sparse_tensor.device.type == 'cuda': 40 | sparse_tensor = sparse_tensor.to('cpu') 41 | 42 | values = sparse_tensor._values().numpy() 43 | indices = sparse_tensor._indices() 44 | rows, cols = indices[0, :].numpy(), indices[1, :].numpy() 45 | size = sparse_tensor.size() 46 | scipy_sparse_mat = coo_matrix((values, (rows, cols)), shape=size, dtype=np.float) 47 | return scipy_sparse_mat 48 | 49 | def _laplacian2adjacency(self, laplacian): 50 | """ 51 | :param laplacian: Input laplacian mat. 52 | :return: return adajacency, basically remove diagonal elements, make non-diagonal elements positive. 53 | """ 54 | values, indices = laplacian._values(), laplacian._indices() 55 | mask = [False if (u == v) else True for _, (u, v) in enumerate(indices.t().tolist())] 56 | new_values, new_indices = -values[mask], indices[:, mask] 57 | return torch.sparse.FloatTensor(new_indices, new_values, laplacian.size()) 58 | 59 | def random_vec(self, N, num_vec, normalized=True, reproducible=False): 60 | """ 61 | :param N: Dimension of the vec 62 | :param num_vec: Number of random vec 63 | :param normalized: If normalized the, L2 norm of return vectors will be 1. 64 | :param reproducible: if reproducible=True, then the random seeds are fixed. 65 | :return: A N * num_vec random vec tensor. 66 | """ 67 | if reproducible: 68 | fix_seed() 69 | X = torch.rand(N, num_vec) - 0.5 70 | if normalized: 71 | X = self._normalize(X) 72 | return X 73 | 74 | @tf 75 | def bottomk_vec(self, laplacian, k, which='SM', val=False): 76 | """ 77 | :param laplacian: The input laplacian matrix, should be a sparse tensor. 78 | :param k: The top K (smalleset) eigenvectors. 79 | :param which: LM, SM, LR, SR, LM, SM largest/smallest magnitude, LR/SR largest/smallest real value. 80 | more details see scipy.sparse.linalg.eigs 81 | :return: return top K eigenvec. in the format of a N * k tensor. All vectors are automatically normalized. 
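Note: eigsh is tried first; if ARPACK fails to converge, the slower eigs call (tol=0) is used as a fallback.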
82 | """
83 | assert isinstance(laplacian, (torch.sparse.FloatTensor, torch.cuda.sparse.FloatTensor)), \
84 | f'input laplacian must be a sparse tensor. Got {type(laplacian)}'
85 |
86 | # we need to convert the sparse tensor to a scipy sparse mat, so that we can apply
87 | # scipy.sparse.linalg.eigs(), which should be faster than other methods.
88 | scipy_lap = self._sparse_tensor2_sparse_numpyarray(laplacian)
89 | M, N = scipy_lap.shape
90 | assert (M == N and k < N - 1), f'Input laplacian must be a square matrix. ' \
91 | f'To use the scipy method, {k} (#eigvecs) < {N - 1} (size of laplacian - 1).'
92 |
93 | try:
94 | vals, vecs = eigsh(scipy_lap, k=k, which=which, tol=1e-3)
95 | vecs = torch.FloatTensor(vecs.real)
96 | except sp.sparse.linalg.eigen.arpack.ArpackNoConvergence:
97 | print(red('Eigsh failed. Try computing with eigs'))
98 | vals, vecs = eigs(scipy_lap, k=k, which=which, tol=0)
99 | vecs = torch.FloatTensor(vecs.real)
100 | except:
101 | exit(f'Convergence Error in bottomk_vec when computing {k} eigenvectors.') # shape dataset has such problem
102 |
103 | vecs = self._normalize(vecs) # no effect
104 | if val:
105 | return vals
106 | else:
107 | return vecs
108 |
109 | def random_projected_vec(self, laplacian, num_vec, power_method_iter=5, reproducible=False):
110 | """
111 | :param laplacian: The laplacian matrix, used to generate the adjacency mat for the power method.
112 | :param num_vec: Number of starting random vectors.
113 | :param reproducible: if True, fix the random seed.
114 | :param power_method_iter: How many times we apply f(i+1) = Af(i)/|Af(i)|
115 | :return: num_vec vectors of length N, in the form of an N * num_vec matrix. The power method is
116 | applied to each vector power_method_iter times.
117 | """
118 | assert isinstance(laplacian,
119 | (torch.sparse.FloatTensor, torch.cuda.sparse.FloatTensor)), 'Input laplacian must be ' \
120 | 'a sparse tensor.'
121 | size = laplacian.size()
122 | assert (size[0] == size[1]), 'Input laplacian must be a square mat.'
123 | vectors = self.random_vec(size[0], num_vec, normalized=True, reproducible=reproducible)
124 | print('Original vecs:', vectors)
125 | adjacency = self._laplacian2adjacency(laplacian)
126 | for i in range(power_method_iter):
127 | vectors = self._normalize(smm(adjacency, vectors))
128 | return vectors
129 |
130 |
131 | class loss_manager(object):
132 | def __init__(self, signal='bottomk', device='cuda'):
133 | assert signal in ['bottomk', 'random', 'random_proj'], f'signal {signal} is not implemented!'
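# signal -> generator method: 'bottomk' uses exact bottom-k eigenvectors of the Laplacian, 'random'
# uses random unit vectors, 'random_proj' smooths random vectors with a few power-method steps.
# Minimal usage sketch (illustrative only; L1, L2, C, assignment are produced elsewhere, e.g. by `subgraphs`):
#   LM = loss_manager(signal='bottomk', device='cpu')
#   LM.set_x(L1, k, which='SM')   # k bottom eigenvectors of the large Laplacian as test vectors
#   LM.set_C(C)                   # needed for the rayleigh / dynamic variants
#   loss, ratio = LM.quaratic_loss(L1, L2, assignment)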
134 | self.gen = vec_generator() 135 | if signal == 'bottomk': 136 | method = 'bottomk_vec' 137 | elif signal == 'random': 138 | method = 'random_vec' 139 | elif signal == 'random_proj': 140 | method = 'random_projected_vec' 141 | else: 142 | NotImplementedError 143 | 144 | self.method = method 145 | self.dev = device 146 | self.L1 = None 147 | self.vals_L1 = None 148 | self.inv_asgmt = None 149 | self.Projection = None 150 | self.D1 = None 151 | 152 | @tf 153 | def set_C(self, C=None): 154 | # todo: add comment 155 | if C is not None: 156 | self.C = C # csc_matrix of shape (n, N) 157 | self.pi = sparse_matrix2sparse_tensor(self.C.T.dot(self.C).tocoo(), 158 | dev=self.dev) # mainly used for rayleigh quotient 159 | tmp = self.C.dot(np.ones((self.C.shape[1], 1))).reshape(-1) 160 | assert np.min(tmp) > 0, f'min of tmp is {np.min(tmp)}' 161 | # self.Q = np.diag(tmp) 162 | # self.invQ = np.diag(1/tmp) 163 | 164 | n = self.C.shape[0] 165 | diag_indices = [list(range(n))] * 2 166 | i = torch.LongTensor(diag_indices) 167 | v = torch.FloatTensor(1.0 / tmp) 168 | self.invQ = torch.sparse.FloatTensor(i, v, torch.Size([n, n])).to(self.dev) 169 | 170 | def set_precomute_x(self, g, args, k=40, v=False): 171 | key = f'{args.lap}_vecs' 172 | self.x = g[key][:, :k].to(self.dev) 173 | if v: 174 | summary(self.x, red(f'precomputed test vector {key}')) 175 | 176 | def set_x(self, *args, **kwargs): 177 | print(red('Recompute eigenvector')) 178 | self.x = getattr(self.gen, self.method)(*args, **kwargs).to(self.dev) 179 | summary(self.x, 'test vector') 180 | 181 | @tf 182 | def set_s(self, n, k=40): 183 | """ set a random set of nodes for condunctance. 184 | Generate a list (len k) of random nodes in ORIGINAL graph as test subset 185 | """ 186 | self.s_list = [] 187 | self.s_list_tsr = [] 188 | for _ in range(k): 189 | _size = np.random.choice(range(int(n / 4.0), int(n / 2.0))) 190 | s = np.random.choice(range(n), size=_size, replace=False).tolist() 191 | self.s_list.append(s) 192 | self.s_list_tsr.append(torch.tensor(s)) 193 | 194 | ############ condunctance_loss related ############ 195 | @tf 196 | def _build_inv_asgnment(self, assgnment): 197 | if self.inv_asgmt is None: 198 | self.inv_asgmt = {v: key for (key, value) in assgnment.items() for v in 199 | value} # key is the nodes in large graph. 200 | self.inv_asgmt_tsr = dic2tsr(self.inv_asgmt, dev=self.dev) 201 | 202 | @tf 203 | def get_s_prime(self, s): 204 | """ assume self.inv_asgmt is built. 205 | From s generate s_prime. used for condunctance_loss. 
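Each node of s is mapped to its landmark via self.inv_asgmt (built by _build_inv_asgnment), and
duplicates are removed, so s_prime is the image of s in the coarse graph.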
206 | """ 207 | s = s.to(self.dev) 208 | if isinstance(s, torch.Tensor): 209 | s_prime = torch.index_select(self.inv_asgmt_tsr, 0, s) 210 | s_prime = torch.unique(s_prime) # remove duplicates 211 | elif isinstance(s, list): 212 | s_prime = [self.inv_asgmt[s_] for s_ in s] 213 | s_prime = list(set(s_prime)) 214 | else: 215 | summary(s, 's') 216 | raise NotImplementedError(f's is {type(s)}') 217 | 218 | return s_prime 219 | 220 | @tf 221 | def condunctance_loss(self, g1, g2, assgnment, verbose=False): 222 | """ 223 | todo: slow for shape dataset: 1.2s each batch 224 | :param g1: edge_index1, edge_attr1 for original graph 225 | :param g2: edge_index2, edge_attr2 for smaller graph 226 | :param assgnment: dict 227 | :return: 228 | """ 229 | 230 | edge_index1, edge_attr1 = g1 231 | edge_index2, edge_attr2 = g2 232 | self._build_inv_asgnment(assgnment) 233 | loss = 0 234 | for i, s in enumerate(self.s_list_tsr): 235 | 236 | cond1 = pyG_conductance(edge_index1, edge_attr1, s.tolist(), t=None, dev=self.dev) 237 | s_prime = self.get_s_prime(s) 238 | cond2 = pyG_conductance(edge_index2, edge_attr2, s_prime.tolist(), t=None, dev=self.dev) 239 | loss += torch.abs(cond1 - cond2) 240 | 241 | if verbose: 242 | print(f's: {len(s)}. s_prime: {len(s_prime)}') 243 | summary(np.array(s), 's') 244 | summary(edge_index1, 'edge_index1') 245 | summary(edge_attr1, 'edge_attr1') 246 | print(red(f'cond1-{i}: {pf(cond1, 2)}. cond2-{i}: {pf(cond2, 2)}')) 247 | 248 | return loss / len(self.s_list) 249 | 250 | ############ quadratic_loss related ############### 251 | @deprecated(reason="to be refactord") 252 | def _set_d(self, L, power=0.5): 253 | """ from sparse tensor L to degree matrix """ 254 | # todo: speed up. 3 255 | dev = L.device 256 | n = L.shape[0] 257 | idx = torch.LongTensor([[i, i] for i in range(n)]).T.to(dev) 258 | diag = torch.diag(L.to_dense()) 259 | diag = diag ** (power) 260 | deg = torch.sparse.FloatTensor(idx, diag, torch.Size([n, n])) 261 | return deg 262 | 263 | def quaratic_loss(self, L1, L2, assignment, verbose=False, inv=False, 264 | rayleigh=False, dynamic=False, 265 | comb=(None, None)): 266 | """ 267 | modfied from random_vec_loss. 268 | :param L1: Laplace of original graph 269 | :param L2: Laplace of smaller graph 270 | :param Projection 271 | :param inv: inverse Laplacian. (Not Really Working) 272 | :param: rayleigh: normalized x 273 | :param: dynamic: dynamic projection. Update projection in runtime. 274 | :param: comb: combinatorial L1, L2. Only used for normalized Laplacian. 
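For each test vector x (a column of self.x) this compares x^T L1 x with (P x)^T L2 (P x), where P is the
sparse projection built from `assignment`; loss is the mean absolute difference over the test vectors and
ratio is |log( sum_i (P x_i)^T L2 (P x_i) / sum_i x_i^T L1 x_i )|. With rayleigh=True each difference is
divided by x^T (C^T C) x; with dynamic=True, P is additionally rescaled using degree matrices derived
from the combinatorial Laplacians passed in `comb`.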
275 | :return loss, ratio 276 | """ 277 | L1, L2 = L1.to(self.dev), L2.to(self.dev) 278 | if self.Projection is None: 279 | self.Projection = get_sparse_projection_mat(L1.shape[0], L2.shape[0], assignment).to(self.dev) # sparse tensor 280 | Projection = self.Projection 281 | else: 282 | Projection = self.Projection 283 | 284 | if dynamic: 285 | L1_comb, L2_comb = comb 286 | assert L1_comb is not None 287 | if self.D1 is None: 288 | self.D1 = self._set_d(L1_comb, power=-0.5) 289 | D1 = self.D1 290 | else: 291 | D1 = self.D1 292 | D2 = self._set_d(L2_comb, power=0.5) 293 | Projection = sparse_mm2(Projection, D1, D2) 294 | 295 | X_prime = smm(Projection, self.x) 296 | 297 | if inv: 298 | raise NotImplementedError 299 | else: 300 | quadL1 = torch.mm(self.x.t(), smm(L1, self.x)) 301 | qualL2 = torch.mm(X_prime.t(), smm(L2, X_prime)) 302 | 303 | diff = torch.abs(torch.diag(quadL1 - qualL2)) 304 | if rayleigh: 305 | assert self.pi is not None 306 | denominator = torch.diag(torch.mm(self.x.t(), smm(self.pi, self.x))) # (n_bottomk,) 307 | diff = diff / denominator 308 | 309 | loss = torch.mean(diff) 310 | ratio = torch.sum(torch.diag(qualL2)) / torch.sum(torch.diag(quadL1)) 311 | ratio = torch.abs(torch.log(ratio)) 312 | if verbose: 313 | bad_indices = tonp((diff / loss > 1).nonzero()) 314 | print(bad_indices.reshape(-1)) 315 | return loss, ratio 316 | 317 | @tf 318 | def eigen_loss(self, L1, L2, k, args=None, g1=None, skip=False): 319 | """ compare the first k eigen difference; L1 is larger than L2 320 | :param args 321 | :param g1: used for retrive precomputed spectrum 322 | """ 323 | if skip: return -1 324 | 325 | # get eigenvalues of L1 326 | if self.vals_L1 is None: 327 | # compute eigenvals only once 328 | self.L1 = L1 # doesn't seem to be useful any more 329 | key = str(args.lap) + '_vals' 330 | vals_L1 = g1[key][:k].numpy() 331 | vals_L1 = deepcopy(vals_L1) # if not deepcopy, g1 None_vals[0] will get modified 332 | self.vals_L1 = vals_L1 333 | else: 334 | vals_L1 = self.vals_L1 335 | 336 | # get eigenvalues of L2 337 | if args.cacheeig: 338 | raise NotImplementedError 339 | else: 340 | vals_L2 = self.gen.bottomk_vec(L2, k, which='SM', val=True).real 341 | 342 | # compute the eigenvalues error 343 | vals_L1 = vals_L1[:len(vals_L2)] # in case vals_L1 and vals_L2 are of different length 344 | bad_indices = np.nonzero(vals_L1 < 1e-5) 345 | if len(bad_indices) > 1: 346 | print(red(f'There are {len(bad_indices)} nearly zero eigenvalues.')) 347 | err = np.abs(vals_L1 - vals_L2) / (vals_L1 + 1e-15) 348 | err[0] = 0 349 | err[bad_indices] = 0 350 | return np.mean(err) 351 | 352 | 353 | if __name__ == '__main__': 354 | 355 | LM = loss_manager(signal='bottomk') 356 | 357 | # exit() 358 | gen = vec_generator() 359 | # print(gen.random_vec(N=3, num_vec=2,reproducible=True)) 360 | # print(gen.random_vec(N=3, num_vec=2,reproducible=True)) 361 | 362 | i = torch.LongTensor([[0, 1, 2, 3], [0, 1, 2, 3]]) 363 | v = torch.FloatTensor([0, 1, 2, 3]) 364 | sparse_mat = torch.sparse.FloatTensor(i, v, torch.Size((4, 4))) 365 | LM = loss_manager() 366 | eigloss = LM.eigen_loss(sparse_mat, sparse_mat, 2, g1=None) 367 | print(eigloss) 368 | bottomk_vec = gen.bottomk_vec(laplacian=sparse_mat, k=2) 369 | 370 | for i in range(2): 371 | summary(bottomk_vec[:, i], f'bottomk_vec[:, {i}]') 372 | exit() 373 | 374 | # i = torch.LongTensor([[0, 1, 2, 3, 1, 2, 0, 3, 1, 3],[0, 1, 2, 3, 2, 1, 3, 0, 3, 1]]) 375 | # v = torch.FloatTensor([1, 2, 1, 2, -1, -1, -1, -1, -1, -1]) 376 | projected_vec = gen.random_projected_vec(laplacian, 
5, 2, reproducible=True) 377 | summary(projected_vec, 'projected_vec') 378 | 379 | projected_vec = gen.random_projected_vec(laplacian, 5, 2, reproducible=True) 380 | print('Projected vecs', projected_vec) 381 | exit() 382 | 383 | n = 100 384 | i, v = random_laplacian(n) 385 | summary(i, 'i') 386 | summary(v, 'v') 387 | laplacian = torch.sparse.FloatTensor(i, v, torch.Size((n, n))) 388 | 389 | for _ in range(5): 390 | eigenvec = gen.bottomk_vec(laplacian, 2) 391 | summary(eigenvec, 'eigenvec') 392 | exit() 393 | -------------------------------------------------------------------------------- /sparsenet/util/graph_util.py: -------------------------------------------------------------------------------- 1 | # Created at 2020-04-16 2 | # Summary: util functions 3 | 4 | import shutil 5 | from warnings import warn 6 | 7 | import numpy as np 8 | import torch 9 | import torch_geometric 10 | from memory_profiler import profile 11 | from torch.autograd import Variable 12 | from torch_geometric.data import Data 13 | from torch_geometric.transforms import LocalDegreeProfile 14 | from torch_geometric.utils import from_networkx, subgraph 15 | 16 | from sparsenet.model.loss import get_laplacian_mat, get_sparse_C 17 | from sparsenet.util.gsp_util import gsp2pyg 18 | from sparsenet.util.name_util import set_coarsening_graph_dir 19 | from sparsenet.util.sample import sample_N2Nlandmarks 20 | from sparsenet.util.util import timefunc, banner, summary, random_edge_index, fix_seed, red, make_dir 21 | 22 | INFINITY = 1e8 23 | 24 | 25 | @timefunc 26 | def get_bipartite(G1, G2, crossing_edge): 27 | ''' 28 | :param G1: graph 1 29 | :param G2: graph 2 30 | :param crossing_edge: crossing edges between those two subgraphs of G 31 | :return: A bipartiti graph G1 <-> G2. (nodes(G1) + nodes(G2) + crossing edge) in format of torch_geo 32 | ''' 33 | xedge_index, xedge_attr = crossing_edge 34 | final_x = torch.cat((G1.x, G2.x), 0) 35 | final_node_index = torch.cat((G1.node_index, G2.node_index), 0) 36 | return Data(x=final_x, edge_attr=xedge_attr, edge_index=xedge_index, node_index=final_node_index) 37 | 38 | 39 | @timefunc 40 | def get_merged_subgraph(G1, G2, crossing_edge): 41 | ''' 42 | :param G1: pygeo graph G1 43 | :param G2: pygeo graph G2 44 | :param crossing_edge: (edge_index, edge_attr) 45 | :return: Merge x, edge_attr, edge_index, and node_index in G1, G2, and crossing edge 46 | ''' 47 | xedge_index, xedge_attr = crossing_edge 48 | final_edge_index = torch.cat((G1.edge_index, G2.edge_index, xedge_index), 1) 49 | final_edge_attr = torch.cat((G1.edge_attr, G2.edge_attr, xedge_attr), 0) 50 | final_x = torch.cat((G1.x, G2.x), 0) 51 | final_node_index = torch.cat((G1.node_index, G2.node_index), 0) 52 | return Data(x=final_x, edge_attr=final_edge_attr, edge_index=final_edge_index, node_index=final_node_index) 53 | 54 | 55 | @profile 56 | class GraphPair(object): 57 | def __init__(self, G, g_sml, Assignment): 58 | """ 59 | :param G: Original graph G, in pygeo format 60 | :param gsml: sampled graph g_sml, in pygeo format, the node index of both G and gsml MUST starts with 0. 61 | :param Assignment: assignment 62 | """ 63 | assert (isinstance(G, torch_geometric.data.data.Data) 64 | and isinstance(g_sml, torch_geometric.data.data.Data)), f'G is {type(G)}. g_sml is {type(g_sml)}' 65 | self.G = G 66 | if 'edge_attr' not in G.keys: 67 | # todo: discuss with DK 68 | warn(f'edge_attr does not exit. 
Will create 1-dim edge attr (all ones)')
69 | G.edge_attr = torch.ones((G.num_edges, 1))
70 | banner('modified G')
71 | summary(G)
72 |
73 | self.G_prime = g_sml
74 | self.assignment = Assignment
75 | assert (self.__check_indexes()), 'Input graphs must have node_index starting from 0.'
76 | tensor_imap, inverse_assignment = {}, {}
77 | for i, (uprime, vprime) in enumerate(g_sml.edge_index.t().tolist()):
78 | if uprime > vprime:
79 | tensor_imap[(vprime, uprime)] = [i] if (vprime, uprime) not in tensor_imap.keys() \
80 | else tensor_imap[(vprime, uprime)] + [i]
81 | else:
82 | tensor_imap[(uprime, vprime)] = [i] if (uprime, vprime) not in tensor_imap.keys() \
83 | else tensor_imap[(uprime, vprime)] + [i]
84 | for key, value in Assignment.items():
85 | for v in value:
86 | inverse_assignment[v] = key
87 | self.tensor_imap = tensor_imap
88 | self.inverse_assignment = inverse_assignment # map index of big graph to small graph
89 |
90 | def __check_indexes(self):
91 | gflag, gprimeflag = False, False
92 | for u, v in self.G.edge_index.t().tolist():
93 | if u == 0 or v == 0:
94 | gflag = True
95 | break
96 | for uprime, vprime in self.G_prime.edge_index.t().tolist():
97 | if uprime == 0 or vprime == 0:
98 | gprimeflag = True
99 | break
100 | return gflag and gprimeflag
101 |
102 | @timefunc
103 | def construct(self):
104 | """
105 | This function computes the partition of the original graph G based on landmarks. Also, it
106 | computes the crossing_edges, and the indices in the edge_index tensor of any edge (u, v) in G_prime.
107 | :return: void.
108 | """
109 | N = len(self.assignment.keys())
110 | G_edge_attrs, G_edge_indices = self.G.edge_attr.tolist(), self.G.edge_index.t().tolist()
111 | G_x = self.G.x.tolist()
112 | print('N:', N, 'x:', len(G_x), 'edge_attr:', len(G_edge_attrs), 'edge_indices:', len(G_edge_indices))
113 | # subgraphs: list of subgraph: {'edge_index':[], 'edge_attr':[], 'x':[]}
114 | # crossing_edges: dictionary keyed by (uprime, vprime), an edge from G_prime; crossing_edges[(up, vp)] = list of
115 | # [e=(u, v), e_attr], where e is an edge in G, and e_attr is its corresponding attr.
116 | subgraphs, crossing_edges = [{'edge_index': [], 'edge_attr': [], 'x': []} for i in range(N)], {}
117 | for i, (u, v) in enumerate(G_edge_indices):
118 | uprime, vprime = self.inverse_assignment[u], self.inverse_assignment[v]
119 | if uprime == vprime: # add into subgraph gs[uprime]
120 | subgraphs[uprime]['edge_index'].append((u, v))
121 | subgraphs[uprime]['edge_attr'].append(G_edge_attrs[i])
122 | else: # add into crossing edges [(up, vp)]
123 | uprime, vprime = (vprime, uprime) if uprime > vprime else (uprime, vprime)
124 | crossing_edges[(uprime, vprime)] = [[(u, v), G_edge_attrs[i]]] if (uprime, vprime) not in crossing_edges \
125 | else crossing_edges[(uprime, vprime)] + [[(u, v), G_edge_attrs[i]]]
126 |
127 | for i, nx in enumerate(G_x):
128 | xprime = self.inverse_assignment[i]
129 | subgraphs[xprime]['x'].append(i)
130 | self.subgraphs = subgraphs
131 | self.crossing_edges = crossing_edges
132 |
133 | def __get_subgraph(self, uprime):
134 | """
135 | :param uprime: the id of a landmark in G_prime
136 | :return: the corresponding subgraph in the original graph G (in pygeo format).
137 | """ 138 | _x = torch.FloatTensor(self.subgraphs[uprime]['x']) 139 | _edge_index = torch.LongTensor(self.subgraphs[uprime]['edge_index']).t() 140 | _edge_attr = torch.FloatTensor(self.subgraphs[uprime]['edge_attr']) 141 | _node_index = torch.LongTensor(list(self.assignment[uprime])) 142 | return Data(x=_x, edge_index=_edge_index, edge_attr=_edge_attr, node_index=_node_index) 143 | 144 | def __get_crossing_edges(self, uprime, vprime): 145 | """ 146 | :param uprime: landmark uprime in G_prime 147 | :param vprime: landmark vprime in G_prime 148 | :return: the crossing edges between subgraphs G_u, G_v (assigned to u_prime & v_prime) in G, 149 | return None if (uprime, vprime) is not in self.crossing_edges 150 | """ 151 | assert (uprime < vprime) and (uprime, vprime) in self.crossing_edges, f'({uprime}, {vprime}) is not in crossing' \ 152 | f'edges dictionary.' 153 | data = self.crossing_edges[(uprime, vprime)] 154 | _edge_index = [e[0] for e in data] 155 | _edge_attr = [e[1] for e in data] 156 | return (torch.LongTensor(_edge_index).t(), torch.FloatTensor(_edge_attr)) 157 | 158 | def __get_tensor_indices(self, uprime, vprime): 159 | """ 160 | :param uprime: landmark uprime in G_prime 161 | :param vprime: landmark vprime in G_prime 162 | :return: the indices of edge (uprime, vprime) in tensor edge_index in g_sml. 163 | """ 164 | assert (uprime < vprime and (uprime, vprime) in self.tensor_imap) 165 | return tuple(self.tensor_imap[(uprime, vprime)]) 166 | 167 | def get_data(self, edge): 168 | """ 169 | :param edge: (u, v) from the sampled graph. 170 | :return: subgraphs G1, G2 corresponding to landmark u, v in edge. Crossing edges between G1, G2 171 | and the indices of (u, v), (v, u) in tensor edge_index of g_sml. 172 | """ 173 | uprime, vprime = edge 174 | uprime, vprime = (vprime, uprime) if uprime > vprime else (uprime, vprime) 175 | G1, G2 = self.__get_subgraph(uprime), self.__get_subgraph(vprime) 176 | if G1.num_nodes == 1: 177 | warn(f'edge {edge}: output subgraph G1 is a singleton!') 178 | if G2.num_nodes == 1: 179 | warn(f'edge {edge}: output subgraph G2 is a singleton!') 180 | crossing_edges = self.__get_crossing_edges(uprime, vprime) 181 | tensor_indices = self.__get_tensor_indices(uprime, vprime) 182 | return G1, G2, crossing_edges, tensor_indices 183 | 184 | 185 | @profile 186 | class subgraphs(object): 187 | def __init__(self, g, assertion=False, args=None): 188 | """ 189 | :param g: pyG graph with edeg_weight 190 | :param assertion: assert the edge index is right. 
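Notes: g must carry an `edge_weight` attribute. On construction, the coarse graph g_sml, the node
assignment, and the coarsening matrix C are loaded from disk or computed, and for every coarse edge
the pair of merged subgraphs plus their crossing edges is cached in self.info.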
191 | """ 192 | 193 | self.g = g 194 | self.C = None 195 | self.args = args 196 | assert 'edge_weight' in g.keys 197 | assert isinstance(g, Data) 198 | 199 | self.__load_coarsening_graphs(args, recompute=False) 200 | 201 | self.graph_pair = GraphPair(self.g, self.g_sml, self.assignment) 202 | self.graph_pair.construct() 203 | dict = self.__construct_dict() 204 | del self.graph_pair 205 | 206 | self.edges, self.inv_edges, self.double_edges = [], [], [] 207 | for (idx1, idx2) in dict.keys(): 208 | self.edges.append(idx1) 209 | self.inv_edges.append(idx2) 210 | self.double_edges.append((idx1, idx2)) # [idx1, idx2] 211 | if assertion: self.__assert(idx1, idx2) 212 | 213 | new_info = {} 214 | for k, v in dict.items(): 215 | # k is of form [8, 9], v is of form (G1, G2, (crossing_edge_index, crossing_edge_attr), ini) 216 | new_info[k[0]] = v 217 | self.info = new_info 218 | del new_info 219 | 220 | def __assert(self, idx1, idx2): 221 | set_e1 = set(self.g_sml.edge_index[:, idx1].numpy()) # {0,, 13} 222 | set_e2 = set(self.g_sml.edge_index[:, idx2].numpy()) # {13, 0} 223 | assert set_e1 == set_e2, f'{idx1} edge is {set_e1}. {idx2} edge is {set_e2}' 224 | 225 | def __load_coarsening_graphs(self, args, recompute=False): 226 | dir = set_coarsening_graph_dir(args) 227 | if recompute: 228 | shutil.rmtree(dir) 229 | make_dir(dir) 230 | 231 | if args.strategy == 'DK': 232 | n_sml = int(self.g.num_nodes * (1 - args.ratio)) 233 | try: 234 | self.assignment = torch.load(f'{dir}assignment.pt') 235 | self.g_sml = torch.load(f'{dir}g_sml.pt') 236 | print(f'load g_sml, assignment from \n {red(dir)}') 237 | except FileNotFoundError: 238 | g_sml, assignment = sample_N2Nlandmarks(self.g, n_sml, weight_key='edge_weight') 239 | self.g_sml = from_networkx(g_sml) 240 | self.assignment = assignment 241 | 242 | # save g_sml, assignment 243 | torch.save(self.assignment, f'{dir}assignment.pt') 244 | torch.save(self.g_sml, f'{dir}g_sml.pt') 245 | print(f'save at g_sml, assignment at \n {red(dir)}') 246 | 247 | # todo: add a function to convert self.assignment to C 248 | self.C = get_sparse_C(self.g.num_nodes, n_sml, self.assignment) 249 | elif args.strategy == 'loukas': 250 | try: 251 | self.assignment = torch.load(f'{dir}assignment.pt') 252 | self.g_sml = torch.load(f'{dir}g_sml.pt') 253 | self.C = torch.load(f'{dir}C.pt') 254 | print(f'load g_sml, assignment, and C from \n {red(dir)}') 255 | except (FileNotFoundError, TypeError): 256 | loukas_kwargs = {'r': args.ratio, 'method': args.method, 257 | 'loukas_quality': args.loukas_quality, 258 | 'K': args.n_bottomk} 259 | converter = gsp2pyg(self.g, **loukas_kwargs) 260 | g_sml, assignment = converter.pyg_sml, converter.assignment 261 | self.g_sml = g_sml 262 | self.C = converter.C 263 | self.assignment = assignment 264 | 265 | # save g_sml, C, assignment 266 | torch.save(self.assignment, f'{dir}assignment.pt') 267 | torch.save(self.g_sml, f'{dir}g_sml.pt') 268 | torch.save(self.C, f'{dir}C.pt') 269 | print(f'save at g_sml, assignment, and C at \n {red(dir)}') 270 | else: 271 | raise NotImplementedError 272 | 273 | @timefunc 274 | def __construct_dict(self): 275 | """ construct a dict. 
276 | modified from get_original_subgraphs where __map_back_from_edges is replaced by graph_pair.get_data,
277 | which makes it faster
278 |
279 | :return: a dict {edge_indexes (i, j): (G1, G2, (crossing_edge_index, crossing_edge_attr), ini)}
280 | """
281 | assert (isinstance(self.g, torch_geometric.data.data.Data) and isinstance(self.g_sml,
282 | torch_geometric.data.data.Data))
283 | imap, ret = {}, {}
284 | for i, (u, v) in enumerate(self.g_sml.edge_index.t().tolist()):
285 | if u > v:
286 | # map node index (u, v) to edge index (i, j)
287 | imap[(v, u)] = [i] if (v, u) not in imap.keys() else imap[(v, u)] + [i]
288 | else:
289 | imap[(u, v)] = [i] if (u, v) not in imap.keys() else imap[(u, v)] + [i]
290 | for _, (u, v) in enumerate(self.g_sml.edge_index.t().tolist()):
291 | if u < v:
292 | (i, j) = tuple(imap[(u, v)])
293 | G1, G2, crossing_edges, _ = self.graph_pair.get_data(
294 | (u, v)) # __map_back_from_edge(G, (u, v), assignment)
295 | ini = self.g_sml.edge_weight[[i, j]] # used to initialize gnn output. assert ini[0]==ini[1]
296 | ret[(i, j)] = (G1, G2, crossing_edges, ini)
297 | return ret
298 |
299 | @timefunc
300 | def get_subgraphs(self, verbose=False):
301 | """ the main function that is called;
302 | return a list of pyG graphs, one for each edge in G'
303 | """
304 |
305 | subgraphs_list = []
306 |
307 | for e in self.edges:
308 | G1, G2, crossing_edges, ini = self.info[e]
309 | pyG = get_merged_subgraph(G1, G2, crossing_edges)
310 | indices = pyG.node_index.numpy().ravel().tolist()
311 |
312 | try:
313 | new_edge_index, new_edge_attr = subgraph(indices, pyG.edge_index, pyG.edge_attr, relabel_nodes=True)
314 | except IndexError:
315 | warn('Index Error. Filter out isolated nodes.')
316 | _edge_indices = pyG.edge_index.numpy().ravel().tolist()
317 | indices = [idx for idx in indices if idx in _edge_indices]
318 | new_edge_index, new_edge_attr = subgraph(indices, pyG.edge_index, pyG.edge_attr, relabel_nodes=True)
319 |
320 | new_pyG = Data(edge_index=new_edge_index, edge_attr=new_edge_attr, ini=torch.ones(1) * ini[0])
321 | new_pyG.x = None
322 | new_pyG = LocalDegreeProfile()(new_pyG)
323 | new_pyG.x = Variable(new_pyG.x)
324 | new_pyG.x = torch.nn.functional.normalize(new_pyG.x, dim=0)
325 | subgraphs_list += [new_pyG]
326 |
327 | del self.info
328 | if verbose:
329 | for idx, v in enumerate(subgraphs_list):
330 | summary(v, f'{idx}-subgraph')
331 |
332 | print(f'{len(subgraphs_list)} Subgraph Stats:')
333 | nodes_stats = [g_.num_nodes for g_ in subgraphs_list]
334 | edges_stats = [g_.num_edges for g_ in subgraphs_list]
335 | summary(np.array(nodes_stats), 'node_stats')
336 | summary(np.array(edges_stats), 'edge_stats')
337 |
338 | return subgraphs_list
339 |
340 | def get_bipartitle_graphs(self):
341 | # todo: similar to get_subgraphs but for bipartite graphs. 0.
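# (Per the todo above, the intended behaviour would mirror get_subgraphs but build one bipartite
# graph per coarse edge via get_bipartite; left unimplemented here.)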
342 | raise NotImplementedError 343 | 344 | def baseline0(self, normalization): 345 | """ 346 | return the laplacian of baseline 0, which is the Laplacian of G' without learning 347 | Summary of g_sml in baseline0 (torch_geometric.data.data.Data): 348 | edge_index LongTensor [2, 9476] 796.55(mean) 0.0(min) 1688.0(max) 770.0(median) 492.89(std) 1689.0(unique) 349 | edge_weight FloatTensor [9476] 2.11(mean) 1.0(min) 10.0(max) 2.0(median) 1.12(std) 10.0(unique) 350 | """ 351 | g_sml = self.g_sml # all index should be contiguous 352 | L = get_laplacian_mat(g_sml.edge_index, g_sml.edge_weight, g_sml.num_nodes, normalization=normalization) 353 | return L 354 | 355 | def L(self, g, normalization): 356 | # todo: add check num_nodes 357 | L = get_laplacian_mat(g.edge_index, g.edge_weight, g.num_nodes, normalization=normalization) 358 | return L 359 | 360 | def trivial_L(self, g): 361 | """ trival Laplacian for standarded Laplacian""" 362 | L = get_laplacian_mat(g.edge_index, torch.zeros(g.edge_weight.size()), g.num_nodes, normalization=None) 363 | return L 364 | 365 | 366 | if __name__ == '__main__': 367 | # edge = random_edge_index(n_edge=200, n_node=20) 368 | fix_seed() 369 | 370 | n_node, n_edge = 3200, 40000 371 | node_dim = 1 372 | edge_feat_dim = 1 373 | n_node_sml = 200 374 | 375 | g = Data(x=torch.rand(n_node, node_dim), 376 | edge_index=random_edge_index(n_edge, n_node), 377 | edge_attr=torch.rand(n_edge, edge_feat_dim)) 378 | g.edge_weight = torch.ones(n_edge) * 1.1 379 | summary(g, 'original_graph') 380 | 381 | # n_sml = 200 382 | # banner('Test subgraphs') 383 | # all_graphs = subgraphs(g, n_sml).get_subgraphs(verbose=False) 384 | 385 | banner('Test sample Landmark') 386 | g_sml, assignment = sample_N2Nlandmarks(g, n_node_sml, weight_key='edge_weight') 387 | print(g_sml.edges.data()) 388 | g_sml_pyG = from_networkx(g_sml) 389 | 390 | banner('Get original subgraphs test') 391 | graph_pair = GraphPair(g, g_sml_pyG, assignment) 392 | graph_pair.construct() 393 | 394 | edge_indexes = g_sml_pyG.edge_index.t().tolist() 395 | 396 | for i, edge in enumerate(edge_indexes): 397 | print(edge) 398 | G1, G2, crossing_edge, tensor_indices = graph_pair.get_data(edge) 399 | if i > 4: exit() 400 | continue 401 | 402 | # When G1 or G2 is a single node, summary() will cause error. 
403 | # summary(G1, 'G1') 404 | print(G2) 405 | # exit() 406 | # summary(G2, 'G2') 407 | print(crossing_edge[0].shape, crossing_edge[1].shape) 408 | print(tensor_indices) 409 | 410 | # edge_indexes = g_sml_pyG.edge_index.numpy() 411 | # dict = get_original_subgraphs(g, g_sml_pyG, assignment) 412 | # for i, j in dict.keys(): 413 | # print('edge_index_pair ({}, {}):'.format(i, j), edge_indexes[:, i], edge_indexes[:, j]) 414 | # 415 | # banner('g_sml_nx') 416 | # summary(g_sml_pyG, 'g_sml_pyG') 417 | # edges = [e for e in g_sml.edges] 418 | # summary(torch.Tensor(np.array(edges)), 'g_sml_nx') 419 | # 420 | # banner('DK\'s test') 421 | # print('Select edge:', edges[0]) 422 | # G1, G2, crossing_edges = __map_back_from_edge(g, edges[0], assignment) 423 | # summary(G1, 'G1') 424 | # summary(G2, 'G2') 425 | # print('Crossing edge size:', crossing_edges[0].shape) 426 | # 427 | # summary(get_bipartite(G1, G2, crossing_edges), 'bipartitle') 428 | # summary(get_merged_subgraph(G1, G2, crossing_edges), 'merged_subgraph') 429 | -------------------------------------------------------------------------------- /sparsenet/util/pygsp_util.py: -------------------------------------------------------------------------------- 1 | # Created at 2020-05-31 2 | # Summary: in order to use to_networkx 3 | # https://github.com/epfl-lts2/pygsp/blob/c8687ee3619741010a7b81ee298085da8299a0bd/pygsp/graphs/_io.py#L8 4 | 5 | # -*- coding: utf-8 -*- 6 | 7 | import os 8 | 9 | import numpy as np 10 | 11 | 12 | def _import_networkx(): 13 | try: 14 | import networkx as nx 15 | except Exception as e: 16 | raise ImportError('Cannot import networkx. Use graph-tool or try to ' 17 | 'install it with pip (or conda) install networkx. ' 18 | 'Original exception: {}'.format(e)) 19 | return nx 20 | 21 | 22 | def _import_graphtool(): 23 | try: 24 | import graph_tool as gt 25 | except Exception as e: 26 | raise ImportError('Cannot import graph-tool. Use networkx or try to ' 27 | 'install it. Original exception: {}'.format(e)) 28 | return gt 29 | 30 | 31 | class IOMixIn(object): 32 | 33 | def _break_signals(self): 34 | r"""Break N-dimensional signals into N 1D signals.""" 35 | for name in list(self.signals.keys()): 36 | if self.signals[name].ndim == 2: 37 | for i, signal_1d in enumerate(self.signals[name].T): 38 | self.signals[name + '_' + str(i)] = signal_1d 39 | del self.signals[name] 40 | 41 | def _join_signals(self): 42 | r"""Join N 1D signals into one N-dimensional signal.""" 43 | joined = dict() 44 | for name in self.signals: 45 | name_base = name.rsplit('_', 1)[0] 46 | names = joined.get(name_base, list()) 47 | names.append(name) 48 | joined[name_base] = names 49 | for name_base, names in joined.items(): 50 | if len(names) > 1: 51 | names = sorted(names) # ensure dim ordering (_0, _1, etc.) 52 | signal_nd = np.stack([self.signals[n] for n in names], axis=1) 53 | self.signals[name_base] = signal_nd 54 | for name in names: 55 | del self.signals[name] 56 | 57 | def to_networkx(self): 58 | r"""Export the graph to NetworkX. 59 | 60 | Edge weights are stored as an edge attribute, 61 | under the name "weight". 62 | 63 | Signals are stored as node attributes, 64 | under their name in the :attr:`signals` dictionary. 65 | `N`-dimensional signals are broken into `N` 1-dimensional signals. 66 | They will eventually be joined back together on import. 67 | 68 | Returns 69 | ------- 70 | graph : :class:`networkx.Graph` 71 | A NetworkX graph object. 
72 | 73 | See Also 74 | -------- 75 | to_graphtool : export to graph-tool 76 | save : save to a file 77 | 78 | Examples 79 | -------- 80 | >>> import networkx as nx 81 | >>> from matplotlib import pyplot as plt 82 | >>> graph = graphs.Path(4, directed=True) 83 | >>> graph.set_signal(np.full(4, 2.3), 'signal') 84 | >>> graph = graph.to_networkx() 85 | >>> print(nx.info(graph)) 86 | Name: Path 87 | Type: DiGraph 88 | Number of nodes: 4 89 | Number of edges: 3 90 | Average in degree: 0.7500 91 | Average out degree: 0.7500 92 | >>> nx.is_directed(graph) 93 | True 94 | >>> graph.nodes() 95 | NodeView((0, 1, 2, 3)) 96 | >>> graph.edges() 97 | OutEdgeView([(0, 1), (1, 2), (2, 3)]) 98 | >>> graph.nodes()[2] 99 | {'signal': 2.3} 100 | >>> graph.edges()[(0, 1)] 101 | {'weight': 1.0} 102 | >>> # nx.draw(graph, with_labels=True) 103 | 104 | Another common goal is to use NetworkX to compute some properties to be 105 | be imported back in the PyGSP as signals. 106 | 107 | >>> import networkx as nx 108 | >>> from matplotlib import pyplot as plt 109 | >>> graph = graphs.Sensor(100, seed=42) 110 | >>> graph.set_signal(graph.coords, 'coords') 111 | >>> graph = graph.to_networkx() 112 | >>> betweenness = nx.betweenness_centrality(graph, weight='weight') 113 | >>> nx.set_node_attributes(graph, betweenness, 'betweenness') 114 | >>> graph = graphs.Graph.from_networkx(graph) 115 | >>> graph.compute_fourier_basis() 116 | >>> graph.set_coordinates(graph.signals['coords']) 117 | >>> fig, axes = plt.subplots(1, 2) 118 | >>> _ = graph.plot(graph.signals['betweenness'], ax=axes[0]) 119 | >>> _ = axes[1].plot(graph.e, graph.gft(graph.signals['betweenness'])) 120 | 121 | """ 122 | nx = _import_networkx() 123 | 124 | def convert(number): 125 | # NetworkX accepts arbitrary python objects as attributes, but: 126 | # * the GEXF writer does not accept any NumPy types (on signals), 127 | # * the GraphML writer does not accept NumPy ints. 128 | if issubclass(number.dtype.type, (np.integer, np.bool_)): 129 | return int(number) 130 | else: 131 | return float(number) 132 | 133 | def edges(): 134 | for source, target, weight in zip(*self.get_edge_list()): 135 | yield int(source), int(target), {'weight': convert(weight)} 136 | 137 | def nodes(): 138 | for vertex in range(self.n_vertices): 139 | signals = {name: convert(signal[vertex]) 140 | for name, signal in self.signals.items()} 141 | yield vertex, signals 142 | 143 | # self._break_signals() 144 | graph = nx.DiGraph() if self.is_directed() else nx.Graph() 145 | graph.add_nodes_from(nodes()) 146 | graph.add_edges_from(edges()) 147 | graph.name = self.__class__.__name__ 148 | return graph 149 | 150 | def to_graphtool(self): 151 | r"""Export the graph to graph-tool. 152 | 153 | Edge weights are stored as an edge property map, 154 | under the name "weight". 155 | 156 | Signals are stored as vertex property maps, 157 | under their name in the :attr:`signals` dictionary. 158 | `N`-dimensional signals are broken into `N` 1-dimensional signals. 159 | They will eventually be joined back together on import. 160 | 161 | Returns 162 | ------- 163 | graph : :class:`graph_tool.Graph` 164 | A graph-tool graph object. 
165 | 166 | See Also 167 | -------- 168 | to_networkx : export to NetworkX 169 | save : save to a file 170 | 171 | Examples 172 | -------- 173 | >>> import graph_tool as gt 174 | >>> import graph_tool.draw 175 | >>> from matplotlib import pyplot as plt 176 | >>> graph = graphs.Path(4, directed=True) 177 | >>> graph.set_signal(np.full(4, 2.3), 'signal') 178 | >>> graph = graph.to_graphtool() 179 | >>> graph.is_directed() 180 | True 181 | >>> graph.vertex_properties['signal'][2] 182 | 2.3 183 | >>> graph.edge_properties['weight'][(0, 1)] 184 | 1.0 185 | >>> # gt.draw.graph_draw(graph, vertex_text=graph.vertex_index) 186 | 187 | Another common goal is to use graph-tool to compute some properties to 188 | be imported back in the PyGSP as signals. 189 | 190 | >>> import graph_tool as gt 191 | >>> import graph_tool.centrality 192 | >>> from matplotlib import pyplot as plt 193 | >>> graph = graphs.Sensor(100, seed=42) 194 | >>> graph.set_signal(graph.coords, 'coords') 195 | >>> graph = graph.to_graphtool() 196 | >>> vprop, eprop = gt.centrality.betweenness( 197 | ... graph, weight=graph.edge_properties['weight']) 198 | >>> graph.vertex_properties['betweenness'] = vprop 199 | >>> graph = graphs.Graph.from_graphtool(graph) 200 | >>> graph.compute_fourier_basis() 201 | >>> graph.set_coordinates(graph.signals['coords']) 202 | >>> fig, axes = plt.subplots(1, 2) 203 | >>> _ = graph.plot(graph.signals['betweenness'], ax=axes[0]) 204 | >>> _ = axes[1].plot(graph.e, graph.gft(graph.signals['betweenness'])) 205 | 206 | """ 207 | 208 | # See gt.value_types() for the list of accepted types. 209 | # See the definition of _type_alias() for a list of aliases. 210 | # Mapping from https://docs.scipy.org/doc/numpy/user/basics.types.html. 211 | convert = { 212 | np.bool_: 'bool', 213 | np.int8: 'int8_t', 214 | np.int16: 'int16_t', 215 | np.int32: 'int32_t', 216 | np.int64: 'int64_t', 217 | np.short: 'short', 218 | np.intc: 'int', 219 | np.uintc: 'unsigned int', 220 | np.long: 'long', 221 | np.longlong: 'long long', 222 | np.uint: 'unsigned long', 223 | np.single: 'float', 224 | np.double: 'double', 225 | np.longdouble: 'long double', 226 | } 227 | 228 | gt = _import_graphtool() 229 | graph = gt.Graph(directed=self.is_directed()) 230 | 231 | sources, targets, weights = self.get_edge_list() 232 | graph.add_edge_list(np.asarray((sources, targets)).T) 233 | try: 234 | dtype = convert[weights.dtype.type] 235 | except KeyError: 236 | raise TypeError("Type {} of the edge weights is not supported." 237 | .format(weights.dtype)) 238 | prop = graph.new_edge_property(dtype) 239 | prop.get_array()[:] = weights 240 | graph.edge_properties['weight'] = prop 241 | 242 | self._break_signals() 243 | for name, signal in self.signals.items(): 244 | try: 245 | dtype = convert[signal.dtype.type] 246 | except KeyError: 247 | raise TypeError("Type {} of signal {} is not supported." 248 | .format(signal.dtype, name)) 249 | prop = graph.new_vertex_property(dtype) 250 | prop.get_array()[:] = signal 251 | graph.vertex_properties[name] = prop 252 | 253 | return graph 254 | 255 | @classmethod 256 | def from_networkx(cls, graph, weight='weight'): 257 | r"""Import a graph from NetworkX. 258 | 259 | Edge weights are retrieved as an edge attribute, 260 | under the name specified by the ``weight`` parameter. 261 | 262 | Signals are retrieved from node attributes, 263 | and stored in the :attr:`signals` dictionary under the attribute name. 264 | `N`-dimensional signals that were broken during export are joined. 
265 | 266 | Parameters 267 | ---------- 268 | graph : :class:`networkx.Graph` 269 | A NetworkX graph object. 270 | weight : string or None, optional 271 | The edge attribute that holds the numerical values used as the edge 272 | weights. All edge weights are set to 1 if None, or not found. 273 | 274 | Returns 275 | ------- 276 | graph : :class:`~pygsp.graphs.Graph` 277 | A PyGSP graph object. 278 | 279 | Notes 280 | ----- 281 | 282 | The nodes are ordered according to :meth:`networkx.Graph.nodes`. 283 | 284 | In NetworkX, node attributes need not be set for every node. 285 | If a node attribute is not set for a node, a NaN is assigned to the 286 | corresponding signal for that node. 287 | 288 | If the graph is a :class:`networkx.MultiGraph`, multiedges are 289 | aggregated by summation. 290 | 291 | See Also 292 | -------- 293 | from_graphtool : import from graph-tool 294 | load : load from a file 295 | 296 | Examples 297 | -------- 298 | >>> import networkx as nx 299 | >>> graph = nx.Graph() 300 | >>> graph.add_edge(1, 2, weight=0.2) 301 | >>> graph.add_edge(2, 3, weight=0.9) 302 | >>> graph.add_node(4, sig=3.1416) 303 | >>> graph.nodes() 304 | NodeView((1, 2, 3, 4)) 305 | >>> graph = graphs.Graph.from_networkx(graph) 306 | >>> graph.W.toarray() 307 | array([[0. , 0.2, 0. , 0. ], 308 | [0.2, 0. , 0.9, 0. ], 309 | [0. , 0.9, 0. , 0. ], 310 | [0. , 0. , 0. , 0. ]]) 311 | >>> graph.signals 312 | {'sig': array([ nan, nan, nan, 3.1416])} 313 | 314 | """ 315 | nx = _import_networkx() 316 | from .graph import Graph 317 | 318 | adjacency = nx.to_scipy_sparse_matrix(graph, weight=weight) 319 | graph_pg = Graph(adjacency) 320 | 321 | for i, node in enumerate(graph.nodes()): 322 | for name in graph.nodes[node].keys(): 323 | try: 324 | signal = graph_pg.signals[name] 325 | except KeyError: 326 | signal = np.full(graph_pg.n_vertices, np.nan) 327 | graph_pg.set_signal(signal, name) 328 | try: 329 | signal[i] = graph.nodes[node][name] 330 | except KeyError: 331 | pass # attribute not set for node 332 | 333 | graph_pg._join_signals() 334 | return graph_pg 335 | 336 | 337 | @classmethod 338 | def load(cls, path, fmt=None, backend=None): 339 | r"""Load a graph from a file. 340 | 341 | Edge weights are retrieved as an edge attribute named "weight". 342 | 343 | Signals are retrieved from node attributes, 344 | and stored in the :attr:`signals` dictionary under the attribute name. 345 | `N`-dimensional signals that were broken during export are joined. 346 | 347 | Parameters 348 | ---------- 349 | path : string 350 | Path to the file from which to load the graph. 351 | fmt : {'graphml', 'gml', 'gexf', None}, optional 352 | Format in which the graph is saved. 353 | Guessed from the filename extension if None. 354 | backend : {'networkx', 'graph-tool', None}, optional 355 | Library used to load the graph. Automatically chosen if None. 356 | 357 | Returns 358 | ------- 359 | graph : :class:`Graph` 360 | The loaded graph. 361 | 362 | See Also 363 | -------- 364 | save : save a graph to a file 365 | from_networkx : load with NetworkX then import in the PyGSP 366 | from_graphtool : load with graph-tool then import in the PyGSP 367 | 368 | Notes 369 | ----- 370 | 371 | A lossless round-trip is only guaranteed if the graph (and its signals) 372 | is saved and loaded with the same backend. 373 | 374 | Loading from other formats is possible by loading in NetworkX or 375 | graph-tool, and importing to the PyGSP. 376 | The proposed formats are however tested for faithful round-trips. 
377 | 378 | Examples 379 | -------- 380 | >>> graph = graphs.Logo() 381 | >>> graph.save('logo.graphml') 382 | >>> graph = graphs.Graph.load('logo.graphml') 383 | >>> import os 384 | >>> os.remove('logo.graphml') 385 | 386 | """ 387 | 388 | if fmt is None: 389 | fmt = os.path.splitext(path)[1][1:] 390 | if fmt not in ['graphml', 'gml', 'gexf']: 391 | raise ValueError('Unsupported format {}.'.format(fmt)) 392 | 393 | def load_networkx(path, fmt): 394 | nx = _import_networkx() 395 | load = getattr(nx, 'read_' + fmt) 396 | graph = load(path) 397 | return cls.from_networkx(graph) 398 | 399 | def load_graphtool(path, fmt): 400 | gt = _import_graphtool() 401 | graph = gt.load_graph(path, fmt=fmt) 402 | return cls.from_graphtool(graph) 403 | 404 | if backend == 'networkx': 405 | return load_networkx(path, fmt) 406 | elif backend == 'graph-tool': 407 | return load_graphtool(path, fmt) 408 | elif backend is None: 409 | try: 410 | return load_networkx(path, fmt) 411 | except ImportError: 412 | try: 413 | return load_graphtool(path, fmt) 414 | except ImportError: 415 | raise ImportError('Cannot import networkx nor graph-tool.') 416 | else: 417 | raise ValueError('Unknown backend {}.'.format(backend)) 418 | 419 | def save(self, path, fmt=None, backend=None): 420 | r"""Save the graph to a file. 421 | 422 | Edge weights are stored as an edge attribute, 423 | under the name "weight". 424 | 425 | Signals are stored as node attributes, 426 | under their name in the :attr:`signals` dictionary. 427 | `N`-dimensional signals are broken into `N` 1-dimensional signals. 428 | They will eventually be joined back together on import. 429 | 430 | Supported formats are: 431 | 432 | * GraphML_, a comprehensive XML format. 433 | `Wikipedia `_. 434 | Supported by NetworkX_, graph-tool_, NetworKit_, igraph_, Gephi_, 435 | Cytoscape_, SocNetV_. 436 | * GML_ (Graph Modelling Language), a simple non-XML format. 437 | `Wikipedia `_. 438 | Supported by NetworkX_, graph-tool_, NetworKit_, igraph_, Gephi_, 439 | Cytoscape_, SocNetV_, Tulip_. 440 | * GEXF_ (Graph Exchange XML Format), Gephi's XML format. 441 | Supported by NetworkX_, NetworKit_, Gephi_, Tulip_, ngraph_. 442 | 443 | If unsure, we recommend GraphML_. 444 | 445 | .. _GraphML: http://graphml.graphdrawing.org 446 | .. _GML: https://web.archive.org/web/20190303094704/http://www.fim.uni-passau.de:80/fileadmin/files/lehrstuhl/brandenburg/projekte/gml/gml-technical-report.pdf 447 | .. _GEXF: https://gephi.org/gexf/format 448 | .. _NetworkX: https://networkx.github.io 449 | .. _graph-tool: https://graph-tool.skewed.de 450 | .. _NetworKit: https://networkit.github.io 451 | .. _igraph: https://igraph.org 452 | .. _ngraph: https://github.com/anvaka/ngraph 453 | .. _Gephi: https://gephi.org 454 | .. _Cytoscape: https://cytoscape.org 455 | .. _SocNetV: https://socnetv.org 456 | .. _Tulip: http://tulip.labri.fr 457 | 458 | Parameters 459 | ---------- 460 | path : string 461 | Path to the file where the graph is to be saved. 462 | fmt : {'graphml', 'gml', 'gexf', None}, optional 463 | Format in which to save the graph. 464 | Guessed from the filename extension if None. 465 | backend : {'networkx', 'graph-tool', None}, optional 466 | Library used to load the graph. Automatically chosen if None. 
467 | 468 | See Also 469 | -------- 470 | load : load a graph from a file 471 | to_networkx : export as a NetworkX graph, and save with NetworkX 472 | to_graphtool : export as a graph-tool graph, and save with graph-tool 473 | 474 | Notes 475 | ----- 476 | 477 | A lossless round-trip is only guaranteed if the graph (and its signals) 478 | is saved and loaded with the same backend. 479 | 480 | Saving in other formats is possible by exporting to NetworkX or 481 | graph-tool, and using their respective saving functionality. 482 | The proposed formats are however tested for faithful round-trips. 483 | 484 | Edge weights and signal values are rounded at the sixth decimal when 485 | saving in ``fmt='gml'`` with ``backend='graph-tool'``. 486 | 487 | Examples 488 | -------- 489 | >>> graph = graphs.Logo() 490 | >>> graph.save('logo.graphml') 491 | >>> graph = graphs.Graph.load('logo.graphml') 492 | >>> import os 493 | >>> os.remove('logo.graphml') 494 | 495 | """ 496 | 497 | if fmt is None: 498 | fmt = os.path.splitext(path)[1][1:] 499 | if fmt not in ['graphml', 'gml', 'gexf']: 500 | raise ValueError('Unsupported format {}.'.format(fmt)) 501 | 502 | def save_networkx(graph, path, fmt): 503 | nx = _import_networkx() 504 | graph = graph.to_networkx() 505 | save = getattr(nx, 'write_' + fmt) 506 | save(graph, path) 507 | 508 | def save_graphtool(graph, path, fmt): 509 | graph = graph.to_graphtool() 510 | graph.save(path, fmt=fmt) 511 | 512 | if backend == 'networkx': 513 | save_networkx(self, path, fmt) 514 | elif backend == 'graph-tool': 515 | save_graphtool(self, path, fmt) 516 | elif backend is None: 517 | try: 518 | save_networkx(self, path, fmt) 519 | except ImportError: 520 | try: 521 | save_graphtool(self, path, fmt) 522 | except ImportError: 523 | raise ImportError('Cannot import networkx nor graph-tool.') 524 | else: 525 | raise ValueError('Unknown backend {}.'.format(backend)) -------------------------------------------------------------------------------- /sparsenet/util/util.py: -------------------------------------------------------------------------------- 1 | # Created at 2020-04-16 2 | # Summary: util functions 3 | import collections 4 | import math 5 | import os 6 | import random 7 | import sys 8 | import time 9 | from functools import partial 10 | from itertools import chain 11 | from warnings import warn 12 | 13 | import matplotlib.pyplot as plt 14 | import networkx as nx 15 | import numpy as np 16 | import pandas as pd 17 | import torch 18 | import torch_geometric 19 | from colorama import init 20 | from pygsp import graphs 21 | from scipy.sparse import coo_matrix 22 | from termcolor import colored 23 | from torch_geometric.data import Data 24 | from torch_geometric.utils import subgraph, get_laplacian, from_networkx, to_networkx 25 | 26 | nan1 = 0.12345 27 | init() 28 | 29 | 30 | def timefunc(method, threshold=1): 31 | def timed(*args, **kw): 32 | ts = time.time() 33 | result = method(*args, **kw) 34 | te = time.time() 35 | if 'log_time' in kw: 36 | name = kw.get('log_name', method.__name__.upper()) 37 | kw['log_time'][name] = int((te - ts) * 1000) 38 | else: 39 | if int(te - ts) >= threshold: 40 | print(f'{method.__name__}: {pf(te - ts, precision=1)}s') 41 | return result 42 | 43 | return timed 44 | 45 | 46 | tf = partial(timefunc, threshold=1) 47 | 48 | 49 | def stats(x, precision=2, verbose=True, var_name='None'): 50 | """ 51 | print the stats of a (np.array, list, pt.Tensor) 52 | 53 | :param x: 54 | :param precision: 55 | :param verbose: 56 | :return: 57 | """ 58 | if 
isinstance(x, torch.Tensor): x = tonp(x) 59 | assert isinstance(x, (list, np.ndarray)), 'stats only take list or numpy array' 60 | 61 | ave_ = np.mean(x) 62 | median_ = np.median(x) 63 | max_ = np.max(x) 64 | min_ = np.min(x) 65 | std_ = np.std(x) 66 | pf_ = partial(pf, precision=precision) 67 | 68 | if verbose: 69 | ave_, min_, max_, median_, std_ = list(map(pf_, [ave_, min_, max_, median_, std_])) 70 | line = '{:>25}: {:>8}(mean) {:>8}(min) {:>8}(max) {:>8}(median) {:>8}(std)'.format(var_name, ave_, min_, max_, 71 | median_, std_) 72 | print(line) 73 | 74 | return list(map(pf_, [ave_, min_, max_, median_, std_])) 75 | 76 | 77 | def viz_graph(g, node_size=5, edge_width=1, node_color='b', color_bar=False, show=False): 78 | # g = nx.random_geometric_graph(100, 0.125) 79 | pos = nx.spring_layout(g) 80 | nx.draw(g, pos, node_color=node_color, node_size=node_size, with_labels=False, width=edge_width) 81 | if color_bar: 82 | # https://stackoverflow.com/questions/26739248/how-to-add-a-simple-colorbar-to-a-network-graph-plot-in-python 83 | sm = plt.cm.ScalarMappable(norm=plt.Normalize(vmin=min(node_color), vmax=max(node_color))) 84 | sm._A = [] 85 | plt.colorbar(sm) 86 | if show: plt.show() 87 | 88 | 89 | def largest_cc(g): 90 | isinstance(g, Data) 91 | g = to_networkx(g).to_undirected() 92 | subgraphs = [g.subgraph(c).copy() for c in sorted(nx.connected_components(g), key=len, reverse=True)] 93 | largest_cc = subgraphs[0] 94 | g = from_networkx(largest_cc) 95 | return g 96 | 97 | 98 | def num_comp(g): 99 | assert isinstance(g, Data) 100 | g_nx = to_networkx(g).to_undirected() 101 | n_compoent = nx.number_connected_components(g_nx) 102 | 103 | comp_size = [len(c) for c in nx.connected_components(g_nx)] 104 | comp_size = sorted(comp_size, reverse=True) 105 | if n_compoent > 1: 106 | if n_compoent < 10: 107 | print(comp_size) 108 | else: 109 | print(f'Print size of first 10 compoents: {comp_size[:10]}') 110 | 111 | # assert n_compoent == 1, f'number of component is {n_compoent}' 112 | return n_compoent 113 | 114 | 115 | def random_pygeo_graph(n_node, node_feat_dim, n_edge, edge_feat_dim, device='cpu', viz=False): 116 | """ random DIRECTED pyG graph """ 117 | g = Data(x=torch.rand(n_node, node_feat_dim), 118 | edge_index=random_edge_index(n_edge, n_node), 119 | edge_attr=torch.rand(n_edge, edge_feat_dim).type(torch.LongTensor), 120 | edge_weight=torch.ones(n_edge)) 121 | 122 | g_nx = to_networkx(g).to_undirected() 123 | n_compoent = nx.number_connected_components(g_nx) 124 | if n_compoent > 1 and viz: viz_graph(g_nx, show=True) 125 | assert n_compoent == 1, f'number of component is {n_compoent}' 126 | g = g.to(device) 127 | return g 128 | 129 | 130 | def maybe_edge_weight(g): 131 | """ used for get_laplacian. 
132 | edge_weigher will update edge weights, which is saved in g.edge_weight attribute 133 | get_laplacian will try to retrive latest g.edge_weight to compute loss 134 | """ 135 | assert isinstance(g, torch_geometric.data.data.Data) 136 | try: 137 | return g.edge_weight 138 | except AttributeError: 139 | warn('Use default edge weight') 140 | return None 141 | 142 | 143 | def random_edge_index(n_edge=200, n_node=20): 144 | """ generate random edge tensor of shape (2, n_edge) """ 145 | assert n_edge % 2 == 0 146 | assert n_edge <= n_node * (n_node - 1), f'n_edge: {n_edge}; n_node: {n_node}' 147 | edges = [] 148 | for i in range(n_edge // 2): 149 | a, b = np.random.choice(n_node, 2, replace=False).tolist() 150 | while (a, b) in edges: 151 | a, b = np.random.choice(n_node, 2, replace=False).tolist() 152 | edges.append((a, b)) 153 | edges.append((b, a)) 154 | edges = list(edges) 155 | edges = torch.LongTensor(np.array(edges).T) 156 | return edges 157 | 158 | 159 | def random_edge_weight(n_edges): 160 | """ 161 | :param edges: [2, n] tensor (output of random_edge_index) 162 | :return: 163 | """ 164 | weights = [] 165 | assert n_edges % 2 == 0 166 | for i in range(n_edges // 2): 167 | w = np.random.random() 168 | weights.append(w) 169 | weights.append(w) 170 | return torch.Tensor(weights) 171 | 172 | 173 | def unit_vector(vector): 174 | """ Returns the unit vector of the vector. """ 175 | return vector / np.linalg.norm(vector) 176 | 177 | 178 | def angle_between(v1, v2): 179 | """ Returns the angle in radians between vectors 'v1' and 'v2':: 180 | https://bit.ly/2YHzUYK 181 | 182 | >>> angle_between((1, 0, 0), (0, 1, 0)) 183 | 1.5707963267948966 184 | >>> angle_between((1, 0, 0), (1, 0, 0)) 185 | 0.0 186 | >>> angle_between((1, 0, 0), (-1, 0, 0)) 187 | 3.141592653589793 188 | """ 189 | v1_u = unit_vector(v1) 190 | v2_u = unit_vector(v2) 191 | return np.arccos(np.clip(np.dot(v1_u, v2_u), -1.0, 1.0)) 192 | 193 | 194 | def add_range(r1, r2): 195 | concatenated = chain(r1, r2) 196 | return concatenated 197 | 198 | 199 | def fix_seed(seed=1): 200 | random.seed(seed) 201 | np.random.seed(seed) 202 | torch.manual_seed(seed) 203 | torch.cuda.manual_seed_all(seed) 204 | 205 | torch.backends.cudnn.benchmark = False 206 | torch.backends.cudnn.deterministic = True 207 | 208 | 209 | def banner(text='', ch='=', length=140, compact=False): 210 | """ http://bit.ly/2vfTDCr 211 | print a banner 212 | """ 213 | spaced_text = ' %s ' % text 214 | banner = spaced_text.center(length, ch) 215 | print(banner) 216 | if not compact: 217 | print() 218 | 219 | 220 | def pf(nbr, precision=1): 221 | """ precision format """ 222 | # assert type(nbr)==float 223 | if isinstance(nbr, torch.Tensor): 224 | nbr = np.float(nbr) 225 | 226 | if math.isnan(nbr): 227 | return 'nan' 228 | elif math.isinf(nbr): 229 | return 'inf' 230 | else: 231 | return round(nbr * (10 ** precision)) / (10 ** precision) 232 | 233 | 234 | def set_thread(n=1): 235 | import os 236 | os.environ['MKL_NUM_THREADS'] = str(n) 237 | 238 | os.environ['OMP_NUM_THREADS'] = str(n) 239 | os.environ['OPENBLAS_NUM_THREADS'] = str(n) 240 | os.environ['MKL_NUM_THREADS'] = str(n) 241 | os.environ['VECLIB_MAXIMUM_THREADS'] = str(n) 242 | os.environ['NUMEXPR_NUM_THREADS'] = str(n) 243 | import torch 244 | torch.set_num_threads(n) 245 | 246 | 247 | @timefunc 248 | def tonp(tsr): 249 | if isinstance(tsr, np.ndarray): 250 | return tsr 251 | elif isinstance(tsr, np.matrix): 252 | return np.array(tsr) 253 | elif isinstance(tsr, scipy.sparse.csc.csc_matrix): 254 | return 
np.array(tsr.todense()) 255 | 256 | assert isinstance(tsr, torch.Tensor) 257 | tsr = tsr.cpu() 258 | assert isinstance(tsr, torch.Tensor) 259 | 260 | try: 261 | arr = tsr.numpy() 262 | except TypeError: 263 | arr = tsr.detach().to_dense().numpy() 264 | except: 265 | arr = tsr.detach().numpy() 266 | 267 | assert isinstance(arr, np.ndarray) 268 | return arr 269 | 270 | 271 | def nan_ratio(x): 272 | """ http://bit.ly/2PL7yaP 273 | """ 274 | assert isinstance(x, np.ndarray) 275 | try: 276 | return np.count_nonzero(np.isnan(x)) / x.size 277 | except TypeError: 278 | return '-1 (TypeError)' 279 | 280 | 281 | import scipy 282 | 283 | 284 | def np2set(x): 285 | assert isinstance(x, np.ndarray) 286 | return set(np.unique(x)) 287 | 288 | 289 | @timefunc 290 | def summary(x, name='x', terminate=False, 291 | skip=False, delimiter=None, precision=3, 292 | exit=False, highlight=False): 293 | if highlight: 294 | name = red(name) 295 | 296 | if skip: 297 | print('', end='') 298 | return '' 299 | 300 | if isinstance(x, list): 301 | print(f'{name}: a list of length {len(x)}') 302 | 303 | if len(x) < 6: 304 | for _x in x: 305 | summary(_x) 306 | 307 | elif isinstance(x, scipy.sparse.csc.csc_matrix): 308 | min_, max_ = x.min(), x.max() 309 | mean_ = x.mean() 310 | 311 | std1 = np.std(tonp(x)) 312 | x_copy = x.copy() 313 | x_copy.data **= 2 314 | std2 = x_copy.mean() - (x.mean() ** 2) # todo: std1 and std2 are different. 1 315 | pf_ = partial(pf, precision=precision) 316 | mean_, min_, max_, std1, std2 = list(map(pf_, [mean_, min_, max_, std1, std2])) 317 | 318 | line0 = '{:>10}: csc_matrix ({}) of shape {:>8}'.format(name, str(x.dtype), str(x.shape)) 319 | line0 = line0 + ' ' * max(5, (45 - len(line0))) 320 | # line0 += 'Nan ratio: {:>8}.'.format(nan_ratio(x_)) 321 | line1 = ' {:>8}(mean) {:>8}(min) {:>8}(max) {:>8}(std1) {:>8}(std2) {:>8}(unique) ' \ 322 | .format(mean_, min_, max_, std1, std2, -1) 323 | line = line0 + line1 324 | print(line) 325 | 326 | elif isinstance(x, (np.ndarray,)): 327 | if x.size > 232960 * 10: 328 | return 329 | x_ = tonp(x) 330 | ave_ = np.mean(x_) 331 | median_ = np.median(x_) 332 | max_ = np.max(x_) 333 | min_ = np.min(x_) 334 | std_ = np.std(x_) 335 | unique_ = len(np.unique(x_)) 336 | pf_ = partial(pf, precision=precision) 337 | ave_, min_, max_, median_, std_, unique_ = list(map(pf_, [ave_, min_, max_, median_, std_, unique_])) 338 | 339 | line0 = '{:>10}: array ({}) of shape {:>8}'.format(name, str(x.dtype), str(x.shape)) 340 | line0 = line0 + ' ' * max(5, (45 - len(line0))) 341 | line0 += 'Nan ratio: {:>8}.'.format(nan_ratio(x_)) 342 | line1 = ' {:>8}(mean) {:>8}(min) {:>8}(max) {:>8}(median) {:>8}(std) {:>8}(unique) '.format(ave_, min_, max_, 343 | median_, std_, 344 | unique_) 345 | line = line0 + line1 346 | if np2set(x_) <= set([-1, 0, 1]): 347 | ratio1 = np.sum(x_ == 1) / float(x_.size) 348 | ratio0 = np.sum(x_ == 0) / float(x_.size) 349 | line += '|| {:>8}(1 ratio) {:>8}(0 ratio)'.format(pf(ratio1, 3), pf(ratio0, 3)) 350 | 351 | if nan1 in x_: 352 | nan_cnt = np.sum(x_ == nan1) 353 | line += f'nan_cnt {nan_cnt}' 354 | 355 | # f'{name}: array of shape {x.shape}.' 356 | print(line) 357 | # print(f'{name}: a np.array of shape {x.shape}. nan ratio: {nan_ratio(x)}. 
' + line) 358 | 359 | elif isinstance(x, (torch.Tensor)): 360 | if x.numel() > 232965 * 10: 361 | return 362 | x_ = tonp(x) 363 | if len(x_) == 0: 364 | print(f'{name}: zero length np.array') 365 | else: 366 | ave_ = np.mean(x_) 367 | median_ = np.median(x_) 368 | max_ = np.max(x_) 369 | min_ = np.min(x_) 370 | std_ = np.std(x_) 371 | unique_ = len(np.unique(x_)) 372 | 373 | pf_ = partial(pf, precision=2) 374 | ave_, min_, max_, median_, std_, unique_ = list(map(pf_, [ave_, min_, max_, median_, std_, unique_])) 375 | line = '{:>8}(mean) {:>8}(min) {:>8}(max) {:>8}(median) {:>8}(std) {:>8}(unique)'.format(ave_, min_, max_, 376 | median_, std_, 377 | unique_) 378 | 379 | print( 380 | '{:20}'.format(name) + '{:20}'.format(str(x.data.type())[6:]) + '{:15}'.format( 381 | str(x.size())[11:-1]) + line) 382 | # print(line) 383 | # print(f'{name}: a Tensor ({x.data.type()}) of shape {x.size()}') 384 | 385 | elif isinstance(x, tuple): 386 | print(f'{name}: a tuple of shape {len(x)}') 387 | if len(x) < 6: 388 | for ele in x: 389 | summary(ele, name='ele') 390 | 391 | elif isinstance(x, (dict, collections.defaultdict)): 392 | print(f'summarize a dict {name} of len {len(x)}') 393 | for k, v in x.items(): 394 | # print(f'key is {k}') 395 | summary(v, name=k) 396 | 397 | elif isinstance(x, torch_geometric.data.data.Data): 398 | try: 399 | summary_pygeo(x, name=name) 400 | except: 401 | raise Exception('Check pytorch geometric install.') 402 | 403 | elif isinstance(x, pd.DataFrame): 404 | from collections import OrderedDict 405 | 406 | dataType_dict = OrderedDict(x.dtypes) 407 | banner(text=f'start summarize a df ({name}) of shape {x.shape}', ch='-') 408 | print('df info') 409 | print(x.info()) 410 | print('\n') 411 | 412 | print('head of df:') 413 | # print(tabulate(x, headers='firstrow')) 414 | print(x.head()) 415 | print('\n') 416 | 417 | try: 418 | print('continuous feats of Dataframe:') 419 | cont_x = x.describe().T 420 | print(cont_x) 421 | print(cont_x.shape) 422 | print('\n') 423 | except ValueError: 424 | print('x.describe().T raise ValueError') 425 | 426 | try: 427 | print('non-cont\' feats (object type) of Dataframe:') 428 | non_cont = x.describe(include=[object]).T 429 | print(non_cont) 430 | print(non_cont.shape) 431 | except ValueError: 432 | print('x.describe(include=[object]).T raise ValueError') 433 | 434 | banner(text=f'finish summarize a df ({name}) of shape {x.shape}', ch='-') 435 | 436 | elif isinstance(x, (int, float)): 437 | print(f'{name}(float): {x}') 438 | 439 | elif isinstance(x, str): 440 | print(f'{name}(str): {x}') 441 | 442 | else: 443 | print(f'{x}: \t\t {type(x)}') 444 | if terminate: 445 | exit(f'NotImplementedError for input {type(x)}') 446 | else: 447 | pass 448 | 449 | if delimiter is not None: 450 | assert isinstance(delimiter, str) 451 | print(delimiter) 452 | 453 | if exit: 454 | sys.exit() 455 | 456 | 457 | def dict2name(d): 458 | """ 459 | :param d: {'n_epoch': 300, 'bs': 32, 'n_data': 10, 'scheduler': True} 460 | :return: bs_32_n_data_10_n_epoch_300_scheduler_True 461 | """ 462 | assert isinstance(d, dict) 463 | keys = list(d.keys()) 464 | keys.sort() 465 | name = '' 466 | for k in keys: 467 | name += f'{k}_{d[k]}_' 468 | return name[:-1] 469 | 470 | 471 | def update_dict(d1, d2): 472 | # use d1 to update d2, return updated d2. 473 | # keys of d1 has to be a subset of keys of d2. 474 | assert isinstance(d1, dict) 475 | assert isinstance(d2, dict) 476 | assert set(d1.keys()) <= set(d2.keys()), 'Keys of d1 has to be a subset of keys of d2.' 
477 | for k, v in d1.items(): 478 | d2[k] = v 479 | return d2 480 | 481 | 482 | def hasany(s, s_list): 483 | """ 484 | :param s: a string 485 | :param s_list: a list of str 486 | :return: 487 | """ 488 | return any(ele in s for ele in s_list) 489 | 490 | 491 | def slicestr(s, f=None, t=None): 492 | """ 493 | :param s: a string 494 | :param f: from 495 | :param t: to 496 | :return: 497 | """ 498 | from_idx = s.index(f) 499 | to_idx = s.index(t) 500 | return s[from_idx:to_idx] 501 | 502 | 503 | def summary_pygeo(data, stat=False, precision=2, name=None): 504 | assert isinstance(data, torch_geometric.data.data.Data) 505 | print(f'Summary of {name} (torch_geometric.data.data.Data):') 506 | 507 | for k, v in data: 508 | print(' ', sep=' ', end=' ') 509 | if isinstance(v, torch.Tensor): 510 | if v.ndim == 1: 511 | summary(v, name=k, precision=precision) 512 | else: 513 | if v.size()[1] != 0: 514 | summary(v, name=k, precision=precision) 515 | else: 516 | warn(f'Empty edge index: {v}') 517 | elif isinstance(v, str): 518 | summary(v, k) 519 | else: 520 | NotImplementedError 521 | 522 | if stat: 523 | for k, v in data: 524 | stats(v, var_name=k) 525 | 526 | 527 | def subset_graph(g, indices, relabel_nodes=None): 528 | """ 529 | :param g: pyG graph where node index are contigious 530 | :param indices: 531 | :param relabel_nodes: if true, relabel nodes of the subgraph 532 | :return: 533 | """ 534 | if isinstance(indices, torch.Tensor): indices = indices.tolist() 535 | if isinstance(indices, set): indices = list(indices) 536 | 537 | assert isinstance(indices, list) 538 | assert isinstance(g, torch_geometric.data.data.Data) 539 | 540 | sub_edge_index, sub_edge_attr = subgraph(indices, g.edge_index, g.edge_attr, relabel_nodes=relabel_nodes) 541 | g_subindices = torch.tensor(indices) 542 | g_subx = g.x.index_select(0, g_subindices) 543 | g_sub = Data(x=g_subx, edge_index=sub_edge_index, edge_attr=sub_edge_attr, node_index=g_subindices) 544 | return g_sub 545 | 546 | 547 | def assert_nonan(x): 548 | res = torch.isnan(x) 549 | assert (res == False).all(), 'contains Nan' 550 | 551 | 552 | def make_dir(dir): 553 | # has side effect 554 | 555 | if dir == None: 556 | return 557 | 558 | if not os.path.exists(dir): 559 | os.makedirs(dir) 560 | 561 | 562 | def args_print(args, one_line=False): 563 | """ pretty print cmd with lots of args 564 | """ 565 | for i in range(20): 566 | args = args.replace(' ', ' ') 567 | 568 | arglis = args.split(' ') 569 | new_arglist = [] 570 | for i, token in enumerate(arglis): 571 | if '--' in token: 572 | token = '\n' + token 573 | elif token in ['-u', 'nohup']: 574 | pass 575 | elif '.py' in token: 576 | pass 577 | elif 'python' in token: 578 | pass 579 | else: 580 | space = (30 - len(arglis[i - 1])) * ' ' 581 | token = space + token # '{:>35}'.format(token) # 582 | new_arglist.append(token) 583 | 584 | newargs = ' '.join(new_arglist) + '\n' 585 | 586 | if not one_line: 587 | print(newargs) 588 | else: 589 | newargs = one_liner(newargs) 590 | print(newargs) 591 | 592 | 593 | def one_liner(cmd): 594 | """ convert cmd that takes many lines into just one line """ 595 | assert isinstance(cmd, str) 596 | cmd = cmd.replace('\n', '') 597 | for _ in range(10): 598 | cmd = cmd.replace(' ', ' ') 599 | return cmd 600 | 601 | 602 | def sig_dir(): 603 | from sparsenet.util.dir_util import DIR 604 | return DIR 605 | 606 | 607 | def fig_dir(): 608 | return f'{sig_dir()}sparsenet/paper/tex/Figs/' 609 | 610 | 611 | def tb_dir(): 612 | return f'{tb_dir()}/result/tensorboardx/' 613 | 614 | 615 | def 
model_dir(): 616 | dir = f'{sig_dir()}result/model/' 617 | make_dir(dir) 618 | return dir 619 | 620 | 621 | def red(x): 622 | return colored(x, "red") 623 | 624 | 625 | def tex_dir(): 626 | tex_dir = f'{sig_dir()}sparsenet/paper/tex/iclr_table/' 627 | make_dir(tex_dir) 628 | return tex_dir 629 | 630 | 631 | def random_laplacian(n): 632 | from torch_geometric.utils.random import erdos_renyi_graph 633 | edge_index = erdos_renyi_graph(n, 0.1) 634 | i, v = get_laplacian(edge_index, None, normalization=None) 635 | return i, v 636 | 637 | 638 | def runcmd(cmd, print_only=False): 639 | cmd = cmd.replace('--', ' --') 640 | banner('Execution of following cmds:', compact=True) 641 | if len(cmd) > 50 and '--' in cmd: 642 | args_print(cmd) 643 | else: 644 | print(cmd) 645 | 646 | if not print_only: 647 | os.system(cmd) 648 | 649 | if len(cmd) > 50 and '--' in cmd: 650 | args_print(cmd) 651 | 652 | 653 | def sparse_tensor2_sparse_numpyarray(sparse_tensor): 654 | """ 655 | :param sparse_tensor: a COO torch.sparse.FloatTensor 656 | :return: a scipy.sparse.coo_matrix 657 | """ 658 | if sparse_tensor.device.type == 'cuda': 659 | sparse_tensor = sparse_tensor.to('cpu') 660 | 661 | values = sparse_tensor._values().numpy() 662 | indices = sparse_tensor._indices() 663 | rows, cols = indices[0, :].numpy(), indices[1, :].numpy() 664 | size = sparse_tensor.size() 665 | scipy_sparse_mat = coo_matrix((values, (rows, cols)), shape=size, dtype=np.float) 666 | return scipy_sparse_mat 667 | 668 | 669 | def pyg2gsp(g): 670 | """ 671 | convert pyG graph to gsp graph. 672 | discard any info from pyG graph, and only take graph topology. 673 | """ 674 | assert isinstance(g, torch_geometric.data.Data) 675 | edge_indices, edge_weight = tonp(g.edge_index), tonp(g.edge_weight) 676 | row, col = edge_indices[0, :], edge_indices[1, :] 677 | 678 | # memory efficient 679 | n = g.num_nodes 680 | W = scipy.sparse.csr_matrix((edge_weight, (row, col)), shape=(n, n)) 681 | gspG = graphs.Graph(W) 682 | return gspG 683 | 684 | 685 | def dic2tsr(d, dev='cuda'): 686 | """ given a dict where key (size N) are consecutive numbers and values are also numbers (at most n), 687 | convert it into a tensor of size (N) where index is the key value is the value of d. 688 | """ 689 | N = len(d) 690 | assert N == max(d.keys()) + 1, f'keys ({N}) are not consecutive. 
Max key is {max(d.keys())}'
691 |     tsr = [0] * N
692 |     for k in d:
693 |         tsr[k] = d[k]
694 |     return torch.tensor(tsr).to(dev)
695 | 
696 | 
697 | if __name__ == '__main__':
698 |     from scipy.sparse import csc_matrix
699 | 
700 |     n = 400
701 |     x = csc_matrix((n, n), dtype=np.int8)
702 |     print(x.mean())
703 |     print(x.max())
704 |     print(x.min())
705 | 
706 |     std1 = np.std(tonp(x))
707 |     x_copy = x.copy()
708 |     x_copy.data **= 2
709 |     std2 = x_copy.mean() - x.mean() ** 2
710 |     print(std1, std2)
711 |     # summary(x, 'x')
712 | 
713 |     exit()
714 |     # edge = random_edge_index(n_edge=200, n_node=20)
715 |     from sparsenet.util.sample import sample_N2Nlandmarks
716 |     from sparsenet.util.graph_util import __map_back_from_edge, get_bipartite
717 | 
718 |     n_node, n_edge = 320, 1000
719 |     node_dim = 1
720 |     edge_feat_dim = 1
721 | 
722 |     g = Data(x=torch.rand(n_node, node_dim),
723 |              edge_index=random_edge_index(n_edge, n_node),
724 |              edge_attr=torch.rand(n_edge, edge_feat_dim).type(torch.LongTensor))
725 |     summary(g, 'original_graph')
726 | 
727 |     G = to_networkx(g)
728 |     G_prime, Assignment = sample_N2Nlandmarks(G, 10)
729 |     print(G_prime.edges.data())
730 |     edges = [e for e in G_prime.edges]
731 |     print('Select edge:', edges[0])
732 |     G1, G2, crossing_edges = __map_back_from_edge(g, edges[0], Assignment)
733 |     summary(G1, 'G1')
734 |     summary(G2, 'G2')
735 |     print('Crossing edge size:', crossing_edges[0].shape)
736 |     summary(get_bipartite(G1, G2, crossing_edges))
737 | 
--------------------------------------------------------------------------------
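A minimal usage sketch (illustrative only, not a file in this repository) of the helpers defined in /sparsenet/util/util.py above; it assumes the dependencies from install.sh and requirements.txt are installed:

    import torch
    from sparsenet.util.util import fix_seed, summary, stats, tonp, pf

    fix_seed(seed=1)                 # seed python, numpy and torch RNGs for reproducibility
    x = torch.rand(1000)
    summary(x, name='x')             # one-line report: dtype, shape, mean/min/max/median/std/unique
    stats(tonp(x), var_name='x')     # mean/min/max/median/std of the numpy copy of the tensor
    print(pf(3.14159, precision=2))  # precision formatting -> 3.14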