├── .gitattributes
├── .gitignore
├── README.md
├── docs
│   ├── Makefile
│   ├── make.bat
│   └── source
│       ├── block_models.rst
│       ├── cluster.rst
│       ├── conf.py
│       ├── implemented_algorithms.rst
│       ├── index.rst
│       ├── installation.rst
│       ├── modules.rst
│       ├── usage.rst
│       └── utils.rst
├── requirements.txt
├── setup.py
└── signet
    ├── __init__.py
    ├── block_models.py
    ├── burer_monteiro_sparse.py
    ├── cluster.py
    ├── tests
    │   ├── __init__.py
    │   └── package_usage.ipynb
    └── utils.py
/.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # DS_Store 2 | .DS_Store 3 | ./signet/.DS_Store 4 | 5 | # Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | MANIFEST 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *.cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | 63 | # Flask stuff: 64 | instance/ 65 | .webassets-cache 66 | 67 | # Scrapy stuff: 68 | .scrapy 69 | 70 | # Sphinx documentation 71 | docs/_build/ 72 | docs/_static/ 73 | docs/_templates/ 74 | 75 | # PyBuilder 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # pyenv 82 | .python-version 83 | 84 | # celery beat schedule file 85 | celerybeat-schedule 86 | 87 | # SageMath parsed files 88 | *.sage.py 89 | 90 | # Environments 91 | .env 92 | .venv 93 | env/ 94 | venv/ 95 | ENV/ 96 | env.bak/ 97 | venv.bak/ 98 | 99 | # Spyder project settings 100 | .spyderproject 101 | .spyproject 102 | 103 | # Rope project settings 104 | .ropeproject 105 | 106 | # mkdocs documentation 107 | /site 108 | 109 | # mypy 110 | .mypy_cache/ 111 | 112 | 113 | 114 | ##### 115 | 116 | 117 | # .idea 118 | .idea/ 119 | *.iml 120 | 121 | # Icon 122 | Icon* 123 | /signet/Icon* 124 | /signet/tests/Icon* 125 | 126 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SigNet 2 | 3 | [![DOI](https://zenodo.org/badge/147539304.svg)](https://zenodo.org/badge/latestdoi/147539304) 4 | 5 | A package for clustering signed networks. The following algorithms are implemented: 6 | 7 | - Standard spectral clustering with 8 | - Adjacency matrix (with multiple normalisations) 9 | - Signed Laplacian matrix (with multiple normalisations) 10 | - Balance Ratio Cut 11 | - Balance Normalised Cut 12 | 13 | - Semidefinite programming clustering (with exact and approximate solvers) 14 | 15 | - Generalised eigenproblem clustering (with multiple normalisations) 16 |
17 | - Clustering using a signed variant of the Bethe Hessian matrix 18 | 19 | ## Typical usage 20 | 21 | ```python 22 | from signet.cluster import Cluster 23 | from signet.block_models import SSBM 24 | from sklearn.metrics import adjusted_rand_score 25 | 26 | 27 | # simple test on the signed stochastic block model 28 | 29 | n = 50000 # number of nodes 30 | k = 2 # number of clusters 31 | eta = 0.1 # sign flipping probability 32 | p = 0.0002 # edge probability 33 | 34 | (Ap, An), true_assignment = SSBM(n = n, k = k, pin = p, etain = eta) # construct a graph 35 | 36 | c = Cluster((Ap, An)) 37 | 38 | predictions = c.spectral_cluster_laplacian(k = k, normalisation='sym') # cluster with the signed laplacian 39 | score = adjusted_rand_score(predictions, true_assignment) 40 | 41 | print(score) 42 | ``` 43 | 44 | 45 | ## Installation 46 | 47 | Install the latest version from this Github repository. 48 | ``` 49 | pip install git+https://github.com/alan-turing-institute/SigNet.git 50 | ``` 51 | 52 | 53 | ## API Reference 54 | 55 | The documentation of this package was automatically generated using Sphinx. To generate 56 | the documentation locally: 57 | 1. Install sphinx and the readthedocs theme 58 | - `pip install sphinx -U` 59 | - `pip install sphinx_rtd_theme -U` 60 | 2. Switch to the `docs` folder and build the docs with `make html` 61 | 62 | Alternatively, the documentation can be found at https://signet.readthedocs.io/en/latest/index.html. 63 | 64 | 65 | ## Tests 66 | 67 | To check that the code is working for you, try to download and run the jupyter notebook inside the "tests" folder. 68 | 69 | ## Current Authors 70 | 71 | If you have problems with the code please contact 72 | 73 | - Peter Davies: p.w.Davies@warwick.ac.uk 74 | - Aldo Glielmo: aldo.glielmo@kcl.ac.uk 75 | 76 | 77 | ## Reference 78 | 79 | - The generalised eigenproblem clustering has been proposed and analysed in: 80 | 81 | [1] Cucuringu, M., Davies, P., Glielmo, A., Tyagi, H. *SPONGE: A generalized eigenproblem for clustering signed networks.* Proceedings of Machine Learning Research 89 (2019). http://proceedings.mlr.press/v89/cucuringu19a.html 82 | 83 | 84 | 85 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = signet 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | set SPHINXPROJ=signet 13 | 14 | if "%1" == "" goto help 15 | 16 | %SPHINXBUILD% >NUL 2>NUL 17 | if errorlevel 9009 ( 18 | echo. 
19 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 20 | echo.installed, then set the SPHINXBUILD environment variable to point 21 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 22 | echo.may add the Sphinx directory to PATH. 23 | echo. 24 | echo.If you don't have Sphinx installed, grab it from 25 | echo.http://sphinx-doc.org/ 26 | exit /b 1 27 | ) 28 | 29 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 30 | goto end 31 | 32 | :help 33 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 34 | 35 | :end 36 | popd 37 | -------------------------------------------------------------------------------- /docs/source/block_models.rst: -------------------------------------------------------------------------------- 1 | The block_models module 2 | ================================================= 3 | 4 | This module contains a series of functions that can generate random graphs with a signed community structure. 5 | 6 | .. automodule:: block_models 7 | :members: 8 | 9 | -------------------------------------------------------------------------------- /docs/source/cluster.rst: -------------------------------------------------------------------------------- 1 | The cluster module 2 | ============================================= 3 | 4 | Inside the cluster module one can find the Cluster class, which is the main class of the package. It is initialised with a pair of adjacency matrices (one for the positive and one for the negative graph) and it contains all the implemented algorithms as class methods. 5 | 6 | .. automodule:: cluster 7 | :members: 8 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Configuration file for the Sphinx documentation builder. 4 | # 5 | # This file does only contain a selection of the most common options. For a 6 | # full list see the documentation: 7 | # http://www.sphinx-doc.org/en/master/config 8 | 9 | # -- Path setup -------------------------------------------------------------- 10 | 11 | # If extensions (or modules to document with autodoc) are in another directory, 12 | # add these directories to sys.path here. If the directory is relative to the 13 | # documentation root, use os.path.abspath to make it absolute, like shown here. 14 | # 15 | import os 16 | import sys 17 | sys.path.insert(0, os.path.abspath('.')) 18 | sys.path.insert(0, os.path.abspath('../..')) 19 | sys.path.insert(0, os.path.abspath('../signet')) 20 | sys.path.insert(0, os.path.abspath('../../signet')) 21 | #autodoc_mock_imports = ['utils', 'block_models', 'cluster'] 22 | # -- Project information ----------------------------------------------------- 23 | 24 | project = 'SigNet' 25 | copyright = '2018, Peter Davies and Aldo Glielmo' 26 | author = 'Peter Davies and Aldo Glielmo' 27 | 28 | # The short X.Y version 29 | version = '' 30 | # The full version, including alpha/beta/rc tags 31 | release = '0.0.1' 32 | 33 | 34 | # -- General configuration --------------------------------------------------- 35 | 36 | # If your documentation needs a minimal Sphinx version, state it here. 37 | # 38 | # needs_sphinx = '1.0' 39 | 40 | # Add any Sphinx extension module names here, as strings. They can be 41 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 42 | # ones.
43 | extensions = [ 44 | 'sphinx.ext.autodoc','sphinx.ext.napoleon' 45 | ] 46 | 47 | # Add any paths that contain templates here, relative to this directory. 48 | templates_path = ['_templates'] 49 | 50 | # The suffix(es) of source filenames. 51 | # You can specify multiple suffix as a list of string: 52 | # 53 | # source_suffix = ['.rst', '.md'] 54 | source_suffix = '.rst' 55 | 56 | # The master toctree document. 57 | master_doc = 'index' 58 | 59 | # The language for content autogenerated by Sphinx. Refer to documentation 60 | # for a list of supported languages. 61 | # 62 | # This is also used if you do content translation via gettext catalogs. 63 | # Usually you set "language" from the command line for these cases. 64 | language = None 65 | 66 | # List of patterns, relative to source directory, that match files and 67 | # directories to ignore when looking for source files. 68 | # This pattern also affects html_static_path and html_extra_path . 69 | exclude_patterns = [] 70 | 71 | # The name of the Pygments (syntax highlighting) style to use. 72 | pygments_style = 'sphinx' 73 | 74 | 75 | # -- Options for HTML output ------------------------------------------------- 76 | 77 | # The theme to use for HTML and HTML Help pages. See the documentation for 78 | # a list of builtin themes. 79 | # 80 | html_theme = 'sphinx_rtd_theme' #'alabaster' 81 | 82 | # Theme options are theme-specific and customize the look and feel of a theme 83 | # further. For a list of options available for each theme, see the 84 | # documentation. 85 | # 86 | # html_theme_options = {} 87 | 88 | # Add any paths that contain custom static files (such as style sheets) here, 89 | # relative to this directory. They are copied after the builtin static files, 90 | # so a file named "default.css" will overwrite the builtin "default.css". 91 | html_static_path = ['_static'] 92 | 93 | # Custom sidebar templates, must be a dictionary that maps document names 94 | # to template names. 95 | # 96 | # The default sidebars (for documents that don't match any pattern) are 97 | # defined by theme itself. Builtin themes are using these templates by 98 | # default: ``['localtoc.html', 'relations.html', 'sourcelink.html', 99 | # 'searchbox.html']``. 100 | # 101 | # html_sidebars = {} 102 | 103 | 104 | # -- Options for HTMLHelp output --------------------------------------------- 105 | 106 | # Output file base name for HTML help builder. 107 | htmlhelp_basename = 'signetdoc' 108 | 109 | 110 | # -- Options for LaTeX output ------------------------------------------------ 111 | 112 | latex_elements = { 113 | # The paper size ('letterpaper' or 'a4paper'). 114 | # 115 | # 'papersize': 'letterpaper', 116 | 117 | # The font size ('10pt', '11pt' or '12pt'). 118 | # 119 | # 'pointsize': '10pt', 120 | 121 | # Additional stuff for the LaTeX preamble. 122 | # 123 | # 'preamble': '', 124 | 125 | # Latex figure (float) alignment 126 | # 127 | # 'figure_align': 'htbp', 128 | } 129 | 130 | # Grouping the document tree into LaTeX files. List of tuples 131 | # (source start file, target name, title, 132 | # author, documentclass [howto, manual, or own class]). 133 | latex_documents = [ 134 | (master_doc, 'signet.tex', 'signet Documentation', 135 | 'Peter Davies and Aldo Glielmo', 'manual'), 136 | ] 137 | 138 | 139 | # -- Options for manual page output ------------------------------------------ 140 | 141 | # One entry per manual page. List of tuples 142 | # (source start file, name, description, authors, manual section). 
143 | man_pages = [ 144 | (master_doc, 'signet', 'signet Documentation', 145 | [author], 1) 146 | ] 147 | 148 | 149 | # -- Options for Texinfo output ---------------------------------------------- 150 | 151 | # Grouping the document tree into Texinfo files. List of tuples 152 | # (source start file, target name, title, author, 153 | # dir menu entry, description, category) 154 | texinfo_documents = [ 155 | (master_doc, 'signet', 'signet Documentation', 156 | author, 'signet', 'One line description of project.', 157 | 'Miscellaneous'), 158 | ] 159 | 160 | 161 | # -- Extension configuration ------------------------------------------------- 162 | 163 | 164 | # def run_apidoc(_): 165 | # from sphinx.apidoc import main 166 | # import os 167 | # import sys 168 | # sys.path.append(os.path.join(os.path.dirname(__file__), '..')) 169 | # sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..')) 170 | # print(os.path.join(os.path.dirname(__file__), '..', '..')) 171 | # cur_dir = os.path.abspath(os.path.dirname(__file__)) 172 | # module = os.path.join(cur_dir,"..","..","signet") 173 | # print(cur_dir) 174 | # print(module) 175 | # #main(['-e', '-o', cur_dir, module, '--force']) 176 | # main(cur_dir , module) 177 | # 178 | # def setup(app): 179 | # app.connect('builder-inited', run_apidoc) 180 | # 181 | # 182 | 183 | # 184 | # import os 185 | # import sys 186 | # 187 | # def add_to_path(): 188 | # 189 | # partial_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../') 190 | # workspace_path = os.path.abspath(partial_path) 191 | # assert os.path.exists(workspace_path) 192 | # 193 | # projects = [] 194 | # 195 | # for current, dirs, c in os.walk(str(workspace_path)): 196 | # for dir in dirs: 197 | # 198 | # project_path = os.path.join(workspace_path, dir, 'src') 199 | # 200 | # if os.path.exists(project_path): 201 | # projects.append(project_path) 202 | # 203 | # for project_str in projects: 204 | # sys.path.append(project_str) 205 | # 206 | # add_to_path() 207 | 208 | -------------------------------------------------------------------------------- /docs/source/implemented_algorithms.rst: -------------------------------------------------------------------------------- 1 | Implemented Algorithms 2 | ====================== 3 | 4 | The algorithms currently implemented in the package can be *clustered* in three broad groups 5 | 6 | 7 | Spectral clustering 8 | ------------------- 9 | 10 | These algorithms involve finding the top (lowest or highest) eigenvectors of a specific matrix. Depending on the matrix used, one can distinguish several algorithms. Some well known matrices which can be used for signed networks are: 11 | 12 | * Adjacency 13 | * Signed Laplacian matrix 14 | 15 | 16 | Semidefinite clustering 17 | ----------------------- 18 | 19 | These algorithms involve the solution of a semidefinite programming optimisation problem. 20 | 21 | 22 | Generalised eigenproblem clustering 23 | ----------------------------------- 24 | 25 | These algorithms involve the finding the top (lowest or highest) eigenvectors of a pair of matrices. -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. signet documentation master file, created by 2 | sphinx-quickstart on Wed Sep 5 17:28:18 2018. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to the SigNet package! 
7 | ================================== 8 | 9 | SigNet is a Python package for clustering of Signed Networks. 10 | 11 | The code can be found on GitHub at https://github.com/alan-turing-institute/SigNet . 12 | 13 | .. toctree:: 14 | :maxdepth: 2 15 | :caption: Table of Contents 16 | 17 | installation 18 | implemented_algorithms 19 | usage 20 | modules 21 | 22 | Indices and tables 23 | ------------------ 24 | 25 | * :ref:`genindex` 26 | * :ref:`modindex` 27 | * :ref:`search` 28 | -------------------------------------------------------------------------------- /docs/source/installation.rst: -------------------------------------------------------------------------------- 1 | 2 | Installation 3 | ============================ 4 | 5 | 6 | Installing the requirements 7 | --------------------------- 8 | 9 | 10 | This package is based on numpy, scipy, networkx, sklearn and cvxpy. These can be easily installed using anaconda or pip. Alternatively, they will be automatically installed with the package. 11 | 12 | 13 | Installing the package 14 | ---------------------- 15 | 16 | Install the latest version from the Github repository via 17 | 18 | .. code-block:: bash 19 | 20 | pip install git+https://github.com/alan-turing-institute/SigNet.git 21 | 22 | -------------------------------------------------------------------------------- /docs/source/modules.rst: -------------------------------------------------------------------------------- 1 | Modules (API reference) 2 | ======================= 3 | 4 | The package contains three modules: cluster, block_models and utils. 5 | 6 | 7 | .. toctree:: 8 | :maxdepth: 2 9 | 10 | cluster 11 | block_models 12 | utils -------------------------------------------------------------------------------- /docs/source/usage.rst: -------------------------------------------------------------------------------- 1 | Typical usage of the package 2 | ============================ 3 | 4 | A typical usage of SigNet involves the initialisation of the Cluster class with a given pair of adjacency matrices and a subsequent application of a specific method. 5 | 6 | .. code-block:: python 7 | 8 | from signet.cluster import Cluster 9 | from signet.block_models import SSMB 10 | from sklearn.metrics import adjusted_rand_score 11 | 12 | 13 | # simple test on the signed stochastic block model 14 | 15 | n = 50000 # number of nodes 16 | k = 2 # number of clusters 17 | eta = 0.1 # sign flipping probability 18 | p = 0.0002 # edge probability 19 | 20 | (Ap, An), true_assignment = SSBM(n = n, k = k, pin = p, etain = eta) # construct a graph 21 | 22 | c = Cluster((Ap, An)) 23 | 24 | predictions = c.spectral_cluster_laplacian(k = k, normalisation='sym') # cluster with the signed laplacian 25 | score = adjusted_rand_score(predictions, true_assignment) 26 | 27 | print(score) 28 | 29 | -------------------------------------------------------------------------------- /docs/source/utils.rst: -------------------------------------------------------------------------------- 1 | The utils module 2 | ================ 3 | 4 | The utils module mainly contains functions that are used elsewhere in the package. 5 | It also contains the function *objscore* calculating the value of an arbitrary objective function on a given graph partition. 6 | 7 | .. automodule:: utils 8 | :members: 9 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | . 
2 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | from setuptools import setup 3 | 4 | # Install numpy separately, as the ecos dependency of cvxpy fails to install if numpy 5 | # is not already installed 6 | subprocess.Popen(["python", '-m', 'pip', 'install', 'numpy']) 7 | 8 | setup(name='SigNet', 9 | version='0.1.0', 10 | description='A package for clustering signed networks', 11 | long_description=open('README.md').read(), 12 | author='Peter Davies, Aldo Glielmo', 13 | author_email='p.w.davies@warwick.ac.uk, aldo.glielmo@kcl.ac.uk', 14 | packages=['signet'], 15 | install_requires=['scikit-learn','cvxpy', 'networkx'], 16 | zip_safe=False) -------------------------------------------------------------------------------- /signet/__init__.py: -------------------------------------------------------------------------------- 1 | from . import utils 2 | from . import burer_monteiro_sparse -------------------------------------------------------------------------------- /signet/block_models.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.sparse as ss 3 | import numpy.random as rnd 4 | import math 5 | import networkx as nx 6 | 7 | def SSBM(n, k, pin, etain, pout=None, etaout=None, values='ones', sizes='uniform'): 8 | """A signed stochastic block model graph generator. 9 | 10 | Args: 11 | n: (int) Number of nodes. 12 | k: (int) Number of communities. 13 | pin: (float) Sparsity value within communities. 14 | etain: (float) Noise value within communities. 15 | pout: (float) Sparsity value between communities. 16 | etaout: (float) Noise value between communities. 17 | values: (string) Edge weight distribution (within community and without sign flip; otherwise weight is negated): 18 | 'ones': Weights are 1. 19 | 'gaussian': Weights are Gaussian, with variance 1 and expectation 1. 20 | 'exp': Weights are exponentially distributed, with parameter 1. 21 | 'uniform': Weights are uniformly distributed between 0 and 1. 22 | sizes: (string) How to generate community sizes: 23 | 'uniform': All communities are the same size (up to rounding). 24 | 'random': Nodes are assigned to communities at random. 25 | 'uneven': Communities are given affinities uniformly at random, and nodes are randomly assigned to communities weighted by their affinity. 26 | 27 | Returns: 28 | (a,b),c where a is a sparse n by n matrix of positive edges, b is a sparse n by n matrix of negative edges, and c is an array of cluster membership.
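Example:
    A minimal sketch of a typical call, mirroring the README usage (the parameter values here are illustrative, not defaults)::

        (Ap, An), truth = SSBM(n=1000, k=5, pin=0.01, etain=0.05)
        print(Ap.shape, An.shape, len(truth))  # (1000, 1000) (1000, 1000) 1000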
29 | 30 | """ 31 | 32 | if pout == None: 33 | pout = pin 34 | if etaout == None: 35 | etaout = etain 36 | 37 | rndinrange = math.floor(n * n * pin / 2 + n) 38 | rndin = rnd.geometric(pin, size=rndinrange) 39 | flipinrange = math.floor(n * n / 2 * pin + n) 40 | flipin = rnd.binomial(1, etain, size=flipinrange) 41 | rndoutrange = math.floor(n * n / 2 * pout + n) 42 | rndout = rnd.geometric(pout, size=rndoutrange) 43 | flipoutrange = math.floor(n * n / 2 * pout + n) 44 | flipout = rnd.binomial(1, etaout, size=flipoutrange) 45 | assign = np.zeros(n, dtype=int) 46 | ricount = 0 47 | rocount = 0 48 | ficount = 0 49 | focount = 0 50 | 51 | size = [0] * k 52 | 53 | if sizes == 'uniform': 54 | perm = rnd.permutation(n) 55 | size = [math.floor((i + 1) * n / k) - math.floor((i) * n / k) for i in range(k)] 56 | tot = size[0] 57 | cluster = 0 58 | i = 0 59 | while i < n: 60 | if tot == 0: 61 | cluster += 1 62 | tot += size[cluster] 63 | else: 64 | tot -= 1 65 | assign[perm[i]] = cluster 66 | i += 1 67 | 68 | elif sizes == 'random': 69 | for i in range(n): 70 | assign[i] = rnd.randint(0, k) 71 | size[assign[i]] += 1 72 | perm = [x for clus in range(k) for x in range(n) if assign[x] == clus] 73 | 74 | elif sizes == 'uneven': 75 | probs = rnd.ranf(size=k) 76 | probs = probs / probs.sum() 77 | for i in range(n): 78 | rand = rnd.ranf() 79 | cluster = 0 80 | tot = 0 81 | while rand > tot: 82 | tot += probs[cluster] 83 | cluster += 1 84 | assign[i] = cluster - 1 85 | size[cluster - 1] += 1 86 | perm = [x for clus in range(k) for x in range(n) if assign[x] == clus] 87 | print('Cluster sizes: ', size) 88 | 89 | else: 90 | raise ValueError('please select valid sizes') 91 | 92 | index = -1 93 | last = [0] * k 94 | for i in range(k): 95 | index += size[i] 96 | last[i] = index 97 | 98 | pdat = [] 99 | prow = [] 100 | pcol = [] 101 | ndat = [] 102 | nrow = [] 103 | ncol = [] 104 | for x in range(n): 105 | me = perm[x] 106 | y = x + rndin[ricount] 107 | ricount += 1 108 | while y <= last[assign[me]]: 109 | val = fill(values) 110 | if flipin[ficount] == 1: 111 | ndat.append(val) 112 | ndat.append(val) 113 | ncol.append(me) 114 | ncol.append(perm[y]) 115 | nrow.append(perm[y]) 116 | nrow.append(me) 117 | else: 118 | pdat.append(val) 119 | pdat.append(val) 120 | pcol.append(me) 121 | pcol.append(perm[y]) 122 | prow.append(perm[y]) 123 | prow.append(me) 124 | ficount += 1 125 | y += rndin[ricount] 126 | ricount += 1 127 | y = last[assign[me]] + rndout[rocount] 128 | rocount += 1 129 | while y < n: 130 | val = fill(values) 131 | if flipout[focount] != 1: 132 | ndat.append(val) 133 | ndat.append(val) 134 | ncol.append(me) 135 | ncol.append(perm[y]) 136 | nrow.append(perm[y]) 137 | nrow.append(me) 138 | else: 139 | pdat.append(val) 140 | pdat.append(val) 141 | pcol.append(me) 142 | pcol.append(perm[y]) 143 | prow.append(perm[y]) 144 | prow.append(me) 145 | focount += 1 146 | y += rndout[rocount] 147 | rocount += 1 148 | return (ss.coo_matrix((pdat, (prow, pcol)), shape=(n, n)).tocsc(), 149 | ss.coo_matrix((ndat, (nrow, ncol)), shape=(n, n)).tocsc()), assign 150 | 151 | 152 | def SBAM(n, k, p, eta): 153 | """A signed Barabási–Albert model graph generator. 154 | 155 | Args: 156 | n: (int) Number of nodes. 157 | k: (int) Number of communities. 158 | p: (float) Sparsity value. 159 | eta: (float) Noise value. 160 | 161 | Returns: 162 | (a,b),c where a is a sparse n by n matrix of positive edges, b is a sparse n by n matrix of negative edges c is an array of cluster membership. 
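Example:
    A hedged usage sketch; internally p is matched to the Barabási–Albert attachment parameter via m = int(n * p / 2)::

        (Ap, An), truth = SBAM(n=500, k=3, p=0.05, eta=0.1)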
163 | 164 | """ 165 | 166 | 167 | # correspondence between m and p (by equating mean degree) 168 | m = int(n * p / 2) 169 | # generate a positive BA graph 170 | net = nx.barabasi_albert_graph(n=n, m=m, seed=None) 171 | 172 | ndk = int(n / k) 173 | 174 | # set signed, noisy community structure 175 | 176 | truth = np.repeat(np.arange(k - 1), ndk) 177 | truth = np.hstack((truth, (k - 1) * np.ones(n - ndk * (k - 1)))) 178 | 179 | for u, v, d in net.edges(data=True): 180 | rnd = np.random.uniform() 181 | 182 | if truth[u] == truth[v]: 183 | if rnd >= eta: 184 | d['weight'] = 1 185 | else: 186 | d['weight'] = -1 187 | else: 188 | if rnd >= eta: 189 | d['weight'] = -1 190 | else: 191 | d['weight'] = 1 192 | 193 | truth = truth[net.nodes()] 194 | A = nx.to_scipy_sparse_matrix(net, format='csc') 195 | 196 | Abar = abs(A) 197 | A_p = (A + Abar) / 2 198 | A_n = -(A - Abar) / 2 199 | A_p.eliminate_zeros() 200 | A_n.eliminate_zeros() 201 | 202 | return (A_p, A_n), truth 203 | 204 | 205 | def SRBM(n, k, p, eta): 206 | """A signed regular graph model generator. 207 | 208 | Args: 209 | n: (int) Number of nodes. 210 | k: (int) Number of communities. 211 | p: (float) Sparsity value. 212 | eta: (float) Noise value. 213 | 214 | Returns: 215 | (a,b),c where a is a sparse n by n matrix of positive edges, b is a sparse n by n matrix of negative edges c is an array of cluster membership. 216 | 217 | """ 218 | 219 | c = int(n * p) 220 | net = nx.random_regular_graph(n=n, d=c) 221 | ndk = int(n / k) 222 | 223 | # set signed, noisy community structure 224 | 225 | truth = np.repeat(np.arange(k - 1), ndk) 226 | truth = np.hstack((truth, (k - 1) * np.ones(n - ndk * (k - 1)))) 227 | 228 | for u, v, d in net.edges(data=True): 229 | 230 | rnd = np.random.uniform() 231 | if truth[u] == truth[v]: 232 | if rnd >= eta: 233 | d['weight'] = 1 234 | else: 235 | d['weight'] = -1 236 | else: 237 | if rnd >= eta: 238 | d['weight'] = -1 239 | else: 240 | d['weight'] = 1 241 | 242 | 243 | truth = truth[net.nodes()] 244 | A = nx.to_scipy_sparse_matrix(net, format='csc') 245 | Abar = abs(A) 246 | 247 | A_p = (A + Abar) / 2 248 | A_n = -(A - Abar) / 2 249 | A_p.eliminate_zeros() 250 | A_n.eliminate_zeros() 251 | 252 | return (A_p, A_n), truth 253 | 254 | 255 | def fill(values='ones'): 256 | if values == 'ones': 257 | return float(1) 258 | elif values == 'gaussian': 259 | return np.random.normal(1) 260 | elif values == 'exp': 261 | return np.random.exponential() 262 | elif values == 'uniform': 263 | return np.random.uniform() 264 | 265 | 266 | if __name__ == '__main__': 267 | (p, n), t = SBAM(n=100, k=3, p=0.1, eta=0.1) 268 | print(p.todense()) 269 | -------------------------------------------------------------------------------- /signet/burer_monteiro_sparse.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy import optimize as opt 3 | 4 | 5 | def augmented_lagrangian(A, r, printing=False, init=None): 6 | """Augmented Lagrangian optimisation of the BM problem. 7 | 8 | It finds the matrix X which maximises the Frobenius norm (A, X.dot(X.T)) 9 | with the constraint of having unit elements along the diagonal of X.dot(X.T). 10 | 11 | Args: 12 | A (csc matrix): The adjacency matrix 13 | r (int): The rank of the final solution 14 | printing (bool): Whether to print optimisation information 15 | init (array): Initial guess for the solution. If None a random matrix is used. 
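Example:
    A minimal sketch, assuming A is a symmetric scipy sparse matrix (e.g. A = Ap - An from a block model)::

        X = augmented_lagrangian(A, r=8)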
16 | 17 | Returns: 18 | array: The optimal matrix of dimensions n x r 19 | """ 20 | 21 | n, _ = A.shape 22 | y = np.ones(n).reshape((-1, 1)) 23 | if init is None: 24 | X = np.random.uniform(-1, 1, size=(n, r)) 25 | else: 26 | X = init 27 | penalty = 1 28 | gamma = 10 29 | eta = .25 30 | target = .01 # 0.01 31 | vec = _constraint_term_vec(n, X) 32 | v = vec.reshape((1, -1)).dot(vec) 33 | v_best = v 34 | while v > target: 35 | Rv = _matrix_to_vector(X) 36 | if printing == True: 37 | print('Starting L-BFGS-B on augmented Lagrangian..., v is ', v) 38 | optimizer = opt.minimize(lambda R_vec: _augmented_lagrangian_func( 39 | R_vec, A, y, penalty, n, r), Rv, jac=lambda R_vec: _jacobian(R_vec, A, n, y, penalty, r), method="L-BFGS-B") 40 | if printing == True: 41 | print('Finishing L-BFGS-B on augmented Lagrangian...') 42 | X = _vector_to_matrix(optimizer.x, r) 43 | vec = _constraint_term_vec(n, X) 44 | v = vec.reshape((1, -1)).dot(vec) 45 | if printing == True: 46 | print('Finish updating variables...') 47 | if v < eta * v_best: 48 | y = y - penalty * vec 49 | v_best = v 50 | else: 51 | penalty = gamma * penalty 52 | if printing == True: 53 | print('Augmented Lagrangian terminated.') 54 | return X 55 | 56 | 57 | def _generate_random_rect(n, k): 58 | """ 59 | Returns a random initialization of matrix. 60 | """ 61 | 62 | X = np.random.uniform(-1, 1, (n, k)) 63 | for i in range(n): 64 | X[i, :] = X[i, :] / np.linalg.norm(X[i, :]) 65 | return X 66 | 67 | 68 | def _basis_vector(size, index): 69 | """ 70 | Returns a basis vector with 1 on certain index. 71 | """ 72 | 73 | vec = np.zeros(size) 74 | vec[index] = 1 75 | return vec 76 | 77 | 78 | def _trace_vec(X): 79 | """ 80 | Returns a vector containing norm square of row vectors of X. 81 | """ 82 | 83 | vec = np.einsum('ij, ij -> i', X, X) 84 | 85 | return vec.reshape((-1, 1)) 86 | 87 | 88 | def _constraint_term_vec(n, X): 89 | """ 90 | Returns the vector required to compute objective function value. 91 | """ 92 | 93 | vec = _trace_vec(X) 94 | constraint = vec - np.ones(n).reshape((-1, 1)) 95 | 96 | return constraint 97 | 98 | 99 | def _augmented_lagrangian_func(Xv, A, y, penalty, n, k): 100 | """ 101 | Returns the value of objective function of augmented Lagrangian. 102 | """ 103 | 104 | X = _vector_to_matrix(Xv, k) 105 | 106 | vec = _constraint_term_vec(n, X) 107 | 108 | AX = A.dot(X) 109 | 110 | objective1 = - np.einsum('ij, ij -> ', X, AX) # Trace(Y*X*X.T) 111 | 112 | objective2 = - y.reshape((1, -1)).dot(vec) 113 | 114 | objective3 = + penalty / 2 * vec.reshape((1, -1)).dot(vec) 115 | 116 | objective = objective1 + objective2 + objective3 117 | 118 | return objective 119 | 120 | 121 | def _vector_to_matrix(Xv, k): 122 | """ 123 | Returns a matrix from reforming a vector. 124 | """ 125 | U = Xv.reshape((-1, k)) 126 | return U 127 | 128 | 129 | def _matrix_to_vector(X): 130 | """ 131 | Returns a vector from flattening a matrix. 132 | """ 133 | 134 | u = X.reshape((1, -1)).ravel() 135 | return u 136 | 137 | 138 | def _jacobian(Xv, Y, n, y, penalty, k): 139 | """ 140 | Returns the Jacobian matrix of the augmented Lagrangian problem. 141 | """ 142 | 143 | X = _vector_to_matrix(Xv, k) 144 | 145 | vec_trace_A_ = _trace_vec(X).ravel() - 1. 
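    # vec_trace_A_ holds the per-row constraint violations ||x_i||^2 - 1;
    # below it weights the penalty part of the gradient.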
146 | 147 | vec_second_part = np.einsum('ij, i -> ij', X, y.ravel()) 148 | 149 | vec_third_part = np.einsum('ij, i -> ij', X, vec_trace_A_) 150 | 151 | jacobian = - 2 * Y.dot(X) - 2 * vec_second_part + \ 152 | 2 * penalty * vec_third_part 153 | 154 | jac_vec = _matrix_to_vector(jacobian) 155 | return jac_vec.reshape((1, -1)).ravel() 156 | 157 | 158 | if __name__ == "__main__": 159 | from block_models import SSBM 160 | 161 | np.set_printoptions(precision=1) 162 | n = 1000 163 | At, assig = SSBM(n=n, k=2, pin=0.9, etain=0.1) 164 | A = At[0] - At[1] 165 | 166 | r = int(np.sqrt(2 * n)) 167 | X = augmented_lagrangian(A=A, r=r, printing=True, init=None) 168 | -------------------------------------------------------------------------------- /signet/cluster.py: -------------------------------------------------------------------------------- 1 | import scipy.sparse as ss 2 | import scipy as sp 3 | import sklearn.cluster as sl 4 | import sklearn.metrics as sm 5 | import numpy as np 6 | import math 7 | from signet.utils import sqrtinvdiag, invdiag, cut, merge, objscore 8 | from signet.burer_monteiro_sparse import augmented_lagrangian 9 | 10 | np.set_printoptions(2) 11 | 12 | 13 | class Cluster: 14 | """Class containing all clustering algorithms for signed networks. 15 | 16 | This should be initialised with a tuple of two csc matrices, representing the positive and negative adjacency 17 | matrix respectively (A^+ and A^-). It contains clustering algorithms as methods and graph specifications 18 | as attributes. 19 | 20 | Args: 21 | data (tuple): Tuple containing positive and negative adjacency matrix (A^+, A^-). 22 | 23 | Attributes: 24 | p (csc matrix): positive adjacency matrix. 25 | n (csc matrix): negative adjacency matrix. 26 | A (csc matrix): total adjacency matrix. 27 | D_p (csc matrix): diagonal degree matrix of positive adjacency. 28 | D_n (csc matrix): diagonal degree matrix of negative adjacency. 29 | Dbar (csc matrix): diagonal signed degree matrix. 30 | normA (csc matrix): symmetrically normalised adjacency matrix. 31 | size (int): number of nodes in the network. 32 | 33 | """ 34 | 35 | def __init__(self, data): 36 | self.p = data[0] 37 | self.n = data[1] 38 | self.A = (self.p - self.n).tocsc() 39 | self.D_p = ss.diags(self.p.sum(axis=0).tolist(), [0]).tocsc() 40 | self.D_n = ss.diags(self.n.sum(axis=0).tolist(), [0]).tocsc() 41 | self.Dbar = (self.D_p + self.D_n) 42 | d = sqrtinvdiag(self.Dbar) 43 | self.normA = d * self.A * d 44 | self.size = self.p.shape[0] 45 | 46 | def spectral_cluster_adjacency(self, k=2, normalisation='sym_sep', eigens=None, mi=None): 47 | 48 | """Clusters the graph using eigenvectors of the adjacency matrix. 49 | 50 | Args: 51 | k (int, or list of int) : The number of clusters to identify. If a list is given, the output is a corresponding list. 52 | normalisation (string): How to normalise for cluster size: 53 | 'none' - do not normalise. 54 | 'sym' - symmetric normalisation. 55 | 'rw' - random walk normalisation. 56 | 'sym_sep' - separate symmetric normalisation of positive and negative parts. 57 | 'rw_sep' - separate random walk normalisation of positive and negative parts. 58 | 59 | Returns: 60 | array of int, or list of array of int: Output assignment to clusters. 61 | 62 | Other parameters: 63 | eigens (int): The number of eigenvectors to take. Defaults to k. 64 | mi (int): The maximum number of iterations for which to run eigenvalue solvers. Defaults to number of nodes.
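Example:
    A minimal sketch, assuming (Ap, An) come from block_models.SSBM as in the README::

        c = Cluster((Ap, An))
        labels = c.spectral_cluster_adjacency(k=5, normalisation='sym')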
65 | 66 | """ 67 | listk = False 68 | if isinstance(k, list): 69 | kk = k 70 | k = max(k) 71 | listk = True 72 | 73 | if eigens == None: 74 | eigens = k 75 | if mi == None: 76 | mi = self.size 77 | 78 | symmetric = True 79 | 80 | if normalisation == 'none': 81 | matrix = self.A 82 | 83 | elif normalisation == 'sym': 84 | d = sqrtinvdiag(self.Dbar) 85 | matrix = d * self.A * d 86 | 87 | elif normalisation == 'rw': 88 | d = invdiag(self.Dbar) 89 | matrix = d * self.A 90 | symmetric = False 91 | 92 | elif normalisation == 'sym_sep': 93 | d = sqrtinvdiag(self.D_p) 94 | matrix = d * self.p * d 95 | d = sqrtinvdiag(self.D_n) 96 | matrix = matrix - (d * self.n * d) 97 | 98 | elif normalisation == 'rw_sep': 99 | d = invdiag(self.D_p) 100 | matrix = d * self.p 101 | d = invdiag(self.D_n) 102 | matrix = matrix - (d * self.n) 103 | symmetric = False 104 | 105 | elif normalisation == 'neg': 106 | pos = self.p 107 | d = invdiag(self.D_n) 108 | neg = d * self.n 109 | x = (pos.sum() / neg.sum()) 110 | neg = neg * x 111 | matrix = pos - neg 112 | 113 | if symmetric: 114 | (w, v) = ss.linalg.eigsh(matrix, eigens, maxiter=mi, which='LA') 115 | else: 116 | (w, v) = ss.linalg.eigs(matrix, eigens, maxiter=mi, which='LR') 117 | v = v * w # weight eigenvalues by eigenvectors, since larger eigenvectors are more likely to be informative 118 | if not listk: 119 | v = np.atleast_2d(v) 120 | x = sl.KMeans(n_clusters=k).fit(v) 121 | return x.labels_ 122 | else: 123 | return [sl.KMeans(n_clusters=x).fit(np.atleast_2d(v[:, 1 - x:])).labels_ for x in kk] 124 | 125 | def spectral_cluster_adjacency_reg(self, k=2, normalisation='sym_sep', tau_p=None, tau_n=None, eigens=None, 126 | mi=None): 127 | """Clusters the graph using eigenvectors of the regularised adjacency matrix. 128 | 129 | Args: 130 | k (int): The number of clusters to identify. 131 | normalisation (string): How to normalise for cluster size: 132 | 'none' - do not normalise. 133 | 'sym' - symmetric normalisation. 134 | 'rw' - random walk normalisation. 135 | 'sym_sep' - separate symmetric normalisation of positive and negative parts. 136 | 'rw_sep' - separate random walk normalisation of positive and negative parts. 137 | tau_p (int): Regularisation coefficient for positive adjacency matrix. 138 | tau_n (int): Regularisation coefficient for negative adjacency matrix. 139 | 140 | Returns: 141 | array of int: Output assignment to clusters. 142 | 143 | Other parameters: 144 | eigens (int): The number of eigenvectors to take. Defaults to k. 145 | mi (int): The maximum number of iterations for which to run eigenvlue solvers. Defaults to number of nodes. 
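Example:
    A hedged sketch; when tau_p and tau_n are omitted, both are estimated from the mean signed degree::

        labels = c.spectral_cluster_adjacency_reg(k=5, normalisation='sym_sep')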
146 | 147 | """ 148 | 149 | if eigens == None: 150 | eigens = k 151 | 152 | if mi == None: 153 | mi = self.size 154 | 155 | if tau_p == None or tau_n == None: 156 | tau_p = 0.25 * np.mean(self.Dbar.data) / self.size 157 | tau_n = 0.25 * np.mean(self.Dbar.data) / self.size 158 | 159 | symmetric = True 160 | 161 | p_tau = self.p.copy() 162 | n_tau = self.n.copy() 163 | p_tau.data += tau_p 164 | n_tau.data += tau_n 165 | 166 | Dbar_c = self.size - self.Dbar.diagonal() 167 | 168 | Dbar_tau_s = (p_tau + n_tau).sum(axis=0) + (Dbar_c * abs(tau_p - tau_n))[None, :] 169 | 170 | Dbar_tau = ss.diags(Dbar_tau_s.tolist(), [0]) 171 | 172 | if normalisation == 'none': 173 | matrix = self.A 174 | delta_tau = tau_p - tau_n 175 | 176 | def mv(v): 177 | return matrix.dot(v) + delta_tau * v.sum() 178 | 179 | 180 | elif normalisation == 'sym': 181 | d = sqrtinvdiag(Dbar_tau) 182 | matrix = d * self.A * d 183 | dd = d.diagonal() 184 | tau_dd = (tau_p - tau_n) * dd 185 | 186 | def mv(v): 187 | return matrix.dot(v) + tau_dd * dd.dot(v) 188 | 189 | elif normalisation == 'sym_sep': 190 | 191 | diag_corr = ss.diags([self.size * tau_p] * self.size).tocsc() 192 | dp = sqrtinvdiag(self.D_p + diag_corr) 193 | 194 | matrix = dp * self.p * dp 195 | 196 | diag_corr = ss.diags([self.size * tau_n] * self.size).tocsc() 197 | dn = sqrtinvdiag(self.D_n + diag_corr) 198 | 199 | matrix = matrix - (dn * self.n * dn) 200 | 201 | dpd = dp.diagonal() 202 | dnd = dn.diagonal() 203 | tau_dp = tau_p * dpd 204 | tau_dn = tau_n * dnd 205 | 206 | def mv(v): 207 | return matrix.dot(v) + tau_dp * dpd.dot(v) - tau_dn * dnd.dot(v) 208 | 209 | else: 210 | print('Error: choose normalisation') 211 | 212 | matrix_o = ss.linalg.LinearOperator(matrix.shape, matvec=mv) 213 | 214 | if symmetric: 215 | (w, v) = ss.linalg.eigsh(matrix_o, eigens, maxiter=mi, which='LA') 216 | else: 217 | (w, v) = ss.linalg.eigs(matrix_o, eigens, maxiter=mi, which='LR') 218 | 219 | v = v * w # weight eigenvalues by eigenvectors, since larger eigenvectors are more likely to be informative 220 | v = np.atleast_2d(v) 221 | x = sl.KMeans(n_clusters=k).fit(v) 222 | return x.labels_ 223 | 224 | def spectral_cluster_bnc(self, k=2, normalisation='sym', eigens=None, mi=None): 225 | 226 | """Clusters the graph by using the Balance Normalised Cut or Balance Ratio Cut objective matrix. 227 | 228 | Args: 229 | k (int, or list of int) : The number of clusters to identify. If a list is given, the output is a corresponding list. 230 | normalisation (string): How to normalise for cluster size: 231 | 'none' - do not normalise. 232 | 'sym' - symmetric normalisation. 233 | 'rw' - random walk normalisation. 234 | 235 | Returns: 236 | array of int, or list of array of int: Output assignment to clusters. 237 | 238 | Other parameters: 239 | eigens (int): The number of eigenvectors to take. Defaults to k. 240 | mi (int): The maximum number of iterations for which to run eigenvlue solvers. Defaults to number of nodes. 
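Example:
    A minimal sketch, assuming c is an initialised Cluster object::

        labels = c.spectral_cluster_bnc(k=5, normalisation='sym')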
241 | 242 | """ 243 | 244 | listk = False 245 | if isinstance(k, list): 246 | kk = k 247 | k = max(k) 248 | listk = True 249 | 250 | if eigens == None: 251 | eigens = k 252 | if mi == None: 253 | mi = self.size 254 | 255 | symmetric = True 256 | 257 | if normalisation == 'none': 258 | matrix = self.A + self.D_n 259 | 260 | elif normalisation == 'sym': 261 | d = sqrtinvdiag(self.Dbar) 262 | matrix = d * (self.A + self.D_n) * d 263 | 264 | elif normalisation == 'rw': 265 | d = invdiag(self.Dbar) 266 | matrix = d * (self.A + self.D_n) 267 | symmetric = False 268 | 269 | if symmetric: 270 | (w, v) = ss.linalg.eigsh(matrix, eigens, maxiter=mi, which='LA') 271 | else: 272 | (w, v) = ss.linalg.eigs(matrix, eigens, maxiter=mi, which='LR') 273 | 274 | v = v * w # weight eigenvalues by eigenvectors, since larger eigenvectors are more likely to be informative 275 | 276 | if not listk: 277 | v = np.atleast_2d(v) 278 | x = sl.KMeans(n_clusters=k).fit(v) 279 | return x.labels_ 280 | else: 281 | return [sl.KMeans(n_clusters=x).fit(np.atleast_2d(v[:, 1 - x:])).labels_ for x in kk] 282 | 283 | def spectral_cluster_laplacian(self, k=2, normalisation='sym_sep', eigens=None, mi=None): 284 | 285 | """Clusters the graph using the eigenvectors of the graph signed Laplacian. 286 | 287 | Args: 288 | k (int, or list of int) : The number of clusters to identify. If a list is given, the output is a corresponding list. 289 | normalisation (string): How to normalise for cluster size: 290 | 'none' - do not normalise. 291 | 'sym' - symmetric normalisation. 292 | 'rw' - random walk normalisation. 293 | 'sym_sep' - separate symmetric normalisation of positive and negative parts. 294 | 'rw_sep' - separate random walk normalisation of positive and negative parts. 295 | 296 | Returns: 297 | array of int, or list of array of int: Output assignment to clusters. 298 | 299 | Other parameters: 300 | eigens (int): The number of eigenvectors to take. Defaults to k. 301 | mi (int): The maximum number of iterations for which to run eigenvlue solvers. Defaults to number of nodes. 
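Example:
    A minimal sketch matching the README usage, assuming c is an initialised Cluster object::

        labels = c.spectral_cluster_laplacian(k=5, normalisation='sym')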
302 | 303 | """ 304 | listk = False 305 | if isinstance(k, list): 306 | kk = k 307 | k = max(k) 308 | listk = True 309 | 310 | if eigens == None: 311 | eigens = k 312 | if mi == None: 313 | mi = self.size 314 | 315 | symmetric = True 316 | eye = ss.eye(self.size, format="csc") 317 | 318 | if normalisation == 'none': 319 | matrix = self.Dbar - self.A 320 | 321 | elif normalisation == 'sym': 322 | d = sqrtinvdiag(self.Dbar) 323 | matrix = eye - d * self.A * d 324 | 325 | elif normalisation == 'rw': 326 | d = invdiag(self.Dbar) 327 | matrix = eye - d * self.A 328 | symmetric = False 329 | 330 | elif normalisation == 'sym_sep': 331 | d = sqrtinvdiag(self.D_p) 332 | matrix = d * self.p * d 333 | d = sqrtinvdiag(self.D_n) 334 | matrix = matrix - (d * self.n * d) 335 | matrix = eye - matrix 336 | 337 | elif normalisation == 'rw_sep': 338 | d = invdiag(self.D_p) 339 | matrix = d * self.p 340 | d = invdiag(self.D_n) 341 | matrix = matrix - (d * self.n) 342 | matrix = eye - matrix 343 | symmetric = False 344 | 345 | if symmetric: 346 | (w, v) = ss.linalg.eigsh(matrix, eigens, maxiter=mi, which='SA') 347 | else: 348 | (w, v) = ss.linalg.eigs(matrix, eigens, maxiter=mi, which='SR') 349 | 350 | v = v / w # weight eigenvalues by eigenvectors, since smaller eigenvectors are more likely to be informative 351 | v = np.atleast_2d(v) 352 | if not listk: 353 | v = np.atleast_2d(v) 354 | x = sl.KMeans(n_clusters=k).fit(v) 355 | return x.labels_ 356 | else: 357 | return [sl.KMeans(n_clusters=x).fit(np.atleast_2d(v[:, 0:k - 1])).labels_ for x in kk] 358 | 359 | def geproblem_adjacency(self, k=4, normalisation='multiplicative', eigens=None, mi=None, nudge=0.5): 360 | 361 | """Clusters the graph by solving a adjacency-matrix-based generalised eigenvalue problem. 362 | 363 | Args: 364 | k (int, or list of int) : The number of clusters to identify. If a list is given, the output is a corresponding list. 365 | normalisation (string): How to normalise for cluster size: 366 | 'none' - do not normalise. 367 | 'additive' - add degree matrices appropriately. 368 | 'multiplicative' - multiply by degree matrices appropriately. 369 | 370 | Returns: 371 | array of int, or list of array of int: Output assignment to clusters. 372 | 373 | Other parameters: 374 | eigens (int): The number of eigenvectors to take. Defaults to k. 375 | mi (int): The maximum number of iterations for which to run eigenvlue solvers. Defaults to number of nodes. 376 | nudge (int): Amount added to diagonal to bound eigenvalues away from 0. 
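Example:
    A hedged sketch; the generalised problem is solved with LOBPCG on the pair of shifted matrices::

        labels = c.geproblem_adjacency(k=5, normalisation='multiplicative')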
377 | 378 | """ 379 | listk = False 380 | if isinstance(k, list): 381 | kk = k 382 | k = max(k) 383 | listk = True 384 | 385 | if eigens == None: 386 | # eigens = min(math.floor(math.sqrt(self.size)), math.ceil(k * math.log(k, 2) - 1)) 387 | eigens = k 388 | if mi == None: 389 | mi = self.size 390 | 391 | eye = ss.eye(self.size, format="csc") 392 | if normalisation == 'none': 393 | matrix1 = self.n 394 | matrix2 = self.p 395 | 396 | elif normalisation == 'additive': 397 | matrix1 = self.n + self.D_p 398 | matrix2 = self.p + self.D_n 399 | 400 | elif normalisation == 'multiplicative': 401 | d = sqrtinvdiag(self.D_n) 402 | matrix1 = d * self.n * d 403 | d = sqrtinvdiag(self.D_p) 404 | matrix2 = d * self.p * d 405 | 406 | (w, v) = ss.linalg.eigsh(matrix1, k=1, maxiter=mi, which='SA') 407 | matrix1 = matrix1 + eye * (nudge - w[0]) 408 | 409 | (w, v) = ss.linalg.eigsh(matrix2, k=1, maxiter=mi, which='SA') 410 | matrix2 = matrix2 + eye * (nudge - w[0]) 411 | 412 | v0 = np.random.normal(0, 1, (self.p.shape[0], eigens)) 413 | (w, v) = ss.linalg.lobpcg(matrix1, v0, B=matrix2, maxiter=mi, largest=False) 414 | 415 | v = v / w 416 | if not listk: 417 | v = np.atleast_2d(v) 418 | x = sl.KMeans(n_clusters=k).fit(v) 419 | return x.labels_ 420 | else: 421 | return [sl.KMeans(n_clusters=x).fit(np.atleast_2d(v[:, 1 - x:])).labels_ for x in kk] 422 | 423 | def geproblem_laplacian(self, k=4, normalisation='multiplicative', eigens=None, mi=None, tau=1.): 424 | """Clusters the graph by solving a Laplacian-based generalised eigenvalue problem. 425 | 426 | Args: 427 | k (int, or list of int) : The number of clusters to identify. If a list is given, the output is a corresponding list. 428 | normalisation (string): How to normalise for cluster size: 429 | 'none' - do not normalise. 430 | 'additive' - add degree matrices appropriately. 431 | 'multiplicative' - multiply by degree matrices appropriately. 432 | 433 | Returns: 434 | array of int, or list of array of int: Output assignment to clusters. 435 | 436 | Other parameters: 437 | eigens (int): The number of eigenvectors to take. Defaults to k. 438 | mi (int): The maximum number of iterations for which to run eigenvalue solvers. Defaults to number of nodes. 439 | tau (float): Amount added to the diagonal of both matrices to bound eigenvalues away from 0.
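Example:
    A hedged sketch; tau shifts both Laplacians to keep the matrix pencil well conditioned::

        labels = c.geproblem_laplacian(k=5, normalisation='multiplicative', tau=1.0)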
440 | 441 | """ 442 | listk = False 443 | if isinstance(k, list): 444 | kk = k 445 | k = max(k) 446 | listk = True 447 | 448 | if eigens == None: 449 | eigens = k 450 | if mi == None: 451 | mi = self.size 452 | 453 | eye = ss.eye(self.size, format="csc") 454 | 455 | if normalisation == 'none': 456 | matrix1 = self.D_p - self.p 457 | matrix2 = self.D_n - self.n 458 | 459 | elif normalisation == 'additive': 460 | matrix1 = self.Dbar - self.p 461 | matrix2 = self.Dbar - self.n 462 | 463 | elif normalisation == 'multiplicative': 464 | 465 | d = sqrtinvdiag(self.D_n) 466 | matrix = d * self.n * d 467 | matrix2 = eye - matrix 468 | d = sqrtinvdiag(self.D_p) 469 | matrix = d * self.p * d 470 | matrix1 = eye - matrix 471 | 472 | matrix1 = matrix1 + eye * tau 473 | matrix2 = matrix2 + eye * tau 474 | 475 | v0 = np.random.normal(0, 1, (self.p.shape[0], eigens)) 476 | (w, v) = ss.linalg.lobpcg(matrix1, v0, B=matrix2, maxiter=mi, largest=False) 477 | 478 | v = v / w 479 | if not listk: 480 | v = np.atleast_2d(v) 481 | x = sl.KMeans(n_clusters=k).fit(v) 482 | return x.labels_ 483 | else: 484 | return [sl.KMeans(n_clusters=x).fit(np.atleast_2d(v[:, 0:x - 1])).labels_ for x in kk] 485 | 486 | def SPONGE(self, k=4, tau_p=1, tau_n=1, eigens=None, mi=None): 487 | """Clusters the graph using the Signed Positive Over Negative Generalised Eigenproblem (SPONGE) clustering. 488 | 489 | The algorithm tries to minimises the following ratio (Lbar^+ + tau_n D^-)/(Lbar^- + tau_p D^+). 490 | The parameters tau_p and tau_n can be typically set to one. 491 | 492 | Args: 493 | k (int, or list of int) : The number of clusters to identify. If a list is given, the output is a corresponding list. 494 | tau_n (float): regularisation of the numerator 495 | tau_p (float): regularisation of the denominator 496 | 497 | Returns: 498 | array of int, or list of array of int: Output assignment to clusters. 499 | 500 | Other parameters: 501 | eigens (int): The number of eigenvectors to take. Defaults to k. 502 | mi (int): The maximum number of iterations for which to run eigenvlue solvers. Defaults to number of nodes. 503 | nudge (int): Amount added to diagonal to bound eigenvalues away from 0. 504 | 505 | """ 506 | 507 | listk = False 508 | if isinstance(k, list): 509 | kk = k 510 | k = max(k) 511 | listk = True 512 | 513 | if eigens == None: 514 | eigens = k - 1 515 | if mi == None: 516 | mi = self.size 517 | 518 | matrix1 = self.D_p - self.p 519 | matrix2 = self.D_n - self.n 520 | 521 | matrix1 = matrix1 + tau_n * self.D_n 522 | matrix2 = matrix2 + tau_p * self.D_p 523 | 524 | v0 = np.random.normal(0, 1, (self.p.shape[0], eigens)) 525 | (w, v) = ss.linalg.lobpcg(matrix1, v0, B=matrix2, maxiter=mi, largest=False) 526 | 527 | v = v / w 528 | if not listk: 529 | v = np.atleast_2d(v) 530 | x = sl.KMeans(n_clusters=k).fit(v) 531 | return x.labels_ 532 | else: 533 | return [sl.KMeans(n_clusters=x).fit(np.atleast_2d(v[:, 0:x - 1])).labels_ for x in kk] 534 | 535 | 536 | 537 | def SPONGE_sym(self, k=4, tau_p=1, tau_n=1, eigens=None, mi=None): 538 | """Clusters the graph using the symmetric normalised version of the SPONGE clustering algorithm. 539 | 540 | The algorithm tries to minimises the following ratio (Lbar_sym^+ + tau_n Id)/(Lbar_sym^- + tau_p Id). 541 | The parameters tau_p and tau_n can be typically set to one. 542 | 543 | Args: 544 | k (int, or list of int) : The number of clusters to identify. If a list is given, the output is a corresponding list. 
545 | tau_n (float): regularisation of the numerator 546 | tau_p (float): regularisation of the denominator 547 | 548 | Returns: 549 | array of int, or list of array of int: Output assignment to clusters. 550 | 551 | Other parameters: 552 | eigens (int): The number of eigenvectors to take. Defaults to k. 553 | mi (int): The maximum number of iterations for which to run eigenvalue solvers. Defaults to number of nodes. 554 | 555 | 556 | """ 557 | 558 | listk = False 559 | 560 | if isinstance(k, list): 561 | kk = k 562 | k = max(k) 563 | listk = True 564 | 565 | if eigens == None: 566 | eigens = k - 1 567 | if mi == None: 568 | mi = self.size 569 | 570 | eye = ss.eye(self.size, format="csc") 571 | 572 | d = sqrtinvdiag(self.D_n) 573 | matrix = d * self.n * d 574 | matrix2 = eye - matrix 575 | 576 | d = sqrtinvdiag(self.D_p) 577 | matrix = d * self.p * d 578 | matrix1 = eye - matrix 579 | 580 | matrix1 = matrix1 + tau_n * eye 581 | matrix2 = matrix2 + tau_p * eye 582 | 583 | v0 = np.random.normal(0, 1, (self.p.shape[0], eigens)) 584 | (w, v) = ss.linalg.lobpcg(matrix1, v0, B=matrix2, maxiter=mi, largest=False) 585 | 586 | v = v / w 587 | if not listk: 588 | v = np.atleast_2d(v) 589 | x = sl.KMeans(n_clusters=k).fit(v) 590 | return x.labels_ 591 | else: 592 | return [sl.KMeans(n_clusters=x).fit(np.atleast_2d(v[:, 0:x - 1])).labels_ for x in kk] 593 | 594 | def find_eigenvalues(self, k=100, matrix='laplacian'): 595 | """Find top or bottom k eigenvalues of adjacency or laplacian matrix. 596 | 597 | The list of the top (bottom) k eigenvalues of the adjacency (laplacian) matrix is returned. 598 | This can be useful in identifying the number of clusters. 599 | 600 | Note: 601 | The Laplacian matrix used is the signed symmetric Laplacian. 602 | 603 | Args: 604 | k (int): Number of eigenvalues to return 605 | matrix (str): Type of matrix to diagonalise (either 'adjacency' or 'laplacian') 606 | 607 | Returns: 608 | array of float: An array of the first k eigenvalues, ordered in ascending or descending order 609 | (depending on the matrix type) """ 610 | 611 | k = min(self.size, k) 612 | if matrix == 'adjacency': 613 | (w, v) = ss.linalg.eigsh(self.A, k, which='LA') 614 | w = w[::-1] 615 | elif matrix == 'laplacian': 616 | (w, v) = ss.linalg.eigsh(ss.eye(self.size, format='csc') - self.normA, k, which='SA')  # signed symmetric Laplacian I - D^(-1/2) A D^(-1/2) 617 | 618 | else: 619 | raise ValueError('please select a valid matrix type') 620 | return w 621 | 622 | def spectral_cluster_bethe_hessian(self, k, mi=1000, r=None, justpos=True): 623 | """Clustering based on signed Bethe Hessian. 624 | 625 | A low dimensional embedding is obtained via the lowest eigenvectors of the signed Bethe Hessian matrix Hbar and 626 | k-means is performed in this space. 627 | 628 | Args: 629 | k (int, or list of int) : The number of clusters to identify. If a list is given, the output is a corresponding list. 630 | mi (int): Maximum number of iterations of the eigensolver. 631 | r (float): Radius of the Bethe Hessian; if None, it is estimated from the degree sequence. 632 | justpos (bool): Whether to use only the positive-radius Bethe Hessian (default True). 633 | Returns: 634 | array of int, or list of array of int: Label assignments. 635 | int: Suggested number of clusters for network.
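Example:
    A hedged sketch; note that a suggested cluster count is returned alongside the labels::

        labels, k_suggested = c.spectral_cluster_bethe_hessian(k=5)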
636 | 637 | """ 638 | listk = False 639 | if isinstance(k, list): 640 | kk = k 641 | k = max(k) 642 | listk = True 643 | 644 | if r is None: 645 | 646 | d = np.mean(self.Dbar.data) 647 | dsq = np.mean(self.Dbar.data ** 2) 648 | 649 | # r = np.sqrt(d) # SSB 650 | r = np.sqrt(dsq / d - 1) # general 651 | else: 652 | pass 653 | eigens = k - 1 654 | 655 | self.Hbar_p = (r ** 2 - 1) * ss.identity(self.size, format='csc') - r * self.A + self.Dbar 656 | 657 | (w, v) = ss.linalg.eigsh(self.Hbar_p, eigens, which='SA', maxiter=mi) 658 | if not justpos: 659 | r = - r 660 | 661 | self.Hbar_n = (r ** 2 - 1) * ss.identity(self.size, format='csc') - r * self.A + self.Dbar 662 | 663 | (wn, vn) = ss.linalg.eigsh(self.Hbar_n, eigens, which='SA', maxiter=mi) 664 | w = np.hstack((w, wn)) 665 | v = np.hstack((v, vn)) 666 | eigens = 2 * eigens 667 | klen = len([x for x in range(eigens) if w[x] < 0]) 668 | idx = np.argsort(w)[0:k - 1] 669 | v = v[:, idx] 670 | if not listk: 671 | v = np.atleast_2d(v) 672 | x = sl.KMeans(n_clusters=k).fit(v) 673 | return x.labels_, klen 674 | else: 675 | return [sl.KMeans(n_clusters=x).fit(np.atleast_2d(v[:, 0:x - 1])).labels_ for x in kk], klen + 1 676 | 677 | def SDP_cluster(self, k, solver='BM_proj_grad', normalisation='sym_sep'): 678 | """Clustering based on a SDP relaxation of the clustering problem. 679 | 680 | A low dimensional embedding is obtained via the leading eigenvectors of the positive-semidefinite matrix Z 681 | which maximises its Frobenius product with the adjacency matrix, and k-means is performed in this space. 682 | 683 | Args: 684 | k (int, or list of int) : The number of clusters to identify. If a list is given, the output is a corresponding list. 685 | solver (str): Type of solver for the SDP formulation. 686 | 'interior_point_method' - Interior point method. 687 | 'BM_proj_grad' - Burer Monteiro method using projected gradient updates. 688 | 'BM_aug_lag' - Burer Monteiro method using augmented Lagrangian updates. 689 | normalisation (string): How to normalise the adjacency matrix before solving: 'none', 'sym' or 'sym_sep'. 690 | Returns: 691 | array of int, or list of array of int: Label assignments.
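Example:
    A hedged sketch using the augmented Lagrangian Burer-Monteiro solver::

        labels = c.SDP_cluster(k=5, solver='BM_aug_lag', normalisation='sym')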

        """

        listk = False
        if isinstance(k, list):
            kk = k
            k = max(k)
            listk = True

        if normalisation == 'none':
            matrix = self.A

        elif normalisation == 'sym':
            d = sqrtinvdiag(self.Dbar)
            matrix = d * self.A * d

        elif normalisation == 'sym_sep':
            d = sqrtinvdiag(self.D_p)
            matrix = d * self.p * d
            d = sqrtinvdiag(self.D_n)
            matrix = matrix - (d * self.n * d)

        else:
            raise ValueError('please specify a valid normalisation')

        if solver == 'interior_point_method':
            import cvxpy as cvx

            # Define a cvx optimisation variable
            Z = cvx.Variable((self.size, self.size), PSD=True)
            ones = np.ones(self.size)
            # Define constraints
            constraints = [cvx.diag(Z) == ones]
            # Define an objective function
            obj = cvx.Maximize(cvx.trace(self.A * Z))
            # Define an optimisation problem
            prob = cvx.Problem(obj, constraints)
            # Solve the optimisation problem
            prob.solve(solver='CVXOPT')

            print("status:", prob.status)
            print("optimal value", prob.value)

            # Diagonalise the solution and keep the k leading eigenvectors
            (w, v) = sp.linalg.eigh(Z.value, eigvals=(self.size - k, self.size - 1))
            v = v * w

        elif solver == 'BM_proj_grad':

            r = math.floor(np.sqrt(2 * self.size) + 1)
            X = np.random.normal(0, 1, (self.size, r))
            ones = np.ones((self.size, 1))
            step = 2
            traces = []
            i = 0
            while True:
                # Projected gradient ascent on trace(X^T A X), with the rows of X
                # renormalised onto the unit sphere after every step.
                AX = matrix.dot(X)
                G = 2 * AX
                X = X + step * G
                trace = np.einsum('ij, ij -> ', X, AX)
                traces.append(trace)
                Norms = np.linalg.norm(X, axis=1)
                X = np.divide(X, Norms[:, None])
                # Stop once the relative change in the objective falls below 1%.
                delta_trace = abs(traces[-1] - traces[-2]) / abs(traces[-2]) if i > 0 else 100.
                if delta_trace <= 0.01:
                    break
                i += 1
            Z = X.T.dot(X)
            (w, v) = sp.linalg.eigh(Z, eigvals=(r - k, r - 1))
            v = X.dot(v)
            v = v * w

        elif solver == 'BM_aug_lag':
            r = int(np.sqrt(2 * self.size))
            X = augmented_lagrangian(A=matrix, r=r, printing=False, init=None)
            Z = X.T.dot(X)
            (w, v) = sp.linalg.eigh(Z, eigvals=(r - k, r - 1))
            v = X.dot(v)
            v = v * w

        else:
            raise ValueError('please specify a valid solver')

        if not listk:
            v = np.atleast_2d(v)
            x = sl.KMeans(n_clusters=k).fit(v)
            return x.labels_
        else:
            return [sl.KMeans(n_clusters=x).fit(np.atleast_2d(v[:, 1 - x:])).labels_ for x in kk]

    def waggle(self, k, labs, matrix=None, rounds=50, mini=False):
        """Postprocessing based on iteratively merging and cutting clusters of the provided solution.

        Pairs of clusters are merged randomly.
        Merged clusters are then partitioned in two by spectral clustering on the input matrix.

        Args:
            k (int): The number of clusters to identify.
            labs (array of int): Initial assignment to clusters.
            matrix (csc matrix): Matrix to use for partitioning. Defaults to the un-normalised adjacency matrix.

        Returns:
            array of int: Output assignment to clusters.

        Other parameters:
            rounds (int): Number of iterations to perform.
            mini (boolean): Whether to minimise (rather than maximise) the input matrix objective when partitioning.
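
        Example:
            A minimal usage sketch: refine an assignment produced by another
            method of this class (``c`` and ``k=5`` are illustrative)::

                labels = c.spectral_cluster_laplacian(k=5, normalisation='sym')
                refined = c.waggle(k=5, labs=labels)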

        """
        if matrix is None:
            matrix = self.A
        elemlist = [[x for x in range(self.size) if labs[x] == i] for i in range(k)]
        if k == 2:
            rounds = 0
        for i in range(rounds):
            elemlist, dc, numbers = merge(elemlist)
            elemlist = cut(elemlist, matrix, numbers, dc, mini)
        cluster = [0] * self.size
        for i in range(len(elemlist)):
            for j in elemlist[i]:
                cluster[j] = i
        return cluster


if __name__ == "__main__":
    from block_models import SSBM

    n = 10000
    k = 10
    p = 35 / n
    eta = 0.
    Ac, truth = SSBM(n, k, p, eta)

    print('Network constructed')

    m = Cluster(Ac)

    pcapreds = m.spectral_cluster_laplacian(k, normalisation='sym')
    rscore = sm.adjusted_rand_score(truth, pcapreds)
    print('Symmetric Laplacian score is ', rscore)

    pcapreds = m.spectral_cluster_adjacency(k, normalisation='sym')
    rscore = sm.adjusted_rand_score(truth, pcapreds)
    print('Symmetric Adjacency score is ', rscore)

    pcapreds = m.SPONGE(k)
    rscore = sm.adjusted_rand_score(truth, pcapreds)
    print('SPONGE score is ', rscore)

    pcapreds = m.SPONGE_sym(k)
    rscore = sm.adjusted_rand_score(truth, pcapreds)
    print('SPONGE sym score is ', rscore)
--------------------------------------------------------------------------------
/signet/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/alan-turing-institute/SigNet/32e3bf03dea9688f02f6c33728e5c1a7a8dbe5ce/signet/tests/__init__.py
--------------------------------------------------------------------------------
/signet/tests/package_usage.ipynb:
--------------------------------------------------------------------------------
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Comparing signed clustering algorithms using SigNet"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# import the relevant classes and functions from signet\n",
    "\n",
    "from signet.cluster import Cluster\n",
    "from signet.block_models import SSBM\n",
    "from sklearn.metrics import adjusted_rand_score\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [],
   "source": [
    "# generate a random graph with community structure by the signed stochastic block model\n",
    "\n",
    "n = 5000 # number of nodes\n",
    "k = 15 # number of communities\n",
    "eta = 0.05 # sign flipping probability\n",
    "p = 0.01 # edge probability\n",
    "\n",
    "(A_p, A_n), true_assign = SSBM(n = n, k = k, pin=p, etain=eta)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [],
   "source": [
    "# initialise the Cluster object with the data (adjacency matrix of positive and negative graph)\n",
    "\n",
    "c = Cluster((A_p, A_n))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [],
   "source": [
    "# calculate the assignments provided by the algorithms you want to analyse\n",
    "\n",
    "A_assign = c.spectral_cluster_adjacency(k = k)\n",
    "\n",
    "L_assign = c.spectral_cluster_laplacian(k = k, normalisation='sym')\n",
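    "\n",
    "# (as the variable names below suggest, the two normalisations of the\n",
    "# generalised eigenproblem are used here as the SPONGE and SPONGE_sym\n",
    "# objectives)\n",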
    "\n",
    "SPONGE_assign = c.geproblem_laplacian(k = k, normalisation='additive')\n",
    "\n",
    "SPONGEsym_assign = c.geproblem_laplacian(k = k, normalisation='multiplicative')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "score_A: 0.638342339376121\nscore_L: 0.011184439304956226\nscore_SPONGE: 0.041242984040083946\nscore_SPONGEsym: 0.5113251430507669\n"
     ]
    }
   ],
   "source": [
    "# compute the recovery score of the algorithms against the SSBM ground truth\n",
    "\n",
    "score_A = adjusted_rand_score(A_assign, true_assign)\n",
    "\n",
    "score_L = adjusted_rand_score(L_assign, true_assign)\n",
    "\n",
    "score_SPONGE = adjusted_rand_score(SPONGE_assign, true_assign)\n",
    "\n",
    "score_SPONGEsym = adjusted_rand_score(SPONGEsym_assign, true_assign)\n",
    "\n",
    "print('score_A: ', score_A)\n",
    "print('score_L: ', score_L)\n",
    "print('score_SPONGE: ', score_SPONGE)\n",
    "print('score_SPONGEsym: ', score_SPONGEsym)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    ""
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
--------------------------------------------------------------------------------
/signet/utils.py:
--------------------------------------------------------------------------------
import math
import numpy as np
import scipy.sparse as ss
import sklearn.cluster as sl

def objscore(labels, k, mat1, mat2=None):
    """Scores a clustering using the given objective matrices.

    Args:
        labels (list of int): Clustering assignment.
        k (int): Number of clusters.
        mat1 (csc matrix): Numerator matrix of the objective score.
        mat2 (csc matrix): Denominator matrix of the objective score. Default is no denominator.

    Returns:
        float: Score.
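
    Example:
        A minimal usage sketch (``pos`` and ``neg`` are the csc adjacency
        matrices of the positive and negative graphs, as used throughout this
        package)::

            score = objscore(labels, k, pos, neg)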

    """

    tot = 0
    row = np.empty(k, dtype=object)
    for pos, item in enumerate(labels):
        if not isinstance(row[item], list):
            row[item] = [pos]
        else:
            row[item].append(pos)
    for j in range(k):
        num = mat1[:, row[j]].tocsr()[row[j], :].sum()
        if mat2 is not None:
            den = mat2[:, row[j]].tocsr()[row[j], :].sum()
            if den == 0:
                den = 1
            num = num / den
        tot += num
    return float(round(tot, 2))

def sizeorder(labels, k, pos, neg, largest=True):
    """Orders nodes by the objective score of the cluster they belong to.

    Each cluster is scored by the ratio of its internal positive weight to its
    internal negative weight. If largest is False, the largest cluster is
    dropped from the ordering.
    """
    n = len(labels)
    eye = ss.eye(n, format='csc')
    clusscores = np.empty(k)
    lsize = 0
    lclus = -1
    for j in range(k):
        row = [i for i in range(n) if labels[i] == j]
        col = [0 for i in range(n) if labels[i] == j]
        dat = [1 for i in range(n) if labels[i] == j]
        if not largest and len(dat) > lsize:
            lsize = len(dat)
            lclus = j
        vec = ss.coo_matrix((dat, (row, col)), shape=(n, 1))
        vec = vec.tocsc()
        x = vec.transpose() * pos * vec
        y = vec.transpose() * (neg + eye) * vec
        z = float(x[0, 0]) / float(y[0, 0])
        clusscores[j] = z
    new = [x for x in range(n) if labels[x] != lclus]
    scores = [clusscores[labels[i]] for i in new]
    return [x for _, x in sorted(zip(scores, new))]

def invdiag(M):
    """Inverts a positive diagonal matrix.

    Args:
        M (csc matrix): matrix to invert

    Returns:
        scipy sparse matrix of inverted diagonal

    """

    d = M.diagonal()
    dd = [1 / max(x, 1 / 999999999) for x in d]
    return ss.dia_matrix((dd, [0]), shape=(len(d), len(d))).tocsc()


def sqrtinvdiag(M):
    """Inverts and square-roots a positive diagonal matrix.

    Args:
        M (csc matrix): matrix to invert

    Returns:
        scipy sparse matrix of inverted square-root of diagonal
    """

    d = M.diagonal()
    dd = [1 / max(np.sqrt(x), 1 / 999999999) for x in d]

    return ss.dia_matrix((dd, [0]), shape=(len(d), len(d))).tocsc()


def merge(elemlist):
    """Merges pairs of clusters randomly.

    Args:
        elemlist (list of lists of int): Specifies the members of each cluster in the current clustering

    Returns:
        list of lists of int: New cluster constituents
        boolean: Whether the last cluster was left unmerged
        list of int: Size of the first half of each merged pair, to use for starting vectors.

    """
    k = len(elemlist)
    dc = False
    elemlist.append([])
    perm = np.random.permutation(k)
    match = [k] * k
    for i in range(math.floor(k / 2)):
        me = perm[2 * i]
        you = perm[2 * i + 1]
        match[me] = you
        match[you] = me
    if k % 2 != 0:
        dontcut = perm[k - 1]
        dc = True
    nelemlist = [elemlist[i] + elemlist[match[i]] for i in range(k) if i < match[i] < k]
    numbers = [len(elemlist[i]) for i in range(k) if i < match[i] < k]
    if dc:
        nelemlist.append(elemlist[dontcut])
    return nelemlist, dc, numbers

def cut(elemlist, matrix, numbers, dc, mini):
    """Cuts clusters by separately normalised PCA.

    Args:
        elemlist (list of lists of int): Specifies the members of each cluster in the current clustering
        matrix (csc matrix): Matrix objective with which to cut.
        numbers (list of int): Marks the previous clustering, to use as a starting vector.
        dc (boolean): Whether to skip cutting the last cluster
        mini (boolean): Whether to minimise (instead of maximise) the matrix objective.
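
    Note:
        ``numbers`` and ``dc`` are expected to come from a preceding call to
        merge(); ``numbers`` is used to build the eigensolver's starting vector.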

    Returns:
        list of lists of int: new cluster constituents
    """
    nelemlist = []
    if dc:
        # The unpaired cluster from merge() is carried over uncut.
        nelemlist.append(elemlist.pop())
    count = 0
    for i in elemlist:
        l = len(i)
        if l > 2:
            # Restrict the matrix to this merged cluster.
            matrix1 = matrix[:, i].tocsr()
            matrix1 = matrix1[i, :].tocsc()
            # Starting vector marking the two halves of the merged cluster.
            val = 1 / math.sqrt(l)
            v = [-val] * numbers[count]
            w = [val] * (l - numbers[count])
            v = v + w
            if not mini:
                (w, v) = ss.linalg.eigsh(matrix1, 2, which='LA', maxiter=l, v0=v)
            else:
                (w, v) = ss.linalg.eigsh(matrix1, 2, which='SA', maxiter=l, v0=v)
            x = sl.KMeans(n_clusters=2, n_init=3, max_iter=100).fit(v)
            c1 = [i[y] for y in range(l) if x.labels_[y] == 0]
            c2 = [i[y] for y in range(l) if x.labels_[y] == 1]
            nelemlist.append(c1)
            nelemlist.append(c2)
        elif len(i) == 2:
            # A pair is kept together only if it is positively connected.
            if matrix[i[0], i[1]] > 0:
                nelemlist.append(i)
                nelemlist.append([])
            else:
                nelemlist.append([i[0]])
                nelemlist.append([i[1]])
        elif len(i) == 1:
            nelemlist.append(i)
            nelemlist.append([])
        else:
            nelemlist.append([])
            nelemlist.append([])
        count += 1
    return nelemlist
--------------------------------------------------------------------------------
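
A short end-to-end sketch of the helpers above (the random matrix, the seed and
the k = 4 starting labels are illustrative only; signet is assumed to be
installed as described in the README):

    import numpy as np
    import scipy.sparse as ss
    from signet.utils import merge, cut, objscore

    n, k = 40, 4
    rng = np.random.RandomState(0)
    A = ss.random(n, n, density=0.2, random_state=rng, format='csc')
    A = (A + A.T).tocsc()  # symmetrise, so the eigensolver inside cut() applies

    labels = list(rng.randint(0, k, size=n))  # an arbitrary starting clustering
    elemlist = [[i for i in range(n) if labels[i] == j] for j in range(k)]

    elemlist, dc, numbers = merge(elemlist)          # randomly pair clusters
    elemlist = cut(elemlist, A, numbers, dc, False)  # re-split each merged pair
    elemlist = [c for c in elemlist if c]            # drop empty clusters

    new_labels = [0] * n
    for c, members in enumerate(elemlist):
        for i in members:
            new_labels[i] = c
    print(objscore(new_labels, len(elemlist), A))    # higher means denser clusters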