├── requirements.txt
├── src
│   └── fastconsensus
│       ├── io.py
│       ├── __init__.py
│       ├── utils.py
│       ├── algorithms.py
│       └── core.py
├── setup.py
├── environment.yml
├── LICENSE
├── README.md
├── .gitignore
├── sample_scripts
│   └── test_figure.py
└── notebooks
    └── lfr_test_notebook.ipynb

/requirements.txt:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/src/fastconsensus/io.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/src/fastconsensus/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
from setuptools import setup, find_packages

setup(
    name="fastconsensus",
    version="0.1.0",
    packages=find_packages(where="src"),
    package_dir={"": "src"},
)
--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
name: fastconsensus
channels:
  - conda-forge
  - defaults
dependencies:
  - python=3.9
  - igraph
  - python-igraph
  - cairocffi
  - numpy
  - matplotlib
  - jupyter
  # required by sample_scripts/test_figure.py
  - networkx
  - tqdm
  - pip
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2024, adityat
All rights reserved.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Fastconsensus

Fastconsensus is a Python package that implements a fast consensus clustering algorithm for complex networks. It provides an efficient way to perform community detection on large-scale networks using the igraph library.

## Installation

### From source

To install fastconsensus from source, follow these steps:

1. Clone the repository:
```bash
git clone https://github.com/adityat/fastconsensus.git
cd fastconsensus
```

2. Create a conda environment (optional but recommended):
```bash
conda env create -f environment.yml
conda activate fastconsensus
```

3. Install the package:
```bash
pip install -e .
```
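To verify the install, the following smoke test should run without errors. It uses igraph's bundled Zachary karate-club graph; the parameter values are illustrative, not tuned:

```python
import igraph as ig
from fastconsensus.core import fast_consensus_clustering

g = ig.Graph.Famous("Zachary")  # small 34-node test network bundled with igraph
partition = fast_consensus_clustering(g, n_partitions=5, threshold=0.2)
print(f"Found {len(set(partition.values()))} communities")
```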
## Usage

Here's a basic example of how to use fastconsensus:

```python
import igraph as ig
from fastconsensus.core import fast_consensus_clustering

# Read a graph from a file (igraph supports GML, GraphML, edge lists, etc.)
graph = ig.Graph.Read("path/to/your/graph.gml", format="gml")

# Perform fast consensus clustering
partition = fast_consensus_clustering(graph, n_partitions=20, threshold=0.2)

# Print the resulting partition (a dict mapping node ids to community ids)
print(partition)
```

For more detailed examples and usage scenarios, please refer to the Jupyter notebooks in the `notebooks/` directory.

## Running the Notebooks

To run the example notebooks:

1. Ensure you have Jupyter installed in your environment:
```bash
conda install jupyter
```

2. Navigate to the `notebooks/` directory and start Jupyter:
```bash
cd notebooks
jupyter notebook
```

3. Open and run `lfr_test_notebook.ipynb`

## Contributing

Contributions are welcome! Please feel free to submit a Pull Request.

## License

This project is licensed under the MIT License - see the LICENSE file for details.
--------------------------------------------------------------------------------
/src/fastconsensus/utils.py:
--------------------------------------------------------------------------------
import igraph as ig
from typing import Dict, Any
import math

def calculate_modularity(graph: ig.Graph, partition: Dict[int, Any]) -> float:
    """
    Calculate modularity of a partition.

    :param graph: Graph object
    :param partition: Node to community mapping
    :return: Modularity score
    """
    # Map arbitrary (hashable) community labels to consecutive integer ids,
    # walking vertices in id order rather than relying on dict insertion order,
    # since igraph expects an integer membership list indexed by vertex id.
    labels: Dict[Any, int] = {}
    membership = []
    for v in range(graph.vcount()):
        comm = partition[v]
        if comm not in labels:
            labels[comm] = len(labels)
        membership.append(labels[comm])
    return graph.modularity(membership)

def compare_partitions(partition1: Dict[int, Any], partition2: Dict[int, Any]) -> float:
    """
    Compare partitions using Normalized Mutual Information (NMI).

    :param partition1: First partition
    :param partition2: Second partition
    :return: NMI score in [0, 1]
    :raises ValueError: If partitions have different node sets
    """
    if set(partition1.keys()) != set(partition2.keys()):
        raise ValueError("Partitions must have the same set of nodes")

    n = len(partition1)

    # Convert community IDs to strings so even unhashable labels (e.g. sets)
    # can be used as dictionary keys below
    partition1 = {node: str(comm) for node, comm in partition1.items()}
    partition2 = {node: str(comm) for node, comm in partition2.items()}

    # Count community sizes and joint occurrences in a single pass
    count1: Dict[str, int] = {}
    count2: Dict[str, int] = {}
    joint: Dict[tuple, int] = {}
    for node in partition1:
        c1, c2 = partition1[node], partition2[node]
        count1[c1] = count1.get(c1, 0) + 1
        count2[c2] = count2.get(c2, 0) + 1
        joint[(c1, c2)] = joint.get((c1, c2), 0) + 1

    # Mutual information, summed over co-occurring community pairs only
    mi = sum((n_ij / n) * math.log2((n * n_ij) / (count1[c1] * count2[c2]))
             for (c1, c2), n_ij in joint.items())

    # Entropies of the two partitions
    h1 = sum(-(count / n) * math.log2(count / n) for count in count1.values())
    h2 = sum(-(count / n) * math.log2(count / n) for count in count2.values())

    # Normalized mutual information (0 when both entropies vanish)
    return 2 * mi / (h1 + h2) if (h1 + h2) > 0 else 0.0
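
# A minimal self-check (illustrative values, not part of the public API):
# identical splits with different labels should give NMI = 1.
if __name__ == "__main__":
    p1 = {0: 0, 1: 0, 2: 1, 3: 1}
    p2 = {0: "a", 1: "a", 2: "b", 3: "b"}  # same split, relabelled
    assert abs(compare_partitions(p1, p2) - 1.0) < 1e-9

    g = ig.Graph.Ring(4)  # 4-node cycle: 0-1-2-3-0
    print("modularity:", calculate_modularity(g, p1))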
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# PyCharm
.idea/

# VS Code
.vscode/

# Logs
*.log

# OS generated files
.DS_Store
.DS_Store?
._*
.Spotlight-V100
.Trashes
ehthumbs.db
Thumbs.db
--------------------------------------------------------------------------------
/src/fastconsensus/algorithms.py:
--------------------------------------------------------------------------------
from abc import ABC, abstractmethod
import igraph as ig

class CommunityDetectionAlgorithm(ABC):
    """
    Abstract base class for community detection algorithms.

    This class defines the interface for community detection algorithms
    to ensure consistent usage across different implementations.
    """

    @abstractmethod
    def detect_communities(self, graph: ig.Graph, weight=None) -> dict:
        """
        Detect communities in the given graph.

        :param graph: igraph Graph object
        :param weight: Optional name of the edge attribute to be used as weight
        :return: A dictionary mapping node ids to community ids
        """
        pass

class LouvainAlgorithm(CommunityDetectionAlgorithm):
    """
    Implementation of the Louvain community detection algorithm.

    This algorithm optimizes modularity in a hierarchical manner.
    """
    def detect_communities(self, graph: ig.Graph, weight=None) -> dict:
        """
        Detect communities using the Louvain algorithm.

        Finds communities by optimizing modularity in a multi-level approach
        and returns the first (finest) level of the hierarchy.

        :param graph: igraph Graph object
        :param weight: Optional name of the edge attribute to be used as weight
        :return: A dictionary mapping node ids to community ids
        """
        partition = graph.community_multilevel(weights=weight, return_levels=True)[0]
        return {v: partition.membership[v] for v in range(graph.vcount())}

class LabelPropagationAlgorithm(CommunityDetectionAlgorithm):
    """
    Implementation of the Label Propagation community detection algorithm.

    This algorithm detects communities by propagating labels through the network.
    """
    def detect_communities(self, graph: ig.Graph, weight=None) -> dict:
        """
        Detect communities using igraph's implementation of the Label
        Propagation algorithm.

        :param graph: igraph Graph object
        :param weight: Optional name of the edge attribute to be used as weight
        :return: A dictionary mapping node ids to community ids
        """
        partition = graph.community_label_propagation(weights=weight)
        return {v: partition.membership[v] for v in range(graph.vcount())}


class InfoMapAlgorithm(CommunityDetectionAlgorithm):
    """
    Implementation of the Infomap community detection algorithm.

    This algorithm finds communities by optimizing the map equation.
    """
    def detect_communities(self, graph: ig.Graph, weight=None) -> dict:
        """
        Detect communities using igraph's implementation of the Infomap
        algorithm.

        :param graph: igraph Graph object
        :param weight: Optional name of the edge attribute to be used as weight
        :return: A dictionary mapping node ids to community ids
        """
        partition = graph.community_infomap(edge_weights=weight)
        return {v: partition.membership[v] for v in range(graph.vcount())}

def get_algorithm(name: str) -> CommunityDetectionAlgorithm:
    """
    Function to get the specified community detection algorithm.

    :param name: Name of the algorithm to retrieve
    :return: An instance of the requested CommunityDetectionAlgorithm
    :raises ValueError: If the requested algorithm is not available
    """
    algorithms = {
        'louvain': LouvainAlgorithm(),
        'label_propagation': LabelPropagationAlgorithm(),
        'infomap': InfoMapAlgorithm(),
    }
    if name.lower() not in algorithms:
        raise ValueError(f"Unknown algorithm '{name}'. "
                         f"Available: {', '.join(sorted(algorithms))}")
    return algorithms[name.lower()]
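
# Quick illustrative usage (not exercised by the test scripts): run each
# registered algorithm on igraph's bundled Zachary karate-club graph.
if __name__ == "__main__":
    g = ig.Graph.Famous("Zachary")
    for name in ("louvain", "label_propagation", "infomap"):
        communities = get_algorithm(name).detect_communities(g)
        print(f"{name}: {len(set(communities.values()))} communities")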
--------------------------------------------------------------------------------
/sample_scripts/test_figure.py:
--------------------------------------------------------------------------------
import networkx as nx
import igraph as ig
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import random
import logging
from networkx.generators.community import LFR_benchmark_graph
from fastconsensus.algorithms import get_algorithm
from fastconsensus.core import fast_consensus_clustering
from fastconsensus.utils import compare_partitions

# Set up logging
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s - %(levelname)s - %(message)s',
                    handlers=[
                        logging.FileHandler("experiment_results.log"),
                        logging.StreamHandler()
                    ])

def generate_lfr_graph(n, tau1, tau2, mu, average_degree, max_degree, min_community, max_community, max_attempts=10):
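    # The networkx LFR generator often fails to converge for a given parameter
    # set; the intent of the jitter below is that each retry perturbs the
    # target average degree slightly so a different degree sequence is sampled
    # before giving up after max_attempts tries.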
    for attempt in range(max_attempts):
        try:
            current_avg_degree = average_degree + random.uniform(-1, 1)

            G = LFR_benchmark_graph(
                n, tau1, tau2, mu, average_degree=current_avg_degree, max_degree=max_degree,
                min_community=min_community, max_community=max_community,
                tol=1e-3, max_iters=5000
            )

            true_communities = {node: frozenset(G.nodes[node]['community']) for node in G.nodes()}
            edges = list(G.edges())
            g = ig.Graph(n=G.number_of_nodes(), edges=edges)
            return g, true_communities

        except nx.ExceededMaxIterations:
            if attempt == max_attempts - 1:
                raise ValueError(f"Failed to generate LFR graph after {max_attempts} attempts")
            continue

def run_experiment(n_runs, n_nodes, mu_values):
    results = {
        'Louvain': [],
        'FastConsensus': []
    }

    for mu in tqdm(mu_values, desc="Processing μ values"):
        louvain_nmi = []
        fastconsensus_nmi = []

        for run in range(n_runs):
            try:
                g, true_communities = generate_lfr_graph(
                    n=n_nodes,
                    tau1=2,
                    tau2=2,
                    mu=mu,
                    average_degree=20,
                    max_degree=50,
                    min_community=20,
                    max_community=100
                )

                # Run Louvain
                louvain_alg = get_algorithm('louvain')
                louvain_partition = louvain_alg.detect_communities(g)
                louvain_nmi_value = compare_partitions(true_communities, louvain_partition)
                louvain_nmi.append(louvain_nmi_value)

                # Run FastConsensus
                fastconsensus_partition = fast_consensus_clustering(g, n_partitions=10, threshold=0.2, algorithm='louvain')
                fastconsensus_nmi_value = compare_partitions(true_communities, fastconsensus_partition)
                fastconsensus_nmi.append(fastconsensus_nmi_value)

                logging.info(f"μ={mu:.2f}, Run {run+1}/{n_runs}: Louvain NMI={louvain_nmi_value:.4f}, FastConsensus NMI={fastconsensus_nmi_value:.4f}")

            except ValueError as e:
                logging.error(f"Error generating graph for μ={mu}, run {run+1}: {str(e)}")
                continue

        if louvain_nmi and fastconsensus_nmi:
            avg_louvain_nmi = np.mean(louvain_nmi)
            avg_fastconsensus_nmi = np.mean(fastconsensus_nmi)
            results['Louvain'].append(avg_louvain_nmi)
            results['FastConsensus'].append(avg_fastconsensus_nmi)
            logging.info(f"Average NMI for μ={mu:.2f}: Louvain={avg_louvain_nmi:.4f}, FastConsensus={avg_fastconsensus_nmi:.4f}")
        else:
            results['Louvain'].append(None)
            results['FastConsensus'].append(None)
            logging.warning(f"No valid results for μ={mu:.2f}")

    return results

def plot_results(mu_values, results, n_nodes):
    plt.figure(figsize=(10, 6))

    for algorithm in ['Louvain', 'FastConsensus']:
        valid_results = [(mu, nmi) for mu, nmi in zip(mu_values, results[algorithm]) if nmi is not None]
        if valid_results:
            mu_vals, nmi_vals = zip(*valid_results)
            plt.plot(mu_vals, nmi_vals, '-o', label=algorithm)

    plt.xlabel('Mixing parameter μ')
    plt.ylabel('Normalized Mutual Information')
    plt.title(f'Louvain vs FastConsensus on LFR Benchmark ({n_nodes} nodes)')
    plt.legend()
    plt.grid(True, linestyle='--', alpha=0.7)
    plt.tight_layout()
    plt.savefig('louvain_vs_fastconsensus_lfr.png', dpi=300)
    plt.show()

if __name__ == "__main__":
    n_runs = 2  # Number of runs for each μ value
    n_nodes = 100  # Number of nodes in the LFR graph
    mu_values = np.arange(0.1, 0.81, 0.05)  # μ values from 0.1 to 0.8 in steps of 0.05

    logging.info(f"Starting experiment with {n_runs} runs for each of {len(mu_values)} μ values, on graphs with {n_nodes} nodes")
    results = run_experiment(n_runs, n_nodes, mu_values)
    logging.info("Experiment completed. Plotting results.")
    plot_results(mu_values, results, n_nodes)
    logging.info("Results plotted and saved as 'louvain_vs_fastconsensus_lfr.png'")
--------------------------------------------------------------------------------
/notebooks/lfr_test_notebook.ipynb:
--------------------------------------------------------------------------------
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# FastConsensus on LFR Benchmark Graphs\n",
    "\n",
    "This notebook demonstrates the usage of the FastConsensus algorithm on LFR benchmark graphs and compares its performance with individual community detection algorithms."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "import os\n",
    "sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..', 'src')))\n",
    "\n",
    "import igraph as ig\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "from fastconsensus.algorithms import get_algorithm\n",
    "from fastconsensus.core import fast_consensus_clustering\n",
    "from fastconsensus.utils import calculate_modularity, compare_partitions\n",
    "\n",
    "# If you have networkx and community installed, uncomment the following lines\n",
    "# import networkx as nx\n",
    "# import community as community_louvain\n",
    "# from networkx.generators.community import LFR_benchmark_graph"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Generate LFR Benchmark Graph\n",
    "\n",
    "Note: This function requires the networkx and community libraries. If you don't have them installed, you can use a pre-generated LFR graph or implement your own LFR generator."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def generate_lfr_graph(n, tau1, tau2, mu, average_degree, max_degree, min_community, max_community):\n",
    "    # Uncomment the following lines if you have networkx and community installed\n",
    "    # G = LFR_benchmark_graph(n, tau1, tau2, mu, average_degree=average_degree, max_degree=max_degree,\n",
    "    #                         min_community=min_community, max_community=max_community)\n",
    "    # # Convert to igraph\n",
    "    # edges = list(G.edges())\n",
    "    # g = ig.Graph(n=n, edges=edges)\n",
    "    # # Get ground truth communities\n",
    "    # true_communities = {node: G.nodes[node]['community'] for node in G.nodes()}\n",
    "    # return g, true_communities\n",
    "    \n",
    "    # For demonstration, we'll create a random graph instead\n",
    "    g = ig.Graph.Erdos_Renyi(n=n, m=int(n * average_degree / 2))\n",
    "    true_communities = {i: i % 5 for i in range(n)}  # Assign placeholder communities in round-robin order\n",
    "    return g, true_communities\n",
    "\n",
    "# Generate LFR benchmark graph\n",
    "n = 1000\n",
    "tau1 = 2.5\n",
    "tau2 = 1.5\n",
    "mu = 0.1\n",
    "average_degree = 20\n",
    "max_degree = 50\n",
    "min_community = 20\n",
    "max_community = 100\n",
    "\n",
    "g, true_communities = generate_lfr_graph(n, tau1, tau2, mu, average_degree, max_degree, min_community, max_community)\n",
    "print(f\"Generated graph with {g.vcount()} nodes and {g.ecount()} edges\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Apply community detection algorithms"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "algorithms = ['louvain', 'label_propagation']\n",
    "results = {}\n",
    "\n",
    "for alg_name in algorithms:\n",
    "    alg = get_algorithm(alg_name)\n",
    "    partition = alg.detect_communities(g)\n",
    "    results[alg_name] = partition\n",
    "    print(f\"{alg_name.capitalize()} algorithm detected {len(set(partition.values()))} communities\")\n",
    "\n",
    "# Apply FastConsensus\n",
    "fast_consensus_partition = fast_consensus_clustering(g, n_partitions=20, threshold=0.2, algorithm='louvain')\n",
    "results['FastConsensus'] = fast_consensus_partition\n",
    "print(f\"FastConsensus detected {len(set(fast_consensus_partition.values()))} communities\")"
   ]
  },
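  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A note on the parameters above (informal guidance, not from a tuning study): `n_partitions` controls how many input partitions feed the consensus matrix, and `threshold` controls how aggressively weak consensus edges are pruned. The values used here (20 and 0.2) simply match the package defaults."
   ]
  },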
"print(f\"FastConsensus detected {len(set(fast_consensus_partition.values()))} communities\")" 103 | ] 104 | }, 105 | { 106 | "cell_type": "markdown", 107 | "metadata": {}, 108 | "source": [ 109 | "## Evaluate results" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": null, 115 | "metadata": {}, 116 | "source": [ 117 | "# Calculate modularity for each partition\n", 118 | "print(\"Modularity scores:\")\n", 119 | "for alg_name, partition in results.items():\n", 120 | " modularity = calculate_modularity(g, partition)\n", 121 | " print(f\"{alg_name}: {modularity:.4f}\")\n", 122 | "\n", 123 | "# Compare partitions with ground truth\n", 124 | "print(\"\\nNormalized Mutual Information (NMI) with ground truth:\")\n", 125 | "for alg_name, partition in results.items():\n", 126 | " nmi = compare_partitions(true_communities, partition)\n", 127 | " print(f\"{alg_name}: {nmi:.4f}\")\n", 128 | "\n", 129 | "# Compare partitions with each other\n", 130 | "print(\"\\nNormalized Mutual Information (NMI) between partitions:\")\n", 131 | "for i, (alg1, partition1) in enumerate(results.items()):\n", 132 | " for alg2, partition2 in list(results.items())[i+1:]:\n", 133 | " nmi = compare_partitions(partition1, partition2)\n", 134 | " print(f\"{alg1} vs {alg2}: {nmi:.4f}\")" 135 | ] 136 | }, 137 | { 138 | "cell_type": "markdown", 139 | "metadata": {}, 140 | "source": [ 141 | "## Visualize results" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": null, 147 | "metadata": {}, 148 | "source": [ 149 | "def plot_communities(g, partition, title):\n", 150 | " color_list = plt.cm.tab20(np.linspace(0, 1, 20))\n", 151 | " node_colors = [color_list[c % 20] for c in partition.values()]\n", 152 | " \n", 153 | " layout = g.layout_fruchterman_reingold()\n", 154 | " visual_style = {\n", 155 | " \"vertex_size\": 7,\n", 156 | " \"vertex_color\": node_colors,\n", 157 | " \"edge_width\": 0.5,\n", 158 | " \"layout\": layout,\n", 159 | " \"bbox\": (600, 600),\n", 160 | " \"margin\": 20\n", 161 | " }\n", 162 | " \n", 163 | " fig, ax = plt.subplots(figsize=(10, 10))\n", 164 | " ig.plot(g, target=ax, **visual_style)\n", 165 | " plt.title(title)\n", 166 | " plt.axis('off')\n", 167 | " plt.tight_layout()\n", 168 | " plt.show()\n", 169 | "\n", 170 | "# Plot ground truth and detected communities\n", 171 | "plot_communities(g, true_communities, \"Ground Truth Communities\")\n", 172 | "for alg_name, partition in results.items():\n", 173 | " plot_communities(g, partition, f\"{alg_name} Communities\")" 174 | ] 175 | } 176 | ], 177 | "metadata": { 178 | "kernelspec": { 179 | "display_name": "Python 3", 180 | "language": "python", 181 | "name": "python3" 182 | }, 183 | "language_info": { 184 | "codemirror_mode": { 185 | "name": "ipython", 186 | "version": 3 187 | }, 188 | "file_extension": ".py", 189 | "mimetype": "text/x-python", 190 | "name": "python", 191 | "nbconvert_exporter": "python", 192 | "pygments_lexer": "ipython3", 193 | "version": "3.9.0" 194 | } 195 | }, 196 | "nbformat": 4, 197 | "nbformat_minor": 4 198 | } -------------------------------------------------------------------------------- /src/fastconsensus/core.py: -------------------------------------------------------------------------------- 1 | import igraph as ig 2 | import random 3 | from typing import Dict, List 4 | from .algorithms import get_algorithm 5 | 6 | def fast_consensus_clustering(graph: ig.Graph, n_partitions: int = 20, threshold: float = 0.2, 7 | max_triads: int = None, algorithm: str = 'louvain', 8 | 

def construct_sparse_consensus_matrix(previous_matrix: ig.Graph, partitions: List[Dict[int, int]]) -> ig.Graph:
    """
    Construct a new sparse consensus matrix based on the previous iteration's matrix.

    Problem: Create a consensus matrix that represents the agreement between multiple partitions,
    while maintaining the sparsity of the original graph.

    Approach:
    1. Create a copy of the previous matrix.
    2. For each edge in the copy, calculate the fraction of partitions where
       the connected nodes are in the same community.
    3. Set the edge weight to this fraction.

    :param previous_matrix: igraph Graph object representing the previous consensus matrix
    :param partitions: List of partitions, each represented as a dictionary (node_id: community_id)
    :return: Updated sparse consensus matrix as an igraph Graph object
    """
    new_matrix = previous_matrix.copy()

    # Update the weight of each existing edge in place; adding new edges here
    # would create a parallel duplicate of every edge in the previous matrix.
    for edge in new_matrix.es:
        agreement = sum(1 for partition in partitions
                        if partition[edge.source] == partition[edge.target])
        edge['weight'] = agreement / len(partitions)

    return new_matrix

def threshold_matrix(consensus_matrix: ig.Graph, threshold: float) -> ig.Graph:
    """
    Apply thresholding to the consensus matrix to remove weak connections.

    Problem: Remove edges with low weights to focus on strong community structures.

    Approach:
    1. Identify edges with weights below the threshold.
    2. Remove these edges from the graph.

    :param consensus_matrix: igraph Graph object representing the consensus matrix
    :param threshold: Minimum weight for an edge to be retained
    :return: Thresholded consensus matrix as an igraph Graph object
    """
    edges_to_remove = [edge.index for edge in consensus_matrix.es if edge['weight'] < threshold]
    consensus_matrix.delete_edges(edges_to_remove)
    return consensus_matrix

def triadic_closure(consensus_matrix: ig.Graph, max_triads: int, input_partitions: List[Dict[int, int]]) -> ig.Graph:
    """
    Perform triadic closure on the consensus matrix to enhance community structure.

    Problem: Strengthen community structure by closing triangles in the graph.

    Approach:
    1. Randomly select nodes and their pairs of neighbors.
    2. If the neighbors are not connected, add an edge between them.
    3. Set the weight of the new edge based on community overlap in input partitions.
    4. Repeat for max_triads random draws.

    :param consensus_matrix: igraph Graph object representing the consensus matrix
    :param max_triads: Maximum number of triads to close
    :param input_partitions: List of input partitions used to calculate edge weights
    :return: Updated consensus matrix with closed triads
    """
    triads_closed = 0
    nodes = list(range(consensus_matrix.vcount()))
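
    # Each pass draws one random node and one random pair of its neighbours;
    # draws that hit a node of degree < 2, or a pair that is already adjacent,
    # close nothing, so the number of triads actually closed (tracked in
    # triads_closed) can be well below max_triads.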
    for _ in range(max_triads):
        node = random.choice(nodes)
        neighbors = consensus_matrix.neighbors(node)
        if len(neighbors) >= 2:
            u, v = random.sample(neighbors, 2)
            if not consensus_matrix.are_connected(u, v):
                # Calculate the weight based on community overlap
                weight = calculate_overlap_weight(u, v, input_partitions)
                consensus_matrix.add_edge(u, v, weight=weight)
                triads_closed += 1

    return consensus_matrix

def calculate_overlap_weight(u: int, v: int, input_partitions: List[Dict[int, int]]) -> float:
    """
    Calculate the weight of an edge based on community overlap in input partitions.

    Problem: Determine the strength of connection between two nodes based on their
    co-occurrence in the same communities across multiple partitions.

    Approach:
    Count the number of partitions where both nodes are in the same community,
    then divide by the total number of partitions.

    :param u: First node ID
    :param v: Second node ID
    :param input_partitions: List of partitions, each represented as a dictionary (node_id: community_id)
    :return: Calculated weight as a float between 0 and 1
    """
    co_occurrences = sum(1 for partition in input_partitions if partition[u] == partition[v])
    return co_occurrences / len(input_partitions)

def check_convergence(consensus_matrix: ig.Graph, convergence_threshold: float) -> bool:
    """
    Check if the consensus matrix has converged.

    Problem: Determine if the consensus process has reached a stable state.

    Approach:
    1. Count the number of edges with non-binary weights (not 0 or 1).
    2. Calculate the fraction of such edges.
    3. If this fraction is below the convergence threshold, consider it converged.

    :param consensus_matrix: igraph Graph object representing the consensus matrix
    :param convergence_threshold: Fraction of non-binary entries to consider converged
    :return: Boolean indicating whether convergence has been reached
    """
    if consensus_matrix.ecount() == 0:
        # An empty matrix is trivially stable; avoid dividing by zero
        return True
    non_binary_count = sum(1 for edge in consensus_matrix.es if 0 < edge['weight'] < 1)
    return non_binary_count / consensus_matrix.ecount() < convergence_threshold
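
# A minimal end-to-end sketch (assumes the package is installed, e.g. via
# `pip install -e .`; run as `python -m fastconsensus.core` so the relative
# import above resolves):
if __name__ == "__main__":
    demo_graph = ig.Graph.Famous("Zachary")  # igraph's built-in karate-club graph
    demo_partition = fast_consensus_clustering(demo_graph, n_partitions=10, threshold=0.2)
    print(f"Consensus found {len(set(demo_partition.values()))} communities")
--------------------------------------------------------------------------------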