├── requirements.txt
├── src
    └── fastconsensus
    │   ├── io.py
    │   ├── __init__.py
    │   ├── utils.py
    │   ├── algorithms.py
    │   └── core.py
├── .DS_Store
├── setup.py
├── environment.yml
├── LICENSE
├── README.md
├── .gitignore
├── sample_scripts
    └── test_figure.py
└── notebooks
    └── lfr_test_notebook.ipynb


/requirements.txt:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/src/fastconsensus/io.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/src/fastconsensus/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/adityat/fastconsensus/HEAD/.DS_Store


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, find_packages
2 | 
3 | setup(
4 |     name="fastconsensus",
5 |     version="0.1.0",
6 |     packages=find_packages(where="src"),
7 |     package_dir={"": "src"},
8 | )


--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
 1 | name: fastconsensus
 2 | channels:
 3 |   - conda-forge
 4 |   - defaults
 5 | dependencies:
 6 |   - python=3.9
 7 |   - igraph
 8 |   - python-igraph
 9 |   - cairocffi
10 |   - numpy
11 |   - matplotlib
12 |   - jupyter
13 |   - pip


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2024, adityat
 4 | All rights reserved.
 5 | 
 6 | Permission is hereby granted, free of charge, to any person obtaining a copy
 7 | of this software and associated documentation files (the "Software"), to deal
 8 | in the Software without restriction, including without limitation the rights
 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 | 
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Fastconsensus
 2 | 
 3 | Fastconsensus is a Python package that implements a fast consensus clustering algorithm for complex networks. It provides an efficient way to perform community detection on large-scale networks using the igraph library.
 4 | 
 5 | ## Installation
 6 | 
 7 | ### From source
 8 | 
 9 | To install fastconsensus from source, follow these steps:
10 | 
11 | 1. Clone the repository:
12 |    ```bash
13 |    git clone https://github.com/yourusername/fastconsensus.git
14 |    cd fastconsensus
15 |    ```
16 | 
17 | 2. Create a conda environment (optional but recommended):
18 |    ```bash
19 |    conda env create -f environment.yml
20 |    conda activate fastconsensus
21 |    ```
22 | 
23 | 3. Install the package:
24 |    ```bash
25 |    pip install -e .
26 |    ```
27 | 
28 | ## Usage
29 | 
30 | Here's a basic example of how to use fastconsensus:
31 | 
32 | ```python
33 | import igraph as ig
34 | from fastconsensus import fast_consensus_clustering, read_graph_from_file
35 | 
36 | # Read a graph from a file
37 | graph = read_graph_from_file("path/to/your/graph.gml", format="gml")
38 | 
39 | # Perform fast consensus clustering
40 | partition = fast_consensus_clustering(graph, n_partitions=20, threshold=0.2)
41 | 
42 | # Print the resulting partition
43 | print(partition)
44 | ```
45 | 
46 | For more detailed examples and usage scenarios, please refer to the Jupyter notebooks in the `notebooks/` directory.
47 | 
48 | ## Running the Notebooks
49 | 
50 | To run the example notebooks:
51 | 
52 | 1. Ensure you have Jupyter installed in your environment:
53 |    ```bash
54 |    conda install jupyter
55 |    ```
56 | 
57 | 2. Navigate to the `notebooks/` directory and start Jupyter:
58 |    ```bash
59 |    cd notebooks
60 |    jupyter notebook
61 |    ```
62 | 
63 | 3. Open and run the notebook
64 | 
65 | ## Contributing
66 | 
67 | Contributions are welcome! Please feel free to submit a Pull Request.
68 | 
69 | ## License
70 | 
71 | This project is licensed under the MIT License - see the LICENSE file for details.
72 | 


--------------------------------------------------------------------------------
/src/fastconsensus/utils.py:
--------------------------------------------------------------------------------
 1 | import igraph as ig
 2 | from typing import Dict, Any
 3 | import math
 4 | 
 5 | def calculate_modularity(graph: ig.Graph, partition: Dict[int, Any]) -> float:
 6 |     """
 7 |     Calculate modularity of a partition.
 8 | 
 9 |     :param graph: Graph object
10 |     :param partition: Node to community mapping
11 |     :return: Modularity score
12 |     """
13 |     return graph.modularity(list(partition.values()))
14 | 
15 | def compare_partitions(partition1: Dict[int, Any], partition2: Dict[int, Any]) -> float:
16 |     """
17 |     Compare partitions using Normalized Mutual Information (NMI).
18 | 
19 |     :param partition1: First partition
20 |     :param partition2: Second partition
21 |     :return: NMI score
22 |     :raises ValueError: If partitions have different node sets
23 |     """
24 |     if set(partition1.keys()) != set(partition2.keys()):
25 |         raise ValueError("Partitions must have the same set of nodes")
26 |     
27 |     n = len(partition1)
28 |     
29 |     # Convert all community IDs to strings to ensure they're hashable
30 |     partition1 = {node: str(comm) for node, comm in partition1.items()}
31 |     partition2 = {node: str(comm) for node, comm in partition2.items()}
32 |     
33 |     # Count occurrences of each community
34 |     count1 = {}
35 |     count2 = {}
36 |     for node in partition1:
37 |         count1[partition1[node]] = count1.get(partition1[node], 0) + 1
38 |         count2[partition2[node]] = count2.get(partition2[node], 0) + 1
39 |     
40 |     # Calculate mutual information
41 |     mi = 0
42 |     for c1 in count1:
43 |         for c2 in count2:
44 |             n_ij = sum(1 for node in partition1 if partition1[node] == c1 and partition2[node] == c2)
45 |             if n_ij > 0:
46 |                 mi += (n_ij / n) * math.log2((n * n_ij) / (count1[c1] * count2[c2]))
47 |     
48 |     # Calculate entropies
49 |     h1 = sum(-(count / n) * math.log2(count / n) for count in count1.values())
50 |     h2 = sum(-(count / n) * math.log2(count / n) for count in count2.values())
51 |     
52 |     # Calculate NMI
53 |     return 2 * mi / (h1 + h2) if (h1 + h2) > 0 else 0


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | pip-wheel-metadata/
 24 | share/python-wheels/
 25 | *.egg-info/
 26 | .installed.cfg
 27 | *.egg
 28 | MANIFEST
 29 | 
 30 | # PyInstaller
 31 | #  Usually these files are written by a python script from a template
 32 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 33 | *.manifest
 34 | *.spec
 35 | 
 36 | # Installer logs
 37 | pip-log.txt
 38 | pip-delete-this-directory.txt
 39 | 
 40 | # Unit test / coverage reports
 41 | htmlcov/
 42 | .tox/
 43 | .nox/
 44 | .coverage
 45 | .coverage.*
 46 | .cache
 47 | nosetests.xml
 48 | coverage.xml
 49 | *.cover
 50 | *.py,cover
 51 | .hypothesis/
 52 | .pytest_cache/
 53 | 
 54 | # Jupyter Notebook
 55 | .ipynb_checkpoints
 56 | 
 57 | # IPython
 58 | profile_default/
 59 | ipython_config.py
 60 | 
 61 | # pyenv
 62 | .python-version
 63 | 
 64 | # pipenv
 65 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 66 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 67 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 68 | #   install all needed dependencies.
 69 | #Pipfile.lock
 70 | 
 71 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 72 | __pypackages__/
 73 | 
 74 | # Environments
 75 | .env
 76 | .venv
 77 | env/
 78 | venv/
 79 | ENV/
 80 | env.bak/
 81 | venv.bak/
 82 | 
 83 | # Spyder project settings
 84 | .spyderproject
 85 | .spyproject
 86 | 
 87 | # Rope project settings
 88 | .ropeproject
 89 | 
 90 | # mkdocs documentation
 91 | /site
 92 | 
 93 | # mypy
 94 | .mypy_cache/
 95 | .dmypy.json
 96 | dmypy.json
 97 | 
 98 | # Pyre type checker
 99 | .pyre/
100 | 
101 | # PyCharm
102 | .idea/
103 | 
104 | # VS Code
105 | .vscode/
106 | 
107 | # Logs
108 | *.log
109 | 
110 | # OS generated files
111 | .DS_Store
112 | .DS_Store?
113 | ._*
114 | .Spotlight-V100
115 | .Trashes
116 | ehthumbs.db
117 | Thumbs.db


--------------------------------------------------------------------------------
/src/fastconsensus/algorithms.py:
--------------------------------------------------------------------------------
 1 | from abc import ABC, abstractmethod
 2 | import igraph as ig
 3 | import community as cm
 4 | 
 5 | class CommunityDetectionAlgorithm(ABC):
 6 |     """
 7 |     Abstract base class for community detection algorithms.
 8 | 
 9 |     This class defines the interface for community detection algorithms
10 |     to ensure consistent usage across different implementations.
11 |     """
12 | 
13 |     @abstractmethod
14 |     def detect_communities(self, graph: ig.Graph) -> dict:
15 |         """
16 |         Detect communities in the given graph.
17 |         
18 |         :param graph: igraph Graph object
19 |         :return: A dictionary mapping node ids to community ids
20 |         """
21 |         pass
22 | 
23 | class LouvainAlgorithm(CommunityDetectionAlgorithm):
24 |     """
25 |     Implementation of the Louvain community detection algorithm.
26 | 
27 |     This algorithm optimizes modularity in a hierarchical manner.
28 |     """
29 |     def detect_communities(self, graph: ig.Graph, weight=None) -> dict:
30 |         """
31 |         Detect communities using the Louvain algorithm.
32 | 
33 |         Find communities by optimizing modularity in a multi-level approach, 0th level of the method.
34 | 
35 |         :param graph: igraph Graph object
36 |         :param weight: Optional name of the edge attribute to be used as weight
37 |         :return: A dictionary mapping node ids to community ids
38 |         """
39 |         partition = graph.community_multilevel(weights=weight, return_levels=True)[0]
40 |         return {v: partition.membership[v] for v in range(graph.vcount())}
41 | 
42 | class LabelPropagationAlgorithm(CommunityDetectionAlgorithm):
43 |     """
44 |     Implementation of the Label Propagation community detection algorithm.
45 | 
46 |     This algorithm detects communities by propagating labels through the network.
47 |     """
48 |     def detect_communities(self, graph: ig.Graph) -> dict:
49 | 
50 |         """
51 |         Detect communities using the Label Propagation algorithm.
52 |         Use igraph's implementation of the Label Propagation algorithm.
53 | 
54 |         :param graph: igraph Graph object
55 |         :param weight: Optional name of the edge attribute to be used as weight (ignored in this implementation)
56 |         :return: A dictionary mapping node ids to community ids
57 |         """
58 |         partition = graph.community_label_propagation()
59 |         return {v: partition.membership[v] for v in range(graph.vcount())}
60 | 
61 | 
62 | class InfoMapAlgorithm(CommunityDetectionAlgorithm):
63 |     """
64 |     Implementation of the Infomap community detection algorithm.
65 | 
66 |     This algorithm finds communities by optimizing the map equation.
67 |     """
68 |     def detect_communities(self, graph: ig.Graph) -> dict:
69 |         """
70 |         Detect communities using the Infomap algorithm.
71 |         Use igraph's implementation of the Infomap algorithm.
72 | 
73 |         :param graph: igraph Graph object
74 |         :param weight: Optional name of the edge attribute to be used as weight
75 |         :return: A dictionary mapping node ids to community ids
76 |         """
77 |         partition = graph.community_infomap()
78 |         return {v: partition.membership[v] for v in range(graph.vcount())}
79 | 
80 | def get_algorithm(name: str) -> CommunityDetectionAlgorithm:
81 |     """
82 |     Function to get the specified community detection algorithm.
83 | 
84 |     :param name: Name of the algorithm to retrieve
85 |     :return: An instance of the requested CommunityDetectionAlgorithm
86 |     :raises ValueError: If the requested algorithm is not available
87 |     """
88 |     algorithms = {
89 |         'louvain': LouvainAlgorithm(),
90 |         'label_propagation': LabelPropagationAlgorithm(),
91 |          'infomap': InfoMapAlgorithm(),  
92 |     }
93 |     return algorithms[name.lower()]  # This will raise a KeyError if the algorithm is not found


--------------------------------------------------------------------------------
/sample_scripts/test_figure.py:
--------------------------------------------------------------------------------
  1 | import networkx as nx
  2 | import igraph as ig
  3 | import numpy as np
  4 | import matplotlib.pyplot as plt
  5 | from tqdm import tqdm
  6 | import random
  7 | import logging
  8 | from networkx.generators.community import LFR_benchmark_graph
  9 | from fastconsensus.algorithms import get_algorithm
 10 | from fastconsensus.core import fast_consensus_clustering
 11 | from fastconsensus.utils import compare_partitions
 12 | 
 13 | # Set up logging
 14 | logging.basicConfig(level=logging.INFO,
 15 |                     format='%(asctime)s - %(levelname)s - %(message)s',
 16 |                     handlers=[
 17 |                         logging.FileHandler("experiment_results.log"),
 18 |                         logging.StreamHandler()
 19 |                     ])
 20 | 
 21 | def generate_lfr_graph(n, tau1, tau2, mu, average_degree, max_degree, min_community, max_community, max_attempts=10):
 22 |     for attempt in range(max_attempts):
 23 |         try:
 24 |             current_avg_degree = average_degree + random.uniform(-1, 1)
 25 |             
 26 |             G = LFR_benchmark_graph(
 27 |                 n, tau1, tau2, mu, average_degree=current_avg_degree, max_degree=max_degree,
 28 |                 min_community=min_community, max_community=max_community,
 29 |                 tol=1e-3, max_iters=5000
 30 |             )
 31 |             
 32 |             true_communities = {node: frozenset(G.nodes[node]['community']) for node in G.nodes()}
 33 |             edges = list(G.edges())
 34 |             g = ig.Graph(n=G.number_of_nodes(), edges=edges)
 35 |             return g, true_communities
 36 |         
 37 |         except nx.ExceededMaxIterations:
 38 |             if attempt == max_attempts - 1:
 39 |                 raise ValueError(f"Failed to generate LFR graph after {max_attempts} attempts")
 40 |             continue
 41 | 
 42 | def run_experiment(n_runs, n_nodes, mu_values):
 43 |     results = {
 44 |         'Louvain': [],
 45 |         'FastConsensus': []
 46 |     }
 47 |     
 48 |     for mu in tqdm(mu_values, desc="Processing μ values"):
 49 |         louvain_nmi = []
 50 |         fastconsensus_nmi = []
 51 |         
 52 |         for run in range(n_runs):
 53 |             try:
 54 |                 g, true_communities = generate_lfr_graph(
 55 |                     n=n_nodes,
 56 |                     tau1=2,
 57 |                     tau2=2,
 58 |                     mu=mu,
 59 |                     average_degree=20,
 60 |                     max_degree=50,
 61 |                     min_community=20,
 62 |                     max_community=100
 63 |                 )
 64 |                 
 65 |                 # Run Louvain
 66 |                 louvain_alg = get_algorithm('louvain')
 67 |                 louvain_partition = louvain_alg.detect_communities(g)
 68 |                 louvain_nmi_value = compare_partitions(true_communities, louvain_partition)
 69 |                 louvain_nmi.append(louvain_nmi_value)
 70 |                 
 71 |                 # Run FastConsensus
 72 |                 fastconsensus_partition = fast_consensus_clustering(g, n_partitions=10, threshold=0.2, algorithm='louvain')
 73 |                 fastconsensus_nmi_value = compare_partitions(true_communities, fastconsensus_partition)
 74 |                 fastconsensus_nmi.append(fastconsensus_nmi_value)
 75 |                 
 76 |                 logging.info(f"μ={mu:.2f}, Run {run+1}/{n_runs}: Louvain NMI={louvain_nmi_value:.4f}, FastConsensus NMI={fastconsensus_nmi_value:.4f}")
 77 |             
 78 |             except ValueError as e:
 79 |                 logging.error(f"Error generating graph for μ={mu}, run {run+1}: {str(e)}")
 80 |                 continue
 81 |         
 82 |         if louvain_nmi and fastconsensus_nmi:
 83 |             avg_louvain_nmi = np.mean(louvain_nmi)
 84 |             avg_fastconsensus_nmi = np.mean(fastconsensus_nmi)
 85 |             results['Louvain'].append(avg_louvain_nmi)
 86 |             results['FastConsensus'].append(avg_fastconsensus_nmi)
 87 |             logging.info(f"Average NMI for μ={mu:.2f}: Louvain={avg_louvain_nmi:.4f}, FastConsensus={avg_fastconsensus_nmi:.4f}")
 88 |         else:
 89 |             results['Louvain'].append(None)
 90 |             results['FastConsensus'].append(None)
 91 |             logging.warning(f"No valid results for μ={mu:.2f}")
 92 |     
 93 |     return results
 94 | 
 95 | def plot_results(mu_values, results):
 96 |     plt.figure(figsize=(10, 6))
 97 |     
 98 |     for algorithm in ['Louvain', 'FastConsensus']:
 99 |         valid_results = [(mu, nmi) for mu, nmi in zip(mu_values, results[algorithm]) if nmi is not None]
100 |         if valid_results:
101 |             mu_vals, nmi_vals = zip(*valid_results)
102 |             plt.plot(mu_vals, nmi_vals, '-o', label=algorithm)
103 |     
104 |     plt.xlabel('Mixing parameter μ')
105 |     plt.ylabel('Normalized Mutual Information')
106 |     plt.title('Louvain vs FastConsensus on LFR Benchmark (1000 nodes)')
107 |     plt.legend()
108 |     plt.grid(True, linestyle='--', alpha=0.7)
109 |     plt.tight_layout()
110 |     plt.savefig('louvain_vs_fastconsensus_lfr.png', dpi=300)
111 |     plt.show()
112 | 
113 | if __name__ == "__main__":
114 |     n_runs = 2  # Number of runs for each μ value
115 |     n_nodes = 100  # Number of nodes in the LFR graph
116 |     mu_values = np.arange(0.1, 0.81, 0.05)  # μ values from 0.1 to 0.8 with 0.05 step
117 |     
118 |     logging.info(f"Starting experiment with {n_runs} runs for each of {len(mu_values)} μ values, on graphs with {n_nodes} nodes")
119 |     results = run_experiment(n_runs, n_nodes, mu_values)
120 |     logging.info("Experiment completed. Plotting results.")
121 |     plot_results(mu_values, results)
122 |     logging.info("Results plotted and saved as 'louvain_vs_fastconsensus_lfr.png'")


--------------------------------------------------------------------------------
/notebooks/lfr_test_notebook.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |     "cells": [
  3 |      {
  4 |       "cell_type": "markdown",
  5 |       "metadata": {},
  6 |       "source": [
  7 |        "# FastConsensus on LFR Benchmark Graphs\n",
  8 |        "\n",
  9 |        "This notebook demonstrates the usage of the FastConsensus algorithm on LFR benchmark graphs and compares its performance with individual community detection algorithms."
 10 |       ]
 11 |      },
 12 |      {
 13 |       "cell_type": "code",
 14 |       "execution_count": null,
 15 |       "metadata": {},
 16 |       "source": [
 17 |        "import sys\n",
 18 |        "import os\n",
 19 |        "sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..', 'src')))\n",
 20 |        "\n",
 21 |        "import igraph as ig\n",
 22 |        "import numpy as np\n",
 23 |        "import matplotlib.pyplot as plt\n",
 24 |        "from fastconsensus.algorithms import get_algorithm\n",
 25 |        "from fastconsensus.core import fast_consensus_clustering\n",
 26 |        "from fastconsensus.utils import calculate_modularity, compare_partitions\n",
 27 |        "\n",
 28 |        "# If you have networkx and community installed, uncomment the following lines\n",
 29 |        "# import networkx as nx\n",
 30 |        "# import community as community_louvain\n",
 31 |        "# from networkx.generators.community import LFR_benchmark_graph"
 32 |       ]
 33 |      },
 34 |      {
 35 |       "cell_type": "markdown",
 36 |       "metadata": {},
 37 |       "source": [
 38 |        "## Generate LFR Benchmark Graph\n",
 39 |        "\n",
 40 |        "Note: This function requires networkx and community libraries. If you don't have them installed, you can use a pre-generated LFR graph or implement your own LFR generator."
 41 |       ]
 42 |      },
 43 |      {
 44 |       "cell_type": "code",
 45 |       "execution_count": null,
 46 |       "metadata": {},
 47 |       "source": [
 48 |        "def generate_lfr_graph(n, tau1, tau2, mu, average_degree, max_degree, min_community, max_community):\n",
 49 |        "    # Uncomment the following lines if you have networkx and community installed\n",
 50 |        "    # G = LFR_benchmark_graph(n, tau1, tau2, mu, average_degree=average_degree, max_degree=max_degree,\n",
 51 |        "    #                         min_community=min_community, max_community=max_community)\n",
 52 |        "    # # Convert to igraph\n",
 53 |        "    # edges = list(G.edges())\n",
 54 |        "    # g = ig.Graph(n=n, edges=edges)\n",
 55 |        "    # # Get ground truth communities\n",
 56 |        "    # true_communities = {node: G.nodes[node]['community'] for node in G.nodes()}\n",
 57 |        "    # return g, true_communities\n",
 58 |        "    \n",
 59 |        "    # For demonstration, we'll create a random graph instead\n",
 60 |        "    g = ig.Graph.Erdos_Renyi(n=n, m=int(n * average_degree / 2))\n",
 61 |        "    true_communities = {i: i % 5 for i in range(n)}  # Assign random communities\n",
 62 |        "    return g, true_communities\n",
 63 |        "\n",
 64 |        "# Generate LFR benchmark graph\n",
 65 |        "n = 1000\n",
 66 |        "tau1 = 2.5\n",
 67 |        "tau2 = 1.5\n",
 68 |        "mu = 0.1\n",
 69 |        "average_degree = 20\n",
 70 |        "max_degree = 50\n",
 71 |        "min_community = 20\n",
 72 |        "max_community = 100\n",
 73 |        "\n",
 74 |        "g, true_communities = generate_lfr_graph(n, tau1, tau2, mu, average_degree, max_degree, min_community, max_community)\n",
 75 |        "print(f\"Generated LFR benchmark graph with {g.vcount()} nodes and {g.ecount()} edges\")"
 76 |       ]
 77 |      },
 78 |      {
 79 |       "cell_type": "markdown",
 80 |       "metadata": {},
 81 |       "source": [
 82 |        "## Apply community detection algorithms"
 83 |       ]
 84 |      },
 85 |      {
 86 |       "cell_type": "code",
 87 |       "execution_count": null,
 88 |       "metadata": {},
 89 |       "source": [
 90 |        "algorithms = ['louvain', 'label_propagation']\n",
 91 |        "results = {}\n",
 92 |        "\n",
 93 |        "for alg_name in algorithms:\n",
 94 |        "    alg = get_algorithm(alg_name)\n",
 95 |        "    partition = alg.detect_communities(g)\n",
 96 |        "    results[alg_name] = partition\n",
 97 |        "    print(f\"{alg_name.capitalize()} algorithm detected {len(set(partition.values()))} communities\")\n",
 98 |        "\n",
 99 |        "# Apply FastConsensus\n",
100 |        "fast_consensus_partition = fast_consensus_clustering(g, n_partitions=20, threshold=0.2, algorithm='louvain')\n",
101 |        "results['FastConsensus'] = fast_consensus_partition\n",
102 |        "print(f\"FastConsensus detected {len(set(fast_consensus_partition.values()))} communities\")"
103 |       ]
104 |      },
105 |      {
106 |       "cell_type": "markdown",
107 |       "metadata": {},
108 |       "source": [
109 |        "## Evaluate results"
110 |       ]
111 |      },
112 |      {
113 |       "cell_type": "code",
114 |       "execution_count": null,
115 |       "metadata": {},
116 |       "source": [
117 |        "# Calculate modularity for each partition\n",
118 |        "print(\"Modularity scores:\")\n",
119 |        "for alg_name, partition in results.items():\n",
120 |        "    modularity = calculate_modularity(g, partition)\n",
121 |        "    print(f\"{alg_name}: {modularity:.4f}\")\n",
122 |        "\n",
123 |        "# Compare partitions with ground truth\n",
124 |        "print(\"\\nNormalized Mutual Information (NMI) with ground truth:\")\n",
125 |        "for alg_name, partition in results.items():\n",
126 |        "    nmi = compare_partitions(true_communities, partition)\n",
127 |        "    print(f\"{alg_name}: {nmi:.4f}\")\n",
128 |        "\n",
129 |        "# Compare partitions with each other\n",
130 |        "print(\"\\nNormalized Mutual Information (NMI) between partitions:\")\n",
131 |        "for i, (alg1, partition1) in enumerate(results.items()):\n",
132 |        "    for alg2, partition2 in list(results.items())[i+1:]:\n",
133 |        "        nmi = compare_partitions(partition1, partition2)\n",
134 |        "        print(f\"{alg1} vs {alg2}: {nmi:.4f}\")"
135 |       ]
136 |      },
137 |      {
138 |       "cell_type": "markdown",
139 |       "metadata": {},
140 |       "source": [
141 |        "## Visualize results"
142 |       ]
143 |      },
144 |      {
145 |       "cell_type": "code",
146 |       "execution_count": null,
147 |       "metadata": {},
148 |       "source": [
149 |        "def plot_communities(g, partition, title):\n",
150 |        "    color_list = plt.cm.tab20(np.linspace(0, 1, 20))\n",
151 |        "    node_colors = [color_list[c % 20] for c in partition.values()]\n",
152 |        "    \n",
153 |        "    layout = g.layout_fruchterman_reingold()\n",
154 |        "    visual_style = {\n",
155 |        "        \"vertex_size\": 7,\n",
156 |        "        \"vertex_color\": node_colors,\n",
157 |        "        \"edge_width\": 0.5,\n",
158 |        "        \"layout\": layout,\n",
159 |        "        \"bbox\": (600, 600),\n",
160 |        "        \"margin\": 20\n",
161 |        "    }\n",
162 |        "    \n",
163 |        "    fig, ax = plt.subplots(figsize=(10, 10))\n",
164 |        "    ig.plot(g, target=ax, **visual_style)\n",
165 |        "    plt.title(title)\n",
166 |        "    plt.axis('off')\n",
167 |        "    plt.tight_layout()\n",
168 |        "    plt.show()\n",
169 |        "\n",
170 |        "# Plot ground truth and detected communities\n",
171 |        "plot_communities(g, true_communities, \"Ground Truth Communities\")\n",
172 |        "for alg_name, partition in results.items():\n",
173 |        "    plot_communities(g, partition, f\"{alg_name} Communities\")"
174 |       ]
175 |      }
176 |     ],
177 |     "metadata": {
178 |      "kernelspec": {
179 |       "display_name": "Python 3",
180 |       "language": "python",
181 |       "name": "python3"
182 |      },
183 |      "language_info": {
184 |       "codemirror_mode": {
185 |        "name": "ipython",
186 |        "version": 3
187 |       },
188 |       "file_extension": ".py",
189 |       "mimetype": "text/x-python",
190 |       "name": "python",
191 |       "nbconvert_exporter": "python",
192 |       "pygments_lexer": "ipython3",
193 |       "version": "3.9.0"
194 |      }
195 |     },
196 |     "nbformat": 4,
197 |     "nbformat_minor": 4
198 |    }


--------------------------------------------------------------------------------
/src/fastconsensus/core.py:
--------------------------------------------------------------------------------
  1 | import igraph as ig
  2 | import random
  3 | from typing import Dict, List
  4 | from .algorithms import get_algorithm
  5 | 
  6 | def fast_consensus_clustering(graph: ig.Graph, n_partitions: int = 20, threshold: float = 0.2, 
  7 |                               max_triads: int = None, algorithm: str = 'louvain',
  8 |                               convergence_threshold: float = 0.05, max_iterations: int = 100) -> Dict[int, int]:
  9 |     """
 10 |     Perform fast consensus clustering on the given graph.
 11 | 
 12 |     Problem: Given a graph, find a robust community structure by combining multiple partitions
 13 |     generated by a chosen community detection algorithm.
 14 | 
 15 |     Approach:
 16 |     1. Generate multiple partitions using the specified algorithm.
 17 |     2. Construct a sparse consensus matrix based on these partitions.
 18 |     3. Apply thresholding to remove weak connections.
 19 |     4. Perform triadic closure to enhance community structure.
 20 |     5. Repeat steps 1-4 until convergence or max iterations reached.
 21 | 
 22 |     :param graph: igraph Graph object
 23 |     :param n_partitions: Number of input partitions to generate
 24 |     :param threshold: Threshold for consensus matrix entries
 25 |     :param max_triads: Maximum number of triads to close (default: number of edges)
 26 |     :param algorithm: Name of the community detection algorithm to use
 27 |     :param convergence_threshold: Fraction of non-binary entries in consensus matrix to consider converged
 28 |     :param max_iterations: Maximum number of iterations before stopping
 29 |     :return: Final consensus partition as a dictionary (node_id: community_id)
 30 |     :raises ValueError: If the specified algorithm is not supported
 31 |     """
 32 |     if max_triads is None:
 33 |         max_triads = graph.ecount()
 34 |     
 35 |     community_algorithm = get_algorithm(algorithm)
 36 |     consensus_matrix = graph.copy()
 37 |     
 38 |     # Initialize weights if they don't exist
 39 |     if 'weight' not in consensus_matrix.edge_attributes():
 40 |         consensus_matrix.es['weight'] = 1.0
 41 |     
 42 |     for iteration in range(max_iterations):
 43 |         print(iteration, end = '\r')
 44 |         max_triads = consensus_matrix.ecount()
 45 |         # Generate partitions
 46 |         partitions = [community_algorithm.detect_communities(consensus_matrix, weight='weight') for _ in range(n_partitions)]
 47 |         
 48 |         # Construct sparse consensus matrix
 49 |         consensus_matrix = construct_sparse_consensus_matrix(consensus_matrix, partitions)
 50 |         
 51 |         # Thresholding
 52 |         consensus_matrix = threshold_matrix(consensus_matrix, threshold)
 53 | 
 54 |         # Triadic closure
 55 |         consensus_matrix = triadic_closure(consensus_matrix, max_triads, partitions)
 56 |         
 57 |         # Check for convergence
 58 |         if check_convergence(consensus_matrix, convergence_threshold):
 59 |             break
 60 |     
 61 |     # Final partition
 62 |     final_partition = community_algorithm.detect_communities(consensus_matrix)
 63 |     return final_partition
 64 | 
 65 | def construct_sparse_consensus_matrix(previous_matrix: ig.Graph, partitions: List[Dict[int, int]]) -> ig.Graph:
 66 |     """
 67 |     Construct a new sparse consensus matrix based on the previous iteration's matrix.
 68 | 
 69 |     Problem: Create a consensus matrix that represents the agreement between multiple partitions,
 70 |     while maintaining the sparsity of the original graph.
 71 | 
 72 |     Approach:
 73 |     1. Create a copy of the previous matrix.
 74 |     2. Reset all edge weights to zero.
 75 |     3. For each edge in the previous matrix, calculate the fraction of partitions where
 76 |        the connected nodes are in the same community.
 77 |     4. Set the edge weight to this fraction.
 78 | 
 79 |     :param previous_matrix: igraph Graph object representing the previous consensus matrix
 80 |     :param partitions: List of partitions, each represented as a dictionary (node_id: community_id)
 81 |     :return: Updated sparse consensus matrix as an igraph Graph object
 82 |     """
 83 |     n = previous_matrix.vcount()
 84 |     new_matrix = previous_matrix.copy()
 85 |     new_matrix.es['weight'] = 0.0  # Reset all weights
 86 | 
 87 |     # Iterate over existing edges in the previous matrix, not the original graph
 88 |     for edge in previous_matrix.es:
 89 |         weight = sum(1 for partition in partitions if partition[edge.source] == partition[edge.target])
 90 |         if weight > 0:
 91 |             new_matrix.add_edge(edge.source, edge.target, weight=weight / len(partitions))
 92 | 
 93 |     return new_matrix
 94 | 
 95 | def threshold_matrix(consensus_matrix: ig.Graph, threshold: float) -> ig.Graph:
 96 |     """
 97 |     Apply thresholding to the consensus matrix to remove weak connections.
 98 | 
 99 |     Problem: Remove edges with low weights to focus on strong community structures.
100 | 
101 |     Approach:
102 |     1. Identify edges with weights below the threshold.
103 |     2. Remove these edges from the graph.
104 | 
105 |     :param consensus_matrix: igraph Graph object representing the consensus matrix
106 |     :param threshold: Minimum weight for an edge to be retained
107 |     :return: Thresholded consensus matrix as an igraph Graph object
108 |     """
109 |     edges_to_remove = [edge.index for edge in consensus_matrix.es if edge['weight'] < threshold]
110 |     consensus_matrix.delete_edges(edges_to_remove)
111 |     return consensus_matrix
112 | 
113 | def triadic_closure(consensus_matrix: ig.Graph, max_triads: int, input_partitions: List[Dict[int, int]]) -> ig.Graph:
114 |     """
115 |     Perform triadic closure on the consensus matrix to enhance community structure.
116 | 
117 |     Problem: Strengthen community structure by closing triangles in the graph.
118 | 
119 |     Approach:
120 |     1. Randomly select nodes and their pairs of neighbors.
121 |     2. If the neighbors are not connected, add an edge between them.
122 |     3. Set the weight of the new edge based on community overlap in input partitions.
123 |     4. Repeat until max_triads is reached or no more triads can be closed.
124 | 
125 |     :param consensus_matrix: igraph Graph object representing the consensus matrix
126 |     :param max_triads: Maximum number of triads to close
127 |     :param input_partitions: List of input partitions used to calculate edge weights
128 |     :return: Updated consensus matrix with closed triads
129 |     """
130 |     triads_closed = 0
131 |     nodes = list(range(consensus_matrix.vcount()))
132 |     
133 |     for _ in range(max_triads):
134 |         
135 |         node = random.choice(nodes)
136 |         neighbors = consensus_matrix.neighbors(node)
137 |         if len(neighbors) >= 2:
138 |             u, v = random.sample(neighbors, 2)
139 |             if not consensus_matrix.are_connected(u, v):
140 |                 # Calculate the weight based on community overlap
141 |                 weight = calculate_overlap_weight(u, v, input_partitions)
142 |                 consensus_matrix.add_edge(u, v, weight=weight)
143 |                 triads_closed += 1
144 |         
145 |     
146 |     #print('triads closed = ', triads_closed)
147 |     return consensus_matrix
148 | 
149 | def calculate_overlap_weight(u: int, v: int, input_partitions: List[Dict[int, int]]) -> float:
150 |     """
151 |     Calculate the weight of an edge based on community overlap in input partitions.
152 | 
153 |     Problem: Determine the strength of connection between two nodes based on their
154 |     co-occurrence in the same communities across multiple partitions.
155 | 
156 |     Approach:
157 |     Count the number of partitions where both nodes are in the same community,
158 |     then divide by the total number of partitions.
159 | 
160 |     :param u: First node ID
161 |     :param v: Second node ID
162 |     :param input_partitions: List of partitions, each represented as a dictionary (node_id: community_id)
163 |     :return: Calculated weight as a float between 0 and 1
164 |     """
165 |     co_occurrences = sum(1 for partition in input_partitions if partition[u] == partition[v])
166 |     return co_occurrences / len(input_partitions)
167 | 
168 | def check_convergence(consensus_matrix: ig.Graph, convergence_threshold: float) -> bool:
169 |     """
170 |     Check if the consensus matrix has converged.
171 | 
172 |     Problem: Determine if the consensus process has reached a stable state.
173 | 
174 |     Approach:
175 |     1. Count the number of edges with non-binary weights (not 0 or 1).
176 |     2. Calculate the fraction of such edges.
177 |     3. If this fraction is below the convergence threshold, consider it converged.
178 | 
179 |     :param consensus_matrix: igraph Graph object representing the consensus matrix
180 |     :param convergence_threshold: Fraction of non-binary entries to consider converged
181 |     :return: Boolean indicating whether convergence has been reached
182 |     """
183 |     non_binary_count = sum(1 for edge in consensus_matrix.es if 0 < edge['weight'] < 1)
184 |     return non_binary_count / consensus_matrix.ecount() < convergence_threshold


--------------------------------------------------------------------------------