├── docs
├── .touch
├── tutorials
│   ├── plotting
│   │   ├── data
│   │   │   └── sub-0025427_ses-1_dwi_desikan.npy
│   │   ├── gridplot.ipynb
│   │   └── pairplot.ipynb
│   └── simulations
│   │   ├── erdos_renyi.ipynb
│   │   └── rdpg.ipynb
├── reference
│   ├── reference
│   │   ├── match.rst
│   │   ├── subgraph.rst
│   │   ├── nominate.rst
│   │   ├── preconditions.rst
│   │   ├── align.rst
│   │   ├── inference.rst
│   │   ├── datasets.rst
│   │   ├── index.rst
│   │   ├── simulations.rst
│   │   ├── cluster.rst
│   │   ├── layouts.rst
│   │   ├── pipeline.rst
│   │   ├── partition.rst
│   │   ├── models.rst
│   │   ├── plotting.rst
│   │   ├── utils.rst
│   │   ├── embed.rst
│   │   └── preprocessing.rst
│   ├── index.rst
│   ├── contributing.rst
│   ├── release
│   │   ├── release_template.rst
│   │   ├── graspy_releases.rst
│   │   ├── release_0.0.1.rst
│   │   ├── release_0.1.rst
│   │   ├── release_0.3.rst
│   │   ├── release_0.0.3.rst
│   │   ├── release_0.2.rst
│   │   └── release_0.0.2.rst
│   ├── in-the-wild.rst
│   ├── install.rst
│   └── cli.rst
├── _templates
│   └── numpydoc_docstring.rst
├── license.rst
├── sphinx-ext
│   └── toctree_filter.py
└── index.rst
├── tests
├── __init__.py
├── embed
│   └── __init__.py
├── cluster
│   ├── __init__.py
│   └── test_kclust.py
├── layouts
│   ├── __init__.py
│   ├── nooverlap
│   │   ├── __init__.py
│   │   ├── test_grid_cell_creation.py
│   │   └── test_grid.py
│   └── test_auto.py
├── partition
│   ├── __init__.py
│   └── test_modularity.py
├── pipeline
│   ├── __init__.py
│   └── embed
│   │   ├── __init__.py
│   │   └── test_embeddings.py
├── preprocessing
│   └── __init__.py
├── utils.py
├── test_data
│   └── actor_bipartite_graph.csv
├── test_datasets.py
├── test_preconditions.py
├── test_mug2vec.py
├── test_base_embed.py
├── test_rdpg_corr.py
├── test_sg.py
├── test_svd.py
├── test_er_and_group_connection_tests.py
├── test_select_dimension.py
├── test_vertex_nomination_via_SGM.py
├── test_sign_flips.py
├── test_latentpositiontest.py
├── test_spectral_nomination.py
├── test_mds.py
├── test_n2v.py
└── test_io.py
├── graspologic
├── py.typed
├── pipeline
│   ├── embed
│   │   ├── _types.py
│   │   ├── __init__.py
│   │   └── _elbow.py
│   ├── __init__.py
│   └── graph_builder.py
├── match
│   ├── __init__.py
│   └── types.py
├── subgraph
│   └── __init__.py
├── layouts
│   ├── nooverlap
│   │   ├── __init__.py
│   │   ├── _node.py
│   │   ├── nooverlap.py
│   │   └── _quad_tree.py
│   ├── classes.py
│   └── __init__.py
├── datasets
│   ├── __init__.py
│   ├── mice
│   │   ├── blocks.csv
│   │   └── participants.csv
│   └── drosophila
│   │   ├── left_cell_labels.csv
│   │   └── right_cell_labels.csv
├── nominate
│   └── __init__.py
├── align
│   ├── __init__.py
│   ├── sign_flips.py
│   └── orthogonal_procrustes.py
├── cluster
│   ├── __init__.py
│   ├── base.py
│   └── kclust.py
├── inference
│   ├── __init__.py
│   ├── utils.py
│   └── binomial.py
├── partition
│   └── __init__.py
├── version.py
├── models
│   └── __init__.py
├── simulations
│   └── __init__.py
├── plot
│   └── __init__.py
├── __init__.py
├── preprocessing
│   └── __init__.py
├── embed
│   └── __init__.py
├── utils
│   ├── __init__.py
│   └── ptr.py
├── types.py
└── preconditions.py
├── mypi.ini
├── .gitattributes
├── .github
├── ISSUE_TEMPLATE
│   ├── config.yml
│   ├── usage-question.md
│   ├── bug_report.md
│   └── feature_request.md
├── workflows
│   ├── report.yml
│   ├── publish.yml
│   └── build.yml
├── CODEOWNERS
└── PULL_REQUEST_TEMPLATE.md
├── .coveragerc
├── .readthedocs.yml
├── pytest.ini
├── LICENSE.txt
├── CITATION.cff
├── mypy.ini
├── .gitignore
├── ROLES.md
├── SECURITY.md
└── README.md
/docs/.touch:
--------------------------------------------------------------------------------
1 | 
-------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /graspologic/py.typed: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/embed/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/cluster/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/layouts/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/partition/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/pipeline/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/pipeline/embed/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/preprocessing/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/layouts/nooverlap/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mypi.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | ignore_missing_imports = True -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | *.ipynb linguist-vendored=true 2 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | 3 | -------------------------------------------------------------------------------- /graspologic/pipeline/embed/_types.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | 3 | import networkx as nx 4 | 5 | NxGraphType = Union[nx.Graph, nx.DiGraph] 6 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [report] 2 | exclude_lines = 3 | # Have to re-enable the standard pragma 4 | pragma: no cover 5 | @abstract 6 | NotImplementedError -------------------------------------------------------------------------------- /docs/tutorials/plotting/data/sub-0025427_ses-1_dwi_desikan.npy: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/graspologic-org/graspologic/HEAD/docs/tutorials/plotting/data/sub-0025427_ses-1_dwi_desikan.npy -------------------------------------------------------------------------------- /docs/reference/reference/match.rst: -------------------------------------------------------------------------------- 1 | Matching 2 | ======== 3 | 4 | .. currentmodule:: graspologic.match 5 | 6 | Graph Matching 7 | -------------------- 8 | .. autofunction:: graph_match 9 | -------------------------------------------------------------------------------- /.github/workflows/report.yml: -------------------------------------------------------------------------------- 1 | name: graspologic Reporting 2 | on: 3 | schedule: 4 | - cron: "8 16 * * *" # 8:08am PST 5 | jobs: 6 | build: 7 | uses: ./.github/workflows/build.yml 8 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | sphinx: 3 | configuration: docs/reference/conf.py 4 | 5 | python: 6 | version: 3.9 7 | install: 8 | - requirements: requirements.txt 9 | 10 | -------------------------------------------------------------------------------- /graspologic/match/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 3 | 4 | from .wrappers import graph_match 5 | 6 | __all__ = ["graph_match"] 7 | -------------------------------------------------------------------------------- /graspologic/subgraph/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 3 | 4 | 5 | from .sg import SignalSubgraph 6 | 7 | __all__ = ["SignalSubgraph"] 8 | -------------------------------------------------------------------------------- /docs/reference/index.rst: -------------------------------------------------------------------------------- 1 | .. -*- coding: utf-8 -*- 2 | 3 | .. toctree:: 4 | :maxdepth: 1 5 | 6 | install 7 | cli 8 | contributing 9 | in-the-wild 10 | release 11 | reference/index 12 | 13 | -------------------------------------------------------------------------------- /docs/reference/contributing.rst: -------------------------------------------------------------------------------- 1 | Contributing to graspologic 2 | =========================== 3 | 4 | Please see: `Contributing to graspologic `_. 5 | 6 | -------------------------------------------------------------------------------- /docs/reference/reference/subgraph.rst: -------------------------------------------------------------------------------- 1 | Subgraph 2 | ======== 3 | 4 | .. currentmodule:: graspologic.subgraph 5 | 6 | Signal-Subgraph Estimators 7 | -------------------------- 8 | 9 | .. autoclass:: SignalSubgraph 10 | -------------------------------------------------------------------------------- /graspologic/layouts/nooverlap/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT license. 
3 | 4 | 5 | from graspologic.layouts.nooverlap.nooverlap import remove_overlaps 6 | 7 | __all__ = ["remove_overlaps"] 8 | -------------------------------------------------------------------------------- /tests/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 3 | 4 | import os 5 | 6 | 7 | def data_file(filename): 8 | return os.path.join(os.path.dirname(__file__), "test_data", filename) 9 | -------------------------------------------------------------------------------- /graspologic/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 3 | 4 | from .base import load_drosophila_left, load_drosophila_right, load_mice 5 | 6 | __all__ = ["load_drosophila_left", "load_drosophila_right", "load_mice"] 7 | -------------------------------------------------------------------------------- /graspologic/nominate/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 3 | 4 | from .spectralVN import SpectralVertexNomination 5 | from .VNviaSGM import VNviaSGM 6 | 7 | __all__ = [ 8 | "SpectralVertexNomination", 9 | "VNviaSGM", 10 | ] 11 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | addopts = --doctest-modules 3 | 4 | 5 | filterwarnings = 6 | # Matrix PendingDeprecationWarning. 7 | ignore:Using or importing the ABCs from 'collections' 8 | ignore:the matrix subclass is not 9 | ignore:Using a non-tuple 10 | ignore:Input graph is not fully connected. -------------------------------------------------------------------------------- /docs/_templates/numpydoc_docstring.rst: -------------------------------------------------------------------------------- 1 | {{index}} 2 | {{summary}} 3 | {{extended_summary}} 4 | {{parameters}} 5 | {{returns}} 6 | {{yields}} 7 | {{other_parameters}} 8 | {{attributes}} 9 | {{raises}} 10 | {{warns}} 11 | {{warnings}} 12 | {{see_also}} 13 | {{notes}} 14 | {{references}} 15 | {{examples}} 16 | {{methods}} -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/usage-question.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Usage Question 3 | about: Ask us a question about graspologic and graphs! 4 | title: "[Question]" 5 | labels: question 6 | assignees: '' 7 | 8 | --- 9 | 10 | This is our forum for asking whatever network question you'd like! No need to feel shy - we're happy to talk about graphs! 11 | -------------------------------------------------------------------------------- /docs/reference/reference/nominate.rst: -------------------------------------------------------------------------------- 1 | Nomination 2 | ========== 3 | 4 | .. currentmodule:: graspologic.nominate 5 | 6 | Spectral Vertex Nomination 7 | --------------------------------------- 8 | 9 | .. autoclass:: SpectralVertexNomination 10 | 11 | Vertex Nomination via SGM 12 | ------------------------- 13 | .. 
autoclass:: VNviaSGM 14 | -------------------------------------------------------------------------------- /docs/reference/reference/preconditions.rst: -------------------------------------------------------------------------------- 1 | Preconditions 2 | ============= 3 | 4 | .. autofunction:: graspologic.preconditions.check_argument_types 5 | .. autofunction:: graspologic.preconditions.check_optional_argument_types 6 | .. autofunction:: graspologic.preconditions.check_argument 7 | .. autofunction:: graspologic.preconditions.is_real_weighted 8 | -------------------------------------------------------------------------------- /graspologic/align/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 3 | 4 | from .orthogonal_procrustes import OrthogonalProcrustes 5 | from .seedless_procrustes import SeedlessProcrustes 6 | from .sign_flips import SignFlips 7 | 8 | __all__ = ["OrthogonalProcrustes", "SeedlessProcrustes", "SignFlips"] 9 | -------------------------------------------------------------------------------- /docs/reference/reference/align.rst: -------------------------------------------------------------------------------- 1 | ******** 2 | Aligning 3 | ******** 4 | 5 | .. currentmodule:: graspologic.align 6 | 7 | Sign flips 8 | ---------- 9 | .. autoclass:: SignFlips 10 | 11 | Orthogonal Procrustes 12 | --------------------- 13 | .. autoclass:: OrthogonalProcrustes 14 | 15 | Seedless Procrustes 16 | ------------------- 17 | .. autoclass:: SeedlessProcrustes 18 | -------------------------------------------------------------------------------- /docs/reference/reference/inference.rst: -------------------------------------------------------------------------------- 1 | Inference 2 | ========= 3 | 4 | .. currentmodule:: graspologic.inference 5 | 6 | Two-graph hypothesis testing 7 | ---------------------------- 8 | 9 | .. autofunction:: density_test 10 | 11 | .. autofunction:: group_connection_test 12 | 13 | .. autofunction:: latent_position_test 14 | 15 | .. autofunction:: latent_distribution_test 16 | -------------------------------------------------------------------------------- /graspologic/cluster/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 3 | 4 | from .autogmm import AutoGMMCluster 5 | from .divisive_cluster import DivisiveCluster 6 | from .gclust import GaussianCluster 7 | from .kclust import KMeansCluster 8 | 9 | __all__ = ["GaussianCluster", "KMeansCluster", "AutoGMMCluster", "DivisiveCluster"] 10 | -------------------------------------------------------------------------------- /graspologic/datasets/mice/blocks.csv: -------------------------------------------------------------------------------- 1 | block,hemisphere,i,j 2 | isocortex,L,0,41 3 | pallium,L,41,61 4 | subpallium,L,61,68 5 | diencephalon,L,68,79 6 | midbrain,L,79,88 7 | hindbrain,L,88,116 8 | white_matter,L,116,166 9 | isocortex,R,166,207 10 | pallium,R,207,227 11 | subpallium,R,227,234 12 | diencephalon,R,234,245 13 | midbrain,R,245,254 14 | hindbrain,R,254,282 15 | white_matter,R,282,332 16 | -------------------------------------------------------------------------------- /docs/reference/reference/datasets.rst: -------------------------------------------------------------------------------- 1 | Datasets 2 | ======== 3 | 4 | .. 
currentmodule:: graspologic.datasets 5 | 6 | Drosophila larval mushroom body 7 | ------------------------------- 8 | 9 | .. autofunction:: load_drosophila_left 10 | 11 | .. autofunction:: load_drosophila_right 12 | 13 | Duke mouse whole-brain connectomes 14 | ---------------------------------- 15 | 16 | .. autofunction:: load_mice -------------------------------------------------------------------------------- /docs/reference/reference/index.rst: -------------------------------------------------------------------------------- 1 | .. _reference: 2 | 3 | Reference 4 | ********* 5 | 6 | .. toctree:: 7 | :maxdepth: 2 8 | 9 | align 10 | cluster 11 | datasets 12 | embed 13 | inference 14 | layouts 15 | match 16 | models 17 | nominate 18 | partition 19 | preconditions 20 | pipeline 21 | plotting 22 | preprocessing 23 | simulations 24 | subgraph 25 | utils 26 | -------------------------------------------------------------------------------- /docs/reference/reference/simulations.rst: -------------------------------------------------------------------------------- 1 | *********** 2 | Simulations 3 | *********** 4 | 5 | .. currentmodule:: graspologic.simulations 6 | 7 | 8 | 9 | .. autofunction:: er_np 10 | 11 | .. autofunction:: er_nm 12 | 13 | .. autofunction:: sbm 14 | 15 | .. autofunction:: rdpg 16 | 17 | .. autofunction:: er_corr 18 | 19 | .. autofunction:: sbm_corr 20 | 21 | .. autofunction:: rdpg_corr 22 | 23 | .. autofunction:: mmsbm 24 | 25 | -------------------------------------------------------------------------------- /graspologic/layouts/classes.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT license. 3 | 4 | from typing import NamedTuple 5 | 6 | __all__ = [ 7 | "NodePosition", 8 | ] 9 | 10 | 11 | class NodePosition(NamedTuple): 12 | """ 13 | Contains the node id, 2d coordinates, size, and community id for a node. 14 | """ 15 | 16 | node_id: str 17 | x: float 18 | y: float 19 | size: float 20 | community: int 21 | -------------------------------------------------------------------------------- /tests/test_data/actor_bipartite_graph.csv: -------------------------------------------------------------------------------- 1 | Person,Movie,Role 2 | Tom Hanks,Apollo 13,Cast 3 | Bill Paxton,Apollo 13,Cast 4 | Kevin Bacon,Apollo 13,Cast 5 | Kathleen Quinlan,Apollo 13,Cast 6 | Kevin Bacon,Planes Trains & Automobiles,Cast 7 | Steve Martin,Planes Trains & Automobiles,Cast 8 | John Candy,Planes Trains & Automobiles,Cast 9 | Tom Hanks,Mamma Mia! Here We Go Again,Executive Producer 10 | Tom Hanks,Forrest Gump,Cast 11 | Sally Field,Forrest Gump,Cast 12 | -------------------------------------------------------------------------------- /docs/reference/reference/cluster.rst: -------------------------------------------------------------------------------- 1 | ********** 2 | Clustering 3 | ********** 4 | 5 | .. currentmodule:: graspologic.cluster 6 | 7 | K-Means Clustering 8 | ------------------ 9 | .. autoclass:: KMeansCluster 10 | 11 | Gaussian Mixture Models Clustering 12 | ---------------------------------- 13 | .. autoclass:: GaussianCluster 14 | 15 | .. autoclass:: AutoGMMCluster 16 | 17 | Hierarchical Clustering 18 | ---------------------------------- 19 | .. 
autoclass:: DivisiveCluster 20 | :no-inherited-members: 21 | -------------------------------------------------------------------------------- /docs/reference/reference/layouts.rst: -------------------------------------------------------------------------------- 1 | Layouts 2 | ======= 3 | .. currentmodule:: graspologic.layouts 4 | 5 | NodePosition 6 | ------------ 7 | .. autoclass:: NodePosition 8 | 9 | Automatic Graph Layout 10 | ---------------------- 11 | .. autofunction:: layout_tsne 12 | .. autofunction:: layout_umap 13 | 14 | Colors 15 | ------ 16 | .. autofunction:: categorical_colors 17 | .. autofunction:: sequential_colors 18 | 19 | Rendering 20 | --------- 21 | .. autofunction:: save_graph 22 | .. autofunction:: show_graph 23 | -------------------------------------------------------------------------------- /graspologic/inference/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 3 | 4 | from .density_test import density_test 5 | from .group_connection_test import group_connection_test 6 | from .latent_distribution_test import latent_distribution_test 7 | from .latent_position_test import latent_position_test 8 | 9 | __all__ = [ 10 | "density_test", 11 | "group_connection_test", 12 | "latent_position_test", 13 | "latent_distribution_test", 14 | ] 15 | -------------------------------------------------------------------------------- /graspologic/partition/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 3 | 4 | from .leiden import ( 5 | HierarchicalCluster, 6 | HierarchicalClusters, 7 | hierarchical_leiden, 8 | leiden, 9 | ) 10 | from .modularity import modularity, modularity_components 11 | 12 | __all__ = [ 13 | "HierarchicalCluster", 14 | "HierarchicalClusters", 15 | "hierarchical_leiden", 16 | "leiden", 17 | "modularity", 18 | "modularity_components", 19 | ] 20 | -------------------------------------------------------------------------------- /graspologic/layouts/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT license. 
3 | 4 | 5 | from .classes import NodePosition 6 | from .colors import categorical_colors, sequential_colors 7 | from .render import save_graph, show_graph 8 | 9 | from .auto import layout_tsne, layout_umap # isort:skip 10 | 11 | __all__ = [ 12 | "NodePosition", 13 | "categorical_colors", 14 | "sequential_colors", 15 | "layout_tsne", 16 | "layout_umap", 17 | "save_graph", 18 | "show_graph", 19 | ] 20 | -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # graspologic package 2 | # /graspologic/align @alyakin314 @bdpedigo 3 | # /graspologic/cluster @bdpedigo @j1c 4 | # /graspologic/datasets @bdpedigo 5 | # /graspologic/embed @j1c @bdpedigo 6 | # /graspologic/inference @alyakin314 @bdpedigo 7 | # /graspologic/match @asaadeldin11 @bdpedigo 8 | # /graspologic/models @bdpedigo @j1c 9 | # /graspologic/pipeline @j1c @bdpedigo 10 | # /graspologic/plot @j1c @bdpedigo 11 | # /graspologic/subgraph @j1c 12 | # /graspologic/utils/ @j1c @bdpedigo 13 | 14 | # tutorials 15 | 16 | # tests 17 | -------------------------------------------------------------------------------- /docs/reference/reference/pipeline.rst: -------------------------------------------------------------------------------- 1 | Pipeline 2 | ======== 3 | .. automodule:: graspologic.pipeline 4 | 5 | GraphBuilder 6 | ------------ 7 | .. autoclass:: GraphBuilder 8 | 9 | Embed 10 | ----- 11 | .. automodule:: graspologic.pipeline.embed 12 | .. autoclass:: graspologic.pipeline.embed.embeddings.Embeddings 13 | .. autofunction:: graspologic.pipeline.embed.adjacency_spectral_embedding 14 | .. autofunction:: graspologic.pipeline.embed.laplacian_spectral_embedding 15 | .. autofunction:: graspologic.pipeline.embed.omnibus_embedding_pairwise 16 | -------------------------------------------------------------------------------- /graspologic/version.py: -------------------------------------------------------------------------------- 1 | """Utilities for exposing the package version.""" 2 | 3 | from importlib import metadata 4 | 5 | 6 | def __version() -> str: 7 | """Return the installed graspologic version. 8 | 9 | Falls back to a sensible default when the distribution metadata is 10 | unavailable (for example when running directly from a source checkout). 11 | """ 12 | 13 | try: 14 | return metadata.version("graspologic") 15 | except metadata.PackageNotFoundError: 16 | return "0.0.0" 17 | 18 | 19 | __version__ = __version() -------------------------------------------------------------------------------- /graspologic/models/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 3 | 4 | from .base import BaseGraphEstimator 5 | from .edge_swaps import EdgeSwapper 6 | from .er import DCEREstimator, EREstimator 7 | from .rdpg import RDPGEstimator 8 | from .sbm_estimators import DCSBMEstimator, SBMEstimator 9 | 10 | __all__ = [ 11 | "BaseGraphEstimator", 12 | "EREstimator", 13 | "DCEREstimator", 14 | "SBMEstimator", 15 | "DCSBMEstimator", 16 | "RDPGEstimator", 17 | "EdgeSwapper", 18 | ] 19 | -------------------------------------------------------------------------------- /docs/reference/reference/partition.rst: -------------------------------------------------------------------------------- 1 | Partition 2 | ========= 3 | 4 | .. 
currentmodule:: graspologic.partition 5 | 6 | Modularity and Component Modularity 7 | ----------------------------------- 8 | 9 | .. autofunction:: modularity 10 | 11 | .. autofunction:: modularity_components 12 | 13 | Leiden and Hierarchical Leiden 14 | ------------------------------ 15 | 16 | .. autofunction:: leiden 17 | 18 | .. autoclass:: HierarchicalCluster 19 | :members: 20 | 21 | .. autoclass:: HierarchicalClusters 22 | :members: 23 | 24 | .. autofunction:: hierarchical_leiden 25 | -------------------------------------------------------------------------------- /docs/reference/reference/models.rst: -------------------------------------------------------------------------------- 1 | Models 2 | ====== 3 | 4 | .. currentmodule:: graspologic.models 5 | 6 | Erdos-Reyni models 7 | ------------------ 8 | 9 | .. autoclass:: EREstimator 10 | 11 | .. autoclass:: DCEREstimator 12 | 13 | Stochastic block models 14 | ----------------------- 15 | 16 | .. autoclass:: SBMEstimator 17 | 18 | .. autoclass:: DCSBMEstimator 19 | 20 | Latent position models 21 | ---------------------- 22 | 23 | .. autoclass:: RDPGEstimator 24 | 25 | Edge swapping (configuration models) 26 | ------------------------------------ 27 | 28 | .. autoclass:: EdgeSwapper -------------------------------------------------------------------------------- /graspologic/simulations/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 3 | 4 | from .simulations import er_nm, er_np, mmsbm, p_from_latent, rdpg, sample_edges, sbm 5 | from .simulations_corr import er_corr, sample_edges_corr, sbm_corr 6 | 7 | from .rdpg_corr import rdpg_corr # isort:skip 8 | 9 | __all__ = [ 10 | "sample_edges", 11 | "er_np", 12 | "er_nm", 13 | "sbm", 14 | "rdpg", 15 | "p_from_latent", 16 | "sample_edges_corr", 17 | "er_corr", 18 | "sbm_corr", 19 | "rdpg_corr", 20 | "mmsbm", 21 | ] 22 | -------------------------------------------------------------------------------- /tests/test_datasets.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 3 | 4 | import unittest 5 | 6 | from graspologic.datasets import * 7 | 8 | 9 | class TestDatasets(unittest.TestCase): 10 | def test_drosphila_left(self): 11 | graph = load_drosophila_left() 12 | graph, labels = load_drosophila_left(return_labels=True) 13 | 14 | def test_drosphila_right(self): 15 | graph = load_drosophila_right() 16 | graph, labels = load_drosophila_right(return_labels=True) 17 | 18 | def test_load_mice(self): 19 | data = load_mice() 20 | -------------------------------------------------------------------------------- /docs/reference/reference/plotting.rst: -------------------------------------------------------------------------------- 1 | ******** 2 | Plotting 3 | ******** 4 | 5 | .. currentmodule:: graspologic.plot 6 | 7 | Heatmap 8 | ------- 9 | .. autofunction:: heatmap 10 | 11 | Gridplot 12 | -------- 13 | .. autofunction:: gridplot 14 | 15 | Pairplot 16 | -------- 17 | .. autofunction:: pairplot 18 | .. autofunction:: pairplot_with_gmm 19 | 20 | Degreeplot 21 | ---------- 22 | .. autofunction:: degreeplot 23 | 24 | Edgeplot 25 | -------- 26 | .. autofunction:: edgeplot 27 | 28 | Screeplot 29 | --------- 30 | .. autofunction:: screeplot 31 | 32 | Adjplot 33 | ------- 34 | .. 
autofunction:: adjplot 35 | 36 | Matrixplot 37 | ---------- 38 | .. autofunction:: matrixplot 39 | -------------------------------------------------------------------------------- /graspologic/plot/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 3 | 4 | # type: ignore 5 | 6 | import sys 7 | 8 | import matplotlib as mpl 9 | 10 | from .plot import ( 11 | degreeplot, 12 | edgeplot, 13 | gridplot, 14 | heatmap, 15 | networkplot, 16 | pairplot, 17 | pairplot_with_gmm, 18 | screeplot, 19 | ) 20 | from .plot_matrix import adjplot, matrixplot 21 | 22 | __all__ = [ 23 | "heatmap", 24 | "gridplot", 25 | "pairplot", 26 | "pairplot_with_gmm", 27 | "degreeplot", 28 | "edgeplot", 29 | "screeplot", 30 | "adjplot", 31 | "matrixplot", 32 | "networkplot", 33 | ] 34 | -------------------------------------------------------------------------------- /graspologic/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 3 | 4 | import graspologic.align 5 | import graspologic.cluster 6 | import graspologic.datasets 7 | import graspologic.embed 8 | import graspologic.inference 9 | import graspologic.layouts 10 | import graspologic.models 11 | import graspologic.nominate 12 | import graspologic.partition 13 | import graspologic.pipeline 14 | import graspologic.plot 15 | import graspologic.preprocessing 16 | import graspologic.simulations 17 | import graspologic.subgraph 18 | import graspologic.utils 19 | from graspologic.types import * 20 | from graspologic.version import __version 21 | 22 | __version__ = __version() 23 | -------------------------------------------------------------------------------- /graspologic/preprocessing/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 3 | 4 | from .graph_cuts import ( 5 | DefinedHistogram, 6 | cut_edges_by_weight, 7 | cut_vertices_by_betweenness_centrality, 8 | cut_vertices_by_degree_centrality, 9 | histogram_betweenness_centrality, 10 | histogram_degree_centrality, 11 | histogram_edge_weight, 12 | ) 13 | 14 | __all__ = [ 15 | "DefinedHistogram", 16 | "histogram_betweenness_centrality", 17 | "histogram_degree_centrality", 18 | "histogram_edge_weight", 19 | "cut_edges_by_weight", 20 | "cut_vertices_by_betweenness_centrality", 21 | "cut_vertices_by_degree_centrality", 22 | ] 23 | -------------------------------------------------------------------------------- /docs/reference/reference/utils.rst: -------------------------------------------------------------------------------- 1 | Utility 2 | ======= 3 | 4 | .. currentmodule:: graspologic.utils 5 | 6 | Transformations 7 | --------------- 8 | 9 | .. autofunction:: pass_to_ranks 10 | 11 | .. autofunction:: to_laplacian 12 | 13 | .. autofunction:: augment_diagonal 14 | 15 | .. autofunction:: symmetrize 16 | 17 | .. autofunction:: remove_loops 18 | 19 | Connected Components 20 | -------------------- 21 | 22 | .. autofunction:: is_fully_connected 23 | 24 | .. autofunction:: largest_connected_component 25 | 26 | .. autofunction:: multigraph_lcc_union 27 | 28 | .. autofunction:: multigraph_lcc_intersection 29 | 30 | IO 31 | -- 32 | 33 | .. autofunction:: import_graph 34 | 35 | .. 
autofunction:: import_edgelist 36 | 37 | Other 38 | ----- 39 | 40 | .. autofunction:: remap_labels -------------------------------------------------------------------------------- /docs/reference/reference/embed.rst: -------------------------------------------------------------------------------- 1 | .. _ase_tutorial: https://microsoft.github.io/graspologic/tutorials/embedding/AdjacencySpectralEmbed.html 2 | 3 | Embedding 4 | ========= 5 | 6 | .. currentmodule:: graspologic.embed 7 | 8 | Decomposition 9 | ------------- 10 | 11 | .. autofunction:: select_dimension 12 | 13 | .. autofunction:: select_svd 14 | 15 | Single graph embedding 16 | ---------------------- 17 | 18 | .. autoclass:: AdjacencySpectralEmbed 19 | .. autoclass:: LaplacianSpectralEmbed 20 | .. autofunction:: node2vec_embed 21 | 22 | Multiple graph embedding 23 | ------------------------ 24 | 25 | .. autoclass:: OmnibusEmbed 26 | .. autoclass:: MultipleASE 27 | .. autoclass:: mug2vec 28 | 29 | Dissimilarity graph embedding 30 | ----------------------------- 31 | 32 | .. autoclass:: ClassicalMDS 33 | -------------------------------------------------------------------------------- /graspologic/embed/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 3 | 4 | from .ase import AdjacencySpectralEmbed 5 | from .base import BaseSpectralEmbed 6 | from .case import CovariateAssistedEmbed 7 | from .lse import LaplacianSpectralEmbed 8 | from .mase import MultipleASE 9 | from .mds import ClassicalMDS 10 | from .mug2vec import mug2vec 11 | from .n2v import node2vec_embed 12 | from .omni import OmnibusEmbed 13 | from .svd import select_dimension, select_svd 14 | 15 | __all__ = [ 16 | "ClassicalMDS", 17 | "OmnibusEmbed", 18 | "AdjacencySpectralEmbed", 19 | "LaplacianSpectralEmbed", 20 | "MultipleASE", 21 | "node2vec_embed", 22 | "select_dimension", 23 | "select_svd", 24 | "BaseSpectralEmbed", 25 | "CovariateAssistedEmbed", 26 | ] 27 | -------------------------------------------------------------------------------- /graspologic/pipeline/embed/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT license. 3 | # ruff: noqa: E402 SVD_SOLVER_TYPES needs to be first 4 | """ 5 | The embed module of ``graspologic.pipeline.embed`` is intended to provide faster 6 | application development support. The functions provided in it reflect common call 7 | patterns used when developing data processing pipelines and future consumption 8 | by nearest neighbor services and visualization routines. 9 | """ 10 | 11 | __SVD_SOLVER_TYPES = ["randomized", "full", "truncated"] 12 | from .adjacency_spectral_embedding import adjacency_spectral_embedding 13 | from .embeddings import Embeddings, EmbeddingsView 14 | from .laplacian_spectral_embedding import laplacian_spectral_embedding 15 | from .omnibus_embedding import omnibus_embedding_pairwise 16 | 17 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Bug reports help us improve! Thanks for submitting yours! 
4 | title: "[BUG] " 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | ## Expected Behavior 11 | Tell us what should happen 12 | 13 | ## Actual Behavior 14 | Tell us what happens instead 15 | 16 | ## Example Code 17 | Please see [How to create a Minimal, Reproducible example](https://stackoverflow.com/help/minimal-reproducible-example) for some guidance on creating the best possible example of the problem 18 | ```python 19 | 20 | ``` 21 | 22 | ## Full Traceback 23 | ```pytb 24 | Paste the full traceback in case there is an exception 25 | 26 | ``` 27 | 28 | ## Your Environment 29 | * Python version: 30 | * graspologic version: 31 | 32 | ## Additional Details 33 | Any other contextual information you might feel is important. 34 | -------------------------------------------------------------------------------- /graspologic/datasets/mice/participants.csv: -------------------------------------------------------------------------------- 1 | participant_id,genotype,sex 2 | sub-54776,DBA2,male 3 | sub-54777,DBA2,male 4 | sub-54779,DBA2,female 5 | sub-54781,DBA2,female 6 | sub-54790,B6,male 7 | sub-54793,B6,male 8 | sub-54794,B6,female 9 | sub-54797,B6,female 10 | sub-54811,BTBR,male 11 | sub-54813,BTBR,male 12 | sub-54815,BTBR,female 13 | sub-54817,BTBR,female 14 | sub-54821,CAST,male 15 | sub-54823,CAST,male 16 | sub-54829,DBA2,male 17 | sub-54831,DBA2,male 18 | sub-54833,DBA2,female 19 | sub-54835,DBA2,female 20 | sub-54842,CAST,female 21 | sub-54847,CAST,female 22 | sub-54849,BTBR,male 23 | sub-54851,BTBR,male 24 | sub-54853,BTBR,female 25 | sub-54855,BTBR,female 26 | sub-54864,B6,male 27 | sub-54866,B6,male 28 | sub-54868,B6,female 29 | sub-54870,B6,female 30 | sub-54883,CAST,male 31 | sub-54885,CAST,male 32 | sub-54887,CAST,female 33 | sub-54890,CAST,female 34 | -------------------------------------------------------------------------------- /docs/reference/release/release_template.rst: -------------------------------------------------------------------------------- 1 | :orphan: 2 | 3 | Release Notes: GraSPy 0.0.X 4 | =========================== 5 | 6 | We're happy to announce the release of GraSPy 0.0.X! GraSPy is a Python package for 7 | understanding the properties of random graphs that arise from modern datasets, such as social networks 8 | and brain networks. 9 | 10 | For more information, please visit our `website `_ 11 | and our `tutorials `_. 12 | 13 | 14 | Highlights 15 | ---------- 16 | This release is the result of X of work with over X pull requests by X contributors. Highlights include: 17 | 18 | 19 | Improvements 20 | ------------ 21 | 22 | 23 | API Changes 24 | ----------- 25 | 26 | 27 | Deprecations 28 | ------------ 29 | 30 | 31 | Contributors to this release 32 | ---------------------------- 33 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: enhancement 6 | assignees: '' 7 | 8 | --- 9 | 10 | ## Is your feature request related to a problem? Please describe. 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | ## Describe the solution you'd like 14 | A clear and concise description of what you want to happen. 15 | 16 | ## Describe alternatives you've considered 17 | A clear and concise description of any alternative solutions or features you've considered. 
18 | 
19 | ## Provide references (if applicable)
20 | If your feature request is related to a published algorithm/idea, please provide links to
21 | any relevant articles or webpages.
22 | 
23 | ## Additional context
24 | Add any other context or screenshots about the feature request here.
--------------------------------------------------------------------------------
/graspologic/match/types.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft Corporation and contributors.
2 | # Licensed under the MIT License.
3 | 
4 | from typing import Union
5 | 
6 | import numpy as np
7 | from packaging import version
8 | from scipy import __version__ as scipy_version
9 | from scipy.sparse import csr_matrix
10 | 
11 | if version.parse(scipy_version) >= version.parse("1.8.0"):
12 |     from scipy.sparse import csr_array
13 | else:
14 |     csr_array = csr_matrix
15 | 
16 | from typing_extensions import Literal
17 | 
18 | from graspologic.types import List, Tuple
19 | 
20 | # redefining since I don't want to add csr_array for ALL code in graspologic yet
21 | AdjacencyMatrix = Union[np.ndarray, csr_matrix, csr_array]
22 | 
23 | MultilayerAdjacency = Union[List[AdjacencyMatrix], AdjacencyMatrix, np.ndarray]
24 | 
25 | PaddingType = Literal["adopted", "naive"]
26 | 
27 | Scalar = Union[int, float, np.integer]
28 | 
29 | Int = Union[int, np.integer]
30 | 
31 | PartialMatchType = Union[np.ndarray, Tuple]
--------------------------------------------------------------------------------
/docs/reference/reference/preprocessing.rst:
--------------------------------------------------------------------------------
1 | Preprocessing
2 | =============
3 | 
4 | .. currentmodule:: graspologic.preprocessing
5 | 
6 | Graph Cuts
7 | ----------
8 | 
9 | Constants
10 | ^^^^^^^^^
11 | .. py:data:: LARGER_THAN_INCLUSIVE
12 | 
13 |     Cut any edge or node > the ``cut_threshold``
14 | 
15 | .. py:data:: LARGER_THAN_EXCLUSIVE
16 | 
17 |     Cut any edge or node >= the ``cut_threshold``
18 | 
19 | .. py:data:: SMALLER_THAN_INCLUSIVE
20 | 
21 |     Cut any edge or node < the ``cut_threshold``
22 | 
23 | .. py:data:: SMALLER_THAN_EXCLUSIVE
24 | 
25 |     Cut any edge or node <= the ``cut_threshold``
26 | 
27 | Classes
28 | ^^^^^^^
29 | .. autoclass:: DefinedHistogram
30 | 
31 | Functions
32 | ^^^^^^^^^
33 | .. autofunction:: cut_edges_by_weight
34 | 
35 | .. autofunction:: cut_vertices_by_betweenness_centrality
36 | 
37 | .. autofunction:: cut_vertices_by_degree_centrality
38 | 
39 | .. autofunction:: histogram_betweenness_centrality
40 | 
41 | .. autofunction:: histogram_degree_centrality
42 | 
43 | .. autofunction:: histogram_edge_weight
--------------------------------------------------------------------------------
/graspologic/layouts/nooverlap/_node.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft Corporation.
2 | # Licensed under the MIT license.
3 | 4 | from typing import Any 5 | 6 | 7 | class _Node: 8 | def __init__( 9 | self, 10 | node_id: Any, 11 | x: float, 12 | y: float, 13 | size: float, 14 | community: int = 9999999, 15 | color: str = "", 16 | ): 17 | self.node_id = node_id 18 | self.x = float(x) 19 | self.y = float(y) 20 | self.original_x = self.x 21 | self.original_y = self.y 22 | self.size = float(size) 23 | self.community = community 24 | self.color = color 25 | 26 | def reset_original_position(self, new_x: float, new_y: float) -> None: 27 | self.original_x = self.x = new_x 28 | self.original_y = self.y = new_y 29 | 30 | def __eq__(self, other: Any) -> bool: 31 | return self.node_id == other.node_id # type: ignore 32 | 33 | def __hash__(self) -> int: 34 | return hash(self.node_id) 35 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 5 | - [ ] Does this PR have a descriptive title that could go in our release notes? 6 | - [ ] Does this PR add any new dependencies? 7 | - [ ] Does this PR modify any existing APIs? 8 | - [ ] Is the change to the API backwards compatible? 9 | - [ ] Have you built the documentation (reference and/or tutorial) and verified the generated documentation is appropriate? 10 | 11 | #### Reference Issues/PRs 12 | 18 | 19 | #### What does this implement/fix? Briefly explain your changes. 20 | 21 | #### Any other comments? 22 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) Microsoft Corporation. 2 | 3 | MIT License 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /graspologic/layouts/nooverlap/nooverlap.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT license. 3 | 4 | import logging 5 | import time 6 | 7 | from graspologic.types import List 8 | 9 | from .. 
import NodePosition 10 | from ._node import _Node 11 | from ._quad_tree import _QuadTree 12 | 13 | logger = logging.getLogger(__name__) 14 | 15 | 16 | def remove_overlaps(node_positions: List[NodePosition]) -> List[NodePosition]: 17 | start = time.time() 18 | logger.info("removing overlaps") 19 | local_nodes = [ 20 | _Node(node.node_id, node.x, node.y, node.size, node.community) 21 | for node in node_positions 22 | ] 23 | qt = _QuadTree(local_nodes, 50) 24 | qt.layout_dense_first(first_color=None) 25 | stop = time.time() 26 | logger.info(f"removed overlap in {stop - start} seconds") 27 | 28 | new_positions = [ 29 | NodePosition( 30 | node_id=node.node_id, 31 | x=node.x, 32 | y=node.y, 33 | size=node.size, 34 | community=node.community, 35 | ) 36 | for node in local_nodes 37 | ] 38 | return new_positions 39 | -------------------------------------------------------------------------------- /docs/reference/release/graspy_releases.rst: -------------------------------------------------------------------------------- 1 | GraSPy Release Log 2 | ================== 3 | 4 | GraSPy 0.3 5 | ---------- 6 | Release date: 04 Aug 2020 7 | Supports Python 3.6, and 3.7 8 | 9 | .. toctree:: 10 | :maxdepth: 1 11 | 12 | release_0.3.rst 13 | 14 | GraSPy 0.2 15 | ---------- 16 | Release date: 02 Mar 2020 17 | Supports Python 3.5, 3.6, and 3.7 18 | 19 | .. toctree:: 20 | :maxdepth: 1 21 | 22 | release_0.2.rst 23 | 24 | GraSPy 0.1 25 | ---------- 26 | Release date: 05 Aug 2019 27 | Supports Python 3.5, 3.6, and 3.7 28 | 29 | .. toctree:: 30 | :maxdepth: 1 31 | 32 | release_0.1.rst 33 | 34 | GraSPy 0.0.3 35 | ------------ 36 | Release date: 11 June 2019 37 | Supports Python 3.5, 3.6, and 3.7. 38 | 39 | .. toctree:: 40 | :maxdepth: 1 41 | 42 | release_0.0.3.rst 43 | 44 | GraSPy 0.0.2 45 | ------------ 46 | Release date: 26 March 2019 47 | Supports Python 3.5, 3.6, and 3.7. 48 | 49 | .. toctree:: 50 | :maxdepth: 1 51 | 52 | release_0.0.2.rst 53 | 54 | GraSPy 0.0.1 55 | ------------ 56 | Release date: 14 December 2018 57 | Supports Python 3.5, 3.6, and 3.7. 58 | 59 | .. toctree:: 60 | :maxdepth: 1 61 | 62 | release_0.0.1.rst 63 | -------------------------------------------------------------------------------- /graspologic/pipeline/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The ``pipeline`` module includes a collection of higher level API abstractions from 3 | the functionality exposed elsewhere in ``graspologic``. The classes, functions, and 4 | modules elsewhere in ``graspologic`` are intended to provide fine-grained, expert-level 5 | control over the features they implement. These building blocks provide an excellent 6 | backbone of utility, for researchers in mathematics and science, especially as they 7 | hew so closely to ``scikit-learn``'s programming paradigms and object model. 8 | 9 | But for software engineers and datascientists, there is a certain ritualistic cost to 10 | preparing a graph, setting up the objects for use, and tearing them down afterwards. 11 | 12 | ``pipeline`` is intended to smooth the transition between a common developer and 13 | a graph machine learning subject matter expert. We make a presumption that most 14 | programmers are software developers first, and dabbling in ML second, and our intention 15 | is to bridge this gap. 16 | 17 | """ 18 | 19 | # Copyright (c) Microsoft Corporation. 20 | # Licensed under the MIT license. 21 | 22 | from . 
import embed 23 | from .graph_builder import GraphBuilder 24 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | message: "If you find graspologic useful in your work, please cite the package via the GraSPy paper" 3 | authors: 4 | - family-names: "Chung" 5 | given-names: "Jaewon" 6 | - family-names: "Pedigo" 7 | given-names: "Benjamin D." 8 | - family-names: "Bridgeford" 9 | given-names: "Eric W." 10 | - family-names: "Varjavand" 11 | given-names: "Bijan K." 12 | - family-names: "Helm" 13 | given-names: "Hayden S." 14 | - family-names: "Vogelstein" 15 | given-names: "Joshua T." 16 | title: "GraSPy: Graph Statistics in Python" 17 | version: 3.0.0 18 | url: "https://github.com/graspologic-org/graspologic" 19 | preferred-citation: 20 | type: software 21 | issue: 158 22 | volume: 20 23 | journal: "Journal of Machine Learning Research" 24 | authors: 25 | - family-names: "Chung" 26 | given-names: "Jaewon" 27 | - family-names: "Pedigo" 28 | given-names: "Benjamin D." 29 | - family-names: "Bridgeford" 30 | given-names: "Eric W." 31 | - family-names: "Varjavand" 32 | given-names: "Bijan K." 33 | - family-names: "Helm" 34 | given-names: "Hayden S." 35 | - family-names: "Vogelstein" 36 | given-names: "Joshua T." 37 | start: 1 38 | end: 7 39 | year: 2019 40 | -------------------------------------------------------------------------------- /mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | #warn_return_any = True 3 | warn_unused_configs = True 4 | disallow_untyped_defs = True 5 | disallow_incomplete_defs = True 6 | no_implicit_optional = True 7 | 8 | [mypy-anytree] 9 | ignore_missing_imports = True 10 | 11 | [mypy-hyppo.*] 12 | ignore_missing_imports = True 13 | 14 | [mypy-joblib] 15 | ignore_missing_imports = True 16 | 17 | [mypy-gensim.models] 18 | ignore_missing_imports = True 19 | 20 | [mypy-matplotlib] 21 | ignore_missing_imports = True 22 | 23 | [mypy-matplotlib.*] 24 | ignore_missing_imports = True 25 | 26 | [mypy-mpl_toolkits.*] 27 | ignore_missing_imports = True 28 | 29 | [mypy-numba.*] 30 | ignore_missing_imports = True 31 | 32 | [mypy-numpy] 33 | ignore_missing_imports = True 34 | 35 | [mypy-networkx] 36 | ignore_missing_imports = True 37 | 38 | [mypy-ot] 39 | ignore_missing_imports = True 40 | 41 | [mypy-pandas] 42 | ignore_missing_imports = True 43 | 44 | [mypy-pkg_resources] 45 | ignore_missing_imports = True 46 | 47 | [mypy-seaborn] 48 | ignore_missing_imports = True 49 | 50 | [mypy-scipy] 51 | ignore_missing_imports = True 52 | 53 | [mypy-scipy.*] 54 | ignore_missing_imports = True 55 | 56 | [mypy-sklearn.*] 57 | ignore_missing_imports = True 58 | 59 | [mypy-statsmodels.*] 60 | ignore_missing_imports = True 61 | 62 | [mypy-umap] 63 | ignore_missing_imports = True 64 | -------------------------------------------------------------------------------- /docs/license.rst: -------------------------------------------------------------------------------- 1 | License 2 | ======= 3 | graspologic is distributed with MIT license. 4 | 5 | :: 6 | 7 | Copyright (c) Microsoft Corporation and contributors. 
8 | 
9 |     MIT License
10 | 
11 |     Permission is hereby granted, free of charge, to any person obtaining a copy
12 |     of this software and associated documentation files (the "Software"), to deal
13 |     in the Software without restriction, including without limitation the rights
14 |     to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
15 |     copies of the Software, and to permit persons to whom the Software is
16 |     furnished to do so, subject to the following conditions:
17 | 
18 |     The above copyright notice and this permission notice shall be included in all
19 |     copies or substantial portions of the Software.
20 | 
21 |     THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22 |     IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23 |     FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
24 |     AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25 |     LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
26 |     OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
27 |     SOFTWARE.
--------------------------------------------------------------------------------
/docs/reference/release/release_0.0.1.rst:
--------------------------------------------------------------------------------
1 | Release Notes: GraSPy 0.0.1
2 | ===========================
3 | 
4 | We're happy to announce the release of GraSPy 0.0.1! GraSPy is a Python package for
5 | understanding the properties of random graphs that arise from modern datasets, such as social networks
6 | and brain networks.
7 | 
8 | For more information, please visit our `website `_
9 | and our `tutorials
10 | `_.
11 | 
12 | Highlights
13 | ----------
14 | This release is the result of over two years of work with 238 commits and 35 merges by 4 contributors.
15 | Highlights include:
16 | - Fast implementation of dimensionality reduction using different implementations of SVD.
17 | - Single and multiple graph embedding methods.
18 | - Methods for preprocessing graphs for meaningful embeddings.
19 | - Hypothesis testing, specifically semiparametric testing of two graphs.
20 | - Methods for clustering vertices or populations of graphs
21 | - Plotting functions for visualization of graphs and high dimensional data.
22 | 
23 | API Changes
24 | -----------
25 | All classes are based on scikit-learn's API, making the use familiar.
26 | 
27 | Deprecations
28 | ------------
29 | None.
30 | 
31 | Contributors to this release
32 | ----------------------------
33 | 
34 | - Jaewon Chung
35 | - Benjamin Pedigo
36 | - Eric Bridgeford
37 | - Bijan Varjavand
--------------------------------------------------------------------------------
/tests/test_preconditions.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft Corporation.
2 | # Licensed under the MIT license.
3 | 4 | import unittest 5 | 6 | from graspologic import preconditions 7 | 8 | 9 | class TestPreconditions(unittest.TestCase): 10 | def test_check_arguments(self): 11 | test_true_expressions = [1 < 3, 3 == 3, True, 1 == 1] 12 | test_false_expressions = [ 13 | 3 < 1, 14 | 3 != 3, 15 | None is True, 16 | 1 == "1", 17 | ] 18 | for resolved_expression in test_true_expressions: 19 | preconditions.check_argument(resolved_expression, "This should be true") 20 | 21 | for resolved_expression in test_false_expressions: 22 | with self.assertRaises(ValueError): 23 | preconditions.check_argument(resolved_expression, "This is false") 24 | 25 | def test_check_argument_types(self): 26 | preconditions.check_argument_types(1, int, "Some message") 27 | with self.assertRaises(TypeError): 28 | preconditions.check_argument_types(1, set, "This fails") 29 | 30 | def test_check_optional_argument_types(self): 31 | preconditions.check_optional_argument_types(1, int, "Some message") 32 | preconditions.check_optional_argument_types(None, int, "Some message") 33 | with self.assertRaises(TypeError): 34 | preconditions.check_optional_argument_types(1, set, "This fails") 35 | -------------------------------------------------------------------------------- /graspologic/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 3 | 4 | from .ptr import pass_to_ranks 5 | from .utils import ( 6 | LaplacianFormType, 7 | augment_diagonal, 8 | average_matrices, 9 | binarize, 10 | cartesian_product, 11 | fit_plug_in_variance_estimator, 12 | import_edgelist, 13 | import_graph, 14 | is_almost_symmetric, 15 | is_fully_connected, 16 | is_loopless, 17 | is_symmetric, 18 | is_unweighted, 19 | largest_connected_component, 20 | multigraph_lcc_intersection, 21 | multigraph_lcc_union, 22 | remap_labels, 23 | remap_node_ids, 24 | remove_loops, 25 | remove_vertices, 26 | symmetrize, 27 | to_laplacian, 28 | ) 29 | 30 | __all__ = [ 31 | "average_matrices", 32 | "import_graph", 33 | "import_edgelist", 34 | "is_symmetric", 35 | "is_loopless", 36 | "is_unweighted", 37 | "is_almost_symmetric", 38 | "symmetrize", 39 | "remove_loops", 40 | "to_laplacian", 41 | "LaplacianFormType", 42 | "is_fully_connected", 43 | "largest_connected_component", 44 | "multigraph_lcc_union", 45 | "multigraph_lcc_intersection", 46 | "augment_diagonal", 47 | "binarize", 48 | "cartesian_product", 49 | "pass_to_ranks", 50 | "fit_plug_in_variance_estimator", 51 | "remove_vertices", 52 | "remap_labels", 53 | "remap_node_ids", 54 | ] 55 | -------------------------------------------------------------------------------- /docs/reference/release/release_0.1.rst: -------------------------------------------------------------------------------- 1 | Release Notes: GraSPy 0.1 2 | ========================= 3 | 4 | We're happy to announce the release of GraSPy 0.1! GraSPy is a Python package for 5 | understanding the properties of random graphs that arise from modern datasets, such as 6 | social networks and brain networks. 7 | 8 | For more information, please visit our `website `_ 9 | and our `tutorials `_. 10 | 11 | 12 | Highlights 13 | ---------- 14 | This release is the result of over 2 months of work with over 18 pull requests by 15 | 3 contributors. Highlights include: 16 | 17 | - Added ``MultipleASE``, which is a new method for embedding population of graphs. 
18 | - Added ``mug2vec`` within ``pipieline`` module, which learns a feature vector for population of graphs. 19 | 20 | Improvements 21 | ------------ 22 | - Improved contribution guidelines. 23 | - Fixed bugs in ``GaussianCluster``. 24 | - ``symmeterize`` function now uses ``avg`` as default method. 25 | - Fixed ``dataset`` module loading errors. 26 | - Improve underlying `ER` sampling code. 27 | 28 | API Changes 29 | ----------- 30 | - Added ``sort_nodes`` argument for ``heatmap`` and ``gridplot`` functions. 31 | 32 | Deprecations 33 | ------------ 34 | None 35 | 36 | Contributors to this release 37 | ---------------------------- 38 | - `Jaewon Chung `_ 39 | - `Benjamin Pedigo `_ 40 | - `Kiki Zhang `_ (new contributor!) -------------------------------------------------------------------------------- /tests/layouts/nooverlap/test_grid_cell_creation.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 3 | 4 | import unittest 5 | 6 | from graspologic.layouts.nooverlap._node import _Node 7 | from graspologic.layouts.nooverlap._quad_node import _QuadNode 8 | 9 | 10 | class TestGridCellCreation(unittest.TestCase): 11 | def setUp(self): 12 | self.qn = _QuadNode( 13 | [_Node(99, 3, 7, 2, 0, "red"), _Node(100, 2, 9, 3, 0, "blue")], 5, 50 14 | ) 15 | 16 | def test_grid_cell_center(self): 17 | cell_x, cell_y, center_x, center_y = self.qn.find_grid_cell_and_center( 18 | 0, 0, 10, 50, 50 19 | ) 20 | self.assertEqual(cell_x, 2) 21 | self.assertEqual(cell_y, 2) 22 | self.assertEqual(center_x, 40) 23 | self.assertEqual(center_y, 40) 24 | 25 | def test_grid_cell_center2(self): 26 | cell_x, cell_y, center_x, center_y = self.qn.find_grid_cell_and_center( 27 | 0, 0, 10, 50, 40 28 | ) 29 | self.assertEqual(cell_x, 2) 30 | self.assertEqual(cell_y, 2) 31 | self.assertEqual(center_x, 40) 32 | self.assertEqual(center_y, 40) 33 | 34 | def test_grid_cell_center3(self): 35 | cell_x, cell_y, center_x, center_y = self.qn.find_grid_cell_and_center( 36 | 3, 4, 10, 53, 44 37 | ) 38 | self.assertEqual(cell_x, 2) 39 | self.assertEqual(cell_y, 2) 40 | self.assertEqual(center_x, 43) 41 | self.assertEqual(center_y, 44) 42 | 43 | 44 | if __name__ == "__main__": 45 | unittest.main() 46 | -------------------------------------------------------------------------------- /graspologic/datasets/drosophila/left_cell_labels.csv: -------------------------------------------------------------------------------- 1 | K 2 | K 3 | K 4 | K 5 | K 6 | K 7 | K 8 | K 9 | K 10 | K 11 | K 12 | K 13 | K 14 | K 15 | K 16 | K 17 | K 18 | K 19 | K 20 | K 21 | K 22 | K 23 | K 24 | K 25 | K 26 | K 27 | K 28 | K 29 | K 30 | K 31 | K 32 | K 33 | K 34 | K 35 | K 36 | K 37 | K 38 | K 39 | K 40 | K 41 | K 42 | K 43 | K 44 | K 45 | K 46 | K 47 | K 48 | K 49 | K 50 | K 51 | K 52 | K 53 | K 54 | K 55 | K 56 | K 57 | K 58 | K 59 | K 60 | K 61 | K 62 | K 63 | K 64 | K 65 | K 66 | K 67 | K 68 | K 69 | K 70 | K 71 | K 72 | K 73 | K 74 | K 75 | K 76 | K 77 | K 78 | K 79 | K 80 | K 81 | K 82 | K 83 | K 84 | K 85 | K 86 | K 87 | K 88 | K 89 | K 90 | K 91 | K 92 | K 93 | K 94 | K 95 | K 96 | K 97 | K 98 | K 99 | K 100 | K 101 | K 102 | I 103 | I 104 | I 105 | I 106 | I 107 | I 108 | I 109 | I 110 | I 111 | I 112 | I 113 | I 114 | I 115 | I 116 | I 117 | I 118 | I 119 | I 120 | I 121 | I 122 | I 123 | O 124 | O 125 | O 126 | O 127 | O 128 | O 129 | O 130 | O 131 | O 132 | O 133 | O 134 | O 135 | O 136 | O 137 | O 138 | O 139 | O 140 | O 141 | 
O 142 | O 143 | O 144 | O 145 | O 146 | O 147 | O 148 | O 149 | O 150 | O 151 | O 152 | P 153 | P 154 | P 155 | P 156 | P 157 | P 158 | P 159 | P 160 | P 161 | P 162 | P 163 | P 164 | P 165 | P 166 | P 167 | P 168 | P 169 | P 170 | P 171 | P 172 | P 173 | P 174 | P 175 | P 176 | P 177 | P 178 | P 179 | P 180 | P 181 | P 182 | P 183 | P 184 | P 185 | P 186 | P 187 | P 188 | P 189 | P 190 | P 191 | P 192 | P 193 | P 194 | P 195 | P 196 | P 197 | P 198 | P 199 | P 200 | P 201 | P 202 | P 203 | P 204 | P 205 | P 206 | P 207 | P 208 | P 209 | P 210 | -------------------------------------------------------------------------------- /tests/test_mug2vec.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 3 | 4 | import unittest 5 | 6 | import numpy as np 7 | 8 | from graspologic.cluster import GaussianCluster 9 | from graspologic.embed import mug2vec 10 | from graspologic.simulations import sbm 11 | 12 | 13 | def generate_data(): 14 | np.random.seed(2) 15 | 16 | p1 = [[0.3, 0.1], [0.1, 0.3]] 17 | p2 = [[0.1, 0.3], [0.3, 0.1]] 18 | n = [50, 50] 19 | 20 | g1 = [sbm(n, p1) for _ in range(20)] 21 | g2 = [sbm(n, p2) for _ in range(20)] 22 | g = g1 + g2 23 | 24 | y = ["0"] * 20 + ["1"] * 20 25 | 26 | return g, y 27 | 28 | 29 | class TestMug2Vec(unittest.TestCase): 30 | def test_mug2vec(self): 31 | graphs, labels = generate_data() 32 | 33 | mugs = mug2vec(pass_to_ranks=None, svd_seed=1) 34 | xhat = mugs.fit_transform(graphs) 35 | 36 | gmm = GaussianCluster(5) 37 | gmm.fit(xhat, labels) 38 | 39 | self.assertEqual(gmm.n_components_, 2) 40 | 41 | def test_inputs(self): 42 | graphs, labels = generate_data() 43 | 44 | mugs = mug2vec(omnibus_components=-1, svd_seed=1) 45 | with self.assertRaises(ValueError): 46 | mugs.fit(graphs) 47 | 48 | mugs = mug2vec(cmds_components=-1, svd_seed=1) 49 | with self.assertRaises(ValueError): 50 | mugs.fit(graphs) 51 | 52 | mugs = mug2vec(omnibus_n_elbows=-1, svd_seed=1) 53 | with self.assertRaises(ValueError): 54 | mugs.fit(graphs) 55 | 56 | mugs = mug2vec(cmds_n_elbows=-1, svd_seed=1) 57 | with self.assertRaises(ValueError): 58 | mugs.fit(graphs) 59 | -------------------------------------------------------------------------------- /graspologic/datasets/drosophila/right_cell_labels.csv: -------------------------------------------------------------------------------- 1 | K 2 | K 3 | K 4 | K 5 | K 6 | K 7 | K 8 | K 9 | K 10 | K 11 | K 12 | K 13 | K 14 | K 15 | K 16 | K 17 | K 18 | K 19 | K 20 | K 21 | K 22 | K 23 | K 24 | K 25 | K 26 | K 27 | K 28 | K 29 | K 30 | K 31 | K 32 | K 33 | K 34 | K 35 | K 36 | K 37 | K 38 | K 39 | K 40 | K 41 | K 42 | K 43 | K 44 | K 45 | K 46 | K 47 | K 48 | K 49 | K 50 | K 51 | K 52 | K 53 | K 54 | K 55 | K 56 | K 57 | K 58 | K 59 | K 60 | K 61 | K 62 | K 63 | K 64 | K 65 | K 66 | K 67 | K 68 | K 69 | K 70 | K 71 | K 72 | K 73 | K 74 | K 75 | K 76 | K 77 | K 78 | K 79 | K 80 | K 81 | K 82 | K 83 | K 84 | K 85 | K 86 | K 87 | K 88 | K 89 | K 90 | K 91 | K 92 | K 93 | K 94 | K 95 | K 96 | K 97 | K 98 | K 99 | K 100 | K 101 | I 102 | I 103 | I 104 | I 105 | I 106 | I 107 | I 108 | I 109 | I 110 | I 111 | I 112 | I 113 | I 114 | I 115 | I 116 | I 117 | I 118 | I 119 | I 120 | I 121 | I 122 | O 123 | O 124 | O 125 | O 126 | O 127 | O 128 | O 129 | O 130 | O 131 | O 132 | O 133 | O 134 | O 135 | O 136 | O 137 | O 138 | O 139 | O 140 | O 141 | O 142 | O 143 | O 144 | O 145 | O 146 | O 147 | O 148 | O 149 | O 150 | O 151 | P 152 
| P 153 | P 154 | P 155 | P 156 | P 157 | P 158 | P 159 | P 160 | P 161 | P 162 | P 163 | P 164 | P 165 | P 166 | P 167 | P 168 | P 169 | P 170 | P 171 | P 172 | P 173 | P 174 | P 175 | P 176 | P 177 | P 178 | P 179 | P 180 | P 181 | P 182 | P 183 | P 184 | P 185 | P 186 | P 187 | P 188 | P 189 | P 190 | P 191 | P 192 | P 193 | P 194 | P 195 | P 196 | P 197 | P 198 | P 199 | P 200 | P 201 | P 202 | P 203 | P 204 | P 205 | P 206 | P 207 | P 208 | P 209 | P 210 | P 211 | P 212 | P 213 | P 214 | -------------------------------------------------------------------------------- /graspologic/types.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 3 | 4 | """ 5 | This module includes common graspologic type hint declarations. 6 | """ 7 | 8 | import sys 9 | from typing import Optional, Union 10 | 11 | import networkx as nx 12 | import numpy as np 13 | import scipy.sparse as sp 14 | 15 | # an explanation, for those who come in the later times: 16 | # the following is because when type hinting first came out, Python 3.6 up to 3.8 17 | # (inclusive) specifically couldn't really handle having generics in the 18 | # list/dict/tuple/set whatever primitives that are in builtins 19 | # so we all used the typing module to provide their type signature 20 | # and then 3.9 said 'oh, we can fix that now, and also if you use typing we'll cut you', 21 | # so our choice was either to never support 3.9 onward, never support pre 3.9, or 22 | # do this... jankery 23 | # some things of note: mypy specifically looks for `sys.version_info` - so 24 | # using a `from sys import version_info` gets ignored, and you will get mypy errors 25 | # on top of that, doing `List = list` in the 3.9+ block doesn't work at all, so we 26 | # have to use this VERY specific syntax. if you want to test it, try it out, but as of 27 | # today, `from builtins import foo as Foo` is the right way to do it. 28 | # PEP 484 & PEP 585 Fun 29 | if sys.version_info >= (3, 9): 30 | from builtins import dict as Dict 31 | from builtins import list as List 32 | from builtins import set as Set 33 | from builtins import tuple as Tuple 34 | else: 35 | from typing import Dict, List, Set, Tuple 36 | 37 | AdjacencyMatrix = Union[np.ndarray, sp.csr_array] 38 | 39 | GraphRepresentation = Union[np.ndarray, sp.csr_array, nx.Graph] 40 | 41 | RngType = Optional[Union[int, np.integer, np.random.Generator]] 42 | 43 | __all__ = [ 44 | "AdjacencyMatrix", 45 | "Dict", 46 | "List", 47 | "GraphRepresentation", 48 | "RngType", 49 | "Set", 50 | "Tuple", 51 | ] 52 | -------------------------------------------------------------------------------- /docs/sphinx-ext/toctree_filter.py: -------------------------------------------------------------------------------- 1 | # Copied and modified from https://stackoverflow.com/questions/15001888/conditional-toctree-in-sphinx 2 | 3 | import re 4 | 5 | from sphinx.directives.other import TocTree 6 | 7 | 8 | def setup(app): 9 | app.add_config_value("toc_filter_exclude", [], "html") 10 | app.add_directive("toctree-filt", TocTreeFilt) 11 | return {"version": "1.0.0"} 12 | 13 | 14 | class TocTreeFilt(TocTree): 15 | """ 16 | Directive to notify Sphinx about the hierarchical structure of the docs, 17 | and to include a table-of-contents like tree in the current document. This 18 | version filters the entries based on a list of prefixes. 
We simply filter 19 | the content of the directive and call the super's version of run. The 20 | list of exclusions is stored in the **toc_filter_exclusion** list. Any 21 | table of content entry prefixed by one of these strings will be excluded. 22 | If `toc_filter_exclusion=['secret','draft']` then all toc entries of the 23 | form `:secret:ultra-api` or `:draft:new-features` will be excuded from 24 | the final table of contents. Entries without a prefix are always included. 25 | """ 26 | 27 | hasPat = re.compile("\s*(.*)$") 28 | 29 | # Remove any entries in the content that we dont want and strip 30 | # out any filter prefixes that we want but obviously don't want the 31 | # prefix to mess up the file name. 32 | def filter_entries(self, entries): 33 | excl = self.state.document.settings.env.config.toc_filter_exclude 34 | filtered = [] 35 | for e in entries: 36 | m = self.hasPat.match(e) 37 | if m != None: 38 | if not m.groups()[0] in excl: 39 | filtered.append(m.groups()[0]) 40 | else: 41 | filtered.append(e) 42 | return filtered 43 | 44 | def run(self): 45 | # Remove all TOC entries that should not be on display 46 | self.content = self.filter_entries(self.content) 47 | return super().run() 48 | -------------------------------------------------------------------------------- /docs/reference/release/release_0.3.rst: -------------------------------------------------------------------------------- 1 | .. _last-graspy-label: 2 | 3 | Release Notes: GraSPy 0.3 4 | ========================= 5 | 6 | We're happy to announce the release of GraSPy 0.3! GraSPy is a Python package for 7 | understanding the properties of random graphs that arise from modern datasets, such as 8 | social networks and brain networks. 9 | 10 | For more information, please visit our `website `_ 11 | and our `tutorials `_. 12 | 13 | 14 | Highlights 15 | ---------- 16 | This release is the result of over 5 months of work with over 11 pull requests by 17 | 7 contributors. Highlights include: 18 | 19 | - Added seeded graph matching as a capability for graph matching, renamed graph matching class to ``GraphMatch`` 20 | - Added functions for simulating a pair of correlated RDPG graphs. 21 | - Deprecated Python 3.5 22 | - Added different backend hypothesis tests for the ``LatentDistributionTest`` from Hyppo 23 | - Added a correction to make ``LatentDistributionTest`` valid for differently sized graphs 24 | 25 | Improvements 26 | ------------ 27 | - Updated default value of ``rescale`` in RDPG simulation 28 | - Updated default value of ``scaled`` in MASE estimation 29 | - Improved error throwing in ``AutoGMM`` 30 | - Clarified the API for ``inference`` submodule 31 | 32 | API Changes 33 | ----------- 34 | - ``FastApproximateQAP`` was renamed to ``GraphMatch`` 35 | - ``fit`` method of ``LatentDistributionTest`` and ``LatentPositionTest`` now returns self instead of a p-value 36 | 37 | Deprecations 38 | ------------ 39 | - Python 3.5 40 | 41 | Contributors to this release 42 | ---------------------------- 43 | - `Jaewon Chung `_ 44 | - `Benjamin Pedigo `_ 45 | - `Ali Saad-Eldin `_ 46 | - `Shan Qiu `_ 47 | - `Bijan Varjavand `_ 48 | - `Anton Alyakin `_ (new contributor!) 49 | - `Casey Weiner `_ (new contributor!) 
-------------------------------------------------------------------------------- /docs/reference/release/release_0.0.3.rst: -------------------------------------------------------------------------------- 1 | Release Notes: GraSPy 0.0.3 2 | =========================== 3 | 4 | We're happy to announce the release of GraSPy 0.0.3! GraSPy is a Python package for 5 | understanding the properties of random graphs that arise from modern datasets, such as 6 | social networks and brain networks. 7 | 8 | For more information, please visit our `website `_ 9 | and our `tutorials `_. 10 | 11 | 12 | Highlights 13 | ---------- 14 | This release is the result of over 2 months of work with over 16 pull requests by 15 | 4 contributors. Highlights include: 16 | 17 | - Optimization over covariance structures when using ``GaussianCluster`` 18 | - Standardized sorting for visualizing graphs when using ``heatmap`` or ``gridplot`` 19 | - Graph model classes for fitting several random graph models to input datasets 20 | - Improved customization for ``heatmaps`` and ``gridplots`` 21 | 22 | 23 | Improvements 24 | ------------ 25 | - Added badges to Github for arxiv paper and number of downloads 26 | - Remove author headers for individual source files 27 | - Fix bugs in documentation 28 | - Bug fix for calculating intersection of largest connected components between graphs 29 | - Pre-defined axes can be passed to ``heatmap`` for making subplot figures 30 | - Colormap objects and color bounds can be passed to ``heatmap`` directly 31 | 32 | API Changes 33 | ----------- 34 | - ``SemiparametricTest`` was renamed to ``LatentPositionTest`` 35 | - ``NonparametricTest`` was renamed to ``LatentDistributionTest`` 36 | - ``heatmap`` and ``gridplot`` accept ``hier_label_fontsize`` and ``title_pad`` kwargs 37 | 38 | Deprecations 39 | ------------ 40 | - The notebooks folder was removed from ``GraSPy`` 41 | - ``SemiparametricTest`` and ``NonparametricTest`` renamed (see above) 42 | 43 | Contributors to this release 44 | ---------------------------- 45 | - `Benjamin Pedigo `_ 46 | - `Jaewon Chung `_ 47 | - `Hayden Helm `_ (new contributor!) 48 | - `Alex Loftus `_ (new contributor!) -------------------------------------------------------------------------------- /docs/reference/release/release_0.2.rst: -------------------------------------------------------------------------------- 1 | Release Notes: GraSPy 0.2 2 | ========================= 3 | 4 | We're happy to announce the release of GraSPy 0.2! GraSPy is a Python package for 5 | understanding the properties of random graphs that arise from modern datasets, such as 6 | social networks and brain networks. 7 | 8 | For more information, please visit our `website `_ 9 | and our `tutorials `_. 10 | 11 | 12 | Highlights 13 | ---------- 14 | This release is the result of over 8 months of work with over 25 pull requests by 15 | 10 contributors. Highlights include: 16 | 17 | - Added ``AutoGMMCluster`` in ``cluster`` submodule. ``AutoGMMCluster`` is Python equivalent to ``mclust`` in R. 18 | - Added ``subgraph`` submodule, which detects vertices that maximally correlates to given features. 19 | - Added ``match`` submodule. Used for matching vertices from a pair of graphs with unknown vertex correspondence. 20 | - Added functions for simulating a pair of correlated ER and SBM graphs. 21 | 22 | Improvements 23 | ------------ 24 | - Diagonal augmentation is default behavior in AdjacencySpectralEmbed. 25 | - Added functionality in ``to_laplacian`` to allow for directed graphs. 
26 | - Updated docstrings. 27 | - Updated documentation website. 28 | - Various bug fixes. 29 | 30 | API Changes 31 | ----------- 32 | - Added ``**kwargs`` argument for ``heatmap``. 33 | 34 | Deprecations 35 | ------------ 36 | None 37 | 38 | Contributors to this release 39 | ---------------------------- 40 | - `Jaewon Chung `_ 41 | - `Benjamin Pedigo `_ 42 | - `Tommy Athey `_ (new contributor!) 43 | - `Jayanta Dey `_ (new contributor!) 44 | - `Iain Carmichael `_ (new contributor!) 45 | - `Shiyu Sun `_ (new contributor!) 46 | - `Ali Saad-Eldin `_ (new contributor!) 47 | - `Gun Kang `_ (new contributor!) 48 | - `Shan Qiu `_ (new contributor!) 49 | - `Ben Falk `_ (new contributor!) 50 | - `Jennifer Heiko `_ (new contributor!) -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # test files 2 | _.py 3 | _.ipynb 4 | 5 | # Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | # *.csv 10 | 11 | # C extensions 12 | *.so 13 | 14 | # Distribution / packaging 15 | .Python 16 | build/ 17 | develop-eggs/ 18 | dist/ 19 | downloads/ 20 | eggs/ 21 | .eggs/ 22 | lib/ 23 | lib64/ 24 | parts/ 25 | sdist/ 26 | var/ 27 | wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | MANIFEST 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .coverage 47 | .coverage.* 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | *.cover 52 | .hypothesis/ 53 | .pytest_cache/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | db.sqlite3 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build 73 | docs/reference/_build/ 74 | docs/tutorials/_build/ 75 | 76 | # PyBuilder 77 | target/ 78 | 79 | # Jupyter Notebook 80 | .ipynb_checkpoints 81 | 82 | # IPython 83 | profile_default/ 84 | ipython_config.py 85 | 86 | # pyenv 87 | .python-version 88 | 89 | # celery beat schedule file 90 | celerybeat-schedule 91 | 92 | # SageMath parsed files 93 | *.sage.py 94 | 95 | # Environments 96 | .env 97 | .venv 98 | env/ 99 | venv/ 100 | ENV/ 101 | env.bak/ 102 | venv.bak/ 103 | 104 | # Spyder project settings 105 | .spyderproject 106 | .spyproject 107 | 108 | # Rope project settings 109 | .ropeproject 110 | 111 | # mkdocs documentation 112 | /site 113 | 114 | # mypy 115 | .mypy_cache/ 116 | .dmypy.json 117 | dmypy.json 118 | 119 | # misc 120 | notebooks 121 | .vscode/ 122 | docs/bpedigo/ase_clustering.py 123 | *.pickle 124 | /graspy-env 125 | *.code-workspace 126 | *.png 127 | notebooks/bpedigo 128 | 129 | *.DS_Store 130 | .idea 131 | *.iml 132 | pip-wheel-metadata 133 | 134 | -------------------------------------------------------------------------------- /ROLES.md: -------------------------------------------------------------------------------- 1 | # Roles and Areas of Responsibility 2 | 3 | The following is a non-exhaustive list of the primary contributors to `graspologic` and 4 | their roles and areas of responsibilities. 
Please feel free to use this list to `@` 5 | specific contributors in your issues or pull requests that seem to line up best with 6 | your issue! 7 | 8 | ## Core Contributors 9 | 10 | ### Ali Saad-Eldin ([@asaadeldin11](https://github.com/asaadeldin11)) (he/him) 11 | Ali is a Masters Student at Johns Hopkins University. He contributes and reviews code 12 | mostly for the `match` and `embed` modules. 13 | 14 | ### Anton Alyakin ([@alyakin314](https://github.com/alyakin314)) 15 | Anton is an Assistant Research Engineer at Johns Hopkins University. His primary 16 | contributions to `graspologic` are within `align` and `inference` modules. Ask him 17 | anything about those. 18 | 19 | ### Benjamin Pedigo ([@bdpedigo](https://github.com/bdpedigo)) (he/him) 20 | Ben is a PhD student at Johns Hopkins University in the NeuroData lab. Ask Ben about 21 | network model fitting and sampling, clustering, and spectral embedding (`models`, 22 | `simulations`, `cluster`, and `embed`, respectfully). Ben is also happy to hear how we 23 | can improve our tutorials. 24 | 25 | ### Carolyn Buractaon ([@carolyncb](https://github.com/carolyncb)) (she/her) 26 | Carolyn is a Technical Program Manager at Microsoft. Ask Carolyn about where the project 27 | is going and how it’s organized. 28 | 29 | ### Dwayne Pryce ([@dwaynepryce](https://github.com/dwaynepryce)) (he/him) 30 | Dwayne Pryce is a Software Engineer at Microsoft Research. His primary contributions to 31 | `graspologic` are on the steering committee, quality of life utility functions, and 32 | build and release processes. 33 | 34 | ### Jaewon Chung ([@j1c](https://github.com/j1c)) (he/him) 35 | Jaewon is a PhD student at Johns Hopkins University. He is a maintainer and developer 36 | for `graspologic`, and is responsible for reviewing code contributions, merging pull 37 | requests, and making decisions on the `graspologic` API. 38 | 39 | ### Nick Caurvina ([@nyecarr](https://github.com/nyecarr)) (he/him) 40 | Nick is a Software Engineer at Microsoft Research. Ask Nick about the network 41 | embeddings and their application to business problems. 42 | -------------------------------------------------------------------------------- /docs/reference/in-the-wild.rst: -------------------------------------------------------------------------------- 1 | graspologic in the wild 2 | ======================= 3 | 4 | Below we include some examples of graspologic being used in projects. Feel free to make 5 | a pull request if you'd like to add to this list! 6 | 7 | Papers 8 | """""" 9 | * `Statistical connectomics `_: 10 | Authors in the NeuroData group at Johns Hopkins University "provide an overview from the perspective of statistical network science of 11 | the kinds of models, assumptions, problems, and applications that are theoretically and 12 | empirically justified for analysis of connectome data," using graspologic for most 13 | analysis and examples. 14 | 15 | * `Graph matching via optimal transport `_: 16 | Authors in the NeuroData group improved on a state-of-the-art technique for graph matching, making the algorithm faster and more accurate on 17 | larger and less-correlated networks. 18 | 19 | Educational materials 20 | """"""""""""""""""""" 21 | * `Network data science `_: 22 | A short course on network data science (using many tools in `graspologic`) taught at Johns Hopkins University. 
23 | 24 | * `Hands-on Network Machine Learning with Scikit-Learn and Graspologic `: 25 | A JupyterBook (in progress) on network machine learning based on the tools in `graspologic` and `scikit-learn`. Written 26 | by authors in the NeuroData group. 27 | 28 | Blog posts 29 | """""""""" 30 | * `Advancing organizational science using network machine learning to measure innovation in the workplace `_: 31 | Researchers at Microsoft Research and collaborators used tools from graspologic (network 32 | layout, partitioning and modularity) to understand how workplace collaboration networks 33 | changed during the COVID-19 pandemic. 34 | 35 | * `NeuroData notebooks `_ : 36 | Check out this page to see what some of the members of the NeuroData lab are working on 37 | using graspologic. 38 | -------------------------------------------------------------------------------- /docs/reference/install.rst: -------------------------------------------------------------------------------- 1 | Install 2 | ======= 3 | 4 | 5 | Below we assume you have the default Python environment already configured on 6 | your computer and you intend to install ``graspologic`` inside of it. If you want 7 | to create and work with Python virtual environments, please follow instructions 8 | on `venv `_ and `virtual 9 | environments `_. 10 | 11 | First, make sure you have the latest version of ``pip`` (the Python package manager) 12 | installed. If you do not, refer to the `Pip documentation 13 | `_ and install ``pip`` first. 14 | 15 | Install the released version 16 | ---------------------------- 17 | 18 | Install the current release of ``graspologic`` with ``pip``:: 19 | 20 | $ pip install graspologic 21 | 22 | To upgrade to a newer release use the ``--upgrade`` flag:: 23 | 24 | $ pip install --upgrade graspologic 25 | 26 | If you do not have permission to install software systemwide, you can 27 | install into your user directory using the ``--user`` flag:: 28 | 29 | $ pip install --user graspologic 30 | 31 | Alternatively, you can manually download ``graspologic`` from 32 | `PyPI `_. 33 | To install in this way, navigate to the download folder in your terminal and run 34 | `pip install graspologic-VERSION.tar.gz` 35 | 36 | Python package dependencies 37 | --------------------------- 38 | graspologic requires the following packages: 39 | 40 | - networkx 41 | - numpy 42 | - pandas 43 | - scikit-learn 44 | - scipy 45 | - seaborn 46 | 47 | 48 | Hardware requirements 49 | --------------------- 50 | `graspologic` package requires only a standard computer with enough RAM to support the in-memory operations. 51 | 52 | OS Requirements 53 | --------------- 54 | This package is supported for *Linux* and *macOS*. However, the package has been tested on the following systems: 55 | 56 | - Linux: N/A 57 | - macOS: N/A 58 | - Windows: N/A 59 | 60 | 61 | Testing 62 | ------- 63 | graspologic uses the Python ``pytest`` testing package. If you don't already have 64 | that package installed, follow the directions on the `pytest homepage 65 | `_. 66 | -------------------------------------------------------------------------------- /tests/pipeline/embed/test_embeddings.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT license. 
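# Orientation for the tests below (this summarises the assertions in this file,
# not the public documentation): ``Embeddings(labels, embeddings)`` pairs an array
# of node labels with a matching matrix of embedding vectors. Indexing yields
# ``(label, vector)`` tuples, iteration walks those pairs in order, ``len`` is the
# number of embedded nodes, and ``as_dict()`` returns a label -> vector mapping.
# A minimal sketch using made-up data:
#
#     labels = np.array(["a", "b"])
#     vectors = np.array([[0.1, 0.2], [0.3, 0.4]])
#     emb = Embeddings(labels, vectors)
#     emb.as_dict()["a"]    # -> array([0.1, 0.2])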
3 | 4 | import unittest 5 | 6 | import numpy as np 7 | from beartype.roar import BeartypeCallHintParamViolation 8 | 9 | from graspologic.pipeline.embed import Embeddings 10 | 11 | 12 | class TestEmbeddings(unittest.TestCase): 13 | def setUp(self) -> None: 14 | self.fake_embeddings = np.array([[0, 1, 2, 3], [5, 4, 3, 2], [3, 5, 1, 2]]) 15 | self.labels = np.array(["dax", "nick", "ben"]) 16 | self.embeddings = Embeddings(self.labels, self.fake_embeddings) 17 | 18 | def test_embeddings_index(self): 19 | for i in range(0, 3): 20 | entry = self.embeddings[i] 21 | self.assertEqual(self.labels[i], entry[0]) 22 | np.testing.assert_array_equal(self.fake_embeddings[i], entry[1]) 23 | 24 | def test_embeddings_iterable(self): 25 | labels = [] 26 | embeddings = [] 27 | for label, embedding in self.embeddings: 28 | labels.append(label) 29 | embeddings.append(embedding) 30 | 31 | np.testing.assert_array_equal(self.labels, labels) 32 | np.testing.assert_array_equal(self.fake_embeddings, embeddings) 33 | 34 | def test_embeddings_size(self): 35 | self.assertEqual(3, len(self.embeddings)) 36 | 37 | def test_view(self): 38 | expected = { 39 | "ben": np.array([3, 5, 1, 2]), 40 | "dax": np.array([0, 1, 2, 3]), 41 | "nick": np.array([5, 4, 3, 2]), 42 | } 43 | view = self.embeddings.as_dict() 44 | self.assertSetEqual(set(view.keys()), set(expected.keys())) 45 | for key in expected: 46 | np.testing.assert_array_equal(expected[key], view[key]) 47 | 48 | def test_argument_types(self): 49 | with self.assertRaises(BeartypeCallHintParamViolation): 50 | Embeddings(None, None) 51 | with self.assertRaises(BeartypeCallHintParamViolation): 52 | Embeddings(np.array(["hello"]), None) 53 | with self.assertRaises(BeartypeCallHintParamViolation): 54 | Embeddings(["hello"], [1.0]) 55 | with self.assertRaises(ValueError): 56 | Embeddings(np.array(["hello"]), np.array([[1.1, 1.2], [2.1, 2.2]])) 57 | -------------------------------------------------------------------------------- /tests/cluster/test_kclust.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 
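# Orientation for the tests below (behaviour as asserted in this file):
# ``KMeansCluster(max_clusters=k)`` sweeps candidate cluster counts, ``predict``
# raises ``NotFittedError`` before ``fit``, and the fitted object exposes
# per-candidate scores. A minimal sketch:
#
#     kclust = KMeansCluster(max_clusters=5)
#     kclust.fit(X, y)    # with labels: per-candidate ARI scores in kclust.ari_
#     kclust.fit(X)       # without labels: silhouette scores in kclust.silhouette_
#     labels = kclust.predict(X)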
3 | 4 | import unittest 5 | 6 | import numpy as np 7 | from numpy.testing import assert_equal 8 | from sklearn.exceptions import NotFittedError 9 | 10 | from graspologic.cluster.kclust import KMeansCluster 11 | 12 | 13 | class TestKMeansCluster(unittest.TestCase): 14 | def test_inputs(self): 15 | # Generate random data 16 | X = np.random.normal(0, 1, size=(100, 3)) 17 | 18 | with self.assertRaises(TypeError): 19 | max_clusters = "1" 20 | kclust = KMeansCluster(max_clusters=max_clusters) 21 | 22 | # max_cluster < 0 23 | with self.assertRaises(ValueError): 24 | kclust = KMeansCluster(max_clusters=-1) 25 | 26 | # max_cluster more than n_samples 27 | with self.assertRaises(ValueError): 28 | kclust = KMeansCluster(max_clusters=1000) 29 | kclust.fit_predict(X) 30 | 31 | def test_predict_without_fit(self): 32 | # Generate random data 33 | X = np.random.normal(0, 1, size=(100, 3)) 34 | 35 | with self.assertRaises(NotFittedError): 36 | kclust = KMeansCluster(max_clusters=2) 37 | kclust.predict(X) 38 | 39 | def test_outputs_gaussians(self): 40 | np.random.seed(2) 41 | 42 | n = 100 43 | d = 3 44 | num_sims = 10 45 | for _ in range(num_sims): 46 | X1 = np.random.normal(2, 0.5, size=(n, d)) 47 | X2 = np.random.normal(-2, 0.5, size=(n, d)) 48 | X = np.vstack((X1, X2)) 49 | y = np.repeat([0, 1], n) 50 | 51 | kclust = KMeansCluster(max_clusters=5) 52 | kclust.fit(X, y) 53 | aris = kclust.ari_ 54 | 55 | # Assert that the two cluster model is the best 56 | assert_equal(np.max(aris), 1) 57 | 58 | def test_no_y(self): 59 | np.random.seed(2) 60 | n = 100 61 | d = 3 62 | X1 = np.random.normal(2, 0.5, size=(n, d)) 63 | X2 = np.random.normal(-2, 0.5, size=(n, d)) 64 | X = np.vstack((X1, X2)) 65 | 66 | kclust = KMeansCluster(max_clusters=5) 67 | kclust.fit(X) 68 | 69 | assert_equal(np.argmax(kclust.silhouette_), 0) 70 | -------------------------------------------------------------------------------- /docs/reference/cli.rst: -------------------------------------------------------------------------------- 1 | CLI 2 | === 3 | 4 | In addition to the main library, there is also a CLI runnable module for automatically 5 | generating layouts for graphs in an edge list. 6 | 7 | You can run this from the command line like so: 8 | 9 | .. code-block:: bash 10 | 11 | python -m graspologic.layouts --help 12 | 13 | Which should return something like: 14 | 15 | .. code-block:: none 16 | 17 | usage: python -m graspologic.layouts [-h] [--verbose VERBOSE] {n2vumap,n2vtsne,render} ... 18 | 19 | Runnable module that automatically generates a layout of a graph by a provided edge list 20 | 21 | positional arguments: 22 | {n2vumap,n2vtsne,render} 23 | n2vumap Auto layout using UMAP for dimensionality reduction 24 | n2vtsne Auto layout using tSNE for dimensionality reduction 25 | render Renders a graph via an input file 26 | 27 | Of those commands, you can then do: 28 | 29 | .. code-block:: bash 30 | 31 | python -m graspologic.layouts n2vumap --help 32 | 33 | Which will return something like: 34 | 35 | .. code-block:: none 36 | 37 | usage: python -m graspologic.layouts n2vumap [-h] --edge_list EDGE_LIST [--skip_header] [--image_file IMAGE_FILE] [--location_file LOCATION_FILE] [--max_edges MAX_EDGES] [--dpi DPI] 38 | [--allow_overlaps] 39 | 40 | optional arguments: 41 | -h, --help show this help message and exit 42 | --edge_list EDGE_LIST 43 | edge list in csv file. must be source,target,weight. 44 | --skip_header skip first line in csv file, corresponding to header. 
45 | --image_file IMAGE_FILE 46 | output path and filename for generated image file. required if --location_file is omitted. 47 | --location_file LOCATION_FILE 48 | output path and filename for location file. required if --image_file is omitted. 49 | --max_edges MAX_EDGES 50 | maximum edges to keep during embedding. edges with low weights will be pruned to keep at most this many edges 51 | --dpi DPI used with --image_file to render an image at this dpi 52 | --allow_overlaps skip the no overlap algorithm and let nodes stack as per the results of the down projection algorithm 53 | -------------------------------------------------------------------------------- /graspologic/layouts/nooverlap/_quad_tree.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT license. 3 | from typing import Optional 4 | 5 | from graspologic.types import List, Tuple 6 | 7 | from ._node import _Node 8 | from ._quad_node import _QuadNode 9 | 10 | 11 | class _QuadTree: 12 | # used to hold objects that have x, y, and mass property 13 | # nodes = [] 14 | 15 | def __init__(self, nodes: List[_Node], max_nodes_per_quad: int): 16 | self.nodes = nodes 17 | self.root = _QuadNode(nodes, 0, max_nodes_per_quad, None) 18 | 19 | def get_quad_density_list(self) -> List[Tuple[float, float, int, _QuadNode]]: 20 | density_list = self.root.get_density_list() 21 | return sorted(density_list, reverse=True) 22 | 23 | def layout_graph(self) -> List[_Node]: 24 | return self.layout_dense_first() 25 | 26 | def tree_stats(self) -> List[float]: 27 | results = self.root.quad_stats() 28 | return list(results) + [ 29 | results[3] / len(self.nodes), 30 | results[4] / len(self.nodes), 31 | self.root.sq_ratio, 32 | ] 33 | 34 | def collect_nodes(self) -> List[_Node]: 35 | ret_val: List[_Node] = [] 36 | self.root.collect_nodes(ret_val) 37 | return ret_val 38 | 39 | def get_tree_node_bounds(self) -> List[Tuple[int, float, float, float, float]]: 40 | ret_val: List[Tuple[int, float, float, float, float]] = [] 41 | self.root.boxes_by_level(ret_val) 42 | return ret_val 43 | 44 | def count_overlaps(self) -> int: 45 | return self.root.num_overlapping() 46 | 47 | def count_overlaps_across_quads(self) -> int: 48 | return self.root.num_overlapping_across_quads(self.root.nodes) 49 | 50 | def layout_dense_first(self, first_color: Optional[str] = None) -> List[_Node]: 51 | den_list = list(self.get_quad_density_list()) 52 | first = True 53 | # count = 0 54 | for cell_density, density_ratio, cell_count, qn in den_list: 55 | # print ('cell density', cell_density, 'sq_density', density_ratio, 'cell_count', cell_count) 56 | qn.layout_quad() 57 | if first: 58 | if first_color is not None and qn.parent is not None: 59 | for n in qn.parent.nodes: 60 | n.color = first_color #'#FF0004' 61 | first = False 62 | return self.nodes 63 | -------------------------------------------------------------------------------- /docs/reference/release/release_0.0.2.rst: -------------------------------------------------------------------------------- 1 | Release Notes: GraSPy 0.0.2 2 | =========================== 3 | 4 | We're happy to announce the release of GraSPy 0.0.2! GraSPy is a Python package for 5 | understanding the properties of random graphs that arise from modern datasets, such as social networks 6 | and brain networks. 7 | 8 | For more information, please visit our `website `_ 9 | and our `tutorials `_. 
10 | 11 | 12 | Highlights 13 | ---------- 14 | This release is the result of 3 months of work with over 16 pull requests by 5 contributors. Highlights include: 15 | 16 | - Nonparametric hypothesis testing method for testing two non-vertex matched graphs. 17 | - Plotting updates to ``pairplot``, ``gridplot`` and ``heatmaps``. 18 | - Sampling degree-corrected stochastic block models (DC-SBM). 19 | - ``import_edgelist`` function for importing single or multiple edgelists. 20 | - Enforcing ``Black`` formatting for the package. 21 | 22 | Improvements 23 | ------------ 24 | - Embedding methods are now fully sklearn-compliant. This is tested via the ``check_estimator`` function in sklearn. 25 | - ``gridplot`` and ``heatmap`` can now plot hierarchical labels. 26 | - New Laplacian computing method ('R-DAD') by adding a constant to the diagonal degree matrix. 27 | - Semiparametric testing only checks for the largest connected component (LCC) in the initial embeddings. 28 | - Various bug fixes. 29 | - Various tutorial latex fixes. 30 | - Various documentation clarifications. 31 | - More consistent documentation. 32 | 33 | API Changes 34 | ----------- 35 | - ``check_lcc`` argument in ``AdjacencySpectralEmbed``, ``LaplacianSpectralEmbed``, and ``OmnibusEmbed`` classes, which checks if input graph(s) are fully connected when ``check_lcc`` is True. 36 | - ``gridplot`` and ``heatmap`` now have ``inner_hier_labels`` and ``outer_hier_labels`` arguments, which are used for hierarchical labeling of nodes. 37 | - ``to_laplacian`` function now has a ``regularizer`` arg for when ``form`` is 'R-DAD'. 38 | - ``sbm`` function now has ``dc`` and ``dc_kws`` arguments for sampling SBM with degree-correction. 39 | 40 | Deprecations 41 | ------------ 42 | None. 43 | 44 | Contributors to this release 45 | ---------------------------- 46 | - `Benjamin Pedigo `_ 47 | - `Jaewon Chung `_ 48 | - `Bijan Varjavand `_ 49 | - `Vikram Chandrashekhar `_ 50 | - `Ronan Perry `_ 51 | -------------------------------------------------------------------------------- /graspologic/inference/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from ..types import AdjacencyMatrix 4 | 5 | 6 | def compute_density(adjacency: AdjacencyMatrix, loops: bool = False) -> float: 7 | """ 8 | For a given graph, this function computes the graph density, defined as the actual number of edges divided by the total possible number 9 | of edges in the graph. 10 | 11 | Parameters 12 | ---------- 13 | adjacency: int, array shape (n_nodes,n_nodes) 14 | The adjacency matrix for the graph. Edges are denoted by 1s while non-edges are denoted by 0s. 15 | 16 | loops: boolean 17 | Optional variable to select whether to include self-loops (i.e. connections between a node and itself). Default is ``False``, meaning 18 | such connections are ignored. 19 | 20 | Returns 21 | ------- 22 | n_edges/n_possible: float 23 | The computed density, calculated as the total number of edges divided by the total number of possible edges. 24 | 25 | """ 26 | n_edges = np.count_nonzero(adjacency) 27 | n_nodes = adjacency.shape[0] 28 | n_possible = n_nodes**2 29 | if not loops: 30 | n_possible -= n_nodes 31 | return n_edges / n_possible 32 | 33 | 34 | def compute_density_adjustment( 35 | adjacency1: AdjacencyMatrix, adjacency2: AdjacencyMatrix 36 | ) -> float: 37 | """ 38 | Computes the density adjustment to be used when testing the hypothesis that the density of one network is equal to a fixed parameter 39 | times the density of a second network.
This function first calls the compute_density function above to compute the densities of both 40 | networks, then computes an odds ratio by calculating the odds of an edge in each network and taking the ratio of the results. 41 | 42 | Parameters 43 | ---------- 44 | adjacency1: int, array of size (n_nodes1,n_nodes1) 45 | Adjacency matrix for the first graph. 1s represent edges while 0s represent the absence of an edge. The array is a square of side length 46 | n_nodes1, where this corresponds to the number of nodes in graph 1. 47 | 48 | adjacency2: int, array of size (n_nodes2,n_nodes2) 49 | Same as above, but for the second graph. 50 | 51 | Returns 52 | --------- 53 | odds_ratio: float 54 | Computed as the ratio of the odds of an edge in graph 1 to the odds of an edge in graph 2. 55 | 56 | """ 57 | density1 = compute_density(adjacency1) 58 | density2 = compute_density(adjacency2) 59 | # return density1 / density2 60 | odds1 = density1 / (1 - density1) 61 | odds2 = density2 / (1 - density2) 62 | odds_ratio = odds1 / odds2 63 | return odds_ratio 64 | -------------------------------------------------------------------------------- /graspologic/pipeline/embed/_elbow.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT license. 3 | 4 | 5 | from typing import Union 6 | 7 | import numpy as np 8 | from scipy.stats import norm 9 | 10 | from graspologic.types import List, Tuple 11 | 12 | 13 | def _compute_likelihood(arr: np.ndarray) -> np.ndarray: 14 | """ 15 | Computes the log likelihoods based on normal distribution given 16 | a 1d-array of sorted values. If the input has no variance, 17 | the likelihood will be nan. 18 | """ 19 | n_elements = len(arr) 20 | likelihoods = np.zeros(n_elements) 21 | 22 | for idx in range(1, n_elements + 1): 23 | # split into two samples 24 | s1 = arr[:idx] 25 | s2 = arr[idx:] 26 | 27 | # deal with when input only has 2 elements 28 | if (s1.size == 1) & (s2.size == 1): 29 | likelihoods[idx - 1] = -np.inf 30 | continue 31 | 32 | # compute means 33 | mu1 = np.mean(s1) 34 | if s2.size != 0: 35 | mu2 = np.mean(s2) 36 | else: 37 | # Prevent numpy warning for taking mean of empty array 38 | mu2 = -np.inf 39 | 40 | # compute pooled variance 41 | variance = (np.sum((s1 - mu1) ** 2) + np.sum((s2 - mu2) ** 2)) / ( 42 | n_elements - 1 - (idx < n_elements) 43 | ) 44 | std = np.sqrt(variance) 45 | 46 | # compute log likelihoods 47 | likelihoods[idx - 1] = np.sum(norm.logpdf(s1, loc=mu1, scale=std)) + np.sum( 48 | norm.logpdf(s2, loc=mu2, scale=std) 49 | ) 50 | 51 | return likelihoods 52 | 53 | 54 | def _find_elbows(priority_ordered_matrix: np.ndarray, n_elbows: int) -> List[int]: 55 | # use Ghodsi & Zhu method for finding elbow 56 | idx = 0 57 | elbows = [] 58 | for _ in range(n_elbows): 59 | arr = priority_ordered_matrix[idx:] 60 | if arr.size <= 1: # Cant compute likelihoods with 1 numbers 61 | break 62 | lq = _compute_likelihood(arr) 63 | idx += int(np.argmax(lq)) + 1 64 | elbows.append(idx) 65 | 66 | return elbows 67 | 68 | 69 | def _index_of_elbow( 70 | priority_ordered_matrix: Union[np.ndarray, Tuple[np.ndarray, np.ndarray]], 71 | n_elbows: int, 72 | ) -> int: 73 | if isinstance(priority_ordered_matrix, tuple): 74 | left_elbows = _find_elbows(priority_ordered_matrix[0], n_elbows) 75 | right_elbows = _find_elbows(priority_ordered_matrix[1], n_elbows) 76 | return max(left_elbows[-1], right_elbows[-1]) 77 | else: 78 | elbows = _find_elbows(priority_ordered_matrix, n_elbows) 
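        # _find_elbows may stop early and return fewer than n_elbows entries;
        # the last elbow found is used as the cut-off index for the embedding dimension.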
79 | return elbows[-1] 80 | -------------------------------------------------------------------------------- /tests/layouts/test_auto.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 3 | 4 | import random 5 | import unittest 6 | 7 | import networkx as nx 8 | import numpy as np 9 | 10 | from graspologic.layouts.auto import _get_bounds, layout_umap 11 | 12 | 13 | class TestAuto(unittest.TestCase): 14 | def test_get_bounds(self): 15 | y = np.array([(1, 2), (4, 5), (-1, -2), (10, -20)]) 16 | minx, miny, maxx, maxy = _get_bounds(y) 17 | self.assertEqual(-1, minx) 18 | self.assertEqual(-20, miny) 19 | self.assertEqual(10, maxx) 20 | self.assertEqual(5, maxy) 21 | 22 | def test_layout_umap_string_node_ids(self): 23 | graph = nx.florentine_families_graph() 24 | 25 | for s, t in graph.edges(): 26 | graph.add_edge(s, t, weight=1) 27 | 28 | _, node_positions = layout_umap(graph=graph) 29 | 30 | self.assertEqual(len(node_positions), len(graph.nodes())) 31 | 32 | def test_layout_umap_int_node_ids(self): 33 | graph = nx.florentine_families_graph() 34 | graph_int_node_ids = nx.Graph() 35 | ids_as_ints = dict() 36 | 37 | for s, t in graph.edges(): 38 | if s not in ids_as_ints: 39 | ids_as_ints[s] = int(len(ids_as_ints.keys())) 40 | 41 | if t not in ids_as_ints: 42 | ids_as_ints[t] = int(len(ids_as_ints.keys())) 43 | 44 | graph_int_node_ids.add_edge(ids_as_ints[s], ids_as_ints[t], weight=1) 45 | 46 | _, node_positions = layout_umap(graph=graph_int_node_ids) 47 | 48 | self.assertEqual(len(node_positions), len(graph.nodes())) 49 | 50 | def test_layout_umap_directed_weighted(self): 51 | graph = nx.erdos_renyi_graph(10, 0.7, directed=True) 52 | 53 | for s, t in graph.edges(): 54 | graph.edges[s, t]["weight"] = np.random.randint(1, 10) 55 | 56 | _, node_positions = layout_umap(graph=graph) 57 | 58 | self.assertEqual(len(node_positions), len(graph.nodes())) 59 | 60 | def test_layout_umap_directed_unweighted(self): 61 | graph = nx.erdos_renyi_graph(10, 0.7, directed=True) 62 | 63 | _, node_positions = layout_umap(graph=graph) 64 | 65 | self.assertEqual(len(node_positions), len(graph.nodes())) 66 | 67 | def test_exercise_approximate_prune(self): 68 | form = nx.erdos_renyi_graph(100, 0.7, directed=False) 69 | graph = nx.Graph() 70 | rng = random.Random(12345) 71 | for source, target in form.edges(): 72 | graph.add_edge(str(source), str(target), weight=rng.uniform(0.0, 10.0)) 73 | 74 | result_graph, positions = layout_umap(graph, max_edges=100) 75 | self.assertTrue(result_graph.number_of_edges() <= 100) 76 | 77 | 78 | if __name__ == "__main__": 79 | unittest.main() 80 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Security 4 | 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/). 
6 | 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/opensource/security/definition), please report it to us as described below. 8 | 9 | ## Reporting Security Issues 10 | 11 | **Please do not report security vulnerabilities through public GitHub issues.** 12 | 13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/opensource/security/create-report). 14 | 15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/opensource/security/pgpkey). 16 | 17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://aka.ms/opensource/security/msrc). 18 | 19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 20 | 21 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) 22 | * Full paths of source file(s) related to the manifestation of the issue 23 | * The location of the affected source code (tag/branch/commit or direct URL) 24 | * Any special configuration required to reproduce the issue 25 | * Step-by-step instructions to reproduce the issue 26 | * Proof-of-concept or exploit code (if possible) 27 | * Impact of the issue, including how an attacker might exploit the issue 28 | 29 | This information will help us triage your report more quickly. 30 | 31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/opensource/security/bounty) page for more details about our active programs. 32 | 33 | ## Preferred Languages 34 | 35 | We prefer all communications to be in English. 36 | 37 | ## Policy 38 | 39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/opensource/security/cvd). 40 | 41 | 42 | -------------------------------------------------------------------------------- /tests/test_base_embed.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 
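# Orientation for the tests below (summarising this file's assertions, not the
# public docs): ``BaseSpectralEmbed._reduce_dim`` populates ``latent_left_`` with
# shape (n, n_components); for directed graphs it also populates
# ``latent_right_``, and with ``concat=True`` the ``fit_transform`` output stacks
# the two into an (n, 2 * n_components) array. Requesting all n dimensions only
# works with ``algorithm="full"``; "truncated" and "randomized" raise ``ValueError``.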
3 | 4 | import unittest 5 | 6 | import numpy as np 7 | 8 | import graspologic as gs 9 | from graspologic.embed.base import BaseSpectralEmbed 10 | from graspologic.simulations.simulations import er_nm, er_np 11 | 12 | 13 | class TestBaseEmbed(unittest.TestCase): 14 | @classmethod 15 | def setup_class(cls): 16 | # simple ERxN graph 17 | cls.n = 20 18 | cls.p = 0.5 19 | cls.A = er_np(cls.n, cls.p, directed=True, loops=False) 20 | 21 | def test_baseembed_er(self): 22 | n_components = 4 23 | embed = BaseSpectralEmbed(n_components=n_components) 24 | n = 10 25 | M = 20 26 | A = er_nm(n, M) + 5 27 | embed._reduce_dim(A) 28 | self.assertEqual(embed.latent_left_.shape, (n, n_components)) 29 | self.assertTrue(embed.latent_right_ is None) 30 | 31 | def test_baseembed_er_directed(self): 32 | n_components = 4 33 | embed = BaseSpectralEmbed(n_components=n_components) 34 | n = 10 35 | M = 20 36 | A = er_nm(n, M, directed=True) 37 | embed._reduce_dim(A) 38 | self.assertEqual(embed.latent_left_.shape, (n, n_components)) 39 | self.assertEqual(embed.latent_right_.shape, (n, n_components)) 40 | self.assertTrue(embed.latent_right_ is not None) 41 | 42 | def test_baseembed_er_directed_concat(self): 43 | n_components = 4 44 | embed = BaseSpectralEmbed(n_components=n_components, concat=True) 45 | n = 10 46 | M = 20 47 | A = er_nm(n, M, directed=True) 48 | embed._reduce_dim(A) 49 | out = embed.fit_transform(A) 50 | self.assertEqual(out.shape, (n, 2 * n_components)) 51 | self.assertTrue(embed.latent_right_ is not None) 52 | 53 | def test_baseembed(self): 54 | embed = BaseSpectralEmbed(n_components=None) 55 | n = 10 56 | M = 20 57 | A = er_nm(n, M) + 5 58 | embed._reduce_dim(A) 59 | 60 | def test_algorithms(self): 61 | embed = BaseSpectralEmbed(n_components=self.n, algorithm="full") 62 | embed._reduce_dim(self.A) 63 | self.assertEqual(embed.latent_left_.shape, (self.n, self.n)) 64 | self.assertEqual(embed.latent_right_.shape, (self.n, self.n)) 65 | 66 | # When algoritm != 'full', cannot decompose to all dimensions 67 | embed = BaseSpectralEmbed(n_components=self.n, algorithm="truncated") 68 | with self.assertRaises(ValueError): 69 | embed._reduce_dim(self.A) 70 | 71 | embed = BaseSpectralEmbed(n_components=self.n, algorithm="randomized") 72 | with self.assertRaises(ValueError): 73 | embed._reduce_dim(self.A) 74 | 75 | def test_input_checks(self): 76 | with self.assertRaises(TypeError): 77 | BaseSpectralEmbed(n_components=self.n, concat=42) 78 | -------------------------------------------------------------------------------- /graspologic/cluster/base.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 3 | 4 | from abc import ABC, abstractmethod 5 | from typing import Any, Optional 6 | 7 | import numpy as np 8 | from sklearn.base import BaseEstimator, ClusterMixin 9 | from sklearn.metrics import adjusted_rand_score 10 | from sklearn.utils.validation import check_is_fitted 11 | 12 | 13 | class BaseCluster(ABC, BaseEstimator, ClusterMixin): 14 | """ 15 | Base clustering class. 16 | """ 17 | 18 | @abstractmethod 19 | def fit(self, X: np.ndarray, y: Optional[Any] = None) -> "BaseCluster": 20 | """ 21 | Compute clusters based on given method. 22 | 23 | Parameters 24 | ---------- 25 | X : array-like, shape (n_samples, n_features) 26 | List of n_features-dimensional data points. Each row 27 | corresponds to a single data point. 
28 | 29 | y : array-like, shape (n_samples,), optional (default=None) 30 | List of labels for X if available. Used to compute 31 | ARI scores. 32 | 33 | Returns 34 | ------- 35 | self 36 | """ 37 | 38 | def predict( 39 | self, X: np.ndarray, y: Optional[Any] = None 40 | ) -> np.ndarray: # pragma: no cover 41 | """ 42 | Predict clusters based on best model. 43 | 44 | Parameters 45 | ---------- 46 | X : array-like, shape (n_samples, n_features) 47 | List of n_features-dimensional data points. Each row 48 | corresponds to a single data point. 49 | y : array-like, shape (n_samples, ), optional (default=None) 50 | List of labels for X if available. Used to compute 51 | ARI scores. 52 | 53 | Returns 54 | ------- 55 | labels : array, shape (n_samples,) 56 | Component labels. 57 | """ 58 | # Check if fit is already called 59 | check_is_fitted(self, ["model_"], all_or_any=all) 60 | labels = self.model_.predict(X) 61 | 62 | return labels 63 | 64 | def fit_predict( 65 | self, X: np.ndarray, y: Optional[Any] = None 66 | ) -> np.ndarray: # pragma: no cover 67 | """ 68 | Fit the models and predict clusters based on best model. 69 | 70 | Parameters 71 | ---------- 72 | X : array-like, shape (n_samples, n_features) 73 | List of n_features-dimensional data points. Each row 74 | corresponds to a single data point. 75 | 76 | y : array-like, shape (n_samples,), optional (default=None) 77 | List of labels for X if available. Used to compute 78 | ARI scores. 79 | 80 | Returns 81 | ------- 82 | labels : array, shape (n_samples,) 83 | Component labels. 84 | """ 85 | self.fit(X, y) 86 | 87 | labels = self.predict(X, y) 88 | return labels 89 | -------------------------------------------------------------------------------- /tests/partition/test_modularity.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT license. 
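# Orientation for the tests below: ``modularity(graph, partitions)`` takes a
# weighted networkx graph and a node -> community mapping, and
# ``modularity_components`` returns the per-community contributions, so the
# overall score is the sum of those components. Per the worked arithmetic in
# test_modularity below, each community contributes
#     (within-community degree / (2 * m)) - (community degree / (2 * m)) ** 2
# where m is the total edge weight of the graph.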
3 | 4 | import unittest 5 | from typing import Dict 6 | 7 | import networkx as nx 8 | import numpy as np 9 | 10 | from graspologic.partition import modularity, modularity_components 11 | from tests.utils import data_file 12 | 13 | 14 | def _modularity_graph() -> nx.Graph: 15 | graph = nx.Graph() 16 | graph.add_edge("a", "b", weight=4.0) 17 | graph.add_edge("b", "c", weight=3.0) 18 | graph.add_edge("e", "f", weight=5.0) 19 | 20 | return graph 21 | 22 | 23 | _PARTITIONS: Dict[str, int] = {"a": 0, "b": 0, "c": 0, "e": 1, "f": 1} 24 | 25 | 26 | class TestModularity(unittest.TestCase): 27 | def test_modularity(self): 28 | graph = _modularity_graph() # links = 12.0 29 | partition = _PARTITIONS # in community degree for -> 0: 14, 1: 10, community degree -> 0:14, 1:10 30 | # modularity component for partition 0: (14.0 / (2.0 * 12.0)) - (1.0 * ((14.0 / (2 * 12.0)) ** 2.0)) 31 | # (cont): 0.5833333333333334 - 0.34027777777777785 = 0.24305555555555552 32 | # modularity component for partition 1: (10.0 / (2.0 * 12.0)) - (1.0 * ((10.0 / (2 * 12.0)) ** 2.0)) 33 | # (cont): 0.4166666666666667 - 0.17361111111111113 = 0.24305555555555555 34 | modularity_value = modularity(graph, partition) 35 | 36 | np.testing.assert_almost_equal(0.48611111111111105, modularity_value) 37 | 38 | def test_modularity_components(self): 39 | graph = nx.Graph() 40 | with open(data_file("large-graph.csv"), "r") as edge_list_io: 41 | for line in edge_list_io: 42 | source, target, weight = line.strip().split(",") 43 | previous_weight = graph.get_edge_data(source, target, {"weight": 0})[ 44 | "weight" 45 | ] 46 | weight = float(weight) + previous_weight 47 | graph.add_edge(source, target, weight=weight) 48 | 49 | partitions = {} 50 | with open(data_file("large-graph-partitions.csv"), "r") as communities_io: 51 | for line in communities_io: 52 | vertex, comm = line.strip().split(",") 53 | partitions[vertex] = int(comm) 54 | 55 | partition_count = max(partitions.values()) 56 | 57 | graph.add_node("disconnected_node") 58 | partitions["disconnected_node"] = partition_count + 1 59 | 60 | components = modularity_components(graph, partitions) 61 | 62 | # from python-louvain modularity function 63 | community_modularity = 0.8008595783563607 64 | total_modularity = sum(components.values()) 65 | 66 | self.assertSetEqual(set(components.keys()), set(partitions.values())) 67 | self.assertEqual(0, components[partition_count + 1]) 68 | 69 | np.testing.assert_almost_equal( 70 | community_modularity, total_modularity, decimal=8 71 | ) 72 | -------------------------------------------------------------------------------- /docs/tutorials/plotting/gridplot.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Gridplot: Visualize Multiple Graphs\n", 8 | "\n", 9 | "This example provides how to visualize graphs using the gridplot." 
10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import graspologic\n", 19 | "\n", 20 | "import numpy as np\n", 21 | "%matplotlib inline" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "## Overlaying two sparse graphs using gridplot\n", 29 | "\n", 30 | "### Simulate more graphs using weighted stochastic block models\n", 31 | "The 2-block model is defined as below:\n", 32 | "\n", 33 | "\\begin{align*}\n", 34 | "P = \n", 35 | "\\begin{bmatrix}0.25 & 0.05 \\\\\n", 36 | "0.05 & 0.25\n", 37 | "\\end{bmatrix}\n", 38 | "\\end{align*}\n", 39 | "\n", 40 | "We generate two weighted SBMs where the weights are distributed from a discrete uniform(1, 10) and discrete uniform(2, 5)." 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "from graspologic.simulations import sbm\n", 50 | "\n", 51 | "n_communities = [50, 50]\n", 52 | "p = np.array([[0.25, 0.05], [0.05, 0.25]])\n", 53 | "wt = np.random.randint\n", 54 | "wtargs = dict(low=1, high=10)\n", 55 | "\n", 56 | "np.random.seed(1)\n", 57 | "A_unif1= sbm(n_communities, p, wt=wt, wtargs=wtargs)\n", 58 | "\n", 59 | "wtargs = dict(low=2, high=5)\n", 60 | "A_unif2= sbm(n_communities, p, wt=wt, wtargs=wtargs)" 61 | ] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "metadata": {}, 66 | "source": [ 67 | "## Visualizing both graphs" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "metadata": {}, 74 | "outputs": [], 75 | "source": [ 76 | "from graspologic.plot import gridplot\n", 77 | "\n", 78 | "X = [A_unif1, A_unif2]\n", 79 | "labels = [\"Uniform(1, 10)\", \"Uniform(2, 5)\"]\n", 80 | "\n", 81 | "f = gridplot(X=X, \n", 82 | " labels=labels, \n", 83 | " title='Two Weighted Stochastic Block Models', \n", 84 | " height=12, \n", 85 | " font_scale=1.5)" 86 | ] 87 | } 88 | ], 89 | "metadata": { 90 | "kernelspec": { 91 | "display_name": "Python 3", 92 | "language": "python", 93 | "name": "python3" 94 | }, 95 | "language_info": { 96 | "codemirror_mode": { 97 | "name": "ipython", 98 | "version": 3 99 | }, 100 | "file_extension": ".py", 101 | "mimetype": "text/x-python", 102 | "name": "python", 103 | "nbconvert_exporter": "python", 104 | "pygments_lexer": "ipython3", 105 | "version": "3.7.0" 106 | } 107 | }, 108 | "nbformat": 4, 109 | "nbformat_minor": 4 110 | } 111 | -------------------------------------------------------------------------------- /graspologic/inference/binomial.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | from typing import Literal 3 | 4 | import numpy as np 5 | from scipy.stats import chi2_contingency, fisher_exact 6 | from statsmodels.stats.proportion import test_proportions_2indep 7 | 8 | BinomialResult = namedtuple("BinomialResult", ["stat", "pvalue"]) 9 | BinomialTestMethod = Literal["score", "fisher", "chi2"] 10 | 11 | 12 | def binom_2samp( 13 | x1: int, 14 | n1: int, 15 | x2: int, 16 | n2: int, 17 | null_ratio: float = 1.0, 18 | method: BinomialTestMethod = "score", 19 | ) -> BinomialResult: 20 | """ 21 | This function computes the likelihood that two binomial samples are drown from 22 | identical underlying distributions. Null hypothesis is that the success probability 23 | for each sample is identical (i.e. 
p1 = p2), and this function returns the 24 | probability that the null hypothesis is accurate, under a variety of potential 25 | statistical tests (default is score test). 26 | 27 | Parameters 28 | ---------- 29 | x1 : int 30 | Success count for group 1 31 | n1 : int 32 | The number of possible successes for group 1 33 | x2 : int 34 | Success count for group 2 35 | n2 : int 36 | The number of possible successes for group 2 37 | null_ratio : float, optional 38 | Optional parameter for testing whether p1 is a fixed ratio larger or smaller 39 | than p2, i.e. p1 = cp2, where c is the null_ratio. Default is 1.0. This 40 | parameter can only be !=1 if the chosen statistical test is the score test. 41 | method : str, optional 42 | Defines the statistical test to be run in order to reject or fail to reject the 43 | null hypothesis. By default, this is the score test (i.e. "score"). 44 | 45 | Returns 46 | ------- 47 | BinomialResult: namedtuple 48 | This namedtuple contains the following data: 49 | stat: float 50 | Test statistic for the requested test. 51 | pvalue: float 52 | The p-value for the requested test. 53 | 54 | References 55 | ------ 56 | [1] Alan Agresti. Categorical data analysis. John Wiley & Sons, 3 edition, 2013. 57 | 58 | """ 59 | if x1 == 0 or x2 == 0: 60 | # logging.warn("One or more counts were 0, not running test and returning nan") 61 | return BinomialResult(np.nan, np.nan) 62 | if null_ratio != 1 and method != "score": 63 | raise ValueError("Non-unity null odds only works with ``method=='score'``") 64 | 65 | cont_table = np.array([[x1, n1 - x1], [x2, n2 - x2]]) 66 | if method == "fisher" and null_ratio == 1.0: 67 | stat, pvalue = fisher_exact(cont_table, alternative="two-sided") 68 | elif method == "chi2": 69 | stat, pvalue, _, _ = chi2_contingency(cont_table) 70 | elif method == "score": 71 | stat, pvalue = test_proportions_2indep( 72 | x1, 73 | n1, 74 | x2, 75 | n2, 76 | method="score", 77 | compare="ratio", 78 | alternative="two-sided", 79 | value=null_ratio, 80 | ) 81 | else: 82 | raise ValueError 83 | 84 | return BinomialResult(stat, pvalue) 85 | -------------------------------------------------------------------------------- /tests/layouts/nooverlap/test_grid.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 
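A minimal sketch of calling ``binom_2samp`` as documented in graspologic/inference/binomial.py above; the counts are made up purely for illustration:

    from graspologic.inference.binomial import binom_2samp

    # Hypothetical counts: 20/100 successes in group 1 vs. 35/100 in group 2.
    result = binom_2samp(x1=20, n1=100, x2=35, n2=100, method="score")
    print(result.stat, result.pvalue)  # BinomialResult is a namedtuple

    # Only the score test supports a non-unity null ratio, e.g. H0: p1 = 2 * p2.
    ratio_result = binom_2samp(x1=20, n1=100, x2=35, n2=100, null_ratio=2.0)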
3 | 4 | import unittest 5 | 6 | from graspologic.layouts.nooverlap._grid import _GridBuckets 7 | from graspologic.layouts.nooverlap._node import _Node 8 | 9 | 10 | class TestGrid(unittest.TestCase): 11 | # def setUp(self): 12 | # self.g = _GridBuckets(10) 13 | 14 | def test_get_cell(self): 15 | g = _GridBuckets(10) 16 | cell = g.get_cell(0, 0) 17 | self.assertTupleEqual((0, 0), cell) 18 | 19 | cell = g.get_cell(-1, -1) 20 | self.assertTupleEqual((-10, -10), cell) 21 | 22 | cell = g.get_cell(11, 11) 23 | self.assertTupleEqual((10, 10), cell) 24 | 25 | cell = g.get_cell(105, 87) 26 | self.assertTupleEqual((100, 80), cell) 27 | 28 | cell = g.get_cell(-105, -87) 29 | self.assertTupleEqual((-110, -90), cell) 30 | 31 | cell = g.get_cell(-105, 87) 32 | self.assertTupleEqual((-110, 80), cell) 33 | 34 | cell = g.get_cell(105, -57) 35 | self.assertTupleEqual((100, -60), cell) 36 | 37 | def test_get_grid_cells(self): 38 | g = _GridBuckets(10) 39 | cells = g._get_grid_cells(5, 12, 1) 40 | self.assertSetEqual({(0, 10)}, cells) 41 | 42 | g2 = _GridBuckets(20) 43 | cells = g2._get_grid_cells(5, 12, 10) 44 | self.assertSetEqual({(-20, 20), (0, 20), (0, 0), (-20, 0)}, cells) 45 | 46 | g3 = _GridBuckets(20) 47 | cells = g3._get_grid_cells(-5, -12, 10) 48 | self.assertSetEqual({(-20, -20), (0, -20), (0, -40), (-20, -40)}, cells) 49 | 50 | def test_add_node(self): 51 | g = _GridBuckets(10) 52 | n0 = _Node(0, 1, 1, 10, 1, "blue") 53 | n1 = _Node(1, 2, 1, 10, 1, "blue") 54 | n2 = _Node(2, 40, -20, 10, 1, "blue") 55 | 56 | g.add_node(n0) 57 | nodes = g.get_potential_overlapping_nodes_by_node(n0) 58 | self.assertSetEqual(nodes, {n0}) 59 | 60 | g.add_node(n1) 61 | nodes = g.get_potential_overlapping_nodes_by_node(n1) 62 | self.assertSetEqual(nodes, {n0, n1}) 63 | 64 | g.add_node(n2) 65 | nodes = g.get_potential_overlapping_nodes_by_node(n1) 66 | self.assertSetEqual(nodes, {n0, n1}) 67 | 68 | def test_get_cell_stats(self): 69 | g = _GridBuckets(10) 70 | n0 = _Node(0, 1, 1, 10, 1, "blue") 71 | n1 = _Node(1, 2, 1, 10, 1, "blue") 72 | n2 = _Node(2, 40, -20, 10, 1, "blue") 73 | n3 = _Node(3, -33, -33, 1, 1, "blue") 74 | n4 = _Node(4, -193, 78, 1, 1, "blue") 75 | g.add_node(n0) 76 | g.add_node(n1) 77 | g.add_node(n2) 78 | g.add_node(n3) 79 | g.add_node(n4) 80 | stats = g.get_grid_cell_stats() 81 | self.assertEqual(3, len(stats), "Correct size list") 82 | self.assertEqual(254, stats[0][1], "empty cells") 83 | self.assertEqual(6, stats[1][1], "one item in cell") 84 | self.assertEqual(4, stats[2][1], "two items in cell") 85 | self.assertEqual( 86 | [(0, 254), (1, 6), (2, 4)], stats, "grid cell stats are in expected format" 87 | ) 88 | 89 | 90 | if __name__ == "__main__": 91 | unittest.main() 92 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. -*- coding: utf-8 -*- 2 | 3 | .. _contents: 4 | 5 | Overview of graspologic_ 6 | ======================== 7 | 8 | .. _graspologic: https://graspologic.readthedocs.org/en/latest 9 | 10 | graspologic is a Python package for analysis of graphs, or networks. 11 | 12 | Motivation 13 | ---------- 14 | 15 | A graph, or network, provides a mathematically intuitive representation of data with 16 | some sort of relationship between items. 
For example, a social network can be 17 | represented as a graph by considering all participants in the social network as nodes, 18 | with connections representing whether each pair of individuals in the network are friends 19 | with one another. Naively, one might apply traditional statistical techniques to a graph, 20 | which neglects the spatial arrangement of nodes within the network and is not utilizing 21 | all of the information present in the graph. In this package, we provide utilities and 22 | algorithms designed for the processing and analysis of graphs with specialized graph 23 | statistical algorithms. 24 | 25 | Python 26 | ------ 27 | 28 | Python is a powerful programming language that allows concise expressions of network 29 | algorithms. Python has a vibrant and growing ecosystem of packages that 30 | graspologic uses to provide more features such as numerical linear algebra and 31 | plotting. In order to make the most out of graspologic you will want to know how 32 | to write basic programs in Python. Among the many guides to Python, we 33 | recommend the `Python documentation `_. 34 | 35 | Free software 36 | ------------- 37 | 38 | graspologic is free software; you can redistribute it and/or modify it under the 39 | terms of the :doc:`MIT ` license. We welcome contributions. 40 | Join us on `GitHub `_. 41 | 42 | History 43 | ------- 44 | 45 | ``graspologic`` first released in September 2020, but it got its start as a pair of Python libraries 46 | written by Johns Hopkins University's NeuroData lab and Microsoft Research's Project Essex. 47 | Both teams worked on many of the same algorithms, shared research, findings, and generally duplicated a lot of effort. 48 | 49 | ``GraSPy`` - the NeuroData library - and ``topologic`` - the Microsoft Research library began merging in September of 2020, but both got their starts far earlier, with GraSPy starting in September 2018 and topologic starting just a short time later, on October 2nd, 2018. 50 | 51 | GraSPy was originally designed and written by Jaewon Chung, Benjamin Pedigo, and Eric Bridgeford. 52 | 53 | Topologic was originally designed and written by Patrick Bourke, Jonathan McLean, Nick Caurvina, and Dwayne Pryce. 54 | 55 | .. toctree-filt:: 56 | :maxdepth: 1 57 | :caption: Documentation 58 | 59 | license 60 | reference/index 61 | tutorials/index 62 | 63 | .. toctree:: 64 | :maxdepth: 1 65 | :caption: Useful Links 66 | 67 | graspologic @ GitHub 68 | graspologic @ PyPI 69 | Issue Tracker 70 | 71 | Indices and tables 72 | ================== 73 | 74 | * :ref:`genindex` 75 | * :ref:`search` 76 | -------------------------------------------------------------------------------- /tests/test_rdpg_corr.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 
3 | 4 | import unittest 5 | 6 | import numpy as np 7 | 8 | from graspologic.simulations.rdpg_corr import rdpg_corr 9 | from graspologic.simulations.simulations import p_from_latent, sample_edges 10 | 11 | 12 | class TestRDPGCorr(unittest.TestCase): 13 | @classmethod 14 | def setUpClass(cls): 15 | cls.r = 0.3 16 | cls.Y = None 17 | cls.X = np.random.dirichlet([20, 20], size=300) 18 | 19 | def test_dimensions(self): 20 | np.random.seed(1234) 21 | A, B = rdpg_corr( 22 | self.X, self.Y, self.r, rescale=False, directed=False, loops=False 23 | ) 24 | self.assertTrue(A.shape, (300, 300)) 25 | self.assertTrue(B.shape, (300, 300)) 26 | 27 | def test_inputs(self): 28 | x1 = np.array([[1, 1], [1, 1]]) 29 | x2 = np.array([[1, 1]]) 30 | x3 = np.zeros((2, 2, 2)) 31 | with self.assertRaises(TypeError): 32 | p_from_latent("hi") # wrong type 33 | with self.assertRaises(ValueError): 34 | p_from_latent(x1, x2) # dimension mismatch 35 | with self.assertRaises(ValueError): 36 | p_from_latent(x3) # wrong num dimensions 37 | with self.assertRaises(TypeError): 38 | sample_edges("XD") # wrong type 39 | with self.assertRaises(ValueError): 40 | sample_edges(x3) # wrong num dimensions 41 | with self.assertRaises(ValueError): 42 | sample_edges(x2) # wrong shape for P 43 | 44 | if any(self.X[self.X > 1]) or any(self.X[self.X < -1]): # wrong values for P 45 | raise ValueError("P values should be less than 1 and bigger than -1") 46 | 47 | def test_rdpg_corr(self): 48 | np.random.seed(123) 49 | g1, g2 = rdpg_corr( 50 | self.X, self.Y, self.r, rescale=False, directed=False, loops=False 51 | ) 52 | 53 | # check the dimention of g1, g2 54 | self.assertTrue(g1.shape == (self.X.shape[0], self.X.shape[0])) 55 | self.assertTrue(g1.shape == (self.X.shape[0], self.X.shape[0])) 56 | 57 | # check rho 58 | g1 = g1[np.where(~np.eye(g1.shape[0], dtype=bool))] 59 | g2 = g2[np.where(~np.eye(g2.shape[0], dtype=bool))] 60 | correlation = np.corrcoef(g1, g2)[0, 1] 61 | self.assertTrue(np.isclose(correlation, self.r, atol=0.01)) 62 | 63 | # check P 64 | def test_p_is_close(self): 65 | P = p_from_latent(self.X, self.Y, rescale=False, loops=True) 66 | if any(P[P > 1]) or any(P[P < -1]): # wrong values for P 67 | raise ValueError("P values should be less than 1 and bigger than -1") 68 | 69 | np.random.seed(8888) 70 | graphs1 = [] 71 | graphs2 = [] 72 | for i in range(100): 73 | g1, g2 = rdpg_corr( 74 | self.X, self.Y, self.r, rescale=False, directed=True, loops=True 75 | ) 76 | graphs1.append(g1) 77 | graphs2.append(g2) 78 | graphs1 = np.stack(graphs1) 79 | graphs2 = np.stack(graphs2) 80 | np.testing.assert_allclose(np.mean(graphs1, axis=0), P, atol=0.3) 81 | np.testing.assert_allclose(np.mean(graphs2, axis=0), P, atol=0.3) 82 | -------------------------------------------------------------------------------- /tests/test_sg.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 
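tests/test_rdpg_corr.py above verifies the requested edge correlation by correlating the off-diagonal entries of the two sampled adjacency matrices; a condensed sketch of that check, reusing the test's latent positions and seed:

    import numpy as np
    from graspologic.simulations.rdpg_corr import rdpg_corr

    np.random.seed(123)
    X = np.random.dirichlet([20, 20], size=300)   # latent positions
    A, B = rdpg_corr(X, None, 0.3, rescale=False, directed=False, loops=False)

    off_diagonal = ~np.eye(A.shape[0], dtype=bool)
    print(np.corrcoef(A[off_diagonal], B[off_diagonal])[0, 1])  # close to 0.3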
3 | 4 | import unittest 5 | 6 | import numpy as np 7 | from numpy.testing import assert_equal 8 | 9 | import graspologic.subgraph as sg 10 | 11 | 12 | class TestEstimateSubgraph(unittest.TestCase): 13 | def test_estimate_subgraph_coh(self): 14 | ys = np.array([0, 1, 0, 1, 0, 1, 0, 1, 0, 1]) 15 | blank = np.ones((10, 10)) 16 | blank[1:6, 0] = 0 17 | A = np.ones((10, 10, 10)) 18 | 19 | for ind in range(10): 20 | if ys[ind] == 1: 21 | A[:, :, ind] = blank 22 | test_model = sg.SignalSubgraph() 23 | estsub = test_model.fit_transform(A, ys, [5, 1]) 24 | ver = np.ones((10, 10)) 25 | ver[estsub] = 0 26 | np.testing.assert_array_equal(blank, ver) 27 | 28 | def test_estimate_subgraph_inc(self): 29 | ys = np.array([0, 1, 0, 1, 0, 1, 0, 1, 0, 1]) 30 | blank = np.ones((10, 10)) 31 | blank[1:6, 0] = 0 32 | A = np.ones((10, 10, 10)) 33 | 34 | for ind in range(10): 35 | if ys[ind] == 1: 36 | A[:, :, ind] = blank 37 | test_model = sg.SignalSubgraph() 38 | estsub = test_model.fit_transform(A, ys, 5) 39 | ver = np.ones((10, 10)) 40 | ver[estsub] = 0 41 | np.testing.assert_array_equal(blank, ver) 42 | 43 | def test_fit_bad_constraints(self): 44 | A = np.ones((5, 5, 5)) 45 | ys = np.ones(5) 46 | test_model = sg.SignalSubgraph() 47 | with self.assertRaises(TypeError): 48 | test_model.fit(A, ys, [1]) 49 | with self.assertRaises(TypeError): 50 | test_model.fit(A, ys, [1, 1, 1]) 51 | 52 | def test_construct_contingency(self): 53 | A = np.ones((1, 1, 5)) 54 | A[:, :, 1::2] = 0 55 | ys = np.array([1, 0, 1, 0, 0]) 56 | test_model = sg.SignalSubgraph() 57 | test_model.fit(A, ys, 1) 58 | test_model._SignalSubgraph__construct_contingency() 59 | cmat = test_model.contmat_ 60 | ver = np.array([[[[1, 2], [2, 0]]]], dtype=float) 61 | np.testing.assert_array_equal(cmat, ver) 62 | 63 | def test_fit_bad_type(self): 64 | A = [[[1 for i in range(5)] for j in range(5)] for k in range(5)] 65 | ys = [1, 1, 1, 1, 1] 66 | test_model = sg.SignalSubgraph() 67 | with self.assertRaises(TypeError): 68 | test_model.fit(A, np.ones(5), 1) 69 | with self.assertRaises(TypeError): 70 | test_model.fit(A, set(ys), 1) 71 | 72 | def test_fit_bad_size(self): 73 | test_model = sg.SignalSubgraph() 74 | with self.assertRaises(ValueError): 75 | test_model.fit(np.ones((5, 5)), np.ones(5), 1) 76 | with self.assertRaises(ValueError): 77 | test_model.fit(np.ones((3, 4, 2)), np.ones(2), 1) 78 | 79 | def test_fit_bad_len(self): 80 | A = np.ones((3, 3, 3)) 81 | test_model = sg.SignalSubgraph() 82 | with self.assertRaises(ValueError): 83 | test_model.fit(A, np.ones((3, 3)), 1) 84 | with self.assertRaises(ValueError): 85 | test_model.fit(A, np.array([0, 1, 2]), 1) 86 | with self.assertRaises(ValueError): 87 | test_model.fit(A, np.ones(2), 1) 88 | -------------------------------------------------------------------------------- /tests/test_svd.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 
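A condensed version of the incoherent case in tests/test_sg.py above: the graphs are stacked along the third axis, the labels mark which graphs carry the signal, and the constraint is the number of signal edges to recover.

    import numpy as np
    import graspologic.subgraph as sg

    ys = np.array([0, 1] * 5)                    # class label per graph
    signal = np.ones((10, 10))
    signal[1:6, 0] = 0                           # the 5 edges that differ for class 1
    graphs = np.ones((10, 10, 10))
    graphs[:, :, ys == 1] = signal[:, :, None]   # broadcast the signal pattern

    model = sg.SignalSubgraph()
    estimated = model.fit_transform(graphs, ys, 5)  # incoherent: recover 5 edges
    print(estimated)                                # indices of the recovered edges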
3 | 4 | import unittest 5 | 6 | import numpy as np 7 | from scipy.spatial import procrustes 8 | 9 | from graspologic.embed.svd import select_svd 10 | from graspologic.utils import symmetrize 11 | 12 | 13 | class TestSVD(unittest.TestCase): 14 | def test_bad_inputs(self): 15 | X = np.random.normal(size=(100, 5)) 16 | with self.assertRaises(ValueError): 17 | bad_algo = "ROFLMAO" 18 | select_svd(X, algorithm=bad_algo) 19 | 20 | with self.assertRaises(ValueError): 21 | algorithm = "full" 22 | bad_components = 1000 23 | select_svd(X, n_components=bad_components, algorithm=algorithm) 24 | 25 | with self.assertRaises(ValueError): 26 | algorithm = "truncated" 27 | bad_components = 1000 28 | select_svd(X, n_components=bad_components, algorithm=algorithm) 29 | 30 | def test_outputs(self): 31 | np.random.seed(123) 32 | X = np.vstack([ 33 | np.repeat([[0.2, 0.2, 0.2]], 50, axis=0), 34 | np.repeat([[0.5, 0.5, 0.5]], 50, axis=0), 35 | ]) 36 | P = X @ X.T 37 | A = np.random.binomial(1, P).astype(float) 38 | 39 | n_components = 3 40 | 41 | # Full SVD 42 | U_full, D_full, V_full = select_svd( 43 | A, n_components=n_components, algorithm="full" 44 | ) 45 | X_full = U_full @ np.diag(np.sqrt(D_full)) 46 | _, _, norm_full = procrustes(X, X_full) 47 | 48 | # Truncated SVD 49 | U_trunc, D_trunc, V_trunc = select_svd( 50 | A, n_components=n_components, algorithm="truncated" 51 | ) 52 | X_trunc = U_trunc @ np.diag(np.sqrt(D_trunc)) 53 | _, _, norm_trunc = procrustes(X, X_trunc) 54 | 55 | # Randomized SVD 56 | U_rand, D_rand, V_rand = select_svd( 57 | A, n_components=n_components, algorithm="randomized", n_iter=10 58 | ) 59 | X_rand = U_rand @ np.diag(np.sqrt(D_rand)) 60 | _, _, norm_rand = procrustes(X, X_rand) 61 | 62 | rtol = 1e-4 63 | atol = 1e-4 64 | np.testing.assert_allclose(norm_full, norm_trunc, rtol, atol) 65 | np.testing.assert_allclose(norm_full, norm_rand, rtol, atol) 66 | 67 | def test_eigsh(self): 68 | np.random.seed(123) 69 | X = np.vstack([ 70 | np.repeat([[0.2, 0.2, 0.2]], 50, axis=0), 71 | np.repeat([[0.5, 0.5, 0.5]], 50, axis=0), 72 | ]) 73 | P = X @ X.T 74 | A = np.random.binomial(1, P).astype(float) 75 | A = symmetrize(A, method="triu") 76 | n_components = 3 77 | 78 | # Full SVD 79 | U_full, D_full, V_full = select_svd( 80 | A, n_components=n_components, algorithm="full" 81 | ) 82 | X_full = U_full @ np.diag(np.sqrt(D_full)) 83 | _, _, norm_full = procrustes(X, X_full) 84 | 85 | # eigsh SVD 86 | U_square, D_square, V_square = select_svd( 87 | A, n_components=n_components, algorithm="eigsh", n_iter=10 88 | ) 89 | X_square = U_square @ np.diag(np.sqrt(D_square)) 90 | _, _, norm_square = procrustes(X, X_square) 91 | 92 | rtol = 1e-4 93 | atol = 1e-4 94 | np.testing.assert_allclose(norm_full, norm_square, rtol, atol) 95 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: graspologic Publish 2 | on: 3 | # 4 | # When a release tag is created (e.g. v1.0.0), this workflow will be triggered. Hatch's VCS versioning will use the correct version tag. 5 | # 6 | release: 7 | types: [created] 8 | # 9 | # On pushes to main and dev, a prerelease version will be cut for the branch. e.g. 
v1.0.0-pre.10+ 10 | # 11 | push: 12 | paths-ignore: 13 | - '.all-contributorsrc' 14 | - 'CONTRIBUTORS.md' 15 | branches: 16 | - 'main' 17 | - 'dev' 18 | env: 19 | PYTHON_VERSION: '3.10' 20 | jobs: 21 | build: 22 | uses: ./.github/workflows/build.yml 23 | publish: 24 | runs-on: ubuntu-latest 25 | needs: build 26 | permissions: 27 | id-token: write 28 | outputs: 29 | version: ${{ steps.export-version.outputs.version }} 30 | steps: 31 | - uses: actions/checkout@v4 32 | with: 33 | fetch-depth: 0 34 | fetch-tags: true 35 | - name: Set up Python 36 | uses: actions/setup-python@v2 37 | with: 38 | python-version: ${{ env.PYTHON_VERSION }} 39 | - name: Install uv 40 | uses: astral-sh/setup-uv@v2 41 | - name: Install dependencies 42 | run: uv sync --python ${{ env.PYTHON_VERSION }} 43 | - name: Build Artifacts 44 | run: | 45 | RAW_VERSION=$(uvx hatch version) 46 | # Strip any local version metadata (everything after '+') to satisfy PyPI rules 47 | CLEAN_VERSION=${RAW_VERSION%%+*} 48 | echo "Raw version: $RAW_VERSION" 49 | echo "Clean version (for PyPI): $CLEAN_VERSION" 50 | # Force hatch to use the sanitized version for the build 51 | SETUPTOOLS_SCM_PRETEND_VERSION=$CLEAN_VERSION uv build 52 | - name: Publish package distributions to PyPI 53 | uses: pypa/gh-action-pypi-publish@release/v1 54 | with: 55 | packages-dir: dist 56 | skip-existing: true 57 | verbose: true 58 | docsite: 59 | runs-on: ubuntu-latest 60 | needs: [publish, build] 61 | if: github.ref=='refs/heads/main' || github.ref=='refs/heads/dev' 62 | permissions: 63 | id-token: write 64 | contents: write 65 | steps: 66 | - name: Download documentation artifact 67 | uses: actions/download-artifact@v4 68 | with: 69 | name: documentation-site 70 | path: docs/documentation-site 71 | - name: Publish reference docs (dev branch) 72 | uses: peaceiris/actions-gh-pages@v3 73 | if: github.ref=='refs/heads/dev' 74 | with: 75 | github_token: ${{ secrets.GITHUB_TOKEN }} 76 | publish_dir: docs/documentation-site 77 | destination_dir: pre-release 78 | - name: Publish reference docs (main branch) 79 | uses: peaceiris/actions-gh-pages@v3 80 | if: github.ref=='refs/heads/main' 81 | with: 82 | github_token: ${{ secrets.GITHUB_TOKEN }} 83 | publish_dir: docs/documentation-site 84 | destination_dir: ${{ needs.publish.outputs.version }} 85 | - name: Publish latest reference docs (main branch) 86 | uses: peaceiris/actions-gh-pages@v3 87 | if: github.ref=='refs/heads/main' 88 | with: 89 | github_token: ${{ secrets.GITHUB_TOKEN }} 90 | publish_dir: docs/documentation-site 91 | destination_dir: latest 92 | 93 | 94 | -------------------------------------------------------------------------------- /tests/test_er_and_group_connection_tests.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import numpy as np 4 | from scipy.sparse import csr_array 5 | 6 | from graspologic.inference import density_test, group_connection_test 7 | from graspologic.simulations import er_np, sbm 8 | 9 | 10 | class TestGroupConnection(unittest.TestCase): 11 | def test_gctest_works(self): 12 | np.random.seed(8888) 13 | B1 = np.array([[0.8, 0.6], [0.6, 0.8]]) 14 | B2 = 0.8 * B1 15 | A1, labels1 = sbm([50, 50], B1, return_labels=True) 16 | A2, labels2 = sbm([60, 60], B2, return_labels=True) 17 | stat, pvalue, misc = group_connection_test( 18 | A1, A2, labels1, labels2, density_adjustment=True 19 | ) 20 | self.assertTrue(pvalue > 0.05) 21 | 22 | def test_all_kwargs(self): 23 | B1 = np.array([[0.4, 0.6], [0.6, 0.8]]) 24 | B2 = 
np.array([[0.9, 0.4], [0.2, 0.865]]) 25 | A1, labels1 = sbm([60, 60], B1, return_labels=True, directed=True) 26 | A2, labels2 = sbm([50, 50], B2, return_labels=True, directed=True) 27 | stat, pvalue, misc = group_connection_test( 28 | A1, 29 | A2, 30 | labels1, 31 | labels2, 32 | combine_method="tippett", 33 | method="score", 34 | correct_method="Bonferroni", 35 | density_adjustment=True, 36 | ) 37 | self.assertTrue(pvalue < 0.05) 38 | self.assertTrue(misc["uncorrected_pvalues"].size == 4) 39 | self.assertTrue(misc["probabilities1"].size == 4) 40 | self.assertTrue(misc["probabilities2"].size == 4) 41 | self.assertTrue(np.sum(misc["observed1"].to_numpy()) == np.count_nonzero(A1)) 42 | self.assertTrue(np.sum(misc["observed2"].to_numpy()) == np.count_nonzero(A2)) 43 | self.assertTrue(misc["null_ratio"] != 1.0) 44 | self.assertTrue(misc["n_tests"] == 4) 45 | self.assertTrue(misc["rejections"].to_numpy().size == 4) 46 | self.assertTrue(misc["corrected_pvalues"].size == 4) 47 | 48 | def test_sparse(self): 49 | B1 = np.array([[0.8, 0.6], [0.6, 0.8]]) 50 | B2 = np.array([[0.87, 0.66], [0.66, 0.87]]) 51 | A1, labels1 = sbm([50, 50], B1, return_labels=True) 52 | A2, labels2 = sbm([60, 60], B2, return_labels=True) 53 | sA1 = csr_array(A1) 54 | sA2 = csr_array(A2) 55 | 56 | stat, pvalue, misc = group_connection_test(sA1, sA2, labels1, labels2) 57 | self.assertTrue(pvalue <= 0.05) 58 | 59 | 60 | class TestER(unittest.TestCase): 61 | def test_er(self): 62 | np.random.seed(234) 63 | A1 = er_np(500, 0.6) 64 | A2 = er_np(400, 0.8) 65 | stat, pvalue, er_misc = density_test(A1, A2) 66 | self.assertTrue(pvalue <= 0.05) 67 | A3 = er_np(500, 0.8) 68 | A4 = er_np(400, 0.8) 69 | stat, pvalue, er_misc = density_test(A3, A4) 70 | self.assertTrue(pvalue > 0.05) 71 | 72 | def test_all(self): 73 | np.random.seed(234) 74 | A1 = er_np(500, 0.6) 75 | A2 = er_np(400, 0.8) 76 | stat, pvalue, er_misc = density_test(A1, A2, method="chi2") 77 | self.assertTrue(pvalue <= 0.05) 78 | self.assertTrue(er_misc["probability1"].to_numpy() < 1.0) 79 | self.assertTrue(er_misc["probability2"].to_numpy() < 1.0) 80 | self.assertTrue(er_misc["observed1"].to_numpy() == np.count_nonzero(A1)) 81 | self.assertTrue(er_misc["observed2"].to_numpy() == np.count_nonzero(A2)) 82 | 83 | 84 | if __name__ == "__main__": 85 | unittest.main() 86 | -------------------------------------------------------------------------------- /docs/tutorials/plotting/pairplot.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Pairplot: Visualizing High Dimensional Data\n", 8 | "\n", 9 | "This example provides how to visualize high dimensional data using the pairplot." 
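The density comparison exercised in tests/test_er_and_group_connection_tests.py above reduces to a single call; a minimal sketch mirroring ``TestER`` (the sizes and probabilities are illustrative only):

    import numpy as np
    from graspologic.inference import density_test
    from graspologic.simulations import er_np

    np.random.seed(234)
    A1 = er_np(500, 0.6)
    A2 = er_np(400, 0.8)

    stat, pvalue, misc = density_test(A1, A2)
    print(pvalue)  # small, because the two densities genuinely differ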
10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import graspologic\n", 19 | "\n", 20 | "import numpy as np\n", 21 | "%matplotlib inline" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "## Simulate a binary graph using stochastic block model\n", 29 | "The 3-block model is defined as below:\n", 30 | "\n", 31 | "\\begin{align*}\n", 32 | "n &= [50, 50, 50]\\\\\n", 33 | "P &= \n", 34 | "\\begin{bmatrix}0.5 & 0.1 & 0.05 \\\\\n", 35 | "0.1 & 0.4 & 0.15 \\\\\n", 36 | "0.05 & 0.15 & 0.3\n", 37 | "\\end{bmatrix}\n", 38 | "\\end{align*}\n", 39 | "\n", 40 | "Thus, the first 50 vertices belong to block 1, the second 50 vertices belong to block 2, and the last 50 vertices belong to block 3." 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "from graspologic.simulations import sbm\n", 50 | "\n", 51 | "n_communities = [50, 50, 50]\n", 52 | "p = [[0.5, 0.1, 0.05], \n", 53 | " [0.1, 0.4, 0.15], \n", 54 | " [0.05, 0.15, 0.3],]\n", 55 | "\n", 56 | "np.random.seed(2)\n", 57 | "A = sbm(n_communities, p)" 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": {}, 63 | "source": [ 64 | "## Embed using adjacency spectral embedding to obtain lower dimensional representation of the graph\n", 65 | "\n", 66 | "The embedding dimension is automatically chosen. It should embed to 3 dimensions." 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [ 75 | "from graspologic.embed import AdjacencySpectralEmbed\n", 76 | "\n", 77 | "ase = AdjacencySpectralEmbed()\n", 78 | "X = ase.fit_transform(A)\n", 79 | "\n", 80 | "print(X.shape)" 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": {}, 86 | "source": [ 87 | "## Use pairplot to plot the embedded data\n", 88 | "\n", 89 | "First we generate labels that correspond to blocks. We pass the labels along with the data for pair plot." 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "from graspologic.plot import pairplot\n", 99 | "\n", 100 | "labels = ['Block 1'] * 50 + ['Block 2'] * 50 + ['Block 3'] * 50\n", 101 | "\n", 102 | "plot = pairplot(X, labels)" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": null, 108 | "metadata": {}, 109 | "outputs": [], 110 | "source": [] 111 | } 112 | ], 113 | "metadata": { 114 | "kernelspec": { 115 | "display_name": "Python 3", 116 | "language": "python", 117 | "name": "python3" 118 | }, 119 | "language_info": { 120 | "codemirror_mode": { 121 | "name": "ipython", 122 | "version": 3 123 | }, 124 | "file_extension": ".py", 125 | "mimetype": "text/x-python", 126 | "name": "python", 127 | "nbconvert_exporter": "python", 128 | "pygments_lexer": "ipython3", 129 | "version": "3.7.3" 130 | } 131 | }, 132 | "nbformat": 4, 133 | "nbformat_minor": 4 134 | } 135 | -------------------------------------------------------------------------------- /tests/test_select_dimension.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 
3 | 4 | import unittest 5 | 6 | import numpy as np 7 | from numpy.testing import assert_equal 8 | from scipy.linalg import orth 9 | 10 | from graspologic.embed.svd import select_dimension 11 | from graspologic.simulations.simulations import sbm 12 | 13 | 14 | def generate_data(n=10, elbows=3, seed=1): 15 | """ 16 | Generate data matrix with a specific number of elbows on scree plot 17 | """ 18 | np.random.seed(seed) 19 | x = np.random.binomial(1, 0.6, (n**2)).reshape(n, n) 20 | xorth = orth(x) 21 | d = np.zeros(xorth.shape[0]) 22 | for i in range(0, len(d), int(len(d) / (elbows + 1))): 23 | d[:i] += 10 24 | A = xorth.T.dot(np.diag(d)).dot(xorth) 25 | return A, d 26 | 27 | 28 | class TestSelectDimension(unittest.TestCase): 29 | def test_invalid_inputes(self): 30 | X, D = generate_data() 31 | 32 | # invalid n_elbows 33 | with self.assertRaises(ValueError): 34 | bad_n_elbows = -2 35 | select_dimension(X, n_elbows=bad_n_elbows) 36 | 37 | with self.assertRaises(ValueError): 38 | bad_n_elbows = "string" 39 | select_dimension(X, n_elbows=bad_n_elbows) 40 | 41 | # invalid n_components 42 | with self.assertRaises(ValueError): 43 | bad_n_components = -1 44 | select_dimension(X, n_components=bad_n_components) 45 | 46 | with self.assertRaises(ValueError): 47 | bad_n_components = "string" 48 | select_dimension(X, n_components=bad_n_components) 49 | 50 | # invalid threshold 51 | with self.assertRaises(ValueError): 52 | bad_threshold = -2 53 | select_dimension(X, threshold=bad_threshold) 54 | 55 | with self.assertRaises(ValueError): 56 | bad_threshold = "string" 57 | select_dimension(X, threshold=bad_threshold) 58 | 59 | with self.assertRaises(IndexError): 60 | bad_threshold = 1000000 61 | select_dimension(X, threshold=bad_threshold) 62 | 63 | # invalid X 64 | with self.assertRaises(ValueError): 65 | bad_X = -2 66 | select_dimension(X=bad_X) 67 | 68 | with self.assertRaises(ValueError): 69 | # input is tensor 70 | bad_X = np.random.normal(size=(100, 10, 10)) 71 | select_dimension(X=bad_X) 72 | 73 | with self.assertRaises(ValueError): 74 | bad_X = np.random.normal(size=100).reshape(100, -1) 75 | select_dimension(X=bad_X) 76 | 77 | def test_output_synthetic(self): 78 | data, l = generate_data(10, 3) 79 | elbows, _, _ = select_dimension(X=data, n_elbows=2, return_likelihoods=True) 80 | assert_equal(elbows, [2, 4]) 81 | 82 | def test_output_simple(self): 83 | """ 84 | Elbow should be at 2. 85 | """ 86 | X = np.array([10, 9, 3, 2, 1]) 87 | elbows, _ = select_dimension(X, n_elbows=1) 88 | assert_equal(elbows[0], 2) 89 | 90 | def test_output_uniform(self): 91 | """ 92 | Generate two sets of synthetic eigenvalues based on two uniform distributions. 93 | The elbow must be at 50. 
94 | """ 95 | np.random.seed(9) 96 | x1 = np.random.uniform(0, 45, 50) 97 | x2 = np.random.uniform(55, 100, 50) 98 | X = np.sort(np.hstack([x1, x2]))[::-1] 99 | elbows, _ = select_dimension(X, n_elbows=1) 100 | assert_equal(elbows[0], 50) 101 | 102 | def test_output_two_block_sbm(self): 103 | np.random.seed(10) 104 | n_communities = [100, 100] 105 | P = np.array([[0.5, 0.1], [0.1, 0.5]]) 106 | A = sbm(n_communities, P) 107 | 108 | elbows, _ = select_dimension(A, n_elbows=2) 109 | assert_equal(elbows[0], 2) 110 | -------------------------------------------------------------------------------- /docs/tutorials/simulations/erdos_renyi.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Erdos-Renyi (ER) Model" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import graspologic\n", 17 | "\n", 18 | "import matplotlib.pyplot as plt\n", 19 | "import numpy as np\n", 20 | "%matplotlib inline" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "Erdos-Renyi (ER) graphs are the simplest generative model. We assume that each edge for all pairs of vertices is sampled independently from all the edges. There are two ways to parameterize the model:\n", 28 | "\n", 29 | "1. ER(n, p) - this model specifies the number of vertices, $n$, and each pair of vertices has $p$ probability of an edge existing between the two.\n", 30 | "2. ER(n, m) - this model specifies the number of vertices, $n$, and the total number of edges $m$.\n", 31 | "\n", 32 | "Below, we sample two binary graphs (undirected and no self-loops) $G_1 \\sim ER_{NP}(50, 0.3)$ and $G_2 \\sim ER_{NM}(50, 250)$." 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": null, 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "from graspologic.simulations import er_nm, er_np\n", 42 | "\n", 43 | "n = 50\n", 44 | "m = 250\n", 45 | "p = 0.3\n", 46 | "\n", 47 | "np.random.seed(1)\n", 48 | "G1 = er_np(n=n, p=p)\n", 49 | "G2 = er_nm(n=n, m=m)" 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": {}, 55 | "source": [ 56 | "## Visualize the graphs using heatmap" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": null, 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "from graspologic.plot import heatmap\n", 66 | "\n", 67 | "heatmap(G1, title = 'ER-NP(50, 0.3) Simulation')\n", 68 | "_ = heatmap(G2, title = 'ER-NM(50, 250) Simulation')" 69 | ] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "metadata": {}, 74 | "source": [ 75 | "## Weighted ER Graphs\n", 76 | "\n", 77 | "Both ``er_np()`` and ``er_nm()`` functions provide ways to sample weights for all edges that were sampled via a probability distribution function. In order to sample with weights, we provide a probability distribution function with corresponding keyword arguments for the distribution function.\n", 78 | "\n", 79 | "Below we sample $G_1 \\sim ER_{NP}(50, 0.2)$ where the weights are distributed normally with $\\mu = 0,~\\sigma^2 = 1$." 
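``test_output_simple`` in tests/test_select_dimension.py above pins the expected elbow on a toy spectrum; the same call in isolation:

    import numpy as np
    from graspologic.embed.svd import select_dimension

    # With singular values [10, 9, 3, 2, 1], the first elbow sits after the
    # large gap, i.e. at dimension 2.
    elbows, _ = select_dimension(np.array([10, 9, 3, 2, 1]), n_elbows=1)
    print(elbows)  # [2]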
80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": null, 85 | "metadata": {}, 86 | "outputs": [], 87 | "source": [ 88 | "wt = np.random.normal\n", 89 | "wtargs = dict(loc=0, scale=1)\n", 90 | "\n", 91 | "G1 = er_np(n=50, p=0.2, wt=wt, wtargs=wtargs)" 92 | ] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "metadata": {}, 97 | "source": [ 98 | "## Visualize the graph using heatmap" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": null, 104 | "metadata": {}, 105 | "outputs": [], 106 | "source": [ 107 | "_ = heatmap(G1, title = 'ER-NP(50, 0.2) with N(0,1) Weights Simulation')" 108 | ] 109 | } 110 | ], 111 | "metadata": { 112 | "kernelspec": { 113 | "display_name": "Python 3", 114 | "language": "python", 115 | "name": "python3" 116 | }, 117 | "language_info": { 118 | "codemirror_mode": { 119 | "name": "ipython", 120 | "version": 3 121 | }, 122 | "file_extension": ".py", 123 | "mimetype": "text/x-python", 124 | "name": "python", 125 | "nbconvert_exporter": "python", 126 | "pygments_lexer": "ipython3", 127 | "version": "3.7.0" 128 | } 129 | }, 130 | "nbformat": 4, 131 | "nbformat_minor": 4 132 | } 133 | -------------------------------------------------------------------------------- /graspologic/preconditions.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT license. 3 | 4 | import numbers 5 | from typing import Any, Union 6 | 7 | import networkx as nx 8 | 9 | from graspologic.types import Tuple 10 | 11 | 12 | def check_argument_types( 13 | value: Any, required_types: Union[type, Tuple[type, ...]], message: str 14 | ) -> None: 15 | """ 16 | Raises a TypeError if the provided ``value`` is not one of the ``required_types`` 17 | 18 | Parameters 19 | ---------- 20 | value : Any 21 | The argument to test for valid type 22 | required_types : Union[type, Tuple[type, ...]] 23 | A type or a n-ary tuple of types to test for validity 24 | message : str 25 | The message to use as the body of the TypeError 26 | 27 | Raises 28 | ------ 29 | TypeError if the type is not one of the ``required_types`` 30 | """ 31 | if not isinstance(value, required_types): 32 | raise TypeError(message) 33 | 34 | 35 | def check_optional_argument_types( 36 | value: Any, required_types: Union[type, Tuple[type, ...]], message: str 37 | ) -> None: 38 | """ 39 | Raises a TypeError if the provided ``value`` is not one of the ``required_types``, 40 | unless it is None. A None value is treated as a valid type. 41 | 42 | Parameters 43 | ---------- 44 | value : Any 45 | The argument to test for valid type 46 | required_types : Union[type, Tuple[type, ...]] 47 | A type or a n-ary tuple of types to test for validity 48 | message : str 49 | The message to use as the body of the TypeError 50 | 51 | Raises 52 | ------ 53 | TypeError if the type is not one of the ``required_types``, unless it is None 54 | """ 55 | if value is None: 56 | return 57 | check_argument_types(value, required_types, message) 58 | 59 | 60 | def check_argument(check: bool, message: str) -> None: 61 | """ 62 | Raises a ValueError if the provided check is false 63 | 64 | >>> from graspologic import preconditions 65 | >>> x = 5 66 | >>> preconditions.check_argument(x < 5, "x must be less than 5") 67 | Traceback (most recent call last): 68 | ... 
69 | ValueError: x must be less than 5 70 | 71 | Parameters 72 | ---------- 73 | value : Any 74 | The argument to test for valid type 75 | required_types : Union[type, Tuple[type, ...]] 76 | A type or a n-ary tuple of types to test for validity 77 | message : str 78 | The message to use as the body of the TypeError 79 | 80 | Raises 81 | ------ 82 | TypeError if the type is not one of the ``required_types`` 83 | """ 84 | if not check: 85 | raise ValueError(message) 86 | 87 | 88 | def is_real_weighted( 89 | graph: Union[nx.Graph, nx.DiGraph], weight_attribute: str = "weight" 90 | ) -> bool: 91 | """ 92 | Checks every edge in ``graph`` to ascertain whether it has: 93 | 94 | - a ``weight_attribute`` key in the data dictionary for the edge 95 | - if that ``weight_attribute`` value is a subclass of numbers.Real 96 | 97 | If any edge fails this test, it returns ``False``, else ``True`` 98 | 99 | Parameters 100 | ---------- 101 | graph : Union[nx.Graph, nx.DiGraph] 102 | The networkx graph to test 103 | weight_attribute : str (default="weight") 104 | The edge dictionary data attribute that holds the weight. Default is ``weight``. 105 | 106 | Returns 107 | ------- 108 | bool 109 | ``True`` if every edge has a numeric ``weight_attribute`` weight, ``False`` if 110 | any edge fails this test 111 | 112 | """ 113 | # not only must every edge have a weight attribute but the value must be numeric 114 | return all( 115 | ( 116 | weight_attribute in data 117 | and isinstance(data[weight_attribute], numbers.Real) 118 | for _, _, data in graph.edges(data=True) 119 | ) 120 | ) 121 | -------------------------------------------------------------------------------- /tests/test_vertex_nomination_via_SGM.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 
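The helpers in graspologic/preconditions.py above are small argument guards meant to run at the top of public functions. A minimal sketch; ``summarize_weights`` and its arguments are hypothetical, and only the precondition calls come from the module above:

    import networkx as nx

    from graspologic import preconditions

    def summarize_weights(graph, scale=None):  # hypothetical API for illustration
        preconditions.check_argument_types(
            graph, (nx.Graph, nx.DiGraph), "graph must be a networkx Graph or DiGraph"
        )
        preconditions.check_optional_argument_types(
            scale, (int, float), "scale must be a number if provided"
        )
        preconditions.check_argument(
            preconditions.is_real_weighted(graph),
            "every edge must carry a numeric 'weight' attribute",
        )
        total = sum(data["weight"] for _, _, data in graph.edges(data=True))
        return total if scale is None else total * scale

    g = nx.Graph()
    g.add_edge("a", "b", weight=1.5)
    print(summarize_weights(g, scale=2))  # 3.0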
3 | 4 | import unittest 5 | 6 | import numpy as np 7 | 8 | from graspologic.nominate import VNviaSGM 9 | from graspologic.simulations import er_np 10 | 11 | np.random.seed(1) 12 | 13 | 14 | class TestVNviaSGM(unittest.TestCase): 15 | def test_VNviaSGM_inputs(self): 16 | with self.assertRaises(ValueError): 17 | VNviaSGM(order_voi_subgraph=-1) 18 | with self.assertRaises(ValueError): 19 | VNviaSGM(order_voi_subgraph=1.5) 20 | with self.assertRaises(ValueError): 21 | VNviaSGM(order_seeds_subgraph=-1) 22 | with self.assertRaises(ValueError): 23 | VNviaSGM(order_seeds_subgraph=1.5) 24 | with self.assertRaises(ValueError): 25 | VNviaSGM(n_init=-1) 26 | with self.assertRaises(ValueError): 27 | VNviaSGM(n_init=1.5) 28 | with self.assertRaises(ValueError): 29 | VNviaSGM(max_nominations=0) 30 | 31 | with self.assertRaises(ValueError): 32 | VNviaSGM().fit( 33 | np.random.randn(3, 4), 34 | np.random.randn(4, 4), 35 | 0, 36 | [np.arange(2), np.arange(2)], 37 | ) 38 | with self.assertRaises(ValueError): 39 | VNviaSGM().fit( 40 | np.random.randn(4, 4), 41 | np.random.randn(3, 4), 42 | 0, 43 | [np.arange(2), np.arange(2)], 44 | ) 45 | with self.assertRaises(ValueError): 46 | VNviaSGM().fit( 47 | np.random.randn(4, 4), 48 | np.random.randn(4, 4), 49 | 0, 50 | [np.arange(2), 1], 51 | ) 52 | with self.assertRaises(ValueError): 53 | VNviaSGM().fit( 54 | np.random.randn(4, 4), 55 | np.random.randn(4, 4), 56 | 0, 57 | np.random.randn(3, 3), 58 | ) 59 | with self.assertRaises(ValueError): 60 | VNviaSGM().fit( 61 | np.random.randn(4, 4), 62 | np.random.randn(4, 4), 63 | 0, 64 | [np.arange(2), np.arange(3)], 65 | ) 66 | with self.assertRaises(ValueError): 67 | VNviaSGM().fit( 68 | np.random.randn(4, 4), 69 | np.random.randn(4, 4), 70 | 0, 71 | [np.arange(5), np.arange(5)], 72 | ) 73 | with self.assertRaises(ValueError): 74 | VNviaSGM().fit( 75 | np.random.randn(4, 4), 76 | np.random.randn(4, 4), 77 | 0, 78 | [[], []], 79 | ) 80 | with self.assertRaises(ValueError): 81 | VNviaSGM().fit( 82 | np.random.randn(4, 4), 83 | np.random.randn(4, 4), 84 | 0, 85 | [[1, 1], [1, 2]], 86 | ) 87 | with self.assertRaises(ValueError): 88 | VNviaSGM().fit( 89 | np.random.randn(4, 4), 90 | np.random.randn(4, 4), 91 | 0, 92 | [[1, 5], [1, 2]], 93 | ) 94 | 95 | def test_vn_algorithm(self): 96 | g1 = er_np(n=50, p=0.6) 97 | node_shuffle = np.random.permutation(50) 98 | 99 | g2 = g1[np.ix_(node_shuffle, node_shuffle)] 100 | 101 | kklst = [(xx, yy) for xx, yy in zip(node_shuffle, np.arange(len(node_shuffle)))] 102 | kklst.sort(key=lambda x: x[0]) 103 | kklst = np.array(kklst) 104 | 105 | voi = 7 106 | nseeds = 6 107 | 108 | vnsgm = VNviaSGM() 109 | nomlst = vnsgm.fit_predict( 110 | g1, g2, voi, [kklst[0:nseeds, 0], kklst[0:nseeds, 1]] 111 | ) 112 | 113 | self.assertEqual(nomlst[0][0], kklst[np.where(kklst[:, 0] == voi)[0][0], 1]) 114 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: graspologic Build 2 | on: 3 | push: 4 | paths-ignore: 5 | - '.all-contributorsrc' 6 | - 'CONTRIBUTORS.md' 7 | branches-ignore: 8 | - 'dev' 9 | - 'main' 10 | pull_request: 11 | paths-ignore: 12 | - '.all-contributorsrc' 13 | - 'CONTRIBUTORS.md' 14 | workflow_call: 15 | 16 | env: 17 | PYTHON_VERSION: '3.10' 18 | 19 | jobs: 20 | build-reference-documentation: 21 | runs-on: ubuntu-latest 22 | steps: 23 | - run: sudo apt-get install -y pandoc 24 | - uses: actions/checkout@v2 25 | - name: Set up Python 26 | uses: 
actions/setup-python@v2 27 | with: 28 | python-version: ${{ env.PYTHON_VERSION }} 29 | - name: Install uv 30 | uses: astral-sh/setup-uv@v2 31 | - name: Install dependencies 32 | run: uv sync --python ${{ env.PYTHON_VERSION }} 33 | - name: "Run Reference Documentation Generation" 34 | run: | 35 | echo "documentation" > dependencies_documentation.txt 36 | uv run pipdeptree >> dependencies_documentation.txt 37 | uv run poe docsWithTutorials 38 | - name: Archive documentation version artifact 39 | uses: actions/upload-artifact@v4 40 | with: 41 | name: dependencies 42 | path: | 43 | dependencies_documentation.txt 44 | - name: Archive documentation artifacts 45 | uses: actions/upload-artifact@v4 46 | with: 47 | name: documentation-site 48 | path: | 49 | docs/_build/html 50 | code-format-check: 51 | runs-on: ubuntu-latest 52 | steps: 53 | - uses: actions/checkout@v2 54 | - name: Set up Python 55 | uses: actions/setup-python@v2 56 | with: 57 | python-version: ${{ env.PYTHON_VERSION }} 58 | - name: Install uv 59 | uses: astral-sh/setup-uv@v2 60 | - name: Install dependencies 61 | run: uv sync --python ${{ env.PYTHON_VERSION }} 62 | - name: Run Format Check 63 | run: uv run poe static_checks 64 | test-coverage: 65 | runs-on: ubuntu-latest 66 | steps: 67 | - uses: actions/checkout@v2 68 | - name: Set up Python 69 | uses: actions/setup-python@v2 70 | with: 71 | python-version: ${{ env.PYTHON_VERSION }} 72 | - name: Install uv 73 | uses: astral-sh/setup-uv@v2 74 | - name: Install dependencies 75 | run: uv sync --python ${{ env.PYTHON_VERSION }} 76 | - name: Run Test Coverage 77 | env: 78 | MPLBACKEND: Agg 79 | run: uv run poe coverage 80 | unit-and-doc-test: 81 | runs-on: ${{matrix.os}} 82 | strategy: 83 | matrix: 84 | os: [ubuntu-latest, windows-latest, macos-latest] 85 | python_version: ["3.9", "3.10", "3.11", "3.12"] 86 | fail-fast: false 87 | steps: 88 | - uses: actions/checkout@v2 89 | - name: Set up Python ${{matrix.python_version}} ${{matrix.os}} 90 | uses: actions/setup-python@v2 91 | with: 92 | python-version: ${{matrix.python_version}} 93 | - name: Install uv 94 | uses: astral-sh/setup-uv@v2 95 | - name: Install dependencies 96 | run: uv sync --python ${{ matrix.python_version }} 97 | - name: Run Unit Tests and Doctests Python ${{matrix.python_version}} ${{matrix.os}} 98 | env: 99 | MPLBACKEND: Agg 100 | run: uv run poe tests 101 | - name: Run mypy type check Python ${{matrix.python_version}} ${{matrix.os}} 102 | run: uv run poe type_check 103 | - name: Generate dependency tree 104 | run: | 105 | export DEPS='dependencies_${{matrix.python_version}}_${{matrix.os}}.txt' 106 | echo "${{matrix.python_version}} ${{matrix.os}}" > $DEPS 107 | uv run pipdeptree >> $DEPS 108 | shell: bash 109 | - name: Archive dependency tree 110 | uses: actions/upload-artifact@v4 111 | with: 112 | name: dependencies-${{matrix.python_version}}-${{matrix.os}} 113 | path: | 114 | dependencies_${{matrix.python_version}}_${{matrix.os}}.txt 115 | -------------------------------------------------------------------------------- /graspologic/align/sign_flips.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 3 | 4 | import numpy as np 5 | 6 | from .base import BaseAlign 7 | 8 | 9 | class SignFlips(BaseAlign): 10 | """ 11 | Flips the signs of all entries in one dataset, ``X`` along some of the 12 | dimensions. 
In particular, it does so in a way that brings this dataset to 13 | the same orthant as the second dataset, ``Y``, according to some criterion, 14 | computed along each dimension. The two critera currently available are the 15 | median and the maximum (in magnitude) value along each dimension. 16 | 17 | This module can also be used to bring the dataset to the first orthant 18 | (i.e. with all criteras being positive) by providing the identity matrix as 19 | the second dataset. 20 | 21 | Parameters 22 | ---------- 23 | criterion : string, {'median' (default), 'max'}, optional 24 | String describing the criterion used to choose whether to flip signs. 25 | Two options are currently supported: 26 | 27 | - 'median' 28 | Uses the median along each dimension 29 | - 'max' 30 | Uses the maximum (in magintude) along each dimension 31 | 32 | Attributes 33 | ---------- 34 | Q_ : array, size (d, d) 35 | Final orthogonal matrix, used to modify ``X``. 36 | 37 | """ 38 | 39 | def __init__( 40 | self, 41 | criterion: str = "median", 42 | ): 43 | # checking criterion argument 44 | if type(criterion) is not str: 45 | raise TypeError("Criterion must be str") 46 | if criterion not in ["median", "max"]: 47 | raise ValueError(f"{criterion} is not a valid criterion.") 48 | 49 | super().__init__() 50 | 51 | self.criterion = criterion 52 | 53 | def set_criterion_function(self) -> None: 54 | # perform a check, in case it was modified directly 55 | if self.criterion not in ["median", "max"]: 56 | raise ValueError(f"{self.criterion} is not a valid criterion") 57 | 58 | if self.criterion == "median": 59 | 60 | def median_criterion(X: np.ndarray) -> np.ndarray: 61 | result: np.ndarray = np.median(X, axis=0) 62 | return result 63 | 64 | self.criterion_function_ = median_criterion 65 | if self.criterion == "max": 66 | 67 | def max_criterion(X: np.ndarray) -> np.ndarray: 68 | result: np.ndarray = X[ 69 | np.argmax(np.abs(X), axis=0), np.arange(X.shape[1]) 70 | ] 71 | return result 72 | 73 | self.criterion_function_ = max_criterion 74 | 75 | def fit(self, X: np.ndarray, Y: np.ndarray) -> "SignFlips": 76 | """ 77 | Uses the two datasets to learn the matrix :attr:`~graspologic.align.SignFlips.Q_` that aligns the 78 | first dataset with the second. 79 | 80 | In sign flips, :attr:`~graspologic.align.SignFlips.Q_` is an diagonal orthogonal matrices (i.e. a 81 | matrix with 1 or -1 in each entry on diagonal and 0 everywhere else) 82 | picked such that all dimensions of ``X`` @ :attr:`~graspologic.align.SignFlips.Q_` 83 | and ``Y`` are in the same orthant using some critera (median or max magnitude). 84 | 85 | Parameters 86 | ---------- 87 | X : np.ndarray, shape (n, d) 88 | Dataset to be mapped to ``Y``, must have same number of dimensions 89 | (axis 1) as ``Y``. 90 | 91 | Y : np.ndarray, shape (m, d) 92 | Target dataset, must have same number of dimensions (axis 1) as ``X``. 
93 | 94 | Returns 95 | ------- 96 | self : returns an instance of self 97 | 98 | """ 99 | X, Y = self._check_datasets(X, Y) 100 | _, d = X.shape 101 | 102 | self.set_criterion_function() 103 | X_criterias = self.criterion_function_(X) 104 | Y_criterias = self.criterion_function_(Y) 105 | 106 | val = np.multiply(X_criterias, Y_criterias) 107 | t_X = (val >= 0) * 2 - 1 108 | 109 | self.Q_ = np.diag(t_X) 110 | return self 111 | -------------------------------------------------------------------------------- /tests/test_sign_flips.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 3 | 4 | import unittest 5 | 6 | import numpy as np 7 | 8 | from graspologic.align import SignFlips 9 | 10 | 11 | class TestSignFlips(unittest.TestCase): 12 | def test_bad_kwargs(self): 13 | with self.assertRaises(TypeError): 14 | SignFlips(criterion={"this is a": "dict"}) 15 | with self.assertRaises(ValueError): 16 | SignFlips(criterion="cep") 17 | # check delayed ValueError 18 | with self.assertRaises(ValueError): 19 | aligner = SignFlips(criterion="median") 20 | X = np.arange(6).reshape(6, 1) 21 | Y = np.arange(6).reshape(6, 1) 22 | aligner.criterion = "something" 23 | aligner.fit(X, Y) 24 | 25 | def test_bad_datasets(self): 26 | X = np.arange(6).reshape(6, 1) 27 | Y = np.arange(6).reshape(6, 1) 28 | Y_wrong_d = np.arange(12).reshape(6, 2) 29 | # check passing weird stuff as input (caught by us) 30 | with self.assertRaises(TypeError): 31 | aligner = SignFlips() 32 | aligner.fit("hello there", Y) 33 | with self.assertRaises(TypeError): 34 | aligner = SignFlips() 35 | aligner.fit(X, "hello there") 36 | with self.assertRaises(TypeError): 37 | aligner = SignFlips() 38 | aligner.fit({"hello": "there"}, Y) 39 | with self.assertRaises(TypeError): 40 | aligner = SignFlips() 41 | aligner.fit(X, {"hello": "there"}) 42 | # check passing arrays of weird ndims (caught by check_array) 43 | with self.assertRaises(ValueError): 44 | aligner = SignFlips() 45 | aligner.fit(X, Y.reshape(3, 2, 1)) 46 | with self.assertRaises(ValueError): 47 | aligner = SignFlips() 48 | aligner.fit(X.reshape(3, 2, 1), Y) 49 | # check passing arrays with different dimensions (caught by us) 50 | with self.assertRaises(ValueError): 51 | aligner = SignFlips() 52 | aligner.fit(X, Y_wrong_d) 53 | # check passing array with wrong dimensions to transform (caught by us) 54 | with self.assertRaises(ValueError): 55 | aligner = SignFlips() 56 | aligner.fit(X, Y) 57 | aligner.transform(Y_wrong_d) 58 | 59 | def test_two_datasets(self): 60 | X = np.arange(6).reshape(3, 2) * (-1) 61 | Y = np.arange(6).reshape(3, 2) @ np.diag([1, -1]) + 0.5 62 | # X flips sign in the first dimension 63 | Q_answer = np.array([[-1, 0], [0, 1]]) 64 | X_answer = X.copy() @ Q_answer 65 | # first, do fit and transform separately 66 | aligner_1 = SignFlips() 67 | aligner_1.fit(X, Y) 68 | Q_test = aligner_1.Q_ 69 | X_test = aligner_1.transform(X) 70 | self.assertTrue(np.all(Q_test == Q_answer)) 71 | self.assertTrue(np.all(X_test == X_answer)) 72 | # now, do fit_transform 73 | aligner_2 = SignFlips() 74 | X_test = aligner_2.fit_transform(X, Y) 75 | Q_test = aligner_2.Q_ 76 | self.assertTrue(np.all(Q_test == Q_answer)) 77 | self.assertTrue(np.all(X_test == X_answer)) 78 | # try giving a different matrix as the sole input (I) 79 | I_test = aligner_2.transform(np.eye(2)) 80 | I_answer = np.diag([-1, 1]) 81 | self.assertTrue(np.all(I_test == I_answer)) 82 | 83 | def 
test_max_criterion(self): 84 | X = np.arange(6).reshape(3, 2) * (-1) 85 | Y = np.arange(6).reshape(3, 2) @ np.diag([1, -1]) + 0.5 86 | # in this case, Y should be unchanged, and X matched to Y 87 | # so X flips sign in the first dimension 88 | Q_answer = np.array([[-1, 0], [0, 1]]) 89 | X_answer = X.copy() @ Q_answer 90 | # set criterion to "max", see if that works 91 | aligner = SignFlips(criterion="max") 92 | aligner.fit(X, Y) 93 | Q_test = aligner.Q_ 94 | X_test = aligner.transform(X) 95 | self.assertTrue(np.all(Q_test == Q_answer)) 96 | self.assertTrue(np.all(X_test == X_answer)) 97 | 98 | 99 | if __name__ == "__main__": 100 | unittest.main() 101 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # graspologic 3 | [![Paper shield](https://img.shields.io/badge/JMLR-Paper-red)](http://www.jmlr.org/papers/volume20/19-490/19-490.pdf) 4 | [![PyPI version](https://img.shields.io/pypi/v/graspologic.svg)](https://pypi.org/project/graspologic/) 5 | [![Downloads shield](https://pepy.tech/badge/graspologic)](https://pepy.tech/project/graspologic) 6 | [![graspologic Build](https://github.com/graspologic-org/graspologic/actions/workflows/build.yml/badge.svg)](https://github.com/graspologic-org/graspologic/actions/workflows/build.yml) 7 | [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) 8 | 9 | ## `graspologic` is a package for graph statistical algorithms. 10 | 11 | - [Overview](#overview) 12 | - [Documentation](#documentation) 13 | - [System Requirements](#system-requirements) 14 | - [Installation Guide](#installation-guide) 15 | - [Contributing](#contributing) 16 | - [License](#license) 17 | - [Issues](#issues) 18 | - [Citing `graspologic`](#citing-graspologic) 19 | 20 | # Overview 21 | A graph, or network, provides a mathematically intuitive representation of data with some sort of relationship between items. For example, a social network can be represented as a graph by considering all participants in the social network as nodes, with connections representing whether each pair of individuals in the network are friends with one another. Naively, one might apply traditional statistical techniques to a graph, which neglects the spatial arrangement of nodes within the network and is not utilizing all of the information present in the graph. In this package, we provide utilities and algorithms designed for the processing and analysis of graphs with specialized graph statistical algorithms. 22 | 23 | # Documentation 24 | The official documentation with usage is at [https://graspologic-org.github.io/graspologic/latest](https://graspologic-org.github.io/graspologic/latest) 25 | 26 | Please visit the [tutorial section](https://graspologic-org.github.io/graspologic/latest/tutorials/index.html) in the official website for more in depth usage. 27 | 28 | # System Requirements 29 | 30 | ## Hardware requirements 31 | `graspologic` package requires only a standard computer with enough RAM to support the in-memory operations. 
32 | 33 | 34 | ## Software requirements 35 | 36 | ### OS Requirements 37 | `graspologic` is tested on the following OSes: 38 | - Linux x64 39 | - macOS x64 40 | - Windows 10 x64 41 | 42 | And across the following **x86_64** versions of Python: 43 | - 3.9 44 | - 3.10 45 | - 3.11 46 | - 3.12 47 | 48 | If you try to use `graspologic` for a different platform than the ones listed and notice any unexpected behavior, 49 | please feel free to [raise an issue](https://github.com/graspologic-org/graspologic/issues/new). It's better for ourselves and our users 50 | if we have concrete examples of things not working! 51 | 52 | # Installation Guide 53 | 54 | ## Install from pip 55 | ``` 56 | pip install graspologic 57 | ``` 58 | 59 | 60 | ## Install from Github 61 | ``` 62 | git clone https://github.com/graspologic-org/graspologic 63 | cd graspologic 64 | python3 -m venv venv 65 | source venv/bin/activate 66 | pip install . 67 | ``` 68 | 69 | # Contributing 70 | We welcome contributions from anyone. Please see our [contribution guidelines](https://github.com/graspologic-org/graspologic/blob/dev/CONTRIBUTING.md) before making a pull request. Our 71 | [issues](https://github.com/graspologic-org/graspologic/issues) page is full of places we could use help! 72 | If you have an idea for an improvement not listed there, please 73 | [make an issue](https://github.com/graspologic-org/graspologic/issues/new) first so you can discuss with the developers. 74 | 75 | # License 76 | This project is covered under the MIT License. 77 | 78 | # Issues 79 | We appreciate detailed bug reports and feature requests (though we appreciate pull requests even more!). Please visit our [issues](https://github.com/graspologic-org/graspologic/issues) page if you have questions or ideas. 80 | 81 | # Citing `graspologic` 82 | If you find `graspologic` useful in your work, please cite the package via the [GraSPy paper](http://www.jmlr.org/papers/volume20/19-490/19-490.pdf) 83 | 84 | > Chung, J., Pedigo, B. D., Bridgeford, E. W., Varjavand, B. K., Helm, H. S., & Vogelstein, J. T. (2019). GraSPy: Graph Statistics in Python. Journal of Machine Learning Research, 20(158), 1-7. 85 | -------------------------------------------------------------------------------- /tests/test_latentpositiontest.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 
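Before the test cases, a minimal sketch of the call pattern they exercise: two same-sized graphs go in, and a tuple comes back whose third element is a dictionary holding the bootstrap null distributions (that much is relied on by ``test_n_bootstraps`` below; no further structure is assumed here):

```python
import numpy as np

from graspologic.inference import latent_position_test
from graspologic.simulations import er_np

np.random.seed(0)
A1 = er_np(20, 0.5)
A2 = er_np(20, 0.5)

# One bootstrap null distribution per input graph, n_bootstraps samples each.
result = latent_position_test(A1, A2, n_bootstraps=50)
print(result[2]["null_distribution_1"].shape[0])  # 50, one entry per bootstrap
```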
3 | 4 | import unittest 5 | 6 | import numpy as np 7 | 8 | from graspologic.inference import latent_position_test 9 | from graspologic.inference.latent_position_test import _difference_norm 10 | from graspologic.simulations import er_np, sbm 11 | 12 | 13 | class TestLatentPositionTest(unittest.TestCase): 14 | @classmethod 15 | def test_ase_works(self): 16 | np.random.seed(1234556) 17 | A1 = er_np(5, 0.8) 18 | A2 = er_np(5, 0.8) 19 | lpt = latent_position_test(A1, A2) 20 | 21 | def test_omni_works(self): 22 | np.random.seed(1234556) 23 | A1 = er_np(5, 0.8) 24 | A2 = er_np(5, 0.8) 25 | lpt = latent_position_test(A1, A2, embedding="omnibus") 26 | 27 | def test_bad_kwargs(self): 28 | np.random.seed(1234556) 29 | A1 = er_np(5, 0.8) 30 | A2 = er_np(5, 0.8) 31 | 32 | with self.assertRaises(ValueError): 33 | latent_position_test(A1, A2, n_components=-100) 34 | with self.assertRaises(ValueError): 35 | latent_position_test(A1, A2, test_case="oops") 36 | with self.assertRaises(ValueError): 37 | latent_position_test(A1, A2, n_bootstraps=-100) 38 | with self.assertRaises(ValueError): 39 | latent_position_test(A1, A2, embedding="oops") 40 | with self.assertRaises(TypeError): 41 | latent_position_test(A1, A2, n_bootstraps=0.5) 42 | with self.assertRaises(TypeError): 43 | latent_position_test(A1, A2, n_components=0.5) 44 | with self.assertRaises(TypeError): 45 | latent_position_test(A1, A2, embedding=6) 46 | with self.assertRaises(TypeError): 47 | latent_position_test(A1, A2, test_case=6) 48 | with self.assertRaises(TypeError): 49 | latent_position_test(A1, A2, workers="oops") 50 | 51 | def test_n_bootstraps(self): 52 | np.random.seed(1234556) 53 | A1 = er_np(5, 0.8) 54 | A2 = er_np(5, 0.8) 55 | 56 | lpt = latent_position_test(A1, A2, n_bootstraps=234, n_components=None) 57 | assert lpt[2]["null_distribution_1"].shape[0] == 234 58 | 59 | def test_bad_matrix_inputs(self): 60 | np.random.seed(1234556) 61 | A1 = er_np(5, 0.8) 62 | A2 = er_np(5, 0.8) 63 | A1[2, 0] = 1 # make asymmetric 64 | A1[0, 2] = 0 65 | with self.assertRaises(NotImplementedError): # TODO : remove when we implement 66 | latent_position_test(A1, A2) 67 | 68 | bad_matrix = [[1, 2]] 69 | with self.assertRaises(TypeError): 70 | latent_position_test(bad_matrix, A2) 71 | 72 | with self.assertRaises(ValueError): 73 | latent_position_test(A1[:2, :2], A2) 74 | 75 | def test_rotation_norm(self): 76 | # two triangles rotated by 90 degrees 77 | points1 = np.array([[0, 0], [3, 0], [3, -2]]) 78 | rotation = np.array([[0, 1], [-1, 0]]) 79 | points2 = np.dot(points1, rotation) 80 | 81 | n = _difference_norm(points1, points2, embedding="ase", test_case="rotation") 82 | self.assertAlmostEqual(n, 0) 83 | 84 | def test_diagonal_rotation_norm(self): 85 | # triangle in 2d 86 | points1 = np.array([[0, 0], [3, 0], [3, -2]], dtype=np.float64) 87 | rotation = np.array([[0, 1], [-1, 0]]) 88 | # rotated 90 degrees 89 | points2 = np.dot(points1, rotation) 90 | # diagonally scaled 91 | diagonal = np.array([[2, 0, 0], [0, 3, 0], [0, 0, 2]]) 92 | points2 = np.dot(diagonal, points2) 93 | 94 | n = _difference_norm( 95 | points1, points2, embedding="ase", test_case="diagonal-rotation" 96 | ) 97 | self.assertAlmostEqual(n, 0) 98 | 99 | def test_scalar_rotation_norm(self): 100 | # triangle in 2d 101 | points1 = np.array([[0, 0], [3, 0], [3, -2]], dtype=np.float64) 102 | rotation = np.array([[0, 1], [-1, 0]]) 103 | # rotated 90 degrees 104 | points2 = np.dot(points1, rotation) 105 | # scaled 106 | points2 = 2 * points2 107 | 108 | n = _difference_norm( 109 | points1, points2, 
embedding="ase", test_case="scalar-rotation" 110 | ) 111 | self.assertAlmostEqual(n, 0) 112 | 113 | 114 | if __name__ == "__main__": 115 | unittest.main() 116 | -------------------------------------------------------------------------------- /tests/test_spectral_nomination.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 3 | 4 | import itertools 5 | import unittest 6 | 7 | import numpy as np 8 | 9 | from graspologic.embed.ase import AdjacencySpectralEmbed 10 | from graspologic.nominate import SpectralVertexNomination 11 | from graspologic.simulations.simulations import sbm 12 | 13 | # global constants for tests 14 | n_verts = 50 15 | p = np.array([[0.7, 0.25, 0.2], [0.25, 0.8, 0.3], [0.2, 0.3, 0.85]]) 16 | labels = np.array([0] * n_verts + [1] * n_verts + [2] * n_verts) 17 | adj = np.array(sbm(3 * [n_verts], p), dtype=int) 18 | embeder = AdjacencySpectralEmbed() 19 | pre_embeded = embeder.fit_transform(adj) 20 | 21 | 22 | class TestSpectralVertexNominatorOutputs(unittest.TestCase): 23 | def _nominate(self, X, seed, nominator=None, k=None): 24 | if nominator is None: 25 | nominator = SpectralVertexNomination(n_neighbors=k) 26 | nominator.fit(X) 27 | n_verts = X.shape[0] 28 | nom_list, dists = nominator.predict(seed) 29 | self.assertEqual(nom_list.shape, (n_verts, seed.shape[0])) 30 | self.assertEqual(dists.shape, (n_verts, seed.shape[0])) 31 | return nom_list 32 | 33 | def test_seed_inputs(self): 34 | with self.assertRaises(IndexError): 35 | self._nominate(adj, np.zeros((1, 50), dtype=int)) 36 | with self.assertRaises(TypeError): 37 | self._nominate(adj, np.random.random((10, 2))) 38 | 39 | def test_X_inputs(self): 40 | with self.assertRaises(IndexError): 41 | self._nominate(np.zeros((5, 5, 5), dtype=int), np.zeros(3, dtype=int)) 42 | with self.assertRaises(TypeError): 43 | self._nominate([[0] * 10] * 10, np.zeros(3, dtype=int)) 44 | # embedding should have fewer cols than rows. 45 | svn = SpectralVertexNomination(input_graph=False) 46 | with self.assertRaises(IndexError): 47 | self._nominate( 48 | np.zeros((10, 20), dtype=int), 49 | np.zeros(3, dtype=int), 50 | nominator=svn, 51 | ) 52 | # adj matrix should be square 53 | with self.assertRaises(IndexError): 54 | self._nominate(np.zeros((3, 4), dtype=int), np.zeros(3, dtype=int)) 55 | 56 | def _test_k(self): 57 | # k should be > 0 58 | with self.assertRaises(ValueError): 59 | self._nominate(adj, np.zeros(3, dtype=int), k=0) 60 | # k of wrong type 61 | with self.assertRaises(TypeError): 62 | self._nominate(adj, np.zeros(3, dtype=int), k="hello world") 63 | 64 | def test_constructor_inputs(self): 65 | with self.assertRaises(ValueError): 66 | svn = SpectralVertexNomination(embedder="hi") 67 | self._nominate(adj, np.zeros(3, dtype=int), nominator=svn) 68 | 69 | def test_constructor_inputs1(self): 70 | # embedder must be BaseSpectralEmbed or str 71 | with self.assertRaises(TypeError): 72 | svn = SpectralVertexNomination(embedder=45) 73 | 74 | def test_constructor_inputs2(self): 75 | # input graph param has wrong type 76 | with self.assertRaises(TypeError): 77 | svn = SpectralVertexNomination(input_graph=4) 78 | 79 | def test_basic_unattributed(self): 80 | """ 81 | Runs two attributed seeds and two unattributed seeds with each nominator. 82 | Ensures all options work. Should be fast. Nested parametrization tests all 83 | combinations of listed parameters. 
84 | """ 85 | nominators = [ 86 | SpectralVertexNomination(embedder="ASE"), 87 | SpectralVertexNomination(embedder="LSE"), 88 | SpectralVertexNomination(embedder=embeder), 89 | ] 90 | seeds = [ 91 | np.array([8]), 92 | np.array([2, 6, 9, 15, 25]), 93 | np.arange(n_verts - 1, dtype=int), 94 | ] 95 | for nominator, seed in itertools.product(nominators, seeds): 96 | self._nominate(adj, seed, nominator) 97 | 98 | def test_pre_embedded(self): 99 | seeds = [ 100 | np.array([8]), 101 | np.array([2, 6, 9, 15, 25]), 102 | np.arange(n_verts - 1, dtype=int), 103 | ] 104 | for seed in seeds: 105 | svn = SpectralVertexNomination(input_graph=False) 106 | self._nominate(pre_embeded, seed, nominator=svn) 107 | -------------------------------------------------------------------------------- /graspologic/pipeline/graph_builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT license. 3 | 4 | from collections import OrderedDict 5 | from typing import Any, Union 6 | 7 | import networkx as nx 8 | from beartype import beartype 9 | 10 | from graspologic.types import Dict, List, Tuple 11 | 12 | __all__ = ["GraphBuilder"] 13 | 14 | 15 | class GraphBuilder: 16 | """ 17 | GraphBuilder is a simple builder for networkx Graphs. To use less memory, 18 | it automatically maps all node ids of any hashable type to ``int``. 19 | 20 | In other words, if you can use it as a key in a dictionary, it will work. 21 | 22 | By default, the main method it provides, ``add_edge``, will sum edge weights 23 | if the edge already exists. 24 | 25 | Parameters 26 | ---------- 27 | directed : bool (default=False) 28 | Used to create either a :class:`networkx.Graph` or 29 | :class:`networkx.DiGraph` object. 30 | """ 31 | 32 | @beartype 33 | def __init__(self, directed: bool = False): 34 | # OrderedDict is the default for {} anyway, but I wanted to be very explicit, 35 | # since we absolutely rely on the ordering 36 | self._id_map: Dict[Any, int] = OrderedDict() 37 | self._graph = nx.DiGraph() if directed else nx.Graph() 38 | 39 | @beartype 40 | def add_edge( 41 | self, 42 | source: Any, 43 | target: Any, 44 | weight: Union[int, float] = 1.0, 45 | sum_weight: bool = True, 46 | **attributes: Any, 47 | ) -> None: 48 | """ 49 | Adds a weighted edge between the provided source and target. The source 50 | and target id are converted to a unique ``int``. 51 | 52 | If no weight is provided, a default weight of ``1.0`` is used. 53 | 54 | If an edge between the source and target already exists, and if the 55 | ``sum_weight`` argument is ``True``, then the weights are summed. 56 | 57 | Otherwise, the last weight provided will be used as the edge's weight. 58 | 59 | Any other attributes specified will be added to the edge's data dictionary. 60 | 61 | Parameters 62 | ---------- 63 | source : Any 64 | source node id 65 | target : Any 66 | target node id 67 | weight : Union[int, float] (default=1.0) 68 | The weight for the edge. If none is provided, the weight is defaulted to 1. 69 | sum_weight : bool (default=True) 70 | If an edge between the ``source`` and ``target`` already exist, should we 71 | sum the edge weights or overwrite the edge weight with the provided 72 | ``weight`` value. 73 | attributes : kwargs 74 | The attributes kwargs are presumed to be attributes that should be added 75 | to the edge dictionary for ``source`` and ``target``. 
76 | """ 77 | source_id = self._map_node_id(source) 78 | target_id = self._map_node_id(target) 79 | if sum_weight: 80 | old = self._graph.get_edge_data(source_id, target_id, default={}).get( 81 | "weight", 0 82 | ) 83 | self._graph.add_edge( 84 | source_id, target_id, weight=old + weight, **attributes 85 | ) 86 | else: 87 | self._graph.add_edge(source_id, target_id, weight=weight, **attributes) 88 | 89 | def build(self) -> Tuple[Union[nx.Graph, nx.DiGraph], Dict[Any, int], List[Any]]: 90 | """ 91 | Returns 92 | ------- 93 | Tuple[Union[nx.Graph, nx.DiGraph], Dict[Any, int], List[Any]] 94 | The returned tuple is either an undirected or directed graph, depending on 95 | the constructor argument ``directed``. The second value in the tuple is a 96 | dictionary of original node ids to their assigned integer ids. The third 97 | and final value in the tuple is a List of original node ids, where the 98 | index corresponds to the assigned integer and the value is the corresponding 99 | original ID. 100 | """ 101 | old_to_new = self._id_map 102 | new_to_old = [key for key, _ in old_to_new.items()] 103 | return self._graph, old_to_new, new_to_old 104 | 105 | def _map_node_id(self, node_id: Any) -> int: 106 | mapped_node_id = self._id_map.get(node_id, len(self._id_map)) 107 | self._id_map[node_id] = mapped_node_id 108 | return mapped_node_id 109 | -------------------------------------------------------------------------------- /tests/test_mds.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 3 | 4 | import unittest 5 | 6 | import numpy as np 7 | from numpy.testing import assert_almost_equal 8 | from sklearn.utils.estimator_checks import check_estimator 9 | 10 | from graspologic.embed.mds import ClassicalMDS 11 | 12 | 13 | class TestMDS(unittest.TestCase): 14 | def test_sklearn_conventions(self): 15 | check_estimator(ClassicalMDS()) 16 | 17 | def test_input(self): 18 | X = np.random.normal(0, 1, size=(10, 3)) 19 | 20 | # X cannot be tensor when precomputed dissimilarity 21 | with self.assertRaises(ValueError): 22 | tensor = np.random.normal(0, 1, size=(10, 3, 3)) 23 | mds = ClassicalMDS(n_components=3, dissimilarity="precomputed") 24 | mds.fit(tensor) 25 | 26 | with self.assertRaises(ValueError): 27 | one_dimensional = np.random.normal(size=10) 28 | mds = ClassicalMDS(n_components=2, dissimilarity="euclidean") 29 | mds.fit(one_dimensional) 30 | 31 | # n_components > n_samples 32 | with self.assertRaises(ValueError): 33 | mds = ClassicalMDS(n_components=100) 34 | mds.fit(X) 35 | 36 | # Invalid n_components 37 | with self.assertRaises(ValueError): 38 | mds = ClassicalMDS(n_components=-2) 39 | mds.fit(X) 40 | 41 | with self.assertRaises(TypeError): 42 | mds = ClassicalMDS(n_components="1") 43 | mds.fit(X) 44 | 45 | # Invalid dissimilarity 46 | with self.assertRaises(ValueError): 47 | mds = ClassicalMDS(dissimilarity="abc") 48 | mds.fit(X) 49 | 50 | # Invalid input for fit function 51 | with self.assertRaises(ValueError): 52 | mds = ClassicalMDS(n_components=3, dissimilarity="precomputed") 53 | mds.fit(X="bad_input") 54 | 55 | # Must be square and symmetric matrix if precomputed dissimilarity 56 | with self.assertRaises(ValueError): 57 | mds = ClassicalMDS(n_components=3, dissimilarity="precomputed") 58 | mds.fit(X) 59 | 60 | def test_tensor_input(self): 61 | X = np.random.normal(size=(100, 5, 5)) 62 | mds = ClassicalMDS(n_components=3, dissimilarity="euclidean") 63 | 
mds.fit(X) 64 | 65 | self.assertEqual(mds.dissimilarity_matrix_.shape, (100, 100)) 66 | 67 | X_transformed = mds.fit_transform(X) 68 | self.assertEqual(X_transformed.shape, (100, 3)) 69 | 70 | def test_output(self): 71 | """ 72 | Recover a 3D tetrahedron with distance 1 between all points 73 | 74 | Use both fit and fit_transform functions 75 | """ 76 | 77 | def _compute_dissimilarity(arr): 78 | out = np.zeros((4, 4)) 79 | for i in range(4): 80 | out[i] = np.linalg.norm(arr - arr[i], axis=1) 81 | 82 | return out 83 | 84 | def use_fit_transform(): 85 | A = np.ones((4, 4)) - np.identity(4) 86 | 87 | mds = ClassicalMDS(n_components=3, dissimilarity="precomputed") 88 | B = mds.fit_transform(A) 89 | 90 | Ahat = _compute_dissimilarity(B) 91 | 92 | # Checks up to 7 decimal points 93 | assert_almost_equal(A, Ahat) 94 | 95 | def use_fit(): 96 | A = np.ones((4, 4)) - np.identity(4) 97 | 98 | mds = ClassicalMDS(n_components=3, dissimilarity="precomputed") 99 | mds.fit(A) 100 | B = np.dot(mds.components_, np.diag(mds.singular_values_)) 101 | 102 | Ahat = _compute_dissimilarity(B) 103 | 104 | # Checks up to 7 decimal points 105 | assert_almost_equal(A, Ahat) 106 | 107 | def use_euclidean(): 108 | A = np.array([ 109 | [-7.62291243e-17, 6.12372436e-01, 4.95031815e-16], 110 | [-4.97243701e-01, -2.04124145e-01, -2.93397401e-01], 111 | [5.02711453e-01, -2.04124145e-01, -2.83926977e-01], 112 | [-5.46775198e-03, -2.04124145e-01, 5.77324378e-01], 113 | ]) 114 | 115 | mds = ClassicalMDS(dissimilarity="euclidean") 116 | B = mds.fit_transform(A) 117 | 118 | target = np.ones((4, 4)) - np.identity(4) 119 | assert_almost_equal(mds.dissimilarity_matrix_, target) 120 | 121 | use_fit_transform() 122 | use_fit() 123 | use_euclidean() 124 | -------------------------------------------------------------------------------- /docs/tutorials/simulations/rdpg.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Random Dot Product Graph (RDPG) Model" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import graspologic\n", 17 | "\n", 18 | "import numpy as np\n", 19 | "%matplotlib inline" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "RDPG is a latent position generative model, in which the probability of an edge existing between pairs of vertices is determined by the dot product of the associated latent position vectors. In other words, given $X \\in \\mathbb{R}^{n\\times d}$, where $n$ is the number of vertices and $d$ is the dimensionality of each vector, the probability matrix $P$ is given by:\n", 27 | "\n", 28 | "$$ P = X X^T $$\n", 29 | "\n", 30 | "Both ER and SBM models can be formulated as a RDPG. Below, we sample $ER_{NP}(100, 0.5)$ using RDPG formulation. In this case, we set $X \\in \\mathbb{R}^{100\\times 2}$ where all the values in $X$ is 0.5. This results in $P$ matrix where all the probabilities are also 0.5." 
31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": { 37 | "tags": [] 38 | }, 39 | "outputs": [], 40 | "source": [ 41 | "from graspologic.simulations import rdpg\n", 42 | "\n", 43 | "# Create a latent position matrix\n", 44 | "X = np.full((100, 2), 0.5)\n", 45 | "print(X @ X.T)" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "A = rdpg(X)" 55 | ] 56 | }, 57 | { 58 | "cell_type": "markdown", 59 | "metadata": {}, 60 | "source": [ 61 | "## Visualize the adjacency matrix" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "from graspologic.plot import heatmap\n", 71 | "\n", 72 | "_ = heatmap(A, title='ER_NP(100, 0.5) Using RDPG')" 73 | ] 74 | }, 75 | { 76 | "cell_type": "markdown", 77 | "metadata": {}, 78 | "source": [ 79 | "## Stochastic block model as RDPG\n", 80 | "\n", 81 | "We can formulate the following 2-block SBM parameters as RDPG, where the latent positions live in $\\mathbb{R}^3$.\n", 82 | "\n", 83 | "\\begin{align*}\n", 84 | "n &= [50, 50]\\\\\n", 85 | "p &= \\begin{bmatrix}0.33 & 0.09\\\\\n", 86 | "0.09 & 0.03\n", 87 | "\\end{bmatrix}\n", 88 | "\\end{align*}\n", 89 | "\n", 90 | "as\n", 91 | "\n", 92 | "\\begin{align*}\n", 93 | "X &= \\begin{bmatrix}0.5 & 0.2 & 0.2\\\\\n", 94 | "& \\vdots & \\\\\n", 95 | "0.1 & 0.1 & 0.1\\\\\n", 96 | "& \\vdots & \n", 97 | "\\end{bmatrix}\\\\\n", 98 | "P &= XX^T\n", 99 | "\\end{align*}" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": null, 105 | "metadata": {}, 106 | "outputs": [], 107 | "source": [ 108 | "X = np.array([[0.5, 0.2, 0.2]] * 50 + [[0.1, 0.1, 0.1]] * 50)\n", 109 | "A_rdpg = rdpg(X, loops=False)\n", 110 | "_ = heatmap(A_rdpg, title='2-block SBM as RDPG')" 111 | ] 112 | }, 113 | { 114 | "cell_type": "markdown", 115 | "metadata": {}, 116 | "source": [ 117 | "### Results from SBM simulation using same formulation shows similar structure" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": null, 123 | "metadata": {}, 124 | "outputs": [], 125 | "source": [ 126 | "from graspologic.simulations import sbm\n", 127 | "\n", 128 | "n = [50, 50]\n", 129 | "p = [[0.33, 0.09], [0.09, 0.03]]\n", 130 | "\n", 131 | "A_sbm = sbm(n, p)\n", 132 | "_ = heatmap(A_sbm, title = 'SBM Simulation')" 133 | ] 134 | } 135 | ], 136 | "metadata": { 137 | "kernelspec": { 138 | "display_name": "Python 3", 139 | "language": "python", 140 | "name": "python3" 141 | }, 142 | "language_info": { 143 | "codemirror_mode": { 144 | "name": "ipython", 145 | "version": 3 146 | }, 147 | "file_extension": ".py", 148 | "mimetype": "text/x-python", 149 | "name": "python", 150 | "nbconvert_exporter": "python", 151 | "pygments_lexer": "ipython3", 152 | "version": "3.7.0" 153 | } 154 | }, 155 | "nbformat": 4, 156 | "nbformat_minor": 4 157 | } 158 | -------------------------------------------------------------------------------- /graspologic/utils/ptr.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 
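A minimal usage sketch for the function defined below (assuming, as in the published API, that it is re-exported as ``graspologic.utils.pass_to_ranks``):

```python
import numpy as np

from graspologic.utils import pass_to_ranks

# Symmetric weighted adjacency matrix with one heavy-tailed edge weight.
A = np.array([
    [0.0, 1.0, 10.0],
    [1.0, 0.0, 100.0],
    [10.0, 100.0, 0.0],
])

# The default 'simple-nonzero' method replaces the raw weights with their
# ranks, rescaled into (0, 1), which tames the outlying weight of 100.
print(pass_to_ranks(A))
```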
3 | 4 | import numpy as np 5 | from scipy.stats import rankdata 6 | 7 | from ..types import GraphRepresentation 8 | from .utils import import_graph, is_loopless, is_symmetric, is_unweighted, symmetrize 9 | 10 | 11 | def pass_to_ranks( 12 | graph: GraphRepresentation, method: str = "simple-nonzero" 13 | ) -> GraphRepresentation: 14 | """ 15 | Rescales edge weights of an adjacency matrix based on their relative rank in 16 | the graph. 17 | 18 | Parameters 19 | ---------- 20 | graph: array_like or networkx.Graph 21 | Adjacency matrix 22 | 23 | method: {'simple-nonzero' (default), 'simple-all', 'zero-boost'} string, optional 24 | 25 | - 'simple-nonzero' 26 | assigns ranks to all non-zero edges, settling ties using 27 | the average. Ranks are then scaled by 28 | :math:`\\frac{rank(\\text{non-zero edges})}{\\text{total non-zero edges} + 1}` 29 | - 'simple-all' 30 | assigns ranks to all non-zero edges, settling ties using 31 | the average. Ranks are then scaled by 32 | :math:`\\frac{rank(\\text{non-zero edges})}{n^2 + 1}` 33 | where n is the number of nodes 34 | - 'zero-boost' 35 | preserves the edge weight for all 0s, but ranks the other 36 | edges as if the ranks of all 0 edges has been assigned. If there are 37 | 10 0-valued edges, the lowest non-zero edge gets weight 11 / (number 38 | of possible edges). Ties settled by the average of the weight that those 39 | edges would have received. Number of possible edges is determined 40 | by the type of graph (loopless or looped, directed or undirected). 41 | 42 | See also 43 | -------- 44 | scipy.stats.rankdata 45 | 46 | Returns 47 | ------- 48 | graph: numpy.ndarray, shape(n_vertices, n_vertices) 49 | Adjacency matrix of graph after being passed to ranks 50 | """ 51 | 52 | graph = import_graph(graph) # just for typechecking 53 | 54 | if is_unweighted(graph): 55 | return graph 56 | 57 | if graph.min() < 0: 58 | raise UserWarning( 59 | "Current pass-to-ranks on graphs with negative" 60 | + " weights will yield nonsensical results, especially for zero-boost" 61 | ) 62 | 63 | if method == "zero-boost": 64 | if is_symmetric(graph): 65 | # start by working with half of the graph, since symmetric 66 | triu = np.triu(graph) 67 | non_zeros = triu[triu != 0] 68 | else: 69 | non_zeros = graph[graph != 0] 70 | rank = rankdata(non_zeros) 71 | 72 | if is_symmetric(graph): 73 | if is_loopless(graph): 74 | num_zeros = (len(graph[graph == 0]) - graph.shape[0]) / 2 75 | possible_edges = graph.shape[0] * (graph.shape[0] - 1) / 2 76 | else: 77 | num_zeros = ( 78 | len(triu[triu == 0]) - graph.shape[0] * (graph.shape[0] - 1) / 2 79 | ) 80 | possible_edges = graph.shape[0] * (graph.shape[0] + 1) / 2 81 | else: 82 | if is_loopless(graph): 83 | # n^2 - num_nonzero - num_diagonal 84 | num_zeros = graph.size - len(non_zeros) - graph.shape[0] 85 | # n^2 - num_diagonal 86 | possible_edges = graph.size - graph.shape[0] 87 | else: 88 | num_zeros = graph.size - len(non_zeros) 89 | possible_edges = graph.size 90 | 91 | # shift up by the number of zeros 92 | rank = rank + num_zeros 93 | # normalize by the number of possible edges for this kind of graph 94 | rank = rank / possible_edges 95 | # put back into matrix form (and reflect over the diagonal if necessary) 96 | if is_symmetric(graph): 97 | triu[triu != 0] = rank 98 | graph = symmetrize(triu, method="triu") 99 | else: 100 | graph[graph != 0] = rank 101 | return graph 102 | elif method in ["simple-all", "simple-nonzero"]: 103 | non_zeros = graph[graph != 0] 104 | rank = rankdata(non_zeros) 105 | if method == "simple-all": 106 
| normalizer = graph.size 107 | elif method == "simple-nonzero": 108 | normalizer = rank.shape[0] 109 | rank = rank / (normalizer + 1) 110 | graph[graph != 0] = rank 111 | return graph 112 | else: 113 | raise ValueError("Unsuported pass-to-ranks method") 114 | -------------------------------------------------------------------------------- /graspologic/align/orthogonal_procrustes.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 3 | 4 | import numpy as np 5 | from scipy.linalg import orthogonal_procrustes 6 | 7 | from .base import BaseAlign 8 | 9 | 10 | class OrthogonalProcrustes(BaseAlign): 11 | """ 12 | Computes the matrix solution of the classical orthogonal Procrustes [1]_ 13 | problem, which is that given two matrices ``X`` and ``Y`` of equal shape 14 | (n, d), find an orthogonal matrix that most closely maps ``X`` to 15 | ``Y``. Subsequently, uses that matrix to transform either the original ``X``, 16 | or a different dataset in the same space. 17 | 18 | Note that when used to match two datasets, this method unlike 19 | :class:`~graspologic.align.SeedlessProcrustes`, not only requires that the 20 | datasets have the same number of entries, but also that there is some 21 | correspondence between the entries. In graph embeddings, this usually 22 | corresponds to the assumption that the vertex :math:`i` in graph ``X`` has the same 23 | latent position as the vertex :math:`i` in graph ``Y``. 24 | 25 | Attributes 26 | ---------- 27 | Q_ : array, size (d, d) 28 | Final orthogonal matrix, used to modify ``X``. 29 | 30 | score_ : float 31 | Final value of the objective function: :math:`|| X Q - Y ||_F` 32 | Lower means the datasets have been matched together better. 33 | 34 | References 35 | ---------- 36 | 37 | .. [1] https://en.wikipedia.org/wiki/Orthogonal_Procrustes_problem 38 | 39 | .. [2] Peter H. Schonemann, "A generalized solution of the orthogonal 40 | Procrustes problem", Psychometrica -- Vol. 31, No. 1, March, 1996. 41 | 42 | Notes 43 | ----- 44 | Formally, minimizes :math:`|| X Q - Y ||_F`, which has a closed form 45 | solution, whenever :math:`Q` is constrained to be an orthogonal matrix, 46 | that is a matrix that satisfies :math:`Q^T Q = Q Q^T = I`. For the more 47 | details, including the proof of the closed-form solution see [1]_. 48 | 49 | Implementation-wise, this class is a wrapper of the 50 | :func:`scipy.linalg.orthogonal_procrustes`, which itself uses an algorithm 51 | described in find the optimal solution algorithm [2]_. 52 | 53 | """ 54 | 55 | def __init__( 56 | self, 57 | ) -> None: 58 | super().__init__() 59 | 60 | def fit(self, X: np.ndarray, Y: np.ndarray) -> "OrthogonalProcrustes": 61 | """ 62 | Uses the two datasets to learn the matrix :attr:`~graspologic.align.OrthogonalProcrustes.Q_` that aligns the 63 | first dataset with the second. 64 | 65 | Parameters 66 | ---------- 67 | X : np.ndarray, shape (n, d) 68 | Dataset to be mapped to ``Y``, must have the same shape as ``Y``. 69 | 70 | Y : np.ndarray, shape (m, d) 71 | Target dataset, must have the same shape as ``X``. 72 | 73 | 74 | Returns 75 | ------- 76 | self : returns an instance of self 77 | 78 | """ 79 | X, Y = self._check_datasets(X, Y) 80 | 81 | _, d = X.shape 82 | if X.shape[0] != Y.shape[0]: 83 | msg = ( 84 | "Two datasets have different number of entries! " 85 | "OrthogonalProcrustes assumes that entries of the two " 86 | "datasets are matched. 
consider using SeedlessProcrustes " 87 | "instead." 88 | ) 89 | raise ValueError(msg) 90 | 91 | _, d = X.shape 92 | self.Q_, _ = orthogonal_procrustes(X, Y) 93 | self.score_ = np.linalg.norm(X @ self.Q_ - Y, ord="fro") 94 | return self 95 | 96 | def fit_transform(self, X: np.ndarray, Y: np.ndarray) -> np.ndarray: 97 | """ 98 | Uses the two datasets to learn the matrix :attr:`~graspologic.align.OrthogonalProcrustes.Q_` that aligns the 99 | first dataset with the second. Then, transforms the first dataset ``X`` 100 | using the learned matrix :attr:`~graspologic.align.OrthogonalProcrustes.Q_`. 101 | 102 | Parameters 103 | ---------- 104 | X : np.ndarray, shape (n, d) 105 | Dataset to be mapped to ``Y``, must have the same shape as ``Y``. 106 | 107 | Y : np.ndarray, shape (m, d) 108 | Target dataset, must have the same shape as ``X``. 109 | 110 | Returns 111 | ------- 112 | X_prime : np.ndarray, shape (n, d) 113 | First dataset of vectors, aligned to second. Equal to 114 | ``X`` @ :attr:`~graspologic.align.BaseAlign.Q_`. 115 | """ 116 | return super().fit_transform(X, Y) 117 | -------------------------------------------------------------------------------- /graspologic/cluster/kclust.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 3 | 4 | from typing import Optional, Union 5 | 6 | import numpy as np 7 | from sklearn.cluster import KMeans 8 | from sklearn.metrics import adjusted_rand_score, silhouette_score 9 | 10 | from graspologic.types import List 11 | 12 | from .base import BaseCluster 13 | 14 | 15 | class KMeansCluster(BaseCluster): 16 | ari_: Optional[List[float]] 17 | 18 | """ 19 | KMeans Cluster. 20 | 21 | It computes all possible models from one component to ``max_clusters``. 22 | When the true labels are known, the best model is given by the model with highest 23 | adjusted Rand index (ARI). 24 | Otherwise, the best model is given by the model with highest silhouette score. 25 | 26 | Parameters 27 | ---------- 28 | max_clusters : int, default=2. 29 | The maximum number of clusters to consider. Must be ``>=2``. 30 | 31 | random_state : int, RandomState instance or None, optional (default=None) 32 | If int, ``random_state`` is the seed used by the random number generator; 33 | If RandomState instance, ``random_state`` is the random number generator; 34 | If None, the random number generator is the RandomState instance used 35 | by ``np.random``. 36 | 37 | Attributes 38 | ---------- 39 | n_clusters_ : int 40 | Optimal number of clusters. If y is given, it is based on largest 41 | ARI. Otherwise, it is based on highest silhouette score. 42 | 43 | model_ : KMeans object 44 | Fitted KMeans object fitted with ``n_clusters_``. 45 | 46 | silhouette_ : list 47 | List of silhouette scores computed for all possible number 48 | of clusters given by ``range(2, max_clusters)``. 49 | 50 | ari_ : list 51 | Only computed when y is given. List of ARI values computed for 52 | all possible number of clusters given by ``range(2, max_clusters)``. 53 | """ 54 | 55 | def __init__( 56 | self, 57 | max_clusters: int = 2, 58 | random_state: Optional[Union[int, np.random.RandomState]] = None, 59 | ): 60 | if isinstance(max_clusters, int): 61 | if max_clusters <= 1: 62 | msg = "n_components must be >= 2 or None." 
63 | raise ValueError(msg) 64 | else: 65 | self.max_clusters = max_clusters 66 | else: 67 | msg = "max_clusters must be an integer, not {}.".format(type(max_clusters)) 68 | raise TypeError(msg) 69 | self.random_state = random_state 70 | 71 | def fit(self, X: np.ndarray, y: Optional[np.ndarray] = None) -> "KMeansCluster": 72 | """ 73 | Fits kmeans model to the data. 74 | 75 | Parameters 76 | ---------- 77 | X : array-like, shape (n_samples, n_features) 78 | List of n_features-dimensional data points. Each row 79 | corresponds to a single data point. 80 | 81 | y : array-like, shape (n_samples,), optional (default=None) 82 | List of labels for `X` if available. Used to compute ARI scores. 83 | 84 | Returns 85 | ------- 86 | self 87 | """ 88 | # Deal with number of clusters 89 | if self.max_clusters > X.shape[0]: 90 | msg = "n_components must be >= n_samples, but got \ 91 | n_components = {}, n_samples = {}".format(self.max_clusters, X.shape[0]) 92 | raise ValueError(msg) 93 | else: 94 | max_clusters = self.max_clusters 95 | 96 | # Get parameters 97 | random_state = self.random_state 98 | 99 | # Compute all models 100 | models = [] 101 | silhouettes = [] 102 | aris = [] 103 | for n in range(2, max_clusters + 1): 104 | model = KMeans(n_clusters=n, random_state=random_state) 105 | 106 | # Fit and compute values 107 | predictions = model.fit_predict(X) 108 | models.append(model) 109 | silhouettes.append(silhouette_score(X, predictions)) 110 | if y is not None: 111 | aris.append(adjusted_rand_score(y, predictions)) 112 | 113 | if y is not None: 114 | self.ari_ = aris 115 | self.silhouette_ = silhouettes 116 | self.n_clusters_ = np.argmax(aris) + 1 117 | self.model_ = models[np.argmax(aris)] 118 | else: 119 | self.ari_ = None 120 | self.silhouette_ = silhouettes 121 | self.n_clusters_ = np.argmax(silhouettes) + 1 122 | self.model_ = models[np.argmax(silhouettes)] 123 | 124 | return self 125 | -------------------------------------------------------------------------------- /tests/test_n2v.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 
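Before the tests, a small sketch of the embedding call they exercise, using the same ``graspologic.embed.n2v`` module path imported below (the karate-club graph and unit weights are illustrative; the walk sampling is stochastic):

```python
import networkx as nx

import graspologic.embed.n2v as n2v

graph = nx.karate_club_graph()
for u, v in graph.edges():
    graph[u][v]["weight"] = 1  # unit weights, as the tests below assign

embedding, node_labels = n2v.node2vec_embed(graph)
print(embedding.shape, len(node_labels))  # (34, 128) with defaults; one label per node
```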
3 | 4 | import unittest 5 | from typing import TYPE_CHECKING 6 | 7 | import networkx as nx 8 | 9 | import graspologic.embed.n2v as n2v 10 | from graspologic.embed.n2v import _Node2VecGraph 11 | 12 | if TYPE_CHECKING: 13 | import numpy as np 14 | 15 | 16 | class Node2VecEmbedTest(unittest.TestCase): 17 | def test_node2vec_embedding_correct_shape_is_returned(self): 18 | import io 19 | 20 | graph = nx.read_edgelist( 21 | io.StringIO(_edge_list), nodetype=int, create_using=nx.DiGraph() 22 | ) 23 | 24 | model = n2v.node2vec_embed(graph) 25 | model_matrix: np.ndarray = model[0] 26 | vocab_list = model[1] 27 | self.assertIsNotNone(model) 28 | self.assertIsNotNone(model[0]) 29 | self.assertIsNotNone(model[1]) 30 | 31 | # model matrix should be 34 x 128 32 | self.assertEqual(model_matrix.shape[0], 34) 33 | self.assertEqual(model_matrix.shape[1], 128) 34 | 35 | # vocab list should have exactly 34 elements 36 | self.assertEqual(len(vocab_list), 34) 37 | 38 | def test_node2vec_embedding_florentine_graph_correct_shape_is_returned(self): 39 | graph = nx.florentine_families_graph() 40 | for s, t in graph.edges(): 41 | graph.add_edge(s, t, weight=1) 42 | 43 | model = n2v.node2vec_embed(graph) 44 | model_matrix: np.ndarray = model[0] 45 | vocab_list = model[1] 46 | self.assertIsNotNone(model) 47 | self.assertIsNotNone(model[0]) 48 | self.assertIsNotNone(model[1]) 49 | 50 | # model matrix should be 34 x 128 51 | self.assertEqual(model_matrix.shape[0], 15) 52 | self.assertEqual(model_matrix.shape[1], 128) 53 | 54 | # vocab list should have exactly 34 elements 55 | self.assertEqual(len(vocab_list), 15) 56 | 57 | def test_node2vec_embedding_barbell_graph_correct_shape_is_returned(self): 58 | graph = nx.barbell_graph(25, 2) 59 | for s, t in graph.edges(): 60 | graph.add_edge(s, t, weight=1) 61 | 62 | model = n2v.node2vec_embed(graph) 63 | model_matrix: np.ndarray = model[0] 64 | vocab_list = model[1] 65 | self.assertIsNotNone(model) 66 | self.assertIsNotNone(model[0]) 67 | self.assertIsNotNone(model[1]) 68 | 69 | # model matrix should be 34 x 128 70 | self.assertEqual(model_matrix.shape[0], 52) 71 | self.assertEqual(model_matrix.shape[1], 128) 72 | 73 | # vocab list should have exactly 34 elements 74 | self.assertEqual(len(vocab_list), 52) 75 | 76 | def test_get_walk_length_lower_defaults_to_1(self): 77 | expected_walk_length = 1 78 | 79 | g = _Node2VecGraph(nx.Graph(), 1, 1) 80 | w = g._get_walk_length_interpolated( 81 | degree=0, percentiles=[1, 2, 3, 4, 10, 100], max_walk_length=10 82 | ) 83 | 84 | self.assertEqual(w, expected_walk_length) 85 | 86 | def test_get_walk_length_higher_default_to_walk_length(self): 87 | expected_walk_length = 100 88 | 89 | g = _Node2VecGraph(nx.Graph(), 1, 1) 90 | w = g._get_walk_length_interpolated( 91 | degree=10, 92 | percentiles=[2, 3, 4, 5, 6, 7, 8, 9], 93 | max_walk_length=expected_walk_length, 94 | ) 95 | 96 | self.assertEqual(w, expected_walk_length) 97 | 98 | def test_get_walk_length_in_middle_selects_interpolated_bucket(self): 99 | expected_walk_length = 5 100 | 101 | g = _Node2VecGraph(nx.Graph(), 1, 1) 102 | w = g._get_walk_length_interpolated( 103 | degree=5, percentiles=[2, 3, 4, 5, 6, 7, 8, 9], max_walk_length=10 104 | ) 105 | 106 | self.assertEqual(w, expected_walk_length) 107 | 108 | 109 | _edge_list = """ 110 | 1 32 111 | 1 22 112 | 1 20 113 | 1 18 114 | 1 14 115 | 1 13 116 | 1 12 117 | 1 11 118 | 1 9 119 | 1 8 120 | 1 7 121 | 1 6 122 | 1 5 123 | 1 4 124 | 1 3 125 | 1 2 126 | 2 31 127 | 2 22 128 | 2 20 129 | 2 18 130 | 2 14 131 | 2 8 132 | 2 4 133 | 2 3 134 | 3 
14 135 | 3 9 136 | 3 10 137 | 3 33 138 | 3 29 139 | 3 28 140 | 3 8 141 | 3 4 142 | 4 14 143 | 4 13 144 | 4 8 145 | 5 11 146 | 5 7 147 | 6 17 148 | 6 11 149 | 6 7 150 | 7 17 151 | 9 34 152 | 9 33 153 | 9 33 154 | 10 34 155 | 14 34 156 | 15 34 157 | 15 33 158 | 16 34 159 | 16 33 160 | 19 34 161 | 19 33 162 | 20 34 163 | 21 34 164 | 21 33 165 | 23 34 166 | 23 33 167 | 24 30 168 | 24 34 169 | 24 33 170 | 24 28 171 | 24 26 172 | 25 32 173 | 25 28 174 | 25 26 175 | 26 32 176 | 27 34 177 | 27 30 178 | 28 34 179 | 29 34 180 | 29 32 181 | 30 34 182 | 30 33 183 | 31 34 184 | 31 33 185 | 32 34 186 | 32 33 187 | 33 34 188 | """ 189 | -------------------------------------------------------------------------------- /tests/test_io.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 3 | 4 | import os 5 | import sys 6 | import tempfile 7 | import unittest 8 | from pathlib import Path 9 | 10 | import networkx as nx 11 | import numpy as np 12 | import pytest 13 | 14 | import graspologic as gs 15 | 16 | 17 | class TestImportGraph(unittest.TestCase): 18 | @classmethod 19 | def setUpClass(cls) -> None: 20 | # simple ERxN graph 21 | n = 15 22 | p = 0.5 23 | cls.A = np.zeros((n, n)) 24 | nedge = int(round(n * n * p)) 25 | np.put( 26 | cls.A, 27 | np.random.choice(np.arange(0, n * n), size=nedge, replace=False), 28 | np.random.normal(size=nedge), 29 | ) 30 | 31 | def test_graphin(self): 32 | G = nx.from_numpy_array(self.A) 33 | np.testing.assert_array_equal(nx.to_numpy_array(G), gs.utils.import_graph(G)) 34 | 35 | def test_npin(self): 36 | np.testing.assert_array_equal(self.A, gs.utils.import_graph(self.A)) 37 | 38 | def test_wrongtypein(self): 39 | a = 5 40 | with self.assertRaises(TypeError): 41 | gs.utils.import_graph(a) 42 | with self.assertRaises(TypeError): 43 | gs.utils.import_graph(None) 44 | 45 | def test_nonsquare(self): 46 | non_square = np.hstack((self.A, self.A)) 47 | with self.assertRaises(ValueError): 48 | gs.utils.import_graph(non_square) 49 | 50 | 51 | class TestImportEdgelist(unittest.TestCase): 52 | @classmethod 53 | def tearDownClass(cls) -> None: 54 | cls.tmpdir.cleanup() 55 | 56 | @classmethod 57 | def setUpClass(cls) -> None: 58 | cls.tmpdir = tempfile.TemporaryDirectory() 59 | n = 10 60 | p = 0.5 61 | wt = np.random.exponential 62 | wtargs = dict(scale=4) 63 | 64 | np.random.seed(1) 65 | 66 | cls.A = gs.simulations.er_np(n, p) 67 | cls.B = gs.simulations.er_np(n, p, wt=wt, wtargs=wtargs) 68 | 69 | G_A = nx.from_numpy_array(cls.A) 70 | G_B = nx.from_numpy_array(cls.B) 71 | G_B = nx.relabel_nodes(G_B, lambda x: x + 10) # relabel nodes to go from 10-19. 
72 | 73 | cls.root = str(cls.tmpdir.name) 74 | cls.A_path = os.path.join(cls.root, "A_unweighted.edgelist") 75 | cls.B_path = os.path.join(cls.root, "B.edgelist") 76 | 77 | nx.write_edgelist(G_A, cls.A_path, data=False) 78 | nx.write_weighted_edgelist(G_B, cls.B_path) 79 | 80 | def test_in(self): 81 | A_from_edgelist = gs.utils.import_edgelist(self.A_path) 82 | B_from_edgelist = gs.utils.import_edgelist(self.B_path) 83 | 84 | np.testing.assert_allclose(A_from_edgelist, self.A) 85 | np.testing.assert_allclose(B_from_edgelist, self.B) 86 | 87 | def test_in_Path_obj(self): 88 | A_from_edgelist = gs.utils.import_edgelist(Path(self.A_path)) 89 | B_from_edgelist = gs.utils.import_edgelist(Path(self.B_path)) 90 | 91 | np.testing.assert_allclose(A_from_edgelist, self.A) 92 | np.testing.assert_allclose(B_from_edgelist, self.B) 93 | 94 | def test_multiple_in(self): 95 | graphs = gs.utils.import_edgelist(self.root) 96 | A = np.zeros((20, 20)) 97 | A[:10, :10] = self.A 98 | 99 | B = np.zeros((20, 20)) 100 | B[10:, 10:] = self.B 101 | 102 | self.assertEqual(len(graphs), 2) 103 | self.assertTrue(all(graph.shape == (20, 20) for graph in graphs)) 104 | np.testing.assert_allclose(graphs[0], A) 105 | np.testing.assert_allclose(graphs[1], B) 106 | 107 | def test_wrongtypein(self): 108 | path = 5 109 | with self.assertRaises(TypeError): 110 | gs.utils.import_edgelist(path) 111 | with self.assertRaises(TypeError): 112 | gs.utils.import_edgelist(None) 113 | 114 | def test_vertices(self): 115 | expected_vertices_A = np.arange(0, 10) 116 | expected_vertices_B = np.arange(10, 20) 117 | 118 | _, A_vertices = gs.utils.import_edgelist(self.A_path, return_vertices=True) 119 | _, B_vertices = gs.utils.import_edgelist(self.B_path, return_vertices=True) 120 | 121 | np.testing.assert_allclose(expected_vertices_A, A_vertices) 122 | np.testing.assert_allclose(expected_vertices_B, B_vertices) 123 | 124 | def test_no_graphs_found(self): 125 | path = str(self.root + "invalid_edgelist.edgelist") 126 | with self.assertRaises(ValueError): 127 | gs.utils.import_edgelist(path) 128 | 129 | def test_bad_delimiter(self): 130 | delimiter = "," 131 | with pytest.warns(UserWarning): 132 | graphs = gs.utils.import_edgelist(self.root, delimiter=delimiter) 133 | --------------------------------------------------------------------------------
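To close, a self-contained sketch (not part of the repository) of the edgelist round trip that ``TestImportEdgelist`` above exercises: write a small weighted graph to disk, then read it back as a dense adjacency matrix.

```python
import os
import tempfile

import networkx as nx
import numpy as np

import graspologic as gs

np.random.seed(1)

with tempfile.TemporaryDirectory() as tmpdir:
    # Weighted Erdos-Renyi graph, mirroring the fixture in the test above.
    A = gs.simulations.er_np(10, 0.5, wt=np.random.exponential, wtargs=dict(scale=4))
    path = os.path.join(tmpdir, "A.edgelist")
    nx.write_weighted_edgelist(nx.from_numpy_array(A), path)

    A_back, vertices = gs.utils.import_edgelist(path, return_vertices=True)
    np.testing.assert_allclose(A_back, A)  # the round trip preserves the weights
    print(vertices)  # node ids 0 through 9
```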