├── docs
├── .touch
├── tutorials
│   ├── plotting
│   │   ├── data
│   │   │   └── sub-0025427_ses-1_dwi_desikan.npy
│   │   ├── gridplot.ipynb
│   │   └── pairplot.ipynb
│   └── simulations
│   │   ├── erdos_renyi.ipynb
│   │   └── rdpg.ipynb
├── reference
│   ├── reference
│   │   ├── match.rst
│   │   ├── subgraph.rst
│   │   ├── nominate.rst
│   │   ├── preconditions.rst
│   │   ├── align.rst
│   │   ├── inference.rst
│   │   ├── datasets.rst
│   │   ├── index.rst
│   │   ├── simulations.rst
│   │   ├── cluster.rst
│   │   ├── layouts.rst
│   │   ├── pipeline.rst
│   │   ├── partition.rst
│   │   ├── models.rst
│   │   ├── plotting.rst
│   │   ├── utils.rst
│   │   ├── embed.rst
│   │   └── preprocessing.rst
│   ├── index.rst
│   ├── contributing.rst
│   ├── release
│   │   ├── release_template.rst
│   │   ├── graspy_releases.rst
│   │   ├── release_0.0.1.rst
│   │   ├── release_0.1.rst
│   │   ├── release_0.3.rst
│   │   ├── release_0.0.3.rst
│   │   ├── release_0.2.rst
│   │   └── release_0.0.2.rst
│   ├── in-the-wild.rst
│   ├── install.rst
│   └── cli.rst
├── _templates
│   └── numpydoc_docstring.rst
├── license.rst
├── sphinx-ext
│   └── toctree_filter.py
└── index.rst
├── tests
├── __init__.py
├── embed
│   └── __init__.py
├── cluster
│   ├── __init__.py
│   └── test_kclust.py
├── layouts
│   ├── __init__.py
│   ├── nooverlap
│   │   ├── __init__.py
│   │   ├── test_grid_cell_creation.py
│   │   └── test_grid.py
│   └── test_auto.py
├── partition
│   ├── __init__.py
│   └── test_modularity.py
├── pipeline
│   ├── __init__.py
│   └── embed
│   │   ├── __init__.py
│   │   └── test_embeddings.py
├── preprocessing
│   └── __init__.py
├── utils.py
├── test_data
│   └── actor_bipartite_graph.csv
├── test_datasets.py
├── test_preconditions.py
├── test_mug2vec.py
├── test_base_embed.py
├── test_rdpg_corr.py
├── test_sg.py
├── test_svd.py
├── test_er_and_group_connection_tests.py
├── test_select_dimension.py
├── test_vertex_nomination_via_SGM.py
├── test_sign_flips.py
├── test_latentpositiontest.py
├── test_spectral_nomination.py
├── test_mds.py
├── test_n2v.py
└── test_io.py
├── graspologic
├── py.typed
├── pipeline
│   ├── embed
│   │   ├── _types.py
│   │   ├── __init__.py
│   │   └── _elbow.py
│   ├── __init__.py
│   └── graph_builder.py
├── match
│   ├── __init__.py
│   └── types.py
├── subgraph
│   └── __init__.py
├── layouts
│   ├── nooverlap
│   │   ├── __init__.py
│   │   ├── _node.py
│   │   ├── nooverlap.py
│   │   └── _quad_tree.py
│   ├── classes.py
│   └── __init__.py
├── datasets
│   ├── __init__.py
│   ├── mice
│   │   ├── blocks.csv
│   │   └── participants.csv
│   └── drosophila
│   │   ├── left_cell_labels.csv
│   │   └── right_cell_labels.csv
├── nominate
│   └── __init__.py
├── align
│   ├── __init__.py
│   ├── sign_flips.py
│   └── orthogonal_procrustes.py
├── cluster
│   ├── __init__.py
│   ├── base.py
│   └── kclust.py
├── inference
│   ├── __init__.py
│   ├── utils.py
│   └── binomial.py
├── partition
│   └── __init__.py
├── version.py
├── models
│   └── __init__.py
├── simulations
│   └── __init__.py
├── plot
│   └── __init__.py
├── __init__.py
├── preprocessing
│   └── __init__.py
├── embed
│   └── __init__.py
├── utils
│   ├── __init__.py
│   └── ptr.py
├── types.py
└── preconditions.py
├── mypi.ini
├── .gitattributes
├── .github
├── ISSUE_TEMPLATE
│   ├── config.yml
│   ├── usage-question.md
│   ├── bug_report.md
│   └── feature_request.md
├── workflows
│   ├── report.yml
│   ├── publish.yml
│   └── build.yml
├── CODEOWNERS
└── PULL_REQUEST_TEMPLATE.md
├── .coveragerc
├── .readthedocs.yml
├── pytest.ini
├── LICENSE.txt
├── CITATION.cff
├── mypy.ini
├── .gitignore
├── ROLES.md
├── SECURITY.md
└── README.md
/docs/.touch:
--------------------------------------------------------------------------------
1 | 
-------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /graspologic/py.typed: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/embed/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/cluster/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/layouts/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/partition/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/pipeline/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/pipeline/embed/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/preprocessing/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/layouts/nooverlap/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mypi.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | ignore_missing_imports = True -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | *.ipynb linguist-vendored=true 2 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | 3 | -------------------------------------------------------------------------------- /graspologic/pipeline/embed/_types.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | 3 | import networkx as nx 4 | 5 | NxGraphType = Union[nx.Graph, nx.DiGraph] 6 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [report] 2 | exclude_lines = 3 | # Have to re-enable the standard pragma 4 | pragma: no cover 5 | @abstract 6 | NotImplementedError -------------------------------------------------------------------------------- /docs/tutorials/plotting/data/sub-0025427_ses-1_dwi_desikan.npy: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/graspologic-org/graspologic/HEAD/docs/tutorials/plotting/data/sub-0025427_ses-1_dwi_desikan.npy -------------------------------------------------------------------------------- /docs/reference/reference/match.rst: -------------------------------------------------------------------------------- 1 | Matching 2 | ======== 3 | 4 | .. currentmodule:: graspologic.match 5 | 6 | Graph Matching 7 | -------------------- 8 | .. autofunction:: graph_match 9 | -------------------------------------------------------------------------------- /.github/workflows/report.yml: -------------------------------------------------------------------------------- 1 | name: graspologic Reporting 2 | on: 3 | schedule: 4 | - cron: "8 16 * * *" # 8:08am PST 5 | jobs: 6 | build: 7 | uses: ./.github/workflows/build.yml 8 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | sphinx: 3 | configuration: docs/reference/conf.py 4 | 5 | python: 6 | version: 3.9 7 | install: 8 | - requirements: requirements.txt 9 | 10 | -------------------------------------------------------------------------------- /graspologic/match/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 3 | 4 | from .wrappers import graph_match 5 | 6 | __all__ = ["graph_match"] 7 | -------------------------------------------------------------------------------- /graspologic/subgraph/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 3 | 4 | 5 | from .sg import SignalSubgraph 6 | 7 | __all__ = ["SignalSubgraph"] 8 | -------------------------------------------------------------------------------- /docs/reference/index.rst: -------------------------------------------------------------------------------- 1 | .. -*- coding: utf-8 -*- 2 | 3 | .. toctree:: 4 | :maxdepth: 1 5 | 6 | install 7 | cli 8 | contributing 9 | in-the-wild 10 | release 11 | reference/index 12 | 13 | -------------------------------------------------------------------------------- /docs/reference/contributing.rst: -------------------------------------------------------------------------------- 1 | Contributing to graspologic 2 | =========================== 3 | 4 | Please see: `Contributing to graspologic `_. 5 | 6 | -------------------------------------------------------------------------------- /docs/reference/reference/subgraph.rst: -------------------------------------------------------------------------------- 1 | Subgraph 2 | ======== 3 | 4 | .. currentmodule:: graspologic.subgraph 5 | 6 | Signal-Subgraph Estimators 7 | -------------------------- 8 | 9 | .. autoclass:: SignalSubgraph 10 | -------------------------------------------------------------------------------- /graspologic/layouts/nooverlap/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT license. 
3 | 4 | 5 | from graspologic.layouts.nooverlap.nooverlap import remove_overlaps 6 | 7 | __all__ = ["remove_overlaps"] 8 | -------------------------------------------------------------------------------- /tests/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 3 | 4 | import os 5 | 6 | 7 | def data_file(filename): 8 | return os.path.join(os.path.dirname(__file__), "test_data", filename) 9 | -------------------------------------------------------------------------------- /graspologic/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 3 | 4 | from .base import load_drosophila_left, load_drosophila_right, load_mice 5 | 6 | __all__ = ["load_drosophila_left", "load_drosophila_right", "load_mice"] 7 | -------------------------------------------------------------------------------- /graspologic/nominate/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 3 | 4 | from .spectralVN import SpectralVertexNomination 5 | from .VNviaSGM import VNviaSGM 6 | 7 | __all__ = [ 8 | "SpectralVertexNomination", 9 | "VNviaSGM", 10 | ] 11 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | addopts = --doctest-modules 3 | 4 | 5 | filterwarnings = 6 | # Matrix PendingDeprecationWarning. 7 | ignore:Using or importing the ABCs from 'collections' 8 | ignore:the matrix subclass is not 9 | ignore:Using a non-tuple 10 | ignore:Input graph is not fully connected. -------------------------------------------------------------------------------- /docs/_templates/numpydoc_docstring.rst: -------------------------------------------------------------------------------- 1 | {{index}} 2 | {{summary}} 3 | {{extended_summary}} 4 | {{parameters}} 5 | {{returns}} 6 | {{yields}} 7 | {{other_parameters}} 8 | {{attributes}} 9 | {{raises}} 10 | {{warns}} 11 | {{warnings}} 12 | {{see_also}} 13 | {{notes}} 14 | {{references}} 15 | {{examples}} 16 | {{methods}} -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/usage-question.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Usage Question 3 | about: Ask us a question about graspologic and graphs! 4 | title: "[Question]" 5 | labels: question 6 | assignees: '' 7 | 8 | --- 9 | 10 | This is our forum for asking whatever network question you'd like! No need to feel shy - we're happy to talk about graphs! 11 | -------------------------------------------------------------------------------- /docs/reference/reference/nominate.rst: -------------------------------------------------------------------------------- 1 | Nomination 2 | ========== 3 | 4 | .. currentmodule:: graspologic.nominate 5 | 6 | Spectral Vertex Nomination 7 | --------------------------------------- 8 | 9 | .. autoclass:: SpectralVertexNomination 10 | 11 | Vertex Nomination via SGM 12 | ------------------------- 13 | .. 
autoclass:: VNviaSGM 14 | -------------------------------------------------------------------------------- /docs/reference/reference/preconditions.rst: -------------------------------------------------------------------------------- 1 | Preconditions 2 | ============= 3 | 4 | .. autofunction:: graspologic.preconditions.check_argument_types 5 | .. autofunction:: graspologic.preconditions.check_optional_argument_types 6 | .. autofunction:: graspologic.preconditions.check_argument 7 | .. autofunction:: graspologic.preconditions.is_real_weighted 8 | -------------------------------------------------------------------------------- /graspologic/align/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 3 | 4 | from .orthogonal_procrustes import OrthogonalProcrustes 5 | from .seedless_procrustes import SeedlessProcrustes 6 | from .sign_flips import SignFlips 7 | 8 | __all__ = ["OrthogonalProcrustes", "SeedlessProcrustes", "SignFlips"] 9 | -------------------------------------------------------------------------------- /docs/reference/reference/align.rst: -------------------------------------------------------------------------------- 1 | ******** 2 | Aligning 3 | ******** 4 | 5 | .. currentmodule:: graspologic.align 6 | 7 | Sign flips 8 | ---------- 9 | .. autoclass:: SignFlips 10 | 11 | Orthogonal Procrustes 12 | --------------------- 13 | .. autoclass:: OrthogonalProcrustes 14 | 15 | Seedless Procrustes 16 | ------------------- 17 | .. autoclass:: SeedlessProcrustes 18 | -------------------------------------------------------------------------------- /docs/reference/reference/inference.rst: -------------------------------------------------------------------------------- 1 | Inference 2 | ========= 3 | 4 | .. currentmodule:: graspologic.inference 5 | 6 | Two-graph hypothesis testing 7 | ---------------------------- 8 | 9 | .. autofunction:: density_test 10 | 11 | .. autofunction:: group_connection_test 12 | 13 | .. autofunction:: latent_position_test 14 | 15 | .. autofunction:: latent_distribution_test 16 | -------------------------------------------------------------------------------- /graspologic/cluster/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 3 | 4 | from .autogmm import AutoGMMCluster 5 | from .divisive_cluster import DivisiveCluster 6 | from .gclust import GaussianCluster 7 | from .kclust import KMeansCluster 8 | 9 | __all__ = ["GaussianCluster", "KMeansCluster", "AutoGMMCluster", "DivisiveCluster"] 10 | -------------------------------------------------------------------------------- /graspologic/datasets/mice/blocks.csv: -------------------------------------------------------------------------------- 1 | block,hemisphere,i,j 2 | isocortex,L,0,41 3 | pallium,L,41,61 4 | subpallium,L,61,68 5 | diencephalon,L,68,79 6 | midbrain,L,79,88 7 | hindbrain,L,88,116 8 | white_matter,L,116,166 9 | isocortex,R,166,207 10 | pallium,R,207,227 11 | subpallium,R,227,234 12 | diencephalon,R,234,245 13 | midbrain,R,245,254 14 | hindbrain,R,254,282 15 | white_matter,R,282,332 16 | -------------------------------------------------------------------------------- /docs/reference/reference/datasets.rst: -------------------------------------------------------------------------------- 1 | Datasets 2 | ======== 3 | 4 | .. 
currentmodule:: graspologic.datasets 5 | 6 | Drosophila larval mushroom body 7 | ------------------------------- 8 | 9 | .. autofunction:: load_drosophila_left 10 | 11 | .. autofunction:: load_drosophila_right 12 | 13 | Duke mouse whole-brain connectomes 14 | ---------------------------------- 15 | 16 | .. autofunction:: load_mice -------------------------------------------------------------------------------- /docs/reference/reference/index.rst: -------------------------------------------------------------------------------- 1 | .. _reference: 2 | 3 | Reference 4 | ********* 5 | 6 | .. toctree:: 7 | :maxdepth: 2 8 | 9 | align 10 | cluster 11 | datasets 12 | embed 13 | inference 14 | layouts 15 | match 16 | models 17 | nominate 18 | partition 19 | preconditions 20 | pipeline 21 | plotting 22 | preprocessing 23 | simulations 24 | subgraph 25 | utils 26 | -------------------------------------------------------------------------------- /docs/reference/reference/simulations.rst: -------------------------------------------------------------------------------- 1 | *********** 2 | Simulations 3 | *********** 4 | 5 | .. currentmodule:: graspologic.simulations 6 | 7 | 8 | 9 | .. autofunction:: er_np 10 | 11 | .. autofunction:: er_nm 12 | 13 | .. autofunction:: sbm 14 | 15 | .. autofunction:: rdpg 16 | 17 | .. autofunction:: er_corr 18 | 19 | .. autofunction:: sbm_corr 20 | 21 | .. autofunction:: rdpg_corr 22 | 23 | .. autofunction:: mmsbm 24 | 25 | -------------------------------------------------------------------------------- /graspologic/layouts/classes.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT license. 3 | 4 | from typing import NamedTuple 5 | 6 | __all__ = [ 7 | "NodePosition", 8 | ] 9 | 10 | 11 | class NodePosition(NamedTuple): 12 | """ 13 | Contains the node id, 2d coordinates, size, and community id for a node. 14 | """ 15 | 16 | node_id: str 17 | x: float 18 | y: float 19 | size: float 20 | community: int 21 | -------------------------------------------------------------------------------- /tests/test_data/actor_bipartite_graph.csv: -------------------------------------------------------------------------------- 1 | Person,Movie,Role 2 | Tom Hanks,Apollo 13,Cast 3 | Bill Paxton,Apollo 13,Cast 4 | Kevin Bacon,Apollo 13,Cast 5 | Kathleen Quinlan,Apollo 13,Cast 6 | Kevin Bacon,Planes Trains & Automobiles,Cast 7 | Steve Martin,Planes Trains & Automobiles,Cast 8 | John Candy,Planes Trains & Automobiles,Cast 9 | Tom Hanks,Mamma Mia! Here We Go Again,Executive Producer 10 | Tom Hanks,Forrest Gump,Cast 11 | Sally Field,Forrest Gump,Cast 12 | -------------------------------------------------------------------------------- /docs/reference/reference/cluster.rst: -------------------------------------------------------------------------------- 1 | ********** 2 | Clustering 3 | ********** 4 | 5 | .. currentmodule:: graspologic.cluster 6 | 7 | K-Means Clustering 8 | ------------------ 9 | .. autoclass:: KMeansCluster 10 | 11 | Gaussian Mixture Models Clustering 12 | ---------------------------------- 13 | .. autoclass:: GaussianCluster 14 | 15 | .. autoclass:: AutoGMMCluster 16 | 17 | Hierarchical Clustering 18 | ---------------------------------- 19 | .. 
autoclass:: DivisiveCluster 20 | :no-inherited-members: 21 | -------------------------------------------------------------------------------- /docs/reference/reference/layouts.rst: -------------------------------------------------------------------------------- 1 | Layouts 2 | ======= 3 | .. currentmodule:: graspologic.layouts 4 | 5 | NodePosition 6 | ------------ 7 | .. autoclass:: NodePosition 8 | 9 | Automatic Graph Layout 10 | ---------------------- 11 | .. autofunction:: layout_tsne 12 | .. autofunction:: layout_umap 13 | 14 | Colors 15 | ------ 16 | .. autofunction:: categorical_colors 17 | .. autofunction:: sequential_colors 18 | 19 | Rendering 20 | --------- 21 | .. autofunction:: save_graph 22 | .. autofunction:: show_graph 23 | -------------------------------------------------------------------------------- /graspologic/inference/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 3 | 4 | from .density_test import density_test 5 | from .group_connection_test import group_connection_test 6 | from .latent_distribution_test import latent_distribution_test 7 | from .latent_position_test import latent_position_test 8 | 9 | __all__ = [ 10 | "density_test", 11 | "group_connection_test", 12 | "latent_position_test", 13 | "latent_distribution_test", 14 | ] 15 | -------------------------------------------------------------------------------- /graspologic/partition/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 3 | 4 | from .leiden import ( 5 | HierarchicalCluster, 6 | HierarchicalClusters, 7 | hierarchical_leiden, 8 | leiden, 9 | ) 10 | from .modularity import modularity, modularity_components 11 | 12 | __all__ = [ 13 | "HierarchicalCluster", 14 | "HierarchicalClusters", 15 | "hierarchical_leiden", 16 | "leiden", 17 | "modularity", 18 | "modularity_components", 19 | ] 20 | -------------------------------------------------------------------------------- /graspologic/layouts/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT license. 
3 | 4 | 5 | from .classes import NodePosition 6 | from .colors import categorical_colors, sequential_colors 7 | from .render import save_graph, show_graph 8 | 9 | from .auto import layout_tsne, layout_umap # isort:skip 10 | 11 | __all__ = [ 12 | "NodePosition", 13 | "categorical_colors", 14 | "sequential_colors", 15 | "layout_tsne", 16 | "layout_umap", 17 | "save_graph", 18 | "show_graph", 19 | ] 20 | -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # graspologic package 2 | # /graspologic/align @alyakin314 @bdpedigo 3 | # /graspologic/cluster @bdpedigo @j1c 4 | # /graspologic/datasets @bdpedigo 5 | # /graspologic/embed @j1c @bdpedigo 6 | # /graspologic/inference @alyakin314 @bdpedigo 7 | # /graspologic/match @asaadeldin11 @bdpedigo 8 | # /graspologic/models @bdpedigo @j1c 9 | # /graspologic/pipeline @j1c @bdpedigo 10 | # /graspologic/plot @j1c @bdpedigo 11 | # /graspologic/subgraph @j1c 12 | # /graspologic/utils/ @j1c @bdpedigo 13 | 14 | # tutorials 15 | 16 | # tests 17 | -------------------------------------------------------------------------------- /docs/reference/reference/pipeline.rst: -------------------------------------------------------------------------------- 1 | Pipeline 2 | ======== 3 | .. automodule:: graspologic.pipeline 4 | 5 | GraphBuilder 6 | ------------ 7 | .. autoclass:: GraphBuilder 8 | 9 | Embed 10 | ----- 11 | .. automodule:: graspologic.pipeline.embed 12 | .. autoclass:: graspologic.pipeline.embed.embeddings.Embeddings 13 | .. autofunction:: graspologic.pipeline.embed.adjacency_spectral_embedding 14 | .. autofunction:: graspologic.pipeline.embed.laplacian_spectral_embedding 15 | .. autofunction:: graspologic.pipeline.embed.omnibus_embedding_pairwise 16 | -------------------------------------------------------------------------------- /graspologic/version.py: -------------------------------------------------------------------------------- 1 | """Utilities for exposing the package version.""" 2 | 3 | from importlib import metadata 4 | 5 | 6 | def __version() -> str: 7 | """Return the installed graspologic version. 8 | 9 | Falls back to a sensible default when the distribution metadata is 10 | unavailable (for example when running directly from a source checkout). 11 | """ 12 | 13 | try: 14 | return metadata.version("graspologic") 15 | except metadata.PackageNotFoundError: 16 | return "0.0.0" 17 | 18 | 19 | __version__ = __version() -------------------------------------------------------------------------------- /graspologic/models/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 3 | 4 | from .base import BaseGraphEstimator 5 | from .edge_swaps import EdgeSwapper 6 | from .er import DCEREstimator, EREstimator 7 | from .rdpg import RDPGEstimator 8 | from .sbm_estimators import DCSBMEstimator, SBMEstimator 9 | 10 | __all__ = [ 11 | "BaseGraphEstimator", 12 | "EREstimator", 13 | "DCEREstimator", 14 | "SBMEstimator", 15 | "DCSBMEstimator", 16 | "RDPGEstimator", 17 | "EdgeSwapper", 18 | ] 19 | -------------------------------------------------------------------------------- /docs/reference/reference/partition.rst: -------------------------------------------------------------------------------- 1 | Partition 2 | ========= 3 | 4 | .. 
currentmodule:: graspologic.partition 5 | 6 | Modularity and Component Modularity 7 | ----------------------------------- 8 | 9 | .. autofunction:: modularity 10 | 11 | .. autofunction:: modularity_components 12 | 13 | Leiden and Hierarchical Leiden 14 | ------------------------------ 15 | 16 | .. autofunction:: leiden 17 | 18 | .. autoclass:: HierarchicalCluster 19 | :members: 20 | 21 | .. autoclass:: HierarchicalClusters 22 | :members: 23 | 24 | .. autofunction:: hierarchical_leiden 25 | -------------------------------------------------------------------------------- /docs/reference/reference/models.rst: -------------------------------------------------------------------------------- 1 | Models 2 | ====== 3 | 4 | .. currentmodule:: graspologic.models 5 | 6 | Erdos-Reyni models 7 | ------------------ 8 | 9 | .. autoclass:: EREstimator 10 | 11 | .. autoclass:: DCEREstimator 12 | 13 | Stochastic block models 14 | ----------------------- 15 | 16 | .. autoclass:: SBMEstimator 17 | 18 | .. autoclass:: DCSBMEstimator 19 | 20 | Latent position models 21 | ---------------------- 22 | 23 | .. autoclass:: RDPGEstimator 24 | 25 | Edge swapping (configuration models) 26 | ------------------------------------ 27 | 28 | .. autoclass:: EdgeSwapper -------------------------------------------------------------------------------- /graspologic/simulations/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 3 | 4 | from .simulations import er_nm, er_np, mmsbm, p_from_latent, rdpg, sample_edges, sbm 5 | from .simulations_corr import er_corr, sample_edges_corr, sbm_corr 6 | 7 | from .rdpg_corr import rdpg_corr # isort:skip 8 | 9 | __all__ = [ 10 | "sample_edges", 11 | "er_np", 12 | "er_nm", 13 | "sbm", 14 | "rdpg", 15 | "p_from_latent", 16 | "sample_edges_corr", 17 | "er_corr", 18 | "sbm_corr", 19 | "rdpg_corr", 20 | "mmsbm", 21 | ] 22 | -------------------------------------------------------------------------------- /tests/test_datasets.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 3 | 4 | import unittest 5 | 6 | from graspologic.datasets import * 7 | 8 | 9 | class TestDatasets(unittest.TestCase): 10 | def test_drosphila_left(self): 11 | graph = load_drosophila_left() 12 | graph, labels = load_drosophila_left(return_labels=True) 13 | 14 | def test_drosphila_right(self): 15 | graph = load_drosophila_right() 16 | graph, labels = load_drosophila_right(return_labels=True) 17 | 18 | def test_load_mice(self): 19 | data = load_mice() 20 | -------------------------------------------------------------------------------- /docs/reference/reference/plotting.rst: -------------------------------------------------------------------------------- 1 | ******** 2 | Plotting 3 | ******** 4 | 5 | .. currentmodule:: graspologic.plot 6 | 7 | Heatmap 8 | ------- 9 | .. autofunction:: heatmap 10 | 11 | Gridplot 12 | -------- 13 | .. autofunction:: gridplot 14 | 15 | Pairplot 16 | -------- 17 | .. autofunction:: pairplot 18 | .. autofunction:: pairplot_with_gmm 19 | 20 | Degreeplot 21 | ---------- 22 | .. autofunction:: degreeplot 23 | 24 | Edgeplot 25 | -------- 26 | .. autofunction:: edgeplot 27 | 28 | Screeplot 29 | --------- 30 | .. autofunction:: screeplot 31 | 32 | Adjplot 33 | ------- 34 | .. 
autofunction:: adjplot 35 | 36 | Matrixplot 37 | ---------- 38 | .. autofunction:: matrixplot 39 | -------------------------------------------------------------------------------- /graspologic/plot/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 3 | 4 | # type: ignore 5 | 6 | import sys 7 | 8 | import matplotlib as mpl 9 | 10 | from .plot import ( 11 | degreeplot, 12 | edgeplot, 13 | gridplot, 14 | heatmap, 15 | networkplot, 16 | pairplot, 17 | pairplot_with_gmm, 18 | screeplot, 19 | ) 20 | from .plot_matrix import adjplot, matrixplot 21 | 22 | __all__ = [ 23 | "heatmap", 24 | "gridplot", 25 | "pairplot", 26 | "pairplot_with_gmm", 27 | "degreeplot", 28 | "edgeplot", 29 | "screeplot", 30 | "adjplot", 31 | "matrixplot", 32 | "networkplot", 33 | ] 34 | -------------------------------------------------------------------------------- /graspologic/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 3 | 4 | import graspologic.align 5 | import graspologic.cluster 6 | import graspologic.datasets 7 | import graspologic.embed 8 | import graspologic.inference 9 | import graspologic.layouts 10 | import graspologic.models 11 | import graspologic.nominate 12 | import graspologic.partition 13 | import graspologic.pipeline 14 | import graspologic.plot 15 | import graspologic.preprocessing 16 | import graspologic.simulations 17 | import graspologic.subgraph 18 | import graspologic.utils 19 | from graspologic.types import * 20 | from graspologic.version import __version 21 | 22 | __version__ = __version() 23 | -------------------------------------------------------------------------------- /graspologic/preprocessing/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 3 | 4 | from .graph_cuts import ( 5 | DefinedHistogram, 6 | cut_edges_by_weight, 7 | cut_vertices_by_betweenness_centrality, 8 | cut_vertices_by_degree_centrality, 9 | histogram_betweenness_centrality, 10 | histogram_degree_centrality, 11 | histogram_edge_weight, 12 | ) 13 | 14 | __all__ = [ 15 | "DefinedHistogram", 16 | "histogram_betweenness_centrality", 17 | "histogram_degree_centrality", 18 | "histogram_edge_weight", 19 | "cut_edges_by_weight", 20 | "cut_vertices_by_betweenness_centrality", 21 | "cut_vertices_by_degree_centrality", 22 | ] 23 | -------------------------------------------------------------------------------- /docs/reference/reference/utils.rst: -------------------------------------------------------------------------------- 1 | Utility 2 | ======= 3 | 4 | .. currentmodule:: graspologic.utils 5 | 6 | Transformations 7 | --------------- 8 | 9 | .. autofunction:: pass_to_ranks 10 | 11 | .. autofunction:: to_laplacian 12 | 13 | .. autofunction:: augment_diagonal 14 | 15 | .. autofunction:: symmetrize 16 | 17 | .. autofunction:: remove_loops 18 | 19 | Connected Components 20 | -------------------- 21 | 22 | .. autofunction:: is_fully_connected 23 | 24 | .. autofunction:: largest_connected_component 25 | 26 | .. autofunction:: multigraph_lcc_union 27 | 28 | .. autofunction:: multigraph_lcc_intersection 29 | 30 | IO 31 | -- 32 | 33 | .. autofunction:: import_graph 34 | 35 | .. 
autofunction:: import_edgelist 36 | 37 | Other 38 | ----- 39 | 40 | .. autofunction:: remap_labels -------------------------------------------------------------------------------- /docs/reference/reference/embed.rst: -------------------------------------------------------------------------------- 1 | .. _ase_tutorial: https://microsoft.github.io/graspologic/tutorials/embedding/AdjacencySpectralEmbed.html 2 | 3 | Embedding 4 | ========= 5 | 6 | .. currentmodule:: graspologic.embed 7 | 8 | Decomposition 9 | ------------- 10 | 11 | .. autofunction:: select_dimension 12 | 13 | .. autofunction:: select_svd 14 | 15 | Single graph embedding 16 | ---------------------- 17 | 18 | .. autoclass:: AdjacencySpectralEmbed 19 | .. autoclass:: LaplacianSpectralEmbed 20 | .. autofunction:: node2vec_embed 21 | 22 | Multiple graph embedding 23 | ------------------------ 24 | 25 | .. autoclass:: OmnibusEmbed 26 | .. autoclass:: MultipleASE 27 | .. autoclass:: mug2vec 28 | 29 | Dissimilarity graph embedding 30 | ----------------------------- 31 | 32 | .. autoclass:: ClassicalMDS 33 | -------------------------------------------------------------------------------- /graspologic/embed/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 3 | 4 | from .ase import AdjacencySpectralEmbed 5 | from .base import BaseSpectralEmbed 6 | from .case import CovariateAssistedEmbed 7 | from .lse import LaplacianSpectralEmbed 8 | from .mase import MultipleASE 9 | from .mds import ClassicalMDS 10 | from .mug2vec import mug2vec 11 | from .n2v import node2vec_embed 12 | from .omni import OmnibusEmbed 13 | from .svd import select_dimension, select_svd 14 | 15 | __all__ = [ 16 | "ClassicalMDS", 17 | "OmnibusEmbed", 18 | "AdjacencySpectralEmbed", 19 | "LaplacianSpectralEmbed", 20 | "MultipleASE", 21 | "node2vec_embed", 22 | "select_dimension", 23 | "select_svd", 24 | "BaseSpectralEmbed", 25 | "CovariateAssistedEmbed", 26 | ] 27 | -------------------------------------------------------------------------------- /graspologic/pipeline/embed/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT license. 3 | # ruff: noqa: E402 SVD_SOLVER_TYPES needs to be first 4 | """ 5 | The embed module of ``graspologic.pipeline.embed`` is intended to provide faster 6 | application development support. The functions provided in it reflect common call 7 | patterns used when developing data processing pipelines and future consumption 8 | by nearest neighbor services and visualization routines. 9 | """ 10 | 11 | __SVD_SOLVER_TYPES = ["randomized", "full", "truncated"] 12 | from .adjacency_spectral_embedding import adjacency_spectral_embedding 13 | from .embeddings import Embeddings, EmbeddingsView 14 | from .laplacian_spectral_embedding import laplacian_spectral_embedding 15 | from .omnibus_embedding import omnibus_embedding_pairwise 16 | 17 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Bug reports help us improve! Thanks for submitting yours! 
4 | title: "[BUG] " 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | ## Expected Behavior 11 | Tell us what should happen 12 | 13 | ## Actual Behavior 14 | Tell us what happens instead 15 | 16 | ## Example Code 17 | Please see [How to create a Minimal, Reproducible example](https://stackoverflow.com/help/minimal-reproducible-example) for some guidance on creating the best possible example of the problem 18 | ```python 19 | 20 | ``` 21 | 22 | ## Full Traceback 23 | ```pytb 24 | Paste the full traceback in case there is an exception 25 | 26 | ``` 27 | 28 | ## Your Environment 29 | * Python version: 30 | * graspologic version: 31 | 32 | ## Additional Details 33 | Any other contextual information you might feel is important. 34 | -------------------------------------------------------------------------------- /graspologic/datasets/mice/participants.csv: -------------------------------------------------------------------------------- 1 | participant_id,genotype,sex 2 | sub-54776,DBA2,male 3 | sub-54777,DBA2,male 4 | sub-54779,DBA2,female 5 | sub-54781,DBA2,female 6 | sub-54790,B6,male 7 | sub-54793,B6,male 8 | sub-54794,B6,female 9 | sub-54797,B6,female 10 | sub-54811,BTBR,male 11 | sub-54813,BTBR,male 12 | sub-54815,BTBR,female 13 | sub-54817,BTBR,female 14 | sub-54821,CAST,male 15 | sub-54823,CAST,male 16 | sub-54829,DBA2,male 17 | sub-54831,DBA2,male 18 | sub-54833,DBA2,female 19 | sub-54835,DBA2,female 20 | sub-54842,CAST,female 21 | sub-54847,CAST,female 22 | sub-54849,BTBR,male 23 | sub-54851,BTBR,male 24 | sub-54853,BTBR,female 25 | sub-54855,BTBR,female 26 | sub-54864,B6,male 27 | sub-54866,B6,male 28 | sub-54868,B6,female 29 | sub-54870,B6,female 30 | sub-54883,CAST,male 31 | sub-54885,CAST,male 32 | sub-54887,CAST,female 33 | sub-54890,CAST,female 34 | -------------------------------------------------------------------------------- /docs/reference/release/release_template.rst: -------------------------------------------------------------------------------- 1 | :orphan: 2 | 3 | Release Notes: GraSPy 0.0.X 4 | =========================== 5 | 6 | We're happy to announce the release of GraSPy 0.0.X! GraSPy is a Python package for 7 | understanding the properties of random graphs that arise from modern datasets, such as social networks 8 | and brain networks. 9 | 10 | For more information, please visit our `website `_ 11 | and our `tutorials `_. 12 | 13 | 14 | Highlights 15 | ---------- 16 | This release is the result of X of work with over X pull requests by X contributors. Highlights include: 17 | 18 | 19 | Improvements 20 | ------------ 21 | 22 | 23 | API Changes 24 | ----------- 25 | 26 | 27 | Deprecations 28 | ------------ 29 | 30 | 31 | Contributors to this release 32 | ---------------------------- 33 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: enhancement 6 | assignees: '' 7 | 8 | --- 9 | 10 | ## Is your feature request related to a problem? Please describe. 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | ## Describe the solution you'd like 14 | A clear and concise description of what you want to happen. 15 | 16 | ## Describe alternatives you've considered 17 | A clear and concise description of any alternative solutions or features you've considered. 
18 | 
19 | ## Provide references (if applicable)
20 | If your feature request is related to a published algorithm/idea, please provide links to
21 | any relevant articles or webpages.
22 | 
23 | ## Additional context
24 | Add any other context or screenshots about the feature request here.
--------------------------------------------------------------------------------
/graspologic/match/types.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft Corporation and contributors.
2 | # Licensed under the MIT License.
3 | 
4 | from typing import Union
5 | 
6 | import numpy as np
7 | from packaging import version
8 | from scipy import __version__ as scipy_version
9 | from scipy.sparse import csr_matrix
10 | 
11 | if version.parse(scipy_version) >= version.parse("1.8.0"):
12 |     from scipy.sparse import csr_array
13 | else:
14 |     csr_array = csr_matrix
15 | 
16 | from typing_extensions import Literal
17 | 
18 | from graspologic.types import List, Tuple
19 | 
20 | # redefining since I don't want to add csr_array for ALL code in graspologic yet
21 | AdjacencyMatrix = Union[np.ndarray, csr_matrix, csr_array]
22 | 
23 | MultilayerAdjacency = Union[List[AdjacencyMatrix], AdjacencyMatrix, np.ndarray]
24 | 
25 | PaddingType = Literal["adopted", "naive"]
26 | 
27 | Scalar = Union[int, float, np.integer]
28 | 
29 | Int = Union[int, np.integer]
30 | 
31 | PartialMatchType = Union[np.ndarray, Tuple]
--------------------------------------------------------------------------------
/docs/reference/reference/preprocessing.rst:
--------------------------------------------------------------------------------
1 | Preprocessing
2 | =============
3 | 
4 | .. currentmodule:: graspologic.preprocessing
5 | 
6 | Graph Cuts
7 | ----------
8 | 
9 | Constants
10 | ^^^^^^^^^
11 | .. py:data:: LARGER_THAN_INCLUSIVE
12 | 
13 |     Cut any edge or node > the ``cut_threshold``
14 | 
15 | .. py:data:: LARGER_THAN_EXCLUSIVE
16 | 
17 |     Cut any edge or node >= the ``cut_threshold``
18 | 
19 | .. py:data:: SMALLER_THAN_INCLUSIVE
20 | 
21 |     Cut any edge or node < the ``cut_threshold``
22 | 
23 | .. py:data:: SMALLER_THAN_EXCLUSIVE
24 | 
25 |     Cut any edge or node <= the ``cut_threshold``
26 | 
27 | Classes
28 | ^^^^^^^
29 | .. autoclass:: DefinedHistogram
30 | 
31 | Functions
32 | ^^^^^^^^^
33 | .. autofunction:: cut_edges_by_weight
34 | 
35 | .. autofunction:: cut_vertices_by_betweenness_centrality
36 | 
37 | .. autofunction:: cut_vertices_by_degree_centrality
38 | 
39 | .. autofunction:: histogram_betweenness_centrality
40 | 
41 | .. autofunction:: histogram_degree_centrality
42 | 
43 | .. autofunction:: histogram_edge_weight
--------------------------------------------------------------------------------
/graspologic/layouts/nooverlap/_node.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft Corporation.
2 | # Licensed under the MIT license.
3 | 4 | from typing import Any 5 | 6 | 7 | class _Node: 8 | def __init__( 9 | self, 10 | node_id: Any, 11 | x: float, 12 | y: float, 13 | size: float, 14 | community: int = 9999999, 15 | color: str = "", 16 | ): 17 | self.node_id = node_id 18 | self.x = float(x) 19 | self.y = float(y) 20 | self.original_x = self.x 21 | self.original_y = self.y 22 | self.size = float(size) 23 | self.community = community 24 | self.color = color 25 | 26 | def reset_original_position(self, new_x: float, new_y: float) -> None: 27 | self.original_x = self.x = new_x 28 | self.original_y = self.y = new_y 29 | 30 | def __eq__(self, other: Any) -> bool: 31 | return self.node_id == other.node_id # type: ignore 32 | 33 | def __hash__(self) -> int: 34 | return hash(self.node_id) 35 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 5 | - [ ] Does this PR have a descriptive title that could go in our release notes? 6 | - [ ] Does this PR add any new dependencies? 7 | - [ ] Does this PR modify any existing APIs? 8 | - [ ] Is the change to the API backwards compatible? 9 | - [ ] Have you built the documentation (reference and/or tutorial) and verified the generated documentation is appropriate? 10 | 11 | #### Reference Issues/PRs 12 | 18 | 19 | #### What does this implement/fix? Briefly explain your changes. 20 | 21 | #### Any other comments? 22 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) Microsoft Corporation. 2 | 3 | MIT License 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /graspologic/layouts/nooverlap/nooverlap.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT license. 3 | 4 | import logging 5 | import time 6 | 7 | from graspologic.types import List 8 | 9 | from .. 
import NodePosition 10 | from ._node import _Node 11 | from ._quad_tree import _QuadTree 12 | 13 | logger = logging.getLogger(__name__) 14 | 15 | 16 | def remove_overlaps(node_positions: List[NodePosition]) -> List[NodePosition]: 17 | start = time.time() 18 | logger.info("removing overlaps") 19 | local_nodes = [ 20 | _Node(node.node_id, node.x, node.y, node.size, node.community) 21 | for node in node_positions 22 | ] 23 | qt = _QuadTree(local_nodes, 50) 24 | qt.layout_dense_first(first_color=None) 25 | stop = time.time() 26 | logger.info(f"removed overlap in {stop - start} seconds") 27 | 28 | new_positions = [ 29 | NodePosition( 30 | node_id=node.node_id, 31 | x=node.x, 32 | y=node.y, 33 | size=node.size, 34 | community=node.community, 35 | ) 36 | for node in local_nodes 37 | ] 38 | return new_positions 39 | -------------------------------------------------------------------------------- /docs/reference/release/graspy_releases.rst: -------------------------------------------------------------------------------- 1 | GraSPy Release Log 2 | ================== 3 | 4 | GraSPy 0.3 5 | ---------- 6 | Release date: 04 Aug 2020 7 | Supports Python 3.6, and 3.7 8 | 9 | .. toctree:: 10 | :maxdepth: 1 11 | 12 | release_0.3.rst 13 | 14 | GraSPy 0.2 15 | ---------- 16 | Release date: 02 Mar 2020 17 | Supports Python 3.5, 3.6, and 3.7 18 | 19 | .. toctree:: 20 | :maxdepth: 1 21 | 22 | release_0.2.rst 23 | 24 | GraSPy 0.1 25 | ---------- 26 | Release date: 05 Aug 2019 27 | Supports Python 3.5, 3.6, and 3.7 28 | 29 | .. toctree:: 30 | :maxdepth: 1 31 | 32 | release_0.1.rst 33 | 34 | GraSPy 0.0.3 35 | ------------ 36 | Release date: 11 June 2019 37 | Supports Python 3.5, 3.6, and 3.7. 38 | 39 | .. toctree:: 40 | :maxdepth: 1 41 | 42 | release_0.0.3.rst 43 | 44 | GraSPy 0.0.2 45 | ------------ 46 | Release date: 26 March 2019 47 | Supports Python 3.5, 3.6, and 3.7. 48 | 49 | .. toctree:: 50 | :maxdepth: 1 51 | 52 | release_0.0.2.rst 53 | 54 | GraSPy 0.0.1 55 | ------------ 56 | Release date: 14 December 2018 57 | Supports Python 3.5, 3.6, and 3.7. 58 | 59 | .. toctree:: 60 | :maxdepth: 1 61 | 62 | release_0.0.1.rst 63 | -------------------------------------------------------------------------------- /graspologic/pipeline/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The ``pipeline`` module includes a collection of higher level API abstractions from 3 | the functionality exposed elsewhere in ``graspologic``. The classes, functions, and 4 | modules elsewhere in ``graspologic`` are intended to provide fine-grained, expert-level 5 | control over the features they implement. These building blocks provide an excellent 6 | backbone of utility, for researchers in mathematics and science, especially as they 7 | hew so closely to ``scikit-learn``'s programming paradigms and object model. 8 | 9 | But for software engineers and datascientists, there is a certain ritualistic cost to 10 | preparing a graph, setting up the objects for use, and tearing them down afterwards. 11 | 12 | ``pipeline`` is intended to smooth the transition between a common developer and 13 | a graph machine learning subject matter expert. We make a presumption that most 14 | programmers are software developers first, and dabbling in ML second, and our intention 15 | is to bridge this gap. 16 | 17 | """ 18 | 19 | # Copyright (c) Microsoft Corporation. 20 | # Licensed under the MIT license. 21 | 22 | from . 
import embed 23 | from .graph_builder import GraphBuilder 24 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | message: "If you find graspologic useful in your work, please cite the package via the GraSPy paper" 3 | authors: 4 | - family-names: "Chung" 5 | given-names: "Jaewon" 6 | - family-names: "Pedigo" 7 | given-names: "Benjamin D." 8 | - family-names: "Bridgeford" 9 | given-names: "Eric W." 10 | - family-names: "Varjavand" 11 | given-names: "Bijan K." 12 | - family-names: "Helm" 13 | given-names: "Hayden S." 14 | - family-names: "Vogelstein" 15 | given-names: "Joshua T." 16 | title: "GraSPy: Graph Statistics in Python" 17 | version: 3.0.0 18 | url: "https://github.com/graspologic-org/graspologic" 19 | preferred-citation: 20 | type: software 21 | issue: 158 22 | volume: 20 23 | journal: "Journal of Machine Learning Research" 24 | authors: 25 | - family-names: "Chung" 26 | given-names: "Jaewon" 27 | - family-names: "Pedigo" 28 | given-names: "Benjamin D." 29 | - family-names: "Bridgeford" 30 | given-names: "Eric W." 31 | - family-names: "Varjavand" 32 | given-names: "Bijan K." 33 | - family-names: "Helm" 34 | given-names: "Hayden S." 35 | - family-names: "Vogelstein" 36 | given-names: "Joshua T." 37 | start: 1 38 | end: 7 39 | year: 2019 40 | -------------------------------------------------------------------------------- /mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | #warn_return_any = True 3 | warn_unused_configs = True 4 | disallow_untyped_defs = True 5 | disallow_incomplete_defs = True 6 | no_implicit_optional = True 7 | 8 | [mypy-anytree] 9 | ignore_missing_imports = True 10 | 11 | [mypy-hyppo.*] 12 | ignore_missing_imports = True 13 | 14 | [mypy-joblib] 15 | ignore_missing_imports = True 16 | 17 | [mypy-gensim.models] 18 | ignore_missing_imports = True 19 | 20 | [mypy-matplotlib] 21 | ignore_missing_imports = True 22 | 23 | [mypy-matplotlib.*] 24 | ignore_missing_imports = True 25 | 26 | [mypy-mpl_toolkits.*] 27 | ignore_missing_imports = True 28 | 29 | [mypy-numba.*] 30 | ignore_missing_imports = True 31 | 32 | [mypy-numpy] 33 | ignore_missing_imports = True 34 | 35 | [mypy-networkx] 36 | ignore_missing_imports = True 37 | 38 | [mypy-ot] 39 | ignore_missing_imports = True 40 | 41 | [mypy-pandas] 42 | ignore_missing_imports = True 43 | 44 | [mypy-pkg_resources] 45 | ignore_missing_imports = True 46 | 47 | [mypy-seaborn] 48 | ignore_missing_imports = True 49 | 50 | [mypy-scipy] 51 | ignore_missing_imports = True 52 | 53 | [mypy-scipy.*] 54 | ignore_missing_imports = True 55 | 56 | [mypy-sklearn.*] 57 | ignore_missing_imports = True 58 | 59 | [mypy-statsmodels.*] 60 | ignore_missing_imports = True 61 | 62 | [mypy-umap] 63 | ignore_missing_imports = True 64 | -------------------------------------------------------------------------------- /docs/license.rst: -------------------------------------------------------------------------------- 1 | License 2 | ======= 3 | graspologic is distributed with MIT license. 4 | 5 | :: 6 | 7 | Copyright (c) Microsoft Corporation and contributors. 
8 | 
9 |     MIT License
10 | 
11 |     Permission is hereby granted, free of charge, to any person obtaining a copy
12 |     of this software and associated documentation files (the "Software"), to deal
13 |     in the Software without restriction, including without limitation the rights
14 |     to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
15 |     copies of the Software, and to permit persons to whom the Software is
16 |     furnished to do so, subject to the following conditions:
17 | 
18 |     The above copyright notice and this permission notice shall be included in all
19 |     copies or substantial portions of the Software.
20 | 
21 |     THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22 |     IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23 |     FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
24 |     AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25 |     LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
26 |     OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
27 |     SOFTWARE.
--------------------------------------------------------------------------------
/docs/reference/release/release_0.0.1.rst:
--------------------------------------------------------------------------------
1 | Release Notes: GraSPy 0.0.1
2 | ===========================
3 | 
4 | We're happy to announce the release of GraSPy 0.0.1! GraSPy is a Python package for
5 | understanding the properties of random graphs that arise from modern datasets, such as social networks
6 | and brain networks.
7 | 
8 | For more information, please visit our `website `_
9 | and our `tutorials
10 | `_.
11 | 
12 | Highlights
13 | ----------
14 | This release is the result of over two years of work with 238 commits and 35 merges by 4 contributors.
15 | Highlights include:
16 | - Fast implementation of dimensionality reduction using different implementations of SVD.
17 | - Single and multiple graph embedding methods.
18 | - Methods for preprocessing graphs for meaningful embeddings.
19 | - Hypothesis testing, specifically semiparametric testing of two graphs.
20 | - Methods for clustering vertices or populations of graphs
21 | - Plotting functions for visualization of graphs and high dimensional data.
22 | 
23 | API Changes
24 | -----------
25 | All classes are based on scikit-learn's API, making the use familiar.
26 | 
27 | Deprecations
28 | ------------
29 | None.
30 | 
31 | Contributors to this release
32 | ----------------------------
33 | 
34 | - Jaewon Chung
35 | - Benjamin Pedigo
36 | - Eric Bridgeford
37 | - Bijan Varjavand
--------------------------------------------------------------------------------
/tests/test_preconditions.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft Corporation.
2 | # Licensed under the MIT license.
3 | 4 | import unittest 5 | 6 | from graspologic import preconditions 7 | 8 | 9 | class TestPreconditions(unittest.TestCase): 10 | def test_check_arguments(self): 11 | test_true_expressions = [1 < 3, 3 == 3, True, 1 == 1] 12 | test_false_expressions = [ 13 | 3 < 1, 14 | 3 != 3, 15 | None is True, 16 | 1 == "1", 17 | ] 18 | for resolved_expression in test_true_expressions: 19 | preconditions.check_argument(resolved_expression, "This should be true") 20 | 21 | for resolved_expression in test_false_expressions: 22 | with self.assertRaises(ValueError): 23 | preconditions.check_argument(resolved_expression, "This is false") 24 | 25 | def test_check_argument_types(self): 26 | preconditions.check_argument_types(1, int, "Some message") 27 | with self.assertRaises(TypeError): 28 | preconditions.check_argument_types(1, set, "This fails") 29 | 30 | def test_check_optional_argument_types(self): 31 | preconditions.check_optional_argument_types(1, int, "Some message") 32 | preconditions.check_optional_argument_types(None, int, "Some message") 33 | with self.assertRaises(TypeError): 34 | preconditions.check_optional_argument_types(1, set, "This fails") 35 | -------------------------------------------------------------------------------- /graspologic/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 3 | 4 | from .ptr import pass_to_ranks 5 | from .utils import ( 6 | LaplacianFormType, 7 | augment_diagonal, 8 | average_matrices, 9 | binarize, 10 | cartesian_product, 11 | fit_plug_in_variance_estimator, 12 | import_edgelist, 13 | import_graph, 14 | is_almost_symmetric, 15 | is_fully_connected, 16 | is_loopless, 17 | is_symmetric, 18 | is_unweighted, 19 | largest_connected_component, 20 | multigraph_lcc_intersection, 21 | multigraph_lcc_union, 22 | remap_labels, 23 | remap_node_ids, 24 | remove_loops, 25 | remove_vertices, 26 | symmetrize, 27 | to_laplacian, 28 | ) 29 | 30 | __all__ = [ 31 | "average_matrices", 32 | "import_graph", 33 | "import_edgelist", 34 | "is_symmetric", 35 | "is_loopless", 36 | "is_unweighted", 37 | "is_almost_symmetric", 38 | "symmetrize", 39 | "remove_loops", 40 | "to_laplacian", 41 | "LaplacianFormType", 42 | "is_fully_connected", 43 | "largest_connected_component", 44 | "multigraph_lcc_union", 45 | "multigraph_lcc_intersection", 46 | "augment_diagonal", 47 | "binarize", 48 | "cartesian_product", 49 | "pass_to_ranks", 50 | "fit_plug_in_variance_estimator", 51 | "remove_vertices", 52 | "remap_labels", 53 | "remap_node_ids", 54 | ] 55 | -------------------------------------------------------------------------------- /docs/reference/release/release_0.1.rst: -------------------------------------------------------------------------------- 1 | Release Notes: GraSPy 0.1 2 | ========================= 3 | 4 | We're happy to announce the release of GraSPy 0.1! GraSPy is a Python package for 5 | understanding the properties of random graphs that arise from modern datasets, such as 6 | social networks and brain networks. 7 | 8 | For more information, please visit our `website `_ 9 | and our `tutorials `_. 10 | 11 | 12 | Highlights 13 | ---------- 14 | This release is the result of over 2 months of work with over 18 pull requests by 15 | 3 contributors. Highlights include: 16 | 17 | - Added ``MultipleASE``, which is a new method for embedding population of graphs. 
18 | - Added ``mug2vec`` within ``pipieline`` module, which learns a feature vector for population of graphs. 19 | 20 | Improvements 21 | ------------ 22 | - Improved contribution guidelines. 23 | - Fixed bugs in ``GaussianCluster``. 24 | - ``symmeterize`` function now uses ``avg`` as default method. 25 | - Fixed ``dataset`` module loading errors. 26 | - Improve underlying `ER` sampling code. 27 | 28 | API Changes 29 | ----------- 30 | - Added ``sort_nodes`` argument for ``heatmap`` and ``gridplot`` functions. 31 | 32 | Deprecations 33 | ------------ 34 | None 35 | 36 | Contributors to this release 37 | ---------------------------- 38 | - `Jaewon Chung `_ 39 | - `Benjamin Pedigo `_ 40 | - `Kiki Zhang `_ (new contributor!) -------------------------------------------------------------------------------- /tests/layouts/nooverlap/test_grid_cell_creation.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 3 | 4 | import unittest 5 | 6 | from graspologic.layouts.nooverlap._node import _Node 7 | from graspologic.layouts.nooverlap._quad_node import _QuadNode 8 | 9 | 10 | class TestGridCellCreation(unittest.TestCase): 11 | def setUp(self): 12 | self.qn = _QuadNode( 13 | [_Node(99, 3, 7, 2, 0, "red"), _Node(100, 2, 9, 3, 0, "blue")], 5, 50 14 | ) 15 | 16 | def test_grid_cell_center(self): 17 | cell_x, cell_y, center_x, center_y = self.qn.find_grid_cell_and_center( 18 | 0, 0, 10, 50, 50 19 | ) 20 | self.assertEqual(cell_x, 2) 21 | self.assertEqual(cell_y, 2) 22 | self.assertEqual(center_x, 40) 23 | self.assertEqual(center_y, 40) 24 | 25 | def test_grid_cell_center2(self): 26 | cell_x, cell_y, center_x, center_y = self.qn.find_grid_cell_and_center( 27 | 0, 0, 10, 50, 40 28 | ) 29 | self.assertEqual(cell_x, 2) 30 | self.assertEqual(cell_y, 2) 31 | self.assertEqual(center_x, 40) 32 | self.assertEqual(center_y, 40) 33 | 34 | def test_grid_cell_center3(self): 35 | cell_x, cell_y, center_x, center_y = self.qn.find_grid_cell_and_center( 36 | 3, 4, 10, 53, 44 37 | ) 38 | self.assertEqual(cell_x, 2) 39 | self.assertEqual(cell_y, 2) 40 | self.assertEqual(center_x, 43) 41 | self.assertEqual(center_y, 44) 42 | 43 | 44 | if __name__ == "__main__": 45 | unittest.main() 46 | -------------------------------------------------------------------------------- /graspologic/datasets/drosophila/left_cell_labels.csv: -------------------------------------------------------------------------------- 1 | K 2 | K 3 | K 4 | K 5 | K 6 | K 7 | K 8 | K 9 | K 10 | K 11 | K 12 | K 13 | K 14 | K 15 | K 16 | K 17 | K 18 | K 19 | K 20 | K 21 | K 22 | K 23 | K 24 | K 25 | K 26 | K 27 | K 28 | K 29 | K 30 | K 31 | K 32 | K 33 | K 34 | K 35 | K 36 | K 37 | K 38 | K 39 | K 40 | K 41 | K 42 | K 43 | K 44 | K 45 | K 46 | K 47 | K 48 | K 49 | K 50 | K 51 | K 52 | K 53 | K 54 | K 55 | K 56 | K 57 | K 58 | K 59 | K 60 | K 61 | K 62 | K 63 | K 64 | K 65 | K 66 | K 67 | K 68 | K 69 | K 70 | K 71 | K 72 | K 73 | K 74 | K 75 | K 76 | K 77 | K 78 | K 79 | K 80 | K 81 | K 82 | K 83 | K 84 | K 85 | K 86 | K 87 | K 88 | K 89 | K 90 | K 91 | K 92 | K 93 | K 94 | K 95 | K 96 | K 97 | K 98 | K 99 | K 100 | K 101 | K 102 | I 103 | I 104 | I 105 | I 106 | I 107 | I 108 | I 109 | I 110 | I 111 | I 112 | I 113 | I 114 | I 115 | I 116 | I 117 | I 118 | I 119 | I 120 | I 121 | I 122 | I 123 | O 124 | O 125 | O 126 | O 127 | O 128 | O 129 | O 130 | O 131 | O 132 | O 133 | O 134 | O 135 | O 136 | O 137 | O 138 | O 139 | O 140 | O 141 | 
O 142 | O 143 | O 144 | O 145 | O 146 | O 147 | O 148 | O 149 | O 150 | O 151 | O 152 | P 153 | P 154 | P 155 | P 156 | P 157 | P 158 | P 159 | P 160 | P 161 | P 162 | P 163 | P 164 | P 165 | P 166 | P 167 | P 168 | P 169 | P 170 | P 171 | P 172 | P 173 | P 174 | P 175 | P 176 | P 177 | P 178 | P 179 | P 180 | P 181 | P 182 | P 183 | P 184 | P 185 | P 186 | P 187 | P 188 | P 189 | P 190 | P 191 | P 192 | P 193 | P 194 | P 195 | P 196 | P 197 | P 198 | P 199 | P 200 | P 201 | P 202 | P 203 | P 204 | P 205 | P 206 | P 207 | P 208 | P 209 | P 210 | -------------------------------------------------------------------------------- /tests/test_mug2vec.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 3 | 4 | import unittest 5 | 6 | import numpy as np 7 | 8 | from graspologic.cluster import GaussianCluster 9 | from graspologic.embed import mug2vec 10 | from graspologic.simulations import sbm 11 | 12 | 13 | def generate_data(): 14 | np.random.seed(2) 15 | 16 | p1 = [[0.3, 0.1], [0.1, 0.3]] 17 | p2 = [[0.1, 0.3], [0.3, 0.1]] 18 | n = [50, 50] 19 | 20 | g1 = [sbm(n, p1) for _ in range(20)] 21 | g2 = [sbm(n, p2) for _ in range(20)] 22 | g = g1 + g2 23 | 24 | y = ["0"] * 20 + ["1"] * 20 25 | 26 | return g, y 27 | 28 | 29 | class TestMug2Vec(unittest.TestCase): 30 | def test_mug2vec(self): 31 | graphs, labels = generate_data() 32 | 33 | mugs = mug2vec(pass_to_ranks=None, svd_seed=1) 34 | xhat = mugs.fit_transform(graphs) 35 | 36 | gmm = GaussianCluster(5) 37 | gmm.fit(xhat, labels) 38 | 39 | self.assertEqual(gmm.n_components_, 2) 40 | 41 | def test_inputs(self): 42 | graphs, labels = generate_data() 43 | 44 | mugs = mug2vec(omnibus_components=-1, svd_seed=1) 45 | with self.assertRaises(ValueError): 46 | mugs.fit(graphs) 47 | 48 | mugs = mug2vec(cmds_components=-1, svd_seed=1) 49 | with self.assertRaises(ValueError): 50 | mugs.fit(graphs) 51 | 52 | mugs = mug2vec(omnibus_n_elbows=-1, svd_seed=1) 53 | with self.assertRaises(ValueError): 54 | mugs.fit(graphs) 55 | 56 | mugs = mug2vec(cmds_n_elbows=-1, svd_seed=1) 57 | with self.assertRaises(ValueError): 58 | mugs.fit(graphs) 59 | -------------------------------------------------------------------------------- /graspologic/datasets/drosophila/right_cell_labels.csv: -------------------------------------------------------------------------------- 1 | K 2 | K 3 | K 4 | K 5 | K 6 | K 7 | K 8 | K 9 | K 10 | K 11 | K 12 | K 13 | K 14 | K 15 | K 16 | K 17 | K 18 | K 19 | K 20 | K 21 | K 22 | K 23 | K 24 | K 25 | K 26 | K 27 | K 28 | K 29 | K 30 | K 31 | K 32 | K 33 | K 34 | K 35 | K 36 | K 37 | K 38 | K 39 | K 40 | K 41 | K 42 | K 43 | K 44 | K 45 | K 46 | K 47 | K 48 | K 49 | K 50 | K 51 | K 52 | K 53 | K 54 | K 55 | K 56 | K 57 | K 58 | K 59 | K 60 | K 61 | K 62 | K 63 | K 64 | K 65 | K 66 | K 67 | K 68 | K 69 | K 70 | K 71 | K 72 | K 73 | K 74 | K 75 | K 76 | K 77 | K 78 | K 79 | K 80 | K 81 | K 82 | K 83 | K 84 | K 85 | K 86 | K 87 | K 88 | K 89 | K 90 | K 91 | K 92 | K 93 | K 94 | K 95 | K 96 | K 97 | K 98 | K 99 | K 100 | K 101 | I 102 | I 103 | I 104 | I 105 | I 106 | I 107 | I 108 | I 109 | I 110 | I 111 | I 112 | I 113 | I 114 | I 115 | I 116 | I 117 | I 118 | I 119 | I 120 | I 121 | I 122 | O 123 | O 124 | O 125 | O 126 | O 127 | O 128 | O 129 | O 130 | O 131 | O 132 | O 133 | O 134 | O 135 | O 136 | O 137 | O 138 | O 139 | O 140 | O 141 | O 142 | O 143 | O 144 | O 145 | O 146 | O 147 | O 148 | O 149 | O 150 | O 151 | P 152 
| P 153 | P 154 | P 155 | P 156 | P 157 | P 158 | P 159 | P 160 | P 161 | P 162 | P 163 | P 164 | P 165 | P 166 | P 167 | P 168 | P 169 | P 170 | P 171 | P 172 | P 173 | P 174 | P 175 | P 176 | P 177 | P 178 | P 179 | P 180 | P 181 | P 182 | P 183 | P 184 | P 185 | P 186 | P 187 | P 188 | P 189 | P 190 | P 191 | P 192 | P 193 | P 194 | P 195 | P 196 | P 197 | P 198 | P 199 | P 200 | P 201 | P 202 | P 203 | P 204 | P 205 | P 206 | P 207 | P 208 | P 209 | P 210 | P 211 | P 212 | P 213 | P 214 | -------------------------------------------------------------------------------- /graspologic/types.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 3 | 4 | """ 5 | This module includes common graspologic type hint declarations. 6 | """ 7 | 8 | import sys 9 | from typing import Optional, Union 10 | 11 | import networkx as nx 12 | import numpy as np 13 | import scipy.sparse as sp 14 | 15 | # an explanation, for those who come in the later times: 16 | # the following is because when type hinting first came out, Python 3.6 up to 3.8 17 | # (inclusive) specifically couldn't really handle having generics in the 18 | # list/dict/tuple/set whatever primitives that are in builtins 19 | # so we all used the typing module to provide their type signature 20 | # and then 3.9 said 'oh, we can fix that now, and also if you use typing we'll cut you', 21 | # so our choice was either to never support 3.9 onward, never support pre 3.9, or 22 | # do this... jankery 23 | # some things of note: mypy specifically looks for `sys.version_info` - so 24 | # using a `from sys import version_info` gets ignored, and you will get mypy errors 25 | # on top of that, doing `List = list` in the 3.9+ block doesn't work at all, so we 26 | # have to use this VERY specific syntax. if you want to test it, try it out, but as of 27 | # today, `from builtins import foo as Foo` is the right way to do it. 28 | # PEP 484 & PEP 585 Fun 29 | if sys.version_info >= (3, 9): 30 | from builtins import dict as Dict 31 | from builtins import list as List 32 | from builtins import set as Set 33 | from builtins import tuple as Tuple 34 | else: 35 | from typing import Dict, List, Set, Tuple 36 | 37 | AdjacencyMatrix = Union[np.ndarray, sp.csr_array] 38 | 39 | GraphRepresentation = Union[np.ndarray, sp.csr_array, nx.Graph] 40 | 41 | RngType = Optional[Union[int, np.integer, np.random.Generator]] 42 | 43 | __all__ = [ 44 | "AdjacencyMatrix", 45 | "Dict", 46 | "List", 47 | "GraphRepresentation", 48 | "RngType", 49 | "Set", 50 | "Tuple", 51 | ] 52 | -------------------------------------------------------------------------------- /docs/sphinx-ext/toctree_filter.py: -------------------------------------------------------------------------------- 1 | # Copied and modified from https://stackoverflow.com/questions/15001888/conditional-toctree-in-sphinx 2 | 3 | import re 4 | 5 | from sphinx.directives.other import TocTree 6 | 7 | 8 | def setup(app): 9 | app.add_config_value("toc_filter_exclude", [], "html") 10 | app.add_directive("toctree-filt", TocTreeFilt) 11 | return {"version": "1.0.0"} 12 | 13 | 14 | class TocTreeFilt(TocTree): 15 | """ 16 | Directive to notify Sphinx about the hierarchical structure of the docs, 17 | and to include a table-of-contents like tree in the current document. This 18 | version filters the entries based on a list of prefixes. 
We simply filter 19 | the content of the directive and call the super's version of run. The 20 | list of exclusions is stored in the **toc_filter_exclusion** list. Any 21 | table of content entry prefixed by one of these strings will be excluded. 22 | If `toc_filter_exclusion=['secret','draft']` then all toc entries of the 23 | form `:secret:ultra-api` or `:draft:new-features` will be excuded from 24 | the final table of contents. Entries without a prefix are always included. 25 | """ 26 | 27 | hasPat = re.compile("\s*(.*)$") 28 | 29 | # Remove any entries in the content that we dont want and strip 30 | # out any filter prefixes that we want but obviously don't want the 31 | # prefix to mess up the file name. 32 | def filter_entries(self, entries): 33 | excl = self.state.document.settings.env.config.toc_filter_exclude 34 | filtered = [] 35 | for e in entries: 36 | m = self.hasPat.match(e) 37 | if m != None: 38 | if not m.groups()[0] in excl: 39 | filtered.append(m.groups()[0]) 40 | else: 41 | filtered.append(e) 42 | return filtered 43 | 44 | def run(self): 45 | # Remove all TOC entries that should not be on display 46 | self.content = self.filter_entries(self.content) 47 | return super().run() 48 | -------------------------------------------------------------------------------- /docs/reference/release/release_0.3.rst: -------------------------------------------------------------------------------- 1 | .. _last-graspy-label: 2 | 3 | Release Notes: GraSPy 0.3 4 | ========================= 5 | 6 | We're happy to announce the release of GraSPy 0.3! GraSPy is a Python package for 7 | understanding the properties of random graphs that arise from modern datasets, such as 8 | social networks and brain networks. 9 | 10 | For more information, please visit our `website `_ 11 | and our `tutorials `_. 12 | 13 | 14 | Highlights 15 | ---------- 16 | This release is the result of over 5 months of work with over 11 pull requests by 17 | 7 contributors. Highlights include: 18 | 19 | - Added seeded graph matching as a capability for graph matching, renamed graph matching class to ``GraphMatch`` 20 | - Added functions for simulating a pair of correlated RDPG graphs. 21 | - Deprecated Python 3.5 22 | - Added different backend hypothesis tests for the ``LatentDistributionTest`` from Hyppo 23 | - Added a correction to make ``LatentDistributionTest`` valid for differently sized graphs 24 | 25 | Improvements 26 | ------------ 27 | - Updated default value of ``rescale`` in RDPG simulation 28 | - Updated default value of ``scaled`` in MASE estimation 29 | - Improved error throwing in ``AutoGMM`` 30 | - Clarified the API for ``inference`` submodule 31 | 32 | API Changes 33 | ----------- 34 | - ``FastApproximateQAP`` was renamed to ``GraphMatch`` 35 | - ``fit`` method of ``LatentDistributionTest`` and ``LatentPositionTest`` now returns self instead of a p-value 36 | 37 | Deprecations 38 | ------------ 39 | - Python 3.5 40 | 41 | Contributors to this release 42 | ---------------------------- 43 | - `Jaewon Chung `_ 44 | - `Benjamin Pedigo `_ 45 | - `Ali Saad-Eldin `_ 46 | - `Shan Qiu `_ 47 | - `Bijan Varjavand `_ 48 | - `Anton Alyakin `_ (new contributor!) 49 | - `Casey Weiner `_ (new contributor!) 
-------------------------------------------------------------------------------- /docs/reference/release/release_0.0.3.rst: -------------------------------------------------------------------------------- 1 | Release Notes: GraSPy 0.0.3 2 | =========================== 3 | 4 | We're happy to announce the release of GraSPy 0.0.3! GraSPy is a Python package for 5 | understanding the properties of random graphs that arise from modern datasets, such as 6 | social networks and brain networks. 7 | 8 | For more information, please visit our `website `_ 9 | and our `tutorials `_. 10 | 11 | 12 | Highlights 13 | ---------- 14 | This release is the result of over 2 months of work with over 16 pull requests by 15 | 4 contributors. Highlights include: 16 | 17 | - Optimization over covariance structures when using ``GaussianCluster`` 18 | - Standardized sorting for visualizing graphs when using ``heatmap`` or ``gridplot`` 19 | - Graph model classes for fitting several random graph models to input datasets 20 | - Improved customization for ``heatmaps`` and ``gridplots`` 21 | 22 | 23 | Improvements 24 | ------------ 25 | - Added badges to Github for arxiv paper and number of downloads 26 | - Remove author headers for individual source files 27 | - Fix bugs in documentation 28 | - Bug fix for calculating intersection of largest connected components between graphs 29 | - Pre-defined axes can be passed to ``heatmap`` for making subplot figures 30 | - Colormap objects and color bounds can be passed to ``heatmap`` directly 31 | 32 | API Changes 33 | ----------- 34 | - ``SemiparametricTest`` was renamed to ``LatentPositionTest`` 35 | - ``NonparametricTest`` was renamed to ``LatentDistributionTest`` 36 | - ``heatmap`` and ``gridplot`` accept ``hier_label_fontsize`` and ``title_pad`` kwargs 37 | 38 | Deprecations 39 | ------------ 40 | - The notebooks folder was removed from ``GraSPy`` 41 | - ``SemiparametricTest`` and ``NonparametricTest`` renamed (see above) 42 | 43 | Contributors to this release 44 | ---------------------------- 45 | - `Benjamin Pedigo `_ 46 | - `Jaewon Chung `_ 47 | - `Hayden Helm `_ (new contributor!) 48 | - `Alex Loftus `_ (new contributor!) -------------------------------------------------------------------------------- /docs/reference/release/release_0.2.rst: -------------------------------------------------------------------------------- 1 | Release Notes: GraSPy 0.2 2 | ========================= 3 | 4 | We're happy to announce the release of GraSPy 0.2! GraSPy is a Python package for 5 | understanding the properties of random graphs that arise from modern datasets, such as 6 | social networks and brain networks. 7 | 8 | For more information, please visit our `website `_ 9 | and our `tutorials `_. 10 | 11 | 12 | Highlights 13 | ---------- 14 | This release is the result of over 8 months of work with over 25 pull requests by 15 | 10 contributors. Highlights include: 16 | 17 | - Added ``AutoGMMCluster`` in ``cluster`` submodule. ``AutoGMMCluster`` is Python equivalent to ``mclust`` in R. 18 | - Added ``subgraph`` submodule, which detects vertices that maximally correlates to given features. 19 | - Added ``match`` submodule. Used for matching vertices from a pair of graphs with unknown vertex correspondence. 20 | - Added functions for simulating a pair of correlated ER and SBM graphs. 21 | 22 | Improvements 23 | ------------ 24 | - Diagonal augmentation is default behavior in AdjacencySpectralEmbed. 25 | - Added functionality in ``to_laplacian`` to allow for directed graphs. 
26 | - Updated docstrings. 27 | - Updated documentation website. 28 | - Various bug fixes. 29 | 30 | API Changes 31 | ----------- 32 | - Added ``**kwargs`` argument for ``heatmap``. 33 | 34 | Deprecations 35 | ------------ 36 | None 37 | 38 | Contributors to this release 39 | ---------------------------- 40 | - `Jaewon Chung `_ 41 | - `Benjamin Pedigo `_ 42 | - `Tommy Athey `_ (new contributor!) 43 | - `Jayanta Dey `_ (new contributor!) 44 | - `Iain Carmichael `_ (new contributor!) 45 | - `Shiyu Sun `_ (new contributor!) 46 | - `Ali Saad-Eldin `_ (new contributor!) 47 | - `Gun Kang `_ (new contributor!) 48 | - `Shan Qiu `_ (new contributor!) 49 | - `Ben Falk `_ (new contributor!) 50 | - `Jennifer Heiko `_ (new contributor!) -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # test files 2 | _.py 3 | _.ipynb 4 | 5 | # Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | # *.csv 10 | 11 | # C extensions 12 | *.so 13 | 14 | # Distribution / packaging 15 | .Python 16 | build/ 17 | develop-eggs/ 18 | dist/ 19 | downloads/ 20 | eggs/ 21 | .eggs/ 22 | lib/ 23 | lib64/ 24 | parts/ 25 | sdist/ 26 | var/ 27 | wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | MANIFEST 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .coverage 47 | .coverage.* 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | *.cover 52 | .hypothesis/ 53 | .pytest_cache/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | db.sqlite3 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build 73 | docs/reference/_build/ 74 | docs/tutorials/_build/ 75 | 76 | # PyBuilder 77 | target/ 78 | 79 | # Jupyter Notebook 80 | .ipynb_checkpoints 81 | 82 | # IPython 83 | profile_default/ 84 | ipython_config.py 85 | 86 | # pyenv 87 | .python-version 88 | 89 | # celery beat schedule file 90 | celerybeat-schedule 91 | 92 | # SageMath parsed files 93 | *.sage.py 94 | 95 | # Environments 96 | .env 97 | .venv 98 | env/ 99 | venv/ 100 | ENV/ 101 | env.bak/ 102 | venv.bak/ 103 | 104 | # Spyder project settings 105 | .spyderproject 106 | .spyproject 107 | 108 | # Rope project settings 109 | .ropeproject 110 | 111 | # mkdocs documentation 112 | /site 113 | 114 | # mypy 115 | .mypy_cache/ 116 | .dmypy.json 117 | dmypy.json 118 | 119 | # misc 120 | notebooks 121 | .vscode/ 122 | docs/bpedigo/ase_clustering.py 123 | *.pickle 124 | /graspy-env 125 | *.code-workspace 126 | *.png 127 | notebooks/bpedigo 128 | 129 | *.DS_Store 130 | .idea 131 | *.iml 132 | pip-wheel-metadata 133 | 134 | -------------------------------------------------------------------------------- /ROLES.md: -------------------------------------------------------------------------------- 1 | # Roles and Areas of Responsibility 2 | 3 | The following is a non-exhaustive list of the primary contributors to `graspologic` and 4 | their roles and areas of responsibilities. 
Please feel free to use this list to `@` 5 | specific contributors in your issues or pull requests that seem to line up best with 6 | your issue! 7 | 8 | ## Core Contributors 9 | 10 | ### Ali Saad-Eldin ([@asaadeldin11](https://github.com/asaadeldin11)) (he/him) 11 | Ali is a Masters Student at Johns Hopkins University. He contributes and reviews code 12 | mostly for the `match` and `embed` modules. 13 | 14 | ### Anton Alyakin ([@alyakin314](https://github.com/alyakin314)) 15 | Anton is an Assistant Research Engineer at Johns Hopkins University. His primary 16 | contributions to `graspologic` are within `align` and `inference` modules. Ask him 17 | anything about those. 18 | 19 | ### Benjamin Pedigo ([@bdpedigo](https://github.com/bdpedigo)) (he/him) 20 | Ben is a PhD student at Johns Hopkins University in the NeuroData lab. Ask Ben about 21 | network model fitting and sampling, clustering, and spectral embedding (`models`, 22 | `simulations`, `cluster`, and `embed`, respectfully). Ben is also happy to hear how we 23 | can improve our tutorials. 24 | 25 | ### Carolyn Buractaon ([@carolyncb](https://github.com/carolyncb)) (she/her) 26 | Carolyn is a Technical Program Manager at Microsoft. Ask Carolyn about where the project 27 | is going and how it’s organized. 28 | 29 | ### Dwayne Pryce ([@dwaynepryce](https://github.com/dwaynepryce)) (he/him) 30 | Dwayne Pryce is a Software Engineer at Microsoft Research. His primary contributions to 31 | `graspologic` are on the steering committee, quality of life utility functions, and 32 | build and release processes. 33 | 34 | ### Jaewon Chung ([@j1c](https://github.com/j1c)) (he/him) 35 | Jaewon is a PhD student at Johns Hopkins University. He is a maintainer and developer 36 | for `graspologic`, and is responsible for reviewing code contributions, merging pull 37 | requests, and making decisions on the `graspologic` API. 38 | 39 | ### Nick Caurvina ([@nyecarr](https://github.com/nyecarr)) (he/him) 40 | Nick is a Software Engineer at Microsoft Research. Ask Nick about the network 41 | embeddings and their application to business problems. 42 | -------------------------------------------------------------------------------- /docs/reference/in-the-wild.rst: -------------------------------------------------------------------------------- 1 | graspologic in the wild 2 | ======================= 3 | 4 | Below we include some examples of graspologic being used in projects. Feel free to make 5 | a pull request if you'd like to add to this list! 6 | 7 | Papers 8 | """""" 9 | * `Statistical connectomics `_: 10 | Authors in the NeuroData group at Johns Hopkins University "provide an overview from the perspective of statistical network science of 11 | the kinds of models, assumptions, problems, and applications that are theoretically and 12 | empirically justified for analysis of connectome data," using graspologic for most 13 | analysis and examples. 14 | 15 | * `Graph matching via optimal transport `_: 16 | Authors in the NeuroData group improved on a state-of-the-art technique for graph matching, making the algorithm faster and more accurate on 17 | larger and less-correlated networks. 18 | 19 | Educational materials 20 | """"""""""""""""""""" 21 | * `Network data science `_: 22 | A short course on network data science (using many tools in `graspologic`) taught at Johns Hopkins University. 
23 | 24 | * `Hands-on Network Machine Learning with Scikit-Learn and Graspologic `: 25 | A JupyterBook (in progress) on network machine learning based on the tools in `graspologic` and `scikit-learn`. Written 26 | by authors in the NeuroData group. 27 | 28 | Blog posts 29 | """""""""" 30 | * `Advancing organizational science using network machine learning to measure innovation in the workplace `_: 31 | Researchers at Microsoft Research and collaborators used tools from graspologic (network 32 | layout, partitioning and modularity) to understand how workplace collaboration networks 33 | changed during the COVID-19 pandemic. 34 | 35 | * `NeuroData notebooks `_ : 36 | Check out this page to see what some of the members of the NeuroData lab are working on 37 | using graspologic. 38 | -------------------------------------------------------------------------------- /docs/reference/install.rst: -------------------------------------------------------------------------------- 1 | Install 2 | ======= 3 | 4 | 5 | Below we assume you have the default Python environment already configured on 6 | your computer and you intend to install ``graspologic`` inside of it. If you want 7 | to create and work with Python virtual environments, please follow instructions 8 | on `venv `_ and `virtual 9 | environments `_. 10 | 11 | First, make sure you have the latest version of ``pip`` (the Python package manager) 12 | installed. If you do not, refer to the `Pip documentation 13 | `_ and install ``pip`` first. 14 | 15 | Install the released version 16 | ---------------------------- 17 | 18 | Install the current release of ``graspologic`` with ``pip``:: 19 | 20 | $ pip install graspologic 21 | 22 | To upgrade to a newer release use the ``--upgrade`` flag:: 23 | 24 | $ pip install --upgrade graspologic 25 | 26 | If you do not have permission to install software systemwide, you can 27 | install into your user directory using the ``--user`` flag:: 28 | 29 | $ pip install --user graspologic 30 | 31 | Alternatively, you can manually download ``graspologic`` from 32 | `PyPI `_. 33 | To install in this way, navigate to the download folder in your terminal and run 34 | `pip install graspologic-VERSION.tar.gz` 35 | 36 | Python package dependencies 37 | --------------------------- 38 | graspologic requires the following packages: 39 | 40 | - networkx 41 | - numpy 42 | - pandas 43 | - scikit-learn 44 | - scipy 45 | - seaborn 46 | 47 | 48 | Hardware requirements 49 | --------------------- 50 | `graspologic` package requires only a standard computer with enough RAM to support the in-memory operations. 51 | 52 | OS Requirements 53 | --------------- 54 | This package is supported for *Linux* and *macOS*. However, the package has been tested on the following systems: 55 | 56 | - Linux: N/A 57 | - macOS: N/A 58 | - Windows: N/A 59 | 60 | 61 | Testing 62 | ------- 63 | graspologic uses the Python ``pytest`` testing package. If you don't already have 64 | that package installed, follow the directions on the `pytest homepage 65 | `_. 66 | -------------------------------------------------------------------------------- /tests/pipeline/embed/test_embeddings.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT license. 
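# Orientation for the tests below (this summarises the assertions in this file,
# not the public documentation): ``Embeddings(labels, embeddings)`` pairs an array
# of node labels with a matching matrix of embedding vectors. Indexing yields
# ``(label, vector)`` tuples, iteration walks those pairs in order, ``len`` is the
# number of embedded nodes, and ``as_dict()`` returns a label -> vector mapping.
# A minimal sketch using made-up data:
#
#     labels = np.array(["a", "b"])
#     vectors = np.array([[0.1, 0.2], [0.3, 0.4]])
#     emb = Embeddings(labels, vectors)
#     emb.as_dict()["a"]    # -> array([0.1, 0.2])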
3 | 4 | import unittest 5 | 6 | import numpy as np 7 | from beartype.roar import BeartypeCallHintParamViolation 8 | 9 | from graspologic.pipeline.embed import Embeddings 10 | 11 | 12 | class TestEmbeddings(unittest.TestCase): 13 | def setUp(self) -> None: 14 | self.fake_embeddings = np.array([[0, 1, 2, 3], [5, 4, 3, 2], [3, 5, 1, 2]]) 15 | self.labels = np.array(["dax", "nick", "ben"]) 16 | self.embeddings = Embeddings(self.labels, self.fake_embeddings) 17 | 18 | def test_embeddings_index(self): 19 | for i in range(0, 3): 20 | entry = self.embeddings[i] 21 | self.assertEqual(self.labels[i], entry[0]) 22 | np.testing.assert_array_equal(self.fake_embeddings[i], entry[1]) 23 | 24 | def test_embeddings_iterable(self): 25 | labels = [] 26 | embeddings = [] 27 | for label, embedding in self.embeddings: 28 | labels.append(label) 29 | embeddings.append(embedding) 30 | 31 | np.testing.assert_array_equal(self.labels, labels) 32 | np.testing.assert_array_equal(self.fake_embeddings, embeddings) 33 | 34 | def test_embeddings_size(self): 35 | self.assertEqual(3, len(self.embeddings)) 36 | 37 | def test_view(self): 38 | expected = { 39 | "ben": np.array([3, 5, 1, 2]), 40 | "dax": np.array([0, 1, 2, 3]), 41 | "nick": np.array([5, 4, 3, 2]), 42 | } 43 | view = self.embeddings.as_dict() 44 | self.assertSetEqual(set(view.keys()), set(expected.keys())) 45 | for key in expected: 46 | np.testing.assert_array_equal(expected[key], view[key]) 47 | 48 | def test_argument_types(self): 49 | with self.assertRaises(BeartypeCallHintParamViolation): 50 | Embeddings(None, None) 51 | with self.assertRaises(BeartypeCallHintParamViolation): 52 | Embeddings(np.array(["hello"]), None) 53 | with self.assertRaises(BeartypeCallHintParamViolation): 54 | Embeddings(["hello"], [1.0]) 55 | with self.assertRaises(ValueError): 56 | Embeddings(np.array(["hello"]), np.array([[1.1, 1.2], [2.1, 2.2]])) 57 | -------------------------------------------------------------------------------- /tests/cluster/test_kclust.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 
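# Orientation for the tests below (behaviour as asserted in this file):
# ``KMeansCluster(max_clusters=k)`` sweeps candidate cluster counts, ``predict``
# raises ``NotFittedError`` before ``fit``, and the fitted object exposes
# per-candidate scores. A minimal sketch:
#
#     kclust = KMeansCluster(max_clusters=5)
#     kclust.fit(X, y)    # with labels: per-candidate ARI scores in kclust.ari_
#     kclust.fit(X)       # without labels: silhouette scores in kclust.silhouette_
#     labels = kclust.predict(X)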
3 | 4 | import unittest 5 | 6 | import numpy as np 7 | from numpy.testing import assert_equal 8 | from sklearn.exceptions import NotFittedError 9 | 10 | from graspologic.cluster.kclust import KMeansCluster 11 | 12 | 13 | class TestKMeansCluster(unittest.TestCase): 14 | def test_inputs(self): 15 | # Generate random data 16 | X = np.random.normal(0, 1, size=(100, 3)) 17 | 18 | with self.assertRaises(TypeError): 19 | max_clusters = "1" 20 | kclust = KMeansCluster(max_clusters=max_clusters) 21 | 22 | # max_cluster < 0 23 | with self.assertRaises(ValueError): 24 | kclust = KMeansCluster(max_clusters=-1) 25 | 26 | # max_cluster more than n_samples 27 | with self.assertRaises(ValueError): 28 | kclust = KMeansCluster(max_clusters=1000) 29 | kclust.fit_predict(X) 30 | 31 | def test_predict_without_fit(self): 32 | # Generate random data 33 | X = np.random.normal(0, 1, size=(100, 3)) 34 | 35 | with self.assertRaises(NotFittedError): 36 | kclust = KMeansCluster(max_clusters=2) 37 | kclust.predict(X) 38 | 39 | def test_outputs_gaussians(self): 40 | np.random.seed(2) 41 | 42 | n = 100 43 | d = 3 44 | num_sims = 10 45 | for _ in range(num_sims): 46 | X1 = np.random.normal(2, 0.5, size=(n, d)) 47 | X2 = np.random.normal(-2, 0.5, size=(n, d)) 48 | X = np.vstack((X1, X2)) 49 | y = np.repeat([0, 1], n) 50 | 51 | kclust = KMeansCluster(max_clusters=5) 52 | kclust.fit(X, y) 53 | aris = kclust.ari_ 54 | 55 | # Assert that the two cluster model is the best 56 | assert_equal(np.max(aris), 1) 57 | 58 | def test_no_y(self): 59 | np.random.seed(2) 60 | n = 100 61 | d = 3 62 | X1 = np.random.normal(2, 0.5, size=(n, d)) 63 | X2 = np.random.normal(-2, 0.5, size=(n, d)) 64 | X = np.vstack((X1, X2)) 65 | 66 | kclust = KMeansCluster(max_clusters=5) 67 | kclust.fit(X) 68 | 69 | assert_equal(np.argmax(kclust.silhouette_), 0) 70 | -------------------------------------------------------------------------------- /docs/reference/cli.rst: -------------------------------------------------------------------------------- 1 | CLI 2 | === 3 | 4 | In addition to the main library, there is also a CLI runnable module for automatically 5 | generating layouts for graphs in an edge list. 6 | 7 | You can run this from the command line like so: 8 | 9 | .. code-block:: bash 10 | 11 | python -m graspologic.layouts --help 12 | 13 | Which should return something like: 14 | 15 | .. code-block:: none 16 | 17 | usage: python -m graspologic.layouts [-h] [--verbose VERBOSE] {n2vumap,n2vtsne,render} ... 18 | 19 | Runnable module that automatically generates a layout of a graph by a provided edge list 20 | 21 | positional arguments: 22 | {n2vumap,n2vtsne,render} 23 | n2vumap Auto layout using UMAP for dimensionality reduction 24 | n2vtsne Auto layout using tSNE for dimensionality reduction 25 | render Renders a graph via an input file 26 | 27 | Of those commands, you can then do: 28 | 29 | .. code-block:: bash 30 | 31 | python -m graspologic.layouts n2vumap --help 32 | 33 | Which will return something like: 34 | 35 | .. code-block:: none 36 | 37 | usage: python -m graspologic.layouts n2vumap [-h] --edge_list EDGE_LIST [--skip_header] [--image_file IMAGE_FILE] [--location_file LOCATION_FILE] [--max_edges MAX_EDGES] [--dpi DPI] 38 | [--allow_overlaps] 39 | 40 | optional arguments: 41 | -h, --help show this help message and exit 42 | --edge_list EDGE_LIST 43 | edge list in csv file. must be source,target,weight. 44 | --skip_header skip first line in csv file, corresponding to header. 
45 | --image_file IMAGE_FILE 46 | output path and filename for generated image file. required if --location_file is omitted. 47 | --location_file LOCATION_FILE 48 | output path and filename for location file. required if --image_file is omitted. 49 | --max_edges MAX_EDGES 50 | maximum edges to keep during embedding. edges with low weights will be pruned to keep at most this many edges 51 | --dpi DPI used with --image_file to render an image at this dpi 52 | --allow_overlaps skip the no overlap algorithm and let nodes stack as per the results of the down projection algorithm 53 | -------------------------------------------------------------------------------- /graspologic/layouts/nooverlap/_quad_tree.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT license. 3 | from typing import Optional 4 | 5 | from graspologic.types import List, Tuple 6 | 7 | from ._node import _Node 8 | from ._quad_node import _QuadNode 9 | 10 | 11 | class _QuadTree: 12 | # used to hold objects that have x, y, and mass property 13 | # nodes = [] 14 | 15 | def __init__(self, nodes: List[_Node], max_nodes_per_quad: int): 16 | self.nodes = nodes 17 | self.root = _QuadNode(nodes, 0, max_nodes_per_quad, None) 18 | 19 | def get_quad_density_list(self) -> List[Tuple[float, float, int, _QuadNode]]: 20 | density_list = self.root.get_density_list() 21 | return sorted(density_list, reverse=True) 22 | 23 | def layout_graph(self) -> List[_Node]: 24 | return self.layout_dense_first() 25 | 26 | def tree_stats(self) -> List[float]: 27 | results = self.root.quad_stats() 28 | return list(results) + [ 29 | results[3] / len(self.nodes), 30 | results[4] / len(self.nodes), 31 | self.root.sq_ratio, 32 | ] 33 | 34 | def collect_nodes(self) -> List[_Node]: 35 | ret_val: List[_Node] = [] 36 | self.root.collect_nodes(ret_val) 37 | return ret_val 38 | 39 | def get_tree_node_bounds(self) -> List[Tuple[int, float, float, float, float]]: 40 | ret_val: List[Tuple[int, float, float, float, float]] = [] 41 | self.root.boxes_by_level(ret_val) 42 | return ret_val 43 | 44 | def count_overlaps(self) -> int: 45 | return self.root.num_overlapping() 46 | 47 | def count_overlaps_across_quads(self) -> int: 48 | return self.root.num_overlapping_across_quads(self.root.nodes) 49 | 50 | def layout_dense_first(self, first_color: Optional[str] = None) -> List[_Node]: 51 | den_list = list(self.get_quad_density_list()) 52 | first = True 53 | # count = 0 54 | for cell_density, density_ratio, cell_count, qn in den_list: 55 | # print ('cell density', cell_density, 'sq_density', density_ratio, 'cell_count', cell_count) 56 | qn.layout_quad() 57 | if first: 58 | if first_color is not None and qn.parent is not None: 59 | for n in qn.parent.nodes: 60 | n.color = first_color #'#FF0004' 61 | first = False 62 | return self.nodes 63 | -------------------------------------------------------------------------------- /docs/reference/release/release_0.0.2.rst: -------------------------------------------------------------------------------- 1 | Release Notes: GraSPy 0.0.2 2 | =========================== 3 | 4 | We're happy to announce the release of GraSPy 0.0.2! GraSPy is a Python package for 5 | understanding the properties of random graphs that arise from modern datasets, such as social networks 6 | and brain networks. 7 | 8 | For more information, please visit our `website `_ 9 | and our `tutorials `_. 
10 | 11 | 12 | Highlights 13 | ---------- 14 | This release is the result of 3 months of work with over 16 pull requests by 5 contributors. Highlights include: 15 | 16 | - Nonparametric hypothesis testing method for testing two non-vertex matched graphs. 17 | - Plotting updates to ``pairplot``, ``gridplot`` and ``heatmaps``. 18 | - Sampling degree-corrected stochastic block models (DC-SBM). 19 | - ``import_edgelist`` function for importing single or multiple edgelists. 20 | - Enforcing ``Black`` formatting for the package. 21 | 22 | Improvements 23 | ------------ 24 | - Embedding methods are now fully sklearn-compliant. This is tested via the ``check_estimator`` function in sklearn. 25 | - ``gridplot`` and ``heatmap`` can now plot hierarchical labels. 26 | - New Laplacian computing method ('R-DAD') by adding a constant to the diagonal degree matrix. 27 | - Semiparametric testing only checks for the largest connected component (LCC) in the initial embeddings. 28 | - Various bug fixes. 29 | - Various tutorial latex fixes. 30 | - Various documentation clarifications. 31 | - More consistent documentation. 32 | 33 | API Changes 34 | ----------- 35 | - ``check_lcc`` argument in ``AdjacencySpectralEmbed``, ``LaplacianSpectralEmbed``, and ``OmnibusEmbed`` classes, which checks if input graph(s) are fully connected when ``check_lcc`` is True. 36 | - ``gridplot`` and ``heatmap`` now have ``inner_hier_labels`` and ``outer_hier_labels`` arguments, which are used for hierarchical labeling of nodes. 37 | - ``to_laplacian`` function now has a ``regularizer`` arg for when ``form`` is 'R-DAD'. 38 | - ``sbm`` function now has ``dc`` and ``dc_kws`` arguments for sampling SBM with degree-correction. 39 | 40 | Deprecations 41 | ------------ 42 | None. 43 | 44 | Contributors to this release 45 | ---------------------------- 46 | - `Benjamin Pedigo `_ 47 | - `Jaewon Chung `_ 48 | - `Bijan Varjavand `_ 49 | - `Vikram Chandrashekhar `_ 50 | - `Ronan Perry `_ 51 | -------------------------------------------------------------------------------- /graspologic/inference/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from ..types import AdjacencyMatrix 4 | 5 | 6 | def compute_density(adjacency: AdjacencyMatrix, loops: bool = False) -> float: 7 | """ 8 | For a given graph, this function computes the graph density, defined as the actual number of edges divided by the total possible number 9 | of edges in the graph. 10 | 11 | Parameters 12 | ---------- 13 | adjacency: int, array shape (n_nodes,n_nodes) 14 | The adjacency matrix for the graph. Edges are denoted by 1s while non-edges are denoted by 0s. 15 | 16 | loops: boolean 17 | Optional variable to select whether to include self-loops (i.e. connections between a node and itself). Default is ``False``, meaning 18 | such connections are ignored. 19 | 20 | Returns 21 | ------- 22 | n_edges/n_possible: float 23 | The computed density, calculated as the total number of edges divided by the total number of possible edges. 24 | 25 | """ 26 | n_edges = np.count_nonzero(adjacency) 27 | n_nodes = adjacency.shape[0] 28 | n_possible = n_nodes**2 29 | if not loops: 30 | n_possible -= n_nodes 31 | return n_edges / n_possible 32 | 33 | 34 | def compute_density_adjustment( 35 | adjacency1: AdjacencyMatrix, adjacency2: AdjacencyMatrix 36 | ) -> float: 37 | """ 38 | Computes the density adjustment to be used when testing the hypothesis that the density of one network is equal to a fixed parameter 39 | times the density of a second network.
This function first calls the compute_density function above to compute the densities of both 40 | networks, then computes an odds ratio by calculating the odds of an edge in each network and taking the ratio of the results. 41 | 42 | Parameters 43 | ---------- 44 | adjacency1: int, array of size (n_nodes1,n_nodes1) 45 | Adjacency matrix for the first graph. 1s represent edges while 0s represent the absence of an edge. The array is a square of side length 46 | n_nodes1, where this corresponds to the number of nodes in graph 1. 47 | 48 | adjacency2: int, array of size (n_nodes2,n_nodes2) 49 | Same as above, but for the second graph. 50 | 51 | Returns 52 | --------- 53 | odds_ratio: float 54 | Computed as the ratio of the odds of an edge in graph 1 to the odds of an edge in graph 2. 55 | 56 | """ 57 | density1 = compute_density(adjacency1) 58 | density2 = compute_density(adjacency2) 59 | # return density1 / density2 60 | odds1 = density1 / (1 - density1) 61 | odds2 = density2 / (1 - density2) 62 | odds_ratio = odds1 / odds2 63 | return odds_ratio 64 | -------------------------------------------------------------------------------- /graspologic/pipeline/embed/_elbow.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT license. 3 | 4 | 5 | from typing import Union 6 | 7 | import numpy as np 8 | from scipy.stats import norm 9 | 10 | from graspologic.types import List, Tuple 11 | 12 | 13 | def _compute_likelihood(arr: np.ndarray) -> np.ndarray: 14 | """ 15 | Computes the log likelihoods based on normal distribution given 16 | a 1d-array of sorted values. If the input has no variance, 17 | the likelihood will be nan. 18 | """ 19 | n_elements = len(arr) 20 | likelihoods = np.zeros(n_elements) 21 | 22 | for idx in range(1, n_elements + 1): 23 | # split into two samples 24 | s1 = arr[:idx] 25 | s2 = arr[idx:] 26 | 27 | # deal with when input only has 2 elements 28 | if (s1.size == 1) & (s2.size == 1): 29 | likelihoods[idx - 1] = -np.inf 30 | continue 31 | 32 | # compute means 33 | mu1 = np.mean(s1) 34 | if s2.size != 0: 35 | mu2 = np.mean(s2) 36 | else: 37 | # Prevent numpy warning for taking mean of empty array 38 | mu2 = -np.inf 39 | 40 | # compute pooled variance 41 | variance = (np.sum((s1 - mu1) ** 2) + np.sum((s2 - mu2) ** 2)) / ( 42 | n_elements - 1 - (idx < n_elements) 43 | ) 44 | std = np.sqrt(variance) 45 | 46 | # compute log likelihoods 47 | likelihoods[idx - 1] = np.sum(norm.logpdf(s1, loc=mu1, scale=std)) + np.sum( 48 | norm.logpdf(s2, loc=mu2, scale=std) 49 | ) 50 | 51 | return likelihoods 52 | 53 | 54 | def _find_elbows(priority_ordered_matrix: np.ndarray, n_elbows: int) -> List[int]: 55 | # use Ghodsi & Zhu method for finding elbow 56 | idx = 0 57 | elbows = [] 58 | for _ in range(n_elbows): 59 | arr = priority_ordered_matrix[idx:] 60 | if arr.size <= 1: # Cant compute likelihoods with 1 numbers 61 | break 62 | lq = _compute_likelihood(arr) 63 | idx += int(np.argmax(lq)) + 1 64 | elbows.append(idx) 65 | 66 | return elbows 67 | 68 | 69 | def _index_of_elbow( 70 | priority_ordered_matrix: Union[np.ndarray, Tuple[np.ndarray, np.ndarray]], 71 | n_elbows: int, 72 | ) -> int: 73 | if isinstance(priority_ordered_matrix, tuple): 74 | left_elbows = _find_elbows(priority_ordered_matrix[0], n_elbows) 75 | right_elbows = _find_elbows(priority_ordered_matrix[1], n_elbows) 76 | return max(left_elbows[-1], right_elbows[-1]) 77 | else: 78 | elbows = _find_elbows(priority_ordered_matrix, n_elbows) 
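        # _find_elbows may stop early and return fewer than n_elbows entries;
        # the last elbow found is used as the cut-off index for the embedding dimension.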
79 | return elbows[-1] 80 | -------------------------------------------------------------------------------- /tests/layouts/test_auto.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 3 | 4 | import random 5 | import unittest 6 | 7 | import networkx as nx 8 | import numpy as np 9 | 10 | from graspologic.layouts.auto import _get_bounds, layout_umap 11 | 12 | 13 | class TestAuto(unittest.TestCase): 14 | def test_get_bounds(self): 15 | y = np.array([(1, 2), (4, 5), (-1, -2), (10, -20)]) 16 | minx, miny, maxx, maxy = _get_bounds(y) 17 | self.assertEqual(-1, minx) 18 | self.assertEqual(-20, miny) 19 | self.assertEqual(10, maxx) 20 | self.assertEqual(5, maxy) 21 | 22 | def test_layout_umap_string_node_ids(self): 23 | graph = nx.florentine_families_graph() 24 | 25 | for s, t in graph.edges(): 26 | graph.add_edge(s, t, weight=1) 27 | 28 | _, node_positions = layout_umap(graph=graph) 29 | 30 | self.assertEqual(len(node_positions), len(graph.nodes())) 31 | 32 | def test_layout_umap_int_node_ids(self): 33 | graph = nx.florentine_families_graph() 34 | graph_int_node_ids = nx.Graph() 35 | ids_as_ints = dict() 36 | 37 | for s, t in graph.edges(): 38 | if s not in ids_as_ints: 39 | ids_as_ints[s] = int(len(ids_as_ints.keys())) 40 | 41 | if t not in ids_as_ints: 42 | ids_as_ints[t] = int(len(ids_as_ints.keys())) 43 | 44 | graph_int_node_ids.add_edge(ids_as_ints[s], ids_as_ints[t], weight=1) 45 | 46 | _, node_positions = layout_umap(graph=graph_int_node_ids) 47 | 48 | self.assertEqual(len(node_positions), len(graph.nodes())) 49 | 50 | def test_layout_umap_directed_weighted(self): 51 | graph = nx.erdos_renyi_graph(10, 0.7, directed=True) 52 | 53 | for s, t in graph.edges(): 54 | graph.edges[s, t]["weight"] = np.random.randint(1, 10) 55 | 56 | _, node_positions = layout_umap(graph=graph) 57 | 58 | self.assertEqual(len(node_positions), len(graph.nodes())) 59 | 60 | def test_layout_umap_directed_unweighted(self): 61 | graph = nx.erdos_renyi_graph(10, 0.7, directed=True) 62 | 63 | _, node_positions = layout_umap(graph=graph) 64 | 65 | self.assertEqual(len(node_positions), len(graph.nodes())) 66 | 67 | def test_exercise_approximate_prune(self): 68 | form = nx.erdos_renyi_graph(100, 0.7, directed=False) 69 | graph = nx.Graph() 70 | rng = random.Random(12345) 71 | for source, target in form.edges(): 72 | graph.add_edge(str(source), str(target), weight=rng.uniform(0.0, 10.0)) 73 | 74 | result_graph, positions = layout_umap(graph, max_edges=100) 75 | self.assertTrue(result_graph.number_of_edges() <= 100) 76 | 77 | 78 | if __name__ == "__main__": 79 | unittest.main() 80 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Security 4 | 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/). 
6 | 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/opensource/security/definition), please report it to us as described below. 8 | 9 | ## Reporting Security Issues 10 | 11 | **Please do not report security vulnerabilities through public GitHub issues.** 12 | 13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/opensource/security/create-report). 14 | 15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/opensource/security/pgpkey). 16 | 17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://aka.ms/opensource/security/msrc). 18 | 19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 20 | 21 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) 22 | * Full paths of source file(s) related to the manifestation of the issue 23 | * The location of the affected source code (tag/branch/commit or direct URL) 24 | * Any special configuration required to reproduce the issue 25 | * Step-by-step instructions to reproduce the issue 26 | * Proof-of-concept or exploit code (if possible) 27 | * Impact of the issue, including how an attacker might exploit the issue 28 | 29 | This information will help us triage your report more quickly. 30 | 31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/opensource/security/bounty) page for more details about our active programs. 32 | 33 | ## Preferred Languages 34 | 35 | We prefer all communications to be in English. 36 | 37 | ## Policy 38 | 39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/opensource/security/cvd). 40 | 41 | 42 | -------------------------------------------------------------------------------- /tests/test_base_embed.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 
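# Orientation for the tests below (summarising this file's assertions, not the
# public docs): ``BaseSpectralEmbed._reduce_dim`` populates ``latent_left_`` with
# shape (n, n_components); for directed graphs it also populates
# ``latent_right_``, and with ``concat=True`` the ``fit_transform`` output stacks
# the two into an (n, 2 * n_components) array. Requesting all n dimensions only
# works with ``algorithm="full"``; "truncated" and "randomized" raise ``ValueError``.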
3 | 4 | import unittest 5 | 6 | import numpy as np 7 | 8 | import graspologic as gs 9 | from graspologic.embed.base import BaseSpectralEmbed 10 | from graspologic.simulations.simulations import er_nm, er_np 11 | 12 | 13 | class TestBaseEmbed(unittest.TestCase): 14 | @classmethod 15 | def setup_class(cls): 16 | # simple ERxN graph 17 | cls.n = 20 18 | cls.p = 0.5 19 | cls.A = er_np(cls.n, cls.p, directed=True, loops=False) 20 | 21 | def test_baseembed_er(self): 22 | n_components = 4 23 | embed = BaseSpectralEmbed(n_components=n_components) 24 | n = 10 25 | M = 20 26 | A = er_nm(n, M) + 5 27 | embed._reduce_dim(A) 28 | self.assertEqual(embed.latent_left_.shape, (n, n_components)) 29 | self.assertTrue(embed.latent_right_ is None) 30 | 31 | def test_baseembed_er_directed(self): 32 | n_components = 4 33 | embed = BaseSpectralEmbed(n_components=n_components) 34 | n = 10 35 | M = 20 36 | A = er_nm(n, M, directed=True) 37 | embed._reduce_dim(A) 38 | self.assertEqual(embed.latent_left_.shape, (n, n_components)) 39 | self.assertEqual(embed.latent_right_.shape, (n, n_components)) 40 | self.assertTrue(embed.latent_right_ is not None) 41 | 42 | def test_baseembed_er_directed_concat(self): 43 | n_components = 4 44 | embed = BaseSpectralEmbed(n_components=n_components, concat=True) 45 | n = 10 46 | M = 20 47 | A = er_nm(n, M, directed=True) 48 | embed._reduce_dim(A) 49 | out = embed.fit_transform(A) 50 | self.assertEqual(out.shape, (n, 2 * n_components)) 51 | self.assertTrue(embed.latent_right_ is not None) 52 | 53 | def test_baseembed(self): 54 | embed = BaseSpectralEmbed(n_components=None) 55 | n = 10 56 | M = 20 57 | A = er_nm(n, M) + 5 58 | embed._reduce_dim(A) 59 | 60 | def test_algorithms(self): 61 | embed = BaseSpectralEmbed(n_components=self.n, algorithm="full") 62 | embed._reduce_dim(self.A) 63 | self.assertEqual(embed.latent_left_.shape, (self.n, self.n)) 64 | self.assertEqual(embed.latent_right_.shape, (self.n, self.n)) 65 | 66 | # When algoritm != 'full', cannot decompose to all dimensions 67 | embed = BaseSpectralEmbed(n_components=self.n, algorithm="truncated") 68 | with self.assertRaises(ValueError): 69 | embed._reduce_dim(self.A) 70 | 71 | embed = BaseSpectralEmbed(n_components=self.n, algorithm="randomized") 72 | with self.assertRaises(ValueError): 73 | embed._reduce_dim(self.A) 74 | 75 | def test_input_checks(self): 76 | with self.assertRaises(TypeError): 77 | BaseSpectralEmbed(n_components=self.n, concat=42) 78 | -------------------------------------------------------------------------------- /graspologic/cluster/base.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 3 | 4 | from abc import ABC, abstractmethod 5 | from typing import Any, Optional 6 | 7 | import numpy as np 8 | from sklearn.base import BaseEstimator, ClusterMixin 9 | from sklearn.metrics import adjusted_rand_score 10 | from sklearn.utils.validation import check_is_fitted 11 | 12 | 13 | class BaseCluster(ABC, BaseEstimator, ClusterMixin): 14 | """ 15 | Base clustering class. 16 | """ 17 | 18 | @abstractmethod 19 | def fit(self, X: np.ndarray, y: Optional[Any] = None) -> "BaseCluster": 20 | """ 21 | Compute clusters based on given method. 22 | 23 | Parameters 24 | ---------- 25 | X : array-like, shape (n_samples, n_features) 26 | List of n_features-dimensional data points. Each row 27 | corresponds to a single data point. 
28 | 29 | y : array-like, shape (n_samples,), optional (default=None) 30 | List of labels for X if available. Used to compute 31 | ARI scores. 32 | 33 | Returns 34 | ------- 35 | self 36 | """ 37 | 38 | def predict( 39 | self, X: np.ndarray, y: Optional[Any] = None 40 | ) -> np.ndarray: # pragma: no cover 41 | """ 42 | Predict clusters based on best model. 43 | 44 | Parameters 45 | ---------- 46 | X : array-like, shape (n_samples, n_features) 47 | List of n_features-dimensional data points. Each row 48 | corresponds to a single data point. 49 | y : array-like, shape (n_samples, ), optional (default=None) 50 | List of labels for X if available. Used to compute 51 | ARI scores. 52 | 53 | Returns 54 | ------- 55 | labels : array, shape (n_samples,) 56 | Component labels. 57 | """ 58 | # Check if fit is already called 59 | check_is_fitted(self, ["model_"], all_or_any=all) 60 | labels = self.model_.predict(X) 61 | 62 | return labels 63 | 64 | def fit_predict( 65 | self, X: np.ndarray, y: Optional[Any] = None 66 | ) -> np.ndarray: # pragma: no cover 67 | """ 68 | Fit the models and predict clusters based on best model. 69 | 70 | Parameters 71 | ---------- 72 | X : array-like, shape (n_samples, n_features) 73 | List of n_features-dimensional data points. Each row 74 | corresponds to a single data point. 75 | 76 | y : array-like, shape (n_samples,), optional (default=None) 77 | List of labels for X if available. Used to compute 78 | ARI scores. 79 | 80 | Returns 81 | ------- 82 | labels : array, shape (n_samples,) 83 | Component labels. 84 | """ 85 | self.fit(X, y) 86 | 87 | labels = self.predict(X, y) 88 | return labels 89 | -------------------------------------------------------------------------------- /tests/partition/test_modularity.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT license. 
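# Orientation for the tests below: ``modularity(graph, partitions)`` takes a
# weighted networkx graph and a node -> community mapping, and
# ``modularity_components`` returns the per-community contributions, so the
# overall score is the sum of those components. Per the worked arithmetic in
# test_modularity below, each community contributes
#     (within-community degree / (2 * m)) - (community degree / (2 * m)) ** 2
# where m is the total edge weight of the graph.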
3 | 4 | import unittest 5 | from typing import Dict 6 | 7 | import networkx as nx 8 | import numpy as np 9 | 10 | from graspologic.partition import modularity, modularity_components 11 | from tests.utils import data_file 12 | 13 | 14 | def _modularity_graph() -> nx.Graph: 15 | graph = nx.Graph() 16 | graph.add_edge("a", "b", weight=4.0) 17 | graph.add_edge("b", "c", weight=3.0) 18 | graph.add_edge("e", "f", weight=5.0) 19 | 20 | return graph 21 | 22 | 23 | _PARTITIONS: Dict[str, int] = {"a": 0, "b": 0, "c": 0, "e": 1, "f": 1} 24 | 25 | 26 | class TestModularity(unittest.TestCase): 27 | def test_modularity(self): 28 | graph = _modularity_graph() # links = 12.0 29 | partition = _PARTITIONS # in community degree for -> 0: 14, 1: 10, community degree -> 0:14, 1:10 30 | # modularity component for partition 0: (14.0 / (2.0 * 12.0)) - (1.0 * ((14.0 / (2 * 12.0)) ** 2.0)) 31 | # (cont): 0.5833333333333334 - 0.34027777777777785 = 0.24305555555555552 32 | # modularity component for partition 1: (10.0 / (2.0 * 12.0)) - (1.0 * ((10.0 / (2 * 12.0)) ** 2.0)) 33 | # (cont): 0.4166666666666667 - 0.17361111111111113 = 0.24305555555555555 34 | modularity_value = modularity(graph, partition) 35 | 36 | np.testing.assert_almost_equal(0.48611111111111105, modularity_value) 37 | 38 | def test_modularity_components(self): 39 | graph = nx.Graph() 40 | with open(data_file("large-graph.csv"), "r") as edge_list_io: 41 | for line in edge_list_io: 42 | source, target, weight = line.strip().split(",") 43 | previous_weight = graph.get_edge_data(source, target, {"weight": 0})[ 44 | "weight" 45 | ] 46 | weight = float(weight) + previous_weight 47 | graph.add_edge(source, target, weight=weight) 48 | 49 | partitions = {} 50 | with open(data_file("large-graph-partitions.csv"), "r") as communities_io: 51 | for line in communities_io: 52 | vertex, comm = line.strip().split(",") 53 | partitions[vertex] = int(comm) 54 | 55 | partition_count = max(partitions.values()) 56 | 57 | graph.add_node("disconnected_node") 58 | partitions["disconnected_node"] = partition_count + 1 59 | 60 | components = modularity_components(graph, partitions) 61 | 62 | # from python-louvain modularity function 63 | community_modularity = 0.8008595783563607 64 | total_modularity = sum(components.values()) 65 | 66 | self.assertSetEqual(set(components.keys()), set(partitions.values())) 67 | self.assertEqual(0, components[partition_count + 1]) 68 | 69 | np.testing.assert_almost_equal( 70 | community_modularity, total_modularity, decimal=8 71 | ) 72 | -------------------------------------------------------------------------------- /docs/tutorials/plotting/gridplot.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Gridplot: Visualize Multiple Graphs\n", 8 | "\n", 9 | "This example provides how to visualize graphs using the gridplot." 
10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import graspologic\n", 19 | "\n", 20 | "import numpy as np\n", 21 | "%matplotlib inline" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "## Overlaying two sparse graphs using gridplot\n", 29 | "\n", 30 | "### Simulate more graphs using weighted stochastic block models\n", 31 | "The 2-block model is defined as below:\n", 32 | "\n", 33 | "\\begin{align*}\n", 34 | "P = \n", 35 | "\\begin{bmatrix}0.25 & 0.05 \\\\\n", 36 | "0.05 & 0.25\n", 37 | "\\end{bmatrix}\n", 38 | "\\end{align*}\n", 39 | "\n", 40 | "We generate two weighted SBMs where the weights are distributed from a discrete uniform(1, 10) and discrete uniform(2, 5)." 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "from graspologic.simulations import sbm\n", 50 | "\n", 51 | "n_communities = [50, 50]\n", 52 | "p = np.array([[0.25, 0.05], [0.05, 0.25]])\n", 53 | "wt = np.random.randint\n", 54 | "wtargs = dict(low=1, high=10)\n", 55 | "\n", 56 | "np.random.seed(1)\n", 57 | "A_unif1= sbm(n_communities, p, wt=wt, wtargs=wtargs)\n", 58 | "\n", 59 | "wtargs = dict(low=2, high=5)\n", 60 | "A_unif2= sbm(n_communities, p, wt=wt, wtargs=wtargs)" 61 | ] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "metadata": {}, 66 | "source": [ 67 | "## Visualizing both graphs" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "metadata": {}, 74 | "outputs": [], 75 | "source": [ 76 | "from graspologic.plot import gridplot\n", 77 | "\n", 78 | "X = [A_unif1, A_unif2]\n", 79 | "labels = [\"Uniform(1, 10)\", \"Uniform(2, 5)\"]\n", 80 | "\n", 81 | "f = gridplot(X=X, \n", 82 | " labels=labels, \n", 83 | " title='Two Weighted Stochastic Block Models', \n", 84 | " height=12, \n", 85 | " font_scale=1.5)" 86 | ] 87 | } 88 | ], 89 | "metadata": { 90 | "kernelspec": { 91 | "display_name": "Python 3", 92 | "language": "python", 93 | "name": "python3" 94 | }, 95 | "language_info": { 96 | "codemirror_mode": { 97 | "name": "ipython", 98 | "version": 3 99 | }, 100 | "file_extension": ".py", 101 | "mimetype": "text/x-python", 102 | "name": "python", 103 | "nbconvert_exporter": "python", 104 | "pygments_lexer": "ipython3", 105 | "version": "3.7.0" 106 | } 107 | }, 108 | "nbformat": 4, 109 | "nbformat_minor": 4 110 | } 111 | -------------------------------------------------------------------------------- /graspologic/inference/binomial.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | from typing import Literal 3 | 4 | import numpy as np 5 | from scipy.stats import chi2_contingency, fisher_exact 6 | from statsmodels.stats.proportion import test_proportions_2indep 7 | 8 | BinomialResult = namedtuple("BinomialResult", ["stat", "pvalue"]) 9 | BinomialTestMethod = Literal["score", "fisher", "chi2"] 10 | 11 | 12 | def binom_2samp( 13 | x1: int, 14 | n1: int, 15 | x2: int, 16 | n2: int, 17 | null_ratio: float = 1.0, 18 | method: BinomialTestMethod = "score", 19 | ) -> BinomialResult: 20 | """ 21 | This function computes the likelihood that two binomial samples are drown from 22 | identical underlying distributions. Null hypothesis is that the success probability 23 | for each sample is identical (i.e. 
p1 = p2), and this function returns the 24 | probability that the null hypothesis is accurate, under a variety of potential 25 | statistical tests (default is score test). 26 | 27 | Parameters 28 | ---------- 29 | x1 : int 30 | Success count for group 1 31 | n1 : int 32 | The number of possible successes for group 1 33 | x2 : int 34 | Success count for group 2 35 | n2 : int 36 | The number of possible successes for group 2 37 | null_ratio : float, optional 38 | Optional parameter for testing whether p1 is a fixed ratio larger or smaller 39 | than p2, i.e. p1 = cp2, where c is the null_ratio. Default is 1.0. This 40 | parameter can only be !=1 if the chosen statistical test is the score test. 41 | method : str, optional 42 | Defines the statistical test to be run in order to reject or fail to reject the 43 | null hypothesis. By default, this is the score test (i.e. "score"). 44 | 45 | Returns 46 | ------- 47 | BinomialResult: namedtuple 48 | This namedtuple contains the following data: 49 | stat: float 50 | Test statistic for the requested test. 51 | pvalue: float 52 | The p-value for the requested test. 53 | 54 | References 55 | ------ 56 | [1] Alan Agresti. Categorical data analysis. John Wiley & Sons, 3 edition, 2013. 57 | 58 | """ 59 | if x1 == 0 or x2 == 0: 60 | # logging.warn("One or more counts were 0, not running test and returning nan") 61 | return BinomialResult(np.nan, np.nan) 62 | if null_ratio != 1 and method != "score": 63 | raise ValueError("Non-unity null odds only works with ``method=='score'``") 64 | 65 | cont_table = np.array([[x1, n1 - x1], [x2, n2 - x2]]) 66 | if method == "fisher" and null_ratio == 1.0: 67 | stat, pvalue = fisher_exact(cont_table, alternative="two-sided") 68 | elif method == "chi2": 69 | stat, pvalue, _, _ = chi2_contingency(cont_table) 70 | elif method == "score": 71 | stat, pvalue = test_proportions_2indep( 72 | x1, 73 | n1, 74 | x2, 75 | n2, 76 | method="score", 77 | compare="ratio", 78 | alternative="two-sided", 79 | value=null_ratio, 80 | ) 81 | else: 82 | raise ValueError 83 | 84 | return BinomialResult(stat, pvalue) 85 | -------------------------------------------------------------------------------- /tests/layouts/nooverlap/test_grid.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 
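A minimal sketch of calling ``binom_2samp`` as documented in graspologic/inference/binomial.py above; the counts are made up purely for illustration:

    from graspologic.inference.binomial import binom_2samp

    # Hypothetical counts: 20/100 successes in group 1 vs. 35/100 in group 2.
    result = binom_2samp(x1=20, n1=100, x2=35, n2=100, method="score")
    print(result.stat, result.pvalue)  # BinomialResult is a namedtuple

    # Only the score test supports a non-unity null ratio, e.g. H0: p1 = 2 * p2.
    ratio_result = binom_2samp(x1=20, n1=100, x2=35, n2=100, null_ratio=2.0)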
3 | 4 | import unittest 5 | 6 | from graspologic.layouts.nooverlap._grid import _GridBuckets 7 | from graspologic.layouts.nooverlap._node import _Node 8 | 9 | 10 | class TestGrid(unittest.TestCase): 11 | # def setUp(self): 12 | # self.g = _GridBuckets(10) 13 | 14 | def test_get_cell(self): 15 | g = _GridBuckets(10) 16 | cell = g.get_cell(0, 0) 17 | self.assertTupleEqual((0, 0), cell) 18 | 19 | cell = g.get_cell(-1, -1) 20 | self.assertTupleEqual((-10, -10), cell) 21 | 22 | cell = g.get_cell(11, 11) 23 | self.assertTupleEqual((10, 10), cell) 24 | 25 | cell = g.get_cell(105, 87) 26 | self.assertTupleEqual((100, 80), cell) 27 | 28 | cell = g.get_cell(-105, -87) 29 | self.assertTupleEqual((-110, -90), cell) 30 | 31 | cell = g.get_cell(-105, 87) 32 | self.assertTupleEqual((-110, 80), cell) 33 | 34 | cell = g.get_cell(105, -57) 35 | self.assertTupleEqual((100, -60), cell) 36 | 37 | def test_get_grid_cells(self): 38 | g = _GridBuckets(10) 39 | cells = g._get_grid_cells(5, 12, 1) 40 | self.assertSetEqual({(0, 10)}, cells) 41 | 42 | g2 = _GridBuckets(20) 43 | cells = g2._get_grid_cells(5, 12, 10) 44 | self.assertSetEqual({(-20, 20), (0, 20), (0, 0), (-20, 0)}, cells) 45 | 46 | g3 = _GridBuckets(20) 47 | cells = g3._get_grid_cells(-5, -12, 10) 48 | self.assertSetEqual({(-20, -20), (0, -20), (0, -40), (-20, -40)}, cells) 49 | 50 | def test_add_node(self): 51 | g = _GridBuckets(10) 52 | n0 = _Node(0, 1, 1, 10, 1, "blue") 53 | n1 = _Node(1, 2, 1, 10, 1, "blue") 54 | n2 = _Node(2, 40, -20, 10, 1, "blue") 55 | 56 | g.add_node(n0) 57 | nodes = g.get_potential_overlapping_nodes_by_node(n0) 58 | self.assertSetEqual(nodes, {n0}) 59 | 60 | g.add_node(n1) 61 | nodes = g.get_potential_overlapping_nodes_by_node(n1) 62 | self.assertSetEqual(nodes, {n0, n1}) 63 | 64 | g.add_node(n2) 65 | nodes = g.get_potential_overlapping_nodes_by_node(n1) 66 | self.assertSetEqual(nodes, {n0, n1}) 67 | 68 | def test_get_cell_stats(self): 69 | g = _GridBuckets(10) 70 | n0 = _Node(0, 1, 1, 10, 1, "blue") 71 | n1 = _Node(1, 2, 1, 10, 1, "blue") 72 | n2 = _Node(2, 40, -20, 10, 1, "blue") 73 | n3 = _Node(3, -33, -33, 1, 1, "blue") 74 | n4 = _Node(4, -193, 78, 1, 1, "blue") 75 | g.add_node(n0) 76 | g.add_node(n1) 77 | g.add_node(n2) 78 | g.add_node(n3) 79 | g.add_node(n4) 80 | stats = g.get_grid_cell_stats() 81 | self.assertEqual(3, len(stats), "Correct size list") 82 | self.assertEqual(254, stats[0][1], "empty cells") 83 | self.assertEqual(6, stats[1][1], "one item in cell") 84 | self.assertEqual(4, stats[2][1], "two items in cell") 85 | self.assertEqual( 86 | [(0, 254), (1, 6), (2, 4)], stats, "grid cell stats are in expected format" 87 | ) 88 | 89 | 90 | if __name__ == "__main__": 91 | unittest.main() 92 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. -*- coding: utf-8 -*- 2 | 3 | .. _contents: 4 | 5 | Overview of graspologic_ 6 | ======================== 7 | 8 | .. _graspologic: https://graspologic.readthedocs.org/en/latest 9 | 10 | graspologic is a Python package for analysis of graphs, or networks. 11 | 12 | Motivation 13 | ---------- 14 | 15 | A graph, or network, provides a mathematically intuitive representation of data with 16 | some sort of relationship between items. 
For example, a social network can be 17 | represented as a graph by considering all participants in the social network as nodes, 18 | with connections representing whether each pair of individuals in the network are friends 19 | with one another. Naively, one might apply traditional statistical techniques to a graph, 20 | which neglects the spatial arrangement of nodes within the network and is not utilizing 21 | all of the information present in the graph. In this package, we provide utilities and 22 | algorithms designed for the processing and analysis of graphs with specialized graph 23 | statistical algorithms. 24 | 25 | Python 26 | ------ 27 | 28 | Python is a powerful programming language that allows concise expressions of network 29 | algorithms. Python has a vibrant and growing ecosystem of packages that 30 | graspologic uses to provide more features such as numerical linear algebra and 31 | plotting. In order to make the most out of graspologic you will want to know how 32 | to write basic programs in Python. Among the many guides to Python, we 33 | recommend the `Python documentation `_. 34 | 35 | Free software 36 | ------------- 37 | 38 | graspologic is free software; you can redistribute it and/or modify it under the 39 | terms of the :doc:`MIT ` license. We welcome contributions. 40 | Join us on `GitHub `_. 41 | 42 | History 43 | ------- 44 | 45 | ``graspologic`` first released in September 2020, but it got its start as a pair of Python libraries 46 | written by Johns Hopkins University's NeuroData lab and Microsoft Research's Project Essex. 47 | Both teams worked on many of the same algorithms, shared research, findings, and generally duplicated a lot of effort. 48 | 49 | ``GraSPy`` - the NeuroData library - and ``topologic`` - the Microsoft Research library began merging in September of 2020, but both got their starts far earlier, with GraSPy starting in September 2018 and topologic starting just a short time later, on October 2nd, 2018. 50 | 51 | GraSPy was originally designed and written by Jaewon Chung, Benjamin Pedigo, and Eric Bridgeford. 52 | 53 | Topologic was originally designed and written by Patrick Bourke, Jonathan McLean, Nick Caurvina, and Dwayne Pryce. 54 | 55 | .. toctree-filt:: 56 | :maxdepth: 1 57 | :caption: Documentation 58 | 59 | license 60 | reference/index 61 | tutorials/index 62 | 63 | .. toctree:: 64 | :maxdepth: 1 65 | :caption: Useful Links 66 | 67 | graspologic @ GitHub 68 | graspologic @ PyPI 69 | Issue Tracker 70 | 71 | Indices and tables 72 | ================== 73 | 74 | * :ref:`genindex` 75 | * :ref:`search` 76 | -------------------------------------------------------------------------------- /tests/test_rdpg_corr.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 
3 | 4 | import unittest 5 | 6 | import numpy as np 7 | 8 | from graspologic.simulations.rdpg_corr import rdpg_corr 9 | from graspologic.simulations.simulations import p_from_latent, sample_edges 10 | 11 | 12 | class TestRDPGCorr(unittest.TestCase): 13 | @classmethod 14 | def setUpClass(cls): 15 | cls.r = 0.3 16 | cls.Y = None 17 | cls.X = np.random.dirichlet([20, 20], size=300) 18 | 19 | def test_dimensions(self): 20 | np.random.seed(1234) 21 | A, B = rdpg_corr( 22 | self.X, self.Y, self.r, rescale=False, directed=False, loops=False 23 | ) 24 | self.assertTrue(A.shape, (300, 300)) 25 | self.assertTrue(B.shape, (300, 300)) 26 | 27 | def test_inputs(self): 28 | x1 = np.array([[1, 1], [1, 1]]) 29 | x2 = np.array([[1, 1]]) 30 | x3 = np.zeros((2, 2, 2)) 31 | with self.assertRaises(TypeError): 32 | p_from_latent("hi") # wrong type 33 | with self.assertRaises(ValueError): 34 | p_from_latent(x1, x2) # dimension mismatch 35 | with self.assertRaises(ValueError): 36 | p_from_latent(x3) # wrong num dimensions 37 | with self.assertRaises(TypeError): 38 | sample_edges("XD") # wrong type 39 | with self.assertRaises(ValueError): 40 | sample_edges(x3) # wrong num dimensions 41 | with self.assertRaises(ValueError): 42 | sample_edges(x2) # wrong shape for P 43 | 44 | if any(self.X[self.X > 1]) or any(self.X[self.X < -1]): # wrong values for P 45 | raise ValueError("P values should be less than 1 and bigger than -1") 46 | 47 | def test_rdpg_corr(self): 48 | np.random.seed(123) 49 | g1, g2 = rdpg_corr( 50 | self.X, self.Y, self.r, rescale=False, directed=False, loops=False 51 | ) 52 | 53 | # check the dimention of g1, g2 54 | self.assertTrue(g1.shape == (self.X.shape[0], self.X.shape[0])) 55 | self.assertTrue(g1.shape == (self.X.shape[0], self.X.shape[0])) 56 | 57 | # check rho 58 | g1 = g1[np.where(~np.eye(g1.shape[0], dtype=bool))] 59 | g2 = g2[np.where(~np.eye(g2.shape[0], dtype=bool))] 60 | correlation = np.corrcoef(g1, g2)[0, 1] 61 | self.assertTrue(np.isclose(correlation, self.r, atol=0.01)) 62 | 63 | # check P 64 | def test_p_is_close(self): 65 | P = p_from_latent(self.X, self.Y, rescale=False, loops=True) 66 | if any(P[P > 1]) or any(P[P < -1]): # wrong values for P 67 | raise ValueError("P values should be less than 1 and bigger than -1") 68 | 69 | np.random.seed(8888) 70 | graphs1 = [] 71 | graphs2 = [] 72 | for i in range(100): 73 | g1, g2 = rdpg_corr( 74 | self.X, self.Y, self.r, rescale=False, directed=True, loops=True 75 | ) 76 | graphs1.append(g1) 77 | graphs2.append(g2) 78 | graphs1 = np.stack(graphs1) 79 | graphs2 = np.stack(graphs2) 80 | np.testing.assert_allclose(np.mean(graphs1, axis=0), P, atol=0.3) 81 | np.testing.assert_allclose(np.mean(graphs2, axis=0), P, atol=0.3) 82 | -------------------------------------------------------------------------------- /tests/test_sg.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 
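tests/test_rdpg_corr.py above verifies the requested edge correlation by correlating the off-diagonal entries of the two sampled adjacency matrices; a condensed sketch of that check, reusing the test's latent positions and seed:

    import numpy as np
    from graspologic.simulations.rdpg_corr import rdpg_corr

    np.random.seed(123)
    X = np.random.dirichlet([20, 20], size=300)   # latent positions
    A, B = rdpg_corr(X, None, 0.3, rescale=False, directed=False, loops=False)

    off_diagonal = ~np.eye(A.shape[0], dtype=bool)
    print(np.corrcoef(A[off_diagonal], B[off_diagonal])[0, 1])  # close to 0.3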
3 | 4 | import unittest 5 | 6 | import numpy as np 7 | from numpy.testing import assert_equal 8 | 9 | import graspologic.subgraph as sg 10 | 11 | 12 | class TestEstimateSubgraph(unittest.TestCase): 13 | def test_estimate_subgraph_coh(self): 14 | ys = np.array([0, 1, 0, 1, 0, 1, 0, 1, 0, 1]) 15 | blank = np.ones((10, 10)) 16 | blank[1:6, 0] = 0 17 | A = np.ones((10, 10, 10)) 18 | 19 | for ind in range(10): 20 | if ys[ind] == 1: 21 | A[:, :, ind] = blank 22 | test_model = sg.SignalSubgraph() 23 | estsub = test_model.fit_transform(A, ys, [5, 1]) 24 | ver = np.ones((10, 10)) 25 | ver[estsub] = 0 26 | np.testing.assert_array_equal(blank, ver) 27 | 28 | def test_estimate_subgraph_inc(self): 29 | ys = np.array([0, 1, 0, 1, 0, 1, 0, 1, 0, 1]) 30 | blank = np.ones((10, 10)) 31 | blank[1:6, 0] = 0 32 | A = np.ones((10, 10, 10)) 33 | 34 | for ind in range(10): 35 | if ys[ind] == 1: 36 | A[:, :, ind] = blank 37 | test_model = sg.SignalSubgraph() 38 | estsub = test_model.fit_transform(A, ys, 5) 39 | ver = np.ones((10, 10)) 40 | ver[estsub] = 0 41 | np.testing.assert_array_equal(blank, ver) 42 | 43 | def test_fit_bad_constraints(self): 44 | A = np.ones((5, 5, 5)) 45 | ys = np.ones(5) 46 | test_model = sg.SignalSubgraph() 47 | with self.assertRaises(TypeError): 48 | test_model.fit(A, ys, [1]) 49 | with self.assertRaises(TypeError): 50 | test_model.fit(A, ys, [1, 1, 1]) 51 | 52 | def test_construct_contingency(self): 53 | A = np.ones((1, 1, 5)) 54 | A[:, :, 1::2] = 0 55 | ys = np.array([1, 0, 1, 0, 0]) 56 | test_model = sg.SignalSubgraph() 57 | test_model.fit(A, ys, 1) 58 | test_model._SignalSubgraph__construct_contingency() 59 | cmat = test_model.contmat_ 60 | ver = np.array([[[[1, 2], [2, 0]]]], dtype=float) 61 | np.testing.assert_array_equal(cmat, ver) 62 | 63 | def test_fit_bad_type(self): 64 | A = [[[1 for i in range(5)] for j in range(5)] for k in range(5)] 65 | ys = [1, 1, 1, 1, 1] 66 | test_model = sg.SignalSubgraph() 67 | with self.assertRaises(TypeError): 68 | test_model.fit(A, np.ones(5), 1) 69 | with self.assertRaises(TypeError): 70 | test_model.fit(A, set(ys), 1) 71 | 72 | def test_fit_bad_size(self): 73 | test_model = sg.SignalSubgraph() 74 | with self.assertRaises(ValueError): 75 | test_model.fit(np.ones((5, 5)), np.ones(5), 1) 76 | with self.assertRaises(ValueError): 77 | test_model.fit(np.ones((3, 4, 2)), np.ones(2), 1) 78 | 79 | def test_fit_bad_len(self): 80 | A = np.ones((3, 3, 3)) 81 | test_model = sg.SignalSubgraph() 82 | with self.assertRaises(ValueError): 83 | test_model.fit(A, np.ones((3, 3)), 1) 84 | with self.assertRaises(ValueError): 85 | test_model.fit(A, np.array([0, 1, 2]), 1) 86 | with self.assertRaises(ValueError): 87 | test_model.fit(A, np.ones(2), 1) 88 | -------------------------------------------------------------------------------- /tests/test_svd.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 
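A condensed version of the incoherent case in tests/test_sg.py above: the graphs are stacked along the third axis, the labels mark which graphs carry the signal, and the constraint is the number of signal edges to recover.

    import numpy as np
    import graspologic.subgraph as sg

    ys = np.array([0, 1] * 5)                    # class label per graph
    signal = np.ones((10, 10))
    signal[1:6, 0] = 0                           # the 5 edges that differ for class 1
    graphs = np.ones((10, 10, 10))
    graphs[:, :, ys == 1] = signal[:, :, None]   # broadcast the signal pattern

    model = sg.SignalSubgraph()
    estimated = model.fit_transform(graphs, ys, 5)  # incoherent: recover 5 edges
    print(estimated)                                # indices of the recovered edges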
3 | 4 | import unittest 5 | 6 | import numpy as np 7 | from scipy.spatial import procrustes 8 | 9 | from graspologic.embed.svd import select_svd 10 | from graspologic.utils import symmetrize 11 | 12 | 13 | class TestSVD(unittest.TestCase): 14 | def test_bad_inputs(self): 15 | X = np.random.normal(size=(100, 5)) 16 | with self.assertRaises(ValueError): 17 | bad_algo = "ROFLMAO" 18 | select_svd(X, algorithm=bad_algo) 19 | 20 | with self.assertRaises(ValueError): 21 | algorithm = "full" 22 | bad_components = 1000 23 | select_svd(X, n_components=bad_components, algorithm=algorithm) 24 | 25 | with self.assertRaises(ValueError): 26 | algorithm = "truncated" 27 | bad_components = 1000 28 | select_svd(X, n_components=bad_components, algorithm=algorithm) 29 | 30 | def test_outputs(self): 31 | np.random.seed(123) 32 | X = np.vstack([ 33 | np.repeat([[0.2, 0.2, 0.2]], 50, axis=0), 34 | np.repeat([[0.5, 0.5, 0.5]], 50, axis=0), 35 | ]) 36 | P = X @ X.T 37 | A = np.random.binomial(1, P).astype(float) 38 | 39 | n_components = 3 40 | 41 | # Full SVD 42 | U_full, D_full, V_full = select_svd( 43 | A, n_components=n_components, algorithm="full" 44 | ) 45 | X_full = U_full @ np.diag(np.sqrt(D_full)) 46 | _, _, norm_full = procrustes(X, X_full) 47 | 48 | # Truncated SVD 49 | U_trunc, D_trunc, V_trunc = select_svd( 50 | A, n_components=n_components, algorithm="truncated" 51 | ) 52 | X_trunc = U_trunc @ np.diag(np.sqrt(D_trunc)) 53 | _, _, norm_trunc = procrustes(X, X_trunc) 54 | 55 | # Randomized SVD 56 | U_rand, D_rand, V_rand = select_svd( 57 | A, n_components=n_components, algorithm="randomized", n_iter=10 58 | ) 59 | X_rand = U_rand @ np.diag(np.sqrt(D_rand)) 60 | _, _, norm_rand = procrustes(X, X_rand) 61 | 62 | rtol = 1e-4 63 | atol = 1e-4 64 | np.testing.assert_allclose(norm_full, norm_trunc, rtol, atol) 65 | np.testing.assert_allclose(norm_full, norm_rand, rtol, atol) 66 | 67 | def test_eigsh(self): 68 | np.random.seed(123) 69 | X = np.vstack([ 70 | np.repeat([[0.2, 0.2, 0.2]], 50, axis=0), 71 | np.repeat([[0.5, 0.5, 0.5]], 50, axis=0), 72 | ]) 73 | P = X @ X.T 74 | A = np.random.binomial(1, P).astype(float) 75 | A = symmetrize(A, method="triu") 76 | n_components = 3 77 | 78 | # Full SVD 79 | U_full, D_full, V_full = select_svd( 80 | A, n_components=n_components, algorithm="full" 81 | ) 82 | X_full = U_full @ np.diag(np.sqrt(D_full)) 83 | _, _, norm_full = procrustes(X, X_full) 84 | 85 | # eigsh SVD 86 | U_square, D_square, V_square = select_svd( 87 | A, n_components=n_components, algorithm="eigsh", n_iter=10 88 | ) 89 | X_square = U_square @ np.diag(np.sqrt(D_square)) 90 | _, _, norm_square = procrustes(X, X_square) 91 | 92 | rtol = 1e-4 93 | atol = 1e-4 94 | np.testing.assert_allclose(norm_full, norm_square, rtol, atol) 95 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: graspologic Publish 2 | on: 3 | # 4 | # When a release tag is created (e.g. v1.0.0), this workflow will be triggered. Hatch's VCS versioning will use the correct version tag. 5 | # 6 | release: 7 | types: [created] 8 | # 9 | # On pushes to main and dev, a prerelease version will be cut for the branch. e.g. 
v1.0.0-pre.10+ 10 | # 11 | push: 12 | paths-ignore: 13 | - '.all-contributorsrc' 14 | - 'CONTRIBUTORS.md' 15 | branches: 16 | - 'main' 17 | - 'dev' 18 | env: 19 | PYTHON_VERSION: '3.10' 20 | jobs: 21 | build: 22 | uses: ./.github/workflows/build.yml 23 | publish: 24 | runs-on: ubuntu-latest 25 | needs: build 26 | permissions: 27 | id-token: write 28 | outputs: 29 | version: ${{ steps.export-version.outputs.version }} 30 | steps: 31 | - uses: actions/checkout@v4 32 | with: 33 | fetch-depth: 0 34 | fetch-tags: true 35 | - name: Set up Python 36 | uses: actions/setup-python@v2 37 | with: 38 | python-version: ${{ env.PYTHON_VERSION }} 39 | - name: Install uv 40 | uses: astral-sh/setup-uv@v2 41 | - name: Install dependencies 42 | run: uv sync --python ${{ env.PYTHON_VERSION }} 43 | - name: Build Artifacts 44 | run: | 45 | RAW_VERSION=$(uvx hatch version) 46 | # Strip any local version metadata (everything after '+') to satisfy PyPI rules 47 | CLEAN_VERSION=${RAW_VERSION%%+*} 48 | echo "Raw version: $RAW_VERSION" 49 | echo "Clean version (for PyPI): $CLEAN_VERSION" 50 | # Force hatch to use the sanitized version for the build 51 | SETUPTOOLS_SCM_PRETEND_VERSION=$CLEAN_VERSION uv build 52 | - name: Publish package distributions to PyPI 53 | uses: pypa/gh-action-pypi-publish@release/v1 54 | with: 55 | packages-dir: dist 56 | skip-existing: true 57 | verbose: true 58 | docsite: 59 | runs-on: ubuntu-latest 60 | needs: [publish, build] 61 | if: github.ref=='refs/heads/main' || github.ref=='refs/heads/dev' 62 | permissions: 63 | id-token: write 64 | contents: write 65 | steps: 66 | - name: Download documentation artifact 67 | uses: actions/download-artifact@v4 68 | with: 69 | name: documentation-site 70 | path: docs/documentation-site 71 | - name: Publish reference docs (dev branch) 72 | uses: peaceiris/actions-gh-pages@v3 73 | if: github.ref=='refs/heads/dev' 74 | with: 75 | github_token: ${{ secrets.GITHUB_TOKEN }} 76 | publish_dir: docs/documentation-site 77 | destination_dir: pre-release 78 | - name: Publish reference docs (main branch) 79 | uses: peaceiris/actions-gh-pages@v3 80 | if: github.ref=='refs/heads/main' 81 | with: 82 | github_token: ${{ secrets.GITHUB_TOKEN }} 83 | publish_dir: docs/documentation-site 84 | destination_dir: ${{ needs.publish.outputs.version }} 85 | - name: Publish latest reference docs (main branch) 86 | uses: peaceiris/actions-gh-pages@v3 87 | if: github.ref=='refs/heads/main' 88 | with: 89 | github_token: ${{ secrets.GITHUB_TOKEN }} 90 | publish_dir: docs/documentation-site 91 | destination_dir: latest 92 | 93 | 94 | -------------------------------------------------------------------------------- /tests/test_er_and_group_connection_tests.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import numpy as np 4 | from scipy.sparse import csr_array 5 | 6 | from graspologic.inference import density_test, group_connection_test 7 | from graspologic.simulations import er_np, sbm 8 | 9 | 10 | class TestGroupConnection(unittest.TestCase): 11 | def test_gctest_works(self): 12 | np.random.seed(8888) 13 | B1 = np.array([[0.8, 0.6], [0.6, 0.8]]) 14 | B2 = 0.8 * B1 15 | A1, labels1 = sbm([50, 50], B1, return_labels=True) 16 | A2, labels2 = sbm([60, 60], B2, return_labels=True) 17 | stat, pvalue, misc = group_connection_test( 18 | A1, A2, labels1, labels2, density_adjustment=True 19 | ) 20 | self.assertTrue(pvalue > 0.05) 21 | 22 | def test_all_kwargs(self): 23 | B1 = np.array([[0.4, 0.6], [0.6, 0.8]]) 24 | B2 = 
np.array([[0.9, 0.4], [0.2, 0.865]]) 25 | A1, labels1 = sbm([60, 60], B1, return_labels=True, directed=True) 26 | A2, labels2 = sbm([50, 50], B2, return_labels=True, directed=True) 27 | stat, pvalue, misc = group_connection_test( 28 | A1, 29 | A2, 30 | labels1, 31 | labels2, 32 | combine_method="tippett", 33 | method="score", 34 | correct_method="Bonferroni", 35 | density_adjustment=True, 36 | ) 37 | self.assertTrue(pvalue < 0.05) 38 | self.assertTrue(misc["uncorrected_pvalues"].size == 4) 39 | self.assertTrue(misc["probabilities1"].size == 4) 40 | self.assertTrue(misc["probabilities2"].size == 4) 41 | self.assertTrue(np.sum(misc["observed1"].to_numpy()) == np.count_nonzero(A1)) 42 | self.assertTrue(np.sum(misc["observed2"].to_numpy()) == np.count_nonzero(A2)) 43 | self.assertTrue(misc["null_ratio"] != 1.0) 44 | self.assertTrue(misc["n_tests"] == 4) 45 | self.assertTrue(misc["rejections"].to_numpy().size == 4) 46 | self.assertTrue(misc["corrected_pvalues"].size == 4) 47 | 48 | def test_sparse(self): 49 | B1 = np.array([[0.8, 0.6], [0.6, 0.8]]) 50 | B2 = np.array([[0.87, 0.66], [0.66, 0.87]]) 51 | A1, labels1 = sbm([50, 50], B1, return_labels=True) 52 | A2, labels2 = sbm([60, 60], B2, return_labels=True) 53 | sA1 = csr_array(A1) 54 | sA2 = csr_array(A2) 55 | 56 | stat, pvalue, misc = group_connection_test(sA1, sA2, labels1, labels2) 57 | self.assertTrue(pvalue <= 0.05) 58 | 59 | 60 | class TestER(unittest.TestCase): 61 | def test_er(self): 62 | np.random.seed(234) 63 | A1 = er_np(500, 0.6) 64 | A2 = er_np(400, 0.8) 65 | stat, pvalue, er_misc = density_test(A1, A2) 66 | self.assertTrue(pvalue <= 0.05) 67 | A3 = er_np(500, 0.8) 68 | A4 = er_np(400, 0.8) 69 | stat, pvalue, er_misc = density_test(A3, A4) 70 | self.assertTrue(pvalue > 0.05) 71 | 72 | def test_all(self): 73 | np.random.seed(234) 74 | A1 = er_np(500, 0.6) 75 | A2 = er_np(400, 0.8) 76 | stat, pvalue, er_misc = density_test(A1, A2, method="chi2") 77 | self.assertTrue(pvalue <= 0.05) 78 | self.assertTrue(er_misc["probability1"].to_numpy() < 1.0) 79 | self.assertTrue(er_misc["probability2"].to_numpy() < 1.0) 80 | self.assertTrue(er_misc["observed1"].to_numpy() == np.count_nonzero(A1)) 81 | self.assertTrue(er_misc["observed2"].to_numpy() == np.count_nonzero(A2)) 82 | 83 | 84 | if __name__ == "__main__": 85 | unittest.main() 86 | -------------------------------------------------------------------------------- /docs/tutorials/plotting/pairplot.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Pairplot: Visualizing High Dimensional Data\n", 8 | "\n", 9 | "This example provides how to visualize high dimensional data using the pairplot." 
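The density comparison exercised in tests/test_er_and_group_connection_tests.py above reduces to a single call; a minimal sketch mirroring ``TestER`` (the sizes and probabilities are illustrative only):

    import numpy as np
    from graspologic.inference import density_test
    from graspologic.simulations import er_np

    np.random.seed(234)
    A1 = er_np(500, 0.6)
    A2 = er_np(400, 0.8)

    stat, pvalue, misc = density_test(A1, A2)
    print(pvalue)  # small, because the two densities genuinely differ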
10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import graspologic\n", 19 | "\n", 20 | "import numpy as np\n", 21 | "%matplotlib inline" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "## Simulate a binary graph using stochastic block model\n", 29 | "The 3-block model is defined as below:\n", 30 | "\n", 31 | "\\begin{align*}\n", 32 | "n &= [50, 50, 50]\\\\\n", 33 | "P &= \n", 34 | "\\begin{bmatrix}0.5 & 0.1 & 0.05 \\\\\n", 35 | "0.1 & 0.4 & 0.15 \\\\\n", 36 | "0.05 & 0.15 & 0.3\n", 37 | "\\end{bmatrix}\n", 38 | "\\end{align*}\n", 39 | "\n", 40 | "Thus, the first 50 vertices belong to block 1, the second 50 vertices belong to block 2, and the last 50 vertices belong to block 3." 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "from graspologic.simulations import sbm\n", 50 | "\n", 51 | "n_communities = [50, 50, 50]\n", 52 | "p = [[0.5, 0.1, 0.05], \n", 53 | " [0.1, 0.4, 0.15], \n", 54 | " [0.05, 0.15, 0.3],]\n", 55 | "\n", 56 | "np.random.seed(2)\n", 57 | "A = sbm(n_communities, p)" 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": {}, 63 | "source": [ 64 | "## Embed using adjacency spectral embedding to obtain lower dimensional representation of the graph\n", 65 | "\n", 66 | "The embedding dimension is automatically chosen. It should embed to 3 dimensions." 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [ 75 | "from graspologic.embed import AdjacencySpectralEmbed\n", 76 | "\n", 77 | "ase = AdjacencySpectralEmbed()\n", 78 | "X = ase.fit_transform(A)\n", 79 | "\n", 80 | "print(X.shape)" 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": {}, 86 | "source": [ 87 | "## Use pairplot to plot the embedded data\n", 88 | "\n", 89 | "First we generate labels that correspond to blocks. We pass the labels along with the data for pair plot." 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "from graspologic.plot import pairplot\n", 99 | "\n", 100 | "labels = ['Block 1'] * 50 + ['Block 2'] * 50 + ['Block 3'] * 50\n", 101 | "\n", 102 | "plot = pairplot(X, labels)" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": null, 108 | "metadata": {}, 109 | "outputs": [], 110 | "source": [] 111 | } 112 | ], 113 | "metadata": { 114 | "kernelspec": { 115 | "display_name": "Python 3", 116 | "language": "python", 117 | "name": "python3" 118 | }, 119 | "language_info": { 120 | "codemirror_mode": { 121 | "name": "ipython", 122 | "version": 3 123 | }, 124 | "file_extension": ".py", 125 | "mimetype": "text/x-python", 126 | "name": "python", 127 | "nbconvert_exporter": "python", 128 | "pygments_lexer": "ipython3", 129 | "version": "3.7.3" 130 | } 131 | }, 132 | "nbformat": 4, 133 | "nbformat_minor": 4 134 | } 135 | -------------------------------------------------------------------------------- /tests/test_select_dimension.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 
3 | 4 | import unittest 5 | 6 | import numpy as np 7 | from numpy.testing import assert_equal 8 | from scipy.linalg import orth 9 | 10 | from graspologic.embed.svd import select_dimension 11 | from graspologic.simulations.simulations import sbm 12 | 13 | 14 | def generate_data(n=10, elbows=3, seed=1): 15 | """ 16 | Generate data matrix with a specific number of elbows on scree plot 17 | """ 18 | np.random.seed(seed) 19 | x = np.random.binomial(1, 0.6, (n**2)).reshape(n, n) 20 | xorth = orth(x) 21 | d = np.zeros(xorth.shape[0]) 22 | for i in range(0, len(d), int(len(d) / (elbows + 1))): 23 | d[:i] += 10 24 | A = xorth.T.dot(np.diag(d)).dot(xorth) 25 | return A, d 26 | 27 | 28 | class TestSelectDimension(unittest.TestCase): 29 | def test_invalid_inputes(self): 30 | X, D = generate_data() 31 | 32 | # invalid n_elbows 33 | with self.assertRaises(ValueError): 34 | bad_n_elbows = -2 35 | select_dimension(X, n_elbows=bad_n_elbows) 36 | 37 | with self.assertRaises(ValueError): 38 | bad_n_elbows = "string" 39 | select_dimension(X, n_elbows=bad_n_elbows) 40 | 41 | # invalid n_components 42 | with self.assertRaises(ValueError): 43 | bad_n_components = -1 44 | select_dimension(X, n_components=bad_n_components) 45 | 46 | with self.assertRaises(ValueError): 47 | bad_n_components = "string" 48 | select_dimension(X, n_components=bad_n_components) 49 | 50 | # invalid threshold 51 | with self.assertRaises(ValueError): 52 | bad_threshold = -2 53 | select_dimension(X, threshold=bad_threshold) 54 | 55 | with self.assertRaises(ValueError): 56 | bad_threshold = "string" 57 | select_dimension(X, threshold=bad_threshold) 58 | 59 | with self.assertRaises(IndexError): 60 | bad_threshold = 1000000 61 | select_dimension(X, threshold=bad_threshold) 62 | 63 | # invalid X 64 | with self.assertRaises(ValueError): 65 | bad_X = -2 66 | select_dimension(X=bad_X) 67 | 68 | with self.assertRaises(ValueError): 69 | # input is tensor 70 | bad_X = np.random.normal(size=(100, 10, 10)) 71 | select_dimension(X=bad_X) 72 | 73 | with self.assertRaises(ValueError): 74 | bad_X = np.random.normal(size=100).reshape(100, -1) 75 | select_dimension(X=bad_X) 76 | 77 | def test_output_synthetic(self): 78 | data, l = generate_data(10, 3) 79 | elbows, _, _ = select_dimension(X=data, n_elbows=2, return_likelihoods=True) 80 | assert_equal(elbows, [2, 4]) 81 | 82 | def test_output_simple(self): 83 | """ 84 | Elbow should be at 2. 85 | """ 86 | X = np.array([10, 9, 3, 2, 1]) 87 | elbows, _ = select_dimension(X, n_elbows=1) 88 | assert_equal(elbows[0], 2) 89 | 90 | def test_output_uniform(self): 91 | """ 92 | Generate two sets of synthetic eigenvalues based on two uniform distributions. 93 | The elbow must be at 50. 
94 | """ 95 | np.random.seed(9) 96 | x1 = np.random.uniform(0, 45, 50) 97 | x2 = np.random.uniform(55, 100, 50) 98 | X = np.sort(np.hstack([x1, x2]))[::-1] 99 | elbows, _ = select_dimension(X, n_elbows=1) 100 | assert_equal(elbows[0], 50) 101 | 102 | def test_output_two_block_sbm(self): 103 | np.random.seed(10) 104 | n_communities = [100, 100] 105 | P = np.array([[0.5, 0.1], [0.1, 0.5]]) 106 | A = sbm(n_communities, P) 107 | 108 | elbows, _ = select_dimension(A, n_elbows=2) 109 | assert_equal(elbows[0], 2) 110 | -------------------------------------------------------------------------------- /docs/tutorials/simulations/erdos_renyi.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Erdos-Renyi (ER) Model" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import graspologic\n", 17 | "\n", 18 | "import matplotlib.pyplot as plt\n", 19 | "import numpy as np\n", 20 | "%matplotlib inline" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "Erdos-Renyi (ER) graphs are the simplest generative model. We assume that each edge for all pairs of vertices is sampled independently from all the edges. There are two ways to parameterize the model:\n", 28 | "\n", 29 | "1. ER(n, p) - this model specifies the number of vertices, $n$, and each pair of vertices has $p$ probability of an edge existing between the two.\n", 30 | "2. ER(n, m) - this model specifies the number of vertices, $n$, and the total number of edges $m$.\n", 31 | "\n", 32 | "Below, we sample two binary graphs (undirected and no self-loops) $G_1 \\sim ER_{NP}(50, 0.3)$ and $G_2 \\sim ER_{NM}(50, 250)$." 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": null, 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "from graspologic.simulations import er_nm, er_np\n", 42 | "\n", 43 | "n = 50\n", 44 | "m = 250\n", 45 | "p = 0.3\n", 46 | "\n", 47 | "np.random.seed(1)\n", 48 | "G1 = er_np(n=n, p=p)\n", 49 | "G2 = er_nm(n=n, m=m)" 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": {}, 55 | "source": [ 56 | "## Visualize the graphs using heatmap" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": null, 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "from graspologic.plot import heatmap\n", 66 | "\n", 67 | "heatmap(G1, title = 'ER-NP(50, 0.3) Simulation')\n", 68 | "_ = heatmap(G2, title = 'ER-NM(50, 250) Simulation')" 69 | ] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "metadata": {}, 74 | "source": [ 75 | "## Weighted ER Graphs\n", 76 | "\n", 77 | "Both ``er_np()`` and ``er_nm()`` functions provide ways to sample weights for all edges that were sampled via a probability distribution function. In order to sample with weights, we provide a probability distribution function with corresponding keyword arguments for the distribution function.\n", 78 | "\n", 79 | "Below we sample $G_1 \\sim ER_{NP}(50, 0.2)$ where the weights are distributed normally with $\\mu = 0,~\\sigma^2 = 1$." 
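``test_output_simple`` in tests/test_select_dimension.py above pins the expected elbow on a toy spectrum; the same call in isolation:

    import numpy as np
    from graspologic.embed.svd import select_dimension

    # With singular values [10, 9, 3, 2, 1], the first elbow sits after the
    # large gap, i.e. at dimension 2.
    elbows, _ = select_dimension(np.array([10, 9, 3, 2, 1]), n_elbows=1)
    print(elbows)  # [2]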
80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": null, 85 | "metadata": {}, 86 | "outputs": [], 87 | "source": [ 88 | "wt = np.random.normal\n", 89 | "wtargs = dict(loc=0, scale=1)\n", 90 | "\n", 91 | "G1 = er_np(n=50, p=0.2, wt=wt, wtargs=wtargs)" 92 | ] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "metadata": {}, 97 | "source": [ 98 | "## Visualize the graph using heatmap" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": null, 104 | "metadata": {}, 105 | "outputs": [], 106 | "source": [ 107 | "_ = heatmap(G1, title = 'ER-NP(50, 0.2) with N(0,1) Weights Simulation')" 108 | ] 109 | } 110 | ], 111 | "metadata": { 112 | "kernelspec": { 113 | "display_name": "Python 3", 114 | "language": "python", 115 | "name": "python3" 116 | }, 117 | "language_info": { 118 | "codemirror_mode": { 119 | "name": "ipython", 120 | "version": 3 121 | }, 122 | "file_extension": ".py", 123 | "mimetype": "text/x-python", 124 | "name": "python", 125 | "nbconvert_exporter": "python", 126 | "pygments_lexer": "ipython3", 127 | "version": "3.7.0" 128 | } 129 | }, 130 | "nbformat": 4, 131 | "nbformat_minor": 4 132 | } 133 | -------------------------------------------------------------------------------- /graspologic/preconditions.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT license. 3 | 4 | import numbers 5 | from typing import Any, Union 6 | 7 | import networkx as nx 8 | 9 | from graspologic.types import Tuple 10 | 11 | 12 | def check_argument_types( 13 | value: Any, required_types: Union[type, Tuple[type, ...]], message: str 14 | ) -> None: 15 | """ 16 | Raises a TypeError if the provided ``value`` is not one of the ``required_types`` 17 | 18 | Parameters 19 | ---------- 20 | value : Any 21 | The argument to test for valid type 22 | required_types : Union[type, Tuple[type, ...]] 23 | A type or a n-ary tuple of types to test for validity 24 | message : str 25 | The message to use as the body of the TypeError 26 | 27 | Raises 28 | ------ 29 | TypeError if the type is not one of the ``required_types`` 30 | """ 31 | if not isinstance(value, required_types): 32 | raise TypeError(message) 33 | 34 | 35 | def check_optional_argument_types( 36 | value: Any, required_types: Union[type, Tuple[type, ...]], message: str 37 | ) -> None: 38 | """ 39 | Raises a TypeError if the provided ``value`` is not one of the ``required_types``, 40 | unless it is None. A None value is treated as a valid type. 41 | 42 | Parameters 43 | ---------- 44 | value : Any 45 | The argument to test for valid type 46 | required_types : Union[type, Tuple[type, ...]] 47 | A type or a n-ary tuple of types to test for validity 48 | message : str 49 | The message to use as the body of the TypeError 50 | 51 | Raises 52 | ------ 53 | TypeError if the type is not one of the ``required_types``, unless it is None 54 | """ 55 | if value is None: 56 | return 57 | check_argument_types(value, required_types, message) 58 | 59 | 60 | def check_argument(check: bool, message: str) -> None: 61 | """ 62 | Raises a ValueError if the provided check is false 63 | 64 | >>> from graspologic import preconditions 65 | >>> x = 5 66 | >>> preconditions.check_argument(x < 5, "x must be less than 5") 67 | Traceback (most recent call last): 68 | ... 
69 | ValueError: x must be less than 5 70 | 71 | Parameters 72 | ---------- 73 | value : Any 74 | The argument to test for valid type 75 | required_types : Union[type, Tuple[type, ...]] 76 | A type or a n-ary tuple of types to test for validity 77 | message : str 78 | The message to use as the body of the TypeError 79 | 80 | Raises 81 | ------ 82 | TypeError if the type is not one of the ``required_types`` 83 | """ 84 | if not check: 85 | raise ValueError(message) 86 | 87 | 88 | def is_real_weighted( 89 | graph: Union[nx.Graph, nx.DiGraph], weight_attribute: str = "weight" 90 | ) -> bool: 91 | """ 92 | Checks every edge in ``graph`` to ascertain whether it has: 93 | 94 | - a ``weight_attribute`` key in the data dictionary for the edge 95 | - if that ``weight_attribute`` value is a subclass of numbers.Real 96 | 97 | If any edge fails this test, it returns ``False``, else ``True`` 98 | 99 | Parameters 100 | ---------- 101 | graph : Union[nx.Graph, nx.DiGraph] 102 | The networkx graph to test 103 | weight_attribute : str (default="weight") 104 | The edge dictionary data attribute that holds the weight. Default is ``weight``. 105 | 106 | Returns 107 | ------- 108 | bool 109 | ``True`` if every edge has a numeric ``weight_attribute`` weight, ``False`` if 110 | any edge fails this test 111 | 112 | """ 113 | # not only must every edge have a weight attribute but the value must be numeric 114 | return all( 115 | ( 116 | weight_attribute in data 117 | and isinstance(data[weight_attribute], numbers.Real) 118 | for _, _, data in graph.edges(data=True) 119 | ) 120 | ) 121 | -------------------------------------------------------------------------------- /tests/test_vertex_nomination_via_SGM.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 
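The helpers in graspologic/preconditions.py above are small argument guards meant to run at the top of public functions. A minimal sketch; ``summarize_weights`` and its arguments are hypothetical, and only the precondition calls come from the module above:

    import networkx as nx

    from graspologic import preconditions

    def summarize_weights(graph, scale=None):  # hypothetical API for illustration
        preconditions.check_argument_types(
            graph, (nx.Graph, nx.DiGraph), "graph must be a networkx Graph or DiGraph"
        )
        preconditions.check_optional_argument_types(
            scale, (int, float), "scale must be a number if provided"
        )
        preconditions.check_argument(
            preconditions.is_real_weighted(graph),
            "every edge must carry a numeric 'weight' attribute",
        )
        total = sum(data["weight"] for _, _, data in graph.edges(data=True))
        return total if scale is None else total * scale

    g = nx.Graph()
    g.add_edge("a", "b", weight=1.5)
    print(summarize_weights(g, scale=2))  # 3.0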
3 | 4 | import unittest 5 | 6 | import numpy as np 7 | 8 | from graspologic.nominate import VNviaSGM 9 | from graspologic.simulations import er_np 10 | 11 | np.random.seed(1) 12 | 13 | 14 | class TestVNviaSGM(unittest.TestCase): 15 | def test_VNviaSGM_inputs(self): 16 | with self.assertRaises(ValueError): 17 | VNviaSGM(order_voi_subgraph=-1) 18 | with self.assertRaises(ValueError): 19 | VNviaSGM(order_voi_subgraph=1.5) 20 | with self.assertRaises(ValueError): 21 | VNviaSGM(order_seeds_subgraph=-1) 22 | with self.assertRaises(ValueError): 23 | VNviaSGM(order_seeds_subgraph=1.5) 24 | with self.assertRaises(ValueError): 25 | VNviaSGM(n_init=-1) 26 | with self.assertRaises(ValueError): 27 | VNviaSGM(n_init=1.5) 28 | with self.assertRaises(ValueError): 29 | VNviaSGM(max_nominations=0) 30 | 31 | with self.assertRaises(ValueError): 32 | VNviaSGM().fit( 33 | np.random.randn(3, 4), 34 | np.random.randn(4, 4), 35 | 0, 36 | [np.arange(2), np.arange(2)], 37 | ) 38 | with self.assertRaises(ValueError): 39 | VNviaSGM().fit( 40 | np.random.randn(4, 4), 41 | np.random.randn(3, 4), 42 | 0, 43 | [np.arange(2), np.arange(2)], 44 | ) 45 | with self.assertRaises(ValueError): 46 | VNviaSGM().fit( 47 | np.random.randn(4, 4), 48 | np.random.randn(4, 4), 49 | 0, 50 | [np.arange(2), 1], 51 | ) 52 | with self.assertRaises(ValueError): 53 | VNviaSGM().fit( 54 | np.random.randn(4, 4), 55 | np.random.randn(4, 4), 56 | 0, 57 | np.random.randn(3, 3), 58 | ) 59 | with self.assertRaises(ValueError): 60 | VNviaSGM().fit( 61 | np.random.randn(4, 4), 62 | np.random.randn(4, 4), 63 | 0, 64 | [np.arange(2), np.arange(3)], 65 | ) 66 | with self.assertRaises(ValueError): 67 | VNviaSGM().fit( 68 | np.random.randn(4, 4), 69 | np.random.randn(4, 4), 70 | 0, 71 | [np.arange(5), np.arange(5)], 72 | ) 73 | with self.assertRaises(ValueError): 74 | VNviaSGM().fit( 75 | np.random.randn(4, 4), 76 | np.random.randn(4, 4), 77 | 0, 78 | [[], []], 79 | ) 80 | with self.assertRaises(ValueError): 81 | VNviaSGM().fit( 82 | np.random.randn(4, 4), 83 | np.random.randn(4, 4), 84 | 0, 85 | [[1, 1], [1, 2]], 86 | ) 87 | with self.assertRaises(ValueError): 88 | VNviaSGM().fit( 89 | np.random.randn(4, 4), 90 | np.random.randn(4, 4), 91 | 0, 92 | [[1, 5], [1, 2]], 93 | ) 94 | 95 | def test_vn_algorithm(self): 96 | g1 = er_np(n=50, p=0.6) 97 | node_shuffle = np.random.permutation(50) 98 | 99 | g2 = g1[np.ix_(node_shuffle, node_shuffle)] 100 | 101 | kklst = [(xx, yy) for xx, yy in zip(node_shuffle, np.arange(len(node_shuffle)))] 102 | kklst.sort(key=lambda x: x[0]) 103 | kklst = np.array(kklst) 104 | 105 | voi = 7 106 | nseeds = 6 107 | 108 | vnsgm = VNviaSGM() 109 | nomlst = vnsgm.fit_predict( 110 | g1, g2, voi, [kklst[0:nseeds, 0], kklst[0:nseeds, 1]] 111 | ) 112 | 113 | self.assertEqual(nomlst[0][0], kklst[np.where(kklst[:, 0] == voi)[0][0], 1]) 114 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: graspologic Build 2 | on: 3 | push: 4 | paths-ignore: 5 | - '.all-contributorsrc' 6 | - 'CONTRIBUTORS.md' 7 | branches-ignore: 8 | - 'dev' 9 | - 'main' 10 | pull_request: 11 | paths-ignore: 12 | - '.all-contributorsrc' 13 | - 'CONTRIBUTORS.md' 14 | workflow_call: 15 | 16 | env: 17 | PYTHON_VERSION: '3.10' 18 | 19 | jobs: 20 | build-reference-documentation: 21 | runs-on: ubuntu-latest 22 | steps: 23 | - run: sudo apt-get install -y pandoc 24 | - uses: actions/checkout@v2 25 | - name: Set up Python 26 | uses: 
actions/setup-python@v2 27 | with: 28 | python-version: ${{ env.PYTHON_VERSION }} 29 | - name: Install uv 30 | uses: astral-sh/setup-uv@v2 31 | - name: Install dependencies 32 | run: uv sync --python ${{ env.PYTHON_VERSION }} 33 | - name: "Run Reference Documentation Generation" 34 | run: | 35 | echo "documentation" > dependencies_documentation.txt 36 | uv run pipdeptree >> dependencies_documentation.txt 37 | uv run poe docsWithTutorials 38 | - name: Archive documentation version artifact 39 | uses: actions/upload-artifact@v4 40 | with: 41 | name: dependencies 42 | path: | 43 | dependencies_documentation.txt 44 | - name: Archive documentation artifacts 45 | uses: actions/upload-artifact@v4 46 | with: 47 | name: documentation-site 48 | path: | 49 | docs/_build/html 50 | code-format-check: 51 | runs-on: ubuntu-latest 52 | steps: 53 | - uses: actions/checkout@v2 54 | - name: Set up Python 55 | uses: actions/setup-python@v2 56 | with: 57 | python-version: ${{ env.PYTHON_VERSION }} 58 | - name: Install uv 59 | uses: astral-sh/setup-uv@v2 60 | - name: Install dependencies 61 | run: uv sync --python ${{ env.PYTHON_VERSION }} 62 | - name: Run Format Check 63 | run: uv run poe static_checks 64 | test-coverage: 65 | runs-on: ubuntu-latest 66 | steps: 67 | - uses: actions/checkout@v2 68 | - name: Set up Python 69 | uses: actions/setup-python@v2 70 | with: 71 | python-version: ${{ env.PYTHON_VERSION }} 72 | - name: Install uv 73 | uses: astral-sh/setup-uv@v2 74 | - name: Install dependencies 75 | run: uv sync --python ${{ env.PYTHON_VERSION }} 76 | - name: Run Test Coverage 77 | env: 78 | MPLBACKEND: Agg 79 | run: uv run poe coverage 80 | unit-and-doc-test: 81 | runs-on: ${{matrix.os}} 82 | strategy: 83 | matrix: 84 | os: [ubuntu-latest, windows-latest, macos-latest] 85 | python_version: ["3.9", "3.10", "3.11", "3.12"] 86 | fail-fast: false 87 | steps: 88 | - uses: actions/checkout@v2 89 | - name: Set up Python ${{matrix.python_version}} ${{matrix.os}} 90 | uses: actions/setup-python@v2 91 | with: 92 | python-version: ${{matrix.python_version}} 93 | - name: Install uv 94 | uses: astral-sh/setup-uv@v2 95 | - name: Install dependencies 96 | run: uv sync --python ${{ matrix.python_version }} 97 | - name: Run Unit Tests and Doctests Python ${{matrix.python_version}} ${{matrix.os}} 98 | env: 99 | MPLBACKEND: Agg 100 | run: uv run poe tests 101 | - name: Run mypy type check Python ${{matrix.python_version}} ${{matrix.os}} 102 | run: uv run poe type_check 103 | - name: Generate dependency tree 104 | run: | 105 | export DEPS='dependencies_${{matrix.python_version}}_${{matrix.os}}.txt' 106 | echo "${{matrix.python_version}} ${{matrix.os}}" > $DEPS 107 | uv run pipdeptree >> $DEPS 108 | shell: bash 109 | - name: Archive dependency tree 110 | uses: actions/upload-artifact@v4 111 | with: 112 | name: dependencies-${{matrix.python_version}}-${{matrix.os}} 113 | path: | 114 | dependencies_${{matrix.python_version}}_${{matrix.os}}.txt 115 | -------------------------------------------------------------------------------- /graspologic/align/sign_flips.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 3 | 4 | import numpy as np 5 | 6 | from .base import BaseAlign 7 | 8 | 9 | class SignFlips(BaseAlign): 10 | """ 11 | Flips the signs of all entries in one dataset, ``X`` along some of the 12 | dimensions. 
In particular, it does so in a way that brings this dataset to 13 | the same orthant as the second dataset, ``Y``, according to some criterion, 14 | computed along each dimension. The two critera currently available are the 15 | median and the maximum (in magnitude) value along each dimension. 16 | 17 | This module can also be used to bring the dataset to the first orthant 18 | (i.e. with all criteras being positive) by providing the identity matrix as 19 | the second dataset. 20 | 21 | Parameters 22 | ---------- 23 | criterion : string, {'median' (default), 'max'}, optional 24 | String describing the criterion used to choose whether to flip signs. 25 | Two options are currently supported: 26 | 27 | - 'median' 28 | Uses the median along each dimension 29 | - 'max' 30 | Uses the maximum (in magintude) along each dimension 31 | 32 | Attributes 33 | ---------- 34 | Q_ : array, size (d, d) 35 | Final orthogonal matrix, used to modify ``X``. 36 | 37 | """ 38 | 39 | def __init__( 40 | self, 41 | criterion: str = "median", 42 | ): 43 | # checking criterion argument 44 | if type(criterion) is not str: 45 | raise TypeError("Criterion must be str") 46 | if criterion not in ["median", "max"]: 47 | raise ValueError(f"{criterion} is not a valid criterion.") 48 | 49 | super().__init__() 50 | 51 | self.criterion = criterion 52 | 53 | def set_criterion_function(self) -> None: 54 | # perform a check, in case it was modified directly 55 | if self.criterion not in ["median", "max"]: 56 | raise ValueError(f"{self.criterion} is not a valid criterion") 57 | 58 | if self.criterion == "median": 59 | 60 | def median_criterion(X: np.ndarray) -> np.ndarray: 61 | result: np.ndarray = np.median(X, axis=0) 62 | return result 63 | 64 | self.criterion_function_ = median_criterion 65 | if self.criterion == "max": 66 | 67 | def max_criterion(X: np.ndarray) -> np.ndarray: 68 | result: np.ndarray = X[ 69 | np.argmax(np.abs(X), axis=0), np.arange(X.shape[1]) 70 | ] 71 | return result 72 | 73 | self.criterion_function_ = max_criterion 74 | 75 | def fit(self, X: np.ndarray, Y: np.ndarray) -> "SignFlips": 76 | """ 77 | Uses the two datasets to learn the matrix :attr:`~graspologic.align.SignFlips.Q_` that aligns the 78 | first dataset with the second. 79 | 80 | In sign flips, :attr:`~graspologic.align.SignFlips.Q_` is an diagonal orthogonal matrices (i.e. a 81 | matrix with 1 or -1 in each entry on diagonal and 0 everywhere else) 82 | picked such that all dimensions of ``X`` @ :attr:`~graspologic.align.SignFlips.Q_` 83 | and ``Y`` are in the same orthant using some critera (median or max magnitude). 84 | 85 | Parameters 86 | ---------- 87 | X : np.ndarray, shape (n, d) 88 | Dataset to be mapped to ``Y``, must have same number of dimensions 89 | (axis 1) as ``Y``. 90 | 91 | Y : np.ndarray, shape (m, d) 92 | Target dataset, must have same number of dimensions (axis 1) as ``X``. 
93 | 94 | Returns 95 | ------- 96 | self : returns an instance of self 97 | 98 | """ 99 | X, Y = self._check_datasets(X, Y) 100 | _, d = X.shape 101 | 102 | self.set_criterion_function() 103 | X_criterias = self.criterion_function_(X) 104 | Y_criterias = self.criterion_function_(Y) 105 | 106 | val = np.multiply(X_criterias, Y_criterias) 107 | t_X = (val >= 0) * 2 - 1 108 | 109 | self.Q_ = np.diag(t_X) 110 | return self 111 | -------------------------------------------------------------------------------- /tests/test_sign_flips.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 3 | 4 | import unittest 5 | 6 | import numpy as np 7 | 8 | from graspologic.align import SignFlips 9 | 10 | 11 | class TestSignFlips(unittest.TestCase): 12 | def test_bad_kwargs(self): 13 | with self.assertRaises(TypeError): 14 | SignFlips(criterion={"this is a": "dict"}) 15 | with self.assertRaises(ValueError): 16 | SignFlips(criterion="cep") 17 | # check delayed ValueError 18 | with self.assertRaises(ValueError): 19 | aligner = SignFlips(criterion="median") 20 | X = np.arange(6).reshape(6, 1) 21 | Y = np.arange(6).reshape(6, 1) 22 | aligner.criterion = "something" 23 | aligner.fit(X, Y) 24 | 25 | def test_bad_datasets(self): 26 | X = np.arange(6).reshape(6, 1) 27 | Y = np.arange(6).reshape(6, 1) 28 | Y_wrong_d = np.arange(12).reshape(6, 2) 29 | # check passing weird stuff as input (caught by us) 30 | with self.assertRaises(TypeError): 31 | aligner = SignFlips() 32 | aligner.fit("hello there", Y) 33 | with self.assertRaises(TypeError): 34 | aligner = SignFlips() 35 | aligner.fit(X, "hello there") 36 | with self.assertRaises(TypeError): 37 | aligner = SignFlips() 38 | aligner.fit({"hello": "there"}, Y) 39 | with self.assertRaises(TypeError): 40 | aligner = SignFlips() 41 | aligner.fit(X, {"hello": "there"}) 42 | # check passing arrays of weird ndims (caught by check_array) 43 | with self.assertRaises(ValueError): 44 | aligner = SignFlips() 45 | aligner.fit(X, Y.reshape(3, 2, 1)) 46 | with self.assertRaises(ValueError): 47 | aligner = SignFlips() 48 | aligner.fit(X.reshape(3, 2, 1), Y) 49 | # check passing arrays with different dimensions (caught by us) 50 | with self.assertRaises(ValueError): 51 | aligner = SignFlips() 52 | aligner.fit(X, Y_wrong_d) 53 | # check passing array with wrong dimensions to transform (caught by us) 54 | with self.assertRaises(ValueError): 55 | aligner = SignFlips() 56 | aligner.fit(X, Y) 57 | aligner.transform(Y_wrong_d) 58 | 59 | def test_two_datasets(self): 60 | X = np.arange(6).reshape(3, 2) * (-1) 61 | Y = np.arange(6).reshape(3, 2) @ np.diag([1, -1]) + 0.5 62 | # X flips sign in the first dimension 63 | Q_answer = np.array([[-1, 0], [0, 1]]) 64 | X_answer = X.copy() @ Q_answer 65 | # first, do fit and transform separately 66 | aligner_1 = SignFlips() 67 | aligner_1.fit(X, Y) 68 | Q_test = aligner_1.Q_ 69 | X_test = aligner_1.transform(X) 70 | self.assertTrue(np.all(Q_test == Q_answer)) 71 | self.assertTrue(np.all(X_test == X_answer)) 72 | # now, do fit_transform 73 | aligner_2 = SignFlips() 74 | X_test = aligner_2.fit_transform(X, Y) 75 | Q_test = aligner_2.Q_ 76 | self.assertTrue(np.all(Q_test == Q_answer)) 77 | self.assertTrue(np.all(X_test == X_answer)) 78 | # try giving a different matrix as the sole input (I) 79 | I_test = aligner_2.transform(np.eye(2)) 80 | I_answer = np.diag([-1, 1]) 81 | self.assertTrue(np.all(I_test == I_answer)) 82 | 83 | def 
test_max_criterion(self): 84 | X = np.arange(6).reshape(3, 2) * (-1) 85 | Y = np.arange(6).reshape(3, 2) @ np.diag([1, -1]) + 0.5 86 | # in this case, Y should be unchanged, and X matched to Y 87 | # so X flips sign in the first dimension 88 | Q_answer = np.array([[-1, 0], [0, 1]]) 89 | X_answer = X.copy() @ Q_answer 90 | # set criterion to "max", see if that works 91 | aligner = SignFlips(criterion="max") 92 | aligner.fit(X, Y) 93 | Q_test = aligner.Q_ 94 | X_test = aligner.transform(X) 95 | self.assertTrue(np.all(Q_test == Q_answer)) 96 | self.assertTrue(np.all(X_test == X_answer)) 97 | 98 | 99 | if __name__ == "__main__": 100 | unittest.main() 101 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # graspologic 3 | [![Paper shield](https://img.shields.io/badge/JMLR-Paper-red)](http://www.jmlr.org/papers/volume20/19-490/19-490.pdf) 4 | [![PyPI version](https://img.shields.io/pypi/v/graspologic.svg)](https://pypi.org/project/graspologic/) 5 | [![Downloads shield](https://pepy.tech/badge/graspologic)](https://pepy.tech/project/graspologic) 6 | [![graspologic Build](https://github.com/graspologic-org/graspologic/actions/workflows/build.yml/badge.svg)](https://github.com/graspologic-org/graspologic/actions/workflows/build.yml) 7 | [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) 8 | 9 | ## `graspologic` is a package for graph statistical algorithms. 10 | 11 | - [Overview](#overview) 12 | - [Documentation](#documentation) 13 | - [System Requirements](#system-requirements) 14 | - [Installation Guide](#installation-guide) 15 | - [Contributing](#contributing) 16 | - [License](#license) 17 | - [Issues](#issues) 18 | - [Citing `graspologic`](#citing-graspologic) 19 | 20 | # Overview 21 | A graph, or network, provides a mathematically intuitive representation of data with some sort of relationship between items. For example, a social network can be represented as a graph by considering all participants in the social network as nodes, with connections representing whether each pair of individuals in the network are friends with one another. Naively, one might apply traditional statistical techniques to a graph, which neglects the spatial arrangement of nodes within the network and is not utilizing all of the information present in the graph. In this package, we provide utilities and algorithms designed for the processing and analysis of graphs with specialized graph statistical algorithms. 22 | 23 | # Documentation 24 | The official documentation with usage is at [https://graspologic-org.github.io/graspologic/latest](https://graspologic-org.github.io/graspologic/latest) 25 | 26 | Please visit the [tutorial section](https://graspologic-org.github.io/graspologic/latest/tutorials/index.html) in the official website for more in depth usage. 27 | 28 | # System Requirements 29 | 30 | ## Hardware requirements 31 | `graspologic` package requires only a standard computer with enough RAM to support the in-memory operations. 
32 | 33 | 34 | ## Software requirements 35 | 36 | ### OS Requirements 37 | `graspologic` is tested on the following OSes: 38 | - Linux x64 39 | - macOS x64 40 | - Windows 10 x64 41 | 42 | And across the following **x86_64** versions of Python: 43 | - 3.9 44 | - 3.10 45 | - 3.11 46 | - 3.12 47 | 48 | If you try to use `graspologic` for a different platform than the ones listed and notice any unexpected behavior, 49 | please feel free to [raise an issue](https://github.com/graspologic-org/graspologic/issues/new). It's better for ourselves and our users 50 | if we have concrete examples of things not working! 51 | 52 | # Installation Guide 53 | 54 | ## Install from pip 55 | ``` 56 | pip install graspologic 57 | ``` 58 | 59 | 60 | ## Install from Github 61 | ``` 62 | git clone https://github.com/graspologic-org/graspologic 63 | cd graspologic 64 | python3 -m venv venv 65 | source venv/bin/activate 66 | pip install . 67 | ``` 68 | 69 | # Contributing 70 | We welcome contributions from anyone. Please see our [contribution guidelines](https://github.com/graspologic-org/graspologic/blob/dev/CONTRIBUTING.md) before making a pull request. Our 71 | [issues](https://github.com/graspologic-org/graspologic/issues) page is full of places we could use help! 72 | If you have an idea for an improvement not listed there, please 73 | [make an issue](https://github.com/graspologic-org/graspologic/issues/new) first so you can discuss with the developers. 74 | 75 | # License 76 | This project is covered under the MIT License. 77 | 78 | # Issues 79 | We appreciate detailed bug reports and feature requests (though we appreciate pull requests even more!). Please visit our [issues](https://github.com/graspologic-org/graspologic/issues) page if you have questions or ideas. 80 | 81 | # Citing `graspologic` 82 | If you find `graspologic` useful in your work, please cite the package via the [GraSPy paper](http://www.jmlr.org/papers/volume20/19-490/19-490.pdf) 83 | 84 | > Chung, J., Pedigo, B. D., Bridgeford, E. W., Varjavand, B. K., Helm, H. S., & Vogelstein, J. T. (2019). GraSPy: Graph Statistics in Python. Journal of Machine Learning Research, 20(158), 1-7. 85 | -------------------------------------------------------------------------------- /tests/test_latentpositiontest.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 
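Before the test cases, a minimal sketch of the call pattern they exercise: two same-sized graphs go in, and a tuple comes back whose third element is a dictionary holding the bootstrap null distributions (that much is relied on by ``test_n_bootstraps`` below; no further structure is assumed here):

```python
import numpy as np

from graspologic.inference import latent_position_test
from graspologic.simulations import er_np

np.random.seed(0)
A1 = er_np(20, 0.5)
A2 = er_np(20, 0.5)

# One bootstrap null distribution per input graph, n_bootstraps samples each.
result = latent_position_test(A1, A2, n_bootstraps=50)
print(result[2]["null_distribution_1"].shape[0])  # 50, one entry per bootstrap
```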
3 | 4 | import unittest 5 | 6 | import numpy as np 7 | 8 | from graspologic.inference import latent_position_test 9 | from graspologic.inference.latent_position_test import _difference_norm 10 | from graspologic.simulations import er_np, sbm 11 | 12 | 13 | class TestLatentPositionTest(unittest.TestCase): 14 | @classmethod 15 | def test_ase_works(self): 16 | np.random.seed(1234556) 17 | A1 = er_np(5, 0.8) 18 | A2 = er_np(5, 0.8) 19 | lpt = latent_position_test(A1, A2) 20 | 21 | def test_omni_works(self): 22 | np.random.seed(1234556) 23 | A1 = er_np(5, 0.8) 24 | A2 = er_np(5, 0.8) 25 | lpt = latent_position_test(A1, A2, embedding="omnibus") 26 | 27 | def test_bad_kwargs(self): 28 | np.random.seed(1234556) 29 | A1 = er_np(5, 0.8) 30 | A2 = er_np(5, 0.8) 31 | 32 | with self.assertRaises(ValueError): 33 | latent_position_test(A1, A2, n_components=-100) 34 | with self.assertRaises(ValueError): 35 | latent_position_test(A1, A2, test_case="oops") 36 | with self.assertRaises(ValueError): 37 | latent_position_test(A1, A2, n_bootstraps=-100) 38 | with self.assertRaises(ValueError): 39 | latent_position_test(A1, A2, embedding="oops") 40 | with self.assertRaises(TypeError): 41 | latent_position_test(A1, A2, n_bootstraps=0.5) 42 | with self.assertRaises(TypeError): 43 | latent_position_test(A1, A2, n_components=0.5) 44 | with self.assertRaises(TypeError): 45 | latent_position_test(A1, A2, embedding=6) 46 | with self.assertRaises(TypeError): 47 | latent_position_test(A1, A2, test_case=6) 48 | with self.assertRaises(TypeError): 49 | latent_position_test(A1, A2, workers="oops") 50 | 51 | def test_n_bootstraps(self): 52 | np.random.seed(1234556) 53 | A1 = er_np(5, 0.8) 54 | A2 = er_np(5, 0.8) 55 | 56 | lpt = latent_position_test(A1, A2, n_bootstraps=234, n_components=None) 57 | assert lpt[2]["null_distribution_1"].shape[0] == 234 58 | 59 | def test_bad_matrix_inputs(self): 60 | np.random.seed(1234556) 61 | A1 = er_np(5, 0.8) 62 | A2 = er_np(5, 0.8) 63 | A1[2, 0] = 1 # make asymmetric 64 | A1[0, 2] = 0 65 | with self.assertRaises(NotImplementedError): # TODO : remove when we implement 66 | latent_position_test(A1, A2) 67 | 68 | bad_matrix = [[1, 2]] 69 | with self.assertRaises(TypeError): 70 | latent_position_test(bad_matrix, A2) 71 | 72 | with self.assertRaises(ValueError): 73 | latent_position_test(A1[:2, :2], A2) 74 | 75 | def test_rotation_norm(self): 76 | # two triangles rotated by 90 degrees 77 | points1 = np.array([[0, 0], [3, 0], [3, -2]]) 78 | rotation = np.array([[0, 1], [-1, 0]]) 79 | points2 = np.dot(points1, rotation) 80 | 81 | n = _difference_norm(points1, points2, embedding="ase", test_case="rotation") 82 | self.assertAlmostEqual(n, 0) 83 | 84 | def test_diagonal_rotation_norm(self): 85 | # triangle in 2d 86 | points1 = np.array([[0, 0], [3, 0], [3, -2]], dtype=np.float64) 87 | rotation = np.array([[0, 1], [-1, 0]]) 88 | # rotated 90 degrees 89 | points2 = np.dot(points1, rotation) 90 | # diagonally scaled 91 | diagonal = np.array([[2, 0, 0], [0, 3, 0], [0, 0, 2]]) 92 | points2 = np.dot(diagonal, points2) 93 | 94 | n = _difference_norm( 95 | points1, points2, embedding="ase", test_case="diagonal-rotation" 96 | ) 97 | self.assertAlmostEqual(n, 0) 98 | 99 | def test_scalar_rotation_norm(self): 100 | # triangle in 2d 101 | points1 = np.array([[0, 0], [3, 0], [3, -2]], dtype=np.float64) 102 | rotation = np.array([[0, 1], [-1, 0]]) 103 | # rotated 90 degrees 104 | points2 = np.dot(points1, rotation) 105 | # scaled 106 | points2 = 2 * points2 107 | 108 | n = _difference_norm( 109 | points1, points2, 
embedding="ase", test_case="scalar-rotation" 110 | ) 111 | self.assertAlmostEqual(n, 0) 112 | 113 | 114 | if __name__ == "__main__": 115 | unittest.main() 116 | -------------------------------------------------------------------------------- /tests/test_spectral_nomination.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 3 | 4 | import itertools 5 | import unittest 6 | 7 | import numpy as np 8 | 9 | from graspologic.embed.ase import AdjacencySpectralEmbed 10 | from graspologic.nominate import SpectralVertexNomination 11 | from graspologic.simulations.simulations import sbm 12 | 13 | # global constants for tests 14 | n_verts = 50 15 | p = np.array([[0.7, 0.25, 0.2], [0.25, 0.8, 0.3], [0.2, 0.3, 0.85]]) 16 | labels = np.array([0] * n_verts + [1] * n_verts + [2] * n_verts) 17 | adj = np.array(sbm(3 * [n_verts], p), dtype=int) 18 | embeder = AdjacencySpectralEmbed() 19 | pre_embeded = embeder.fit_transform(adj) 20 | 21 | 22 | class TestSpectralVertexNominatorOutputs(unittest.TestCase): 23 | def _nominate(self, X, seed, nominator=None, k=None): 24 | if nominator is None: 25 | nominator = SpectralVertexNomination(n_neighbors=k) 26 | nominator.fit(X) 27 | n_verts = X.shape[0] 28 | nom_list, dists = nominator.predict(seed) 29 | self.assertEqual(nom_list.shape, (n_verts, seed.shape[0])) 30 | self.assertEqual(dists.shape, (n_verts, seed.shape[0])) 31 | return nom_list 32 | 33 | def test_seed_inputs(self): 34 | with self.assertRaises(IndexError): 35 | self._nominate(adj, np.zeros((1, 50), dtype=int)) 36 | with self.assertRaises(TypeError): 37 | self._nominate(adj, np.random.random((10, 2))) 38 | 39 | def test_X_inputs(self): 40 | with self.assertRaises(IndexError): 41 | self._nominate(np.zeros((5, 5, 5), dtype=int), np.zeros(3, dtype=int)) 42 | with self.assertRaises(TypeError): 43 | self._nominate([[0] * 10] * 10, np.zeros(3, dtype=int)) 44 | # embedding should have fewer cols than rows. 45 | svn = SpectralVertexNomination(input_graph=False) 46 | with self.assertRaises(IndexError): 47 | self._nominate( 48 | np.zeros((10, 20), dtype=int), 49 | np.zeros(3, dtype=int), 50 | nominator=svn, 51 | ) 52 | # adj matrix should be square 53 | with self.assertRaises(IndexError): 54 | self._nominate(np.zeros((3, 4), dtype=int), np.zeros(3, dtype=int)) 55 | 56 | def _test_k(self): 57 | # k should be > 0 58 | with self.assertRaises(ValueError): 59 | self._nominate(adj, np.zeros(3, dtype=int), k=0) 60 | # k of wrong type 61 | with self.assertRaises(TypeError): 62 | self._nominate(adj, np.zeros(3, dtype=int), k="hello world") 63 | 64 | def test_constructor_inputs(self): 65 | with self.assertRaises(ValueError): 66 | svn = SpectralVertexNomination(embedder="hi") 67 | self._nominate(adj, np.zeros(3, dtype=int), nominator=svn) 68 | 69 | def test_constructor_inputs1(self): 70 | # embedder must be BaseSpectralEmbed or str 71 | with self.assertRaises(TypeError): 72 | svn = SpectralVertexNomination(embedder=45) 73 | 74 | def test_constructor_inputs2(self): 75 | # input graph param has wrong type 76 | with self.assertRaises(TypeError): 77 | svn = SpectralVertexNomination(input_graph=4) 78 | 79 | def test_basic_unattributed(self): 80 | """ 81 | Runs two attributed seeds and two unattributed seeds with each nominator. 82 | Ensures all options work. Should be fast. Nested parametrization tests all 83 | combinations of listed parameters. 
84 | """ 85 | nominators = [ 86 | SpectralVertexNomination(embedder="ASE"), 87 | SpectralVertexNomination(embedder="LSE"), 88 | SpectralVertexNomination(embedder=embeder), 89 | ] 90 | seeds = [ 91 | np.array([8]), 92 | np.array([2, 6, 9, 15, 25]), 93 | np.arange(n_verts - 1, dtype=int), 94 | ] 95 | for nominator, seed in itertools.product(nominators, seeds): 96 | self._nominate(adj, seed, nominator) 97 | 98 | def test_pre_embedded(self): 99 | seeds = [ 100 | np.array([8]), 101 | np.array([2, 6, 9, 15, 25]), 102 | np.arange(n_verts - 1, dtype=int), 103 | ] 104 | for seed in seeds: 105 | svn = SpectralVertexNomination(input_graph=False) 106 | self._nominate(pre_embeded, seed, nominator=svn) 107 | -------------------------------------------------------------------------------- /graspologic/pipeline/graph_builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT license. 3 | 4 | from collections import OrderedDict 5 | from typing import Any, Union 6 | 7 | import networkx as nx 8 | from beartype import beartype 9 | 10 | from graspologic.types import Dict, List, Tuple 11 | 12 | __all__ = ["GraphBuilder"] 13 | 14 | 15 | class GraphBuilder: 16 | """ 17 | GraphBuilder is a simple builder for networkx Graphs. To use less memory, 18 | it automatically maps all node ids of any hashable type to ``int``. 19 | 20 | In other words, if you can use it as a key in a dictionary, it will work. 21 | 22 | By default, the main method it provides, ``add_edge``, will sum edge weights 23 | if the edge already exists. 24 | 25 | Parameters 26 | ---------- 27 | directed : bool (default=False) 28 | Used to create either a :class:`networkx.Graph` or 29 | :class:`networkx.DiGraph` object. 30 | """ 31 | 32 | @beartype 33 | def __init__(self, directed: bool = False): 34 | # OrderedDict is the default for {} anyway, but I wanted to be very explicit, 35 | # since we absolutely rely on the ordering 36 | self._id_map: Dict[Any, int] = OrderedDict() 37 | self._graph = nx.DiGraph() if directed else nx.Graph() 38 | 39 | @beartype 40 | def add_edge( 41 | self, 42 | source: Any, 43 | target: Any, 44 | weight: Union[int, float] = 1.0, 45 | sum_weight: bool = True, 46 | **attributes: Any, 47 | ) -> None: 48 | """ 49 | Adds a weighted edge between the provided source and target. The source 50 | and target id are converted to a unique ``int``. 51 | 52 | If no weight is provided, a default weight of ``1.0`` is used. 53 | 54 | If an edge between the source and target already exists, and if the 55 | ``sum_weight`` argument is ``True``, then the weights are summed. 56 | 57 | Otherwise, the last weight provided will be used as the edge's weight. 58 | 59 | Any other attributes specified will be added to the edge's data dictionary. 60 | 61 | Parameters 62 | ---------- 63 | source : Any 64 | source node id 65 | target : Any 66 | target node id 67 | weight : Union[int, float] (default=1.0) 68 | The weight for the edge. If none is provided, the weight is defaulted to 1. 69 | sum_weight : bool (default=True) 70 | If an edge between the ``source`` and ``target`` already exist, should we 71 | sum the edge weights or overwrite the edge weight with the provided 72 | ``weight`` value. 73 | attributes : kwargs 74 | The attributes kwargs are presumed to be attributes that should be added 75 | to the edge dictionary for ``source`` and ``target``. 
76 | """ 77 | source_id = self._map_node_id(source) 78 | target_id = self._map_node_id(target) 79 | if sum_weight: 80 | old = self._graph.get_edge_data(source_id, target_id, default={}).get( 81 | "weight", 0 82 | ) 83 | self._graph.add_edge( 84 | source_id, target_id, weight=old + weight, **attributes 85 | ) 86 | else: 87 | self._graph.add_edge(source_id, target_id, weight=weight, **attributes) 88 | 89 | def build(self) -> Tuple[Union[nx.Graph, nx.DiGraph], Dict[Any, int], List[Any]]: 90 | """ 91 | Returns 92 | ------- 93 | Tuple[Union[nx.Graph, nx.DiGraph], Dict[Any, int], List[Any]] 94 | The returned tuple is either an undirected or directed graph, depending on 95 | the constructor argument ``directed``. The second value in the tuple is a 96 | dictionary of original node ids to their assigned integer ids. The third 97 | and final value in the tuple is a List of original node ids, where the 98 | index corresponds to the assigned integer and the value is the corresponding 99 | original ID. 100 | """ 101 | old_to_new = self._id_map 102 | new_to_old = [key for key, _ in old_to_new.items()] 103 | return self._graph, old_to_new, new_to_old 104 | 105 | def _map_node_id(self, node_id: Any) -> int: 106 | mapped_node_id = self._id_map.get(node_id, len(self._id_map)) 107 | self._id_map[node_id] = mapped_node_id 108 | return mapped_node_id 109 | -------------------------------------------------------------------------------- /tests/test_mds.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 3 | 4 | import unittest 5 | 6 | import numpy as np 7 | from numpy.testing import assert_almost_equal 8 | from sklearn.utils.estimator_checks import check_estimator 9 | 10 | from graspologic.embed.mds import ClassicalMDS 11 | 12 | 13 | class TestMDS(unittest.TestCase): 14 | def test_sklearn_conventions(self): 15 | check_estimator(ClassicalMDS()) 16 | 17 | def test_input(self): 18 | X = np.random.normal(0, 1, size=(10, 3)) 19 | 20 | # X cannot be tensor when precomputed dissimilarity 21 | with self.assertRaises(ValueError): 22 | tensor = np.random.normal(0, 1, size=(10, 3, 3)) 23 | mds = ClassicalMDS(n_components=3, dissimilarity="precomputed") 24 | mds.fit(tensor) 25 | 26 | with self.assertRaises(ValueError): 27 | one_dimensional = np.random.normal(size=10) 28 | mds = ClassicalMDS(n_components=2, dissimilarity="euclidean") 29 | mds.fit(one_dimensional) 30 | 31 | # n_components > n_samples 32 | with self.assertRaises(ValueError): 33 | mds = ClassicalMDS(n_components=100) 34 | mds.fit(X) 35 | 36 | # Invalid n_components 37 | with self.assertRaises(ValueError): 38 | mds = ClassicalMDS(n_components=-2) 39 | mds.fit(X) 40 | 41 | with self.assertRaises(TypeError): 42 | mds = ClassicalMDS(n_components="1") 43 | mds.fit(X) 44 | 45 | # Invalid dissimilarity 46 | with self.assertRaises(ValueError): 47 | mds = ClassicalMDS(dissimilarity="abc") 48 | mds.fit(X) 49 | 50 | # Invalid input for fit function 51 | with self.assertRaises(ValueError): 52 | mds = ClassicalMDS(n_components=3, dissimilarity="precomputed") 53 | mds.fit(X="bad_input") 54 | 55 | # Must be square and symmetric matrix if precomputed dissimilarity 56 | with self.assertRaises(ValueError): 57 | mds = ClassicalMDS(n_components=3, dissimilarity="precomputed") 58 | mds.fit(X) 59 | 60 | def test_tensor_input(self): 61 | X = np.random.normal(size=(100, 5, 5)) 62 | mds = ClassicalMDS(n_components=3, dissimilarity="euclidean") 63 | 
mds.fit(X) 64 | 65 | self.assertEqual(mds.dissimilarity_matrix_.shape, (100, 100)) 66 | 67 | X_transformed = mds.fit_transform(X) 68 | self.assertEqual(X_transformed.shape, (100, 3)) 69 | 70 | def test_output(self): 71 | """ 72 | Recover a 3D tetrahedron with distance 1 between all points 73 | 74 | Use both fit and fit_transform functions 75 | """ 76 | 77 | def _compute_dissimilarity(arr): 78 | out = np.zeros((4, 4)) 79 | for i in range(4): 80 | out[i] = np.linalg.norm(arr - arr[i], axis=1) 81 | 82 | return out 83 | 84 | def use_fit_transform(): 85 | A = np.ones((4, 4)) - np.identity(4) 86 | 87 | mds = ClassicalMDS(n_components=3, dissimilarity="precomputed") 88 | B = mds.fit_transform(A) 89 | 90 | Ahat = _compute_dissimilarity(B) 91 | 92 | # Checks up to 7 decimal points 93 | assert_almost_equal(A, Ahat) 94 | 95 | def use_fit(): 96 | A = np.ones((4, 4)) - np.identity(4) 97 | 98 | mds = ClassicalMDS(n_components=3, dissimilarity="precomputed") 99 | mds.fit(A) 100 | B = np.dot(mds.components_, np.diag(mds.singular_values_)) 101 | 102 | Ahat = _compute_dissimilarity(B) 103 | 104 | # Checks up to 7 decimal points 105 | assert_almost_equal(A, Ahat) 106 | 107 | def use_euclidean(): 108 | A = np.array([ 109 | [-7.62291243e-17, 6.12372436e-01, 4.95031815e-16], 110 | [-4.97243701e-01, -2.04124145e-01, -2.93397401e-01], 111 | [5.02711453e-01, -2.04124145e-01, -2.83926977e-01], 112 | [-5.46775198e-03, -2.04124145e-01, 5.77324378e-01], 113 | ]) 114 | 115 | mds = ClassicalMDS(dissimilarity="euclidean") 116 | B = mds.fit_transform(A) 117 | 118 | target = np.ones((4, 4)) - np.identity(4) 119 | assert_almost_equal(mds.dissimilarity_matrix_, target) 120 | 121 | use_fit_transform() 122 | use_fit() 123 | use_euclidean() 124 | -------------------------------------------------------------------------------- /docs/tutorials/simulations/rdpg.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Random Dot Product Graph (RDPG) Model" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import graspologic\n", 17 | "\n", 18 | "import numpy as np\n", 19 | "%matplotlib inline" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "RDPG is a latent position generative model, in which the probability of an edge existing between pairs of vertices is determined by the dot product of the associated latent position vectors. In other words, given $X \\in \\mathbb{R}^{n\\times d}$, where $n$ is the number of vertices and $d$ is the dimensionality of each vector, the probability matrix $P$ is given by:\n", 27 | "\n", 28 | "$$ P = X X^T $$\n", 29 | "\n", 30 | "Both ER and SBM models can be formulated as a RDPG. Below, we sample $ER_{NP}(100, 0.5)$ using RDPG formulation. In this case, we set $X \\in \\mathbb{R}^{100\\times 2}$ where all the values in $X$ is 0.5. This results in $P$ matrix where all the probabilities are also 0.5." 
31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": { 37 | "tags": [] 38 | }, 39 | "outputs": [], 40 | "source": [ 41 | "from graspologic.simulations import rdpg\n", 42 | "\n", 43 | "# Create a latent position matrix\n", 44 | "X = np.full((100, 2), 0.5)\n", 45 | "print(X @ X.T)" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "A = rdpg(X)" 55 | ] 56 | }, 57 | { 58 | "cell_type": "markdown", 59 | "metadata": {}, 60 | "source": [ 61 | "## Visualize the adjacency matrix" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "from graspologic.plot import heatmap\n", 71 | "\n", 72 | "_ = heatmap(A, title='ER_NP(100, 0.5) Using RDPG')" 73 | ] 74 | }, 75 | { 76 | "cell_type": "markdown", 77 | "metadata": {}, 78 | "source": [ 79 | "## Stochastic block model as RDPG\n", 80 | "\n", 81 | "We can formulate the following 2-block SBM parameters as RDPG, where the latent positions live in $\\mathbb{R}^3$.\n", 82 | "\n", 83 | "\\begin{align*}\n", 84 | "n &= [50, 50]\\\\\n", 85 | "p &= \\begin{bmatrix}0.33 & 0.09\\\\\n", 86 | "0.09 & 0.03\n", 87 | "\\end{bmatrix}\n", 88 | "\\end{align*}\n", 89 | "\n", 90 | "as\n", 91 | "\n", 92 | "\\begin{align*}\n", 93 | "X &= \\begin{bmatrix}0.5 & 0.2 & 0.2\\\\\n", 94 | "& \\vdots & \\\\\n", 95 | "0.1 & 0.1 & 0.1\\\\\n", 96 | "& \\vdots & \n", 97 | "\\end{bmatrix}\\\\\n", 98 | "P &= XX^T\n", 99 | "\\end{align*}" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": null, 105 | "metadata": {}, 106 | "outputs": [], 107 | "source": [ 108 | "X = np.array([[0.5, 0.2, 0.2]] * 50 + [[0.1, 0.1, 0.1]] * 50)\n", 109 | "A_rdpg = rdpg(X, loops=False)\n", 110 | "_ = heatmap(A_rdpg, title='2-block SBM as RDPG')" 111 | ] 112 | }, 113 | { 114 | "cell_type": "markdown", 115 | "metadata": {}, 116 | "source": [ 117 | "### Results from SBM simulation using same formulation shows similar structure" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": null, 123 | "metadata": {}, 124 | "outputs": [], 125 | "source": [ 126 | "from graspologic.simulations import sbm\n", 127 | "\n", 128 | "n = [50, 50]\n", 129 | "p = [[0.33, 0.09], [0.09, 0.03]]\n", 130 | "\n", 131 | "A_sbm = sbm(n, p)\n", 132 | "_ = heatmap(A_sbm, title = 'SBM Simulation')" 133 | ] 134 | } 135 | ], 136 | "metadata": { 137 | "kernelspec": { 138 | "display_name": "Python 3", 139 | "language": "python", 140 | "name": "python3" 141 | }, 142 | "language_info": { 143 | "codemirror_mode": { 144 | "name": "ipython", 145 | "version": 3 146 | }, 147 | "file_extension": ".py", 148 | "mimetype": "text/x-python", 149 | "name": "python", 150 | "nbconvert_exporter": "python", 151 | "pygments_lexer": "ipython3", 152 | "version": "3.7.0" 153 | } 154 | }, 155 | "nbformat": 4, 156 | "nbformat_minor": 4 157 | } 158 | -------------------------------------------------------------------------------- /graspologic/utils/ptr.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 
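A minimal usage sketch for the function defined below (assuming, as in the published API, that it is re-exported as ``graspologic.utils.pass_to_ranks``):

```python
import numpy as np

from graspologic.utils import pass_to_ranks

# Symmetric weighted adjacency matrix with one heavy-tailed edge weight.
A = np.array([
    [0.0, 1.0, 10.0],
    [1.0, 0.0, 100.0],
    [10.0, 100.0, 0.0],
])

# The default 'simple-nonzero' method replaces the raw weights with their
# ranks, rescaled into (0, 1), which tames the outlying weight of 100.
print(pass_to_ranks(A))
```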
3 | 4 | import numpy as np 5 | from scipy.stats import rankdata 6 | 7 | from ..types import GraphRepresentation 8 | from .utils import import_graph, is_loopless, is_symmetric, is_unweighted, symmetrize 9 | 10 | 11 | def pass_to_ranks( 12 | graph: GraphRepresentation, method: str = "simple-nonzero" 13 | ) -> GraphRepresentation: 14 | """ 15 | Rescales edge weights of an adjacency matrix based on their relative rank in 16 | the graph. 17 | 18 | Parameters 19 | ---------- 20 | graph: array_like or networkx.Graph 21 | Adjacency matrix 22 | 23 | method: {'simple-nonzero' (default), 'simple-all', 'zero-boost'} string, optional 24 | 25 | - 'simple-nonzero' 26 | assigns ranks to all non-zero edges, settling ties using 27 | the average. Ranks are then scaled by 28 | :math:`\\frac{rank(\\text{non-zero edges})}{\\text{total non-zero edges} + 1}` 29 | - 'simple-all' 30 | assigns ranks to all non-zero edges, settling ties using 31 | the average. Ranks are then scaled by 32 | :math:`\\frac{rank(\\text{non-zero edges})}{n^2 + 1}` 33 | where n is the number of nodes 34 | - 'zero-boost' 35 | preserves the edge weight for all 0s, but ranks the other 36 | edges as if the ranks of all 0 edges has been assigned. If there are 37 | 10 0-valued edges, the lowest non-zero edge gets weight 11 / (number 38 | of possible edges). Ties settled by the average of the weight that those 39 | edges would have received. Number of possible edges is determined 40 | by the type of graph (loopless or looped, directed or undirected). 41 | 42 | See also 43 | -------- 44 | scipy.stats.rankdata 45 | 46 | Returns 47 | ------- 48 | graph: numpy.ndarray, shape(n_vertices, n_vertices) 49 | Adjacency matrix of graph after being passed to ranks 50 | """ 51 | 52 | graph = import_graph(graph) # just for typechecking 53 | 54 | if is_unweighted(graph): 55 | return graph 56 | 57 | if graph.min() < 0: 58 | raise UserWarning( 59 | "Current pass-to-ranks on graphs with negative" 60 | + " weights will yield nonsensical results, especially for zero-boost" 61 | ) 62 | 63 | if method == "zero-boost": 64 | if is_symmetric(graph): 65 | # start by working with half of the graph, since symmetric 66 | triu = np.triu(graph) 67 | non_zeros = triu[triu != 0] 68 | else: 69 | non_zeros = graph[graph != 0] 70 | rank = rankdata(non_zeros) 71 | 72 | if is_symmetric(graph): 73 | if is_loopless(graph): 74 | num_zeros = (len(graph[graph == 0]) - graph.shape[0]) / 2 75 | possible_edges = graph.shape[0] * (graph.shape[0] - 1) / 2 76 | else: 77 | num_zeros = ( 78 | len(triu[triu == 0]) - graph.shape[0] * (graph.shape[0] - 1) / 2 79 | ) 80 | possible_edges = graph.shape[0] * (graph.shape[0] + 1) / 2 81 | else: 82 | if is_loopless(graph): 83 | # n^2 - num_nonzero - num_diagonal 84 | num_zeros = graph.size - len(non_zeros) - graph.shape[0] 85 | # n^2 - num_diagonal 86 | possible_edges = graph.size - graph.shape[0] 87 | else: 88 | num_zeros = graph.size - len(non_zeros) 89 | possible_edges = graph.size 90 | 91 | # shift up by the number of zeros 92 | rank = rank + num_zeros 93 | # normalize by the number of possible edges for this kind of graph 94 | rank = rank / possible_edges 95 | # put back into matrix form (and reflect over the diagonal if necessary) 96 | if is_symmetric(graph): 97 | triu[triu != 0] = rank 98 | graph = symmetrize(triu, method="triu") 99 | else: 100 | graph[graph != 0] = rank 101 | return graph 102 | elif method in ["simple-all", "simple-nonzero"]: 103 | non_zeros = graph[graph != 0] 104 | rank = rankdata(non_zeros) 105 | if method == "simple-all": 106 
| normalizer = graph.size 107 | elif method == "simple-nonzero": 108 | normalizer = rank.shape[0] 109 | rank = rank / (normalizer + 1) 110 | graph[graph != 0] = rank 111 | return graph 112 | else: 113 | raise ValueError("Unsuported pass-to-ranks method") 114 | -------------------------------------------------------------------------------- /graspologic/align/orthogonal_procrustes.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 3 | 4 | import numpy as np 5 | from scipy.linalg import orthogonal_procrustes 6 | 7 | from .base import BaseAlign 8 | 9 | 10 | class OrthogonalProcrustes(BaseAlign): 11 | """ 12 | Computes the matrix solution of the classical orthogonal Procrustes [1]_ 13 | problem, which is that given two matrices ``X`` and ``Y`` of equal shape 14 | (n, d), find an orthogonal matrix that most closely maps ``X`` to 15 | ``Y``. Subsequently, uses that matrix to transform either the original ``X``, 16 | or a different dataset in the same space. 17 | 18 | Note that when used to match two datasets, this method unlike 19 | :class:`~graspologic.align.SeedlessProcrustes`, not only requires that the 20 | datasets have the same number of entries, but also that there is some 21 | correspondence between the entries. In graph embeddings, this usually 22 | corresponds to the assumption that the vertex :math:`i` in graph ``X`` has the same 23 | latent position as the vertex :math:`i` in graph ``Y``. 24 | 25 | Attributes 26 | ---------- 27 | Q_ : array, size (d, d) 28 | Final orthogonal matrix, used to modify ``X``. 29 | 30 | score_ : float 31 | Final value of the objective function: :math:`|| X Q - Y ||_F` 32 | Lower means the datasets have been matched together better. 33 | 34 | References 35 | ---------- 36 | 37 | .. [1] https://en.wikipedia.org/wiki/Orthogonal_Procrustes_problem 38 | 39 | .. [2] Peter H. Schonemann, "A generalized solution of the orthogonal 40 | Procrustes problem", Psychometrica -- Vol. 31, No. 1, March, 1996. 41 | 42 | Notes 43 | ----- 44 | Formally, minimizes :math:`|| X Q - Y ||_F`, which has a closed form 45 | solution, whenever :math:`Q` is constrained to be an orthogonal matrix, 46 | that is a matrix that satisfies :math:`Q^T Q = Q Q^T = I`. For the more 47 | details, including the proof of the closed-form solution see [1]_. 48 | 49 | Implementation-wise, this class is a wrapper of the 50 | :func:`scipy.linalg.orthogonal_procrustes`, which itself uses an algorithm 51 | described in find the optimal solution algorithm [2]_. 52 | 53 | """ 54 | 55 | def __init__( 56 | self, 57 | ) -> None: 58 | super().__init__() 59 | 60 | def fit(self, X: np.ndarray, Y: np.ndarray) -> "OrthogonalProcrustes": 61 | """ 62 | Uses the two datasets to learn the matrix :attr:`~graspologic.align.OrthogonalProcrustes.Q_` that aligns the 63 | first dataset with the second. 64 | 65 | Parameters 66 | ---------- 67 | X : np.ndarray, shape (n, d) 68 | Dataset to be mapped to ``Y``, must have the same shape as ``Y``. 69 | 70 | Y : np.ndarray, shape (m, d) 71 | Target dataset, must have the same shape as ``X``. 72 | 73 | 74 | Returns 75 | ------- 76 | self : returns an instance of self 77 | 78 | """ 79 | X, Y = self._check_datasets(X, Y) 80 | 81 | _, d = X.shape 82 | if X.shape[0] != Y.shape[0]: 83 | msg = ( 84 | "Two datasets have different number of entries! " 85 | "OrthogonalProcrustes assumes that entries of the two " 86 | "datasets are matched. 
consider using SeedlessProcrustes " 87 | "instead." 88 | ) 89 | raise ValueError(msg) 90 | 91 | _, d = X.shape 92 | self.Q_, _ = orthogonal_procrustes(X, Y) 93 | self.score_ = np.linalg.norm(X @ self.Q_ - Y, ord="fro") 94 | return self 95 | 96 | def fit_transform(self, X: np.ndarray, Y: np.ndarray) -> np.ndarray: 97 | """ 98 | Uses the two datasets to learn the matrix :attr:`~graspologic.align.OrthogonalProcrustes.Q_` that aligns the 99 | first dataset with the second. Then, transforms the first dataset ``X`` 100 | using the learned matrix :attr:`~graspologic.align.OrthogonalProcrustes.Q_`. 101 | 102 | Parameters 103 | ---------- 104 | X : np.ndarray, shape (n, d) 105 | Dataset to be mapped to ``Y``, must have the same shape as ``Y``. 106 | 107 | Y : np.ndarray, shape (m, d) 108 | Target dataset, must have the same shape as ``X``. 109 | 110 | Returns 111 | ------- 112 | X_prime : np.ndarray, shape (n, d) 113 | First dataset of vectors, aligned to second. Equal to 114 | ``X`` @ :attr:`~graspologic.align.BaseAlign.Q_`. 115 | """ 116 | return super().fit_transform(X, Y) 117 | -------------------------------------------------------------------------------- /graspologic/cluster/kclust.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 3 | 4 | from typing import Optional, Union 5 | 6 | import numpy as np 7 | from sklearn.cluster import KMeans 8 | from sklearn.metrics import adjusted_rand_score, silhouette_score 9 | 10 | from graspologic.types import List 11 | 12 | from .base import BaseCluster 13 | 14 | 15 | class KMeansCluster(BaseCluster): 16 | ari_: Optional[List[float]] 17 | 18 | """ 19 | KMeans Cluster. 20 | 21 | It computes all possible models from one component to ``max_clusters``. 22 | When the true labels are known, the best model is given by the model with highest 23 | adjusted Rand index (ARI). 24 | Otherwise, the best model is given by the model with highest silhouette score. 25 | 26 | Parameters 27 | ---------- 28 | max_clusters : int, default=2. 29 | The maximum number of clusters to consider. Must be ``>=2``. 30 | 31 | random_state : int, RandomState instance or None, optional (default=None) 32 | If int, ``random_state`` is the seed used by the random number generator; 33 | If RandomState instance, ``random_state`` is the random number generator; 34 | If None, the random number generator is the RandomState instance used 35 | by ``np.random``. 36 | 37 | Attributes 38 | ---------- 39 | n_clusters_ : int 40 | Optimal number of clusters. If y is given, it is based on largest 41 | ARI. Otherwise, it is based on highest silhouette score. 42 | 43 | model_ : KMeans object 44 | Fitted KMeans object fitted with ``n_clusters_``. 45 | 46 | silhouette_ : list 47 | List of silhouette scores computed for all possible number 48 | of clusters given by ``range(2, max_clusters)``. 49 | 50 | ari_ : list 51 | Only computed when y is given. List of ARI values computed for 52 | all possible number of clusters given by ``range(2, max_clusters)``. 53 | """ 54 | 55 | def __init__( 56 | self, 57 | max_clusters: int = 2, 58 | random_state: Optional[Union[int, np.random.RandomState]] = None, 59 | ): 60 | if isinstance(max_clusters, int): 61 | if max_clusters <= 1: 62 | msg = "n_components must be >= 2 or None." 
63 | raise ValueError(msg) 64 | else: 65 | self.max_clusters = max_clusters 66 | else: 67 | msg = "max_clusters must be an integer, not {}.".format(type(max_clusters)) 68 | raise TypeError(msg) 69 | self.random_state = random_state 70 | 71 | def fit(self, X: np.ndarray, y: Optional[np.ndarray] = None) -> "KMeansCluster": 72 | """ 73 | Fits kmeans model to the data. 74 | 75 | Parameters 76 | ---------- 77 | X : array-like, shape (n_samples, n_features) 78 | List of n_features-dimensional data points. Each row 79 | corresponds to a single data point. 80 | 81 | y : array-like, shape (n_samples,), optional (default=None) 82 | List of labels for `X` if available. Used to compute ARI scores. 83 | 84 | Returns 85 | ------- 86 | self 87 | """ 88 | # Deal with number of clusters 89 | if self.max_clusters > X.shape[0]: 90 | msg = "n_components must be >= n_samples, but got \ 91 | n_components = {}, n_samples = {}".format(self.max_clusters, X.shape[0]) 92 | raise ValueError(msg) 93 | else: 94 | max_clusters = self.max_clusters 95 | 96 | # Get parameters 97 | random_state = self.random_state 98 | 99 | # Compute all models 100 | models = [] 101 | silhouettes = [] 102 | aris = [] 103 | for n in range(2, max_clusters + 1): 104 | model = KMeans(n_clusters=n, random_state=random_state) 105 | 106 | # Fit and compute values 107 | predictions = model.fit_predict(X) 108 | models.append(model) 109 | silhouettes.append(silhouette_score(X, predictions)) 110 | if y is not None: 111 | aris.append(adjusted_rand_score(y, predictions)) 112 | 113 | if y is not None: 114 | self.ari_ = aris 115 | self.silhouette_ = silhouettes 116 | self.n_clusters_ = np.argmax(aris) + 1 117 | self.model_ = models[np.argmax(aris)] 118 | else: 119 | self.ari_ = None 120 | self.silhouette_ = silhouettes 121 | self.n_clusters_ = np.argmax(silhouettes) + 1 122 | self.model_ = models[np.argmax(silhouettes)] 123 | 124 | return self 125 | -------------------------------------------------------------------------------- /tests/test_n2v.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 
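Before the tests, a small sketch of the embedding call they exercise, using the same ``graspologic.embed.n2v`` module path imported below (the karate-club graph and unit weights are illustrative; the walk sampling is stochastic):

```python
import networkx as nx

import graspologic.embed.n2v as n2v

graph = nx.karate_club_graph()
for u, v in graph.edges():
    graph[u][v]["weight"] = 1  # unit weights, as the tests below assign

embedding, node_labels = n2v.node2vec_embed(graph)
print(embedding.shape, len(node_labels))  # (34, 128) with defaults; one label per node
```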
3 | 4 | import unittest 5 | from typing import TYPE_CHECKING 6 | 7 | import networkx as nx 8 | 9 | import graspologic.embed.n2v as n2v 10 | from graspologic.embed.n2v import _Node2VecGraph 11 | 12 | if TYPE_CHECKING: 13 | import numpy as np 14 | 15 | 16 | class Node2VecEmbedTest(unittest.TestCase): 17 | def test_node2vec_embedding_correct_shape_is_returned(self): 18 | import io 19 | 20 | graph = nx.read_edgelist( 21 | io.StringIO(_edge_list), nodetype=int, create_using=nx.DiGraph() 22 | ) 23 | 24 | model = n2v.node2vec_embed(graph) 25 | model_matrix: np.ndarray = model[0] 26 | vocab_list = model[1] 27 | self.assertIsNotNone(model) 28 | self.assertIsNotNone(model[0]) 29 | self.assertIsNotNone(model[1]) 30 | 31 | # model matrix should be 34 x 128 32 | self.assertEqual(model_matrix.shape[0], 34) 33 | self.assertEqual(model_matrix.shape[1], 128) 34 | 35 | # vocab list should have exactly 34 elements 36 | self.assertEqual(len(vocab_list), 34) 37 | 38 | def test_node2vec_embedding_florentine_graph_correct_shape_is_returned(self): 39 | graph = nx.florentine_families_graph() 40 | for s, t in graph.edges(): 41 | graph.add_edge(s, t, weight=1) 42 | 43 | model = n2v.node2vec_embed(graph) 44 | model_matrix: np.ndarray = model[0] 45 | vocab_list = model[1] 46 | self.assertIsNotNone(model) 47 | self.assertIsNotNone(model[0]) 48 | self.assertIsNotNone(model[1]) 49 | 50 | # model matrix should be 34 x 128 51 | self.assertEqual(model_matrix.shape[0], 15) 52 | self.assertEqual(model_matrix.shape[1], 128) 53 | 54 | # vocab list should have exactly 34 elements 55 | self.assertEqual(len(vocab_list), 15) 56 | 57 | def test_node2vec_embedding_barbell_graph_correct_shape_is_returned(self): 58 | graph = nx.barbell_graph(25, 2) 59 | for s, t in graph.edges(): 60 | graph.add_edge(s, t, weight=1) 61 | 62 | model = n2v.node2vec_embed(graph) 63 | model_matrix: np.ndarray = model[0] 64 | vocab_list = model[1] 65 | self.assertIsNotNone(model) 66 | self.assertIsNotNone(model[0]) 67 | self.assertIsNotNone(model[1]) 68 | 69 | # model matrix should be 34 x 128 70 | self.assertEqual(model_matrix.shape[0], 52) 71 | self.assertEqual(model_matrix.shape[1], 128) 72 | 73 | # vocab list should have exactly 34 elements 74 | self.assertEqual(len(vocab_list), 52) 75 | 76 | def test_get_walk_length_lower_defaults_to_1(self): 77 | expected_walk_length = 1 78 | 79 | g = _Node2VecGraph(nx.Graph(), 1, 1) 80 | w = g._get_walk_length_interpolated( 81 | degree=0, percentiles=[1, 2, 3, 4, 10, 100], max_walk_length=10 82 | ) 83 | 84 | self.assertEqual(w, expected_walk_length) 85 | 86 | def test_get_walk_length_higher_default_to_walk_length(self): 87 | expected_walk_length = 100 88 | 89 | g = _Node2VecGraph(nx.Graph(), 1, 1) 90 | w = g._get_walk_length_interpolated( 91 | degree=10, 92 | percentiles=[2, 3, 4, 5, 6, 7, 8, 9], 93 | max_walk_length=expected_walk_length, 94 | ) 95 | 96 | self.assertEqual(w, expected_walk_length) 97 | 98 | def test_get_walk_length_in_middle_selects_interpolated_bucket(self): 99 | expected_walk_length = 5 100 | 101 | g = _Node2VecGraph(nx.Graph(), 1, 1) 102 | w = g._get_walk_length_interpolated( 103 | degree=5, percentiles=[2, 3, 4, 5, 6, 7, 8, 9], max_walk_length=10 104 | ) 105 | 106 | self.assertEqual(w, expected_walk_length) 107 | 108 | 109 | _edge_list = """ 110 | 1 32 111 | 1 22 112 | 1 20 113 | 1 18 114 | 1 14 115 | 1 13 116 | 1 12 117 | 1 11 118 | 1 9 119 | 1 8 120 | 1 7 121 | 1 6 122 | 1 5 123 | 1 4 124 | 1 3 125 | 1 2 126 | 2 31 127 | 2 22 128 | 2 20 129 | 2 18 130 | 2 14 131 | 2 8 132 | 2 4 133 | 2 3 134 | 3 
14 135 | 3 9 136 | 3 10 137 | 3 33 138 | 3 29 139 | 3 28 140 | 3 8 141 | 3 4 142 | 4 14 143 | 4 13 144 | 4 8 145 | 5 11 146 | 5 7 147 | 6 17 148 | 6 11 149 | 6 7 150 | 7 17 151 | 9 34 152 | 9 33 153 | 9 33 154 | 10 34 155 | 14 34 156 | 15 34 157 | 15 33 158 | 16 34 159 | 16 33 160 | 19 34 161 | 19 33 162 | 20 34 163 | 21 34 164 | 21 33 165 | 23 34 166 | 23 33 167 | 24 30 168 | 24 34 169 | 24 33 170 | 24 28 171 | 24 26 172 | 25 32 173 | 25 28 174 | 25 26 175 | 26 32 176 | 27 34 177 | 27 30 178 | 28 34 179 | 29 34 180 | 29 32 181 | 30 34 182 | 30 33 183 | 31 34 184 | 31 33 185 | 32 34 186 | 32 33 187 | 33 34 188 | """ 189 | -------------------------------------------------------------------------------- /tests/test_io.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation and contributors. 2 | # Licensed under the MIT License. 3 | 4 | import os 5 | import sys 6 | import tempfile 7 | import unittest 8 | from pathlib import Path 9 | 10 | import networkx as nx 11 | import numpy as np 12 | import pytest 13 | 14 | import graspologic as gs 15 | 16 | 17 | class TestImportGraph(unittest.TestCase): 18 | @classmethod 19 | def setUpClass(cls) -> None: 20 | # simple ERxN graph 21 | n = 15 22 | p = 0.5 23 | cls.A = np.zeros((n, n)) 24 | nedge = int(round(n * n * p)) 25 | np.put( 26 | cls.A, 27 | np.random.choice(np.arange(0, n * n), size=nedge, replace=False), 28 | np.random.normal(size=nedge), 29 | ) 30 | 31 | def test_graphin(self): 32 | G = nx.from_numpy_array(self.A) 33 | np.testing.assert_array_equal(nx.to_numpy_array(G), gs.utils.import_graph(G)) 34 | 35 | def test_npin(self): 36 | np.testing.assert_array_equal(self.A, gs.utils.import_graph(self.A)) 37 | 38 | def test_wrongtypein(self): 39 | a = 5 40 | with self.assertRaises(TypeError): 41 | gs.utils.import_graph(a) 42 | with self.assertRaises(TypeError): 43 | gs.utils.import_graph(None) 44 | 45 | def test_nonsquare(self): 46 | non_square = np.hstack((self.A, self.A)) 47 | with self.assertRaises(ValueError): 48 | gs.utils.import_graph(non_square) 49 | 50 | 51 | class TestImportEdgelist(unittest.TestCase): 52 | @classmethod 53 | def tearDownClass(cls) -> None: 54 | cls.tmpdir.cleanup() 55 | 56 | @classmethod 57 | def setUpClass(cls) -> None: 58 | cls.tmpdir = tempfile.TemporaryDirectory() 59 | n = 10 60 | p = 0.5 61 | wt = np.random.exponential 62 | wtargs = dict(scale=4) 63 | 64 | np.random.seed(1) 65 | 66 | cls.A = gs.simulations.er_np(n, p) 67 | cls.B = gs.simulations.er_np(n, p, wt=wt, wtargs=wtargs) 68 | 69 | G_A = nx.from_numpy_array(cls.A) 70 | G_B = nx.from_numpy_array(cls.B) 71 | G_B = nx.relabel_nodes(G_B, lambda x: x + 10) # relabel nodes to go from 10-19. 
72 | 73 | cls.root = str(cls.tmpdir.name) 74 | cls.A_path = os.path.join(cls.root, "A_unweighted.edgelist") 75 | cls.B_path = os.path.join(cls.root, "B.edgelist") 76 | 77 | nx.write_edgelist(G_A, cls.A_path, data=False) 78 | nx.write_weighted_edgelist(G_B, cls.B_path) 79 | 80 | def test_in(self): 81 | A_from_edgelist = gs.utils.import_edgelist(self.A_path) 82 | B_from_edgelist = gs.utils.import_edgelist(self.B_path) 83 | 84 | np.testing.assert_allclose(A_from_edgelist, self.A) 85 | np.testing.assert_allclose(B_from_edgelist, self.B) 86 | 87 | def test_in_Path_obj(self): 88 | A_from_edgelist = gs.utils.import_edgelist(Path(self.A_path)) 89 | B_from_edgelist = gs.utils.import_edgelist(Path(self.B_path)) 90 | 91 | np.testing.assert_allclose(A_from_edgelist, self.A) 92 | np.testing.assert_allclose(B_from_edgelist, self.B) 93 | 94 | def test_multiple_in(self): 95 | graphs = gs.utils.import_edgelist(self.root) 96 | A = np.zeros((20, 20)) 97 | A[:10, :10] = self.A 98 | 99 | B = np.zeros((20, 20)) 100 | B[10:, 10:] = self.B 101 | 102 | self.assertEqual(len(graphs), 2) 103 | self.assertTrue(all(graph.shape == (20, 20) for graph in graphs)) 104 | np.testing.assert_allclose(graphs[0], A) 105 | np.testing.assert_allclose(graphs[1], B) 106 | 107 | def test_wrongtypein(self): 108 | path = 5 109 | with self.assertRaises(TypeError): 110 | gs.utils.import_edgelist(path) 111 | with self.assertRaises(TypeError): 112 | gs.utils.import_edgelist(None) 113 | 114 | def test_vertices(self): 115 | expected_vertices_A = np.arange(0, 10) 116 | expected_vertices_B = np.arange(10, 20) 117 | 118 | _, A_vertices = gs.utils.import_edgelist(self.A_path, return_vertices=True) 119 | _, B_vertices = gs.utils.import_edgelist(self.B_path, return_vertices=True) 120 | 121 | np.testing.assert_allclose(expected_vertices_A, A_vertices) 122 | np.testing.assert_allclose(expected_vertices_B, B_vertices) 123 | 124 | def test_no_graphs_found(self): 125 | path = str(self.root + "invalid_edgelist.edgelist") 126 | with self.assertRaises(ValueError): 127 | gs.utils.import_edgelist(path) 128 | 129 | def test_bad_delimiter(self): 130 | delimiter = "," 131 | with pytest.warns(UserWarning): 132 | graphs = gs.utils.import_edgelist(self.root, delimiter=delimiter) 133 | --------------------------------------------------------------------------------
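To close, a self-contained sketch (not part of the repository) of the edgelist round trip that ``TestImportEdgelist`` above exercises: write a small weighted graph to disk, then read it back as a dense adjacency matrix.

```python
import os
import tempfile

import networkx as nx
import numpy as np

import graspologic as gs

np.random.seed(1)

with tempfile.TemporaryDirectory() as tmpdir:
    # Weighted Erdos-Renyi graph, mirroring the fixture in the test above.
    A = gs.simulations.er_np(10, 0.5, wt=np.random.exponential, wtargs=dict(scale=4))
    path = os.path.join(tmpdir, "A.edgelist")
    nx.write_weighted_edgelist(nx.from_numpy_array(A), path)

    A_back, vertices = gs.utils.import_edgelist(path, return_vertices=True)
    np.testing.assert_allclose(A_back, A)  # the round trip preserves the weights
    print(vertices)  # node ids 0 through 9
```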