├── notebooks
    └── .gitkeep
├── .gitattributes
├── doc
    ├── source
    │   ├── read.rst
    │   ├── cluster.rst
    │   ├── entropy.rst
    │   ├── graph.rst
    │   ├── threshold.rst
    │   ├── standardize.rst
    │   ├── utilities.rst
    │   ├── dynamics.rst
    │   ├── index.rst
    │   ├── distance.rst
    │   ├── reconstruction.rst
    │   ├── conf.py
    │   └── tutorial.rst
    ├── Makefile
    └── make.bat
├── netrd_distance_example.png
├── netrd_dynamics_example.png
├── netrd_reconstruction_example.png
├── paper
    ├── netrd_distance_example.pdf
    └── allRecons_withGroundtruth_SherringtonKirkpatrick.pdf
├── requirements.txt
├── netrd
    ├── __init__.py
    ├── dynamics
    │   ├── __init__.py
    │   ├── base.py
    │   ├── single_unbiased_random_walker.py
    │   ├── voter.py
    │   ├── sherrington_kirkpatrick.py
    │   ├── ising_glauber.py
    │   ├── SIS.py
    │   ├── kuramoto.py
    │   └── lotka_volterra.py
    ├── utilities
    │   ├── read.py
    │   ├── __init__.py
    │   ├── cluster.py
    │   ├── standardize.py
    │   ├── graph.py
    │   ├── entropy.py
    │   └── threshold.py
    ├── distance
    │   ├── base.py
    │   ├── frobenius.py
    │   ├── jaccard_distance.py
    │   ├── __init__.py
    │   ├── degree_divergence.py
    │   ├── polynomial_dissimilarity.py
    │   ├── hamming.py
    │   ├── ipsen_mikhailov.py
    │   ├── netlsd.py
    │   ├── deltacon.py
    │   ├── communicability_jsd.py
    │   ├── resistance_perturbation.py
    │   ├── dk_series.py
    │   ├── netsimile.py
    │   ├── graph_diffusion.py
    │   ├── quantum_jsd.py
    │   └── distributional_nbd.py
    └── reconstruction
    │   ├── base.py
    │   ├── __init__.py
    │   ├── random.py
    │   ├── maximum_likelihood_estimation.py
    │   ├── graphical_lasso.py
    │   ├── correlation_matrix.py
    │   ├── free_energy_minimization.py
    │   ├── ou_inference.py
    │   ├── thouless_anderson_palmer.py
    │   ├── granger_causality.py
    │   ├── correlation_spanning_tree.py
    │   ├── mean_field.py
    │   ├── naive_transfer_entropy.py
    │   ├── partial_correlation_matrix.py
    │   ├── partial_correlation_influence.py
    │   └── marchenko_pastur.py
├── .readthedocs.yml
├── .github
    └── workflows
    │   ├── draft-pdf.yml
    │   └── python-ci.yml
├── LICENSE
├── tests
    ├── test_dynamics.py
    ├── test_utilities.py
    ├── test_reconstruction.py
    └── test_distance.py
├── setup.py
├── .gitignore
└── README.md


/notebooks/.gitkeep:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | notebooks/* linguist-vendored
2 | 


--------------------------------------------------------------------------------
/doc/source/read.rst:
--------------------------------------------------------------------------------
1 | .. automodule:: netrd.utilities.read
2 |     :members:
3 |     :undoc-members:
4 | 


--------------------------------------------------------------------------------
/netrd_distance_example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/netsiphd/netrd/HEAD/netrd_distance_example.png


--------------------------------------------------------------------------------
/netrd_dynamics_example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/netsiphd/netrd/HEAD/netrd_dynamics_example.png


--------------------------------------------------------------------------------
/doc/source/cluster.rst:
--------------------------------------------------------------------------------
1 | .. automodule:: netrd.utilities.cluster
2 |     :members:
3 |     :undoc-members:
4 | 


--------------------------------------------------------------------------------
/doc/source/entropy.rst:
--------------------------------------------------------------------------------
1 | .. automodule:: netrd.utilities.entropy
2 |     :members:
3 |     :undoc-members:
4 | 


--------------------------------------------------------------------------------
/doc/source/graph.rst:
--------------------------------------------------------------------------------
1 | .. automodule:: netrd.utilities.graph
2 |     :members:
3 |     :undoc-members:
4 | 


--------------------------------------------------------------------------------
/doc/source/threshold.rst:
--------------------------------------------------------------------------------
1 | .. automodule:: netrd.utilities.threshold
2 |     :members:
3 |     :undoc-members:
4 | 


--------------------------------------------------------------------------------
/doc/source/standardize.rst:
--------------------------------------------------------------------------------
1 | .. automodule:: netrd.utilities.standardize
2 |     :members:
3 |     :undoc-members:
4 | 


--------------------------------------------------------------------------------
/netrd_reconstruction_example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/netsiphd/netrd/HEAD/netrd_reconstruction_example.png


--------------------------------------------------------------------------------
/paper/netrd_distance_example.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/netsiphd/netrd/HEAD/paper/netrd_distance_example.pdf


--------------------------------------------------------------------------------
/paper/allRecons_withGroundtruth_SherringtonKirkpatrick.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/netsiphd/netrd/HEAD/paper/allRecons_withGroundtruth_SherringtonKirkpatrick.pdf


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | networkx>=2.2.0
2 | numpy>=1.16.0
3 | scipy>=1.0.0
4 | scikit-learn>=0.18.2
5 | numpydoc>=0.9
6 | ortools>=6.7
7 | sphinx-rtd-theme>=0.4
8 | Sphinx==2.0.1
9 | 


--------------------------------------------------------------------------------
/doc/source/utilities.rst:
--------------------------------------------------------------------------------
 1 | Utilities
 2 | =========
 3 | 
 4 | Common utilities for use within ``netrd``.
 5 | 
 6 | 
 7 | .. toctree::
 8 |    :maxdepth: 2
 9 |    :caption: Submodules
10 | 
11 |    cluster
12 |    entropy
13 |    graph
14 |    read
15 |    standardize
16 |    threshold
17 | 


--------------------------------------------------------------------------------
/netrd/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | netrd
 3 | -----
 4 | 
 5 | netrd stands for Network Reconstruction and Distances. It is a repository
 6 | of different algorithms for constructing a network from time series data,
 7 | as well as for comparing two networks. It is the product of the Network
 8 | Science Insitute 2019 Collabathon.
 9 | 
10 | """
11 | 
12 | from . import distance  # noqa
13 | from . import reconstruction  # noqa
14 | from . import dynamics  # noqa
15 | from . import utilities  # noqa
16 | 


--------------------------------------------------------------------------------
/.readthedocs.yml:
--------------------------------------------------------------------------------
 1 | # .readthedocs.yml
 2 | # Read the Docs configuration file
 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
 4 | 
 5 | # Required
 6 | version: 2
 7 | 
 8 | # Build documentation in the docs/ directory with Sphinx
 9 | sphinx:
10 |   configuration: doc/source/conf.py
11 | 
12 | # Optionally set the version of Python and requirements required
13 | python:
14 |   version: 3.6
15 |   install:
16 |     - requirements: requirements.txt
17 |     - method: pip
18 |       path: .
19 | 


--------------------------------------------------------------------------------
/doc/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line.
 5 | SPHINXOPTS    =
 6 | SPHINXBUILD   = sphinx-build
 7 | SOURCEDIR     = source
 8 | BUILDDIR      = build
 9 | 
10 | # Put it first so that "make" without argument is like "make help".
11 | help:
12 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
13 | 
14 | .PHONY: help Makefile
15 | 
16 | # Catch-all target: route all unknown targets to Sphinx using the new
17 | # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
18 | %: Makefile
19 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)


--------------------------------------------------------------------------------
/netrd/dynamics/__init__.py:
--------------------------------------------------------------------------------
 1 | from .base import BaseDynamics
 2 | from .sherrington_kirkpatrick import SherringtonKirkpatrickIsing
 3 | from .single_unbiased_random_walker import SingleUnbiasedRandomWalker
 4 | from .kuramoto import Kuramoto
 5 | from .lotka_volterra import LotkaVolterra
 6 | from .ising_glauber import IsingGlauber
 7 | from .branching_process import BranchingModel
 8 | from .voter import VoterModel
 9 | from .SIS import SISModel
10 | 
11 | __all__ = [
12 |     'BaseDynamics',
13 |     'SherringtonKirkpatrickIsing',
14 |     'SingleUnbiasedRandomWalker',
15 |     'Kuramoto',
16 |     'LotkaVolterra',
17 |     'IsingGlauber',
18 |     'BranchingModel',
19 |     'VoterModel',
20 |     'SISModel',
21 | ]
22 | 


--------------------------------------------------------------------------------
/.github/workflows/draft-pdf.yml:
--------------------------------------------------------------------------------
 1 | on: [push]
 2 | 
 3 | jobs:
 4 |   paper:
 5 |     runs-on: ubuntu-latest
 6 |     name: Paper Draft
 7 |     steps:
 8 |       - name: Checkout
 9 |         uses: actions/checkout@v2
10 |       - name: Build draft PDF
11 |         uses: openjournals/openjournals-draft-action@master
12 |         with:
13 |           journal: joss
14 |           # This should be the path to the paper within your repo.
15 |           paper-path: paper/paper.md
16 |       - name: Upload
17 |         uses: actions/upload-artifact@v1
18 |         with:
19 |           name: paper
20 |           # This is the output path where Pandoc will write the compiled
21 |           # PDF. Note, this should be the same directory as the input
22 |           # paper.md
23 |           path: paper/paper.pdf
24 | 


--------------------------------------------------------------------------------
/netrd/utilities/read.py:
--------------------------------------------------------------------------------
 1 | """
 2 | read.py
 3 | -------
 4 | 
 5 | Utilities for reading data.
 6 | 
 7 | author: Tim LaRock (timothylarock at gmail dot com)
 8 | 
 9 | Submitted as part of the 2019 NetSI Collabathon.
10 | 
11 | """
12 | import numpy as np
13 | 
14 | 
15 | def read_time_series(filename, delimiter=','):
16 |     r"""Read a time series from a file into an array.
17 | 
18 |     This function expects `filename` to be a comma separated text file with
19 |     only data (no headers).
20 | 
21 |     Parameters
22 |     ----------
23 |     filename (str)
24 |         path to a file that will be read
25 | 
26 |     delimiter (str)
27 |         delimiter in the file
28 | 
29 |     Returns
30 |     -------
31 | 
32 |     arr
33 |         the array read from filename
34 | 
35 |     """
36 |     return np.loadtxt(filename, delimiter=delimiter)
37 | 


--------------------------------------------------------------------------------
/doc/source/dynamics.rst:
--------------------------------------------------------------------------------
 1 | Dynamics
 2 | ========
 3 | 
 4 | Dynamics classes allow the user to run simulations over a network.
 5 | 
 6 | 
 7 | Base class
 8 | ----------
 9 | .. autoclass:: netrd.dynamics.BaseDynamics
10 | 
11 | 
12 | Available dynamics
13 | ------------------
14 | 
15 | All of the following dynamics inherit from ``BaseDynamics`` and have the
16 | same general usage as above.
17 | 
18 | .. autosummary::
19 |    :nosignatures:
20 | 
21 |     netrd.dynamics.BranchingModel
22 |     netrd.dynamics.IsingGlauber
23 |     netrd.dynamics.Kuramoto
24 |     netrd.dynamics.LotkaVolterra
25 |     netrd.dynamics.SISModel
26 |     netrd.dynamics.SherringtonKirkpatrickIsing
27 |     netrd.dynamics.SingleUnbiasedRandomWalker
28 |     netrd.dynamics.VoterModel
29 | 
30 | 
31 | Reference
32 | ---------
33 | 
34 | .. automodule:: netrd.dynamics
35 |     :members:
36 |     :undoc-members:
37 | 


--------------------------------------------------------------------------------
/doc/make.bat:
--------------------------------------------------------------------------------
 1 | @ECHO OFF
 2 | 
 3 | pushd %~dp0
 4 | 
 5 | REM Command file for Sphinx documentation
 6 | 
 7 | if "%SPHINXBUILD%" == "" (
 8 | 	set SPHINXBUILD=sphinx-build
 9 | )
10 | set SOURCEDIR=source
11 | set BUILDDIR=build
12 | 
13 | if "%1" == "" goto help
14 | 
15 | %SPHINXBUILD% >NUL 2>NUL
16 | if errorlevel 9009 (
17 | 	echo.
18 | 	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
19 | 	echo.installed, then set the SPHINXBUILD environment variable to point
20 | 	echo.to the full path of the 'sphinx-build' executable. Alternatively you
21 | 	echo.may add the Sphinx directory to PATH.
22 | 	echo.
23 | 	echo.If you don't have Sphinx installed, grab it from
24 | 	echo.http://sphinx-doc.org/
25 | 	exit /b 1
26 | )
27 | 
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
29 | goto end
30 | 
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
33 | 
34 | :end
35 | popd
36 | 


--------------------------------------------------------------------------------
/netrd/utilities/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | utilities
 3 | ----------
 4 | 
 5 | Common utilities for use within ``netrd``.
 6 | 
 7 | """
 8 | from .threshold import threshold
 9 | from .graph import (
10 |     create_graph,
11 |     ensure_undirected,
12 |     undirected,
13 |     ensure_unweighted,
14 |     unweighted,
15 | )
16 | from .read import read_time_series
17 | from .cluster import clusterGraph
18 | from .standardize import mean_GNP_distance
19 | from .entropy import (
20 |     js_divergence,
21 |     entropy_from_seq,
22 |     joint_entropy,
23 |     conditional_entropy,
24 |     categorized_data,
25 |     linear_bins,
26 | )
27 | 
28 | __all__ = [
29 |     'threshold',
30 |     'clusterGraph',
31 |     'js_divergence',
32 |     'entropy_from_seq',
33 |     'joint_entropy',
34 |     'conditional_entropy',
35 |     'categorized_data',
36 |     'linear_bins',
37 |     'create_graph',
38 |     'undirected',
39 |     'ensure_undirected',
40 |     'unweighted',
41 |     'ensure_unweighted',
42 |     'read_time_series',
43 |     'mean_GNP_distance',
44 | ]
45 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 NetSI 2019 Collabathon Team
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a
 6 | copy of this software and associated documentation files (the "Software"),
 7 | to deal in the Software without restriction, including without limitation
 8 | the rights to use, copy, modify, merge, publish, distribute, sublicense,
 9 | and/or sell copies of the Software, and to permit persons to whom the
10 | Software is furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 | DEALINGS IN THE SOFTWARE.


--------------------------------------------------------------------------------
/tests/test_dynamics.py:
--------------------------------------------------------------------------------
 1 | """
 2 | test_dynamics.py
 3 | ----------------
 4 | 
 5 | Test dynamics algorithms.
 6 | 
 7 | """
 8 | 
 9 | import networkx as nx
10 | from netrd import dynamics
11 | from netrd.dynamics import BaseDynamics
12 | from netrd.dynamics import LotkaVolterra
13 | 
14 | 
15 | def test_dynamics_valid_dimensions():
16 |     """Dynamics models should return N x L arrays."""
17 | 
18 |     G = nx.barbell_graph(10, 5)
19 |     N = G.number_of_nodes()
20 | 
21 |     for L in [25, 100]:
22 |         for obj in dynamics.__dict__.values():
23 |             if isinstance(obj, type) and BaseDynamics in obj.__bases__:
24 |                 TS = obj().simulate(G, L)
25 |                 assert TS.shape == (N, L), "f{label} has wrong dimensions"
26 | 
27 |     assert BaseDynamics().simulate(G, 25).shape == (N, 25)
28 |     assert BaseDynamics().simulate(G, 100).shape == (N, 100)
29 | 
30 | 
31 | def test_lotka_volterra():
32 |     """Test Lotka Volterra simulation"""
33 |     g = nx.fast_gnp_random_graph(10, 0.001)
34 |     lv_model = LotkaVolterra()
35 |     assert lv_model.simulate(g, 100, stochastic=False).shape == (10, 100)
36 |     assert lv_model.simulate(g, 100, stochastic=False).shape == (10, 100)
37 | 


--------------------------------------------------------------------------------
/.github/workflows/python-ci.yml:
--------------------------------------------------------------------------------
 1 | name: build
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [ master ]
 6 |   pull_request:
 7 |     branches: [ master ]
 8 | 
 9 | jobs:
10 |   build:
11 | 
12 |     runs-on: ${{ matrix.os }}
13 |     strategy:
14 |       matrix:
15 |         os: [macos-latest, ubuntu-latest]
16 |         python-version: ['3.7', '3.8', '3.9', '3.10', '3.11']
17 | 
18 |     steps:
19 |     - uses: actions/checkout@v2
20 |     - name: Set up Python ${{ matrix.python-version }}
21 |       uses: actions/setup-python@v2
22 |       with:
23 |         python-version: ${{ matrix.python-version }}
24 |     - name: Install dependencies
25 |       run: |
26 |         python -m pip install --upgrade pip
27 |         pip install black pytest flake8
28 |         if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
29 |         pip install -e .
30 |     - name: Lint with black
31 |       run: |
32 |         black --skip-string-normalization --check netrd
33 |         black --skip-string-normalization --check tests
34 |     - name: Check for unused imports with flake8
35 |       run: |
36 |         flake8 --select=F401,F403 netrd
37 |         flake8 --select=F401,F403 tests
38 |     - name: Test with pytest
39 |       run: |
40 |         cd tests/
41 |         pytest
42 | 


--------------------------------------------------------------------------------
/netrd/dynamics/base.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | 
 4 | class BaseDynamics:
 5 |     """Base class for all dynamics processes.
 6 | 
 7 |     The basic usage is as follows:
 8 | 
 9 |     >>> ground_truth = nx.read_edgelist("ground_truth.txt")
10 |     >>> dynamics_model = Dynamics()
11 |     >>> synthetic_TS = dynamics_model.simulate(ground_truth, <some_params>)
12 |     >>> # G = Reconstructor().fit(synthetic_TS)
13 | 
14 |     This produces a numpy array of time series data.
15 | 
16 |     """
17 | 
18 |     def __init__(self):
19 |         self.results = {}
20 | 
21 |     def simulate(self, G, L):
22 |         r"""Simulate dynamics on a ground truth network.
23 | 
24 |         The results dictionary stores the ground truth network as
25 |         `'ground_truth'`.
26 | 
27 |         Parameters
28 |         ----------
29 | 
30 |         G (nx.Graph)
31 |             the input (ground-truth) graph with :math:`N` nodes.
32 | 
33 |         L (int)
34 |             the length of the desired time series.
35 | 
36 |         Returns
37 |         -------
38 | 
39 |         TS (np.ndarray)
40 |             an :math`N \times L` array of synthetic time series data.
41 | 
42 |         """
43 |         N = G.number_of_nodes()
44 |         self.results['ground_truth'] = G
45 |         self.results['TS'] = np.ones((N, L))
46 |         return self.results['TS']
47 | 


--------------------------------------------------------------------------------
/doc/source/index.rst:
--------------------------------------------------------------------------------
 1 | netrd: A library for network {reconstruction, distances, dynamics}
 2 | ======================================================================
 3 | 
 4 | This library provides a consistent, NetworkX-based interface to various
 5 | utilities for graph distances, graph reconstruction from time series
 6 | data, and simulated dynamics on networks.
 7 | 
 8 | To see the library in action, visit the `netrd
 9 | explorer <https://netrdexplorer.herokuapp.com/>`__.
10 | 
11 | Installation
12 | ============
13 | 
14 | ::
15 | 
16 |    git clone https://github.com/netsiphd/netrd
17 |    cd netrd
18 |    pip install .
19 | 
20 | Tutorial
21 | ========
22 | 
23 | A tutorial on using the library can be found `here <tutorial.html>`__. To see
24 | more advanced usage of the library, refer to `this
25 | notebook <https://nbviewer.jupyter.org/github/netsiphd/netrd/blob/master/notebooks/00%20-%20netrd_introduction.ipynb>`__.
26 | 
27 | Contributing
28 | ============
29 | 
30 | Contributing guidelines can be found in
31 | `CONTRIBUTING.md <CONTRIBUTING.md>`__.
32 | 
33 | .. toctree::
34 |    :maxdepth: 1
35 |    :caption: Contents
36 | 
37 |    tutorial
38 |    dynamics
39 |    distance
40 |    reconstruction
41 |    utilities
42 | 
43 | 
44 | Indices and tables
45 | ==================
46 | 
47 | * :ref:`genindex`
48 | * :ref:`modindex`
49 | * :ref:`search`
50 | 


--------------------------------------------------------------------------------
/doc/source/distance.rst:
--------------------------------------------------------------------------------
 1 | Distance
 2 | ========
 3 | 
 4 | Graph distance methods to compare two networks.
 5 | 
 6 | 
 7 | Base class
 8 | ----------
 9 | .. autoclass:: netrd.distance.BaseDistance
10 | 
11 | 
12 | Available distances
13 | -------------------
14 | 
15 | All of the following algorithms inherit from ``BaseDistance`` and have the
16 | same general usage as above.
17 | 
18 | .. autosummary::
19 |    :nosignatures:
20 | 
21 |     netrd.distance.CommunicabilityJSD
22 |     netrd.distance.DegreeDivergence
23 |     netrd.distance.DeltaCon
24 |     netrd.distance.DistributionalNBD
25 |     netrd.distance.dkSeries
26 |     netrd.distance.DMeasure
27 |     netrd.distance.Frobenius
28 |     netrd.distance.GraphDiffusion
29 |     netrd.distance.Hamming
30 |     netrd.distance.HammingIpsenMikhailov
31 |     netrd.distance.IpsenMikhailov
32 |     netrd.distance.JaccardDistance
33 |     netrd.distance.LaplacianSpectral
34 |     netrd.distance.NonBacktrackingSpectral
35 |     netrd.distance.NetLSD
36 |     netrd.distance.NetSimile
37 |     netrd.distance.OnionDivergence
38 |     netrd.distance.PolynomialDissimilarity
39 |     netrd.distance.PortraitDivergence
40 |     netrd.distance.QuantumJSD
41 |     netrd.distance.ResistancePerturbation
42 | 
43 | 
44 | Reference
45 | ---------
46 | 
47 | .. automodule:: netrd.distance
48 |     :members:
49 |     :undoc-members:
50 | 


--------------------------------------------------------------------------------
/netrd/distance/base.py:
--------------------------------------------------------------------------------
 1 | class BaseDistance:
 2 |     """Base class for all distance algorithms.
 3 | 
 4 |     The basic usage of a distance algorithm is as follows:
 5 | 
 6 |     >>> dist_obj = DistanceAlgorithm()
 7 |     >>> distance = dist_obj.dist(G1, G2, <some_params>)
 8 |     >>> # or alternatively: distance = dist_obj.results['dist']
 9 | 
10 |     Here, `G1` and `G2` are ``nx.Graph`` objects (or subclasses such as
11 |     ``nx.DiGraph``). The results dictionary holds the distance value, as
12 |     well as any other values that were computed as a side effect.
13 | 
14 |     """
15 | 
16 |     def __init__(self):
17 |         self.results = {}
18 | 
19 |     def __call__(self, *args, **kwargs):
20 |         return self.dist(*args, **kwargs)
21 | 
22 |     def dist(self, G1, G2):
23 |         """Compute distance between two graphs.
24 | 
25 |         Values computed as side effects of the distance method can be foun
26 |         in self.results.
27 | 
28 |         Parameters
29 |         ----------
30 | 
31 |         G1, G2 (nx.Graph): two graphs.
32 | 
33 |         Returns
34 |         -----------
35 | 
36 |         distance (float).
37 | 
38 |         """
39 |         dist = -1  # compute the distance
40 |         self.results['dist'] = dist  # store dist in self.results
41 |         # self.results[..] = ..     # also store other values if needed
42 |         return dist  # return only one value!
43 | 


--------------------------------------------------------------------------------
/doc/source/reconstruction.rst:
--------------------------------------------------------------------------------
 1 | Reconstruction
 2 | ==============
 3 | 
 4 | Algorithms to recosntruct a graph from time series data.
 5 | 
 6 | 
 7 | Base class
 8 | ----------
 9 | .. autoclass:: netrd.reconstruction.BaseReconstructor
10 | 
11 | 
12 | Available algorithms
13 | --------------------
14 | 
15 | All of the following algorithms inherit from ``BaseReconstructor`` and have
16 | the same general usage as above.
17 | 
18 | .. autosummary::
19 |    :nosignatures:
20 | 
21 |     netrd.reconstruction.ConvergentCrossMapping
22 |     netrd.reconstruction.CorrelationMatrix
23 |     netrd.reconstruction.CorrelationSpanningTree
24 |     netrd.reconstruction.FreeEnergyMinimization
25 |     netrd.reconstruction.GrangerCausality
26 |     netrd.reconstruction.GraphicalLasso
27 |     netrd.reconstruction.MarchenkoPastur
28 |     netrd.reconstruction.MaximumLikelihoodEstimation
29 |     netrd.reconstruction.MeanField
30 |     netrd.reconstruction.MutualInformationMatrix
31 |     netrd.reconstruction.NaiveTransferEntropy
32 |     netrd.reconstruction.OUInference
33 |     netrd.reconstruction.OptimalCausationEntropy
34 |     netrd.reconstruction.PartialCorrelationInfluence
35 |     netrd.reconstruction.PartialCorrelationMatrix
36 |     netrd.reconstruction.RandomReconstructor
37 |     netrd.reconstruction.ThoulessAndersonPalmer
38 | 
39 | 
40 | Reference
41 | ---------
42 | 
43 | .. automodule:: netrd.reconstruction
44 |     :members:
45 |     :undoc-members:
46 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | import setuptools
 2 | 
 3 | 
 4 | with open('requirements.txt') as file:
 5 |     requires = [line.strip() for line in file if not line.startswith('#')]
 6 | 
 7 | with open('README.md') as fin:
 8 |     # read the first section of README - set between the first two '#' lines -
 9 |     # as long_description, and use the first section header as description.
10 |     long_description = ""
11 |     at_first_section = False
12 |     read = iter(fin.readlines())
13 |     for line in read:
14 |         if at_first_section:
15 |             break
16 |         at_first_section = line.startswith('#')
17 |         description = line[1:].strip()
18 |     long_description += line
19 |     for line in read:
20 |         if line.startswith('#'):
21 |             break
22 |         long_description += line
23 |     long_description = long_description.strip()
24 | 
25 | 
26 | setuptools.setup(
27 |     name='netrd',
28 |     version='0.3.0',
29 |     author='NetSI 2019 Collabathon Team',
30 |     author_email='stefanmccabe@gmail.com',
31 |     description=description,
32 |     long_description=long_description,
33 |     long_description_content_type='text/markdown',
34 |     url='https://github.com/netsiphd/netrd',
35 |     packages=setuptools.find_packages(),
36 |     install_requires=requires,
37 |     classifiers=[
38 |         'Programming Language :: Python :: 3',
39 |         'License :: OSI Approved :: MIT License',
40 |         'Operating System :: OS Independent',
41 |     ],
42 | )
43 | 


--------------------------------------------------------------------------------
/netrd/distance/frobenius.py:
--------------------------------------------------------------------------------
 1 | """
 2 | frobenius.py
 3 | ------------
 4 | 
 5 | Frobenius norm between two adjacency matrices.
 6 | 
 7 | """
 8 | 
 9 | import numpy as np
10 | import networkx as nx
11 | from .base import BaseDistance
12 | from ..utilities.graph import unweighted
13 | 
14 | 
15 | class Frobenius(BaseDistance):
16 |     """The Frobenius distance between their adjacency matrices."""
17 | 
18 |     @unweighted
19 |     def dist(self, G1, G2):
20 |         r"""Frobenius distance between two graphs.
21 | 
22 |         If :math:`a_{ij}` and :math:`b_{ij}` are the two adjacency matrices
23 |         we define
24 | 
25 |         .. math::
26 |             d(G1, G2) = \sqrt{\sum_{i,j} |a_{ij} - b_{ij}|^2}
27 | 
28 | 
29 |         The results dictionary also stores a 2-tuple of the underlying
30 |         adjacency matrices in the key `'adjacency_matrices'`.
31 | 
32 |         Parameters
33 |         ----------
34 |         G1, G2 (nx.Graph)
35 |             two graphs to compare
36 | 
37 |         Returns
38 |         -------
39 |         float
40 |             the distance between `G1` and `G2`
41 | 
42 |         Notes
43 |         -----
44 | 
45 |         The graphs must have the same number of nodes.
46 | 
47 |         """
48 | 
49 |         adj1 = nx.to_numpy_array(G1)
50 |         adj2 = nx.to_numpy_array(G2)
51 |         dist = np.linalg.norm((adj1 - adj2))
52 |         self.results['dist'] = dist
53 |         self.results['adjacency_matrices'] = adj1, adj2
54 |         return dist
55 | 


--------------------------------------------------------------------------------
/netrd/reconstruction/base.py:
--------------------------------------------------------------------------------
 1 | class BaseReconstructor:
 2 |     r"""Base class for graph reconstruction algorithms.
 3 | 
 4 |     The basic usage of a graph reconstruction algorithm is as follows:
 5 | 
 6 |     >>> reconstructor = ReconstructionAlgorithm()
 7 |     >>> G = reconstructor.fit(TS, <some_params>)
 8 |     >>> # or alternately, G = reconstructor.results['graph']
 9 | 
10 |     Here, `TS` is an :math:`N \times L` numpy array consisting of :math:`L`
11 |     observations for each of :math:`N` sensors. This constrains the graphs
12 |     to have integer-valued nodes.
13 | 
14 |     The ``results`` dict object, in addition to containing the graph
15 |     object, may also contain objects created as a side effect of
16 |     reconstructing the network, which may be useful for debugging or
17 |     considering goodness of fit. What is returned will vary between
18 |     reconstruction algorithms.
19 | 
20 |     """
21 | 
22 |     def __init__(self):
23 |         self.results = {}
24 | 
25 |     def fit(self, TS, **kwargs):
26 |         """Reconstruct a graph from time series TS.
27 | 
28 |         Parameters
29 |         ----------
30 |         TS (np.ndarray): Array consisting of $L$ observations from $N$ sensors.
31 | 
32 |         Returns
33 |         -------
34 |         G (nx.Graph): A reconstructed graph with $N$ nodes.
35 | 
36 |         """
37 |         G = nx.Graph()  # reconstruct the graph
38 |         self.results['graph'] = G  # and store it in self.results
39 |         # self.results[..] = ..   # also store other values if needed
40 |         return G
41 | 


--------------------------------------------------------------------------------
/netrd/distance/jaccard_distance.py:
--------------------------------------------------------------------------------
 1 | """
 2 | jaccard_distance.py
 3 | --------------
 4 | 
 5 | Graph distance based on the Jaccard index between edge sets.
 6 | 
 7 | author: David Saffo
 8 | email: saffo.d@husky.neu.edu
 9 | Submitted as part of the 2019 NetSI Collabathon.
10 | 
11 | """
12 | 
13 | from .base import BaseDistance
14 | from ..utilities import unweighted
15 | 
16 | 
17 | class JaccardDistance(BaseDistance):
18 |     """Jaccard distance between edge sets."""
19 | 
20 |     @unweighted
21 |     def dist(self, G1, G2):
22 |         r"""Compute the Jaccard index between two graphs.
23 | 
24 |         The Jaccard index between two sets
25 | 
26 |         .. math::
27 |             J(A, B) = \frac{|A \cap B|}{|A \cup B|}
28 | 
29 |         provides a measure of similarity between sets. Here, we use the edge
30 |         sets of two graphs. The index, a measure of similarity, is converted to
31 |         a distance
32 | 
33 |         .. math::
34 |             d_J(A, B) = 1 - J(A, B)
35 | 
36 |         for consistency with other graph distances.
37 | 
38 |         Parameters
39 |         ----------
40 | 
41 |         G1, G2 (nx.Graph)
42 |             two graphs to be compared.
43 | 
44 |         Returns
45 |         -------
46 | 
47 |         dist (float)
48 |             the distance between G1 and G2.
49 | 
50 |         """
51 | 
52 |         e1 = set(G1.edges)
53 |         e2 = set(G2.edges)
54 |         cup = set.union(e1, e2)
55 |         cap = set.intersection(e1, e2)
56 | 
57 |         dist = 1 - len(cap) / len(cup)
58 | 
59 |         self.results["dist"] = dist
60 |         return dist
61 | 


--------------------------------------------------------------------------------
/netrd/distance/__init__.py:
--------------------------------------------------------------------------------
 1 | from .base import BaseDistance
 2 | from .hamming import Hamming
 3 | from .frobenius import Frobenius
 4 | from .portrait_divergence import PortraitDivergence
 5 | from .jaccard_distance import JaccardDistance
 6 | from .ipsen_mikhailov import IpsenMikhailov
 7 | from .hamming_ipsen_mikhailov import HammingIpsenMikhailov
 8 | from .resistance_perturbation import ResistancePerturbation
 9 | from .netsimile import NetSimile
10 | from .netlsd import NetLSD
11 | from .laplacian_spectral_method import LaplacianSpectral
12 | from .polynomial_dissimilarity import PolynomialDissimilarity
13 | from .degree_divergence import DegreeDivergence
14 | from .onion_divergence import OnionDivergence
15 | from .deltacon import DeltaCon
16 | from .quantum_jsd import QuantumJSD
17 | from .communicability_jsd import CommunicabilityJSD
18 | from .distributional_nbd import DistributionalNBD
19 | from .dk_series import dkSeries
20 | from .dmeasure import DMeasure
21 | from .nbd import NonBacktrackingSpectral
22 | from .graph_diffusion import GraphDiffusion
23 | 
24 | __all__ = [
25 |     'BaseDistance',
26 |     'Hamming',
27 |     'Frobenius',
28 |     'PortraitDivergence',
29 |     'JaccardDistance',
30 |     'IpsenMikhailov',
31 |     'HammingIpsenMikhailov',
32 |     'ResistancePerturbation',
33 |     'NetSimile',
34 |     'NetLSD',
35 |     'LaplacianSpectral',
36 |     'PolynomialDissimilarity',
37 |     'DegreeDivergence',
38 |     'OnionDivergence',
39 |     'DeltaCon',
40 |     'QuantumJSD',
41 |     'CommunicabilityJSD',
42 |     'DistributionalNBD',
43 |     'dkSeries',
44 |     'DMeasure',
45 |     'NonBacktrackingSpectral',
46 |     'GraphDiffusion',
47 | ]
48 | 


--------------------------------------------------------------------------------
/netrd/reconstruction/__init__.py:
--------------------------------------------------------------------------------
 1 | from .base import BaseReconstructor
 2 | from .random import RandomReconstructor
 3 | from .correlation_matrix import CorrelationMatrix
 4 | from .partial_correlation_matrix import PartialCorrelationMatrix
 5 | from .partial_correlation_influence import PartialCorrelationInfluence
 6 | from .free_energy_minimization import FreeEnergyMinimization
 7 | from .mean_field import MeanField
 8 | from .thouless_anderson_palmer import ThoulessAndersonPalmer
 9 | from .maximum_likelihood_estimation import MaximumLikelihoodEstimation
10 | from .convergent_cross_mapping import ConvergentCrossMapping
11 | from .mutual_information_matrix import MutualInformationMatrix
12 | from .ou_inference import OUInference
13 | from .graphical_lasso import GraphicalLasso
14 | from .marchenko_pastur import MarchenkoPastur
15 | from .naive_transfer_entropy import NaiveTransferEntropy
16 | from .granger_causality import GrangerCausality
17 | from .optimal_causation_entropy import OptimalCausationEntropy
18 | from .correlation_spanning_tree import CorrelationSpanningTree
19 | 
20 | __all__ = [
21 |     'BaseReconstructor',
22 |     'RandomReconstructor',
23 |     'CorrelationMatrix',
24 |     'PartialCorrelationMatrix',
25 |     'PartialCorrelationInfluence',
26 |     'FreeEnergyMinimization',
27 |     'ThoulessAndersonPalmer',
28 |     'MeanField',
29 |     'MaximumLikelihoodEstimation',
30 |     'ConvergentCrossMapping',
31 |     'MutualInformationMatrix',
32 |     'OUInference',
33 |     'GraphicalLasso',
34 |     'MarchenkoPastur',
35 |     'NaiveTransferEntropy',
36 |     'GrangerCausality',
37 |     'OptimalCausationEntropy',
38 |     'CorrelationSpanningTree',
39 | ]
40 | 


--------------------------------------------------------------------------------
/netrd/reconstruction/random.py:
--------------------------------------------------------------------------------
 1 | """
 2 | random.py
 3 | ---------
 4 | 
 5 | Reconstruct a network from a random matrix
 6 | not taking the time series into account.
 7 | 
 8 | author: Brennan Klein
 9 | email: klein.br@husky.neu.edu
10 | Submitted as part of the 2019 NetSI Collabathon.
11 | 
12 | """
13 | 
14 | from .base import BaseReconstructor
15 | import numpy as np
16 | from ..utilities import create_graph, threshold
17 | 
18 | 
19 | class RandomReconstructor(BaseReconstructor):
20 |     """Returns a random graph (dummy class)."""
21 | 
22 |     def fit(self, TS, threshold_type='range', **kwargs):
23 |         """Return a random correlation matrix with a threshold.
24 | 
25 |         The results dictionary also stores the weight matrix as
26 |         `'weights_matrix'` and the thresholded version of the weight matrix
27 |         as `'thresholded_matrix'`.
28 | 
29 |         Parameters
30 |         ----------
31 | 
32 |         TS (np.ndarray)
33 |             array consisting of :math:`L` observations from :math:`N` sensors.
34 | 
35 |         threshold_type (str)
36 |             Which thresholding function to use on the matrix of
37 |             weights. See `netrd.utilities.threshold.py` for
38 |             documentation. Pass additional arguments to the thresholder
39 |             using ``**kwargs``.
40 | 
41 |         Returns
42 |         -------
43 |         G (nx.Graph)
44 |             a reconstructed graph with :math:`N` nodes.
45 | 
46 |         """
47 |         N, L = TS.shape
48 |         W = np.random.rand(N, N)
49 |         A = threshold(W, threshold_type, **kwargs)
50 |         G = create_graph(A)
51 |         self.results['graph'] = G
52 |         self.results['weights_matrix'] = W
53 |         self.results['thresholded_matrix'] = A
54 |         return G
55 | 


--------------------------------------------------------------------------------
/netrd/utilities/cluster.py:
--------------------------------------------------------------------------------
 1 | """
 2 | cluster.py
 3 | ----------
 4 | 
 5 | Utilities for creating a seriated/ordered adjacency matrix with
 6 | hierarchical clustering.
 7 | 
 8 | author: David Saffo (saffo.d@husky.neu.edu)
 9 | 
10 | Submitted as part of the 2019 NetSI Collabathon.
11 | 
12 | """
13 | import networkx as nx
14 | from scipy.cluster.hierarchy import dendrogram, linkage
15 | 
16 | 
17 | def clusterGraph(G, method='single', metric='euclidean', optimal_ordering=False):
18 |     """Create seriated adjacency matrix.
19 | 
20 |     Parameters
21 |     ----------
22 | 
23 |     G (nx.Graph)
24 |         a networkx graph
25 | 
26 |     method
27 |         the clustering algorithm to use for options see [1].
28 | 
29 |     metric (str)
30 |         linkage method to use
31 | 
32 |     optimal_ordering (bool)
33 |         if true tries to minimize the distance of succesive indexes
34 | 
35 |     Returns
36 |     -------
37 | 
38 |     adjClustered (np.ndarray)
39 |         a numpy matrix with rows and columns reordered based on clustering
40 | 
41 |     order (list)
42 |         a list with the new index order for rows and columns
43 | 
44 |     dend (dict)
45 |         a dictionary with the hierarchy for the dendogram
46 | 
47 |     link (np.ndarray)
48 |         a linkage matrix with results from clustering
49 | 
50 |     References
51 |     ----------
52 | 
53 |     [1] https://docs.scipy.org/doc/scipy/reference/generated/scipy.cluster.hierarchy.linkage.html
54 | 
55 |     """
56 |     adj = nx.to_numpy_matrix(G)
57 |     link = linkage(adj, method, metric, optimal_ordering)
58 |     dend = dendrogram(link, no_plot=True)
59 |     order = dend['leaves']
60 |     adjClustered = adj[order, :]
61 |     adjClustered = adjClustered[:, order]
62 |     return adjClustered, order, dend, link
63 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | *.egg-info/
 24 | .installed.cfg
 25 | *.egg
 26 | MANIFEST
 27 | 
 28 | # PyInstaller
 29 | #  Usually these files are written by a python script from a template
 30 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 31 | *.manifest
 32 | *.spec
 33 | 
 34 | # Installer logs
 35 | pip-log.txt
 36 | pip-delete-this-directory.txt
 37 | 
 38 | # Unit test / coverage reports
 39 | htmlcov/
 40 | .tox/
 41 | .coverage
 42 | .coverage.*
 43 | .cache
 44 | nosetests.xml
 45 | coverage.xml
 46 | *.cover
 47 | .hypothesis/
 48 | .pytest_cache/
 49 | 
 50 | # Translations
 51 | *.mo
 52 | *.pot
 53 | 
 54 | # Django stuff:
 55 | *.log
 56 | local_settings.py
 57 | db.sqlite3
 58 | 
 59 | # Flask stuff:
 60 | instance/
 61 | .webassets-cache
 62 | 
 63 | # Scrapy stuff:
 64 | .scrapy
 65 | 
 66 | # Sphinx documentation
 67 | docs/_build/
 68 | docs/build/
 69 | 
 70 | # PyBuilder
 71 | target/
 72 | 
 73 | # Jupyter Notebook
 74 | .ipynb_checkpoints
 75 | 
 76 | # pyenv
 77 | .python-version
 78 | 
 79 | # celery beat schedule file
 80 | celerybeat-schedule
 81 | 
 82 | # SageMath parsed files
 83 | *.sage.py
 84 | 
 85 | # Environments
 86 | .env
 87 | .venv
 88 | env/
 89 | venv/
 90 | ENV/
 91 | env.bak/
 92 | venv.bak/
 93 | 
 94 | # Spyder project settings
 95 | .spyderproject
 96 | .spyproject
 97 | 
 98 | # Rope project settings
 99 | .ropeproject
100 | 
101 | # mkdocs documentation
102 | /site
103 | 
104 | # mypy
105 | .mypy_cache/
106 | 
107 | # mac dstore
108 | .DS_Store
109 | 


--------------------------------------------------------------------------------
/netrd/distance/degree_divergence.py:
--------------------------------------------------------------------------------
 1 | """
 2 | degree_divergence.py
 3 | --------------------------
 4 | 
 5 | Baseline distance measure: the K-L divergence
 6 |  between the two degree distributions.
 7 | 
 8 | author: Stefan McCabe
 9 | email: stefanmccabe at gmail dot com
10 | Submitted as part of the 2019 NetSI Collabathon.
11 | 
12 | """
13 | 
14 | from collections import Counter
15 | import numpy as np
16 | import networkx as nx
17 | from .base import BaseDistance
18 | from ..utilities import entropy, unweighted
19 | 
20 | 
21 | class DegreeDivergence(BaseDistance):
22 |     """Compare two degree distributions."""
23 | 
24 |     @unweighted
25 |     def dist(self, G1, G2):
26 |         """Jenson-Shannon divergence between degree distributions.
27 | 
28 |         Assumes undirected networks.
29 | 
30 |         Parameters
31 |         ----------
32 | 
33 |         G1, G2 (nx.Graph)
34 |             two networkx graphs to be compared.
35 | 
36 |         Returns
37 |         -------
38 | 
39 |         dist (float)
40 |             the distance between `G1` and `G2`.
41 | 
42 |         """
43 | 
44 |         def degree_vector_histogram(graph):
45 |             """Return the degrees in both formats.
46 | 
47 |             max_deg is the length of the histogram, to be padded with
48 |             zeros.
49 | 
50 |             """
51 |             vec = np.array(list(dict(graph.degree()).values()))
52 |             if next(nx.selfloop_edges(graph), False):
53 |                 max_deg = len(graph)
54 |             else:
55 |                 max_deg = len(graph) - 1
56 |             counter = Counter(vec)
57 |             hist = np.array([counter[v] for v in range(max_deg)])
58 |             return vec, hist
59 | 
60 |         deg1, hist1 = degree_vector_histogram(G1)
61 |         deg2, hist2 = degree_vector_histogram(G2)
62 |         self.results['degree_vectors'] = deg1, deg2
63 |         self.results['degree_histograms'] = hist1, hist2
64 | 
65 |         max_len = max(len(hist1), len(hist2))
66 |         p1 = np.pad(hist1, (0, max_len - len(hist1)), 'constant', constant_values=0)
67 |         p2 = np.pad(hist2, (0, max_len - len(hist2)), 'constant', constant_values=0)
68 |         self.results['dist'] = entropy.js_divergence(p1, p2)
69 |         return self.results['dist']
70 | 


--------------------------------------------------------------------------------
/netrd/dynamics/single_unbiased_random_walker.py:
--------------------------------------------------------------------------------
 1 | """
 2 | single_unbiased_random_walker.py
 3 | --------------------------------
 4 | 
 5 | Simulate a lonely walker on a network.
 6 | 
 7 | """
 8 | from .base import BaseDynamics
 9 | import networkx as nx
10 | import numpy as np
11 | 
12 | 
13 | class SingleUnbiasedRandomWalker(BaseDynamics):
14 |     """Random walk dynamics."""
15 | 
16 |     def simulate(self, G, L, initial_node=None):
17 |         r"""Simulate single random-walker dynamics on a ground truth network.
18 | 
19 |         Generates an :math:`N \times L` time series `TS` with
20 |         ``TS[j,t]==1`` if the walker is at node :math:`j` at time
21 |         :math:`t`, and ``TS[j,t]==0`` otherwise.
22 | 
23 |         The results dictionary also stores the ground truth network as
24 |         `'ground_truth'`.
25 | 
26 |         Examples
27 |         --------
28 |         .. code:: python
29 | 
30 |             G = nx.ring_of_cliques(4,16)
31 |             L = 2001
32 |             dynamics = SingleUnbiasedRandomWalker()
33 |             TS = dynamics.simulate(G, L)
34 | 
35 | 
36 |         Parameters
37 |         ----------
38 |         G (nx.Graph)
39 |             The input (ground-truth) graph with :math:`N` nodes.
40 | 
41 |         L (int)
42 |             The length of the desired time series.
43 | 
44 |         Returns
45 |         -------
46 |         TS (np.ndarray)
47 |             An :math:`N \times L` array of synthetic time series data.
48 | 
49 |         """
50 |         # get adjacency matrix and set up vector of indices
51 |         A = nx.to_numpy_array(G)
52 |         N = G.number_of_nodes()
53 |         W = np.zeros(L, dtype=int)
54 |         # place walker at initial location
55 |         if initial_node:
56 |             W[0] = initial_node
57 |         else:
58 |             W[0] = np.random.randint(N)
59 | 
60 |         # run dynamical process
61 |         for t in range(L - 1):
62 |             W[t + 1] = np.random.choice(np.where(A[W[t], :])[0])
63 |         self.results['node_index_sequence'] = W
64 |         # turn into a binary-valued
65 |         TS = np.zeros((N, L))
66 |         for t, w in enumerate(W):
67 |             TS[w, t] = 1
68 |         self.results['TS'] = TS
69 |         self.results['ground_truth'] = G
70 |         return TS
71 | 


--------------------------------------------------------------------------------
/netrd/utilities/standardize.py:
--------------------------------------------------------------------------------
 1 | """
 2 | standardize.py
 3 | --------------
 4 | 
 5 | Utilities for computing standardization values for distance measures.
 6 | 
 7 | author: Harrison Hartle/Tim LaRock (timothylarock at gmail dot com)
 8 | 
 9 | Submitted as part of the 2019 NetSI Collabathon.
10 | 
11 | """
12 | 
13 | import numpy as np
14 | import networkx as nx
15 | 
16 | 
17 | def mean_GNP_distance(n, prob, distance, samples=10, **kwargs):
18 |     r"""Mean distance between :math:`G(n, p)` graphs.
19 | 
20 |     Compute the mean distance between `samples` :math:`G(n, p)` graphs with
21 |     parameters using distance function `distance`, whose parameters are
22 |     passed with ``**kwargs``.
23 | 
24 | 
25 |     Parameters
26 |     ----------
27 | 
28 |     n (int)
29 |         Number of nodes in ER graphs to be generated
30 | 
31 |     prob (float)
32 |         Probability of edge in ER graphs to be generated.
33 | 
34 |     samples (int)
35 |         Number of samples to average distance over.
36 | 
37 |     distance (function)
38 |         Function from `netrd.distances.<distance>.dist`
39 | 
40 |     **kwargs (dict)
41 |         Keyword arguments to pass to the distance function.
42 | 
43 |     Returns
44 |     -------
45 |     mean (float)
46 |         The average distance between the sampled ER networks.
47 | 
48 |     std (float)
49 |         The standard deviation of the distances.
50 | 
51 |     dist (np.ndarray)
52 |         Array storing the actual distances.
53 | 
54 |     Examples
55 |     --------
56 |     .. code:: python
57 | 
58 |         dist_obj = netrd.distance.ResistancePerturbation()
59 |         kwargs = {'p':2}
60 |         mean, std, dists = netrd.utilities.mean_GNP_distance(100, 0.1, dist_obj.dist, **kwargs)
61 | 
62 | 
63 |     Notes
64 |     -----
65 |     Ideally, each sample would involve generating two :math:`G(n, p)`
66 |     graphs, computing the distance between them, then throwing them both
67 |     away.  However, this will be computationally expensive, so for now we
68 |     are reusing samples. The diagonal of the distance matrix is excluded,
69 |     i.e., do not compute the distance between a sample graph and itself.
70 | 
71 |     """
72 |     graphs = [nx.fast_gnp_random_graph(n, prob) for _ in range(samples)]
73 |     dis_mat = np.full((samples, samples), np.nan)
74 |     for i in range(samples):
75 |         for j in range(samples):
76 |             if i == j:
77 |                 continue
78 |             dis_mat[i, j] = distance(graphs[i], graphs[j], **kwargs)
79 | 
80 |     # the nan* versions below ignore NaNs and normalize appropriately
81 |     return np.nanmean(dis_mat), np.nanstd(dis_mat), dis_mat
82 | 


--------------------------------------------------------------------------------
/netrd/dynamics/voter.py:
--------------------------------------------------------------------------------
 1 | """
 2 | voter.py
 3 | --------
 4 | 
 5 | Implementation of voter model dynamics on a network.
 6 | 
 7 | author: Stefan McCabe
 8 | 
 9 | Submitted as part of the 2019 NetSI Collabathon.
10 | 
11 | """
12 | 
13 | from netrd.dynamics import BaseDynamics
14 | import numpy as np
15 | import networkx as nx
16 | from ..utilities import unweighted
17 | 
18 | 
19 | class VoterModel(BaseDynamics):
20 |     """Voter dynamics."""
21 | 
22 |     @unweighted
23 |     def simulate(self, G, L, noise=None):
24 |         r"""Simulate voter-model-style dynamics on a network.
25 | 
26 |         Nodes are randomly assigned a state in :math:`\{-1, 1\}`; at each
27 |         time step all nodes asynchronously update by choosing their new
28 |         state uniformly from their neighbors. Generates an :math:`N \times
29 |         L` time series.
30 | 
31 |         The results dictionary also stores the ground truth network as
32 |         `'ground_truth'`.
33 | 
34 |         Parameters
35 |         ----------
36 |         G (nx.Graph)
37 |             the input (ground-truth) graph with `N` nodes.
38 | 
39 |         L (int)
40 |             the length of the desired time series.
41 | 
42 |         noise (float, str or None)
43 |             if noise is present, with this probability a node's state will
44 |             be randomly redrawn from :math:`\{-1, 1\}` independent of its
45 |             neighbors' states. If 'automatic', set noise to :math:`1/N`.
46 | 
47 |         Returns
48 |         -------
49 |         TS (np.ndarray)
50 |             an :math:`N \times L` array of synthetic time series data.
51 | 
52 |         """
53 | 
54 |         N = G.number_of_nodes()
55 | 
56 |         if noise is None:
57 |             noise = 0
58 |         elif noise == 'automatic' or noise == 'auto':
59 |             noise = 1 / N
60 |         elif not isinstance(noise, (int, float)):
61 |             raise ValueError("noise must be a number, 'automatic', or None")
62 | 
63 |         transitions = nx.to_numpy_array(G)
64 |         transitions = transitions / np.sum(transitions, axis=0)
65 | 
66 |         TS = np.zeros((N, L))
67 |         TS[:, 0] = [1 if x < 0.5 else -1 for x in np.random.rand(N)]
68 |         indices = np.arange(N)
69 | 
70 |         for t in range(1, L):
71 |             np.random.shuffle(indices)
72 |             TS[:, t] = TS[:, t - 1]
73 |             for i in indices:
74 |                 TS[i, t] = np.random.choice(TS[:, t], p=transitions[:, i])
75 |                 if np.random.rand() < noise:
76 |                     TS[i, t] = 1 if np.random.rand() < 0.5 else -1
77 | 
78 |         self.results['ground_truth'] = G
79 |         self.results['TS'] = TS
80 |         return TS
81 | 


--------------------------------------------------------------------------------
/doc/source/conf.py:
--------------------------------------------------------------------------------
 1 | # Configuration file for the Sphinx documentation builder.
 2 | #
 3 | # This file only contains a selection of the most common options. For a full
 4 | # list see the documentation:
 5 | # http://www.sphinx-doc.org/en/master/config
 6 | 
 7 | # -- Path setup --------------------------------------------------------------
 8 | 
 9 | # If extensions (or modules to document with autodoc) are in another directory,
10 | # add these directories to sys.path here. If the directory is relative to the
11 | # documentation root, use os.path.abspath to make it absolute, like shown here.
12 | #
13 | import os
14 | import sys
15 | sys.path.insert(0, os.path.abspath('../../'))
16 | 
17 | 
18 | # -- Project information -----------------------------------------------------
19 | 
20 | project = 'netrd'
21 | copyright = '2019, NetSI 2019 Collabathon team'
22 | author = 'NetSI 2019 Collabathon team'
23 | 
24 | # The full version, including alpha/beta/rc tags
25 | release = '0.1'
26 | 
27 | 
28 | # -- General configuration ---------------------------------------------------
29 | 
30 | # Add any Sphinx extension module names here, as strings. They can be
31 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
32 | # ones.
33 | extensions = ['sphinx.ext.autodoc',
34 |               'numpydoc',
35 |               'sphinx.ext.coverage',
36 |               'sphinx.ext.mathjax',
37 |               'sphinx.ext.viewcode']
38 | numpydoc_show_class_members = False
39 | 
40 | # Add any paths that contain templates here, relative to this directory.
41 | templates_path = ['_templates']
42 | 
43 | # List of patterns, relative to source directory, that match files and
44 | # directories to ignore when looking for source files.
45 | # This pattern also affects html_static_path and html_extra_path.
46 | exclude_patterns = []
47 | 
48 | # The suffix(es) of source filenames.
49 | source_suffix = ['.rst', '.md']
50 | 
51 | # Add the markdown parser.
52 | # from recommonmark.parser import CommonMarkParser
53 | # source_parsers = {'.md': CommonMarkParser}
54 | 
55 | # from recommonmark.transform import AutoStructify
56 | # def setup(app):
57 | #     app.add_source_parser()
58 | #     app.add_config_value(
59 | #         'recommonmark_config', {'enable_eval_rst': True}, True)
60 | #     app.add_transform(AutoStructify)
61 | 
62 | # -- Options for HTML output -------------------------------------------------
63 | 
64 | # The theme to use for HTML and HTML Help pages.  See the documentation for
65 | # a list of builtin themes.
66 | #
67 | html_theme = 'sphinx_rtd_theme'
68 | 
69 | # Add any paths that contain custom static files (such as style sheets) here,
70 | # relative to this directory. They are copied after the builtin static files,
71 | # so a file named "default.css" will overwrite the builtin "default.css".
72 | html_static_path = ['_static']
73 | 


--------------------------------------------------------------------------------
/netrd/distance/polynomial_dissimilarity.py:
--------------------------------------------------------------------------------
 1 | """
 2 | polynomial_dissimilarity.py
 3 | --------------
 4 | 
 5 | From
 6 | ----
 7 | Donnat, Claire, and Susan Holmes. "Tracking
 8 | network dynamics: A survey of distances
 9 | and similarity metrics." arXiv
10 | preprint arXiv:1801.07351 (2018).
11 | 
12 | author: Jessica T. Davis
13 | email:
14 | Submitted as part of the 2019 NetSI Collabathon.
15 | 
16 | """
17 | import numpy as np
18 | import networkx as nx
19 | from .base import BaseDistance
20 | from ..utilities import unweighted
21 | 
22 | 
23 | class PolynomialDissimilarity(BaseDistance):
24 |     """Compares polynomials relating to the eigenvalues of the adjacency matrices."""
25 | 
26 |     @unweighted
27 |     def dist(self, G1, G2, k=5, alpha=1):
28 |         r"""Compares the polynomials of the eigenvalue decomposition of
29 |         two adjacency matrices.
30 | 
31 |         Note that the :math:`ij`-th element of where :math:`A^k`
32 |         corresponds to the number of paths of length :math:`k` between
33 |         nodes :math:`i` and :math:`j`.
34 | 
35 |         The results dictionary also stores a 2-tuple of the underlying
36 |         adjacency matrices in the key `'adjacency_matrices'`.
37 | 
38 |         Parameters
39 |         ----------
40 | 
41 |         G1, G2 (nx.Graph)
42 |             two networkx graphs to be compared.
43 | 
44 |         k (float)
45 |             maximum degree of the polynomial
46 | 
47 |         alpha (float)
48 |             weighting factor
49 | 
50 |         Returns
51 |         -------
52 |         dist (float)
53 |             Polynomial Dissimilarity between `G1`, `G2`
54 | 
55 |         References
56 |         ----------
57 |         .. [1] Donnat, Claire, and Susan Holmes. "Tracking network
58 |                dynamics: A survey of distances and similarity metrics."
59 |                arXiv preprint arXiv:1801.07351 (2018).
60 | 
61 |         """
62 | 
63 |         A1 = nx.to_numpy_array(G1)
64 |         A2 = nx.to_numpy_array(G2)
65 | 
66 |         P_A1 = similarity_score(A1, k, alpha)
67 |         P_A2 = similarity_score(A2, k, alpha)
68 | 
69 |         dist = np.linalg.norm(P_A1 - P_A2, ord="fro") / A1.shape[0] ** 2
70 | 
71 |         self.results["adjacency_matrices"] = A1, A2
72 |         self.results["dist"] = dist
73 |         return dist
74 | 
75 | 
76 | def similarity_score(A, k, alpha):
77 |     """
78 |     Calculate the similarity score used in the polynomial dissimilarity
79 |     distance. This uses a polynomial transformation of the eigenvalues of the
80 |     of the adjacency matrix in combination with the eigenvectors of the
81 |     adjacency matrix. See p. 27 of Donnat and Holmes (2018).
82 |     """
83 | 
84 |     eig_vals, Q = np.linalg.eig(A)
85 | 
86 |     n = A.shape[0]
87 | 
88 |     def polynomial(kp):
89 |         return eig_vals**kp / (n - 1) ** (alpha * (kp - 1))
90 | 
91 |     W = np.diag(sum([polynomial(k) for k in range(1, k + 1)]))
92 |     P_A = np.dot(np.dot(Q, W), Q.T)
93 | 
94 |     return P_A
95 | 


--------------------------------------------------------------------------------
/netrd/dynamics/sherrington_kirkpatrick.py:
--------------------------------------------------------------------------------
 1 | """
 2 | sherrington_kirkpatrick.py
 3 | ---------------------
 4 | Generate an ising model-like time series on a graph
 5 | 
 6 | author: Brennan Klein
 7 | email: brennanjamesklein at gmail dot com
 8 | submitted as part of the 2019 NetSI Collabathon
 9 | """
10 | from .base import BaseDynamics
11 | import networkx as nx
12 | import numpy as np
13 | from ..utilities import unweighted
14 | 
15 | 
16 | class SherringtonKirkpatrickIsing(BaseDynamics):
17 |     """Ising model-like dynamics."""
18 | 
19 |     @unweighted
20 |     def simulate(self, G, L, noisy=False):
21 |         r"""Simulate Kinetic Ising model dynamics on a ground truth network.
22 | 
23 |         The results dictionary also stores the ground truth network as
24 |         `'ground_truth'`.
25 | 
26 |         Parameters
27 |         ----------
28 |         G (nx.Graph)
29 |             The input (ground-truth) graph with :math:`N` nodes.
30 | 
31 |         L (int)
32 |             The length of the desired time series.
33 | 
34 |         Returns
35 |         -------
36 |         TS (np.ndarray)
37 |             An :math:`N \times L` array of synthetic time series data.
38 | 
39 |         Examples
40 |         --------
41 |         .. code:: python
42 | 
43 |             G = nx.ring_of_cliques(4,16)
44 |             L = 2001
45 |             dynamics = SherringtonKirkpatrickIsing()
46 |             TS = dynamics.simulate(G, L)
47 | 
48 | 
49 |         References
50 |         ----------
51 |         .. [1] D. Sherrington and S. Kirkpatrick, Phys. Rev. Lett. 35, 1792
52 |                (1975).
53 | 
54 |         .. [2] Hoang, D.T., Song, J., Periwal, V. and Jo, J., Network
55 |                inference in stochastic systems from neurons to currencies:
56 |                Improved performance at small sample size. (2019)
57 | 
58 |         """
59 | 
60 |         N = G.number_of_nodes()
61 | 
62 |         # get transition probability matrix of G
63 |         A = nx.to_numpy_array(G)
64 |         W = np.zeros(A.shape)
65 |         for i in range(A.shape[0]):
66 |             if A[i].sum() > 0:
67 |                 W[i] = A[i] / A[i].sum()
68 | 
69 |         # initialize a time series of ones
70 |         ts = np.ones((L, N))
71 |         for t in range(1, L - 1):
72 |             h = np.sum(W[:, :] * ts[t, :], axis=1)  # Wij from j to i
73 |             p = 1 / (1 + np.exp(-2 * h))
74 |             if noisy:
75 |                 ts[t + 1, :] = p - np.random.rand(N)
76 |             else:
77 |                 ts[t + 1, :] = sign_vec(p - np.random.rand(N))
78 | 
79 |         self.results['ground_truth'] = G
80 |         self.results['TS'] = ts.T
81 | 
82 |         return self.results['TS']
83 | 
84 | 
85 | def sign(x):
86 |     """
87 |     np.sign(0) = 0 but here to avoid value 0,
88 |     we redefine it as def sign(0) = 1
89 |     """
90 |     return 1.0 if x >= 0 else -1.0
91 | 
92 | 
93 | def sign_vec(x):
94 |     """
95 |     Binarize an array
96 |     """
97 |     x_vec = np.vectorize(sign)
98 |     return x_vec(x)
99 | 


--------------------------------------------------------------------------------
/netrd/reconstruction/maximum_likelihood_estimation.py:
--------------------------------------------------------------------------------
 1 | """
 2 | maximum_likelihood_estimation.py
 3 | ---------------------
 4 | Reconstruction of graphs using maximum likelihood estimation
 5 | author: Brennan Klein
 6 | email: brennanjamesklein at gmail dot com
 7 | submitted as part of the 2019 NeTSI Collabathon
 8 | """
 9 | from .base import BaseReconstructor
10 | import numpy as np
11 | from ..utilities import create_graph, threshold
12 | 
13 | 
14 | class MaximumLikelihoodEstimation(BaseReconstructor):
15 |     """Uses maximum likelihood estimation."""
16 | 
17 |     def fit(self, TS, rate=1.0, stop_criterion=True, threshold_type='degree', **kwargs):
18 |         """Infer inter-node coupling weights using maximum likelihood estimation
19 |         methods.
20 | 
21 |         The results dictionary also stores the weight matrix as
22 |         `'weights_matrix'` and the thresholded version of the weight matrix
23 |         as `'thresholded_matrix'`.
24 | 
25 |         Parameters
26 |         ----------
27 | 
28 |         TS (np.ndarray)
29 |             Array consisting of :math:`L` observations from :math:`N` sensors.
30 | 
31 |         rate (float)
32 |             rate term in maximum likelihood
33 | 
34 |         stop_criterion (bool)
35 |             if True, prevent overly-long runtimes
36 | 
37 |         threshold_type (str)
38 |             Which thresholding function to use on the matrix of
39 |             weights. See `netrd.utilities.threshold.py` for
40 |             documentation. Pass additional arguments to the thresholder
41 |             using '`**kwargs`'.
42 | 
43 |         Returns
44 |         -------
45 |         G (nx.Graph or nx.DiGraph)
46 |             a reconstructed graph.
47 | 
48 |         References
49 |         ----------
50 | 
51 |         .. [1] https://github.com/nihcompmed/network-inference/blob/master/sphinx/codesource/inference.py
52 | 
53 |         """
54 | 
55 |         N, L = np.shape(TS)  # N nodes, length L
56 |         rate = rate / L
57 | 
58 |         s1 = TS[:, :-1]
59 |         W = np.zeros((N, N))
60 | 
61 |         nloop = 10000
62 |         for i0 in range(N):
63 |             st1 = TS[i0, 1:]  # time series activity of single node
64 | 
65 |             w = np.zeros(N)
66 |             h = np.zeros(L - 1)
67 |             cost = np.full(nloop, 100.0)
68 | 
69 |             for iloop in range(nloop):
70 |                 dw = np.dot(s1, (st1 - np.tanh(h)))
71 | 
72 |                 w += rate * dw
73 |                 h = np.dot(s1.T, w)
74 | 
75 |                 cost[iloop] = ((st1 - np.tanh(h)) ** 2).mean()
76 | 
77 |                 if stop_criterion and cost[iloop] >= cost[iloop - 1]:
78 |                     break
79 | 
80 |             W[i0, :] = w
81 | 
82 |         # threshold the network
83 |         W_thresh = threshold(W, threshold_type, **kwargs)
84 | 
85 |         # construct the network
86 | 
87 |         self.results['graph'] = create_graph(W_thresh)
88 |         self.results['weights_matrix'] = W
89 |         self.results['thresholded_matrix'] = W_thresh
90 |         G = self.results['graph']
91 | 
92 |         return G
93 | 


--------------------------------------------------------------------------------
/netrd/dynamics/ising_glauber.py:
--------------------------------------------------------------------------------
 1 | """
 2 | ising_glauber.py
 3 | ----------------
 4 | 
 5 | Implementation to simulate the Ising-Glauber model on a network.
 6 | 
 7 | author: Chia-Hung Yang
 8 | Submitted as part of the 2019 NetSI Collabathon.
 9 | """
10 | 
11 | from netrd.dynamics import BaseDynamics
12 | import numpy as np
13 | import networkx as nx
14 | from numpy.random import rand
15 | from ..utilities import unweighted
16 | 
17 | 
18 | class IsingGlauber(BaseDynamics):
19 |     """Ising-Glauber model."""
20 | 
21 |     @unweighted
22 |     def simulate(self, G, L, init=None, beta=2):
23 |         r"""Simulate time series on a network from the Ising-Glauber model.
24 | 
25 |         In the Ising-Glauber model, each node has a binary state. At every
26 |         time step, nodes switch their state with certain probability. For
27 |         inactive nodes, this probability is :math:`1 / (1 + e^{\beta (k -
28 |         2m) / k})` where :math:`\beta` is a parameter tuning the likelihood
29 |         of switching state, :math:`k` is degree of the node and :math:`m`
30 |         is the number of its active neighbors; for active nodes the
31 |         switch-state probability is :math:`1 - 1 / (1 + e^{\beta (k - 2m) /
32 |         k})` instead.
33 | 
34 |         The results dictionary also stores the ground truth network as
35 |         `'ground_truth'`.
36 | 
37 |         Parameters
38 |         ----------
39 |         G (nx.Graph)
40 |             Underlying ground-truth network of simulated time series which
41 |             has :math:`N` nodes.
42 | 
43 |         L (int)
44 |             Length of time series.
45 | 
46 |         init (np.ndarray)
47 |             Length-:math:`N` 1D array of nodes' initial condition, which
48 |             must have binary value (0 or 1).
49 | 
50 |         beta (float)
51 |             Inverse temperature tuning the likelihood that a node switches
52 |             its state. Default to :math:`2`.
53 | 
54 |         Returns
55 |         -------
56 |         TS (np.ndarray)
57 |             :math:`N \times L` array of :math:`L` observations on :math:`N`
58 |             nodes.
59 | 
60 |         """
61 | 
62 |         N = G.number_of_nodes()
63 |         adjmat = nx.to_numpy_array(G, dtype=float)
64 |         degs = adjmat.sum(axis=0)
65 | 
66 |         # Randomly initialize an initial condition if not specified
67 |         TS = np.zeros((N, L), dtype=int)
68 |         if init is None:
69 |             init = rand(N)
70 |         TS[:, 0] = np.round(init).astype(int)
71 | 
72 |         # Simulate the time series
73 |         for t in range(L - 1):
74 |             state = TS[:, t].copy()  # State for each node
75 |             num_act_nei = np.dot(state, adjmat)  # Number of active neighbors
76 | 
77 |             hamltn = (degs - 2 * num_act_nei) / degs
78 |             thrds = 1 / (1 + np.exp(beta * hamltn))
79 |             # Probability of switching state
80 |             probs = np.where(state == 0, thrds, 1 - thrds)
81 | 
82 |             _next = np.where(rand(N) < probs, 1 - state, state)
83 |             TS[:, t + 1] = _next
84 | 
85 |         self.results['ground_truth'] = G
86 |         self.results['TS'] = TS
87 |         return TS
88 | 


--------------------------------------------------------------------------------
/netrd/distance/hamming.py:
--------------------------------------------------------------------------------
 1 | """
 2 | hamming.py
 3 | --------------
 4 | 
 5 | Hamming distance, wrapper for scipy function:
 6 | https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.distance.hamming.html#scipy.spatial.distance.hamming
 7 | 
 8 | """
 9 | 
10 | import scipy
11 | import numpy as np
12 | import networkx as nx
13 | from .base import BaseDistance
14 | from ..utilities import unweighted
15 | 
16 | 
17 | class Hamming(BaseDistance):
18 |     """Entry-wise disagreement between adjacency matrices."""
19 | 
20 |     @unweighted
21 |     def dist(self, G1, G2):
22 |         r"""The proportion of disagreeing nodes between the flattened adjacency
23 |         matrices.
24 | 
25 |         If :math:`u` and :math:`v` are boolean vectors, then Hamming
26 |         distance is:
27 | 
28 |         .. math::
29 | 
30 |             \frac{c_{01} + c_{10}}{n}
31 | 
32 |         where :math:`c_{ij}` is the number of occurrences of where
33 |         :math:`u[k] = i` and :math:`v[k] = j` for :math:`k < n`.
34 | 
35 |         The graphs must have the same number of nodes. A small modification
36 |         to this code could allow weights can be applied, but only one set
37 |         of weights that apply to both graphs.
38 | 
39 |         The results dictionary also stores a 2-tuple of the underlying
40 |         adjacency matrices in the key `'adjacency_matrices'`.
41 | 
42 |         Parameters
43 |         ----------
44 | 
45 |         G1, G2 (nx.Graph)
46 |             two networkx graphs to be compared.
47 | 
48 |         Returns
49 |         -------
50 | 
51 |         dist (float)
52 |             the distance between `G1` and `G2`.
53 | 
54 |         References
55 |         ----------
56 | 
57 |         .. [1] https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.distance.hamming.html#scipy.spatial.distance.hamming
58 | 
59 |         """
60 | 
61 |         if G1.number_of_nodes() == G2.number_of_nodes():
62 |             N = G1.number_of_nodes()
63 |         else:
64 |             raise ValueError("Graphs have the same number of nodes")
65 | 
66 |         adj1 = nx.to_numpy_array(G1)
67 |         adj2 = nx.to_numpy_array(G2)
68 | 
69 |         # undirected case: consider only upper triangular
70 |         mask = np.triu_indices(N, k=1)
71 | 
72 |         # directed case: consider all but the diagonal
73 |         if nx.is_directed(G1) or nx.is_directed(G2):
74 |             new_mask = np.tril_indices(N, k=-1)
75 |             mask = (np.append(mask[0], new_mask[0]), np.append(mask[1], new_mask[1]))
76 | 
77 |         # only if there are self-loops include the diagonal
78 |         # this corrects the implicit denominator of Hamming, which
79 |         # should be N^2 for networks with self-loops and N(N-1) for
80 |         # those without
81 |         if next(nx.selfloop_edges(G1), False) or next(nx.selfloop_edges(G2), False):
82 |             new_mask = np.diag_indices(N)
83 |             mask = (np.append(mask[0], new_mask[0]), np.append(mask[1], new_mask[1]))
84 | 
85 |         dist = scipy.spatial.distance.hamming(
86 |             adj1[mask].flatten(), adj2[mask].flatten()
87 |         )
88 |         self.results["dist"] = dist
89 |         self.results["adjacency_matrices"] = adj1, adj2
90 |         return dist
91 | 


--------------------------------------------------------------------------------
/tests/test_utilities.py:
--------------------------------------------------------------------------------
  1 | """
  2 | test_utilities.py
  3 | -----------------
  4 | 
  5 | Test utility functions.
  6 | 
  7 | """
  8 | 
  9 | import numpy as np
 10 | from netrd.utilities.entropy import categorized_data
 11 | from netrd.utilities.entropy import entropy_from_seq, joint_entropy, conditional_entropy
 12 | from netrd.utilities import threshold
 13 | 
 14 | 
 15 | def test_thresholds():
 16 |     """
 17 |     Test the threshold function by testing three underlying thresholding
 18 |     methods: range, quantile, and degree.
 19 |     """
 20 | 
 21 |     mat = np.arange(1, 17, 1).reshape((4, 4))
 22 | 
 23 |     for k in range(5):
 24 |         thresholded_mat = threshold(mat, 'degree', avg_k=k, remove_self_loops=False)
 25 |         assert (thresholded_mat != 0).sum() == 4 * k
 26 | 
 27 |     for n in range(17):
 28 |         thresholded_mat = threshold(
 29 |             mat, 'quantile', quantile=n / 16, remove_self_loops=False
 30 |         )
 31 |         print(n)
 32 |         assert (thresholded_mat != 0).sum() == 16 - n
 33 | 
 34 |     thresholded_mat = threshold(
 35 |         mat, 'range', cutoffs=[(0, np.inf)], remove_self_loops=False
 36 |     )
 37 |     assert (thresholded_mat >= 0).all()
 38 | 
 39 |     thresholded_mat = threshold(
 40 |         mat, 'range', cutoffs=[(-np.inf, 0)], remove_self_loops=False
 41 |     )
 42 |     assert (thresholded_mat <= 0).all()
 43 | 
 44 |     target_mat = np.array(
 45 |         [[0, 0, 0, 0], [0, 0, 0, 0], [9, 10, 11, 12], [13, 14, 15, 16]]
 46 |     )
 47 | 
 48 |     assert np.array_equal(
 49 |         threshold(mat, 'range', cutoffs=[(9, 16)], remove_self_loops=False), target_mat
 50 |     )
 51 |     assert np.array_equal(
 52 |         threshold(mat, 'degree', avg_k=2, remove_self_loops=False), target_mat
 53 |     )
 54 |     assert np.array_equal(
 55 |         threshold(mat, 'quantile', quantile=0.5, remove_self_loops=False), target_mat
 56 |     )
 57 | 
 58 |     target_mat = np.array([[0, 0, 0, 0], [0, 0, 0, 0], [1, 1, 1, 1], [1, 1, 1, 1]])
 59 | 
 60 |     assert np.array_equal(
 61 |         threshold(
 62 |             mat, 'range', cutoffs=[(9, 16)], binary=True, remove_self_loops=False
 63 |         ),
 64 |         target_mat,
 65 |     )
 66 |     assert np.array_equal(
 67 |         threshold(mat, 'degree', avg_k=2, binary=True, remove_self_loops=False),
 68 |         target_mat,
 69 |     )
 70 |     assert np.array_equal(
 71 |         threshold(mat, 'quantile', quantile=0.5, binary=True, remove_self_loops=False),
 72 |         target_mat,
 73 |     )
 74 | 
 75 | 
 76 | def test_categorized_data():
 77 |     """Test the function that turn continuous data into categorical."""
 78 |     raw = np.array([[1.0, 1.4, 3.0], [2.0, 2.2, 5.0]]).T
 79 |     n_bins = 2
 80 |     data = categorized_data(raw, n_bins)
 81 | 
 82 |     data_true = np.array([[0, 0, 1], [0, 0, 1]]).T
 83 |     assert np.array_equal(data, data_true)
 84 | 
 85 | 
 86 | def test_entropies():
 87 |     """
 88 |     Test functions computing entropy, joint entropy and conditional entropy.
 89 | 
 90 |     """
 91 |     data = np.array([[1, 0, 0, 1, 1, 0, 1, 0], [0, 1, 0, 1, 1, 0, 1, 0]]).T
 92 |     H = entropy_from_seq(data[:, 0])
 93 |     H_joint = joint_entropy(data)
 94 |     H_cond = conditional_entropy(data[:, 1, np.newaxis], data[:, 0, np.newaxis])
 95 | 
 96 |     H_true = 1.0
 97 |     H_joint_true = 3 / 4 + 3 / 4 * np.log2(8 / 3)
 98 |     H_cond_true = H_joint - H
 99 | 
100 |     assert np.isclose(H, H_true)
101 |     assert np.isclose(H_joint, H_joint_true)
102 |     assert np.isclose(H_cond, H_cond_true)
103 | 


--------------------------------------------------------------------------------
/netrd/distance/ipsen_mikhailov.py:
--------------------------------------------------------------------------------
  1 | """
  2 | ipsen_mikhailov.py
  3 | --------------------------
  4 | 
  5 | Graph distance based on paper:
  6 | Evolutionary reconstruction of network
  7 | Available here:
  8 | https://journals.aps.org/pre/abstract/10.1103/PhysRevE.66.046109
  9 | 
 10 | author: Guillaume St-Onge
 11 | email: guillaume.st-onge.4@ulaval.ca
 12 | Submitted as part of the 2019 NetSI Collabathon.
 13 | 
 14 | """
 15 | 
 16 | import numpy as np
 17 | import networkx as nx
 18 | from .base import BaseDistance
 19 | from scipy.sparse.csgraph import laplacian
 20 | from scipy.linalg import eigh
 21 | from scipy.integrate import quad
 22 | from ..utilities.graph import unweighted
 23 | 
 24 | 
 25 | class IpsenMikhailov(BaseDistance):
 26 |     """Compares the spectrum of the Laplacian matrices."""
 27 | 
 28 |     @unweighted
 29 |     def dist(self, G1, G2, hwhm=0.08):
 30 |         """Compare the spectrum ot the associated Laplacian matrices.
 31 | 
 32 |         The results dictionary also stores a 2-tuple of the underlying
 33 |         adjacency matrices in the key `'adjacency_matrices'`.
 34 | 
 35 |         Parameters
 36 |         ----------
 37 | 
 38 |         G1, G2 (nx.Graph)
 39 |             two networkx graphs to be compared.
 40 | 
 41 |         hwhm (float)
 42 |             half with at half maximum of the lorentzian kernel.
 43 | 
 44 |         Returns
 45 |         -------
 46 | 
 47 |         dist (float)
 48 |             the distance between G1 and G2.
 49 | 
 50 |         Notes
 51 |         -----
 52 | 
 53 |         Requires undirected networks.
 54 | 
 55 |         References
 56 |         ----------
 57 | 
 58 |         .. [1] https://journals.aps.org/pre/abstract/10.1103/PhysRevE.66.046109
 59 | 
 60 |         """
 61 |         # get the adjacency matrices
 62 |         adj1 = nx.to_numpy_array(G1)
 63 |         adj2 = nx.to_numpy_array(G2)
 64 |         self.results['adjacency_matrices'] = adj1, adj2
 65 | 
 66 |         # get the IM distance
 67 |         dist = _im_distance(adj1, adj2, hwhm)
 68 | 
 69 |         self.results['dist'] = dist
 70 | 
 71 |         return dist
 72 | 
 73 | 
 74 | def _im_distance(adj1, adj2, hwhm):
 75 |     """Computes the Ipsen-Mikhailov distance for two symmetric adjacency
 76 |     matrices
 77 | 
 78 |     Base on this paper :
 79 |     https://journals.aps.org/pre/abstract/10.1103/PhysRevE.66.046109
 80 | 
 81 |     Note : this is also used by the file hamming_ipsen_mikhailov.py
 82 | 
 83 |     Parameters
 84 |     ----------
 85 | 
 86 |     adj1, adj2 (array): adjacency matrices.
 87 | 
 88 |     hwhm (float) : hwhm of the lorentzian distribution.
 89 | 
 90 |     Returns
 91 |     -------
 92 | 
 93 |     dist (float) : Ipsen-Mikhailov distance.
 94 | 
 95 |     """
 96 |     N = len(adj1)
 97 |     # get laplacian matrix
 98 |     L1 = laplacian(adj1, normed=False)
 99 |     L2 = laplacian(adj2, normed=False)
100 | 
101 |     # get the modes for the positive-semidefinite laplacian
102 |     w1 = np.sqrt(np.abs(eigh(L1)[0][1:]))
103 |     w2 = np.sqrt(np.abs(eigh(L2)[0][1:]))
104 | 
105 |     # we calculate the norm for both spectrum
106 |     norm1 = (N - 1) * np.pi / 2 - np.sum(np.arctan(-w1 / hwhm))
107 |     norm2 = (N - 1) * np.pi / 2 - np.sum(np.arctan(-w2 / hwhm))
108 | 
109 |     # define both spectral densities
110 |     density1 = lambda w: np.sum(hwhm / ((w - w1) ** 2 + hwhm**2)) / norm1
111 |     density2 = lambda w: np.sum(hwhm / ((w - w2) ** 2 + hwhm**2)) / norm2
112 | 
113 |     func = lambda w: (density1(w) - density2(w)) ** 2
114 | 
115 |     return np.sqrt(quad(func, 0, np.inf, limit=100)[0])
116 | 


--------------------------------------------------------------------------------
/netrd/reconstruction/graphical_lasso.py:
--------------------------------------------------------------------------------
  1 | """
  2 | graphical_lasso.py
  3 | --------------
  4 | 
  5 | Graph reconstruction algorithm based on [1, 2].
  6 | 
  7 | [1] J. Friedman, T. Hastie, R. Tibshirani, "Sparse inverse covariance estimation with
  8 | the graphical lasso", Biostatistics 9, pp. 432–441 (2008).
  9 | [2] https://github.com/CamDavidsonPilon/Graphical-Lasso-in-Finance
 10 | 
 11 | author: Charles Murphy
 12 | email: charles.murphy.1@ulaval.ca
 13 | Submitted as part of the 2019 NetSI Collabathon.
 14 | """
 15 | 
 16 | import numpy as np
 17 | from sklearn.covariance import graphical_lasso
 18 | from .base import BaseReconstructor
 19 | from ..utilities import create_graph, threshold
 20 | 
 21 | 
 22 | class GraphicalLasso(BaseReconstructor):
 23 |     """Performs graphical lasso."""
 24 | 
 25 |     def fit(
 26 |         self,
 27 |         TS,
 28 |         alpha=0.01,
 29 |         max_iter=100,
 30 |         tol=0.0001,
 31 |         threshold_type='degree',
 32 |         **kwargs
 33 |     ):
 34 |         """Performs a graphical lasso.
 35 | 
 36 |         For details see [1, 2].
 37 | 
 38 |         The results dictionary also stores the covariance matrix as
 39 |         `'weights_matrix'`, the precision matrix as `'precision_matrix'`,
 40 |         and the thresholded version of the covariance matrix as
 41 |         `'thresholded_matrix'`.
 42 | 
 43 |         This implementation uses `scikit-learn`'s implementation of the
 44 |         graphical lasso; for convenience two control parameters `tol` and
 45 |         `max_iter` are available to interface with their method.
 46 | 
 47 |         Parameters
 48 |         ----------
 49 | 
 50 |         TS (np.ndarray)
 51 |             Array consisting of :math:`L` observations from :math:`N`
 52 |             sensors.
 53 | 
 54 |         alpha (float, default=0.01)
 55 |             Coefficient of penalization, higher values means more
 56 |             sparseness
 57 | 
 58 |         max_iter (int, default=100)
 59 |             Maximum number of iterations.
 60 | 
 61 |         tol (float, default=0.0001)
 62 |             Stop the algorithm when the duality gap is below a certain
 63 |             threshold.
 64 | 
 65 |         threshold_type (str)
 66 |             Which thresholding function to use on the matrix of
 67 |             weights. See `netrd.utilities.threshold.py` for
 68 |             documentation. Pass additional arguments to the thresholder
 69 |             using ``**kwargs``.
 70 | 
 71 |         Returns
 72 |         -------
 73 | 
 74 |         G (nx.Graph)
 75 |             A reconstructed graph with :math:`N` nodes.
 76 | 
 77 |         References
 78 |         ----------
 79 | 
 80 |         .. [1] J. Friedman, T. Hastie, R. Tibshirani, "Sparse inverse
 81 |                covariance estimation with the graphical lasso",
 82 |                Biostatistics 9, pp. 432–441 (2008).
 83 | 
 84 |         .. [2] https://github.com/CamDavidsonPilon/Graphical-Lasso-in-Finance
 85 | 
 86 |         """
 87 |         emp_cov = np.cov(TS)
 88 | 
 89 |         cov, prec = graphical_lasso(emp_cov, alpha, max_iter=max_iter, tol=tol)
 90 |         self.results['weights_matrix'] = cov
 91 |         self.results['precision_matrix'] = prec
 92 | 
 93 |         # threshold the network
 94 |         self.results['thresholded_matrix'] = threshold(
 95 |             self.results['weights_matrix'], threshold_type, **kwargs
 96 |         )
 97 | 
 98 |         # construct the network
 99 |         G = create_graph(self.results['thresholded_matrix'])
100 |         self.results['graph'] = G
101 | 
102 |         return G
103 | 


--------------------------------------------------------------------------------
/tests/test_reconstruction.py:
--------------------------------------------------------------------------------
  1 | """
  2 | test_reconstruction.py
  3 | ----------------------
  4 | 
  5 | Test reconstruction algorithms.
  6 | 
  7 | """
  8 | 
  9 | import numpy as np
 10 | from netrd import reconstruction
 11 | from netrd.reconstruction import ConvergentCrossMapping
 12 | from netrd.reconstruction import BaseReconstructor
 13 | 
 14 | 
 15 | def test_graph_size():
 16 |     """
 17 |     The number of nodes in a reconstructed graph should be
 18 |     equal to the number of sensors in the time series data
 19 |     used to reconstruct the graph.
 20 |     """
 21 |     size = 50
 22 |     for label, obj in reconstruction.__dict__.items():
 23 |         if label in [
 24 |             'PartialCorrelationMatrix',
 25 |             'NaiveTransferEntropy' 'OptimalCausationEntropy',
 26 |         ]:
 27 |             continue
 28 |         if isinstance(obj, type) and BaseReconstructor in obj.__bases__:
 29 |             TS = np.random.random((size, 125))
 30 |             G = obj().fit(TS, threshold_type='range', cutoffs=[(-np.inf, np.inf)])
 31 |             assert G.order() == size, f"{label} has wrong size"
 32 | 
 33 | 
 34 | def test_naive_transfer_entropy():
 35 |     """
 36 |     Use a smaller data set to test the NaiveTransferEntropy,
 37 |     because it is very slow.
 38 | 
 39 |     """
 40 |     size = 25
 41 |     TS = np.random.random((size, 100))
 42 |     G = reconstruction.NaiveTransferEntropy().fit(
 43 |         TS, delay_max=2, threshold_type='range', cutoffs=[(-np.inf, np.inf)]
 44 |     )
 45 |     assert G.order() == size
 46 | 
 47 | 
 48 | def test_oce():
 49 |     """
 50 |     Test optimal causation entropy using a smaller dataset.
 51 |     """
 52 | 
 53 |     size = 25
 54 |     TS = np.random.random((size, 50))
 55 |     G = reconstruction.OptimalCausationEntropy().fit(
 56 |         TS, threshold_type='range', cutoffs=[(-np.inf, np.inf)]
 57 |     )
 58 |     assert G.order() == size
 59 | 
 60 | 
 61 | def test_convergent_cross_mapping():
 62 |     """
 63 |     Examine the outcome of ConvergentCrossMapping with synthetic
 64 |     time series data generated from a two-species Lotka-Vottera model.
 65 | 
 66 |     """
 67 |     filepath = '../data/two_species_coupled_time_series.dat'
 68 |     edgelist = {(1, 0), (0, 1)}
 69 |     keys = ['graph', 'weights_matrix', 'pvalues_matrix']
 70 | 
 71 |     TS = np.loadtxt(filepath, delimiter=',')
 72 |     recon = ConvergentCrossMapping()
 73 |     G = recon.fit(TS, threshold_type='range', cutoffs=[(-np.inf, np.inf)])
 74 |     el = set(G.edges())
 75 |     res = recon.results.keys()
 76 | 
 77 |     assert el == edgelist
 78 |     assert all(k in res for k in keys)
 79 | 
 80 | 
 81 | def test_partial_correlation():
 82 |     """
 83 |     The PartialCorrelationMatrix has many parameterizations
 84 |     that ought to be tested differently. Otherwise, this should be
 85 |     equivalent to `test_graph_size`.
 86 |     """
 87 |     for resid in [True, False]:
 88 |         for index in [0, None]:
 89 |             for size in [10, 100]:
 90 |                 if index is None and resid is True:
 91 |                     pass  # this shouldn't be a valid parameterization
 92 |                 else:
 93 |                     TS = np.random.random((size, 50))
 94 |                     G = reconstruction.PartialCorrelationMatrix().fit(
 95 |                         TS, index=index, cutoffs=[(-np.inf, np.inf)]
 96 |                     )
 97 |                     if index is None:
 98 |                         assert G.order() == size
 99 |                     else:
100 |                         assert G.order() == (size - 1)
101 | 


--------------------------------------------------------------------------------
/netrd/dynamics/SIS.py:
--------------------------------------------------------------------------------
  1 | """
  2 | SIS.py
  3 | ------
  4 | 
  5 | Implementation of Susceptible-Infected-Susceptible models dynamics on a
  6 | network.
  7 | 
  8 | author: Stefan McCabe
  9 | 
 10 | Submitted as part of the 2019 NetSI Collabathon.
 11 | 
 12 | """
 13 | 
 14 | from netrd.dynamics import BaseDynamics
 15 | import numpy as np
 16 | import networkx as nx
 17 | 
 18 | 
 19 | class SISModel(BaseDynamics):
 20 |     """Susceptible-Infected-Susceptible dynamical process."""
 21 | 
 22 |     def simulate(self, G, L, num_seeds=1, beta=None, mu=None):
 23 |         r"""Simulate SIS model dynamics on a network.
 24 | 
 25 |         The results dictionary also stores the ground truth network as
 26 |         `'ground_truth'`.
 27 | 
 28 |         Parameters
 29 |         ----------
 30 |         G (nx.Graph)
 31 |             the input (ground-truth) graph with :math:`N` nodes.
 32 | 
 33 |         L (int)
 34 |             the length of the desired time series.
 35 | 
 36 |         num_seeds (int)
 37 |             the number of initially infected nodes.
 38 | 
 39 |         beta (float)
 40 |             the infection rate for the SIS process.
 41 | 
 42 |         mu (float)
 43 |             the recovery rate for the SIS process.
 44 | 
 45 |         Returns
 46 |         -------
 47 |         TS (np.ndarray)
 48 |             an :math:`N \times L` array of synthetic time series data.
 49 | 
 50 |         """
 51 |         H = G.copy()
 52 |         N = H.number_of_nodes()
 53 |         TS = np.zeros((N, L))
 54 |         index_to_node = dict(zip(range(G.order()), list(G.nodes())))
 55 | 
 56 |         # sensible defaults for beta and mu
 57 |         if not beta:
 58 |             avg_k = np.mean(list(dict(H.degree()).values()))
 59 |             beta = 1 / avg_k
 60 |         if not mu:
 61 |             mu = 1 / H.number_of_nodes()
 62 | 
 63 |         seeds = np.random.permutation(
 64 |             np.concatenate([np.repeat(1, num_seeds), np.repeat(0, N - num_seeds)])
 65 |         )
 66 |         TS[:, 0] = seeds
 67 |         infected_attr = {index_to_node[i]: s for i, s in enumerate(seeds)}
 68 |         nx.set_node_attributes(H, infected_attr, 'infected')
 69 |         nx.set_node_attributes(H, 0, 'next_infected')
 70 | 
 71 |         # SIS dynamics
 72 |         for t in range(1, L):
 73 |             nodes = np.random.permutation(H.nodes)
 74 |             for i in nodes:
 75 |                 if H.nodes[i]['infected']:
 76 |                     neigh = H.neighbors(i)
 77 |                     for j in neigh:
 78 |                         if np.random.random() < beta:
 79 |                             H.nodes[j]['next_infected'] = 1
 80 |                     if np.random.random() < mu:
 81 |                         H.nodes[i]['infected'] = 0
 82 |             infections = nx.get_node_attributes(H, 'infected')
 83 |             next_infections = nx.get_node_attributes(H, 'next_infected')
 84 | 
 85 |             # store SIS dynamics for time t
 86 |             TS[:, t] = np.array(list(infections.values()))
 87 |             nx.set_node_attributes(H, next_infections, 'infected')
 88 |             nx.set_node_attributes(H, 0, 'next_infected')
 89 | 
 90 |             # if the epidemic dies off, stop
 91 |             if TS[:, t].sum() < 1:
 92 |                 break
 93 | 
 94 |         # if the epidemic died off, pad the time series to the right shape
 95 |         if TS.shape[1] < L:
 96 |             TS = np.hstack([TS, np.zeros((N, L - TS.shape[1]))])
 97 | 
 98 |         self.results['ground_truth'] = H
 99 |         self.results['TS'] = TS
100 |         self.results['index_to_node'] = index_to_node
101 | 
102 |         return TS
103 | 


--------------------------------------------------------------------------------
/netrd/reconstruction/correlation_matrix.py:
--------------------------------------------------------------------------------
 1 | """
 2 | correlation_matrix.py
 3 | ---------------------
 4 | Reconstruction of graphs using the correlation matrix.
 5 | author: Stefan McCabe
 6 | email: stefanmccabe at gmail dot com
 7 | Submitted as part of the 2019 NetSI Collabathon
 8 | """
 9 | from .base import BaseReconstructor
10 | import numpy as np
11 | from ..utilities import create_graph, threshold
12 | 
13 | 
14 | class CorrelationMatrix(BaseReconstructor):
15 |     """Uses the correlation matrix."""
16 | 
17 |     def fit(self, TS, num_eigs=None, threshold_type='range', **kwargs):
18 |         """Uses the correlation matrix.
19 | 
20 |         If ``num_eigs`` is `None`, perform the reconstruction using the
21 |         unregularized correlation matrix. Otherwise, construct a regularized
22 |         precision matrix using ``num_eigs`` eigenvectors and eigenvalues of the
23 |         correlation matrix. For details on the regularization method, see [1].
24 |         The results dictionary also stores the raw correlation matrix
25 |         (potentially regularized) as `'weights_matrix'` and the thresholded
26 |         version of the correlation matrix as `'thresholded_matrix'`. For
27 |         details see [2]_.
28 | 
29 |         Parameters
30 |         ----------
31 |         TS (np.ndarray)
32 |             Array consisting of :math:`L` observations from :math:`N` sensors
33 | 
34 |         num_eigs (int)
35 |             The number of eigenvalues to use. (This corresponds to the
36 |             amount of regularization.) The number of eigenvalues used must
37 |             be less than :math:`N`.
38 | 
39 |         threshold_type (str)
40 |             Which thresholding function to use on the matrix of
41 |             weights. See `netrd.utilities.threshold.py` for
42 |             documentation. Pass additional arguments to the thresholder
43 |             using `**kwargs`.
44 | 
45 |         Returns
46 |         -------
47 |         G (nx.Graph)
48 |             a reconstructed graph.
49 | 
50 |         References
51 |         ----------
52 |         .. [1] https://bwlewis.github.io/correlation-regularization/
53 | 
54 |         .. [2] https://github.com/valeria-io/visualising_stocks_correlations/blob/master/corr_matrix_viz.ipynb
55 | 
56 |         """
57 |         # get the correlation matrix
58 |         cor = np.corrcoef(TS)
59 | 
60 |         if num_eigs:
61 |             N = TS.shape[0]
62 |             if num_eigs > N:
63 |                 raise ValueError(
64 |                     "The number of eigenvalues used must be less "
65 |                     "than the number of sensors."
66 |                 )
67 | 
68 |             # get eigenvalues and eigenvectors of the correlation matrix
69 |             vals, vecs = np.linalg.eigh(cor)
70 |             idx = vals.argsort()[::-1]
71 |             vals = vals[idx]
72 |             vecs = vecs[:, idx]
73 | 
74 |             # construct the precision matrix and store it
75 |             P = (vecs[:, :num_eigs]) @ (
76 |                 1 / (vals[:num_eigs]).reshape(num_eigs, 1) * (vecs[:, :num_eigs]).T
77 |             )
78 |             P = P / (
79 |                 np.sqrt(np.diag(P)).reshape(N, 1) @ np.sqrt(np.diag(P)).reshape(1, N)
80 |             )
81 |             mat = P
82 |         else:
83 |             mat = cor
84 | 
85 |         # store the appropriate source matrix
86 |         self.results['weights_matrix'] = mat
87 | 
88 |         # threshold the correlation matrix
89 |         A = threshold(mat, threshold_type, **kwargs)
90 |         self.results['thresholded_matrix'] = A
91 | 
92 |         # construct the network
93 |         self.results['graph'] = create_graph(A)
94 |         G = self.results['graph']
95 | 
96 |         return G
97 | 


--------------------------------------------------------------------------------
/netrd/distance/netlsd.py:
--------------------------------------------------------------------------------
  1 | """
  2 | netlsd.py
  3 | --------------
  4 | 
  5 | Graph distance based on:
  6 | A. Tsitsulin, D. Mottin, P. Karras, A. Bronstein & E. Müller. NetLSD: Hearing the Shape of a Graph. KDD 2018
  7 | 
  8 | author: Anton Tsitsulin
  9 | 
 10 | """
 11 | import numpy as np
 12 | import networkx as nx
 13 | import scipy.linalg as spl
 14 | 
 15 | from .base import BaseDistance
 16 | from ..utilities import undirected, unweighted
 17 | 
 18 | 
 19 | class NetLSD(BaseDistance):
 20 |     """Compares spectral node signature distributions."""
 21 | 
 22 |     @undirected
 23 |     @unweighted
 24 |     def dist(self, G1, G2, normalization=None, timescales=None):
 25 |         """NetLSD: Hearing the Shape of a Graph.
 26 | 
 27 |         A network similarity measure based on spectral node signature
 28 |         distributions.
 29 | 
 30 |         The results dictionary includes the underlying signature vectors in
 31 |         `'signatures'`.
 32 | 
 33 |         Parameters
 34 |         ----------
 35 | 
 36 |         G1, G2 (nx.Graph)
 37 |             two undirected networkx graphs to be compared.
 38 | 
 39 |         normalization (str)
 40 |             type of normalization of the heat kernel vectors. either
 41 |             `'complete'`, `'empty'` or `'none'`
 42 | 
 43 |         timescales (np.ndarray)
 44 |             timescales for the comparison. None yields default.
 45 | 
 46 |         Returns
 47 |         -------
 48 | 
 49 |         dist (float)
 50 |             the distance between `G1` and `G2`.
 51 | 
 52 |         References
 53 |         ----------
 54 | 
 55 |         .. [1] A. Tsitsulin, D. Mottin, P. Karras, A. Bronstein &
 56 |                E. Müller. NetLSD: Hearing the Shape of a Graph. KDD 2018
 57 | 
 58 |         """
 59 |         if normalization is None:
 60 |             normalization = 'none'
 61 |         if timescales is None:
 62 |             timescales = np.logspace(-2, 2, 256)
 63 |         assert isinstance(
 64 |             normalization, str
 65 |         ), 'Normalization parameter must be of string type'
 66 | 
 67 |         lap1 = nx.normalized_laplacian_matrix(G1)
 68 |         lap2 = nx.normalized_laplacian_matrix(G2)
 69 | 
 70 |         # Note: this is O(n^3) worst-case.
 71 |         eigs1 = spl.eigvalsh(lap1.todense())
 72 |         eigs2 = spl.eigvalsh(lap2.todense())
 73 | 
 74 |         hkt1 = _lsd_signature(eigs1, timescales, normalization)
 75 |         hkt2 = _lsd_signature(eigs2, timescales, normalization)
 76 | 
 77 |         self.results['signatures'] = (hkt1, hkt2)
 78 |         self.results['dist'] = np.linalg.norm(hkt1 - hkt2)
 79 | 
 80 |         return self.results['dist']
 81 | 
 82 | 
 83 | def _lsd_signature(eigenvalues, timescales, normalization):
 84 |     """
 85 |     Computes heat kernel trace from given eigenvalues, timescales, and normalization.
 86 | 
 87 |     Parameters
 88 |     --------------
 89 |     eigenvalues (numpy.ndarray): Eigenvalue vector
 90 |     timescales (numpy.ndarray): Vector of discrete timesteps for the kernel computation
 91 |     normalization (str):
 92 |         Either 'empty', 'complete' or 'none'.
 93 |         If 'none' or any other value, return unnormalized heat kernel trace.
 94 |         For the details how 'empty' and 'complete' are computed, please refer to the paper.
 95 |     Returns
 96 |     -------
 97 |     numpy.ndarray
 98 |         Heat kernel trace signature
 99 |     """
100 |     nv = eigenvalues.shape[0]
101 |     hkt = np.zeros(timescales.shape)
102 |     for idx, t in enumerate(timescales):
103 |         hkt[idx] = np.sum(np.exp(-t * eigenvalues))
104 |     if normalization == 'empty':
105 |         return hkt / nv
106 |     if normalization == 'complete':
107 |         return hkt / (1 + (nv - 1) * np.exp(-(1 + 1 / (nv - 1)) * timescales))
108 |     return hkt
109 | 


--------------------------------------------------------------------------------
/netrd/utilities/graph.py:
--------------------------------------------------------------------------------
  1 | """
  2 | graph.py
  3 | --------
  4 | 
  5 | Utilities for creating and interacting with graph objects.
  6 | 
  7 | author: Stefan McCabe (stefanmccabe at gmail dot com)
  8 | 
  9 | Submitted as part of the 2019 NetSI Collabathon.
 10 | 
 11 | """
 12 | from functools import wraps
 13 | import warnings
 14 | import numpy as np
 15 | import networkx as nx
 16 | 
 17 | 
 18 | def create_graph(A, create_using=None, remove_self_loops=True):
 19 |     """Flexibly creating a networkx graph from a numpy array.
 20 | 
 21 |     Parameters
 22 |     ----------
 23 |     A (np.ndarray)
 24 |         A numpy array.
 25 | 
 26 |     create_using (nx.Graph or None)
 27 |         Create the graph using a specific networkx graph. Can be used for
 28 |         forcing an asymmetric matrix to create an undirected graph, for
 29 |         example.
 30 | 
 31 |     remove_self_loops (bool)
 32 |         If True, remove the diagonal of the matrix before creating the
 33 |         graph object.
 34 | 
 35 |     Returns
 36 |     -------
 37 |     G
 38 |         A graph, typically a nx.Graph or nx.DiGraph.
 39 | 
 40 |     """
 41 |     if remove_self_loops:
 42 |         np.fill_diagonal(A, 0)
 43 | 
 44 |     if create_using is None:
 45 |         if np.allclose(A, A.T):
 46 |             G = nx.from_numpy_array(A, create_using=nx.Graph())
 47 |         else:
 48 |             G = nx.from_numpy_array(A, create_using=nx.DiGraph())
 49 |     else:
 50 |         G = nx.from_numpy_array(A, create_using=create_using)
 51 | 
 52 |     return G
 53 | 
 54 | 
 55 | def ensure_undirected(G):
 56 |     """Ensure the graph G is undirected.
 57 | 
 58 |     If it is not, coerce it to undirected and warn the user.
 59 | 
 60 |     Parameters
 61 |     ----------
 62 |     G (networkx graph)
 63 |         The graph to be checked
 64 | 
 65 |     Returns
 66 |     -------
 67 | 
 68 |     G (nx.Graph)
 69 |         Undirected version of the input graph
 70 | 
 71 |     """
 72 |     if nx.is_directed(G):
 73 |         G = G.to_undirected(as_view=False)
 74 |         warnings.warn("Coercing directed graph to undirected.", RuntimeWarning)
 75 |     return G
 76 | 
 77 | 
 78 | def undirected(func):
 79 |     """
 80 |     Decorator applying ``ensure_undirected()`` to all ``nx.Graph``-subclassed
 81 |     arguments of ``func``.
 82 |     """
 83 | 
 84 |     @wraps(func)
 85 |     def wrapper(*args, **kwargs):
 86 |         args = [
 87 |             ensure_undirected(arg) if issubclass(arg.__class__, nx.Graph) else arg
 88 |             for arg in args
 89 |         ]
 90 |         return func(*args, **kwargs)
 91 | 
 92 |     return wrapper
 93 | 
 94 | 
 95 | def ensure_unweighted(G):
 96 |     """Ensure the graph G is unweighted.
 97 | 
 98 |     If it is not, coerce it to unweighted and warn the user.
 99 | 
100 |     Parameters
101 |     ----------
102 |     G (networkx graph)
103 |         The graph to be checked
104 | 
105 |     Returns
106 |     -------
107 | 
108 |     G (nx.Graph)
109 |         Unweighted version of the input graph
110 | 
111 |     """
112 | 
113 |     for _, _, attr in G.edges(data=True):
114 |         if not np.isclose(attr.get("weight", 1.0), 1.0):
115 |             H = G.__class__()
116 |             H.add_nodes_from(G)
117 |             H.add_edges_from(G.edges)
118 |             warnings.warn("Coercing weighted graph to unweighted.", RuntimeWarning)
119 |             return H
120 | 
121 |     return G
122 | 
123 | 
124 | def unweighted(func):
125 |     """
126 |     Decorator applying ``ensure_unweighted()`` to all ``nx.Graph``-subclassed
127 |     arguments of ``func``.
128 |     """
129 | 
130 |     @wraps(func)
131 |     def wrapper(*args, **kwargs):
132 |         args = [
133 |             ensure_unweighted(arg) if issubclass(arg.__class__, nx.Graph) else arg
134 |             for arg in args
135 |         ]
136 |         return func(*args, **kwargs)
137 | 
138 |     return wrapper
139 | 


--------------------------------------------------------------------------------
/netrd/reconstruction/free_energy_minimization.py:
--------------------------------------------------------------------------------
  1 | """
  2 | free_energy_minimization.py
  3 | ---------------------------
  4 | Reconstruction of graphs by minimizing a free energy of your data
  5 | author: Brennan Klein
  6 | email: brennanjamesklein at gmail dot com
  7 | submitted as part of the 2019 NetSI Collabathon
  8 | """
  9 | from .base import BaseReconstructor
 10 | import numpy as np
 11 | from scipy import linalg
 12 | from ..utilities import create_graph, threshold
 13 | 
 14 | 
 15 | class FreeEnergyMinimization(BaseReconstructor):
 16 |     """Applies free energy principle."""
 17 | 
 18 |     def fit(self, TS, threshold_type='degree', **kwargs):
 19 |         """Infer inter-node coupling weights by minimizing a free energy over the
 20 |         data structure.
 21 | 
 22 |         The results dictionary also stores the weight matrix as
 23 |         `'weights_matrix'` and the thresholded version of the weight matrix
 24 |         as `'thresholded_matrix'`. For details see [1]_.
 25 | 
 26 |         Parameters
 27 |         ----------
 28 | 
 29 |         TS (np.ndarray)
 30 |             Array consisting of :math:`L` observations from :math.`N`
 31 |             sensors.
 32 | 
 33 |         threshold_type (str)
 34 |             Which thresholding function to use on the matrix of
 35 |             weights. See `netrd.utilities.threshold.py` for
 36 |             documentation. Pass additional arguments to the thresholder
 37 |             using ``**kwargs``.
 38 | 
 39 |         Returns
 40 |         -------
 41 | 
 42 |         G (nx.Graph or nx.DiGraph)
 43 |             a reconstructed graph.
 44 | 
 45 |         References
 46 |         ----------
 47 | 
 48 |         .. [1] https://github.com/nihcompmed/network-inference/blob/master/sphinx/codesource/inference.py
 49 | 
 50 |         """
 51 | 
 52 |         N, L = np.shape(TS)  # N nodes, length L
 53 |         m = np.mean(TS[:, :-1], axis=1)  # model average
 54 |         ds = TS[:, :-1].T - m  # discrepancy
 55 |         t1 = L - 1  # time limit
 56 | 
 57 |         # covariance of the discrepeancy
 58 |         c = np.cov(ds, rowvar=False, bias=True)
 59 | 
 60 |         c_inv = linalg.inv(c)  # inverse
 61 |         dst = ds.T  # discrepancy at time t
 62 | 
 63 |         # empty matrix to populate w/ inferred couplings
 64 |         W = np.empty((N, N))
 65 | 
 66 |         nloop = 10000  # failsafe
 67 | 
 68 |         for i0 in range(N):  # for each node
 69 |             TS1 = TS[i0, 1:]  # take its entire time series
 70 |             h = TS1  # calculate the the local field
 71 | 
 72 |             cost = np.full(nloop, 100.0)
 73 | 
 74 |             for iloop in range(nloop):
 75 |                 h_av = np.mean(h)  # average local field
 76 |                 hs_av = np.dot(dst, h - h_av) / t1  # deltaE_i delta\sigma_k
 77 |                 w = np.dot(hs_av, c_inv)  # expectation under model
 78 | 
 79 |                 h = np.dot(TS[:, :-1].T, w[:])  # estimate of local field
 80 |                 TS_model = np.tanh(h)  # under kinetic Ising model
 81 | 
 82 |                 # discrepancy cost
 83 |                 cost[iloop] = np.mean((TS1[:] - TS_model[:]) ** 2)
 84 | 
 85 |                 if cost[iloop] >= cost[iloop - 1]:
 86 |                     break  # if it increases, break
 87 | 
 88 |                 # complicated, but this seems to be the estimate of W_i
 89 |                 h *= np.divide(
 90 |                     TS1, TS_model, out=np.ones_like(TS1), where=TS_model != 0
 91 |                 )
 92 | 
 93 |             W[i0, :] = w[:]
 94 | 
 95 |         # threshold the network
 96 |         W_thresh = threshold(W, threshold_type, **kwargs)
 97 | 
 98 |         # construct the network
 99 | 
100 |         self.results['graph'] = create_graph(W_thresh)
101 |         self.results['weights_matrix'] = W
102 |         self.results['thresholded_matrix'] = W_thresh
103 |         G = self.results['graph']
104 | 
105 |         return G
106 | 


--------------------------------------------------------------------------------
/doc/source/tutorial.rst:
--------------------------------------------------------------------------------
  1 | Tutorial
  2 | ========
  3 | 
  4 | Reconstructing a graph
  5 | ----------------------
  6 | 
  7 | All reconstruction algorithms provide a simple interface. First,
  8 | initialize the reconstructor object by calling its constructor with no
  9 | arguments. Then, use the ``fit()`` method to obtain the reconstructed
 10 | network.
 11 | 
 12 | .. code:: python
 13 | 
 14 |    TS = np.loadtxt('data/synth_4clique_N64_simple.csv',
 15 |                    delimiter=',',
 16 |                    encoding='utf8')
 17 |    # TS is a NumPy array of shape N (number of nodes) x L (observations).
 18 | 
 19 |    recon = netrd.reconstruction.RandomReconstructor()
 20 |    G = recon.fit(TS)
 21 | 
 22 | Many reconstruction algorithms store additional metadata in a
 23 | ``results`` dictionary.
 24 | 
 25 | .. code:: python
 26 | 
 27 |    # Another way to obtain the reconstructed graph
 28 |    G = recon.results['graph']
 29 | 
 30 |    # A dense matrix of weights
 31 |    W = recon.results['weights_matrix']
 32 | 
 33 |    # The binarized matrix from which the graph is created
 34 |    A = recon.results['thresholded_matrix']
 35 | 
 36 | Many, though not all, reconstruction algorithms work by assigning each
 37 | potential edge a weight and then thresholding the matrix to obtain a
 38 | sparse representation. This thresholding can be controlled by setting
 39 | the ``threshold_type`` argument to one of four values:
 40 | 
 41 | -  ``range``: Consider only weights whose values fall within a range.
 42 | -  ``degree``: Consider only the largest weights, targeting a specific
 43 |    average degree.
 44 | -  ``quantile``: Consider only weights in, e.g., the 0.90 quantile and
 45 |    above.
 46 | -  ``custom``: Pass a custom function for thresholding the matrix
 47 |    yourself.
 48 | 
 49 | Each of these has a specific argument to pass to tune the thresholding:
 50 | 
 51 | -  ``cutoffs``: A list of 2-tuples specifying the values to keep. For
 52 |    example, to keep only values whose absolute values are above 0.5, use
 53 |    ``cutoffs=[(-np.inf, -0.5), (0.5, np.inf)]``
 54 | -  ``avg_k``: The desired average degree of the network.
 55 | -  ``quantile``: The appropriate quantile (not percentile).
 56 | -  ``custom_thresholder``: A user-defined function that returns an N x N
 57 |    NumPy array.
 58 | 
 59 | .. code:: python
 60 | 
 61 |    H = recon.fit(TS, threshold_type='degree', avg_k = 15.125)
 62 | 
 63 |    print(nx.info(G))
 64 |    # This network is a complete graph.
 65 | 
 66 |    print(nx.info(H))
 67 |    # This network is not.
 68 | 
 69 | Distances between graphs
 70 | ------------------------
 71 | 
 72 | Distances behave similarly to reconstructors. All distance objects have
 73 | a ``dist()`` method that takes two NetworkX graphs.
 74 | 
 75 | .. code:: python
 76 | 
 77 |    G1 = nx.fast_gnp_random_graph(1000, 0.1)
 78 |    G2 = nx.fast_gnp_random_graph(1000, 0.1)
 79 | 
 80 |    dist = netrd.distance.NetSimile()
 81 |    D = dist.dist(G1, G2)
 82 | 
 83 | Some distances also store metadata in ``results`` dictionaries.
 84 | 
 85 | .. code:: python
 86 | 
 87 |    # Another way to get the distance
 88 |    D = dist.results['dist']
 89 | 
 90 |    # The underlying features used in NetSimile
 91 |    vecs = dist.results['signature_vectors']
 92 | 
 93 | Dynamics on graphs
 94 | ------------------
 95 | 
 96 | As a utility, we also implement various ways to simulate dynamics on a
 97 | network. These have a similar interface to reconstructors and distances.
 98 | Their ``simulate()`` method takes an input graph and the desired length
 99 | of the dynamics, returning the same N x L array used in the graph
100 | reconstruction methods.
101 | 
102 | .. code:: python
103 | 
104 |    model = netrd.dynamics.VoterModel()
105 |    TS = model.simulate(G, 1000, noise=.001)
106 | 
107 |    # Another way to get the dynamics
108 |    TS = model.results['TS']
109 | 
110 |    # The original graph is stored in results
111 |    H = model.results['ground_truth']
112 | 
113 | 


--------------------------------------------------------------------------------
/netrd/distance/deltacon.py:
--------------------------------------------------------------------------------
  1 | """
  2 | deltacon.py
  3 | --------------------------
  4 | 
  5 | Deltacon measure for graph distance, after:
  6 | 
  7 | Koutra, Danai, Joshua T. Vogelstein, and Christos Faloutsos. 2013. “Deltacon: A
  8 | Principled Massive-Graph Similarity Function.” In Proceedings of the 2013 SIAM
  9 | International Conference on Data Mining, 162–70. Society for Industrial and
 10 | Applied Mathematics. https://doi.org/10.1137/1.9781611972832.18.
 11 | 
 12 | author: Stefan McCabe
 13 | email: stefanmccabe at gmail dot com
 14 | Submitted as part of the 2019 NetSI Collabathon.
 15 | 
 16 | """
 17 | 
 18 | import numpy as np
 19 | import networkx as nx
 20 | from .base import BaseDistance
 21 | from ..utilities import undirected
 22 | 
 23 | 
 24 | class DeltaCon(BaseDistance):
 25 |     """Compare matrices related to Fast Belief Propagation."""
 26 | 
 27 |     @undirected
 28 |     def dist(self, G1, G2, exact=True, g=None):
 29 |         """DeltaCon is based on the Matsusita between matrices created from fast
 30 |         belief propagation (FBP) on graphs G1 and G2.
 31 | 
 32 |         Because the FBP algorithm requires a costly matrix inversion, there
 33 |         is a faster, roughly linear, algorithm that gives approximate
 34 |         results.
 35 | 
 36 |         Parameters
 37 |         ----------
 38 | 
 39 |         G1, G2 (nx.Graph)
 40 |             two networkx graphs to be compared.
 41 | 
 42 |         exact (bool)
 43 |             if True, use the slower but exact algorithm (DeltaCon_0)
 44 | 
 45 |         g (int)
 46 |             the number of groups to use in the efficient algorithm. If
 47 |             exact is set to False but g is not set, the efficient algorithm
 48 |             will still behave like the exact algorithm, since each node is
 49 |             put in its own group.
 50 | 
 51 |         Returns
 52 |         -------
 53 | 
 54 |         dist (float)
 55 |             the distance between G1 and G2.
 56 | 
 57 |         References
 58 |         ----------
 59 | 
 60 |         .. [1] Koutra, Danai, Joshua T. Vogelstein, and Christos
 61 |                Faloutsos. 2013. "Deltacon: A Principled Massive-Graph
 62 |                Similarity Function." In Proceedings of the 2013 SIAM
 63 |                International Conference on Data Mining, 162–70. Society for
 64 |                Industrial and Applied
 65 |                Mathematics. https://doi.org/10.1137/1.9781611972832.18.
 66 | 
 67 |         """
 68 |         assert G1.number_of_nodes() == G2.number_of_nodes()
 69 |         N = G1.number_of_nodes()
 70 | 
 71 |         if not exact and g is None:
 72 |             g = N
 73 | 
 74 |         A1 = nx.to_numpy_array(G1)
 75 |         L1 = nx.laplacian_matrix(G1).toarray()
 76 |         D1 = L1 + A1
 77 | 
 78 |         A2 = nx.to_numpy_array(G2)
 79 |         L2 = nx.laplacian_matrix(G2).toarray()
 80 |         D2 = L2 + A2
 81 | 
 82 |         eps_1 = 1 / (1 + np.max(D1))
 83 |         eps_2 = 1 / (1 + np.max(D2))
 84 | 
 85 |         if exact:
 86 |             S1 = np.linalg.inv(np.eye(N) + (eps_1**2) * D1 - eps_1 * A1)
 87 |             S2 = np.linalg.inv(np.eye(N) + (eps_2**2) * D2 - eps_2 * A2)
 88 |         else:
 89 |             raise NotImplementedError(
 90 |                 "The efficient algorithm is not "
 91 |                 "implemented. Please use the exact "
 92 |                 "algorithm."
 93 |             )
 94 | 
 95 |         def matusita_dist(X, Y):
 96 |             r"""Return the Matusita distance
 97 | 
 98 |             .. math::
 99 | 
100 |                 \sqrt{\sum_i \sum_j \left( \sqrt{X_{ij}} - \sqrt{Y_{ij}} \right)^{2}}
101 | 
102 | 
103 |             between X and Y.
104 |             """
105 |             return np.sqrt(np.sum(np.square(np.sqrt(X) - np.sqrt(Y))))
106 | 
107 |         dist = matusita_dist(S1, S2)
108 | 
109 |         self.results['belief_matrix_1'] = S1
110 |         self.results['belief_matrix_2'] = S2
111 | 
112 |         self.results['dist'] = dist
113 |         return dist
114 | 


--------------------------------------------------------------------------------
/netrd/reconstruction/ou_inference.py:
--------------------------------------------------------------------------------
  1 | """
  2 | <u_inference.py
  3 | --------------
  4 | 
  5 | Graph reconstruction algorithm based on [1, 2].
  6 | 
  7 | [1] P. Barucca, "Localization in covariance matrices of coupled heterogeneous
  8 | Ornstein-Uhlenbeck processes", Phys. Rev. E 90, 062129 (2014).
  9 | [2] https://github.com/paolobarucca/OUinference.
 10 | 
 11 | author: Charles Murphy
 12 | email: charles.murphy.1@ulaval.ca
 13 | Submitted as part of the 2019 NetSI Collabathon.
 14 | """
 15 | 
 16 | from .base import BaseReconstructor
 17 | import numpy as np
 18 | from scipy.linalg import eig
 19 | from ..utilities import create_graph, threshold
 20 | 
 21 | 
 22 | class OUInference(BaseReconstructor):
 23 |     """Assumes a Orstein-Uhlenbeck generative model."""
 24 | 
 25 |     def fit(self, TS, threshold_type='range', **kwargs):
 26 |         """Infers the coupling coefficients assuming a Orstein-Uhlenbeck process
 27 |         generative model.
 28 | 
 29 |         The results dictionary also stores the weight matrix as
 30 |         `'weights_matrix'`, the covariance matrix in `covariance_matrix`
 31 |         and the thresholded version of the weight matrix as
 32 |         `'thresholded_matrix'`.
 33 | 
 34 |         Parameters
 35 |         ----------
 36 | 
 37 |         TS (np.ndarray)
 38 |             Array consisting of :math:`L` observations from :math:`N`
 39 |             sensors.
 40 | 
 41 |         threshold_type (str)
 42 |             Which thresholding function to use on the matrix of
 43 |             weights. See `netrd.utilities.threshold.py` for
 44 |             documentation. Pass additional arguments to the thresholder
 45 |             using ``**kwargs``.
 46 | 
 47 |         Returns
 48 |         -------
 49 | 
 50 |         G (nx.Graph)
 51 |             A reconstructed graph with :math:`N` nodes.
 52 | 
 53 |         """
 54 |         N, T = np.shape(TS)
 55 | 
 56 |         temperatures = np.mean((TS[:, 1:] - TS[:, :-1]) ** 2, 1) / 2
 57 |         index = np.where(temperatures > 0)
 58 |         Y = TS[index, :][0]
 59 | 
 60 |         yCovariance = np.cov(Y)
 61 |         index_pair = np.array([(i, j) for i in index for j in index])
 62 |         weights = inverse_method(-yCovariance, temperatures)
 63 |         self.results['covariance_matrix'] = np.zeros([N, N])
 64 |         self.results['covariance_matrix'][index_pair] = yCovariance
 65 | 
 66 |         self.results['weights_matrix'] = np.zeros([N, N])
 67 |         self.results['weights_matrix'][index_pair] = weights
 68 | 
 69 |         # threshold the network
 70 |         W_thresh = threshold(self.results['weights_matrix'], threshold_type, **kwargs)
 71 |         self.results['thresholded_matrix'] = W_thresh
 72 | 
 73 |         # construct the network
 74 |         self.results['graph'] = create_graph(W_thresh)
 75 |         G = self.results['graph']
 76 | 
 77 |         return G
 78 | 
 79 | 
 80 | def inverse_method(covariance, temperatures):
 81 |     """This function finds the weights of an heterogenous Ornstein-Uhlenbeck
 82 |     process
 83 |     covariance  = covariance matrix of the zero-mean signal
 84 | 
 85 |     Parameters
 86 |     ----------
 87 | 
 88 |     covariance (np.ndarray): Covariance matrix of the zero-mean signal.
 89 | 
 90 |     temperatures (np.ndarray): Diffusion coefficient of each of the signals.
 91 | 
 92 |     Returns
 93 |     -------
 94 | 
 95 |     weights (np.ndarray): Coupling between nodes under the OU process asumption.
 96 | 
 97 |     """
 98 | 
 99 |     if len(np.shape(temperatures)) == 1:
100 |         T = np.diag(temperatures)
101 |     elif len(np.shape(temperatures)) == 2:
102 |         T = temperatures
103 |     else:
104 |         raise ValueError("temperature must either be a vector or a matrix.")
105 | 
106 |     n, m = np.shape(covariance)
107 | 
108 |     eig_val, eig_vec = eig(-covariance)
109 |     eig_val = np.diag(eig_val)
110 | 
111 |     e_mat = np.matmul(eig_vec.T, np.matmul(T, eig_vec))
112 | 
113 |     eig_val = np.matmul(np.ones([n, n]), eig_val)
114 |     eig_val = (eig_val + eig_val.T) ** (-1)
115 |     eig_val = eig_val.real
116 |     weights = -np.matmul(eig_vec, np.matmul(2 * eig_val * e_mat, eig_vec.T))
117 | 
118 |     return weights
119 | 


--------------------------------------------------------------------------------
/netrd/distance/communicability_jsd.py:
--------------------------------------------------------------------------------
  1 | """
  2 | communicability_jsd.py
  3 | --------------------------
  4 | 
  5 | Distance measure based on the Jensen-Shannon Divergence
  6 | between the communicability sequence of two graphs as
  7 | defined in:
  8 | 
  9 | Chen, D., Shi, D. D., Qin, M., Xu, S. M., & Pan, G. J. (2018).
 10 | Complex network comparison based on communicability
 11 | sequence entropy. Physical Review E, 98(1), 012319.
 12 | 
 13 | https://journals.aps.org/pre/abstract/10.1103/PhysRevE.98.012319
 14 | 
 15 | author: Brennan Klein
 16 | email: brennanjamesklein@gmail.com
 17 | Submitted as part of the 2019 NetSI Collabathon.
 18 | 
 19 | """
 20 | 
 21 | import networkx as nx
 22 | import numpy as np
 23 | from .base import BaseDistance
 24 | from ..utilities import entropy, undirected, unweighted
 25 | 
 26 | 
 27 | class CommunicabilityJSD(BaseDistance):
 28 |     """Jensen-Shannon divergence between communicability sequences."""
 29 | 
 30 |     @undirected
 31 |     @unweighted
 32 |     def dist(self, G1, G2):
 33 |         r"""Compares the communicability matrix of two graphs.
 34 | 
 35 |         This distance is based on the communicability matrix, :math:`C`, of
 36 |         a graph consisting of elements :math:`c_{ij}` which are values
 37 |         corresponding to the numbers of shortest paths of length :math:`k`
 38 |         between nodes :math:`i` and :math:`j`.
 39 | 
 40 |         The commmunicability matrix is symmetric, which means the
 41 |         communicability sequence is formed by flattening the upper
 42 |         triangular of :math:`C`, which is then normalized to create the
 43 |         communicability sequence, :math:`P`.
 44 | 
 45 |         The communicability sequence entropy distance between two graphs,
 46 |         `G1` and `G2`, is the Jensen-Shannon divergence between these
 47 |         communicability sequence distributions, :math:`P1` and :math:`P2`
 48 |         of the two graphs.
 49 | 
 50 |         Parameters
 51 |         ----------
 52 | 
 53 |         G1, G2 (nx.Graph)
 54 |             two graphs
 55 | 
 56 |         Returns
 57 |         -------
 58 | 
 59 |         dist (float)
 60 |             between zero and one, this is the communicability sequence
 61 |             distance bewtween `G1` and `G2`.
 62 | 
 63 |         Notes
 64 |         -----
 65 | 
 66 |         This function uses the networkx approximation of the
 67 |         communicability of a graph, `nx.communicability_exp`, which
 68 |         requires `G1` and `G2` to be simple undirected networks. In
 69 |         addition to the final distance scalar, `self.results` stores the
 70 |         two vectors :math:`P1` and :math:`P2`, their mixed vector,
 71 |         :math:`P0`, and their associated entropies.
 72 | 
 73 | 
 74 |         References
 75 |         ----------
 76 | 
 77 |         .. [1] Estrada, E., & Hatano, N. (2008). Communicability in complex
 78 |                networks. Physical Review E, 77(3), 036111.
 79 |                https://journals.aps.org/pre/abstract/10.1103/PhysRevE.77.036111
 80 | 
 81 |         .. [2] Chen, D., Shi, D. D., Qin, M., Xu, S. M., & Pan,
 82 |                G. J. (2018).  Complex network comparison based on
 83 |                communicability sequence entropy. Physical Review E, 98(1),
 84 |                012319.
 85 | 
 86 |         """
 87 | 
 88 |         N1 = G1.number_of_nodes()
 89 |         N2 = G2.number_of_nodes()
 90 | 
 91 |         C1 = nx.communicability_exp(G1)
 92 |         C2 = nx.communicability_exp(G2)
 93 | 
 94 |         Ca1 = np.zeros((N1, N1))
 95 |         Ca2 = np.zeros((N2, N2))
 96 | 
 97 |         for i in range(Ca1.shape[0]):
 98 |             Ca1[i] = np.array(list(C1[i].values()))
 99 |         for i in range(Ca2.shape[0]):
100 |             Ca2[i] = np.array(list(C2[i].values()))
101 | 
102 |         lil_sigma1 = np.triu(Ca1).flatten()
103 |         lil_sigma2 = np.triu(Ca2).flatten()
104 | 
105 |         big_sigma1 = sum(lil_sigma1[np.nonzero(lil_sigma1)[0]])
106 |         big_sigma2 = sum(lil_sigma2[np.nonzero(lil_sigma2)[0]])
107 | 
108 |         P1 = lil_sigma1 / big_sigma1
109 |         P2 = lil_sigma2 / big_sigma2
110 |         P1 = np.array(sorted(P1))
111 |         P2 = np.array(sorted(P2))
112 | 
113 |         dist = entropy.js_divergence(P1, P2)
114 | 
115 |         self.results['P1'] = P1
116 |         self.results['P2'] = P2
117 |         self.results['dist'] = dist
118 | 
119 |         return dist
120 | 


--------------------------------------------------------------------------------
/netrd/reconstruction/thouless_anderson_palmer.py:
--------------------------------------------------------------------------------
  1 | """
  2 | thouless_anderson_palmer.py
  3 | ---------------------
  4 | Reconstruction of graphs using a Thouless-Anderson-Palmer
  5 | mean field approximation
  6 | author: Brennan Klein
  7 | email: brennanjamesklein at gmail dot com
  8 | submitted as part of the 2019 NetSI Collabathon
  9 | """
 10 | from .base import BaseReconstructor
 11 | import numpy as np
 12 | from scipy import linalg
 13 | from ..utilities import create_graph, threshold
 14 | 
 15 | 
 16 | class ThoulessAndersonPalmer(BaseReconstructor):
 17 |     """Uses Thouless-Anderson-Palmer mean field approximation."""
 18 | 
 19 |     def fit(self, TS, threshold_type='range', **kwargs):
 20 |         """Infer inter-node coupling weights using a Thouless-Anderson-Palmer mean
 21 |         field approximation.
 22 | 
 23 |         From the paper: "Similar to naive mean field, TAP works well only
 24 |         in the regime of large sample sizes and small coupling variability.
 25 |         However, this method leads to poor inference results in the regime
 26 |         of small sample sizes and/or large coupling variability." For
 27 |         details see [1]_.
 28 | 
 29 |         The results dictionary also stores the weight matrix as
 30 |         `'weights_matrix'` and the thresholded version of the weight matrix
 31 |         as `'thresholded_matrix'`.
 32 | 
 33 |         Parameters
 34 |         ----------
 35 | 
 36 |         TS (np.ndarray)
 37 |             Array consisting of :math:`L` observations from :math:`N`
 38 |             sensors.
 39 | 
 40 |         threshold_type (str)
 41 |             Which thresholding function to use on the matrix of
 42 |             weights. See `netrd.utilities.threshold.py` for
 43 |             documentation. Pass additional arguments to the thresholder
 44 |             using ``**kwargs``.
 45 | 
 46 |         Returns
 47 |         -------
 48 |         G (nx.Graph or nx.DiGraph)
 49 |             a reconstructed graph.
 50 | 
 51 |         References
 52 |         -----------
 53 | 
 54 |         .. [1] https://github.com/nihcompmed/network-inference/blob/master/sphinx/codesource/inference.py
 55 | 
 56 |         """
 57 | 
 58 |         N, L = np.shape(TS)  # N nodes, length L
 59 |         m = np.mean(TS, axis=1)  # empirical value
 60 | 
 61 |         # A matrix
 62 |         A = 1 - m**2
 63 |         A_inv = np.diag(1 / A)
 64 |         A = np.diag(A)
 65 |         ds = TS.T - m  # equal time correlation
 66 |         C = np.cov(ds, rowvar=False, bias=True)
 67 |         C_inv = linalg.inv(C)
 68 | 
 69 |         s1 = TS[:, 1:]  # one-step-delayed correlation
 70 | 
 71 |         ds1 = s1.T - np.mean(s1, axis=1)
 72 |         D = cross_cov(ds1, ds[:-1])
 73 | 
 74 |         # predict naive mean field W:
 75 |         B = np.dot(D, C_inv)
 76 |         W_NMF = np.dot(A_inv, B)
 77 | 
 78 |         # TAP part: solving for Fi in the following equation
 79 |         # F(1-F)**2) = (1-m**2)sum_j W_NMF**2(1-m**2) ==> 0<F<1
 80 | 
 81 |         step = 0.001
 82 |         nloop = int(0.33 / step) + 2
 83 | 
 84 |         W2_NMF = W_NMF**2
 85 | 
 86 |         temp = np.empty(N)
 87 |         F = np.empty(N)
 88 | 
 89 |         for i in range(N):
 90 |             temp[i] = (1 - m[i] ** 2) * np.sum(W2_NMF[i, :] * (1 - m[:] ** 2))
 91 | 
 92 |             y = -1.0
 93 |             iloop = 0
 94 | 
 95 |             while y < 0 and iloop < nloop:
 96 |                 x = iloop * step
 97 |                 y = x * (1 - x) ** 2 - temp[i]
 98 |                 iloop += 1
 99 | 
100 |             F[i] = x
101 | 
102 |         # A_TAP matrix
103 |         A_TAP = np.empty(N)
104 |         for i in range(N):
105 |             A_TAP[i] = A[i, i] * (1 - F[i])
106 | 
107 |         A_TAP_inv = np.diag(1 / A_TAP)
108 | 
109 |         # predict W:
110 |         W = np.dot(A_TAP_inv, B)
111 |         self.results['weights_matrix'] = W
112 | 
113 |         # threshold the network
114 |         W_thresh = threshold(W, threshold_type, **kwargs)
115 |         self.results['thresholded_matrix'] = W_thresh
116 | 
117 |         # construct the network
118 |         self.results['graph'] = create_graph(W_thresh)
119 |         G = self.results['graph']
120 | 
121 |         return G
122 | 
123 | 
124 | def cross_cov(a, b):
125 |     """
126 |     cross_covariance
127 |     a,b -->  <(a - <a>)(b - <b>)>  (axis=0)
128 |     """
129 |     da = a - np.mean(a, axis=0)
130 |     db = b - np.mean(b, axis=0)
131 | 
132 |     return np.matmul(da.T, db) / a.shape[0]
133 | 


--------------------------------------------------------------------------------
/netrd/reconstruction/granger_causality.py:
--------------------------------------------------------------------------------
  1 | """
  2 | granger_causality.py
  3 | --------------
  4 | 
  5 | Graph reconstruction algorithm based on [1].
  6 | 
  7 | [1] P. Desrosiers, S. Labrecque, M. Tremblay, M. Bélanger, B. De Dorlodot,
  8 | D. C. Côté, "Network inference from functional experimental data", Proc. SPIE
  9 | 9690, Clinical and Translational Neurophotonics; Neural Imaging and Sensing;
 10 | and Optogenetics and Optical Manipulation, 969019 (2016);
 11 | 
 12 | author: Charles Murphy
 13 | email: charles.murphy.1@ulaval.ca
 14 | Submitted as part of the 2019 NetSI Collabathon.
 15 | """
 16 | 
 17 | import numpy as np
 18 | 
 19 | from .base import BaseReconstructor
 20 | from sklearn.linear_model import LinearRegression
 21 | from ..utilities import create_graph, threshold
 22 | 
 23 | 
 24 | class GrangerCausality(BaseReconstructor):
 25 |     """Uses the Granger causality between nodes."""
 26 | 
 27 |     def fit(self, TS, lag=1, threshold_type="range", **kwargs):
 28 |         r"""Reconstruct a network based on the Granger causality. To evaluate
 29 |         the effect of a time series :math:`j` over another, :math:`i`, it first
 30 |         evaluates the error :math:`e_1` given by an autoregressive model fit
 31 |         with :math:`i` alone. Then, it evaluates another error :math:`e_2`
 32 |         given by an autoregressive model trained to correlate the future of
 33 |         :math:`i` with the past of :math:`i` and :math:`j`. The Granger
 34 |         causality of node :math:`j` over :math:`i` is simply given by
 35 |         :math:`log(var(e_1) / var(e_2))``.
 36 | 
 37 |         It reconstructs the network by calculating the Granger
 38 |         causality for each pair of nodes.
 39 | 
 40 |         Parameters
 41 |         ----------
 42 | 
 43 |         TS (np.ndarray)
 44 |             Array consisting of :math:`L` observations from :math:`N`
 45 |             sensors.
 46 | 
 47 |         lag (int)
 48 |             Time lag to consider.
 49 | 
 50 |         threshold_type (str)
 51 |             Which thresholding function to use on the matrix of
 52 |             weights. See `netrd.utilities.threshold.py` for
 53 |             documentation. Pass additional arguments to the thresholder
 54 |             using ``**kwargs``.
 55 | 
 56 |         Returns
 57 |         --------
 58 | 
 59 |         G (nx.Graph)
 60 |             A reconstructed graph with :math:`N` nodes.
 61 | 
 62 |         """
 63 | 
 64 |         n = TS.shape[0]
 65 |         W = np.zeros([n, n])
 66 | 
 67 |         for i in range(n):
 68 |             xi, yi = GrangerCausality.split_data(TS[i, :], lag)
 69 | 
 70 |             for j in range(n):
 71 |                 xj, yj = GrangerCausality.split_data(TS[j, :], lag)
 72 |                 xij = np.concatenate([xi, xj], axis=-1)
 73 |                 reg1 = LinearRegression().fit(xi, yi)
 74 |                 reg2 = LinearRegression().fit(xij, yi)
 75 |                 err1 = yi - reg1.predict(xi)
 76 |                 err2 = yi - reg2.predict(xij)
 77 | 
 78 |                 std_i = np.std(err1)
 79 |                 std_ij = np.std(err2)
 80 | 
 81 |                 if std_i == 0:
 82 |                     W[j, i] = -99999999
 83 |                 elif std_ij == 0:
 84 |                     W[j, i] = 99999999
 85 |                 else:
 86 |                     W[j, i] = np.log(std_i) - np.log(std_ij)
 87 | 
 88 |         self.results["weights_matrix"] = W
 89 |         # threshold the network
 90 |         W_thresh = threshold(W, threshold_type, **kwargs)
 91 |         self.results["thresholded_matrix"] = W_thresh
 92 | 
 93 |         # construct the network
 94 |         self.results["graph"] = create_graph(W_thresh)
 95 |         G = self.results["graph"]
 96 | 
 97 |         return G
 98 | 
 99 |     @staticmethod
100 |     def split_data(TS, lag):
101 |         """From a single node time series, return a training dataset with
102 |         corresponding targets.
103 | 
104 |         Parameters
105 |         ----------
106 | 
107 |         TS (np.ndarray)
108 |             Array consisting of :math:`L` observations from :math:`N`
109 |             sensors.
110 | 
111 |         lag (int)
112 |             Time lag to consider.
113 | 
114 |         Returns
115 |         -------
116 | 
117 |         inputs (np.ndarray)
118 |             Training data for the inputs.
119 | 
120 |         targets (np.ndarray)
121 |             Training data for the targets.
122 | 
123 |         """
124 |         T = len(TS)
125 |         inputs = np.zeros([T - lag - 1, lag])
126 |         targets = np.zeros(T - lag - 1)
127 | 
128 |         for t in range(T - lag - 1):
129 |             inputs[t, :] = TS[t : lag + t]
130 |             targets[t] = TS[t + lag]
131 | 
132 |         return inputs, targets
133 | 


--------------------------------------------------------------------------------
/netrd/dynamics/kuramoto.py:
--------------------------------------------------------------------------------
  1 | """
  2 | kuramoto.py
  3 | -----------
  4 | Kuramoto model of oscillators.
  5 | 
  6 | author: Harrison Hartle
  7 | """
  8 | 
  9 | from .base import BaseDynamics
 10 | import networkx as nx
 11 | import numpy as np
 12 | import scipy.integrate as it
 13 | from ..utilities import unweighted
 14 | 
 15 | 
 16 | class Kuramoto(BaseDynamics):
 17 |     """Kuramoto model of oscillators."""
 18 | 
 19 |     @unweighted
 20 |     def simulate(self, G, L, dt=0.01, strength=1, phases=None, freqs=None):
 21 |         r"""Simulate Kuramoto model on a ground truth network.
 22 | 
 23 |         Kuramoto oscillators model synchronization processes. At each time
 24 |         step, each node adjusts its phase :math:`\theta_i` according to the
 25 |         equation
 26 | 
 27 |         .. math::
 28 |             \theta_i = \omega_i + \frac{\lambda}{N}\sum_{j=1}^{N}\sin\left(\theta_j - \theta_i\right),
 29 | 
 30 | 
 31 |         where :math:`\lambda`, is a coupling `strength` parameter and each node
 32 |         has an internal frequency :math:`\omega_i`; the `freqs` function
 33 |         parameter provides the option to initialize these frequencies with
 34 |         user-defined values (or leave as `None` to randomly initialize). Each
 35 |         node's initial phase :math:`\theta_{i0}` can be randomly initialized
 36 |         (the default behavior) or set by specifying the `phases` parameter.
 37 | 
 38 |         The results dictionary also stores the ground truth network as
 39 |         `'ground_truth'` and the internal frequencies of the process as
 40 |         `'internal_frequencies'`.
 41 | 
 42 |         For more information on the Kuramoto model, see the review essay
 43 |         included below.
 44 | 
 45 |         Parameters
 46 |         ----------
 47 | 
 48 |         G (nx.Graph)
 49 |             the input (ground-truth) graph with :math:`N` nodes.
 50 | 
 51 |         L (int)
 52 |             the length of the desired time series.
 53 | 
 54 |         dt (float)
 55 |             size of timestep for numerical integration.
 56 | 
 57 |         strength (float)
 58 |             coupling strength (prefactor for interaction terms).
 59 | 
 60 |         phases (np.ndarray)
 61 |             an :math:`N \times 1` array of initial phases.
 62 | 
 63 |         freqs (np.ndarray)
 64 |             an :math:`N \times 1` array of internal frequencies.
 65 | 
 66 |         Returns
 67 |         -------
 68 | 
 69 |         TS (np.ndarray)
 70 |             an :math:`N \times L` array of synthetic time series data.
 71 | 
 72 |         Examples
 73 |         --------
 74 | 
 75 |         .. code:: python
 76 | 
 77 |             G = nx.ring_of_cliques(4,16)
 78 |             N = G.number_of_nodes()
 79 |             L = int(1e4)
 80 |             omega = np.random.uniform(0.95, 1.05, N)
 81 |             dynamics = Kuramoto()
 82 |             TS = dynamics.simulate(G, L, dt=0.01, strength=0.3, freqs=omega)
 83 | 
 84 |         References
 85 |         ----------
 86 |         .. [1] F. Rodrigues, T. Peron, P. Ji, J. Kurths.
 87 |                The Kuramoto model in complex networks.
 88 |                https://arxiv.org/abs/1511.07139
 89 | 
 90 |         """
 91 |         A = nx.to_numpy_array(G)
 92 |         N = G.number_of_nodes()
 93 | 
 94 |         try:
 95 |             if phases is not None:
 96 |                 assert len(phases) == N
 97 |                 theta_0 = phases
 98 |             else:
 99 |                 theta_0 = 2 * np.pi * np.random.rand(N)
100 | 
101 |             if freqs is not None:
102 |                 assert len(freqs) == N
103 |                 omega = freqs
104 |             else:
105 |                 omega = np.random.uniform(0.9, 1.1, N)
106 | 
107 |         except AssertionError:
108 |             raise ValueError("Initial conditions must be None or lists of length N.")
109 | 
110 |         t = np.linspace(dt, L * dt, L)  # time-vector
111 |         one = np.ones(N)  # define a rate of change function
112 | 
113 |         def ddt_theta(theta, t, g, strength, A):
114 |             prefactor = strength / N
115 |             first = np.outer(one, theta)
116 |             second = np.outer(theta, one)
117 | 
118 |             return g + prefactor * (A * np.sin(first - second)).dot(one)
119 | 
120 |         # integrate the equations of motion numerically
121 |         args = (omega, strength, A)
122 |         TS_T = it.odeint(ddt_theta, theta_0, t, args=args)
123 | 
124 |         # odeint returns LxN result
125 |         # transposing yields reversed-order nodes => apply flipud.
126 |         TS = np.flipud(TS_T.T)
127 | 
128 |         # adjust phases
129 |         TS = TS % (2 * np.pi)
130 | 
131 |         self.results["internal_frequencies"] = omega
132 |         self.results["ground_truth"] = G
133 |         self.results["TS"] = TS
134 | 
135 |         return TS
136 | 


--------------------------------------------------------------------------------
/netrd/distance/resistance_perturbation.py:
--------------------------------------------------------------------------------
  1 | """
  2 | resistance_perturbation.py
  3 | --------------------------
  4 | 
  5 | Graph distance based on resistance perturbation (https://arxiv.org/abs/1605.01091v2)
  6 | 
  7 | author: Ryan J. Gallagher & Jessica T. Davis
  8 | 
  9 | Submitted as part of the 2019 NetSI Collabathon.
 10 | 
 11 | """
 12 | import numpy as np
 13 | import networkx as nx
 14 | from .base import BaseDistance
 15 | from ..utilities import undirected
 16 | 
 17 | 
 18 | class ResistancePerturbation(BaseDistance):
 19 |     """Compares the resistance matrices."""
 20 | 
 21 |     @undirected
 22 |     def dist(self, G1, G2, p=2):
 23 |         r"""The p-norm of the difference between two graph resistance matrices.
 24 | 
 25 |         The resistance perturbation distance changes if either graph is
 26 |         relabeled (it is not invariant under graph isomorphism), so node
 27 |         labels should be consistent between the two graphs being
 28 |         compared. The distance is not normalized.
 29 | 
 30 |         The resistance matrix of a graph :math:`G` is calculated as
 31 |         :math:`R = \text{diag}(L_i) 1^T + 1 \text{diag}(L_i)^T - 2L_i`,
 32 |         where :math:`L_i` is the Moore-Penrose pseudoinverse of the
 33 |         Laplacian of :math:`G`.
 34 | 
 35 |         The resistance perturbation distance between :math:`G_1` and
 36 |         :math:`G_2` is calculated as the :math:`p`-norm of the difference
 37 |         in their resitance matrices,
 38 | 
 39 |         .. math::
 40 |             d_{r(p)} = | R^{(1)} - R^{(2)} | = ( \sum_{i,j \in V} | R^{(1)}_{i,j} - R^{(2)}_{i,j} |^p )^{1/p},
 41 | 
 42 |         where :math:`R^{(1)}` and :math:`R^{(2)}` are the resistance
 43 |         matrices of :math:`G_1` and :math:`G_2` respectively. When :math:`p
 44 |         = \infty`, we have
 45 | 
 46 |         .. math::
 47 |             d_{r(\infty)} = \max_{i,j \in V} |R^{(1)}_{i,j} - R^{(2)}_{i,j}|.
 48 | 
 49 | 
 50 |         This method assumes that the input graphs are undirected; if
 51 |         directed graphs are used, it will coerce them to undirected graphs
 52 |         and emit a RuntimeWarning.
 53 | 
 54 |         The results dictionary also stores a 2-tuple of the underlying
 55 |         resistance matrices in the key `'resistance_matrices'`.
 56 | 
 57 |         Parameters
 58 |         ----------
 59 | 
 60 |         G1, G2 (nx.Graph)
 61 |             two networkx graphs to be compared.
 62 | 
 63 |         p (float or str, optional)
 64 |             :math:`p`-norm to take of the difference between the resistance
 65 |             matrices. Specify ``np.inf`` to take :math:`\infty`-norm.
 66 | 
 67 |         Returns
 68 |         -------
 69 |         dist (float)
 70 |             the distance between G1 and G2.
 71 | 
 72 |         References
 73 |         ----------
 74 | 
 75 |         .. [1] https://arxiv.org/abs/1605.01091v2
 76 | 
 77 |         """
 78 |         # Check for connected graphs
 79 |         if not nx.is_connected(G1) or not nx.is_connected(G2):
 80 |             raise ValueError(
 81 |                 "Resistance perturbation is undefined for disconnected graphs."
 82 |             )
 83 | 
 84 |         # Get resistance matrices
 85 |         R1 = get_resistance_matrix(G1)
 86 |         R2 = get_resistance_matrix(G2)
 87 |         self.results['resistance_matrices'] = R1, R2
 88 | 
 89 |         # Get resistance perturbation distance
 90 |         if not np.isinf(p):
 91 |             dist = np.power(np.sum(np.power(np.abs(R1 - R2), p)), 1 / p)
 92 |         else:
 93 |             dist = np.amax(np.abs(R1 - R2))
 94 |         self.results['dist'] = dist
 95 | 
 96 |         return dist
 97 | 
 98 | 
 99 | def get_resistance_matrix(G):
100 |     """Get the resistance matrix of a networkx graph.
101 | 
102 |     The resistance matrix of a graph :math:`G` is calculated as
103 |     :math:`R = \text{diag}(L_i) 1^T + 1 \text{diag}(L_i)^T - 2L_i`,
104 |     where L_i is the Moore-Penrose pseudoinverse of the Laplacian of :math:`G`.
105 | 
106 |     Parameters
107 |     ----------
108 |     G (nx.Graph): networkx graph from which to get its resistance matrix
109 | 
110 |     Returns
111 |     -------
112 |     R (np.array): resistance matrix of G
113 | 
114 |     """
115 |     # Get adjacency matrix
116 |     n = len(G.nodes())
117 |     A = nx.to_numpy_array(G)
118 |     # Get Laplacian
119 |     D = np.diag(A.sum(axis=0))
120 |     L = D - A
121 |     # Get Moore-Penrose pseudoinverses of Laplacian
122 |     # Note: converts to dense matrix and introduces n^2 operation here
123 |     I = np.eye(n)
124 |     J = (1 / n) * np.ones((n, n))
125 |     L_i = np.linalg.solve(L + J, I) - J
126 |     # Get resistance matrix
127 |     ones = np.ones(n)
128 |     ones = ones.reshape((1, n))
129 |     L_i_diag = np.diag(L_i)
130 |     L_i_diag = L_i_diag.reshape((n, 1))
131 |     R = np.dot(L_i_diag, ones) + np.dot(ones.T, L_i_diag.T) - 2 * L_i
132 |     return R
133 | 


--------------------------------------------------------------------------------
/netrd/reconstruction/correlation_spanning_tree.py:
--------------------------------------------------------------------------------
  1 | """
  2 | correlation_spanning_tree.py
  3 | ----------------------------
  4 | 
  5 | Graph reconstruction algorithm based on Mantegna, R. N. (1999). Hierarchical structure in
  6 | financial markets. The European Physical Journal B-Condensed Matter and Complex Systems,
  7 | 11(1), 193-197. DOI https://doi.org/10.1007/s100510050929
  8 | https://link.springer.com/article/10.1007/s100510050929
  9 | 
 10 | author: Matteo Chinazzi
 11 | Submitted as part of the 2019 NetSI Collabathon.
 12 | """
 13 | 
 14 | from .base import BaseReconstructor
 15 | import numpy as np
 16 | from scipy.sparse.csgraph import minimum_spanning_tree
 17 | 
 18 | try:
 19 |     from networkx import from_scipy_sparse_array as from_sparse
 20 | except ImportError:
 21 |     from networkx import from_scipy_sparse_matrix as from_sparse
 22 | 
 23 | 
 24 | class CorrelationSpanningTree(BaseReconstructor):
 25 |     """Minimum spanning tree connecting the sensors."""
 26 | 
 27 |     def fit(self, TS, distance='root_inv', **kwargs):
 28 |         r"""Create a minimum spanning tree connecting the sensors.
 29 | 
 30 |         The empirical correlation matrix is used to first compute a
 31 |         distance matrix and then to create a minimum spanning tree
 32 |         connecting all the sensors in the data.  This method implements the
 33 |         methodology described in [1]_ and applied in the context of creating
 34 |         a graph connecting the stocks of a portfolio of generated by
 35 |         looking at the correlations between the daily time series of stock
 36 |         prices.
 37 | 
 38 |         The results dictionary also stores the distance matrix (computed
 39 |         from the correlations) as `'distance_matrix'`.
 40 | 
 41 |         Parameters
 42 |         ----------
 43 | 
 44 |         TS (np.ndarray)
 45 |             :math:`N \times L` array consisting of :math:`L` observations
 46 |             from :math:`N` sensors.
 47 | 
 48 |         distance (str)
 49 |             'inv_square' calculates distance as :math:`1-corr_{ij}^2`
 50 |             as in [1]_. 'root_inv' calculates distance as
 51 |             :math:`\sqrt{2 (1-corr_{ij})}` [2]_.
 52 | 
 53 |         Returns
 54 |         -------
 55 | 
 56 |         G (nx.Graph)
 57 |             A reconstructed graph with :math:`N` nodes.
 58 | 
 59 |         Examples
 60 |         --------
 61 |         .. code:: python
 62 | 
 63 |             import numpy as np
 64 |             import networkx as nx
 65 |             from matplotlib import pyplot as plt
 66 |             from netrd.reconstruction import CorrelationSpanningTree
 67 | 
 68 |             N = 25
 69 |             T = 300
 70 |             M = np.random.normal(size=(N,T))
 71 | 
 72 |             print('Create correlated time series')
 73 |             market_mode = 0.4*np.random.normal(size=(1,T))
 74 |             M += market_mode
 75 | 
 76 |             sector_modes = {d: 0.5*np.random.normal(size=(1,T)) for d in range(5)}
 77 |             for sector_mode, vals in sector_modes.items():
 78 |                 M[sector_mode*5:(sector_mode+1)*5,:] += vals
 79 | 
 80 |             print('Link node colors to sectors')
 81 |             colors = ['b','r','g','y','m']
 82 |             node_colors = [color for color in colors for __ in range(5)]
 83 | 
 84 |             print('Network reconstruction step')
 85 |             cst_net = CorrelationSpanningTree()
 86 |             G = cst_net.fit(M)
 87 | 
 88 |             print('Plot reconstructed spanning tree')
 89 |             fig, ax = plt.subplots()
 90 |             nx.draw(G, ax=ax, node_color=node_colors)
 91 | 
 92 | 
 93 |         References
 94 |         ----------
 95 | 
 96 |         .. [1] Mantegna, R. N. (1999). Hierarchical structure in financial
 97 |                markets.  The European Physical Journal B-Condensed Matter
 98 |                and Complex Systems, 11(1), 193-197.  DOI
 99 |                https://doi.org/10.1007/s100510050929
100 |                https://link.springer.com/article/10.1007/s100510050929
101 | 
102 |         .. [2] Bonanno, G., Caldarelli, G., Lillo, F. & Mantegna,
103 |                R. N. (2003) Topology of correlation-based minimal spanning
104 |                trees in real and model markets.  Physical Review E 68.
105 | 
106 |         .. [3] Vandewalle, N., Brisbois, F. & Tordoir, X. (2001) Non-random
107 |                topology of stock markets. Quantitative Finance 1, 372–374.
108 | 
109 |         """
110 |         C = np.corrcoef(TS)  # Empirical correlation matrix
111 | 
112 |         D = (
113 |             np.sqrt(2 * (1 - C)) if distance == 'root_inv' else 1 - np.square(C)
114 |         )  # Distance matrix
115 | 
116 |         self.results['distance_matrix'] = D
117 | 
118 |         MST = minimum_spanning_tree(D)  # Minimum Spanning Tree
119 | 
120 |         G = from_sparse(MST)
121 | 
122 |         self.results['graph'] = G
123 | 
124 |         return G
125 | 


--------------------------------------------------------------------------------
/netrd/distance/dk_series.py:
--------------------------------------------------------------------------------
  1 | """
  2 | dk_series.py
  3 | --------------------------
  4 | 
  5 | Graph distance based on the dk-series.
  6 | 
  7 | author: Brennan Klein & Stefan McCabe
  8 | email: brennanjamesklein@gmail.com
  9 | Submitted as part of the 2019 NetSI Collabathon.
 10 | 
 11 | """
 12 | 
 13 | 
 14 | import networkx as nx
 15 | import numpy as np
 16 | from scipy.sparse import coo_matrix
 17 | from collections import defaultdict
 18 | from .base import BaseDistance
 19 | from ..utilities import entropy, undirected, unweighted
 20 | 
 21 | 
 22 | class dkSeries(BaseDistance):
 23 |     """Compare graphs based on their :math:`dk`-series."""
 24 | 
 25 |     @unweighted
 26 |     @undirected
 27 |     def dist(self, G1, G2, d=2):
 28 |         r"""Compute the distance between two graphs by using the Jensen-Shannon
 29 |         divergence between the :math:`dk`-series of the graphs.
 30 | 
 31 |         The :math:`dk`-series of a graph is the collection of distributions of
 32 |         size :math:`d` subgraphs, where nodes are labelled by degrees. For
 33 |         simplicity, we currently consider only the :math:`1k`-series, i.e., the
 34 |         degree distribution, or the :math:`2k`-series, i.e., the
 35 |         distribution of edges between nodes of degree :math:`(k_i, k_j)`. The
 36 |         distance between these :math:`dk`-series is calculated using the
 37 |         Jensen-Shannon divergence.
 38 | 
 39 |         Parameters
 40 |         ----------
 41 | 
 42 |         G1, G2 (nx.Graph)
 43 |             two networkx graphs to be compared
 44 | 
 45 |         d (int)
 46 |             the size of the subgraph to consider
 47 | 
 48 |         Returns
 49 |         -------
 50 | 
 51 |         dist (float)
 52 |             the distance between `G1` and `G2`.
 53 | 
 54 |         References
 55 |         ----------
 56 | 
 57 |         .. [1] Orsini, Chiara, Marija M. Dankulov, Pol Colomer-de-Simón,
 58 |                Almerima Jamakovic, Priya Mahadevan, Amin Vahdat, Kevin E.
 59 |                Bassler, et al. 2015. “Quantifying Randomness in Real Networks.”
 60 |                Nature Communications 6 (1). https://doi.org/10.1038/ncomms9627.
 61 | 
 62 |         """
 63 | 
 64 |         N = max(len(G1), len(G2))
 65 | 
 66 |         if d == 1:
 67 |             from .degree_divergence import DegreeDivergence
 68 | 
 69 |             degdiv = DegreeDivergence()
 70 |             dist = degdiv.dist()
 71 | 
 72 |             # the 2k-distance stores the distribution in a sparse matrix,
 73 |             # so here we take the output of DegreeDivergence and
 74 |             # produce a comparable object
 75 |             hist1, hist2 = degdiv.results["degree_histograms"]
 76 |             hist1 /= len(G1)
 77 |             hist2 /= len(G2)
 78 |             hist1 = coo_matrix(hist1)
 79 |             hist2 = coo_matrix(hist2)
 80 | 
 81 |             self.results["dk_distributions"] = hist1, hist2
 82 | 
 83 |         elif d == 2:
 84 |             D1 = dk2_series(G1, N)
 85 |             D2 = dk2_series(G2, N)
 86 | 
 87 |             # store the 2K-distributions
 88 |             self.results["dk_distributions"] = D1, D2
 89 | 
 90 |             # flatten matrices. this is safe because we've padded to the same size
 91 |             G1_dk_normed = D1.toarray()[np.triu_indices(N)].flatten()
 92 |             G2_dk_normed = D2.toarray()[np.triu_indices(N)].flatten()
 93 | 
 94 |             assert np.isclose(G1_dk_normed.sum(), 1)
 95 |             assert np.isclose(G2_dk_normed.sum(), 1)
 96 | 
 97 |             dist = entropy.js_divergence(G1_dk_normed, G2_dk_normed)
 98 |         else:
 99 |             raise NotImplementedError()
100 | 
101 |         self.results["dist"] = dist
102 |         return dist
103 | 
104 | 
105 | def dk2_series(G, N=None):
106 |     """
107 |     Calculate the 2k-series (i.e. the number of edges between
108 |     degree-labelled nodes) for G.
109 |     """
110 | 
111 |     if N is None:
112 |         N = len(G)
113 | 
114 |     k_dict = dict(nx.degree(G))
115 |     dk2 = defaultdict(int)
116 | 
117 |     for i, j in G.edges:
118 |         k_i = k_dict[i]
119 |         k_j = k_dict[j]
120 | 
121 |         # We're enforcing order here because at the end we're going to
122 |         # leverage that all the information can be stored in the upper
123 |         # triangular for convenience.
124 |         if k_i <= k_j:
125 |             dk2[(k_i, k_j)] += 1
126 |         else:
127 |             dk2[(k_j, k_i)] += 1
128 | 
129 |     # every edge should be counted once
130 |     assert sum(list(dk2.values())) == G.size()
131 | 
132 |     # convert from dict to sparse matrix
133 |     row = [i for (i, j) in dk2.keys()]
134 |     col = [j for (i, j) in dk2.keys()]
135 |     data = [x for x in dk2.values()]
136 | 
137 |     D = coo_matrix((data, (row, col)), shape=(N, N))
138 | 
139 |     # this should be normalized by the number of edges
140 |     D = D / G.size()
141 | 
142 |     return D
143 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | [![DOI](https://joss.theoj.org/papers/10.21105/joss.02990/status.svg)](https://doi.org/10.21105/joss.02990)
  2 | [![PyPI version](https://badge.fury.io/py/netrd.svg)](https://badge.fury.io/py/netrd)
  3 | [![ReadTheDocs](https://img.shields.io/readthedocs/netrd.svg)](
  4 |     https://netrd.readthedocs.io)
  5 | ![CI](https://github.com/netsiphd/netrd/workflows/build/badge.svg)
  6 | 
  7 | # netrd: A library for network {reconstruction, distances, dynamics}
  8 | 
  9 | This library provides a consistent, NetworkX-based interface to various
 10 | utilities for graph distances, graph reconstruction from time series data, and
 11 | simulated dynamics on networks. 
 12 | 
 13 | Some resources that maybe of interest:
 14 | 
 15 | * A [tutorial](https://netrd.readthedocs.io/en/latest/tutorial.html) on how to use the library
 16 | * The API [reference](https://netrd.readthedocs.io/en/latest/) 
 17 | * A [notebook](https://nbviewer.jupyter.org/github/netsiphd/netrd/blob/master/notebooks/example.ipynb) showing advanced usage
 18 | 
 19 | # Installation
 20 | 
 21 | `netrd` is easy to install through pip:
 22 | 
 23 | ```
 24 | pip install netrd
 25 | ```
 26 | 
 27 | If you are thinking about contributing to `netrd`, you can install a
 28 | development version by executing
 29 | 
 30 | ```
 31 | git clone https://github.com/netsiphd/netrd
 32 | cd netrd
 33 | pip install .
 34 | ```
 35 | 
 36 | # Usage
 37 | 
 38 | ## Reconstructing a graph
 39 | 
 40 | <p align="center">
 41 | <img src="netrd_reconstruction_example.png" alt="example reconstruction" width="95%"/>
 42 | </p>
 43 | 
 44 | The basic usage of a graph reconstruction algorithm is as follows:
 45 | 
 46 | ```python
 47 | from netrd.reconstruction import CorrelationMatrix
 48 | import numpy as np
 49 | # 100 nodes, 1000 observations
 50 | TS = np.random.random((100, 1000))
 51 | 
 52 | reconstructor = CorrelationMatrix()
 53 | G = reconstructor.fit(TS, threshold_type='degree', avg_k=15)
 54 | # or alternately, G = reconstructor.results['graph']
 55 | ```
 56 | 
 57 | Here, `TS` is an N x L numpy array consisting of L
 58 | observations for each of N sensors. This constrains the graphs
 59 | to have integer-valued nodes.
 60 | 
 61 | The `results` dict object, in addition to containing the graph
 62 | object, may also contain objects created as a side effect of
 63 | reconstructing the network, which may be useful for debugging or
 64 | considering goodness of fit. What is returned will vary between
 65 | reconstruction algorithms.
 66 | 
 67 | Many reconstruction algorithms create a dense matrix of weights and
 68 | use additional parameters to describe how to create a sparse graph; the
 69 | [tutorial](https://netrd.readthedocs.io/en/latest/tutorial.html) has more
 70 | details on these parameters.
 71 | 
 72 | 
 73 | ## Distances between graphs
 74 | 
 75 | <p align="center">
 76 | <img src="netrd_distance_example.png" alt="example distance" width="95%"/>
 77 | </p>
 78 | 
 79 | The basic usage of a distance algorithm is as follows:
 80 | 
 81 | ```python
 82 | from netrd.distance import QuantumJSD
 83 | import networkx as nx
 84 | G1 = nx.fast_gnp_random_graph(1000, .1)
 85 | G2 = nx.fast_gnp_random_graph(1000, .1)
 86 | 
 87 | dist_obj = QuantumJSD()
 88 | distance = dist_obj.dist(G1, G2)
 89 | # or alternatively: distance = dist_obj.results['dist']
 90 | ```
 91 | 
 92 | Here, `G1` and `G2` are `nx.Graph` objects (or subclasses such as
 93 | `nx.DiGraph`). The results dictionary holds the distance value, as
 94 | well as any other values that were computed as a side effect.
 95 | 
 96 | ## Dynamics on graphs
 97 | 
 98 | <p align="center">
 99 | <img src="netrd_dynamics_example.png" alt="example distance" width="95%"/>
100 | </p>
101 | 
102 | The basic usage of a dynamics algorithm is as follows:
103 | 
104 | ```python
105 | from netrd.dynamics import VoterModel
106 | import networkx as nx
107 | ground_truth = nx.karate_club_graph()
108 | 
109 | dynamics_model = VoterModel()
110 | synthetic_TS = dynamics_model.simulate(ground_truth, 1000)
111 | # this is the same structure as the input data to a reconstructor
112 | # G = CorrelationMatrix().fit(synthetic_TS)
113 | ```
114 | 
115 | This produces a numpy array of time series data.
116 | 
117 | 
118 | # Contributing
119 | 
120 | Contributing guidelines can be found in [CONTRIBUTING.md](CONTRIBUTING.md).
121 | 
122 | 
123 | # Publications
124 | 
125 | * McCabe, S., Torres, L., LaRock, T., Haque, S. A., Yang, C.-H., Hartle, H., and
126 | Klein, B. (2021). netrd: A library for network reconstruction and graph
127 | distances. *Journal of Open Source Software* 6(62): 2990.
128 | doi:&nbsp;[10.21105/joss.02990](https://doi.org/10.21105/joss.02990).
129 | arXiv:&nbsp;[2010.16019](https://arxiv.org/abs/2010.16019).
130 |     + paper detailing the methods used in this package
131 | 
132 | * Hartle H., Klein B., McCabe S., Daniels A., St-Onge G., Murphy C., and
133 | Hébert-Dufresne L. (2020). Network comparison and the within-ensemble graph
134 | distance. *Proceedings of the Royal Society A* 476: 20190744.
135 | doi:&nbsp;[10.1098/rspa.2019.0744](http://doi.org/10.1098/rspa.2019.0744).
136 | arXiv:&nbsp;[2008.02415](https://arxiv.org/abs/2008.02415).
137 |     + recent work introducing a baseline measure for comparing graph distances
138 | 


--------------------------------------------------------------------------------
/netrd/utilities/entropy.py:
--------------------------------------------------------------------------------
  1 | """
  2 | entropy.py
  3 | ----------
  4 | 
  5 | Utility functions computing entropy of variables in time series data.
  6 | 
  7 | author: Chia-Hung Yang
  8 | 
  9 | Submitted as part of the 2019 NetSI Collabathon.
 10 | """
 11 | 
 12 | from collections import defaultdict
 13 | import numpy as np
 14 | from scipy.stats import entropy as sp_entropy
 15 | 
 16 | 
 17 | def js_divergence(P, Q):
 18 |     """Jensen-Shannon divergence between `P` and `Q`.
 19 | 
 20 |     Parameters
 21 |     ----------
 22 | 
 23 |     P, Q (np.ndarray)
 24 |         Two discrete distributions represented as 1D arrays. They are
 25 |         assumed to have the same support
 26 | 
 27 |     Returns
 28 |     -------
 29 | 
 30 |     float
 31 |         The Jensen-Shannon divergence between `P` and `Q`.
 32 | 
 33 |     """
 34 |     M = 0.5 * (P + Q)
 35 |     jsd = 0.5 * (sp_entropy(P, M, base=2) + sp_entropy(Q, M, base=2))
 36 | 
 37 |     # If the input distributions are identical, floating-point error in the
 38 |     # construction of the mixture matrix can result in negative values that are
 39 |     # very close to zero. If one wants to compute the root-JSD metric, these
 40 |     # negative values lead to undesirable nans.
 41 |     if np.isclose(jsd, 0.0):
 42 |         return 0
 43 |     else:
 44 |         return jsd
 45 | 
 46 | 
 47 | def entropy_from_seq(var):
 48 |     r"""Return the Shannon entropy of a variable. This differs from
 49 |     Scipy's entropy by taking a sequence of observations as input
 50 |     rather than a histogram or probability distribution.
 51 | 
 52 |     Parameters
 53 |     ----------
 54 | 
 55 |     var (ndarray)
 56 |         1D array of observations of the variable.
 57 | 
 58 |     Notes
 59 |     -----
 60 | 
 61 |     1. :math:`H(X) = - \sum p(X) \log_2(p(X))`
 62 |     2. Data of the variable must be categorical.
 63 | 
 64 |     """
 65 |     return joint_entropy(var[:, np.newaxis])
 66 | 
 67 | 
 68 | def joint_entropy(data):
 69 |     r"""Joint entropy of all variables in the data.
 70 | 
 71 |     Parameters
 72 |     ----------
 73 |     data (np.ndarray)
 74 |         Array of data with variables as columns and observations as rows.
 75 | 
 76 |     Returns
 77 |     -------
 78 |     float
 79 |         Joint entropy of the variables of interests.
 80 | 
 81 |     Notes
 82 |     -----
 83 |     1. :math:`H(\{X_i\}) = - \sum p(\{X_i\}) \log_2(p(\{X_i\}))`
 84 |     2. The data of variables must be categorical.
 85 | 
 86 |     """
 87 |     # Entropy is computed through summing contribution of states with
 88 |     # non-zero empirical probability in the data
 89 |     count = defaultdict(int)
 90 |     for state in data:
 91 |         key = tuple(state)
 92 |         count[key] += 1
 93 | 
 94 |     return sp_entropy(list(count.values()), base=2)
 95 | 
 96 | 
 97 | def conditional_entropy(data, given):
 98 |     r"""Conditional entropy of variables in the data conditioned on
 99 |     a given set of variables.
100 | 
101 |     Parameters
102 |     ----------
103 |     data (np.ndarray)
104 |         Array of data with variables of interests as columns and
105 |         observations as rows.
106 | 
107 |     given (np.ndarray)
108 |         Array of data with the conditioned variables as columns and
109 |         observations as rows.
110 | 
111 |     Returns
112 |     -------
113 |     float
114 |         Conditional entrpoy of the variables :math:`\{X_i\}` of interest
115 |         conditioned on variables :math:`\{Y_j\}`.
116 | 
117 |     Notes
118 |     -----
119 |     1. :math:`H(\{X_i\}|\{Y_j\}) = - \sum p(\{X_i\}\cup\{Y_j\}) \log_2(p(\{X_i\}|\{Y_j\}))`
120 |     2. The data of vairiables must be categorical.
121 | 
122 |     """
123 |     joint = np.hstack((data, given))
124 |     entrp = joint_entropy(joint) - joint_entropy(given)
125 | 
126 |     return entrp
127 | 
128 | 
129 | def categorized_data(raw, n_bins):
130 |     """Categorize data.
131 | 
132 |     An entry in the returned array is the index of the bin of the
133 |     linearly-binned raw continuous data.
134 | 
135 |     Parameters
136 |     ----------
137 |     raw (np.ndarray)
138 |         Array of raw continuous data.
139 |     n_bins (int)
140 |         A universal number of bins for all the variables.
141 | 
142 |     Returns
143 |     -------
144 |     np.ndarray
145 |         Array of bin indices after categorizing the raw data.
146 | 
147 |     """
148 |     bins = linear_bins(raw, n_bins)
149 |     data = np.ones(raw.shape, dtype=int)
150 | 
151 |     # Find the index of bins each element in the raw data array belongs to
152 |     for (i, j), val in np.ndenumerate(raw):
153 |         data[i, j] = np.argmax(bins[1:, j] >= val)
154 | 
155 |     return data
156 | 
157 | 
158 | def linear_bins(raw, n_bins):
159 |     r"""Separators of linear bins for each variable in the raw data.
160 | 
161 |     Parameters
162 |     ----------
163 |     raw (np.ndarray)
164 |         Array of raw continuous data.
165 | 
166 |     n_bins (int)
167 |         A universal number of bins for all the variables.
168 | 
169 |     Returns
170 |     -------
171 |     np.ndarray
172 |         Array where a column is the separators of bins for a variable.
173 | 
174 |     Notes
175 |     -----
176 |     The bins are :math:`B_0 = [b_0, b_1]`, :math:`B_i = (b_i, b_{i+1}]`,
177 |     where :math:`b_i` s are the separators of bins.
178 | 
179 |     """
180 |     _min = raw.min(axis=0)
181 |     _max = raw.max(axis=0)
182 |     bins = np.array(
183 |         [np.linspace(start, end, num=n_bins + 1) for start, end in zip(_min, _max)]
184 |     )
185 |     return bins.T
186 | 


--------------------------------------------------------------------------------
/netrd/utilities/threshold.py:
--------------------------------------------------------------------------------
  1 | """
  2 | threshold.py
  3 | ------------
  4 | 
  5 | Utilities for thresholding matrices based on different criteria
  6 | 
  7 | author: Stefan McCabe (stefanmccabe at gmail dot com)
  8 | 
  9 | Submitted as part of the 2019 NetSI Collabathon.
 10 | 
 11 | """
 12 | import numpy as np
 13 | import warnings
 14 | 
 15 | 
 16 | def threshold_in_range(mat, **kwargs):
 17 |     r"""Threshold by setting values not within a list of ranges to zero.
 18 | 
 19 |     Parameters
 20 |     ----------
 21 |     mat (np.ndarray)
 22 |         A numpy array.
 23 | 
 24 |     cutoffs (list of tuples)
 25 |         When thresholding, include only edges whose correlations fall
 26 |         within a given range or set of ranges. The lower value must come
 27 |         first in each tuple. For example, to keep those values whose
 28 |         absolute value is between :math:`0.5` and :math:`1`, pass
 29 |         ``cutoffs=[(-1, -0.5), (0.5, 1)]``.
 30 | 
 31 |     Returns
 32 |     -------
 33 |     thresholded_mat (np.ndarray)
 34 |         the thresholded numpy array
 35 | 
 36 |     """
 37 |     if 'cutoffs' in kwargs:
 38 |         cutoffs = kwargs['cutoffs']
 39 |     else:
 40 |         warnings.warn(
 41 |             "Setting 'cutoffs' argument is strongly encouraged. Using cutoff range of (-1, 1).",
 42 |             RuntimeWarning,
 43 |         )
 44 |         cutoffs = [(-1, 1)]
 45 | 
 46 |     mask_function = np.vectorize(
 47 |         lambda x: any([x >= cutoff[0] and x <= cutoff[1] for cutoff in cutoffs])
 48 |     )
 49 |     mask = mask_function(mat)
 50 | 
 51 |     thresholded_mat = mat * mask
 52 | 
 53 |     if kwargs.get('binary', False):
 54 |         thresholded_mat = np.abs(np.sign(thresholded_mat))
 55 | 
 56 |     if kwargs.get('remove_self_loops', True):
 57 |         np.fill_diagonal(thresholded_mat, 0)
 58 | 
 59 |     return thresholded_mat
 60 | 
 61 | 
 62 | def threshold_on_quantile(mat, **kwargs):
 63 |     """Threshold by setting values below a given quantile to zero.
 64 | 
 65 |     Parameters
 66 |     ----------
 67 | 
 68 |     mat (np.ndarray)
 69 |         A numpy array.
 70 | 
 71 |     quantile (float)
 72 |         The threshold above which to keep an element of the array, e.g.,
 73 |         set to zero elements below the 90th quantile of the array.
 74 | 
 75 |     Returns
 76 |     -------
 77 |     thresholded_mat
 78 |         the thresholded numpy array
 79 | 
 80 |     """
 81 |     if 'quantile' in kwargs:
 82 |         quantile = kwargs['quantile']
 83 |     else:
 84 |         warnings.warn(
 85 |             "Setting 'quantile' argument is strongly recommended. Using target quantile of 0.9 for thresholding.",
 86 |             RuntimeWarning,
 87 |         )
 88 |         quantile = 0.9
 89 | 
 90 |     if kwargs.get('remove_self_loops', True):
 91 |         np.fill_diagonal(mat, 0)
 92 | 
 93 |     if quantile != 0:
 94 |         thresholded_mat = mat * (mat > np.percentile(mat, quantile * 100))
 95 |     else:
 96 |         thresholded_mat = mat
 97 | 
 98 |     if kwargs.get('binary', False):
 99 |         thresholded_mat = np.abs(np.sign(thresholded_mat))
100 | 
101 |     return thresholded_mat
102 | 
103 | 
104 | def threshold_on_degree(mat, **kwargs):
105 |     """Threshold by setting values below a given quantile to zero.
106 | 
107 |     Parameters
108 |     ----------
109 | 
110 |     mat (np.ndarray)
111 |         A numpy array.
112 | 
113 |     avg_k (float)
114 |         The average degree to target when thresholding the matrix.
115 | 
116 |     Returns
117 |     -------
118 |     thresholded_mat
119 |         the thresholded numpy array
120 | 
121 |     """
122 | 
123 |     if 'avg_k' in kwargs:
124 |         avg_k = kwargs['avg_k']
125 |     else:
126 |         warnings.warn(
127 |             "Setting 'avg_k' argument is strongly encouraged. Using average "
128 |             "degree of 1 for thresholding.",
129 |             RuntimeWarning,
130 |         )
131 |         avg_k = 1
132 | 
133 |     n = len(mat)
134 |     A = np.ones((n, n))
135 | 
136 |     if kwargs.get('remove_self_loops', True):
137 |         np.fill_diagonal(A, 0)
138 |         np.fill_diagonal(mat, 0)
139 | 
140 |     if np.mean(np.sum(A, 1)) <= avg_k:
141 |         # degenerate case: threshold the whole matrix
142 |         thresholded_mat = mat
143 |     else:
144 |         for m in sorted(mat.flatten()):
145 |             A[mat == m] = 0
146 |             if np.mean(np.sum(A, 1)) <= avg_k:
147 |                 break
148 |         thresholded_mat = mat * (mat > m)
149 | 
150 |     if kwargs.get('binary', False):
151 |         thresholded_mat = np.abs(np.sign(thresholded_mat))
152 | 
153 |     return thresholded_mat
154 | 
155 | 
156 | def threshold(mat, rule, **kwargs):
157 |     """A flexible interface to other thresholding functions.
158 | 
159 |     Parameters
160 |     ----------
161 | 
162 |     mat (np.ndarray)
163 |         A numpy array.
164 | 
165 |     rule (str)
166 |         A string indicating which thresholding function to invoke.
167 | 
168 |     kwargs (dict)
169 |         Named arguments to pass to the underlying threshold function.
170 | 
171 |     Returns
172 |     -------
173 |     thresholded_mat
174 |         the thresholded numpy array
175 | 
176 |     """
177 |     try:
178 |         if rule == 'degree':
179 |             return threshold_on_degree(mat, **kwargs)
180 |         elif rule == 'range':
181 |             return threshold_in_range(mat, **kwargs)
182 |         elif rule == 'quantile':
183 |             return threshold_on_quantile(mat, **kwargs)
184 |         elif rule == 'custom':
185 |             return kwargs['custom_thresholder'](mat)
186 |     except KeyError:
187 |         raise ValueError("missing threshold parameter")
188 | 


--------------------------------------------------------------------------------
/netrd/reconstruction/mean_field.py:
--------------------------------------------------------------------------------
  1 | """
  2 | exact_mean_field.py
  3 | ---------------------
  4 | Reconstruction of graphs using the exact mean field
  5 | author: Brennan Klein
  6 | email: brennanjamesklein at gmail dot com
  7 | submitted as part of the 2019 NetSI Collabathon
  8 | """
  9 | from .base import BaseReconstructor
 10 | import numpy as np
 11 | from scipy import linalg
 12 | from scipy.integrate import quad
 13 | from scipy.optimize import fsolve
 14 | from ..utilities import create_graph, threshold
 15 | 
 16 | 
 17 | class MeanField(BaseReconstructor):
 18 |     def fit(
 19 |         self, TS, exact=True, stop_criterion=True, threshold_type='range', **kwargs
 20 |     ):
 21 |         """Infer inter-node coupling weights using a mean field approximation.
 22 | 
 23 |         From the paper: "Exact mean field (eMF) is another mean field
 24 |         approximation, similar to naive mean field and thouless anderson
 25 |         palmer. We can improve the performance of this method by adding our
 26 |         stopping criterion. In general, eMF outperforms nMF and TAP, but it
 27 |         is still worse than FEM and MLE, especially in the limit of small
 28 |         sample sizes and large coupling variability." For details see [1]_.
 29 | 
 30 |         The results dictionary also stores the weight matrix as
 31 |         `'weights_matrix'` and the thresholded version of the weight matrix
 32 |         as `'thresholded_matrix'`.
 33 | 
 34 |         Parameters
 35 |         ----------
 36 | 
 37 |         TS (np.ndarray)
 38 |             Array consisting of :math:`L` observations from :math:`N` sensors.
 39 | 
 40 |         exact (bool)
 41 |             If True, use the exact mean field approximation. If False, use the
 42 |             naive mean field approximation.
 43 | 
 44 |         stop_criterion (bool)
 45 |             If True, prevent overly-long runtimes. Only applies for exact mean
 46 |             field.
 47 | 
 48 |         threshold_type (str)
 49 |             Which thresholding function to use on the matrix of
 50 |             weights. See `netrd.utilities.threshold.py` for
 51 |             documentation. Pass additional arguments to the thresholder
 52 |             using ``**kwargs``.
 53 | 
 54 |         Returns
 55 |         -------
 56 | 
 57 |         G (nx.Graph or nx.DiGraph)
 58 |             a reconstructed graph.
 59 | 
 60 |         References
 61 |         ----------
 62 | 
 63 |         .. [1] https://github.com/nihcompmed/network-inference/blob/master/sphinx/codesource/inference.py
 64 | 
 65 |         """
 66 |         N, L = np.shape(TS)  # N nodes, length L
 67 |         m = np.mean(TS, axis=1)  # empirical value
 68 | 
 69 |         # A matrix
 70 |         A = 1 - m**2
 71 |         A_inv = np.diag(1 / A)
 72 |         A = np.diag(A)
 73 | 
 74 |         ds = TS.T - m  # equal time correlation
 75 |         C = np.cov(ds, rowvar=False, bias=True)
 76 |         C_inv = linalg.inv(C)
 77 | 
 78 |         s1 = TS[:, 1:]  # one-step-delayed correlation
 79 |         ds1 = s1.T - np.mean(s1, axis=1)
 80 |         D = cross_cov(ds1, ds[:-1])
 81 | 
 82 |         # predict naive mean field W:
 83 |         B = np.dot(D, C_inv)
 84 | 
 85 |         if exact:
 86 |             # ---------------------------------------------------------------
 87 |             fun1 = (
 88 |                 lambda x, H: (1 / np.sqrt(2 * np.pi))
 89 |                 * np.exp(-(x**2) / 2)
 90 |                 * np.tanh(H + x * np.sqrt(delta))
 91 |             )
 92 | 
 93 |             fun2 = (
 94 |                 lambda x: (1 / np.sqrt(2 * np.pi))
 95 |                 * np.exp(-(x**2) / 2)
 96 |                 * (1 - np.square(np.tanh(H + x * np.sqrt(delta))))
 97 |             )
 98 | 
 99 |             W = np.empty((N, N))
100 | 
101 |             nloop = 100
102 | 
103 |             for i0 in range(N):
104 |                 cost = np.zeros(nloop + 1)
105 |                 delta = 1.0
106 | 
107 |                 def integrand(H):
108 |                     """
109 |                     Return the integrand of this function
110 |                     """
111 |                     y, err = quad(fun1, -np.inf, np.inf, args=(H,))
112 | 
113 |                     return y - m[i0]
114 | 
115 |                 for iloop in range(1, nloop):
116 |                     H = fsolve(integrand, 0.0)
117 |                     H = float(H)
118 | 
119 |                     a, err = quad(fun2, -np.inf, np.inf)
120 |                     a = float(a)
121 | 
122 |                     if a != 0:
123 |                         delta = (1 / (a**2)) * np.sum(
124 |                             (B[i0, :] ** 2) * (1 - m[:] ** 2)
125 |                         )
126 |                         W_temp = B[i0, :] / a
127 | 
128 |                     H_temp = np.dot(TS[:, :-1].T, W_temp)
129 |                     cost[iloop] = np.mean((s1.T[:, i0] - np.tanh(H_temp)) ** 2)
130 | 
131 |                     if stop_criterion and cost[iloop] >= cost[iloop - 1]:
132 |                         break
133 | 
134 |                 W[i0, :] = W_temp[:]
135 |         else:
136 |             W = np.dot(A_inv, B)
137 | 
138 |         # threshold the network
139 |         W_thresh = threshold(W, threshold_type, **kwargs)
140 | 
141 |         # construct the network
142 | 
143 |         self.results['graph'] = create_graph(W_thresh)
144 |         self.results['weights_matrix'] = W
145 |         self.results['thresholded_matrix'] = W_thresh
146 |         G = self.results['graph']
147 | 
148 |         return G
149 | 
150 | 
151 | def cross_cov(a, b):
152 |     """
153 |     cross_covariance
154 |     a,b -->  <(a - <a>)(b - <b>)>  (axis=0)
155 |     """
156 |     da = a - np.mean(a, axis=0)
157 |     db = b - np.mean(b, axis=0)
158 | 
159 |     return np.matmul(da.T, db) / a.shape[0]
160 | 


--------------------------------------------------------------------------------
/netrd/reconstruction/naive_transfer_entropy.py:
--------------------------------------------------------------------------------
  1 | """
  2 | naive_transfer_entropy.py
  3 | --------------
  4 | Graph reconstruction algorithm based on
  5 | Schreiber, T. (2000).  Measuring information transfer.
  6 | Physical Review Letters, 85(2):461–464
  7 | https://journals.aps.org/prl/abstract/10.1103/PhysRevLett.85.461
  8 | 
  9 | author: Chia-Hung Yang and Brennan Klein
 10 | email: yang.chi[at]husky[dot]neu[dot]edu and klein.br@husky.neu.edu
 11 | Submitted as part of the 2019 NetSI Collabathon.
 12 | """
 13 | 
 14 | from .base import BaseReconstructor
 15 | import numpy as np
 16 | from itertools import permutations
 17 | from ..utilities import create_graph, threshold
 18 | from ..utilities.entropy import conditional_entropy, categorized_data
 19 | 
 20 | 
 21 | class NaiveTransferEntropy(BaseReconstructor):
 22 |     """Uses transfer entropy between sensors."""
 23 | 
 24 |     def fit(self, TS, delay_max=1, n_bins=2, threshold_type='range', **kwargs):
 25 |         r"""Calculates the transfer entropy from i --> j.
 26 | 
 27 |         The resulting network is asymmetric, and each element
 28 |         :math:`TE_{ij}` represents the amount of information contained
 29 |         about the future states of :math:`i` by knowing the past states of
 30 |         :math:`i` and past states of :math:`j`. Presumably, if one time
 31 |         series :math:`i` does not depend on the other :math:`j`, knowing
 32 |         all of i does not increase your certainty about the next state of
 33 |         :math:`i`.
 34 | 
 35 |         The reason that this method is referred to as "naive" transfer
 36 |         entropy is because it appears there are much more complicated
 37 |         conditional mutual informations that need to be calculated in order
 38 |         for this method to be true to the notion of information
 39 |         transfer. These are implemented in state of the art algorighms, as
 40 |         in the Java Information Dynamics Toolkit [1]_.
 41 | 
 42 |         The results dictionary also stores the weight matrix as
 43 |         `'weights_matrix'` and the thresholded version of the weight matrix
 44 |         as `'thresholded_matrix'`.
 45 | 
 46 |         Parameters
 47 |         ----------
 48 | 
 49 |         TS (np.ndarray)
 50 |             array consisting of :math:`L` observations from :math:`N`
 51 |             sensors.
 52 | 
 53 |         delay_max (int)
 54 |             the number of timesteps in the past to aggregate and average in
 55 |             order to get :math:`TE_{ij}`
 56 | 
 57 |         n_bins (int)
 58 |             the number of bins to turn values in the time series to categorical
 59 |             data, which is a pre-processing step to compute entropy.
 60 | 
 61 |         threshold_type (str)
 62 |             Which thresholding function to use on the matrix of
 63 |             weights. See `netrd.utilities.threshold.py` for
 64 |             documentation. Pass additional arguments to the thresholder
 65 |             using ``**kwargs``.
 66 | 
 67 |         Returns
 68 |         -------
 69 | 
 70 |         G (nx.Graph)
 71 |             a reconstructed graph with :math:`N` nodes.
 72 | 
 73 |         References
 74 |         ----------
 75 | 
 76 |         .. [1] https://github.com/jlizier/jidt
 77 | 
 78 |         """
 79 |         N, L = TS.shape  # Get the shape and length of the time series
 80 |         data = TS.T  # Transpose the time series to make observations the rows
 81 |         if delay_max >= L:
 82 |             raise ValueError('Max steps of delay exceeds time series length.')
 83 | 
 84 |         # Transform the data into its binned categorical version,
 85 |         # which is a pre-processing before computing entropy
 86 |         data = categorized_data(data, n_bins)
 87 | 
 88 |         # Compute the transfer entropy of every tuple of nodes
 89 |         TE = np.zeros((N, N))  # Initialize an matrix for transfer entropy
 90 |         for i, j in permutations(range(N), 2):
 91 |             # Check several delay values and average them together
 92 |             # This average is naive, but appears to be sufficient in
 93 |             # some circumstances
 94 |             te_list = [
 95 |                 transfer_entropy(data[:, i], data[:, j], delay)
 96 |                 for delay in range(1, delay_max + 1)
 97 |             ]
 98 |             TE[i, j] = np.mean(te_list)
 99 | 
100 |         self.results['weights_matrix'] = TE
101 | 
102 |         # threshold the network
103 |         TE_thresh = threshold(TE, threshold_type, **kwargs)
104 |         self.results['thresholded_matrix'] = TE_thresh
105 | 
106 |         # construct the network
107 |         self.results['graph'] = create_graph(TE_thresh)
108 |         G = self.results['graph']
109 | 
110 |         return G
111 | 
112 | 
113 | def transfer_entropy(X, Y, delay):
114 |     """
115 |     This is a TE implementation: asymmetric statistic measuring the reduction
116 |     in uncertainty for the dynamics of Y given the history of X. Or the
117 |     amount of information from X to Y. The calculation is done via conditional
118 |     mutual information.
119 | 
120 |     Parameters
121 |     ----------
122 |     X (np.ndarray): time series of categorical values from node :math:`i`
123 |     Y (np.ndarray): time series of categorical values from node :math:`j`
124 |     delay (int): steps with which node :math:`i` past state is accounted
125 | 
126 |     Returns
127 |     -------
128 |     te (float): the transfer entropy from nodes i to j
129 | 
130 |     """
131 |     X_past = X[:-delay, np.newaxis]
132 |     Y_past = Y[:-delay, np.newaxis]
133 |     joint_past = np.hstack((Y_past, X_past))
134 |     Y_future = Y[delay:, np.newaxis]
135 | 
136 |     te = conditional_entropy(Y_future, Y_past)
137 |     te -= conditional_entropy(Y_future, joint_past)
138 | 
139 |     return te
140 | 


--------------------------------------------------------------------------------
/netrd/dynamics/lotka_volterra.py:
--------------------------------------------------------------------------------
  1 | """
  2 | lotka_volterra.py
  3 | ----------------
  4 | 
  5 | Implementation to simulate a Lotka-Volterra model on a network.
  6 | 
  7 | author: Chia-Hung Yang
  8 | Submitted as part of the 2019 NetSI Collabathon.
  9 | """
 10 | 
 11 | from netrd.dynamics import BaseDynamics
 12 | import numpy as np
 13 | import networkx as nx
 14 | from numpy.random import uniform, normal
 15 | from scipy.integrate import ode
 16 | from ..utilities import unweighted
 17 | 
 18 | 
 19 | class LotkaVolterra(BaseDynamics):
 20 |     """Lotka-Volterra dynamics of species abundance."""
 21 | 
 22 |     @unweighted
 23 |     def simulate(
 24 |         self,
 25 |         G,
 26 |         L,
 27 |         init=None,
 28 |         gr=None,
 29 |         cap=None,
 30 |         inter=None,
 31 |         dt=1e-2,
 32 |         stochastic=True,
 33 |         pertb=None,
 34 |     ):
 35 |         r"""Simulate time series on a network from the Lotka-Volterra model.
 36 | 
 37 |         The Lotka-Volterra model was designed to describe dynamics of
 38 |         species abundances in an ecosystem. Species :math:`i`'s abundance
 39 |         change per time is :math:`\frac{d X_i}{d t} = r_i X_i \left(1 -
 40 |         \frac{X_i}{K_i} + \sum_{j \neq i} W_{ij} \frac{X_j}{K_i}\right)`
 41 |         where :math:`r_i` and :math:`K_i` are the growth rate and the
 42 |         carrying capacity of species :math:`i` respectively, and
 43 |         :math:`W_{ij}` are the relative interaction strength of species
 44 |         :math:`j` on :math:`i`.
 45 | 
 46 |         The results dictionary also stores the ground truth network as
 47 |         `'ground_truth'` and the intermediate time steps as `'time_steps'`.
 48 | 
 49 |         Parameters
 50 |         ----------
 51 | 
 52 |         G (nx.Graph)
 53 |             Underlying ground-truth network of simulated time series which
 54 |             has :math:`N` nodes.
 55 | 
 56 |         L (int)
 57 |             Length of time series.
 58 | 
 59 |         init (np.ndarray)
 60 |             Length-:math:`N` 1D array of nodes' initial condition. If not
 61 |             specified an initial condition is unifromly generated from 0 to
 62 |             the nodes' carrying capacity.
 63 | 
 64 |         gr (np.ndarray)
 65 |             Length-:math:`N` 1D array of nodes' growth rate. If not
 66 |             specified, default to 1 for all nodes.
 67 | 
 68 |         cap (np.ndarray)
 69 |             Length-:math:`N` 1D array of nodes' carrying capacity. If not
 70 |             specified, default to 1 for all nodes.
 71 | 
 72 |         inter (np.ndarray)
 73 |             :math:`N \times N` array of interaction weights between
 74 |             nodes. If not specified, default to a zero-diagonal matrix
 75 |             whose [i, j] entry is :math:`\frac{sign(j - i)}{N - 1}`.
 76 | 
 77 |         dt (float or np.ndarray)
 78 |             Sizes of time steps when simulating the continuous-time
 79 |             dynamics.
 80 | 
 81 |         stochastic (bool)
 82 |             Whether to simulate the stochastic or deterministic dynamics.
 83 | 
 84 |         pertb (np.ndarray)
 85 |             Length-:math:`N` 1D array of perturbation magnitude of nodes'
 86 |             growth. If not specified, default to 0.01 for all nodes.
 87 | 
 88 |         Returns
 89 |         -------
 90 | 
 91 |         TS (np.ndarray)
 92 |             :math:`N \times L` array of `L` observations on :math:`N` nodes.
 93 | 
 94 |         Notes
 95 |         -----
 96 | 
 97 |         The deterministic dynamics is simulated through the forth-order
 98 |         Runge-Kutta method, and the stochastic one is simulated through
 99 |         multiplicative noise with the Euler-Maruyama method.
100 | 
101 |         The ground-truth network, time steps and the time series can be
102 |         found in results['ground-truth'], reuslts['time_steps'] and
103 |         results['time_series'] respectively.
104 | 
105 |         """
106 | 
107 |         N = G.number_of_nodes()
108 |         adjmat = nx.to_numpy_array(G)
109 | 
110 |         # Initialize the model's parameters if not specified
111 |         if gr is None:
112 |             gr = np.ones(N, dtype=float)
113 |         if cap is None:
114 |             cap = np.ones(N, dtype=float)
115 |         if inter is None:
116 |             wei = 1 / (N - 1)
117 |             full = np.full((N, N), wei, dtype=float)
118 |             inter = np.zeros((N, N), dtype=float)
119 |             inter += np.triu(full) - np.tril(full)
120 | 
121 |         if stochastic and pertb is None:
122 |             pertb = 1e-2 * np.ones(N, dtype=float)
123 | 
124 |         # Randomly initialize an initial condition if not speciefied
125 |         TS = np.zeros((N, L), dtype=float)
126 |         if init is None:
127 |             init = uniform(low=0, high=cap)
128 |         TS[:, 0] = init
129 | 
130 |         # Define the function of dynamics
131 |         mat = np.where(adjmat == 1, inter, 0.0) + np.diag(-np.ones(N))
132 |         mat /= cap[:, np.newaxis]
133 | 
134 |         def dyn(t, state):
135 |             return state * (gr + np.dot(mat, state))
136 | 
137 |         # Simulate the time series
138 |         if isinstance(dt, float):
139 |             dt = dt * np.ones(L - 1)
140 | 
141 |         # Deterministic dynamics
142 |         if not stochastic:
143 |             integrator = ode(dyn).set_integrator('dopri5')
144 |             integrator.set_initial_value(init, 0.0)
145 |             for t in range(L - 1):
146 |                 if integrator.successful():
147 |                     TS[:, t + 1] = integrator.integrate(integrator.t + dt[t])
148 |                 else:
149 |                     message = 'Integration not succesful. '
150 |                     message += 'Change sizes of time steps or the parameters.'
151 |                     raise RuntimeError(message)
152 | 
153 |         # Stochastic dynamics
154 |         else:
155 |             for t in range(L - 1):
156 |                 state = TS[:, t].copy()
157 |                 _next = state + dyn(t, state) * dt[t]
158 |                 _next += state * normal(scale=pertb) * np.sqrt(dt[t])
159 |                 TS[:, t + 1] = _next
160 | 
161 |         # Store the results
162 |         self.results['ground_truth'] = G
163 |         self.results['time_steps'] = np.cumsum(dt)
164 |         self.results['TS'] = TS
165 | 
166 |         return TS
167 | 


--------------------------------------------------------------------------------
/netrd/distance/netsimile.py:
--------------------------------------------------------------------------------
  1 | """
  2 | netsimile.py
  3 | --------------
  4 | 
  5 | Graph distance based on:
  6 | Berlingerio, M., Koutra, D., Eliassi-Rad, T. & Faloutsos, C. NetSimile: A Scalable Approach to Size-Independent Network Similarity. arXiv (2012)
  7 | 
  8 | author: Alex Gates
  9 | email: ajgates42@gmail.com (optional)
 10 | Submitted as part of the 2019 NetSI Collabathon.
 11 | 
 12 | """
 13 | import networkx as nx
 14 | import numpy as np
 15 | from scipy.spatial.distance import canberra
 16 | from scipy.stats import skew, kurtosis
 17 | 
 18 | from .base import BaseDistance
 19 | from ..utilities import undirected, unweighted
 20 | 
 21 | 
 22 | class NetSimile(BaseDistance):
 23 |     """Compares node signature distributions."""
 24 | 
 25 |     @undirected
 26 |     @unweighted
 27 |     def dist(self, G1, G2):
 28 |         """A scalable approach to network similarity.
 29 | 
 30 |         A network similarity measure based on node signature distributions.
 31 | 
 32 |         The results dictionary includes the underlying feature matrices in
 33 |         `'feature_matrices'` and the underlying signature vectors in
 34 |         `'signature_vectors'`.
 35 | 
 36 |         Parameters
 37 |         ----------
 38 | 
 39 |         G1, G2 (nx.Graph)
 40 |             two undirected networkx graphs to be compared.
 41 | 
 42 |         Returns
 43 |         -------
 44 | 
 45 |         dist (float)
 46 |             the distance between `G1` and `G2`.
 47 | 
 48 |         References
 49 |         ----------
 50 | 
 51 |         .. [1] Michele Berlingerio, Danai Koutra, Tina Eliassi-Rad,
 52 |                Christos Faloutsos: NetSimile: A Scalable Approach to
 53 |                Size-Independent Network Similarity. CoRR abs/1209.2684
 54 |                (2012)
 55 | 
 56 |         """
 57 | 
 58 |         # find the graph node feature matrices
 59 |         G1_node_features = feature_extraction(G1)
 60 |         G2_node_features = feature_extraction(G2)
 61 | 
 62 |         # get the graph signature vectors
 63 |         G1_signature = graph_signature(G1_node_features)
 64 |         G2_signature = graph_signature(G2_node_features)
 65 | 
 66 |         # the final distance is the absolute canberra distance
 67 |         dist = abs(canberra(G1_signature, G2_signature))
 68 | 
 69 |         self.results['feature_matrices'] = G1_node_features, G2_node_features
 70 |         self.results['signature_vectors'] = G1_signature, G2_signature
 71 |         self.results['dist'] = dist
 72 | 
 73 |         return dist
 74 | 
 75 | 
 76 | def feature_extraction(G):
 77 |     """Node feature extraction.
 78 | 
 79 |     Parameters
 80 |     ----------
 81 | 
 82 |     G (nx.Graph): a networkx graph.
 83 | 
 84 |     Returns
 85 |     -------
 86 | 
 87 |     node_features (float): the Nx7 matrix of node features."""
 88 | 
 89 |     # necessary data structures
 90 |     node_features = np.zeros(shape=(G.number_of_nodes(), 7))
 91 |     node_list = sorted(G.nodes())
 92 |     node_degree_dict = dict(G.degree())
 93 |     node_clustering_dict = dict(nx.clustering(G))
 94 |     egonets = {n: nx.ego_graph(G, n) for n in node_list}
 95 | 
 96 |     # node degrees
 97 |     degs = [node_degree_dict[n] for n in node_list]
 98 | 
 99 |     # clustering coefficient
100 |     clusts = [node_clustering_dict[n] for n in node_list]
101 | 
102 |     # average degree of neighborhood
103 |     neighbor_degs = [
104 |         np.mean([node_degree_dict[m] for m in egonets[n].nodes if m != n])
105 |         if node_degree_dict[n] > 0
106 |         else 0
107 |         for n in node_list
108 |     ]
109 | 
110 |     # average clustering coefficient of neighborhood
111 |     neighbor_clusts = [
112 |         np.mean([node_clustering_dict[m] for m in egonets[n].nodes if m != n])
113 |         if node_degree_dict[n] > 0
114 |         else 0
115 |         for n in node_list
116 |     ]
117 | 
118 |     # number of edges in the neighborhood
119 |     neighbor_edges = [
120 |         egonets[n].number_of_edges() if node_degree_dict[n] > 0 else 0
121 |         for n in node_list
122 |     ]
123 | 
124 |     # number of outgoing edges from the neighborhood
125 |     # the sum of neighborhood degrees = 2*(internal edges) + external edges
126 |     # node_features[:,5] = node_features[:,0] * node_features[:,2] - 2*node_features[:,4]
127 |     neighbor_outgoing_edges = [
128 |         len(
129 |             [
130 |                 edge
131 |                 for edge in set.union(*[set(G.edges(j)) for j in egonets[i].nodes])
132 |                 if not egonets[i].has_edge(*edge)
133 |             ]
134 |         )
135 |         for i in node_list
136 |     ]
137 | 
138 |     # number of neighbors of neighbors (not in neighborhood)
139 |     neighbors_of_neighbors = [
140 |         len(
141 |             set([p for m in G.neighbors(n) for p in G.neighbors(m)])
142 |             - set(G.neighbors(n))
143 |             - set([n])
144 |         )
145 |         if node_degree_dict[n] > 0
146 |         else 0
147 |         for n in node_list
148 |     ]
149 | 
150 |     # assembling the features
151 |     node_features[:, 0] = degs
152 |     node_features[:, 1] = clusts
153 |     node_features[:, 2] = neighbor_degs
154 |     node_features[:, 3] = neighbor_clusts
155 |     node_features[:, 4] = neighbor_edges
156 |     node_features[:, 5] = neighbor_outgoing_edges
157 |     node_features[:, 6] = neighbors_of_neighbors
158 | 
159 |     return np.nan_to_num(node_features)
160 | 
161 | 
162 | def graph_signature(node_features):
163 |     signature_vec = np.zeros(7 * 5)
164 | 
165 |     # for each of the 7 features
166 |     for k in range(7):
167 |         # find the mean
168 |         signature_vec[k * 5] = node_features[:, k].mean()
169 |         # find the median
170 |         signature_vec[k * 5 + 1] = np.median(node_features[:, k])
171 |         # find the std
172 |         signature_vec[k * 5 + 2] = node_features[:, k].std()
173 |         # find the skew
174 |         signature_vec[k * 5 + 3] = skew(node_features[:, k])
175 |         # find the kurtosis
176 |         signature_vec[k * 5 + 4] = kurtosis(node_features[:, k])
177 | 
178 |     return signature_vec
179 | 
180 | 
181 | """
182 | # sample usage
183 | >>>from netrd.distance import NetSimile
184 | >>>G1 = nx.karate_club_graph()
185 | >>>G2 = nx.krackhardt_kite_graph()
186 | 
187 | >>>test = NetSimile()
188 | >>>print(test.dist(G1, G2))
189 | 20.180783067167326
190 | """
191 | 


--------------------------------------------------------------------------------
/netrd/reconstruction/partial_correlation_matrix.py:
--------------------------------------------------------------------------------
  1 | """
  2 | partial_correlation_matrix.py
  3 | ---------------------
  4 | 
  5 | Reconstruction of graphs using the partial correlation matrix.
  6 | 
  7 | author: Stefan McCabe
  8 | email: stefanmccabe at gmail dot com
  9 | Submitted as part of the 2019 NetSI Collabathon
 10 | 
 11 | """
 12 | from .base import BaseReconstructor
 13 | import numpy as np
 14 | from scipy import stats, linalg
 15 | from ..utilities import create_graph, threshold
 16 | 
 17 | 
 18 | class PartialCorrelationMatrix(BaseReconstructor):
 19 |     """Uses a regularized form of the precision matrix."""
 20 | 
 21 |     def fit(
 22 |         self,
 23 |         TS,
 24 |         index=None,
 25 |         drop_index=True,
 26 |         of_residuals=False,
 27 |         threshold_type="range",
 28 |         **kwargs
 29 |     ):
 30 |         """Uses a regularized form of the precision matrix.
 31 | 
 32 |         The results dictionary also stores the weight matrix as
 33 |         `'weights_matrix'` and the thresholded version of the weight matrix
 34 |         as `'thresholded_matrix'`. For details see [1]_.
 35 | 
 36 |         Parameters
 37 |         ----------
 38 | 
 39 |         index (int, array of ints, or None)
 40 |             Take the partial correlations of each pair of elements holding
 41 |             constant an index variable or set of index variables. If None,
 42 |             take the partial correlations of the variables holding constant
 43 |             all other variables.
 44 | 
 45 |         drop_index (bool)
 46 |             If True, drop the index variables after calculating the partial
 47 |             correlations.
 48 | 
 49 |         of_residuals (bool)
 50 |             If True, after calculating the partial correlations (presumably
 51 |             using a dropped index variable), recalculate the partial
 52 |             correlations between each variable, holding constant all other
 53 |             variables.
 54 | 
 55 |         threshold_type (str)
 56 |             Which thresholding function to use on the matrix of
 57 |             weights. See `netrd.utilities.threshold.py` for
 58 |             documentation. Pass additional arguments to the thresholder
 59 |             using ``**kwargs``.
 60 | 
 61 |         Returns
 62 |         -------
 63 | 
 64 |         G (nx.Graph)
 65 |             a reconstructed graph.
 66 | 
 67 |         References
 68 |         ----------
 69 | 
 70 |         .. [1] https://bwlewis.github.io/correlation-regularization/
 71 | 
 72 |         """
 73 | 
 74 |         p_cor = partial_corr(TS, index=index)
 75 | 
 76 |         if drop_index and index is not None:
 77 |             p_cor = np.delete(p_cor, index, axis=0)
 78 |             p_cor = np.delete(p_cor, index, axis=1)
 79 | 
 80 |         if of_residuals:
 81 |             p_cor = partial_corr(p_cor, index=None)
 82 | 
 83 |         self.results["weights_matrix"] = p_cor
 84 | 
 85 |         # threshold the network
 86 |         W_thresh = threshold(p_cor, threshold_type, **kwargs)
 87 | 
 88 |         # construct the network
 89 |         self.results["graph"] = create_graph(W_thresh)
 90 |         self.results["thresholded_matrix"] = W_thresh
 91 | 
 92 |         G = self.results["graph"]
 93 | 
 94 |         return G
 95 | 
 96 | 
 97 | # This partial correlation function is adapted from Fabian Pedregosa-Izquierdo's
 98 | # implementation of partial correlation in Python, found at [this gist](
 99 | # https://gist.github.com/fabianp/9396204419c7b638d38f)
100 | """
101 | Partial Correlation in Python (clone of Matlab's partialcorr)
102 | 
103 | This uses the linear regression approach to compute the partial
104 | correlation (might be slow for a huge number of variables). The
105 | algorithm is detailed here:
106 | 
107 |     http://en.wikipedia.org/wiki/Partial_correlation#Using_linear_regression
108 | 
109 | Taking X and Y two variables of interest and Z the matrix with all the variable minus {X, Y},
110 | the algorithm can be summarized as
111 | 
112 |     1) perform a normal linear least-squares regression with X as the target and Z as the predictor
113 |     2) calculate the residuals in Step #1
114 |     3) perform a normal linear least-squares regression with Y as the target and Z as the predictor
115 |     4) calculate the residuals in Step #3
116 |     5) calculate the correlation coefficient between the residuals from Steps #2 and #4;
117 | 
118 |     The result is the partial correlation between X and Y while controlling for the effect of Z
119 | 
120 | 
121 | Date: Nov 2014
122 | Author: Fabian Pedregosa-Izquierdo, f@bianp.net
123 | Testing: Valentina Borghesani, valentinaborghesani@gmail.com
124 | """
125 | 
126 | 
127 | def partial_corr(C, index=None):
128 |     """Returns the sample linear partial correlation coefficients between pairs of
129 |     variables in C, controlling for the remaining variables in C.
130 | 
131 | 
132 |     Parameters
133 |     --------------
134 |     C : array-like, shape (p, n)
135 |         Array with the different variables. Each row of C is taken as a variable
136 | 
137 | 
138 |     Returns -------
139 |     P : array-like, shape (p, p)
140 |         P[i, j] contains the partial correlation of C[:, i] and C[:, j]
141 |         controlling for the remaining variables in C.
142 | 
143 |     """
144 | 
145 |     C = np.asarray(C).T
146 |     p = C.shape[1]
147 |     P_corr = np.zeros((p, p), dtype=np.float64)
148 | 
149 |     for i in range(p):
150 |         P_corr[i, i] = 1
151 |         for j in range(i + 1, p):
152 |             if index is None:
153 |                 idx = np.ones(p, dtype=bool)
154 |                 idx[i] = False
155 |                 idx[j] = False
156 |             elif type(index) is int or (
157 |                 isinstance(index, np.ndarray) and issubclass(index.dtype.type, np.int_)
158 |             ):
159 |                 idx = np.zeros(p, dtype=bool)
160 |                 idx[index] = True
161 |             else:
162 |                 raise ValueError(
163 |                     "Index must be an integer, an array of " "integers, or None."
164 |                 )
165 | 
166 |             beta_i = linalg.lstsq(C[:, idx], C[:, j])[0]
167 |             beta_j = linalg.lstsq(C[:, idx], C[:, i])[0]
168 | 
169 |             res_j = C[:, j] - C[:, idx].dot(beta_i)
170 |             res_i = C[:, i] - C[:, idx].dot(beta_j)
171 | 
172 |             corr = stats.pearsonr(res_i, res_j)[0]
173 |             P_corr[i, j] = corr
174 |             P_corr[j, i] = corr
175 | 
176 |     return P_corr
177 | 


--------------------------------------------------------------------------------
/netrd/distance/graph_diffusion.py:
--------------------------------------------------------------------------------
  1 | """
  2 | graph_diffusion.py
  3 | --------------------------
  4 | 
  5 | Graph diffusion distance, from
  6 | 
  7 | Hammond, D. K., Gur, Y., & Johnson, C. R. (2013, December). Graph diffusion
  8 | distance: A difference measure for weighted graphs based on the graph Laplacian
  9 | exponential kernel. In Global Conference on Signal and Information Processing,
 10 | 2013 IEEE (pp 419-422). IEEE. https://doi.org/10.1109/GlobalSIP.2013.6736904
 11 | 
 12 | This implementation is adapted from the authors' MATLAB code, available at
 13 | https://rb.gy/txbfrh, and available under an MIT license with the authors'
 14 | permission.
 15 | 
 16 | author: Brennan Klein
 17 | email: brennanjamesklein at gmail dot com
 18 | Submitted as part of the 2019 NetSI Collabathon.
 19 | 
 20 | """
 21 | 
 22 | import numpy as np
 23 | import networkx as nx
 24 | from scipy.sparse.csgraph import laplacian
 25 | from .base import BaseDistance
 26 | from ..utilities import undirected
 27 | 
 28 | 
 29 | class GraphDiffusion(BaseDistance):
 30 |     """Find the maximally dissimilar diffusion kernels between two graphs."""
 31 | 
 32 |     @undirected
 33 |     def dist(self, G1, G2, thresh=1e-08, resolution=1000):
 34 |         r"""The graph diffusion distance between two graphs, :math:`G` and :math:`G'`,
 35 |         is a distance measure based on the notion of flow within each graph. As
 36 |         such, this measure uses the unnormalized Laplacian matrices of both
 37 |         graphs, :math:`\mathcal{L}` and :math:`\mathcal{L}'`, and uses them to
 38 |         construct time-varying Laplacian exponential diffusion kernels,
 39 |         :math:`e^{-t\mathcal{L}}` and :math:`e^{-t\mathcal{L}'}`, by
 40 |         effectively simulating a diffusion process for :math:`t` timesteps,
 41 |         creating a column vector of node-level activity at each timestep. The
 42 |         distance :math:`d_\texttt{GDD}(G, G')` is defined as the Frobenius norm
 43 |         between the two diffusion kernels at the timestep :math:`t^{*}` where
 44 |         the two kernels are maximally different. That is, we compute the
 45 |         Frobenius norms and their differences for each timestep, and return the
 46 |         maximum difference.
 47 | 
 48 |         .. math::
 49 |             D_{GDD}(G,G') = \sqrt{||e^{-t^{*}\mathcal{L}}-e^{-t^{*}\mathcal{L}'}||}
 50 | 
 51 |         The results dictionary also stores a 2-tuple of the underlying
 52 |         adjacency matrices in `adjacency_matrices`, the Laplacian matrices in
 53 |         `laplacian_matrices`, and the output of the optimization process
 54 |         (`peak_diffusion_time` and `peak_deviation`).
 55 | 
 56 |         Adapted from the authors' MATLAB code, available at: https://rb.gy/txbfrh
 57 | 
 58 | 
 59 |         Parameters
 60 |         ----------
 61 | 
 62 |         G1, G2 (nx.Graph)
 63 |             two networkx graphs to be compared.
 64 | 
 65 |         thresh (float)
 66 |             minimum value above which the eigenvalues will be considered.
 67 | 
 68 |         resolution (int)
 69 |             number of :math:`t` values to span through.
 70 | 
 71 |         Returns
 72 |         -------
 73 |         dist (float)
 74 |             the distance between `G1` and `G2`.
 75 | 
 76 |         References
 77 |         ----------
 78 | 
 79 |         .. [1] Hammond, D. K., Gur, Y., & Johnson, C. R. (2013, December).
 80 |                Graph diffusion distance: A difference measure for weighted graphs based on the
 81 |                graph Laplacian exponential kernel. In Global Conference on Signal and
 82 |                Information Processing, 2013 IEEE (pp 419-422). IEEE.
 83 |                https://doi.org/10.1109/GlobalSIP.2013.6736904
 84 | 
 85 |         """
 86 | 
 87 |         A1 = nx.to_numpy_array(G1)
 88 |         A2 = nx.to_numpy_array(G2)
 89 | 
 90 |         L1 = laplacian(A1)
 91 |         L2 = laplacian(A2)
 92 | 
 93 |         def sort_eigs(eigs):
 94 |             vals, vecs = eigs
 95 |             idx = np.argsort(abs(vals))
 96 |             return vals[idx], vecs[:, idx]
 97 | 
 98 |         vals1, vecs1 = sort_eigs(np.linalg.eig(L1))
 99 |         vals2, vecs2 = sort_eigs(np.linalg.eig(L2))
100 | 
101 |         eigs = np.hstack((np.diag(vals1), np.diag(vals2)))
102 |         eigs = eigs[np.where(eigs > thresh)]
103 |         eigs = np.sort(eigs)
104 | 
105 |         if len(eigs) == 0:
106 |             dist = 0
107 |             self.results["dist"] = dist
108 |             return dist
109 | 
110 |         t_upperbound = np.real(1.0 / eigs[0])
111 |         ts = np.linspace(0, t_upperbound, resolution)
112 | 
113 |         # Find the Frobenius norms between all the diffusion kernels at
114 |         # different times. Return the value and where this vector is minimized.
115 |         E = -exponential_diffusion_diff(vecs1, vals1, vecs2, vals2, ts)
116 |         f_val, t_star = (np.nanmin(E), np.argmin(E))
117 | 
118 |         dist = np.sqrt(-f_val)
119 | 
120 |         self.results["adjacency_matrices"] = A1, A2
121 |         self.results["laplacian_matrices"] = L1, L2
122 |         self.results["peak_diffusion_time"] = t_star
123 |         self.results["peak_deviation"] = f_val
124 | 
125 |         self.results["dist"] = dist
126 | 
127 |         return dist
128 | 
129 | 
130 | def exponential_diffusion_diff(vecs1, vals1, vecs2, vals2, ts):
131 |     """
132 |     Computes Frobenius norm of difference of Laplacian exponential diffusion
133 |     kernels, at specified timepoints.
134 | 
135 |     Parameters
136 |     ----------
137 | 
138 |     vecs1, vecs2 (np.array)
139 |         eigenvectors of the Laplacians of `G1` and `G2`
140 | 
141 |     vals1, vals2 (np.array)
142 |         eigenvalues of the Laplacians of `G1` and `G2`
143 | 
144 |     ts (np.array)
145 |         times at which to compute the difference in Frobenius norms
146 | 
147 |     Returns
148 |     -------
149 | 
150 |     diffs (np.array)
151 |         same shape as :math:`t`, contains differences of Frobenius norms
152 | 
153 |     """
154 | 
155 |     diffs = np.zeros(len(ts))
156 | 
157 |     for kt, t in enumerate(ts):
158 |         exp_diag_1 = np.diag(np.exp(-t * np.diag(vals1)))
159 |         exp_diag_2 = np.diag(np.exp(-t * np.diag(vals2)))
160 | 
161 |         # multiply the eigenvectors element-wise by the appropriate diffusion value
162 |         # before left-multiplying the eigenvectors again.
163 |         norm1 = vecs1.dot(np.multiply(exp_diag_1, vecs1).T)
164 |         norm2 = vecs2.dot(np.multiply(exp_diag_2, vecs2).T)
165 |         diff = norm1 - norm2
166 | 
167 |         diffs[kt] = (diff**2).sum()
168 | 
169 |     return diffs
170 | 


--------------------------------------------------------------------------------
/netrd/distance/quantum_jsd.py:
--------------------------------------------------------------------------------
  1 | """
  2 | quantum_jsd.py
  3 | --------------------------
  4 | 
  5 | Graph distance based on the quantum $q$-Jenson-Shannon divergence.
  6 | 
  7 | De Domenico, Manlio, and Jacob Biamonte. 2016. “Spectral Entropies as
  8 | Information-Theoretic Tools for Complex Network Comparison.” Physical Review X
  9 | 6 (4). https://doi.org/10.1103/PhysRevX.6.041062.
 10 | 
 11 | 
 12 | author: Stefan McCabe & Brennan Klein
 13 | email:
 14 | Submitted as part of the 2019 NetSI Collabathon.
 15 | 
 16 | """
 17 | 
 18 | import warnings
 19 | import networkx as nx
 20 | import numpy as np
 21 | from scipy.linalg import expm
 22 | from .base import BaseDistance
 23 | from ..utilities import undirected, unweighted
 24 | 
 25 | 
 26 | class QuantumJSD(BaseDistance):
 27 |     """Compares the spectral entropies of the density matrices."""
 28 | 
 29 |     @undirected
 30 |     @unweighted
 31 |     def dist(self, G1, G2, beta=0.1, q=None):
 32 |         r"""Square root of the quantum :math:`q`-Jensen-Shannon divergence between two
 33 |         graphs.
 34 | 
 35 |         The generalized Jensen-Shannon divergence compares two graphs b √(H0 - 0.5 * (H1 + H2))y the
 36 |         spectral entropies of their quantum-statistical-mechanical density
 37 |         matrices. It can be written as
 38 | 
 39 |         .. math::
 40 |             \mathcal{J}_q(\mathbf{\rho} || \mathbf{\sigma}) =
 41 |             S_q\left( \frac{\mathbf{\rho} + \mathbf{\sigma}}{2} \right) -
 42 |             \frac{1}{2} [S_q(\mathbf{\rho}) + S_q(\mathbf{\sigma})],
 43 | 
 44 | 
 45 |         where :math:`\mathbf{\rho}` and :math:`\mathbf{\sigma}` are density
 46 |         matrices and :math:`q` is the order parameter.
 47 | 
 48 |         The density matrix
 49 | 
 50 |         .. math::
 51 |             \mathbf{\rho} = \frac{e^{-\beta\mathbf{L}}}{Z},
 52 | 
 53 | 
 54 |         where
 55 | 
 56 |         .. math::
 57 |             Z = \sum_{i=1}^{N}e^{-\beta\lambda_i(\mathbf{L})}
 58 | 
 59 | 
 60 |         and :math:`\lambda_i(\mathbf{L})` is the :math:`i`th eigenvalue of the Laplacian
 61 |         matrix :math:`L`, represents an imaginary diffusion process over the network
 62 |         with time parameter :math:`\beta > 0`.
 63 | 
 64 |         For these density matrices and the mixture matrix, we calculate the
 65 |         Rényi entropy of order :math:`q`
 66 | 
 67 |         .. math::
 68 |             S_q = \frac{1}{1-q} \log_2 \sum_{i=1}^{N}\lambda_i(\mathbf{\rho})^q,
 69 | 
 70 | 
 71 |         or, if :math:`q=1`, the Von Neumann entropy
 72 | 
 73 |         .. math::
 74 |             S_1 = - \sum_{i=1}^{N}\lambda_i(\mathbf{\rho})\log_2\lambda_i(\mathbf{\rho}).
 75 | 
 76 | 
 77 |         Note that this implementation is not exact because the matrix
 78 |         exponentiation is performed using the Padé approximation and
 79 |         because of imprecision in the calculation of the eigenvalues of the
 80 |         density matrix.
 81 | 
 82 |         Parameters
 83 |         ----------
 84 | 
 85 |         G1, G2 (nx.Graph)
 86 |             two networkx graphs to be compared
 87 | 
 88 |         beta (float)
 89 |             time parameter for diffusion propagator
 90 | 
 91 |         q (float)
 92 |             order parameter for Rényi entropy. If None or 1, use the Von
 93 |             Neumann entropy (i.e., Shannon entropy) instead.
 94 | 
 95 |         Returns
 96 |         -------
 97 | 
 98 |         dist (float)
 99 |             the distance between `G1` and `G2`.
100 | 
101 |         References
102 |         ----------
103 | 
104 |         .. [1] De Domenico, Manlio, and Jacob Biamonte. 2016. "Spectral
105 |                Entropies as Information-Theoretic Tools for Complex Network
106 |                Comparison." Physical Review X 6
107 |                (4). https://doi.org/10.1103/PhysRevX.6.041062.
108 | 
109 |         """
110 |         if beta <= 0:
111 |             raise ValueError("beta must be positive.")
112 | 
113 |         if q and q >= 2:
114 |             warnings.warn("JSD is only a metric for 0 ≤ q < 2.", RuntimeWarning)
115 | 
116 |         def density_matrix(A, beta):
117 |             """
118 |             Create the density matrix encoding probabilities for entropies.
119 |             This is done using a fictive diffusion process with time parameter
120 |             :math:`beta`.
121 |             """
122 |             L = np.diag(np.sum(A, axis=1)) - A
123 |             rho = expm(-1 * beta * L)
124 |             rho = rho / np.trace(rho)
125 | 
126 |             return rho
127 | 
128 |         def renyi_entropy(X, q=None):
129 |             """
130 |             Calculate the Rényi entropy with order :math:`q`, or the Von Neumann
131 |             entropy if :math:`q` is `None` or 1.
132 |             """
133 |             # Note that where there are many zero eigenvalues (i.e., large
134 |             # values of beta) in the density matrix, floating-point precision
135 |             # issues mean that there will be negative eigenvalues and the
136 |             # eigenvalues will not sum to precisely one. To avoid encountering
137 |             # `nan`s in `np.log2`, we remove all eigenvalues that are close
138 |             # to zero within 1e-6 tolerance. As for the eigenvalues not summing
139 |             # to exactly one, this is a small source of error in the
140 |             # calculation.
141 |             eigs = np.linalg.eigvalsh(X)
142 |             zero_eigenvalues = np.isclose(np.abs(eigs), 0, atol=1e-6)
143 |             eigs = eigs[np.logical_not(zero_eigenvalues)]
144 | 
145 |             if q is None or q == 1:
146 |                 # plain Von Neumann entropy
147 |                 H = -1 * np.sum(eigs * np.log2(eigs))
148 |             else:
149 |                 prefactor = 1 / (1 - q)
150 |                 H = prefactor * np.log2((eigs**q).sum())
151 |             return H
152 | 
153 |         A1 = nx.to_numpy_array(G1)
154 |         A2 = nx.to_numpy_array(G2)
155 | 
156 |         rho1 = density_matrix(A1, beta)
157 |         rho2 = density_matrix(A2, beta)
158 |         mix = (rho1 + rho2) / 2
159 | 
160 |         H0 = renyi_entropy(mix, q)
161 |         H1 = renyi_entropy(rho1, q)
162 |         H2 = renyi_entropy(rho2, q)
163 | 
164 |         dist = np.sqrt(H0 - 0.5 * (H1 + H2))
165 | 
166 |         self.results['density_matrix_1'] = rho1
167 |         self.results['density_matrix_2'] = rho2
168 |         self.results['mixture_matrix'] = mix
169 |         self.results['entropy_1'] = H1
170 |         self.results['entropy_2'] = H2
171 |         self.results['entropy_mixture'] = H0
172 |         self.results['dist'] = dist
173 |         return dist
174 | 


--------------------------------------------------------------------------------
/netrd/reconstruction/partial_correlation_influence.py:
--------------------------------------------------------------------------------
  1 | """
  2 | partial_correlation_influence.py
  3 | --------------------------------
  4 | 
  5 | Reconstruction of graphs using the partial correlation influence, as defined in:
  6 | 
  7 | Kenett, D. Y. et al. Dominating clasp of the financial sector revealed by
  8 | partial correlation analysis of the stock market. PLoS ONE 5, e15032 (2010).
  9 | 
 10 | The index variable option as in:
 11 | 
 12 | Kenett, D. Y., Huang, X., Vodenska, I., Havlin, S. & Stanley, H. E. Partial correlation
 13 | analysis: applications for financial markets. Quantitative Finance 15, 569–578 (2015).
 14 | 
 15 | 
 16 | author: Carolina Mattsson and Chia-Hung Yang
 17 | email: mattsson dot c at northeastern dot edu
 18 | Submitted as part of the 2019 NetSI Collabathon
 19 | """
 20 | from .base import BaseReconstructor
 21 | import numpy as np
 22 | from scipy import linalg
 23 | from ..utilities import create_graph, threshold
 24 | 
 25 | 
 26 | class PartialCorrelationInfluence(BaseReconstructor):
 27 |     """Uses average effect from a sensor to all others."""
 28 | 
 29 |     def fit(self, TS, index=None, threshold_type='range', **kwargs):
 30 |         r"""Uses the average effect of a series :math:`Z` on the correlation between
 31 |         a series :math:`X` and all other series.
 32 | 
 33 |         The partial correlation influence:
 34 | 
 35 |         .. math::
 36 | 
 37 |             d(X:Z) = <d(X,Y:Z)>_Y \neq X,
 38 | 
 39 |         where :math:`d(X,Y:Z) = \rho(X,Y) - \rho(X,Y:Z)`
 40 | 
 41 | 
 42 |         If an index is given, both terms become partial correlations:
 43 | 
 44 |         .. math::
 45 | 
 46 |             d(X,Y:Z) ≡ ρ(X,Y:M) − ρ(X,Y:M,Z)
 47 | 
 48 | 
 49 |         The results dictionary also stores the matrix of partial
 50 |         correlations as `'weights_matrix'` and the thresholded version of
 51 |         the partial correlation matrix as `'thresholded_matrix'`.
 52 | 
 53 |         Parameters
 54 |         ----------
 55 |         TS (np.ndarray)
 56 |             Array consisting of :math:`L` observations from :math:`N` sensors.
 57 | 
 58 |         index (int, array of ints, or None)
 59 |             An index variable or set of index variables, which are assumed to
 60 |             be confounders of all other variables. They are held constant when
 61 |             calculating the partial correlations. Default to None.
 62 | 
 63 |         threshold_type (str):
 64 |             Which thresholding function to use on the matrix of
 65 |             weights. See `netrd.utilities.threshold.py` for
 66 |             documentation. Pass additional arguments to the thresholder
 67 |             using ``**kwargs``.
 68 | 
 69 |         Returns
 70 |         -------
 71 | 
 72 |         G (nx.Graph)
 73 |             a reconstructed graph.
 74 | 
 75 |         References
 76 |         -----------
 77 | 
 78 |         .. [1] Kenett, D. Y. et al. Dominating clasp of the financial
 79 |                sector revealed by partial correlation analysis of the stock
 80 |                market. PLoS ONE 5, e15032 (2010).
 81 | 
 82 |         .. [2] Kenett, D. Y., Huang, X., Vodenska, I., Havlin, S. &
 83 |                Stanley, H. E. Partial correlation analysis: applications
 84 |                for financial markets. Quantitative Finance 15, 569–578
 85 |                (2015).
 86 | 
 87 |         """
 88 |         data = TS.T
 89 |         N = data.shape[1]
 90 | 
 91 |         # Create masks to separate variables of interests from the pre-included
 92 |         # index variables
 93 |         mask = np.ones(N, dtype=bool)
 94 |         if index is not None:
 95 |             mask[index] = False
 96 | 
 97 |         # Compute partial correlations with the index variables held constant
 98 |         p_corr = np.full((N, N), np.nan)
 99 |         p_corr[np.ix_(mask, mask)] = partial_corr(data[:, mask], data[:, ~mask])
100 | 
101 |         # For every non-index variable Z, compute partial correlation influence
102 |         # between other variables when Z is also held constant
103 |         p_corr_inf = np.full((N, N, N), np.nan)
104 |         for z in np.arange(N)[mask]:
105 |             m_new = mask.copy()  # New mask including variable Z
106 |             m_new[z] = False
107 | 
108 |             diff = p_corr[np.ix_(m_new, m_new)]
109 |             diff -= partial_corr(data[:, m_new], data[:, ~m_new])
110 |             p_corr_inf[np.ix_(m_new, m_new, [z])] = diff[:, :, np.newaxis]
111 | 
112 |             # Exclude the cases of Y = X
113 |             np.fill_diagonal(p_corr_inf[:, :, z], np.nan)
114 |             # Set PCI for X = Z to 0 for consistency after averaging
115 |             p_corr_inf[z, :, z] = 0
116 | 
117 |         # Obtain the average partial correlation influence
118 |         influence = np.zeros((N, N))  # Default self-influence by zero
119 |         influence[mask, mask] = np.nanmean(p_corr_inf[mask, mask], axis=1)
120 | 
121 |         influence[~mask, :] = np.inf  # Index variables influence all others
122 |         influence[:, ~mask] = 0  # but no one influences the index variables
123 | 
124 |         self.results['weights_matrix'] = influence
125 | 
126 |         # threshold the network
127 |         W_thresh = threshold(influence, threshold_type, **kwargs)
128 | 
129 |         # construct the network
130 |         self.results['graph'] = create_graph(W_thresh)
131 |         self.results['thresholded_matrix'] = W_thresh
132 | 
133 |         G = self.results['graph']
134 | 
135 |         return G
136 | 
137 | 
138 | def partial_corr(_vars, idx_vars):
139 |     """
140 |     Return the partial correlations between pairs of variables, given a set of
141 |     index variables held constant.
142 | 
143 |     Parameters
144 |     ----------
145 |     _vars (numpy.ndarray)
146 |         Variables of interests (which are columns of the array).
147 | 
148 |     idx_vars (numpy.ndarray)
149 |         Index variables to be held constant (which are columns of the array).
150 |         If the array has zero size, namely no index variable, return the
151 |         Pearson correlations between variables.
152 | 
153 |     Return
154 |     ------
155 |     p_corr (numpy.ndarray)
156 |          Square array of pairwise partial correlations between variables.
157 | 
158 |     Note
159 |     ----
160 |     Precondition: The index variables should not contain or synchronize with
161 |                   a variable of interests.
162 | 
163 |     """
164 |     if idx_vars.size == 0:
165 |         return np.corrcoef(_vars, rowvar=False)
166 |     else:
167 |         coef = linalg.lstsq(idx_vars, _vars)[0]  # Coefficients of regression
168 |         resid = _vars - idx_vars.dot(coef)  # Residuals
169 |         return np.corrcoef(resid, rowvar=False)
170 | 


--------------------------------------------------------------------------------
/tests/test_distance.py:
--------------------------------------------------------------------------------
  1 | """
  2 | test_distance.py
  3 | ----------------
  4 | 
  5 | Test distance algorithms.
  6 | 
  7 | """
  8 | 
  9 | import warnings
 10 | import numpy as np
 11 | import networkx as nx
 12 | from netrd import distance
 13 | from netrd.distance import BaseDistance
 14 | 
 15 | 
 16 | def test_same_graph():
 17 |     """The distance between two equal graphs must be zero."""
 18 |     G = nx.barbell_graph(10, 5)
 19 | 
 20 |     for label, obj in distance.__dict__.items():
 21 |         if isinstance(obj, type) and BaseDistance in obj.__bases__:
 22 |             dist = obj().dist(G, G)
 23 |             assert np.isclose(dist, 0.0), f"{label} fails same-graph test"
 24 | 
 25 | 
 26 | def test_different_graphs():
 27 |     """The distance between two different graphs must be nonzero."""
 28 |     ## NOTE: This test is not totally rigorous. For example, two different
 29 |     ## networks may have the same eigenvalues, thus a method that compares
 30 |     ## their eigenvalues would result in distance 0. However, this is very
 31 |     ## unlikely in the constructed case, so we rely on it for now.
 32 |     G1 = nx.fast_gnp_random_graph(100, 0.3)
 33 |     G2 = nx.barabasi_albert_graph(100, 5)
 34 | 
 35 |     for obj in distance.__dict__.values():
 36 |         if isinstance(obj, type) and BaseDistance in obj.__bases__:
 37 |             dist = obj().dist(G1, G2)
 38 |             assert dist > 0.0, f"{label} not nonzero"
 39 | 
 40 | 
 41 | def test_symmetry():
 42 |     """The distance between two graphs must be symmetric."""
 43 |     G1 = nx.barabasi_albert_graph(100, 4)
 44 |     G2 = nx.fast_gnp_random_graph(100, 0.3)
 45 | 
 46 |     for label, obj in distance.__dict__.items():
 47 |         if isinstance(obj, type) and BaseDistance in obj.__bases__:
 48 |             dist1 = obj().dist(G1, G2)
 49 |             dist2 = obj().dist(G2, G1)
 50 |             assert np.isclose(dist1, dist2), f"{label} not symmetric"
 51 | 
 52 | 
 53 | def test_quantum_jsd():
 54 |     """Run the above tests again using the collision entropy instead of the
 55 |     Von Neumann entropy to ensure that all the logic of the JSD implementation
 56 |     is tested.
 57 |     """
 58 | 
 59 |     with warnings.catch_warnings():
 60 |         warnings.filterwarnings("ignore", message="JSD is only a metric for 0 ≤ q < 2.")
 61 |         JSD = distance.QuantumJSD()
 62 |         G = nx.barbell_graph(10, 5)
 63 |         dist = JSD.dist(G, G, beta=0.1, q=2)
 64 |         assert np.isclose(dist, 0.0), "collision entropy fails same-graph test"
 65 | 
 66 |         G1 = nx.fast_gnp_random_graph(100, 0.3)
 67 |         G2 = nx.barabasi_albert_graph(100, 5)
 68 |         dist = JSD.dist(G1, G2, beta=0.1, q=2)
 69 |         assert dist > 0.0, "collision entropy not nonzero"
 70 | 
 71 |         G1 = nx.barabasi_albert_graph(100, 4)
 72 |         G2 = nx.fast_gnp_random_graph(100, 0.3)
 73 |         dist1 = JSD.dist(G1, G2, beta=0.1, q=2)
 74 |         dist2 = JSD.dist(G2, G1, beta=0.1, q=2)
 75 |         assert np.isclose(dist1, dist2), "collision entropy not symmetric"
 76 | 
 77 | 
 78 | def test_directed_input():
 79 |     with warnings.catch_warnings():
 80 |         warnings.filterwarnings(
 81 |             "ignore", message="Coercing directed graph to undirected."
 82 |         )
 83 |         G = nx.fast_gnp_random_graph(100, 0.3, directed=True)
 84 | 
 85 |         for label, obj in distance.__dict__.items():
 86 |             if isinstance(obj, type) and BaseDistance in obj.__bases__:
 87 |                 dist = obj().dist(G, G)
 88 |                 assert np.isclose(dist, 0.0), f"{label} not deterministic"
 89 | 
 90 |         G1 = nx.fast_gnp_random_graph(100, 0.3, directed=True)
 91 |         G2 = nx.fast_gnp_random_graph(100, 0.3, directed=True)
 92 | 
 93 |         for label, obj in distance.__dict__.items():
 94 |             if isinstance(obj, type) and BaseDistance in obj.__bases__:
 95 |                 dist1 = obj().dist(G1, G2)
 96 |                 dist2 = obj().dist(G2, G1)
 97 |                 assert np.isclose(dist1, dist2), f"{label} not symmetric"
 98 | 
 99 |         for obj in distance.__dict__.values():
100 |             if isinstance(obj, type) and BaseDistance in obj.__bases__:
101 |                 dist = obj().dist(G1, G2)
102 |                 assert dist > 0.0, f"{label} not nonzero"
103 | 
104 | 
105 | def test_weighted_input():
106 |     G1 = nx.barbell_graph(10, 5)
107 |     G2 = nx.barbell_graph(10, 5)
108 |     rand = np.random.RandomState(seed=42)
109 |     edge_weights = {e: rand.randint(0, 1000) for e in G2.edges}
110 |     nx.set_edge_attributes(G2, edge_weights, "weight")
111 |     assert nx.is_isomorphic(G1, G2)
112 | 
113 |     for label, obj in distance.__dict__.items():
114 |         with warnings.catch_warnings(record=True) as w:
115 |             warnings.simplefilter("always")
116 |             if isinstance(obj, type) and BaseDistance in obj.__bases__:
117 |                 dist = obj().dist(G1, G2)
118 |                 warning_triggered = False
119 |                 for warning in w:
120 |                     if "weighted" in str(warning.message):
121 |                         warning_triggered = True
122 |                 if not warning_triggered:
123 |                     assert not np.isclose(dist, 0.0), f"{label} = 0"
124 |                 else:
125 |                     assert np.isclose(dist, 0.0), f"{label} != 0"
126 | 
127 | 
128 | def test_isomorphic_input():
129 |     G1 = nx.fast_gnp_random_graph(150, 0.10)
130 | 
131 |     N = G1.order()
132 |     new_nodes = [(i + 5) % N for i in G1.nodes]
133 | 
134 |     # create G1 by permuting the adjacency matrix
135 |     new_adj_mat = nx.to_numpy_array(G1, nodelist=new_nodes)
136 |     G2 = nx.from_numpy_array(new_adj_mat)
137 | 
138 |     assert nx.is_isomorphic(G1, G2)
139 | 
140 |     # not all distances should be invariant under isomorphism
141 |     # document those here
142 |     EXCLUDED_DISTANCES = [
143 |         "Hamming",
144 |         "Frobenius",
145 |         "JaccardDistance",
146 |         "HammingIpsenMikhailov",
147 |         "ResistancePerturbation",
148 |         "LaplacianSpectral",
149 |         "PolynomialDissimilarity",
150 |         "DeltaCon",
151 |         "QuantumJSD",
152 |         "DistributionalNBD",
153 |         "NonBacktrackingSpectral",
154 |         "GraphDiffusion",
155 |     ]
156 | 
157 |     for label, obj in distance.__dict__.items():
158 |         print(label)
159 |         if (
160 |             isinstance(obj, type)
161 |             and BaseDistance in obj.__bases__
162 |             and label not in EXCLUDED_DISTANCES
163 |         ):
164 |             dist = obj().dist(G1, G2)
165 |             assert np.isclose(
166 |                 dist, 0.0, atol=1e-3
167 |             ), f"{label} not invariant under isomorphism"
168 | 


--------------------------------------------------------------------------------
/netrd/reconstruction/marchenko_pastur.py:
--------------------------------------------------------------------------------
  1 | """
  2 | marchenko_pastur.py
  3 | --------------
  4 | 
  5 | Graph reconstruction algorithm based on Marchenko, V. A., & Pastur, L. A. (1967).
  6 | Distribution of eigenvalues for some sets of random matrices. Matematicheskii
  7 | Sbornik, 114(4), 507-536.
  8 | 
  9 | author: Matteo Chinazzi
 10 | Submitted as part of the 2019 NetSI Collabathon.
 11 | """
 12 | 
 13 | from .base import BaseReconstructor
 14 | import numpy as np
 15 | import networkx as nx
 16 | from ..utilities import create_graph, threshold
 17 | 
 18 | 
 19 | class MarchenkoPastur(BaseReconstructor):
 20 |     """Uses Marchenko-Pastur law to remove noise."""
 21 | 
 22 |     def fit(
 23 |         self,
 24 |         TS,
 25 |         remove_largest=False,
 26 |         metric_distance=False,
 27 |         threshold_type='range',
 28 |         **kwargs
 29 |     ):
 30 |         r"""Create a correlation-based graph using Marchenko-Pastur law to remove noise.
 31 | 
 32 |         A signed graph is built by constructing a projection of the
 33 |         empirical correlation matrix generated from the time series data
 34 |         after having removed noisy components.  This method combines the
 35 |         results presented in [1]_, [2]_, and [3]_.
 36 | 
 37 |         The results dictionary also stores the weight matrix as
 38 |         `'weights_matrix'` and the thresholded version of the weight matrix
 39 |         as `'thresholded_matrix'`.
 40 | 
 41 |         Parameters
 42 |         ----------
 43 | 
 44 |         TS (np.ndarray)
 45 |             :math:`N \times L` array consisting of :math:`L` observations
 46 |             from :math:`N` sensors.
 47 | 
 48 |         remove_largest (bool), optional
 49 |             If ``False``, all the eigenvectors associated to the
 50 |             significant eigenvalues will be used to reconstruct the
 51 |             de-noised empirical correlation matrix. If ``True``, the
 52 |             eigenvector associated to the largest eigenvalue (normally
 53 |             known as the ``market`` mode, [2]) is going to be excluded from
 54 |             the recontruction step.  metric_distance (bool), optional: If
 55 |             ``False``, a signed graph is obtained.  The weights associated
 56 |             to the edges represent the de-noised correlation coefficient
 57 |             :math:`\rho_{i,j}` between time series :math:`i` and :math:`j`.
 58 |             If ``True``, the correlation is transformed by defining a
 59 |             metric distance between each pair of nodes where :math:`d_{i,j}
 60 |             = \sqrt{2(1-\rho_{i,j})}` as proposed in [3].  threshold_type
 61 |             (str): Which thresholding function to use on the matrix of
 62 |             weights. See `netrd.utilities.threshold.py` for
 63 |             documentation. Pass additional arguments to the thresholder
 64 |             using ``**kwargs``.
 65 | 
 66 |         Returns
 67 |         -------
 68 | 
 69 |         G (nx.Graph)
 70 |             A reconstructed graph with :math:`N` nodes.
 71 | 
 72 |         Examples
 73 |         --------
 74 |         .. code:: python
 75 | 
 76 |             import numpy as np
 77 |             import networkx as nx
 78 |             from matplotlib import pyplot as plt
 79 |             from netrd.reconstruction import MarchenkoPastur
 80 | 
 81 |             N = 250
 82 |             T = 300
 83 |             M = np.random.normal(size=(N,T))
 84 | 
 85 |             print('Create correlated time series')
 86 |             market_mode = 0.4*np.random.normal(size=(1,T))
 87 |             M += market_mode
 88 | 
 89 |             sector_modes = {d: 0.5*np.random.normal(size=(1,T)) for d in range(5)}
 90 |             for sector_mode, vals in sector_modes.items():
 91 |                 M[sector_mode*50:(sector_mode+1)*50,:] += vals
 92 | 
 93 |             print('Network reconstruction step')
 94 |             mp_net = MarchenkoPastur()
 95 |             G = mp_net.fit(M, only_positive=True)
 96 |             G_no_market = mp_net.fit(M, only_positive=True, remove_largest=True)
 97 | 
 98 |             print('Observed noisy correlation')
 99 |             C = np.corrcoef(M)
100 |             C[C<0] = 0 # remove negative values
101 |             np.fill_diagonal(C,0) # remove self-loops
102 |             G_noisy = nx.from_numpy_array(C) # create graph
103 | 
104 |             print('Plot observed noisy correlation graph')
105 |             fig, ax = plt.subplots()
106 |             nx.draw(G_noisy, ax=ax)
107 | 
108 |             print('Plot reconstructed correlation graph')
109 |             fig, ax = plt.subplots()
110 |             nx.draw(G, ax=ax)
111 | 
112 |             print('Plot reconstructed correlation graph without market mode')
113 |             fig, ax = plt.subplots()
114 |             nx.draw(G_no_market, ax=ax)
115 | 
116 | 
117 |         References
118 |         ----------
119 |         .. [1] Marchenko, V. A., & Pastur, L. A. (1967). Distribution of
120 |                eigenvalues for some sets of random
121 |                matrices. Matematicheskii Sbornik, 114(4), 507-536.
122 |                http://www.mathnet.ru/links/a8d2a49dec161f50c944d9a96298c35a/sm4101.pdf
123 | 
124 |         .. [2] Laloux, L., Cizeau, P., Bouchaud, J. P., & Potters,
125 |                M. (1999). Noise dressing of financial correlation
126 |                matrices. Physical review letters, 83(7), 1467.
127 |                https://journals.aps.org/prl/abstract/10.1103/PhysRevLett.83.1467
128 | 
129 |         .. [3] Bonanno, G., Caldarelli, G., Lillo, F., Micciche, S.,
130 |                Vandewalle, N., & Mantegna, R. N. (2004). Networks of
131 |                equities in financial markets. The European Physical Journal
132 |                B, 38(2), 363-371.
133 |                https://link.springer.com/article/10.1140/epjb/e2004-00129-6
134 | 
135 |         """
136 |         N, L = TS.shape
137 |         if N > L:
138 |             raise ValueError("L must be greater or equal than N.")
139 | 
140 |         Q = L / N
141 |         C = np.corrcoef(TS)  # Empirical correlation matrix
142 | 
143 |         w, v = np.linalg.eigh(C)  # Spectral decomposition of C
144 | 
145 |         w_min = 1 + 1 / Q - 2 * np.sqrt(1 / Q)
146 |         w_max = 1 + 1 / Q + 2 * np.sqrt(1 / Q)
147 | 
148 |         selected = (w < w_min) | (w > w_max)
149 | 
150 |         if selected.sum() == 0:
151 |             G = nx.empty_graph(n=N)
152 |             self.results['graph'] = G
153 |             return G
154 | 
155 |         if remove_largest:
156 |             selected[-1] = False
157 | 
158 |         w_signal = w[selected]
159 |         v_signal = v[:, selected]
160 | 
161 |         C_signal = v_signal.dot(np.diag(w_signal)).dot(v_signal.T)
162 | 
163 |         if metric_distance:
164 |             C_signal = np.sqrt(2 * (1 - C_signal))
165 | 
166 |         self.results['weights_matrix'] = C_signal
167 | 
168 |         # threshold signal matrix
169 | 
170 |         self.results['thresholded_matrix'] = threshold(
171 |             C_signal, threshold_type, **kwargs
172 |         )
173 | 
174 |         G = create_graph(self.results['thresholded_matrix'])
175 | 
176 |         self.results['graph'] = G
177 |         return G
178 | 


--------------------------------------------------------------------------------
/netrd/distance/distributional_nbd.py:
--------------------------------------------------------------------------------
  1 | """
  2 | distributional_nbd.py
  3 | ------
  4 | 
  5 | Distributional Non-backtracking Spectral Distance.
  6 | 
  7 | """
  8 | 
  9 | import numpy as np
 10 | import networkx as nx
 11 | import scipy.sparse as sp
 12 | from scipy.spatial.distance import euclidean, chebyshev
 13 | from ..utilities.graph import unweighted
 14 | 
 15 | from .base import BaseDistance
 16 | 
 17 | 
 18 | class DistributionalNBD(BaseDistance):
 19 |     """
 20 |     Distributional Non-backtracking Spectral Distance.
 21 | 
 22 |     Computes the distance between two graphs using the empirical spectral density
 23 |     of the non-backtracking operator.
 24 | 
 25 |     See:
 26 |     "Graph Comparison via the Non-backtracking Spectrum"
 27 |     A. Mellor & A. Grusovin
 28 |     arXiv:1812.05457 / 10.1103/PhysRevE.99.052309
 29 | 
 30 |     """
 31 | 
 32 |     VECTOR_DISTANCES = {'euclidean': euclidean, 'chebyshev': chebyshev}
 33 | 
 34 |     @unweighted
 35 |     def dist(
 36 |         self,
 37 |         G1,
 38 |         G2,
 39 |         sparse=False,
 40 |         shave=True,
 41 |         keep_evals=True,
 42 |         k=None,
 43 |         vector_distance='euclidean',
 44 |         **kwargs
 45 |     ):
 46 |         """
 47 |         Distributional Non-backtracking Spectral Distance.
 48 | 
 49 |         Parameters
 50 |         ----------
 51 | 
 52 |         G1, G2 (nx.Graph)
 53 |             The two graphs to compare.
 54 | 
 55 |         sparse (bool)
 56 |             If sparse, matrices and eigenvalues found using sparse methods.
 57 |             If sparse, parameter 'k' should also be specified.
 58 |             Default: False
 59 | 
 60 |         k (int)
 61 |             The number of largest eigenvalues to be calculated for the
 62 |             spectral density.
 63 | 
 64 |         vector_distance (str)
 65 |             The distance measure used to compare two empirical distributions.
 66 |             Currently available are 'euclidean' and 'chebyshev', implemented
 67 |             using SciPy.
 68 |             Default: 'euclidean'
 69 | 
 70 |         keep_evals (bool)
 71 |             If True, stores the eigenvalues of the reduced non-backtracking
 72 |             matrix in self.results['evals']
 73 |             Default: False
 74 | 
 75 | 
 76 |         Returns
 77 |         -------
 78 |         float
 79 |             The distance between `G1` and `G2`
 80 | 
 81 |         """
 82 |         B1 = reduced_hashimoto(G1, shave=shave, sparse=sparse, **kwargs)
 83 |         B2 = reduced_hashimoto(G2, shave=shave, sparse=sparse, **kwargs)
 84 | 
 85 |         # Find spectrum
 86 |         evals1 = nb_eigenvalues(B1, k=k)
 87 |         evals2 = nb_eigenvalues(B2, k=k)
 88 | 
 89 |         # Save spectrum
 90 |         if keep_evals:
 91 |             self.results['eigenvalues'] = (evals1, evals2)
 92 | 
 93 |         # Find rescaled spectral density
 94 |         distribution_1 = spectral_distribution(evals1)
 95 |         distribution_2 = spectral_distribution(evals2)
 96 | 
 97 |         # Compute distance
 98 |         distance_metric = self.__class__.VECTOR_DISTANCES[vector_distance]
 99 | 
100 |         return distance_metric(distribution_1, distribution_2)
101 | 
102 | 
103 | def shave_graph(graph):
104 |     """
105 |     Returns the two-core of a graph.
106 | 
107 |     Iteratively remove the nodes of degree 0 or 1, until all nodes have
108 |     degree at least 2.
109 | 
110 |     NOTE: duplicated from "nbd.py" to avoid excessive imports.
111 | 
112 |     """
113 |     core = graph.copy()
114 |     while True:
115 |         to_remove = [node for node, neighbors in core.adj.items() if len(neighbors) < 2]
116 |         core.remove_nodes_from(to_remove)
117 |         if len(to_remove) == 0:
118 |             break
119 |     return core
120 | 
121 | 
122 | def pseudo_hashimoto(graph):
123 |     """
124 |     Return the pseudo-Hashimoto matrix.
125 | 
126 |     The pseudo Hashimoto matrix of a graph is the block matrix defined as
127 |     B' = [0  D-I]
128 |          [-I  A ]
129 | 
130 |     Where D is the degree-diagonal matrix, I is the identity matrix and A
131 |     is the adjacency matrix.  The eigenvalues of B' are always eigenvalues
132 |     of B, the non-backtracking or Hashimoto matrix.
133 | 
134 |     Parameters
135 |     ----------
136 | 
137 |     graph (nx.Graph): A NetworkX graph object.
138 | 
139 |     Returns
140 |     -------
141 | 
142 |     A sparse matrix in csr format.
143 | 
144 |     NOTE: duplicated from "nbd.py" to avoid excessive imports.
145 | 
146 |     """
147 |     # Note: the rows of nx.adjacency_matrix(graph) are in the same order as
148 |     # the list returned by graph.nodes().
149 |     degrees = graph.degree()
150 |     degrees = sp.diags([degrees[n] for n in graph.nodes()])
151 |     adj = nx.adjacency_matrix(graph)
152 |     ident = sp.eye(graph.order())
153 |     pseudo = sp.bmat([[None, degrees - ident], [-ident, adj]])
154 |     return pseudo.asformat('csr')
155 | 
156 | 
157 | def reduced_hashimoto(graph, shave=True, sparse=True):
158 |     """
159 | 
160 | 
161 |     Parameters
162 |     ----------
163 | 
164 |     shave (bool)
165 |         If True, first reduce the graph to its two-core.
166 |         Else graph processed in its entirety.
167 | 
168 |     sparse (bool)
169 |         If True, returned matrix will be sparse,
170 |         else it will be dense.
171 | 
172 |     Returns
173 |     -------
174 | 
175 |     np.ndarray/sp.csr_matrix
176 |         The reduced Hashimoto Matrix.
177 | 
178 |     """
179 | 
180 |     if shave:
181 |         graph = shave_graph(graph)
182 |         if len(graph) == 0:
183 |             # We can provide a workaround for this case, however it is best
184 |             # that it is brought to the attention of the user.
185 |             raise NotImplementedError(
186 |                 "Graph two-core is empty: non-backtracking methods unsuitable."
187 |             )
188 | 
189 |     B = pseudo_hashimoto(graph)
190 | 
191 |     if not sparse:
192 |         B = B.todense()
193 | 
194 |     return B
195 | 
196 | 
197 | def nb_eigenvalues(B, k=None, **kwargs):
198 |     """
199 |     Calculates the eigenvalues of a matrix B.
200 | 
201 |     Detects whether B is sparse/dense and uses the appropriate method.
202 |     If B is sparse then parameter 'k' should be provided.
203 |     """
204 | 
205 |     if isinstance(B, np.ndarray):
206 |         return np.linalg.eigvals(B)
207 | 
208 |     elif isinstance(B, sp.csr_matrix):
209 |         random_state = np.random.RandomState(
210 |             1
211 |         )  # Ensures that eigenvalue calculation is deterministic.
212 |         return sp.linalg.eigs(
213 |             B, k=k, v0=random_state.random(B.shape[0]), return_eigenvectors=False
214 |         )
215 |     else:
216 |         raise Exception("Matrix must be of type np.ndarray or scipy.sparse.csr")
217 | 
218 | 
219 | def logr(r, rmax):
220 |     """
221 |     Logarithm to the base r.
222 | 
223 |     NOTE:Maps zero to zero as a special case.
224 |     """
225 | 
226 |     if r == 0:
227 |         return 0
228 |     return np.log(r) / np.log(rmax)
229 | 
230 | 
231 | def spectral_distribution(points, cumulative=True):
232 |     """
233 |     Returns the distribution of complex values (in r,theta-space).
234 |     """
235 | 
236 |     points = np.array([(np.abs(z), np.angle(z)) for z in points])
237 |     r, theta = np.split(points, 2, axis=1)
238 | 
239 |     r = np.array([logr(x, r.max()) for x in r])
240 | 
241 |     Z, R, THETA = np.histogram2d(
242 |         x=r[:, 0],
243 |         y=theta[:, 0],
244 |         bins=(np.linspace(0, 1, 101), np.linspace(0, np.pi, 101)),
245 |     )
246 | 
247 |     if cumulative:
248 |         Z = Z.cumsum(axis=0).cumsum(axis=1)
249 |         Z = Z / Z.max()
250 | 
251 |     return Z.flatten()
252 | 


--------------------------------------------------------------------------------