├── notebooks └── .gitkeep ├── .gitattributes ├── doc ├── source │ ├── read.rst │ ├── cluster.rst │ ├── entropy.rst │ ├── graph.rst │ ├── threshold.rst │ ├── standardize.rst │ ├── utilities.rst │ ├── dynamics.rst │ ├── index.rst │ ├── distance.rst │ ├── reconstruction.rst │ ├── conf.py │ └── tutorial.rst ├── Makefile └── make.bat ├── netrd_distance_example.png ├── netrd_dynamics_example.png ├── netrd_reconstruction_example.png ├── paper ├── netrd_distance_example.pdf └── allRecons_withGroundtruth_SherringtonKirkpatrick.pdf ├── requirements.txt ├── netrd ├── __init__.py ├── dynamics │ ├── __init__.py │ ├── base.py │ ├── single_unbiased_random_walker.py │ ├── voter.py │ ├── sherrington_kirkpatrick.py │ ├── ising_glauber.py │ ├── SIS.py │ ├── kuramoto.py │ └── lotka_volterra.py ├── utilities │ ├── read.py │ ├── __init__.py │ ├── cluster.py │ ├── standardize.py │ ├── graph.py │ ├── entropy.py │ └── threshold.py ├── distance │ ├── base.py │ ├── frobenius.py │ ├── jaccard_distance.py │ ├── __init__.py │ ├── degree_divergence.py │ ├── polynomial_dissimilarity.py │ ├── hamming.py │ ├── ipsen_mikhailov.py │ ├── netlsd.py │ ├── deltacon.py │ ├── communicability_jsd.py │ ├── resistance_perturbation.py │ ├── dk_series.py │ ├── netsimile.py │ ├── graph_diffusion.py │ ├── quantum_jsd.py │ └── distributional_nbd.py └── reconstruction │ ├── base.py │ ├── __init__.py │ ├── random.py │ ├── maximum_likelihood_estimation.py │ ├── graphical_lasso.py │ ├── correlation_matrix.py │ ├── free_energy_minimization.py │ ├── ou_inference.py │ ├── thouless_anderson_palmer.py │ ├── granger_causality.py │ ├── correlation_spanning_tree.py │ ├── mean_field.py │ ├── naive_transfer_entropy.py │ ├── partial_correlation_matrix.py │ ├── partial_correlation_influence.py │ └── marchenko_pastur.py ├── .readthedocs.yml ├── .github └── workflows │ ├── draft-pdf.yml │ └── python-ci.yml ├── LICENSE ├── tests ├── test_dynamics.py ├── test_utilities.py ├── test_reconstruction.py └── test_distance.py ├── setup.py ├── .gitignore └── README.md /notebooks/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | notebooks/* linguist-vendored 2 | -------------------------------------------------------------------------------- /doc/source/read.rst: -------------------------------------------------------------------------------- 1 | .. automodule:: netrd.utilities.read 2 | :members: 3 | :undoc-members: 4 | -------------------------------------------------------------------------------- /netrd_distance_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/netsiphd/netrd/HEAD/netrd_distance_example.png -------------------------------------------------------------------------------- /netrd_dynamics_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/netsiphd/netrd/HEAD/netrd_dynamics_example.png -------------------------------------------------------------------------------- /doc/source/cluster.rst: -------------------------------------------------------------------------------- 1 | .. 
automodule:: netrd.utilities.cluster
2 |    :members:
3 |    :undoc-members:
4 |
--------------------------------------------------------------------------------
/doc/source/entropy.rst:
--------------------------------------------------------------------------------
1 | .. automodule:: netrd.utilities.entropy
2 |    :members:
3 |    :undoc-members:
4 |
--------------------------------------------------------------------------------
/doc/source/graph.rst:
--------------------------------------------------------------------------------
1 | .. automodule:: netrd.utilities.graph
2 |    :members:
3 |    :undoc-members:
4 |
--------------------------------------------------------------------------------
/doc/source/threshold.rst:
--------------------------------------------------------------------------------
1 | .. automodule:: netrd.utilities.threshold
2 |    :members:
3 |    :undoc-members:
4 |
--------------------------------------------------------------------------------
/doc/source/standardize.rst:
--------------------------------------------------------------------------------
1 | .. automodule:: netrd.utilities.standardize
2 |    :members:
3 |    :undoc-members:
4 |
--------------------------------------------------------------------------------
/netrd_reconstruction_example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/netsiphd/netrd/HEAD/netrd_reconstruction_example.png
--------------------------------------------------------------------------------
/paper/netrd_distance_example.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/netsiphd/netrd/HEAD/paper/netrd_distance_example.pdf
--------------------------------------------------------------------------------
/paper/allRecons_withGroundtruth_SherringtonKirkpatrick.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/netsiphd/netrd/HEAD/paper/allRecons_withGroundtruth_SherringtonKirkpatrick.pdf
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | networkx>=2.2.0
2 | numpy>=1.16.0
3 | scipy>=1.0.0
4 | scikit-learn>=0.18.2
5 | numpydoc>=0.9
6 | ortools>=6.7
7 | sphinx-rtd-theme>=0.4
8 | Sphinx==2.0.1
--------------------------------------------------------------------------------
/doc/source/utilities.rst:
--------------------------------------------------------------------------------
1 | Utilities
2 | =========
3 |
4 | Common utilities for use within ``netrd``.
5 |
6 |
7 | .. toctree::
8 |    :maxdepth: 2
9 |    :caption: Submodules
10 |
11 |    cluster
12 |    entropy
13 |    graph
14 |    read
15 |    standardize
16 |    threshold
17 |
--------------------------------------------------------------------------------
/netrd/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | netrd
3 | -----
4 |
5 | netrd stands for Network Reconstruction and Distances. It is a repository
6 | of different algorithms for constructing a network from time series data,
7 | as well as for comparing two networks. It is the product of the Network
8 | Science Institute 2019 Collabathon.
9 |
10 | """
11 |
12 | from . import distance  # noqa
13 | from . import reconstruction  # noqa
14 | from . import dynamics  # noqa
15 | from .
import utilities # noqa 16 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | # Required 6 | version: 2 7 | 8 | # Build documentation in the docs/ directory with Sphinx 9 | sphinx: 10 | configuration: doc/source/conf.py 11 | 12 | # Optionally set the version of Python and requirements required 13 | python: 14 | version: 3.6 15 | install: 16 | - requirements: requirements.txt 17 | - method: pip 18 | path: . 19 | -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SOURCEDIR = source 8 | BUILDDIR = build 9 | 10 | # Put it first so that "make" without argument is like "make help". 11 | help: 12 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 13 | 14 | .PHONY: help Makefile 15 | 16 | # Catch-all target: route all unknown targets to Sphinx using the new 17 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 18 | %: Makefile 19 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /netrd/dynamics/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import BaseDynamics 2 | from .sherrington_kirkpatrick import SherringtonKirkpatrickIsing 3 | from .single_unbiased_random_walker import SingleUnbiasedRandomWalker 4 | from .kuramoto import Kuramoto 5 | from .lotka_volterra import LotkaVolterra 6 | from .ising_glauber import IsingGlauber 7 | from .branching_process import BranchingModel 8 | from .voter import VoterModel 9 | from .SIS import SISModel 10 | 11 | __all__ = [ 12 | 'BaseDynamics', 13 | 'SherringtonKirkpatrickIsing', 14 | 'SingleUnbiasedRandomWalker', 15 | 'Kuramoto', 16 | 'LotkaVolterra', 17 | 'IsingGlauber', 18 | 'BranchingModel', 19 | 'VoterModel', 20 | 'SISModel', 21 | ] 22 | -------------------------------------------------------------------------------- /.github/workflows/draft-pdf.yml: -------------------------------------------------------------------------------- 1 | on: [push] 2 | 3 | jobs: 4 | paper: 5 | runs-on: ubuntu-latest 6 | name: Paper Draft 7 | steps: 8 | - name: Checkout 9 | uses: actions/checkout@v2 10 | - name: Build draft PDF 11 | uses: openjournals/openjournals-draft-action@master 12 | with: 13 | journal: joss 14 | # This should be the path to the paper within your repo. 15 | paper-path: paper/paper.md 16 | - name: Upload 17 | uses: actions/upload-artifact@v1 18 | with: 19 | name: paper 20 | # This is the output path where Pandoc will write the compiled 21 | # PDF. Note, this should be the same directory as the input 22 | # paper.md 23 | path: paper/paper.pdf 24 | -------------------------------------------------------------------------------- /netrd/utilities/read.py: -------------------------------------------------------------------------------- 1 | """ 2 | read.py 3 | ------- 4 | 5 | Utilities for reading data. 
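For example (a sketch; ``TS.csv`` is a hypothetical comma-separated file
of sensor observations)::

    TS = read_time_series('TS.csv', delimiter=',')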
6 | 7 | author: Tim LaRock (timothylarock at gmail dot com) 8 | 9 | Submitted as part of the 2019 NetSI Collabathon. 10 | 11 | """ 12 | import numpy as np 13 | 14 | 15 | def read_time_series(filename, delimiter=','): 16 | r"""Read a time series from a file into an array. 17 | 18 | This function expects `filename` to be a comma separated text file with 19 | only data (no headers). 20 | 21 | Parameters 22 | ---------- 23 | filename (str) 24 | path to a file that will be read 25 | 26 | delimiter (str) 27 | delimiter in the file 28 | 29 | Returns 30 | ------- 31 | 32 | arr 33 | the array read from filename 34 | 35 | """ 36 | return np.loadtxt(filename, delimiter=delimiter) 37 | -------------------------------------------------------------------------------- /doc/source/dynamics.rst: -------------------------------------------------------------------------------- 1 | Dynamics 2 | ======== 3 | 4 | Dynamics classes allow the user to run simulations over a network. 5 | 6 | 7 | Base class 8 | ---------- 9 | .. autoclass:: netrd.dynamics.BaseDynamics 10 | 11 | 12 | Available dynamics 13 | ------------------ 14 | 15 | All of the following dynamics inherit from ``BaseDynamics`` and have the 16 | same general usage as above. 17 | 18 | .. autosummary:: 19 | :nosignatures: 20 | 21 | netrd.dynamics.BranchingModel 22 | netrd.dynamics.IsingGlauber 23 | netrd.dynamics.Kuramoto 24 | netrd.dynamics.LotkaVolterra 25 | netrd.dynamics.SISModel 26 | netrd.dynamics.SherringtonKirkpatrickIsing 27 | netrd.dynamics.SingleUnbiasedRandomWalker 28 | netrd.dynamics.VoterModel 29 | 30 | 31 | Reference 32 | --------- 33 | 34 | .. automodule:: netrd.dynamics 35 | :members: 36 | :undoc-members: 37 | -------------------------------------------------------------------------------- /doc/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /netrd/utilities/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | utilities 3 | ---------- 4 | 5 | Common utilities for use within ``netrd``. 
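A typical pattern (a sketch; ``W`` is a hypothetical dense weight matrix,
and some threshold rules take extra keyword arguments, documented in
``threshold.py``)::

    from netrd.utilities import threshold, create_graph
    A = threshold(W, 'range')  # pick any supported threshold rule
    G = create_graph(A)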
6 | 7 | """ 8 | from .threshold import threshold 9 | from .graph import ( 10 | create_graph, 11 | ensure_undirected, 12 | undirected, 13 | ensure_unweighted, 14 | unweighted, 15 | ) 16 | from .read import read_time_series 17 | from .cluster import clusterGraph 18 | from .standardize import mean_GNP_distance 19 | from .entropy import ( 20 | js_divergence, 21 | entropy_from_seq, 22 | joint_entropy, 23 | conditional_entropy, 24 | categorized_data, 25 | linear_bins, 26 | ) 27 | 28 | __all__ = [ 29 | 'threshold', 30 | 'clusterGraph', 31 | 'js_divergence', 32 | 'entropy_from_seq', 33 | 'joint_entropy', 34 | 'conditional_entropy', 35 | 'categorized_data', 36 | 'linear_bins', 37 | 'create_graph', 38 | 'undirected', 39 | 'ensure_undirected', 40 | 'unweighted', 41 | 'ensure_unweighted', 42 | 'read_time_series', 43 | 'mean_GNP_distance', 44 | ] 45 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 NetSI 2019 Collabathon Team 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a 6 | copy of this software and associated documentation files (the "Software"), 7 | to deal in the Software without restriction, including without limitation 8 | the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 | and/or sell copies of the Software, and to permit persons to whom the 10 | Software is furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 | DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /tests/test_dynamics.py: -------------------------------------------------------------------------------- 1 | """ 2 | test_dynamics.py 3 | ---------------- 4 | 5 | Test dynamics algorithms. 
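These tests run under ``pytest`` from the ``tests/`` directory, as in the
CI workflow (``.github/workflows/python-ci.yml``).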
6 |
7 | """
8 |
9 | import networkx as nx
10 | from netrd import dynamics
11 | from netrd.dynamics import BaseDynamics
12 | from netrd.dynamics import LotkaVolterra
13 |
14 |
15 | def test_dynamics_valid_dimensions():
16 |     """Dynamics models should return N x L arrays."""
17 |
18 |     G = nx.barbell_graph(10, 5)
19 |     N = G.number_of_nodes()
20 |
21 |     for L in [25, 100]:
22 |         for obj in dynamics.__dict__.values():
23 |             if isinstance(obj, type) and BaseDynamics in obj.__bases__:
24 |                 TS = obj().simulate(G, L)
25 |                 assert TS.shape == (N, L), f"{obj.__name__} has wrong dimensions"
26 |
27 |     assert BaseDynamics().simulate(G, 25).shape == (N, 25)
28 |     assert BaseDynamics().simulate(G, 100).shape == (N, 100)
29 |
30 |
31 | def test_lotka_volterra():
32 |     """Test Lotka Volterra simulation"""
33 |     g = nx.fast_gnp_random_graph(10, 0.001)
34 |     lv_model = LotkaVolterra()
35 |     assert lv_model.simulate(g, 100, stochastic=False).shape == (10, 100)
36 |     assert lv_model.simulate(g, 100, stochastic=True).shape == (10, 100)
37 |
--------------------------------------------------------------------------------
/.github/workflows/python-ci.yml:
--------------------------------------------------------------------------------
1 | name: build
2 |
3 | on:
4 |   push:
5 |     branches: [ master ]
6 |   pull_request:
7 |     branches: [ master ]
8 |
9 | jobs:
10 |   build:
11 |
12 |     runs-on: ${{ matrix.os }}
13 |     strategy:
14 |       matrix:
15 |         os: [macos-latest, ubuntu-latest]
16 |         python-version: ['3.7', '3.8', '3.9', '3.10', '3.11']
17 |
18 |     steps:
19 |     - uses: actions/checkout@v2
20 |     - name: Set up Python ${{ matrix.python-version }}
21 |       uses: actions/setup-python@v2
22 |       with:
23 |         python-version: ${{ matrix.python-version }}
24 |     - name: Install dependencies
25 |       run: |
26 |         python -m pip install --upgrade pip
27 |         pip install black pytest flake8
28 |         if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
29 |         pip install -e .
30 |     - name: Lint with black
31 |       run: |
32 |         black --skip-string-normalization --check netrd
33 |         black --skip-string-normalization --check tests
34 |     - name: Check for unused imports with flake8
35 |       run: |
36 |         flake8 --select=F401,F403 netrd
37 |         flake8 --select=F401,F403 tests
38 |     - name: Test with pytest
39 |       run: |
40 |         cd tests/
41 |         pytest
42 |
--------------------------------------------------------------------------------
/netrd/dynamics/base.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | class BaseDynamics:
5 |     """Base class for all dynamics processes.
6 |
7 |     The basic usage is as follows:
8 |
9 |     >>> ground_truth = nx.read_edgelist("ground_truth.txt")
10 |     >>> dynamics_model = Dynamics()
11 |     >>> synthetic_TS = dynamics_model.simulate(ground_truth, <L>)
12 |     >>> # G = Reconstructor().fit(synthetic_TS)
13 |
14 |     This produces a numpy array of time series data.
15 |
16 |     """
17 |
18 |     def __init__(self):
19 |         self.results = {}
20 |
21 |     def simulate(self, G, L):
22 |         r"""Simulate dynamics on a ground truth network.
23 |
24 |         The results dictionary stores the ground truth network as
25 |         `'ground_truth'`.
26 |
27 |         Parameters
28 |         ----------
29 |
30 |         G (nx.Graph)
31 |             the input (ground-truth) graph with :math:`N` nodes.
32 |
33 |         L (int)
34 |             the length of the desired time series.
35 |
36 |         Returns
37 |         -------
38 |
39 |         TS (np.ndarray)
40 |             an :math:`N \times L` array of synthetic time series data.
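        Examples
        --------
        A sketch of the shape contract, using the ``VoterModel`` subclass
        defined elsewhere in this package:

        .. code:: python

            import networkx as nx
            from netrd.dynamics import VoterModel

            G = nx.karate_club_graph()
            TS = VoterModel().simulate(G, 100)
            assert TS.shape == (G.number_of_nodes(), 100)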
41 |
42 |         """
43 |         N = G.number_of_nodes()
44 |         self.results['ground_truth'] = G
45 |         self.results['TS'] = np.ones((N, L))
46 |         return self.results['TS']
47 |
--------------------------------------------------------------------------------
/doc/source/index.rst:
--------------------------------------------------------------------------------
1 | netrd: A library for network {reconstruction, distances, dynamics}
2 | ======================================================================
3 |
4 | This library provides a consistent, NetworkX-based interface to various
5 | utilities for graph distances, graph reconstruction from time series
6 | data, and simulated dynamics on networks.
7 |
8 | To see the library in action, visit the `netrd
9 | explorer `__.
10 |
11 | Installation
12 | ============
13 |
14 | ::
15 |
16 |    git clone https://github.com/netsiphd/netrd
17 |    cd netrd
18 |    pip install .
19 |
20 | Tutorial
21 | ========
22 |
23 | A tutorial on using the library can be found `here `__. To see
24 | more advanced usage of the library, refer to `this
25 | notebook `__.
26 |
27 | Contributing
28 | ============
29 |
30 | Contributing guidelines can be found in
31 | `CONTRIBUTING.md `__.
32 |
33 | .. toctree::
34 |    :maxdepth: 1
35 |    :caption: Contents
36 |
37 |    tutorial
38 |    dynamics
39 |    distance
40 |    reconstruction
41 |    utilities
42 |
43 |
44 | Indices and tables
45 | ==================
46 |
47 | * :ref:`genindex`
48 | * :ref:`modindex`
49 | * :ref:`search`
50 |
--------------------------------------------------------------------------------
/doc/source/distance.rst:
--------------------------------------------------------------------------------
1 | Distance
2 | ========
3 |
4 | Graph distance methods to compare two networks.
5 |
6 |
7 | Base class
8 | ----------
9 | .. autoclass:: netrd.distance.BaseDistance
10 |
11 |
12 | Available distances
13 | -------------------
14 |
15 | All of the following algorithms inherit from ``BaseDistance`` and have the
16 | same general usage as above.
17 |
18 | .. autosummary::
19 |    :nosignatures:
20 |
21 |    netrd.distance.CommunicabilityJSD
22 |    netrd.distance.DegreeDivergence
23 |    netrd.distance.DeltaCon
24 |    netrd.distance.DistributionalNBD
25 |    netrd.distance.dkSeries
26 |    netrd.distance.DMeasure
27 |    netrd.distance.Frobenius
28 |    netrd.distance.GraphDiffusion
29 |    netrd.distance.Hamming
30 |    netrd.distance.HammingIpsenMikhailov
31 |    netrd.distance.IpsenMikhailov
32 |    netrd.distance.JaccardDistance
33 |    netrd.distance.LaplacianSpectral
34 |    netrd.distance.NonBacktrackingSpectral
35 |    netrd.distance.NetLSD
36 |    netrd.distance.NetSimile
37 |    netrd.distance.OnionDivergence
38 |    netrd.distance.PolynomialDissimilarity
39 |    netrd.distance.PortraitDivergence
40 |    netrd.distance.QuantumJSD
41 |    netrd.distance.ResistancePerturbation
42 |
43 |
44 | Reference
45 | ---------
46 |
47 | .. automodule:: netrd.distance
48 |    :members:
49 |    :undoc-members:
50 |
--------------------------------------------------------------------------------
/netrd/distance/base.py:
--------------------------------------------------------------------------------
1 | class BaseDistance:
2 |     """Base class for all distance algorithms.
3 |
4 |     The basic usage of a distance algorithm is as follows:
5 |
6 |     >>> dist_obj = DistanceAlgorithm()
7 |     >>> distance = dist_obj.dist(G1, G2, <kwargs>)
8 |     >>> # or alternatively: distance = dist_obj.results['dist']
9 |
10 |     Here, `G1` and `G2` are ``nx.Graph`` objects (or subclasses such as
11 |     ``nx.DiGraph``).
The results dictionary holds the distance value, as
12 |     well as any other values that were computed as a side effect.
13 |
14 |     """
15 |
16 |     def __init__(self):
17 |         self.results = {}
18 |
19 |     def __call__(self, *args, **kwargs):
20 |         return self.dist(*args, **kwargs)
21 |
22 |     def dist(self, G1, G2):
23 |         """Compute distance between two graphs.
24 |
25 |         Values computed as side effects of the distance method can be found
26 |         in self.results.
27 |
28 |         Parameters
29 |         ----------
30 |
31 |         G1, G2 (nx.Graph): two graphs.
32 |
33 |         Returns
34 |         -------
35 |
36 |         distance (float).
37 |
38 |         """
39 |         dist = -1  # compute the distance
40 |         self.results['dist'] = dist  # store dist in self.results
41 |         # self.results[..] = ..  # also store other values if needed
42 |         return dist  # return only one value!
43 |
--------------------------------------------------------------------------------
/doc/source/reconstruction.rst:
--------------------------------------------------------------------------------
1 | Reconstruction
2 | ==============
3 |
4 | Algorithms to reconstruct a graph from time series data.
5 |
6 |
7 | Base class
8 | ----------
9 | .. autoclass:: netrd.reconstruction.BaseReconstructor
10 |
11 |
12 | Available algorithms
13 | --------------------
14 |
15 | All of the following algorithms inherit from ``BaseReconstructor`` and have
16 | the same general usage as above.
17 |
18 | .. autosummary::
19 |    :nosignatures:
20 |
21 |    netrd.reconstruction.ConvergentCrossMapping
22 |    netrd.reconstruction.CorrelationMatrix
23 |    netrd.reconstruction.CorrelationSpanningTree
24 |    netrd.reconstruction.FreeEnergyMinimization
25 |    netrd.reconstruction.GrangerCausality
26 |    netrd.reconstruction.GraphicalLasso
27 |    netrd.reconstruction.MarchenkoPastur
28 |    netrd.reconstruction.MaximumLikelihoodEstimation
29 |    netrd.reconstruction.MeanField
30 |    netrd.reconstruction.MutualInformationMatrix
31 |    netrd.reconstruction.NaiveTransferEntropy
32 |    netrd.reconstruction.OUInference
33 |    netrd.reconstruction.OptimalCausationEntropy
34 |    netrd.reconstruction.PartialCorrelationInfluence
35 |    netrd.reconstruction.PartialCorrelationMatrix
36 |    netrd.reconstruction.RandomReconstructor
37 |    netrd.reconstruction.ThoulessAndersonPalmer
38 |
39 |
40 | Reference
41 | ---------
42 |
43 | .. automodule:: netrd.reconstruction
44 |    :members:
45 |    :undoc-members:
46 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import setuptools
2 |
3 |
4 | with open('requirements.txt') as file:
5 |     requires = [line.strip() for line in file if not line.startswith('#')]
6 |
7 | with open('README.md') as fin:
8 |     # read the first section of README - set between the first two '#' lines -
9 |     # as long_description, and use the first section header as description.
10 |     long_description = ""
11 |     at_first_section = False
12 |     read = iter(fin.readlines())
13 |     for line in read:
14 |         if at_first_section:
15 |             break
16 |         at_first_section = line.startswith('#')
17 |         description = line[1:].strip()
18 |         long_description += line
19 |     for line in read:
20 |         if line.startswith('#'):
21 |             break
22 |         long_description += line
23 |     long_description = long_description.strip()
24 |
25 |
26 | setuptools.setup(
27 |     name='netrd',
28 |     version='0.3.0',
29 |     author='NetSI 2019 Collabathon Team',
30 |     author_email='stefanmccabe@gmail.com',
31 |     description=description,
32 |     long_description=long_description,
33 |     long_description_content_type='text/markdown',
34 |     url='https://github.com/netsiphd/netrd',
35 |     packages=setuptools.find_packages(),
36 |     install_requires=requires,
37 |     classifiers=[
38 |         'Programming Language :: Python :: 3',
39 |         'License :: OSI Approved :: MIT License',
40 |         'Operating System :: OS Independent',
41 |     ],
42 | )
43 |
--------------------------------------------------------------------------------
/netrd/distance/frobenius.py:
--------------------------------------------------------------------------------
1 | """
2 | frobenius.py
3 | ------------
4 |
5 | Frobenius norm between two adjacency matrices.
6 |
7 | """
8 |
9 | import numpy as np
10 | import networkx as nx
11 | from .base import BaseDistance
12 | from ..utilities.graph import unweighted
13 |
14 |
15 | class Frobenius(BaseDistance):
16 |     """The Frobenius distance between the adjacency matrices of two graphs."""
17 |
18 |     @unweighted
19 |     def dist(self, G1, G2):
20 |         r"""Frobenius distance between two graphs.
21 |
22 |         If :math:`a_{ij}` and :math:`b_{ij}` are the two adjacency matrices
23 |         we define
24 |
25 |         .. math::
26 |             d(G1, G2) = \sqrt{\sum_{i,j} |a_{ij} - b_{ij}|^2}
27 |
28 |
29 |         The results dictionary also stores a 2-tuple of the underlying
30 |         adjacency matrices in the key `'adjacency_matrices'`.
31 |
32 |         Parameters
33 |         ----------
34 |         G1, G2 (nx.Graph)
35 |             two graphs to compare
36 |
37 |         Returns
38 |         -------
39 |         float
40 |             the distance between `G1` and `G2`
41 |
42 |         Notes
43 |         -----
44 |
45 |         The graphs must have the same number of nodes.
46 |
47 |         """
48 |
49 |         adj1 = nx.to_numpy_array(G1)
50 |         adj2 = nx.to_numpy_array(G2)
51 |         dist = np.linalg.norm((adj1 - adj2))
52 |         self.results['dist'] = dist
53 |         self.results['adjacency_matrices'] = adj1, adj2
54 |         return dist
55 |
--------------------------------------------------------------------------------
/netrd/reconstruction/base.py:
--------------------------------------------------------------------------------
1 | import networkx as nx
2 |
3 |
4 | class BaseReconstructor:
5 |     r"""Base class for graph reconstruction algorithms.
6 |
7 |     The basic usage of a graph reconstruction algorithm is as follows:
8 |
9 |     >>> reconstructor = ReconstructionAlgorithm()
10 |     >>> G = reconstructor.fit(TS, <kwargs>)
11 |     >>> # or alternately, G = reconstructor.results['graph']
12 |
13 |     Here, `TS` is an :math:`N \times L` numpy array consisting of :math:`L`
14 |     observations for each of :math:`N` sensors. This constrains the graphs
15 |     to have integer-valued nodes.
16 |
17 |     The ``results`` dict object, in addition to containing the graph
18 |     object, may also contain objects created as a side effect of
19 |     reconstructing the network, which may be useful for debugging or
20 |     considering goodness of fit. What is returned will vary between
21 |     reconstruction algorithms.
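    For instance (a sketch; ``CorrelationMatrix`` is one concrete subclass in
    this package, and ``TS`` is a hypothetical :math:`N \times L` array):

    >>> from netrd.reconstruction import CorrelationMatrix
    >>> recon = CorrelationMatrix()
    >>> G = recon.fit(TS)
    >>> W = recon.results['weights_matrix']  # a side-effect value, when stored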
22 |
23 |     """
24 |
25 |     def __init__(self):
26 |         self.results = {}
27 |
28 |     def fit(self, TS, **kwargs):
29 |         """Reconstruct a graph from time series TS.
30 |
31 |         Parameters
32 |         ----------
33 |         TS (np.ndarray): Array consisting of :math:`L` observations from :math:`N` sensors.
34 |
35 |         Returns
36 |         -------
37 |         G (nx.Graph): A reconstructed graph with :math:`N` nodes.
38 |
39 |         """
40 |         G = nx.Graph()  # reconstruct the graph
41 |         self.results['graph'] = G  # and store it in self.results
42 |         # self.results[..] = ..  # also store other values if needed
43 |         return G
44 |
--------------------------------------------------------------------------------
/netrd/distance/jaccard_distance.py:
--------------------------------------------------------------------------------
1 | """
2 | jaccard_distance.py
3 | -------------------
4 |
5 | Graph distance based on the Jaccard index between edge sets.
6 |
7 | author: David Saffo
8 | email: saffo.d@husky.neu.edu
9 | Submitted as part of the 2019 NetSI Collabathon.
10 |
11 | """
12 |
13 | from .base import BaseDistance
14 | from ..utilities import unweighted
15 |
16 |
17 | class JaccardDistance(BaseDistance):
18 |     """Jaccard distance between edge sets."""
19 |
20 |     @unweighted
21 |     def dist(self, G1, G2):
22 |         r"""Compute the Jaccard index between two graphs.
23 |
24 |         The Jaccard index between two sets
25 |
26 |         .. math::
27 |             J(A, B) = \frac{|A \cap B|}{|A \cup B|}
28 |
29 |         provides a measure of similarity between sets. Here, we use the edge
30 |         sets of two graphs. The index, a measure of similarity, is converted to
31 |         a distance
32 |
33 |         .. math::
34 |             d_J(A, B) = 1 - J(A, B)
35 |
36 |         for consistency with other graph distances.
37 |
38 |         Parameters
39 |         ----------
40 |
41 |         G1, G2 (nx.Graph)
42 |             two graphs to be compared.
43 |
44 |         Returns
45 |         -------
46 |
47 |         dist (float)
48 |             the distance between G1 and G2.
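        Examples
        --------
        A worked sketch: a 4-node path and a 4-node cycle share three
        edges and have four edges in their union, so
        :math:`d_J = 1 - 3/4 = 0.25`.

        .. code:: python

            import networkx as nx
            d = JaccardDistance().dist(nx.path_graph(4), nx.cycle_graph(4))
            # d == 0.25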
49 | 50 | """ 51 | 52 | e1 = set(G1.edges) 53 | e2 = set(G2.edges) 54 | cup = set.union(e1, e2) 55 | cap = set.intersection(e1, e2) 56 | 57 | dist = 1 - len(cap) / len(cup) 58 | 59 | self.results["dist"] = dist 60 | return dist 61 | -------------------------------------------------------------------------------- /netrd/distance/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import BaseDistance 2 | from .hamming import Hamming 3 | from .frobenius import Frobenius 4 | from .portrait_divergence import PortraitDivergence 5 | from .jaccard_distance import JaccardDistance 6 | from .ipsen_mikhailov import IpsenMikhailov 7 | from .hamming_ipsen_mikhailov import HammingIpsenMikhailov 8 | from .resistance_perturbation import ResistancePerturbation 9 | from .netsimile import NetSimile 10 | from .netlsd import NetLSD 11 | from .laplacian_spectral_method import LaplacianSpectral 12 | from .polynomial_dissimilarity import PolynomialDissimilarity 13 | from .degree_divergence import DegreeDivergence 14 | from .onion_divergence import OnionDivergence 15 | from .deltacon import DeltaCon 16 | from .quantum_jsd import QuantumJSD 17 | from .communicability_jsd import CommunicabilityJSD 18 | from .distributional_nbd import DistributionalNBD 19 | from .dk_series import dkSeries 20 | from .dmeasure import DMeasure 21 | from .nbd import NonBacktrackingSpectral 22 | from .graph_diffusion import GraphDiffusion 23 | 24 | __all__ = [ 25 | 'BaseDistance', 26 | 'Hamming', 27 | 'Frobenius', 28 | 'PortraitDivergence', 29 | 'JaccardDistance', 30 | 'IpsenMikhailov', 31 | 'HammingIpsenMikhailov', 32 | 'ResistancePerturbation', 33 | 'NetSimile', 34 | 'NetLSD', 35 | 'LaplacianSpectral', 36 | 'PolynomialDissimilarity', 37 | 'DegreeDivergence', 38 | 'OnionDivergence', 39 | 'DeltaCon', 40 | 'QuantumJSD', 41 | 'CommunicabilityJSD', 42 | 'DistributionalNBD', 43 | 'dkSeries', 44 | 'DMeasure', 45 | 'NonBacktrackingSpectral', 46 | 'GraphDiffusion', 47 | ] 48 | -------------------------------------------------------------------------------- /netrd/reconstruction/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import BaseReconstructor 2 | from .random import RandomReconstructor 3 | from .correlation_matrix import CorrelationMatrix 4 | from .partial_correlation_matrix import PartialCorrelationMatrix 5 | from .partial_correlation_influence import PartialCorrelationInfluence 6 | from .free_energy_minimization import FreeEnergyMinimization 7 | from .mean_field import MeanField 8 | from .thouless_anderson_palmer import ThoulessAndersonPalmer 9 | from .maximum_likelihood_estimation import MaximumLikelihoodEstimation 10 | from .convergent_cross_mapping import ConvergentCrossMapping 11 | from .mutual_information_matrix import MutualInformationMatrix 12 | from .ou_inference import OUInference 13 | from .graphical_lasso import GraphicalLasso 14 | from .marchenko_pastur import MarchenkoPastur 15 | from .naive_transfer_entropy import NaiveTransferEntropy 16 | from .granger_causality import GrangerCausality 17 | from .optimal_causation_entropy import OptimalCausationEntropy 18 | from .correlation_spanning_tree import CorrelationSpanningTree 19 | 20 | __all__ = [ 21 | 'BaseReconstructor', 22 | 'RandomReconstructor', 23 | 'CorrelationMatrix', 24 | 'PartialCorrelationMatrix', 25 | 'PartialCorrelationInfluence', 26 | 'FreeEnergyMinimization', 27 | 'ThoulessAndersonPalmer', 28 | 'MeanField', 29 | 'MaximumLikelihoodEstimation', 30 | 
'ConvergentCrossMapping',
31 |     'MutualInformationMatrix',
32 |     'OUInference',
33 |     'GraphicalLasso',
34 |     'MarchenkoPastur',
35 |     'NaiveTransferEntropy',
36 |     'GrangerCausality',
37 |     'OptimalCausationEntropy',
38 |     'CorrelationSpanningTree',
39 | ]
40 |
--------------------------------------------------------------------------------
/netrd/reconstruction/random.py:
--------------------------------------------------------------------------------
1 | """
2 | random.py
3 | ---------
4 |
5 | Reconstruct a network from a random matrix
6 | not taking the time series into account.
7 |
8 | author: Brennan Klein
9 | email: klein.br@husky.neu.edu
10 | Submitted as part of the 2019 NetSI Collabathon.
11 |
12 | """
13 |
14 | from .base import BaseReconstructor
15 | import numpy as np
16 | from ..utilities import create_graph, threshold
17 |
18 |
19 | class RandomReconstructor(BaseReconstructor):
20 |     """Returns a random graph (dummy class)."""
21 |
22 |     def fit(self, TS, threshold_type='range', **kwargs):
23 |         """Return a graph built by thresholding a random weight matrix.
24 |
25 |         The results dictionary also stores the weight matrix as
26 |         `'weights_matrix'` and the thresholded version of the weight matrix
27 |         as `'thresholded_matrix'`.
28 |
29 |         Parameters
30 |         ----------
31 |
32 |         TS (np.ndarray)
33 |             array consisting of :math:`L` observations from :math:`N` sensors.
34 |
35 |         threshold_type (str)
36 |             Which thresholding function to use on the matrix of
37 |             weights. See `netrd.utilities.threshold.py` for
38 |             documentation. Pass additional arguments to the thresholder
39 |             using ``**kwargs``.
40 |
41 |         Returns
42 |         -------
43 |         G (nx.Graph)
44 |             a reconstructed graph with :math:`N` nodes.
45 |
46 |         """
47 |         N, L = TS.shape
48 |         W = np.random.rand(N, N)
49 |         A = threshold(W, threshold_type, **kwargs)
50 |         G = create_graph(A)
51 |         self.results['graph'] = G
52 |         self.results['weights_matrix'] = W
53 |         self.results['thresholded_matrix'] = A
54 |         return G
55 |
--------------------------------------------------------------------------------
/netrd/utilities/cluster.py:
--------------------------------------------------------------------------------
1 | """
2 | cluster.py
3 | ----------
4 |
5 | Utilities for creating a seriated/ordered adjacency matrix with
6 | hierarchical clustering.
7 |
8 | author: David Saffo (saffo.d@husky.neu.edu)
9 |
10 | Submitted as part of the 2019 NetSI Collabathon.
11 |
12 | """
13 | import networkx as nx
14 | from scipy.cluster.hierarchy import dendrogram, linkage
15 |
16 |
17 | def clusterGraph(G, method='single', metric='euclidean', optimal_ordering=False):
18 |     """Create seriated adjacency matrix.
19 |
20 |     Parameters
21 |     ----------
22 |
23 |     G (nx.Graph)
24 |         a networkx graph
25 |
26 |     method (str)
27 |         the linkage method (clustering algorithm) to use; for options, see [1].
28 |
29 |     metric (str)
30 |         the distance metric to use
31 |
32 |     optimal_ordering (bool)
33 |         if True, tries to minimize the distance between successive indexes
34 |
35 |     Returns
36 |     -------
37 |
38 |     adjClustered (np.ndarray)
39 |         a numpy array with rows and columns reordered based on clustering
40 |
41 |     order (list)
42 |         a list with the new index order for rows and columns
43 |
44 |     dend (dict)
45 |         a dictionary with the hierarchy for the dendrogram
46 |
47 |     link (np.ndarray)
48 |         a linkage matrix with results from clustering
49 |
50 |     References
51 |     ----------
52 |
53 |     [1] https://docs.scipy.org/doc/scipy/reference/generated/scipy.cluster.hierarchy.linkage.html
54 |
55 |     """
56 |     adj = nx.to_numpy_array(G)
57 |     link = linkage(adj, method, metric, optimal_ordering)
58 |     dend = dendrogram(link, no_plot=True)
59 |     order = dend['leaves']
60 |     adjClustered = adj[order, :]
61 |     adjClustered = adjClustered[:, order]
62 |     return adjClustered, order, dend, link
63 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | MANIFEST
27 |
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 |
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 |
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 | .pytest_cache/
49 |
50 | # Translations
51 | *.mo
52 | *.pot
53 |
54 | # Django stuff:
55 | *.log
56 | local_settings.py
57 | db.sqlite3
58 |
59 | # Flask stuff:
60 | instance/
61 | .webassets-cache
62 |
63 | # Scrapy stuff:
64 | .scrapy
65 |
66 | # Sphinx documentation
67 | docs/_build/
68 | docs/build/
69 |
70 | # PyBuilder
71 | target/
72 |
73 | # Jupyter Notebook
74 | .ipynb_checkpoints
75 |
76 | # pyenv
77 | .python-version
78 |
79 | # celery beat schedule file
80 | celerybeat-schedule
81 |
82 | # SageMath parsed files
83 | *.sage.py
84 |
85 | # Environments
86 | .env
87 | .venv
88 | env/
89 | venv/
90 | ENV/
91 | env.bak/
92 | venv.bak/
93 |
94 | # Spyder project settings
95 | .spyderproject
96 | .spyproject
97 |
98 | # Rope project settings
99 | .ropeproject
100 |
101 | # mkdocs documentation
102 | /site
103 |
104 | # mypy
105 | .mypy_cache/
106 |
107 | # mac dstore
108 | .DS_Store
109 |
--------------------------------------------------------------------------------
/netrd/distance/degree_divergence.py:
--------------------------------------------------------------------------------
1 | """
2 | degree_divergence.py
3 | --------------------
4 |
5 | Baseline distance measure: the Jensen-Shannon divergence
6 | between the two degree distributions.
7 |
8 | author: Stefan McCabe
9 | email: stefanmccabe at gmail dot com
10 | Submitted as part of the 2019 NetSI Collabathon.
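Example (a sketch, assuming ``networkx`` is imported as ``nx``):
``DegreeDivergence().dist(nx.star_graph(9), nx.path_graph(10))`` compares a
hub-dominated degree sequence with that of a chain.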
11 |
12 | """
13 |
14 | from collections import Counter
15 | import numpy as np
16 | import networkx as nx
17 | from .base import BaseDistance
18 | from ..utilities import entropy, unweighted
19 |
20 |
21 | class DegreeDivergence(BaseDistance):
22 |     """Compare two degree distributions."""
23 |
24 |     @unweighted
25 |     def dist(self, G1, G2):
26 |         """Jensen-Shannon divergence between degree distributions.
27 |
28 |         Assumes undirected networks.
29 |
30 |         Parameters
31 |         ----------
32 |
33 |         G1, G2 (nx.Graph)
34 |             two networkx graphs to be compared.
35 |
36 |         Returns
37 |         -------
38 |
39 |         dist (float)
40 |             the distance between `G1` and `G2`.
41 |
42 |         """
43 |
44 |         def degree_vector_histogram(graph):
45 |             """Return the degrees in both formats.
46 |
47 |             max_deg is the largest possible degree; the histogram has
48 |             max_deg + 1 bins and is padded with zeros.
49 |
50 |             """
51 |             vec = np.array(list(dict(graph.degree()).values()))
52 |             if next(nx.selfloop_edges(graph), False):
53 |                 max_deg = len(graph)
54 |             else:
55 |                 max_deg = len(graph) - 1
56 |             counter = Counter(vec)
57 |             hist = np.array([counter[v] for v in range(max_deg + 1)])
58 |             return vec, hist
59 |
60 |         deg1, hist1 = degree_vector_histogram(G1)
61 |         deg2, hist2 = degree_vector_histogram(G2)
62 |         self.results['degree_vectors'] = deg1, deg2
63 |         self.results['degree_histograms'] = hist1, hist2
64 |
65 |         max_len = max(len(hist1), len(hist2))
66 |         p1 = np.pad(hist1, (0, max_len - len(hist1)), 'constant', constant_values=0)
67 |         p2 = np.pad(hist2, (0, max_len - len(hist2)), 'constant', constant_values=0)
68 |         self.results['dist'] = entropy.js_divergence(p1, p2)
69 |         return self.results['dist']
70 |
--------------------------------------------------------------------------------
/netrd/dynamics/single_unbiased_random_walker.py:
--------------------------------------------------------------------------------
1 | """
2 | single_unbiased_random_walker.py
3 | --------------------------------
4 |
5 | Simulate a lonely walker on a network.
6 |
7 | """
8 | from .base import BaseDynamics
9 | import networkx as nx
10 | import numpy as np
11 |
12 |
13 | class SingleUnbiasedRandomWalker(BaseDynamics):
14 |     """Random walk dynamics."""
15 |
16 |     def simulate(self, G, L, initial_node=None):
17 |         r"""Simulate single random-walker dynamics on a ground truth network.
18 |
19 |         Generates an :math:`N \times L` time series `TS` with
20 |         ``TS[j,t]==1`` if the walker is at node :math:`j` at time
21 |         :math:`t`, and ``TS[j,t]==0`` otherwise.
22 |
23 |         The results dictionary also stores the ground truth network as
24 |         `'ground_truth'`.
25 |
26 |         Examples
27 |         --------
28 |         .. code:: python
29 |
30 |             G = nx.ring_of_cliques(4, 16)
31 |             L = 2001
32 |             dynamics = SingleUnbiasedRandomWalker()
33 |             TS = dynamics.simulate(G, L)
34 |
35 |
36 |         Parameters
37 |         ----------
38 |         G (nx.Graph)
39 |             The input (ground-truth) graph with :math:`N` nodes.
40 |
41 |         L (int)
42 |             The length of the desired time series.
43 |
44 |         Returns
45 |         -------
46 |         TS (np.ndarray)
47 |             An :math:`N \times L` array of synthetic time series data.
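        Notes
        -----
        The optional ``initial_node`` argument (an integer node index) pins
        the walker's starting position; when omitted, the starting node is
        drawn uniformly at random.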
48 |
49 |         """
50 |         # get adjacency matrix and set up vector of indices
51 |         A = nx.to_numpy_array(G)
52 |         N = G.number_of_nodes()
53 |         W = np.zeros(L, dtype=int)
54 |         # place walker at initial location
55 |         if initial_node is not None:
56 |             W[0] = initial_node
57 |         else:
58 |             W[0] = np.random.randint(N)
59 |
60 |         # run dynamical process
61 |         for t in range(L - 1):
62 |             W[t + 1] = np.random.choice(np.where(A[W[t], :])[0])
63 |         self.results['node_index_sequence'] = W
64 |         # turn into a binary-valued time series
65 |         TS = np.zeros((N, L))
66 |         for t, w in enumerate(W):
67 |             TS[w, t] = 1
68 |         self.results['TS'] = TS
69 |         self.results['ground_truth'] = G
70 |         return TS
71 |
--------------------------------------------------------------------------------
/netrd/utilities/standardize.py:
--------------------------------------------------------------------------------
1 | """
2 | standardize.py
3 | --------------
4 |
5 | Utilities for computing standardization values for distance measures.
6 |
7 | author: Harrison Hartle/Tim LaRock (timothylarock at gmail dot com)
8 |
9 | Submitted as part of the 2019 NetSI Collabathon.
10 |
11 | """
12 |
13 | import numpy as np
14 | import networkx as nx
15 |
16 |
17 | def mean_GNP_distance(n, prob, distance, samples=10, **kwargs):
18 |     r"""Mean distance between :math:`G(n, p)` graphs.
19 |
20 |     Compute the mean distance between `samples` :math:`G(n, p)` graphs with
21 |     parameters `n` and `prob`, using distance function `distance`, whose
22 |     keyword arguments are passed with ``**kwargs``.
23 |
24 |
25 |     Parameters
26 |     ----------
27 |
28 |     n (int)
29 |         Number of nodes in ER graphs to be generated.
30 |
31 |     prob (float)
32 |         Probability of edge in ER graphs to be generated.
33 |
34 |     distance (function)
35 |         The ``dist`` method of one of the classes in ``netrd.distance``.
36 |
37 |     samples (int)
38 |         Number of samples to average distance over.
39 |
40 |     **kwargs (dict)
41 |         Keyword arguments to pass to the distance function.
42 |
43 |     Returns
44 |     -------
45 |     mean (float)
46 |         The average distance between the sampled ER networks.
47 |
48 |     std (float)
49 |         The standard deviation of the distances.
50 |
51 |     dist (np.ndarray)
52 |         Array storing the actual distances.
53 |
54 |     Examples
55 |     --------
56 |     .. code:: python
57 |
58 |         dist_obj = netrd.distance.ResistancePerturbation()
59 |         kwargs = {'p':2}
60 |         mean, std, dists = netrd.utilities.mean_GNP_distance(100, 0.1, dist_obj.dist, **kwargs)
61 |
62 |
63 |     Notes
64 |     -----
65 |     Ideally, each sample would involve generating two :math:`G(n, p)`
66 |     graphs, computing the distance between them, then throwing them both
67 |     away. However, this would be computationally expensive, so for now we
68 |     are reusing samples. The diagonal of the distance matrix is excluded,
69 |     i.e., we do not compute the distance between a sample graph and itself.
70 |
71 |     """
72 |     graphs = [nx.fast_gnp_random_graph(n, prob) for _ in range(samples)]
73 |     dis_mat = np.full((samples, samples), np.nan)
74 |     for i in range(samples):
75 |         for j in range(samples):
76 |             if i == j:
77 |                 continue
78 |             dis_mat[i, j] = distance(graphs[i], graphs[j], **kwargs)
79 |
80 |     # the nan* versions below ignore NaNs and normalize appropriately
81 |     return np.nanmean(dis_mat), np.nanstd(dis_mat), dis_mat
82 |
--------------------------------------------------------------------------------
/netrd/dynamics/voter.py:
--------------------------------------------------------------------------------
1 | """
2 | voter.py
3 | --------
4 |
5 | Implementation of voter model dynamics on a network.
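Example (a sketch)::

    TS = VoterModel().simulate(nx.karate_club_graph(), 500, noise='auto')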
6 | 7 | author: Stefan McCabe 8 | 9 | Submitted as part of the 2019 NetSI Collabathon. 10 | 11 | """ 12 | 13 | from netrd.dynamics import BaseDynamics 14 | import numpy as np 15 | import networkx as nx 16 | from ..utilities import unweighted 17 | 18 | 19 | class VoterModel(BaseDynamics): 20 | """Voter dynamics.""" 21 | 22 | @unweighted 23 | def simulate(self, G, L, noise=None): 24 | r"""Simulate voter-model-style dynamics on a network. 25 | 26 | Nodes are randomly assigned a state in :math:`\{-1, 1\}`; at each 27 | time step all nodes asynchronously update by choosing their new 28 | state uniformly from their neighbors. Generates an :math:`N \times 29 | L` time series. 30 | 31 | The results dictionary also stores the ground truth network as 32 | `'ground_truth'`. 33 | 34 | Parameters 35 | ---------- 36 | G (nx.Graph) 37 | the input (ground-truth) graph with `N` nodes. 38 | 39 | L (int) 40 | the length of the desired time series. 41 | 42 | noise (float, str or None) 43 | if noise is present, with this probability a node's state will 44 | be randomly redrawn from :math:`\{-1, 1\}` independent of its 45 | neighbors' states. If 'automatic', set noise to :math:`1/N`. 46 | 47 | Returns 48 | ------- 49 | TS (np.ndarray) 50 | an :math:`N \times L` array of synthetic time series data. 51 | 52 | """ 53 | 54 | N = G.number_of_nodes() 55 | 56 | if noise is None: 57 | noise = 0 58 | elif noise == 'automatic' or noise == 'auto': 59 | noise = 1 / N 60 | elif not isinstance(noise, (int, float)): 61 | raise ValueError("noise must be a number, 'automatic', or None") 62 | 63 | transitions = nx.to_numpy_array(G) 64 | transitions = transitions / np.sum(transitions, axis=0) 65 | 66 | TS = np.zeros((N, L)) 67 | TS[:, 0] = [1 if x < 0.5 else -1 for x in np.random.rand(N)] 68 | indices = np.arange(N) 69 | 70 | for t in range(1, L): 71 | np.random.shuffle(indices) 72 | TS[:, t] = TS[:, t - 1] 73 | for i in indices: 74 | TS[i, t] = np.random.choice(TS[:, t], p=transitions[:, i]) 75 | if np.random.rand() < noise: 76 | TS[i, t] = 1 if np.random.rand() < 0.5 else -1 77 | 78 | self.results['ground_truth'] = G 79 | self.results['TS'] = TS 80 | return TS 81 | -------------------------------------------------------------------------------- /doc/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # http://www.sphinx-doc.org/en/master/config 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | import os 14 | import sys 15 | sys.path.insert(0, os.path.abspath('../../')) 16 | 17 | 18 | # -- Project information ----------------------------------------------------- 19 | 20 | project = 'netrd' 21 | copyright = '2019, NetSI 2019 Collabathon team' 22 | author = 'NetSI 2019 Collabathon team' 23 | 24 | # The full version, including alpha/beta/rc tags 25 | release = '0.1' 26 | 27 | 28 | # -- General configuration --------------------------------------------------- 29 | 30 | # Add any Sphinx extension module names here, as strings. 
They can be
31 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
32 | # ones.
33 | extensions = ['sphinx.ext.autodoc',
34 |               'numpydoc',
35 |               'sphinx.ext.coverage',
36 |               'sphinx.ext.mathjax',
37 |               'sphinx.ext.viewcode']
38 | numpydoc_show_class_members = False
39 |
40 | # Add any paths that contain templates here, relative to this directory.
41 | templates_path = ['_templates']
42 |
43 | # List of patterns, relative to source directory, that match files and
44 | # directories to ignore when looking for source files.
45 | # This pattern also affects html_static_path and html_extra_path.
46 | exclude_patterns = []
47 |
48 | # The suffix(es) of source filenames.
49 | source_suffix = ['.rst', '.md']
50 |
51 | # Add the markdown parser.
52 | # from recommonmark.parser import CommonMarkParser
53 | # source_parsers = {'.md': CommonMarkParser}
54 |
55 | # from recommonmark.transform import AutoStructify
56 | # def setup(app):
57 | #     app.add_source_parser()
58 | #     app.add_config_value(
59 | #         'recommonmark_config', {'enable_eval_rst': True}, True)
60 | #     app.add_transform(AutoStructify)
61 |
62 | # -- Options for HTML output -------------------------------------------------
63 |
64 | # The theme to use for HTML and HTML Help pages. See the documentation for
65 | # a list of builtin themes.
66 | #
67 | html_theme = 'sphinx_rtd_theme'
68 |
69 | # Add any paths that contain custom static files (such as style sheets) here,
70 | # relative to this directory. They are copied after the builtin static files,
71 | # so a file named "default.css" will overwrite the builtin "default.css".
72 | html_static_path = ['_static']
73 |
--------------------------------------------------------------------------------
/netrd/distance/polynomial_dissimilarity.py:
--------------------------------------------------------------------------------
1 | """
2 | polynomial_dissimilarity.py
3 | ---------------------------
4 |
5 | From
6 | ----
7 | Donnat, Claire, and Susan Holmes. "Tracking
8 | network dynamics: A survey of distances
9 | and similarity metrics." arXiv
10 | preprint arXiv:1801.07351 (2018).
11 |
12 | author: Jessica T. Davis
13 | email:
14 | Submitted as part of the 2019 NetSI Collabathon.
15 |
16 | """
17 | import numpy as np
18 | import networkx as nx
19 | from .base import BaseDistance
20 | from ..utilities import unweighted
21 |
22 |
23 | class PolynomialDissimilarity(BaseDistance):
24 |     """Compares polynomials relating to the eigenvalues of the adjacency matrices."""
25 |
26 |     @unweighted
27 |     def dist(self, G1, G2, k=5, alpha=1):
28 |         r"""Compares the polynomials of the eigenvalue decomposition of
29 |         two adjacency matrices.
30 |
31 |         Note that the :math:`ij`-th element of :math:`A^k`
32 |         corresponds to the number of paths of length :math:`k` between
33 |         nodes :math:`i` and :math:`j`.
34 |
35 |         The results dictionary also stores a 2-tuple of the underlying
36 |         adjacency matrices in the key `'adjacency_matrices'`.
37 |
38 |         Parameters
39 |         ----------
40 |
41 |         G1, G2 (nx.Graph)
42 |             two networkx graphs to be compared.
43 |
44 |         k (int)
45 |             maximum degree of the polynomial
46 |
47 |         alpha (float)
48 |             weighting factor
49 |
50 |         Returns
51 |         -------
52 |         dist (float)
53 |             Polynomial Dissimilarity between `G1`, `G2`
54 |
55 |         References
56 |         ----------
57 |         .. [1] Donnat, Claire, and Susan Holmes. "Tracking network
58 |                dynamics: A survey of distances and similarity metrics."
59 |                arXiv preprint arXiv:1801.07351 (2018).
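        Examples
        --------
        A sketch comparing two independent random graphs of the same size:

        .. code:: python

            import networkx as nx
            G1 = nx.fast_gnp_random_graph(100, 0.1, seed=1)
            G2 = nx.fast_gnp_random_graph(100, 0.1, seed=2)
            d = PolynomialDissimilarity().dist(G1, G2, k=5, alpha=1)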
60 | 61 | """ 62 | 63 | A1 = nx.to_numpy_array(G1) 64 | A2 = nx.to_numpy_array(G2) 65 | 66 | P_A1 = similarity_score(A1, k, alpha) 67 | P_A2 = similarity_score(A2, k, alpha) 68 | 69 | dist = np.linalg.norm(P_A1 - P_A2, ord="fro") / A1.shape[0] ** 2 70 | 71 | self.results["adjacency_matrices"] = A1, A2 72 | self.results["dist"] = dist 73 | return dist 74 | 75 | 76 | def similarity_score(A, k, alpha): 77 | """ 78 | Calculate the similarity score used in the polynomial dissimilarity 79 | distance. This uses a polynomial transformation of the eigenvalues of the 80 | of the adjacency matrix in combination with the eigenvectors of the 81 | adjacency matrix. See p. 27 of Donnat and Holmes (2018). 82 | """ 83 | 84 | eig_vals, Q = np.linalg.eig(A) 85 | 86 | n = A.shape[0] 87 | 88 | def polynomial(kp): 89 | return eig_vals**kp / (n - 1) ** (alpha * (kp - 1)) 90 | 91 | W = np.diag(sum([polynomial(k) for k in range(1, k + 1)])) 92 | P_A = np.dot(np.dot(Q, W), Q.T) 93 | 94 | return P_A 95 | -------------------------------------------------------------------------------- /netrd/dynamics/sherrington_kirkpatrick.py: -------------------------------------------------------------------------------- 1 | """ 2 | sherrington_kirkpatrick.py 3 | --------------------- 4 | Generate an ising model-like time series on a graph 5 | 6 | author: Brennan Klein 7 | email: brennanjamesklein at gmail dot com 8 | submitted as part of the 2019 NetSI Collabathon 9 | """ 10 | from .base import BaseDynamics 11 | import networkx as nx 12 | import numpy as np 13 | from ..utilities import unweighted 14 | 15 | 16 | class SherringtonKirkpatrickIsing(BaseDynamics): 17 | """Ising model-like dynamics.""" 18 | 19 | @unweighted 20 | def simulate(self, G, L, noisy=False): 21 | r"""Simulate Kinetic Ising model dynamics on a ground truth network. 22 | 23 | The results dictionary also stores the ground truth network as 24 | `'ground_truth'`. 25 | 26 | Parameters 27 | ---------- 28 | G (nx.Graph) 29 | The input (ground-truth) graph with :math:`N` nodes. 30 | 31 | L (int) 32 | The length of the desired time series. 33 | 34 | Returns 35 | ------- 36 | TS (np.ndarray) 37 | An :math:`N \times L` array of synthetic time series data. 38 | 39 | Examples 40 | -------- 41 | .. code:: python 42 | 43 | G = nx.ring_of_cliques(4,16) 44 | L = 2001 45 | dynamics = SherringtonKirkpatrickIsing() 46 | TS = dynamics.simulate(G, L) 47 | 48 | 49 | References 50 | ---------- 51 | .. [1] D. Sherrington and S. Kirkpatrick, Phys. Rev. Lett. 35, 1792 52 | (1975). 53 | 54 | .. [2] Hoang, D.T., Song, J., Periwal, V. and Jo, J., Network 55 | inference in stochastic systems from neurons to currencies: 56 | Improved performance at small sample size. 
(2019) 57 | 58 | """ 59 | 60 | N = G.number_of_nodes() 61 | 62 | # get transition probability matrix of G 63 | A = nx.to_numpy_array(G) 64 | W = np.zeros(A.shape) 65 | for i in range(A.shape[0]): 66 | if A[i].sum() > 0: 67 | W[i] = A[i] / A[i].sum() 68 | 69 | # initialize a time series of ones 70 | ts = np.ones((L, N)) 71 | for t in range(1, L - 1): 72 | h = np.sum(W[:, :] * ts[t, :], axis=1) # Wij from j to i 73 | p = 1 / (1 + np.exp(-2 * h)) 74 | if noisy: 75 | ts[t + 1, :] = p - np.random.rand(N) 76 | else: 77 | ts[t + 1, :] = sign_vec(p - np.random.rand(N)) 78 | 79 | self.results['ground_truth'] = G 80 | self.results['TS'] = ts.T 81 | 82 | return self.results['TS'] 83 | 84 | 85 | def sign(x): 86 | """ 87 | np.sign(0) = 0; to avoid the value 0 here, 88 | we redefine sign so that sign(0) = 1 89 | """ 90 | return 1.0 if x >= 0 else -1.0 91 | 92 | 93 | def sign_vec(x): 94 | """ 95 | Binarize an array 96 | """ 97 | x_vec = np.vectorize(sign) 98 | return x_vec(x) 99 | -------------------------------------------------------------------------------- /netrd/reconstruction/maximum_likelihood_estimation.py: -------------------------------------------------------------------------------- 1 | """ 2 | maximum_likelihood_estimation.py 3 | --------------------- 4 | Reconstruction of graphs using maximum likelihood estimation 5 | author: Brennan Klein 6 | email: brennanjamesklein at gmail dot com 7 | submitted as part of the 2019 NetSI Collabathon 8 | """ 9 | from .base import BaseReconstructor 10 | import numpy as np 11 | from ..utilities import create_graph, threshold 12 | 13 | 14 | class MaximumLikelihoodEstimation(BaseReconstructor): 15 | """Uses maximum likelihood estimation.""" 16 | 17 | def fit(self, TS, rate=1.0, stop_criterion=True, threshold_type='degree', **kwargs): 18 | """Infer inter-node coupling weights using maximum likelihood estimation 19 | methods. 20 | 21 | The results dictionary also stores the weight matrix as 22 | `'weights_matrix'` and the thresholded version of the weight matrix 23 | as `'thresholded_matrix'`. 24 | 25 | Parameters 26 | ---------- 27 | 28 | TS (np.ndarray) 29 | Array consisting of :math:`L` observations from :math:`N` sensors. 30 | 31 | rate (float) 32 | rate term in maximum likelihood 33 | 34 | stop_criterion (bool) 35 | if True, prevent overly-long runtimes 36 | 37 | threshold_type (str) 38 | Which thresholding function to use on the matrix of 39 | weights. See `netrd.utilities.threshold.py` for 40 | documentation. Pass additional arguments to the thresholder 41 | using ``**kwargs``. 42 | 43 | Returns 44 | ------- 45 | G (nx.Graph or nx.DiGraph) 46 | a reconstructed graph. 47 | 48 | References 49 | ---------- 50 | 51 | ..
[1] https://github.com/nihcompmed/network-inference/blob/master/sphinx/codesource/inference.py 52 | 53 | """ 54 | 55 | N, L = np.shape(TS) # N nodes, length L 56 | rate = rate / L 57 | 58 | s1 = TS[:, :-1] 59 | W = np.zeros((N, N)) 60 | 61 | nloop = 10000 62 | for i0 in range(N): 63 | st1 = TS[i0, 1:] # time series activity of single node 64 | 65 | w = np.zeros(N) 66 | h = np.zeros(L - 1) 67 | cost = np.full(nloop, 100.0) 68 | 69 | for iloop in range(nloop): 70 | dw = np.dot(s1, (st1 - np.tanh(h))) 71 | 72 | w += rate * dw 73 | h = np.dot(s1.T, w) 74 | 75 | cost[iloop] = ((st1 - np.tanh(h)) ** 2).mean() 76 | 77 | if stop_criterion and cost[iloop] >= cost[iloop - 1]: 78 | break 79 | 80 | W[i0, :] = w 81 | 82 | # threshold the network 83 | W_thresh = threshold(W, threshold_type, **kwargs) 84 | 85 | # construct the network 86 | 87 | self.results['graph'] = create_graph(W_thresh) 88 | self.results['weights_matrix'] = W 89 | self.results['thresholded_matrix'] = W_thresh 90 | G = self.results['graph'] 91 | 92 | return G 93 | -------------------------------------------------------------------------------- /netrd/dynamics/ising_glauber.py: -------------------------------------------------------------------------------- 1 | """ 2 | ising_glauber.py 3 | ---------------- 4 | 5 | Implementation to simulate the Ising-Glauber model on a network. 6 | 7 | author: Chia-Hung Yang 8 | Submitted as part of the 2019 NetSI Collabathon. 9 | """ 10 | 11 | from netrd.dynamics import BaseDynamics 12 | import numpy as np 13 | import networkx as nx 14 | from numpy.random import rand 15 | from ..utilities import unweighted 16 | 17 | 18 | class IsingGlauber(BaseDynamics): 19 | """Ising-Glauber model.""" 20 | 21 | @unweighted 22 | def simulate(self, G, L, init=None, beta=2): 23 | r"""Simulate time series on a network from the Ising-Glauber model. 24 | 25 | In the Ising-Glauber model, each node has a binary state. At every 26 | time step, nodes switch their state with certain probability. For 27 | inactive nodes, this probability is :math:`1 / (1 + e^{\beta (k - 28 | 2m) / k})` where :math:`\beta` is a parameter tuning the likelihood 29 | of switching state, :math:`k` is degree of the node and :math:`m` 30 | is the number of its active neighbors; for active nodes the 31 | switch-state probability is :math:`1 - 1 / (1 + e^{\beta (k - 2m) / 32 | k})` instead. 33 | 34 | The results dictionary also stores the ground truth network as 35 | `'ground_truth'`. 36 | 37 | Parameters 38 | ---------- 39 | G (nx.Graph) 40 | Underlying ground-truth network of simulated time series which 41 | has :math:`N` nodes. 42 | 43 | L (int) 44 | Length of time series. 45 | 46 | init (np.ndarray) 47 | Length-:math:`N` 1D array of nodes' initial condition, which 48 | must have binary value (0 or 1). 49 | 50 | beta (float) 51 | Inverse temperature tuning the likelihood that a node switches 52 | its state. Default to :math:`2`. 53 | 54 | Returns 55 | ------- 56 | TS (np.ndarray) 57 | :math:`N \times L` array of :math:`L` observations on :math:`N` 58 | nodes. 
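Examples
--------
A brief usage sketch; the ring-of-cliques input below is an assumed example, mirroring the other dynamics modules:

.. code:: python

    G = nx.ring_of_cliques(4, 16)
    dynamics = IsingGlauber()
    TS = dynamics.simulate(G, 1000, beta=2)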
59 | 60 | """ 61 | 62 | N = G.number_of_nodes() 63 | adjmat = nx.to_numpy_array(G, dtype=float) 64 | degs = adjmat.sum(axis=0) 65 | 66 | # Randomly initialize an initial condition if not specified 67 | TS = np.zeros((N, L), dtype=int) 68 | if init is None: 69 | init = rand(N) 70 | TS[:, 0] = np.round(init).astype(int) 71 | 72 | # Simulate the time series 73 | for t in range(L - 1): 74 | state = TS[:, t].copy() # State for each node 75 | num_act_nei = np.dot(state, adjmat) # Number of active neighbors 76 | 77 | hamltn = (degs - 2 * num_act_nei) / degs 78 | thrds = 1 / (1 + np.exp(beta * hamltn)) 79 | # Probability of switching state 80 | probs = np.where(state == 0, thrds, 1 - thrds) 81 | 82 | _next = np.where(rand(N) < probs, 1 - state, state) 83 | TS[:, t + 1] = _next 84 | 85 | self.results['ground_truth'] = G 86 | self.results['TS'] = TS 87 | return TS 88 | -------------------------------------------------------------------------------- /netrd/distance/hamming.py: -------------------------------------------------------------------------------- 1 | """ 2 | hamming.py 3 | -------------- 4 | 5 | Hamming distance, wrapper for the scipy function: 6 | https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.distance.hamming.html#scipy.spatial.distance.hamming 7 | 8 | """ 9 | 10 | import scipy 11 | import numpy as np 12 | import networkx as nx 13 | from .base import BaseDistance 14 | from ..utilities import unweighted 15 | 16 | 17 | class Hamming(BaseDistance): 18 | """Entry-wise disagreement between adjacency matrices.""" 19 | 20 | @unweighted 21 | def dist(self, G1, G2): 22 | r"""The proportion of disagreeing entries between the flattened adjacency 23 | matrices. 24 | 25 | If :math:`u` and :math:`v` are boolean vectors, then the Hamming 26 | distance is: 27 | 28 | .. math:: 29 | 30 | \frac{c_{01} + c_{10}}{n} 31 | 32 | where :math:`c_{ij}` is the number of occurrences where 33 | :math:`u[k] = i` and :math:`v[k] = j` for :math:`k < n`. 34 | 35 | The graphs must have the same number of nodes. A small modification 36 | to this code could allow weights to be applied, though only a single 37 | set of weights applying to both graphs. 38 | 39 | The results dictionary also stores a 2-tuple of the underlying 40 | adjacency matrices in the key `'adjacency_matrices'`. 41 | 42 | Parameters 43 | ---------- 44 | 45 | G1, G2 (nx.Graph) 46 | two networkx graphs to be compared. 47 | 48 | Returns 49 | ------- 50 | 51 | dist (float) 52 | the distance between `G1` and `G2`. 53 | 54 | References 55 | ---------- 56 | 57 | ..
[1] https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.distance.hamming.html#scipy.spatial.distance.hamming 58 | 59 | """ 60 | 61 | if G1.number_of_nodes() == G2.number_of_nodes(): 62 | N = G1.number_of_nodes() 63 | else: 64 | raise ValueError("Graphs must have the same number of nodes") 65 | 66 | adj1 = nx.to_numpy_array(G1) 67 | adj2 = nx.to_numpy_array(G2) 68 | 69 | # undirected case: consider only the upper triangular 70 | mask = np.triu_indices(N, k=1) 71 | 72 | # directed case: consider all but the diagonal 73 | if nx.is_directed(G1) or nx.is_directed(G2): 74 | new_mask = np.tril_indices(N, k=-1) 75 | mask = (np.append(mask[0], new_mask[0]), np.append(mask[1], new_mask[1])) 76 | 77 | # include the diagonal only if there are self-loops; 78 | # this corrects the implicit denominator of Hamming, which 79 | # should be N^2 for networks with self-loops and N(N-1) for 80 | # those without 81 | if next(nx.selfloop_edges(G1), False) or next(nx.selfloop_edges(G2), False): 82 | new_mask = np.diag_indices(N) 83 | mask = (np.append(mask[0], new_mask[0]), np.append(mask[1], new_mask[1])) 84 | 85 | dist = scipy.spatial.distance.hamming( 86 | adj1[mask].flatten(), adj2[mask].flatten() 87 | ) 88 | self.results["dist"] = dist 89 | self.results["adjacency_matrices"] = adj1, adj2 90 | return dist 91 | -------------------------------------------------------------------------------- /tests/test_utilities.py: -------------------------------------------------------------------------------- 1 | """ 2 | test_utilities.py 3 | ----------------- 4 | 5 | Test utility functions. 6 | 7 | """ 8 | 9 | import numpy as np 10 | from netrd.utilities.entropy import categorized_data 11 | from netrd.utilities.entropy import entropy_from_seq, joint_entropy, conditional_entropy 12 | from netrd.utilities import threshold 13 | 14 | 15 | def test_thresholds(): 16 | """ 17 | Test the threshold function by testing three underlying thresholding 18 | methods: range, quantile, and degree.
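For example, on the 4 x 4 test matrix with 16 distinct entries used below, ``threshold(mat, 'quantile', quantile=0.5)`` should keep exactly the 8 largest entries, while ``threshold(mat, 'degree', avg_k=2)`` should keep ``avg_k * N = 8`` entries, as the assertions below check.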
19 | """ 20 | 21 | mat = np.arange(1, 17, 1).reshape((4, 4)) 22 | 23 | for k in range(5): 24 | thresholded_mat = threshold(mat, 'degree', avg_k=k, remove_self_loops=False) 25 | assert (thresholded_mat != 0).sum() == 4 * k 26 | 27 | for n in range(17): 28 | thresholded_mat = threshold( 29 | mat, 'quantile', quantile=n / 16, remove_self_loops=False 30 | ) 31 | 32 | assert (thresholded_mat != 0).sum() == 16 - n 33 | 34 | thresholded_mat = threshold( 35 | mat, 'range', cutoffs=[(0, np.inf)], remove_self_loops=False 36 | ) 37 | assert (thresholded_mat >= 0).all() 38 | 39 | thresholded_mat = threshold( 40 | mat, 'range', cutoffs=[(-np.inf, 0)], remove_self_loops=False 41 | ) 42 | assert (thresholded_mat <= 0).all() 43 | 44 | target_mat = np.array( 45 | [[0, 0, 0, 0], [0, 0, 0, 0], [9, 10, 11, 12], [13, 14, 15, 16]] 46 | ) 47 | 48 | assert np.array_equal( 49 | threshold(mat, 'range', cutoffs=[(9, 16)], remove_self_loops=False), target_mat 50 | ) 51 | assert np.array_equal( 52 | threshold(mat, 'degree', avg_k=2, remove_self_loops=False), target_mat 53 | ) 54 | assert np.array_equal( 55 | threshold(mat, 'quantile', quantile=0.5, remove_self_loops=False), target_mat 56 | ) 57 | 58 | target_mat = np.array([[0, 0, 0, 0], [0, 0, 0, 0], [1, 1, 1, 1], [1, 1, 1, 1]]) 59 | 60 | assert np.array_equal( 61 | threshold( 62 | mat, 'range', cutoffs=[(9, 16)], binary=True, remove_self_loops=False 63 | ), 64 | target_mat, 65 | ) 66 | assert np.array_equal( 67 | threshold(mat, 'degree', avg_k=2, binary=True, remove_self_loops=False), 68 | target_mat, 69 | ) 70 | assert np.array_equal( 71 | threshold(mat, 'quantile', quantile=0.5, binary=True, remove_self_loops=False), 72 | target_mat, 73 | ) 74 | 75 | 76 | def test_categorized_data(): 77 | """Test the function that turns continuous data into categorical.""" 78 | raw = np.array([[1.0, 1.4, 3.0], [2.0, 2.2, 5.0]]).T 79 | n_bins = 2 80 | data = categorized_data(raw, n_bins) 81 | 82 | data_true = np.array([[0, 0, 1], [0, 0, 1]]).T 83 | assert np.array_equal(data, data_true) 84 | 85 | 86 | def test_entropies(): 87 | """ 88 | Test functions computing entropy, joint entropy, and conditional entropy. 89 | 90 | """ 91 | data = np.array([[1, 0, 0, 1, 1, 0, 1, 0], [0, 1, 0, 1, 1, 0, 1, 0]]).T 92 | H = entropy_from_seq(data[:, 0]) 93 | H_joint = joint_entropy(data) 94 | H_cond = conditional_entropy(data[:, 1, np.newaxis], data[:, 0, np.newaxis]) 95 | 96 | H_true = 1.0 97 | H_joint_true = 3 / 4 + 3 / 4 * np.log2(8 / 3) 98 | H_cond_true = H_joint - H 99 | 100 | assert np.isclose(H, H_true) 101 | assert np.isclose(H_joint, H_joint_true) 102 | assert np.isclose(H_cond, H_cond_true) 103 | -------------------------------------------------------------------------------- /netrd/distance/ipsen_mikhailov.py: -------------------------------------------------------------------------------- 1 | """ 2 | ipsen_mikhailov.py 3 | -------------------------- 4 | 5 | Graph distance based on the paper: 6 | "Evolutionary reconstruction of networks" 7 | Available here: 8 | https://journals.aps.org/pre/abstract/10.1103/PhysRevE.66.046109 9 | 10 | author: Guillaume St-Onge 11 | email: guillaume.st-onge.4@ulaval.ca 12 | Submitted as part of the 2019 NetSI Collabathon.
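A minimal usage sketch (the two random graphs are illustrative assumptions, not part of this module):

    import networkx as nx
    from netrd.distance import IpsenMikhailov

    G1 = nx.fast_gnp_random_graph(50, 0.1)
    G2 = nx.fast_gnp_random_graph(50, 0.1)
    d = IpsenMikhailov().dist(G1, G2, hwhm=0.08)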
13 | 14 | """ 15 | 16 | import numpy as np 17 | import networkx as nx 18 | from .base import BaseDistance 19 | from scipy.sparse.csgraph import laplacian 20 | from scipy.linalg import eigh 21 | from scipy.integrate import quad 22 | from ..utilities.graph import unweighted 23 | 24 | 25 | class IpsenMikhailov(BaseDistance): 26 | """Compares the spectrum of the Laplacian matrices.""" 27 | 28 | @unweighted 29 | def dist(self, G1, G2, hwhm=0.08): 30 | """Compare the spectrum of the associated Laplacian matrices. 31 | 32 | The results dictionary also stores a 2-tuple of the underlying 33 | adjacency matrices in the key `'adjacency_matrices'`. 34 | 35 | Parameters 36 | ---------- 37 | 38 | G1, G2 (nx.Graph) 39 | two networkx graphs to be compared. 40 | 41 | hwhm (float) 42 | half width at half maximum of the Lorentzian kernel. 43 | 44 | Returns 45 | ------- 46 | 47 | dist (float) 48 | the distance between G1 and G2. 49 | 50 | Notes 51 | ----- 52 | 53 | Requires undirected networks. 54 | 55 | References 56 | ---------- 57 | 58 | .. [1] https://journals.aps.org/pre/abstract/10.1103/PhysRevE.66.046109 59 | 60 | """ 61 | # get the adjacency matrices 62 | adj1 = nx.to_numpy_array(G1) 63 | adj2 = nx.to_numpy_array(G2) 64 | self.results['adjacency_matrices'] = adj1, adj2 65 | 66 | # get the IM distance 67 | dist = _im_distance(adj1, adj2, hwhm) 68 | 69 | self.results['dist'] = dist 70 | 71 | return dist 72 | 73 | 74 | def _im_distance(adj1, adj2, hwhm): 75 | """Computes the Ipsen-Mikhailov distance for two symmetric adjacency 76 | matrices. 77 | 78 | Based on this paper: 79 | https://journals.aps.org/pre/abstract/10.1103/PhysRevE.66.046109 80 | 81 | Note: this is also used by the file hamming_ipsen_mikhailov.py 82 | 83 | Parameters 84 | ---------- 85 | 86 | adj1, adj2 (array): adjacency matrices. 87 | 88 | hwhm (float) : half width at half maximum of the Lorentzian distribution. 89 | 90 | Returns 91 | ------- 92 | 93 | dist (float) : Ipsen-Mikhailov distance. 94 | 95 | """ 96 | N = len(adj1) 97 | # get laplacian matrix 98 | L1 = laplacian(adj1, normed=False) 99 | L2 = laplacian(adj2, normed=False) 100 | 101 | # get the modes for the positive-semidefinite laplacian 102 | w1 = np.sqrt(np.abs(eigh(L1)[0][1:])) 103 | w2 = np.sqrt(np.abs(eigh(L2)[0][1:])) 104 | 105 | # we calculate the norm for both spectra 106 | norm1 = (N - 1) * np.pi / 2 - np.sum(np.arctan(-w1 / hwhm)) 107 | norm2 = (N - 1) * np.pi / 2 - np.sum(np.arctan(-w2 / hwhm)) 108 | 109 | # define both spectral densities 110 | density1 = lambda w: np.sum(hwhm / ((w - w1) ** 2 + hwhm**2)) / norm1 111 | density2 = lambda w: np.sum(hwhm / ((w - w2) ** 2 + hwhm**2)) / norm2 112 | 113 | func = lambda w: (density1(w) - density2(w)) ** 2 114 | 115 | return np.sqrt(quad(func, 0, np.inf, limit=100)[0]) 116 | -------------------------------------------------------------------------------- /netrd/reconstruction/graphical_lasso.py: -------------------------------------------------------------------------------- 1 | """ 2 | graphical_lasso.py 3 | -------------- 4 | 5 | Graph reconstruction algorithm based on [1, 2]. 6 | 7 | [1] J. Friedman, T. Hastie, R. Tibshirani, "Sparse inverse covariance estimation with 8 | the graphical lasso", Biostatistics 9, pp. 432–441 (2008). 9 | [2] https://github.com/CamDavidsonPilon/Graphical-Lasso-in-Finance 10 | 11 | author: Charles Murphy 12 | email: charles.murphy.1@ulaval.ca 13 | Submitted as part of the 2019 NetSI Collabathon.
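A minimal usage sketch (the random array is an assumed stand-in for real sensor data):

    import numpy as np
    from netrd.reconstruction import GraphicalLasso

    TS = np.random.random((25, 500))  # N = 25 sensors, L = 500 observations
    G = GraphicalLasso().fit(TS, alpha=0.01, threshold_type='quantile', quantile=0.9)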
14 | """ 15 | 16 | import numpy as np 17 | from sklearn.covariance import graphical_lasso 18 | from .base import BaseReconstructor 19 | from ..utilities import create_graph, threshold 20 | 21 | 22 | class GraphicalLasso(BaseReconstructor): 23 | """Performs graphical lasso.""" 24 | 25 | def fit( 26 | self, 27 | TS, 28 | alpha=0.01, 29 | max_iter=100, 30 | tol=0.0001, 31 | threshold_type='degree', 32 | **kwargs 33 | ): 34 | """Performs a graphical lasso. 35 | 36 | For details see [1, 2]. 37 | 38 | The results dictionary also stores the covariance matrix as 39 | `'weights_matrix'`, the precision matrix as `'precision_matrix'`, 40 | and the thresholded version of the covariance matrix as 41 | `'thresholded_matrix'`. 42 | 43 | This implementation uses `scikit-learn`'s implementation of the 44 | graphical lasso; for convenience two control parameters `tol` and 45 | `max_iter` are available to interface with their method. 46 | 47 | Parameters 48 | ---------- 49 | 50 | TS (np.ndarray) 51 | Array consisting of :math:`L` observations from :math:`N` 52 | sensors. 53 | 54 | alpha (float, default=0.01) 55 | Coefficient of penalization, higher values means more 56 | sparseness 57 | 58 | max_iter (int, default=100) 59 | Maximum number of iterations. 60 | 61 | tol (float, default=0.0001) 62 | Stop the algorithm when the duality gap is below a certain 63 | threshold. 64 | 65 | threshold_type (str) 66 | Which thresholding function to use on the matrix of 67 | weights. See `netrd.utilities.threshold.py` for 68 | documentation. Pass additional arguments to the thresholder 69 | using ``**kwargs``. 70 | 71 | Returns 72 | ------- 73 | 74 | G (nx.Graph) 75 | A reconstructed graph with :math:`N` nodes. 76 | 77 | References 78 | ---------- 79 | 80 | .. [1] J. Friedman, T. Hastie, R. Tibshirani, "Sparse inverse 81 | covariance estimation with the graphical lasso", 82 | Biostatistics 9, pp. 432–441 (2008). 83 | 84 | .. [2] https://github.com/CamDavidsonPilon/Graphical-Lasso-in-Finance 85 | 86 | """ 87 | emp_cov = np.cov(TS) 88 | 89 | cov, prec = graphical_lasso(emp_cov, alpha, max_iter=max_iter, tol=tol) 90 | self.results['weights_matrix'] = cov 91 | self.results['precision_matrix'] = prec 92 | 93 | # threshold the network 94 | self.results['thresholded_matrix'] = threshold( 95 | self.results['weights_matrix'], threshold_type, **kwargs 96 | ) 97 | 98 | # construct the network 99 | G = create_graph(self.results['thresholded_matrix']) 100 | self.results['graph'] = G 101 | 102 | return G 103 | -------------------------------------------------------------------------------- /tests/test_reconstruction.py: -------------------------------------------------------------------------------- 1 | """ 2 | test_reconstruction.py 3 | ---------------------- 4 | 5 | Test reconstruction algorithms. 6 | 7 | """ 8 | 9 | import numpy as np 10 | from netrd import reconstruction 11 | from netrd.reconstruction import ConvergentCrossMapping 12 | from netrd.reconstruction import BaseReconstructor 13 | 14 | 15 | def test_graph_size(): 16 | """ 17 | The number of nodes in a reconstructed graph should be 18 | equal to the number of sensors in the time series data 19 | used to reconstruct the graph. 
20 | """ 21 | size = 50 22 | for label, obj in reconstruction.__dict__.items(): 23 | if label in [ 24 | 'PartialCorrelationMatrix', 25 | 'NaiveTransferEntropy', 'OptimalCausationEntropy', 26 | ]: 27 | continue 28 | if isinstance(obj, type) and BaseReconstructor in obj.__bases__: 29 | TS = np.random.random((size, 125)) 30 | G = obj().fit(TS, threshold_type='range', cutoffs=[(-np.inf, np.inf)]) 31 | assert G.order() == size, f"{label} has wrong size" 32 | 33 | 34 | def test_naive_transfer_entropy(): 35 | """ 36 | Use a smaller data set to test the NaiveTransferEntropy, 37 | because it is very slow. 38 | 39 | """ 40 | size = 25 41 | TS = np.random.random((size, 100)) 42 | G = reconstruction.NaiveTransferEntropy().fit( 43 | TS, delay_max=2, threshold_type='range', cutoffs=[(-np.inf, np.inf)] 44 | ) 45 | assert G.order() == size 46 | 47 | 48 | def test_oce(): 49 | """ 50 | Test optimal causation entropy using a smaller dataset. 51 | """ 52 | 53 | size = 25 54 | TS = np.random.random((size, 50)) 55 | G = reconstruction.OptimalCausationEntropy().fit( 56 | TS, threshold_type='range', cutoffs=[(-np.inf, np.inf)] 57 | ) 58 | assert G.order() == size 59 | 60 | 61 | def test_convergent_cross_mapping(): 62 | """ 63 | Examine the outcome of ConvergentCrossMapping with synthetic 64 | time series data generated from a two-species Lotka-Volterra model. 65 | 66 | """ 67 | filepath = '../data/two_species_coupled_time_series.dat' 68 | edgelist = {(1, 0), (0, 1)} 69 | keys = ['graph', 'weights_matrix', 'pvalues_matrix'] 70 | 71 | TS = np.loadtxt(filepath, delimiter=',') 72 | recon = ConvergentCrossMapping() 73 | G = recon.fit(TS, threshold_type='range', cutoffs=[(-np.inf, np.inf)]) 74 | el = set(G.edges()) 75 | res = recon.results.keys() 76 | 77 | assert el == edgelist 78 | assert all(k in res for k in keys) 79 | 80 | 81 | def test_partial_correlation(): 82 | """ 83 | The PartialCorrelationMatrix has many parameterizations 84 | that ought to be tested differently. Otherwise, this should be 85 | equivalent to `test_graph_size`. 86 | """ 87 | for resid in [True, False]: 88 | for index in [0, None]: 89 | for size in [10, 100]: 90 | if index is None and resid is True: 91 | pass # this shouldn't be a valid parameterization 92 | else: 93 | TS = np.random.random((size, 50)) 94 | G = reconstruction.PartialCorrelationMatrix().fit( 95 | TS, index=index, cutoffs=[(-np.inf, np.inf)] 96 | ) 97 | if index is None: 98 | assert G.order() == size 99 | else: 100 | assert G.order() == (size - 1) 101 | -------------------------------------------------------------------------------- /netrd/dynamics/SIS.py: -------------------------------------------------------------------------------- 1 | """ 2 | SIS.py 3 | ------ 4 | 5 | Implementation of Susceptible-Infected-Susceptible model dynamics on a 6 | network. 7 | 8 | author: Stefan McCabe 9 | 10 | Submitted as part of the 2019 NetSI Collabathon. 11 | 12 | """ 13 | 14 | from netrd.dynamics import BaseDynamics 15 | import numpy as np 16 | import networkx as nx 17 | 18 | 19 | class SISModel(BaseDynamics): 20 | """Susceptible-Infected-Susceptible dynamical process.""" 21 | 22 | def simulate(self, G, L, num_seeds=1, beta=None, mu=None): 23 | r"""Simulate SIS model dynamics on a network. 24 | 25 | The results dictionary also stores the ground truth network as 26 | `'ground_truth'`. 27 | 28 | Parameters 29 | ---------- 30 | G (nx.Graph) 31 | the input (ground-truth) graph with :math:`N` nodes. 32 | 33 | L (int) 34 | the length of the desired time series.
35 | 36 | num_seeds (int) 37 | the number of initially infected nodes. 38 | 39 | beta (float) 40 | the infection rate for the SIS process. 41 | 42 | mu (float) 43 | the recovery rate for the SIS process. 44 | 45 | Returns 46 | ------- 47 | TS (np.ndarray) 48 | an :math:`N \times L` array of synthetic time series data. 49 | 50 | """ 51 | H = G.copy() 52 | N = H.number_of_nodes() 53 | TS = np.zeros((N, L)) 54 | index_to_node = dict(zip(range(G.order()), list(G.nodes()))) 55 | 56 | # sensible defaults for beta and mu 57 | if not beta: 58 | avg_k = np.mean(list(dict(H.degree()).values())) 59 | beta = 1 / avg_k 60 | if not mu: 61 | mu = 1 / H.number_of_nodes() 62 | 63 | seeds = np.random.permutation( 64 | np.concatenate([np.repeat(1, num_seeds), np.repeat(0, N - num_seeds)]) 65 | ) 66 | TS[:, 0] = seeds 67 | infected_attr = {index_to_node[i]: s for i, s in enumerate(seeds)} 68 | nx.set_node_attributes(H, infected_attr, 'infected') 69 | nx.set_node_attributes(H, 0, 'next_infected') 70 | 71 | # SIS dynamics 72 | for t in range(1, L): 73 | nodes = np.random.permutation(H.nodes) 74 | for i in nodes: 75 | if H.nodes[i]['infected']: 76 | neigh = H.neighbors(i) 77 | for j in neigh: 78 | if np.random.random() < beta: 79 | H.nodes[j]['next_infected'] = 1 80 | if np.random.random() < mu: 81 | H.nodes[i]['infected'] = 0 82 | infections = nx.get_node_attributes(H, 'infected') 83 | next_infections = nx.get_node_attributes(H, 'next_infected') 84 | 85 | # store SIS dynamics for time t 86 | TS[:, t] = np.array(list(infections.values())) 87 | nx.set_node_attributes(H, next_infections, 'infected') 88 | nx.set_node_attributes(H, 0, 'next_infected') 89 | 90 | # if the epidemic dies off, stop 91 | if TS[:, t].sum() < 1: 92 | break 93 | 94 | # if the epidemic died off, pad the time series to the right shape 95 | if TS.shape[1] < L: 96 | TS = np.hstack([TS, np.zeros((N, L - TS.shape[1]))]) 97 | 98 | self.results['ground_truth'] = H 99 | self.results['TS'] = TS 100 | self.results['index_to_node'] = index_to_node 101 | 102 | return TS 103 | -------------------------------------------------------------------------------- /netrd/reconstruction/correlation_matrix.py: -------------------------------------------------------------------------------- 1 | """ 2 | correlation_matrix.py 3 | --------------------- 4 | Reconstruction of graphs using the correlation matrix. 5 | author: Stefan McCabe 6 | email: stefanmccabe at gmail dot com 7 | Submitted as part of the 2019 NetSI Collabathon 8 | """ 9 | from .base import BaseReconstructor 10 | import numpy as np 11 | from ..utilities import create_graph, threshold 12 | 13 | 14 | class CorrelationMatrix(BaseReconstructor): 15 | """Uses the correlation matrix.""" 16 | 17 | def fit(self, TS, num_eigs=None, threshold_type='range', **kwargs): 18 | """Uses the correlation matrix. 19 | 20 | If ``num_eigs`` is `None`, perform the reconstruction using the 21 | unregularized correlation matrix. Otherwise, construct a regularized 22 | precision matrix using ``num_eigs`` eigenvectors and eigenvalues of the 23 | correlation matrix. For details on the regularization method, see [1]. 24 | The results dictionary also stores the raw correlation matrix 25 | (potentially regularized) as `'weights_matrix'` and the thresholded 26 | version of the correlation matrix as `'thresholded_matrix'`. For 27 | details see [2]_. 
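Concretely, with the eigenvalues :math:`\lambda_i` and eigenvectors :math:`v_i` of the correlation matrix sorted by decreasing :math:`\lambda_i`, the regularized matrix is assembled as :math:`P = \sum_{i=1}^{m} \lambda_i^{-1} v_i v_i^T`, where :math:`m` is ``num_eigs``, and then rescaled by :math:`\sqrt{P_{ii} P_{jj}}` so that it has unit diagonal.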
28 | 29 | Parameters 30 | ---------- 31 | TS (np.ndarray) 32 | Array consisting of :math:`L` observations from :math:`N` sensors 33 | 34 | num_eigs (int) 35 | The number of eigenvalues to use. (This corresponds to the 36 | amount of regularization.) The number of eigenvalues used must 37 | be less than :math:`N`. 38 | 39 | threshold_type (str) 40 | Which thresholding function to use on the matrix of 41 | weights. See `netrd.utilities.threshold.py` for 42 | documentation. Pass additional arguments to the thresholder 43 | using `**kwargs`. 44 | 45 | Returns 46 | ------- 47 | G (nx.Graph) 48 | a reconstructed graph. 49 | 50 | References 51 | ---------- 52 | .. [1] https://bwlewis.github.io/correlation-regularization/ 53 | 54 | .. [2] https://github.com/valeria-io/visualising_stocks_correlations/blob/master/corr_matrix_viz.ipynb 55 | 56 | """ 57 | # get the correlation matrix 58 | cor = np.corrcoef(TS) 59 | 60 | if num_eigs: 61 | N = TS.shape[0] 62 | if num_eigs > N: 63 | raise ValueError( 64 | "The number of eigenvalues used must be less " 65 | "than the number of sensors." 66 | ) 67 | 68 | # get eigenvalues and eigenvectors of the correlation matrix 69 | vals, vecs = np.linalg.eigh(cor) 70 | idx = vals.argsort()[::-1] 71 | vals = vals[idx] 72 | vecs = vecs[:, idx] 73 | 74 | # construct the precision matrix and store it 75 | P = (vecs[:, :num_eigs]) @ ( 76 | 1 / (vals[:num_eigs]).reshape(num_eigs, 1) * (vecs[:, :num_eigs]).T 77 | ) 78 | P = P / ( 79 | np.sqrt(np.diag(P)).reshape(N, 1) @ np.sqrt(np.diag(P)).reshape(1, N) 80 | ) 81 | mat = P 82 | else: 83 | mat = cor 84 | 85 | # store the appropriate source matrix 86 | self.results['weights_matrix'] = mat 87 | 88 | # threshold the correlation matrix 89 | A = threshold(mat, threshold_type, **kwargs) 90 | self.results['thresholded_matrix'] = A 91 | 92 | # construct the network 93 | self.results['graph'] = create_graph(A) 94 | G = self.results['graph'] 95 | 96 | return G 97 | -------------------------------------------------------------------------------- /netrd/distance/netlsd.py: -------------------------------------------------------------------------------- 1 | """ 2 | netlsd.py 3 | -------------- 4 | 5 | Graph distance based on: 6 | A. Tsitsulin, D. Mottin, P. Karras, A. Bronstein & E. Müller. NetLSD: Hearing the Shape of a Graph. KDD 2018 7 | 8 | author: Anton Tsitsulin 9 | 10 | """ 11 | import numpy as np 12 | import networkx as nx 13 | import scipy.linalg as spl 14 | 15 | from .base import BaseDistance 16 | from ..utilities import undirected, unweighted 17 | 18 | 19 | class NetLSD(BaseDistance): 20 | """Compares spectral node signature distributions.""" 21 | 22 | @undirected 23 | @unweighted 24 | def dist(self, G1, G2, normalization=None, timescales=None): 25 | """NetLSD: Hearing the Shape of a Graph. 26 | 27 | A network similarity measure based on spectral node signature 28 | distributions. 29 | 30 | The results dictionary includes the underlying signature vectors in 31 | `'signatures'`. 32 | 33 | Parameters 34 | ---------- 35 | 36 | G1, G2 (nx.Graph) 37 | two undirected networkx graphs to be compared. 38 | 39 | normalization (str) 40 | type of normalization of the heat kernel vectors. either 41 | `'complete'`, `'empty'` or `'none'` 42 | 43 | timescales (np.ndarray) 44 | timescales for the comparison. None yields default. 45 | 46 | Returns 47 | ------- 48 | 49 | dist (float) 50 | the distance between `G1` and `G2`. 51 | 52 | References 53 | ---------- 54 | 55 | .. [1] A. Tsitsulin, D. Mottin, P. Karras, A. Bronstein & 56 | E. Müller. 
NetLSD: Hearing the Shape of a Graph. KDD 2018 57 | 58 | """ 59 | if normalization is None: 60 | normalization = 'none' 61 | if timescales is None: 62 | timescales = np.logspace(-2, 2, 256) 63 | assert isinstance( 64 | normalization, str 65 | ), 'Normalization parameter must be of string type' 66 | 67 | lap1 = nx.normalized_laplacian_matrix(G1) 68 | lap2 = nx.normalized_laplacian_matrix(G2) 69 | 70 | # Note: this is O(n^3) worst-case. 71 | eigs1 = spl.eigvalsh(lap1.todense()) 72 | eigs2 = spl.eigvalsh(lap2.todense()) 73 | 74 | hkt1 = _lsd_signature(eigs1, timescales, normalization) 75 | hkt2 = _lsd_signature(eigs2, timescales, normalization) 76 | 77 | self.results['signatures'] = (hkt1, hkt2) 78 | self.results['dist'] = np.linalg.norm(hkt1 - hkt2) 79 | 80 | return self.results['dist'] 81 | 82 | 83 | def _lsd_signature(eigenvalues, timescales, normalization): 84 | """ 85 | Computes heat kernel trace from given eigenvalues, timescales, and normalization. 86 | 87 | Parameters 88 | -------------- 89 | eigenvalues (numpy.ndarray): Eigenvalue vector 90 | timescales (numpy.ndarray): Vector of discrete timesteps for the kernel computation 91 | normalization (str): 92 | Either 'empty', 'complete' or 'none'. 93 | If 'none' or any other value, return unnormalized heat kernel trace. 94 | For the details how 'empty' and 'complete' are computed, please refer to the paper. 95 | Returns 96 | ------- 97 | numpy.ndarray 98 | Heat kernel trace signature 99 | """ 100 | nv = eigenvalues.shape[0] 101 | hkt = np.zeros(timescales.shape) 102 | for idx, t in enumerate(timescales): 103 | hkt[idx] = np.sum(np.exp(-t * eigenvalues)) 104 | if normalization == 'empty': 105 | return hkt / nv 106 | if normalization == 'complete': 107 | return hkt / (1 + (nv - 1) * np.exp(-(1 + 1 / (nv - 1)) * timescales)) 108 | return hkt 109 | -------------------------------------------------------------------------------- /netrd/utilities/graph.py: -------------------------------------------------------------------------------- 1 | """ 2 | graph.py 3 | -------- 4 | 5 | Utilities for creating and interacting with graph objects. 6 | 7 | author: Stefan McCabe (stefanmccabe at gmail dot com) 8 | 9 | Submitted as part of the 2019 NetSI Collabathon. 10 | 11 | """ 12 | from functools import wraps 13 | import warnings 14 | import numpy as np 15 | import networkx as nx 16 | 17 | 18 | def create_graph(A, create_using=None, remove_self_loops=True): 19 | """Flexibly creating a networkx graph from a numpy array. 20 | 21 | Parameters 22 | ---------- 23 | A (np.ndarray) 24 | A numpy array. 25 | 26 | create_using (nx.Graph or None) 27 | Create the graph using a specific networkx graph. Can be used for 28 | forcing an asymmetric matrix to create an undirected graph, for 29 | example. 30 | 31 | remove_self_loops (bool) 32 | If True, remove the diagonal of the matrix before creating the 33 | graph object. 34 | 35 | Returns 36 | ------- 37 | G 38 | A graph, typically a nx.Graph or nx.DiGraph. 39 | 40 | """ 41 | if remove_self_loops: 42 | np.fill_diagonal(A, 0) 43 | 44 | if create_using is None: 45 | if np.allclose(A, A.T): 46 | G = nx.from_numpy_array(A, create_using=nx.Graph()) 47 | else: 48 | G = nx.from_numpy_array(A, create_using=nx.DiGraph()) 49 | else: 50 | G = nx.from_numpy_array(A, create_using=create_using) 51 | 52 | return G 53 | 54 | 55 | def ensure_undirected(G): 56 | """Ensure the graph G is undirected. 57 | 58 | If it is not, coerce it to undirected and warn the user. 
59 | 60 | Parameters 61 | ---------- 62 | G (networkx graph) 63 | The graph to be checked 64 | 65 | Returns 66 | ------- 67 | 68 | G (nx.Graph) 69 | Undirected version of the input graph 70 | 71 | """ 72 | if nx.is_directed(G): 73 | G = G.to_undirected(as_view=False) 74 | warnings.warn("Coercing directed graph to undirected.", RuntimeWarning) 75 | return G 76 | 77 | 78 | def undirected(func): 79 | """ 80 | Decorator applying ``ensure_undirected()`` to all ``nx.Graph``-subclassed 81 | arguments of ``func``. 82 | """ 83 | 84 | @wraps(func) 85 | def wrapper(*args, **kwargs): 86 | args = [ 87 | ensure_undirected(arg) if issubclass(arg.__class__, nx.Graph) else arg 88 | for arg in args 89 | ] 90 | return func(*args, **kwargs) 91 | 92 | return wrapper 93 | 94 | 95 | def ensure_unweighted(G): 96 | """Ensure the graph G is unweighted. 97 | 98 | If it is not, coerce it to unweighted and warn the user. 99 | 100 | Parameters 101 | ---------- 102 | G (networkx graph) 103 | The graph to be checked 104 | 105 | Returns 106 | ------- 107 | 108 | G (nx.Graph) 109 | Unweighted version of the input graph 110 | 111 | """ 112 | 113 | for _, _, attr in G.edges(data=True): 114 | if not np.isclose(attr.get("weight", 1.0), 1.0): 115 | H = G.__class__() 116 | H.add_nodes_from(G) 117 | H.add_edges_from(G.edges) 118 | warnings.warn("Coercing weighted graph to unweighted.", RuntimeWarning) 119 | return H 120 | 121 | return G 122 | 123 | 124 | def unweighted(func): 125 | """ 126 | Decorator applying ``ensure_unweighted()`` to all ``nx.Graph``-subclassed 127 | arguments of ``func``. 128 | """ 129 | 130 | @wraps(func) 131 | def wrapper(*args, **kwargs): 132 | args = [ 133 | ensure_unweighted(arg) if issubclass(arg.__class__, nx.Graph) else arg 134 | for arg in args 135 | ] 136 | return func(*args, **kwargs) 137 | 138 | return wrapper 139 | -------------------------------------------------------------------------------- /netrd/reconstruction/free_energy_minimization.py: -------------------------------------------------------------------------------- 1 | """ 2 | free_energy_minimization.py 3 | --------------------------- 4 | Reconstruction of graphs by minimizing a free energy of the data 5 | author: Brennan Klein 6 | email: brennanjamesklein at gmail dot com 7 | submitted as part of the 2019 NetSI Collabathon 8 | """ 9 | from .base import BaseReconstructor 10 | import numpy as np 11 | from scipy import linalg 12 | from ..utilities import create_graph, threshold 13 | 14 | 15 | class FreeEnergyMinimization(BaseReconstructor): 16 | """Applies free energy principle.""" 17 | 18 | def fit(self, TS, threshold_type='degree', **kwargs): 19 | """Infer inter-node coupling weights by minimizing a free energy over the 20 | data structure. 21 | 22 | The results dictionary also stores the weight matrix as 23 | `'weights_matrix'` and the thresholded version of the weight matrix 24 | as `'thresholded_matrix'`. For details see [1]_. 25 | 26 | Parameters 27 | ---------- 28 | 29 | TS (np.ndarray) 30 | Array consisting of :math:`L` observations from :math:`N` 31 | sensors. 32 | 33 | threshold_type (str) 34 | Which thresholding function to use on the matrix of 35 | weights. See `netrd.utilities.threshold.py` for 36 | documentation. Pass additional arguments to the thresholder 37 | using ``**kwargs``. 38 | 39 | Returns 40 | ------- 41 | 42 | G (nx.Graph or nx.DiGraph) 43 | a reconstructed graph. 44 | 45 | References 46 | ---------- 47 | 48 | ..
[1] https://github.com/nihcompmed/network-inference/blob/master/sphinx/codesource/inference.py 49 | 50 | """ 51 | 52 | N, L = np.shape(TS) # N nodes, length L 53 | m = np.mean(TS[:, :-1], axis=1) # model average 54 | ds = TS[:, :-1].T - m # discrepancy 55 | t1 = L - 1 # time limit 56 | 57 | # covariance of the discrepancy 58 | c = np.cov(ds, rowvar=False, bias=True) 59 | 60 | c_inv = linalg.inv(c) # inverse 61 | dst = ds.T # discrepancy at time t 62 | 63 | # empty matrix to populate w/ inferred couplings 64 | W = np.empty((N, N)) 65 | 66 | nloop = 10000 # failsafe 67 | 68 | for i0 in range(N): # for each node 69 | TS1 = TS[i0, 1:] # take its entire time series 70 | h = TS1 # calculate the local field 71 | 72 | cost = np.full(nloop, 100.0) 73 | 74 | for iloop in range(nloop): 75 | h_av = np.mean(h) # average local field 76 | hs_av = np.dot(dst, h - h_av) # deltaE_i delta\sigma_k 77 | w = np.dot(hs_av, c_inv) # expectation under model 78 | 79 | h = np.dot(TS[:, :-1].T, w[:]) # estimate of local field 80 | TS_model = np.tanh(h) # under kinetic Ising model 81 | 82 | # discrepancy cost 83 | cost[iloop] = np.mean((TS1[:] - TS_model[:]) ** 2) 84 | 85 | if cost[iloop] >= cost[iloop - 1]: 86 | break # if it increases, break 87 | 88 | # complicated, but this seems to be the estimate of W_i 89 | h *= np.divide( 90 | TS1, TS_model, out=np.ones_like(TS1), where=TS_model != 0 91 | ) 92 | 93 | W[i0, :] = w[:] 94 | 95 | # threshold the network 96 | W_thresh = threshold(W, threshold_type, **kwargs) 97 | 98 | # construct the network 99 | 100 | self.results['graph'] = create_graph(W_thresh) 101 | self.results['weights_matrix'] = W 102 | self.results['thresholded_matrix'] = W_thresh 103 | G = self.results['graph'] 104 | 105 | return G 106 | -------------------------------------------------------------------------------- /doc/source/tutorial.rst: -------------------------------------------------------------------------------- 1 | Tutorial 2 | ======== 3 | 4 | Reconstructing a graph 5 | ---------------------- 6 | 7 | All reconstruction algorithms provide a simple interface. First, 8 | initialize the reconstructor object by calling its constructor with no 9 | arguments. Then, use the ``fit()`` method to obtain the reconstructed 10 | network. 11 | 12 | .. code:: python 13 | 14 | TS = np.loadtxt('data/synth_4clique_N64_simple.csv', 15 | delimiter=',', 16 | encoding='utf8') 17 | # TS is a NumPy array of shape N (number of nodes) x L (observations). 18 | 19 | recon = netrd.reconstruction.RandomReconstructor() 20 | G = recon.fit(TS) 21 | 22 | Many reconstruction algorithms store additional metadata in a 23 | ``results`` dictionary. 24 | 25 | .. code:: python 26 | 27 | # Another way to obtain the reconstructed graph 28 | G = recon.results['graph'] 29 | 30 | # A dense matrix of weights 31 | W = recon.results['weights_matrix'] 32 | 33 | # The binarized matrix from which the graph is created 34 | A = recon.results['thresholded_matrix'] 35 | 36 | Many, though not all, reconstruction algorithms work by assigning each 37 | potential edge a weight and then thresholding the matrix to obtain a 38 | sparse representation. This thresholding can be controlled by setting 39 | the ``threshold_type`` argument to one of four values: 40 | 41 | - ``range``: Consider only weights whose values fall within a range. 42 | - ``degree``: Consider only the largest weights, targeting a specific 43 | average degree. 44 | - ``quantile``: Consider only weights in, e.g., the 0.90 quantile and 45 | above.
46 | - ``custom``: Pass a custom function for thresholding the matrix 47 | yourself. 48 | 49 | Each of these has a specific argument to pass to tune the thresholding: 50 | 51 | - ``cutoffs``: A list of 2-tuples specifying the values to keep. For 52 | example, to keep only values whose absolute values are above 0.5, use 53 | ``cutoffs=[(-np.inf, -0.5), (0.5, np.inf)]`` 54 | - ``avg_k``: The desired average degree of the network. 55 | - ``quantile``: The appropriate quantile (not percentile). 56 | - ``custom_thresholder``: A user-defined function that returns an N x N 57 | NumPy array. 58 | 59 | .. code:: python 60 | 61 | H = recon.fit(TS, threshold_type='degree', avg_k=15.125) 62 | 63 | print(nx.info(G)) 64 | # This network is a complete graph. 65 | 66 | print(nx.info(H)) 67 | # This network is not. 68 | 69 | Distances between graphs 70 | ------------------------ 71 | 72 | Distances behave similarly to reconstructors. All distance objects have 73 | a ``dist()`` method that takes two NetworkX graphs. 74 | 75 | .. code:: python 76 | 77 | G1 = nx.fast_gnp_random_graph(1000, 0.1) 78 | G2 = nx.fast_gnp_random_graph(1000, 0.1) 79 | 80 | dist = netrd.distance.NetSimile() 81 | D = dist.dist(G1, G2) 82 | 83 | Some distances also store metadata in ``results`` dictionaries. 84 | 85 | .. code:: python 86 | 87 | # Another way to get the distance 88 | D = dist.results['dist'] 89 | 90 | # The underlying features used in NetSimile 91 | vecs = dist.results['signature_vectors'] 92 | 93 | Dynamics on graphs 94 | ------------------ 95 | 96 | As a utility, we also implement various ways to simulate dynamics on a 97 | network. These have a similar interface to reconstructors and distances. 98 | Their ``simulate()`` method takes an input graph and the desired length 99 | of the dynamics, returning the same N x L array used in the graph 100 | reconstruction methods. 101 | 102 | .. code:: python 103 | 104 | model = netrd.dynamics.VoterModel() 105 | TS = model.simulate(G, 1000, noise=.001) 106 | 107 | # Another way to get the dynamics 108 | TS = model.results['TS'] 109 | 110 | # The original graph is stored in results 111 | H = model.results['ground_truth'] 112 | 113 | -------------------------------------------------------------------------------- /netrd/distance/deltacon.py: -------------------------------------------------------------------------------- 1 | """ 2 | deltacon.py 3 | -------------------------- 4 | 5 | Deltacon measure for graph distance, after: 6 | 7 | Koutra, Danai, Joshua T. Vogelstein, and Christos Faloutsos. 2013. “Deltacon: A 8 | Principled Massive-Graph Similarity Function.” In Proceedings of the 2013 SIAM 9 | International Conference on Data Mining, 162–70. Society for Industrial and 10 | Applied Mathematics. https://doi.org/10.1137/1.9781611972832.18. 11 | 12 | author: Stefan McCabe 13 | email: stefanmccabe at gmail dot com 14 | Submitted as part of the 2019 NetSI Collabathon. 15 | 16 | """ 17 | 18 | import numpy as np 19 | import networkx as nx 20 | from .base import BaseDistance 21 | from ..utilities import undirected 22 | 23 | 24 | class DeltaCon(BaseDistance): 25 | """Compare matrices related to Fast Belief Propagation.""" 26 | 27 | @undirected 28 | def dist(self, G1, G2, exact=True, g=None): 29 | """DeltaCon is based on the Matusita distance between matrices created from fast 30 | belief propagation (FBP) on graphs G1 and G2. 31 | 32 | Because the FBP algorithm requires a costly matrix inversion, there 33 | is a faster, roughly linear, algorithm that gives approximate 34 | results.
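In the exact version, each graph's FBP matrix is computed as :math:`S = [I + \epsilon^2 D - \epsilon A]^{-1}`, where :math:`A` is the adjacency matrix, :math:`D` the diagonal degree matrix, and :math:`\epsilon = 1/(1 + \max_i D_{ii})`; the distance reported is then the Matusita distance between :math:`S_1` and :math:`S_2`.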
35 | 36 | Parameters 37 | ---------- 38 | 39 | G1, G2 (nx.Graph) 40 | two networkx graphs to be compared. 41 | 42 | exact (bool) 43 | if True, use the slower but exact algorithm (DeltaCon_0) 44 | 45 | g (int) 46 | the number of groups to use in the efficient algorithm. If 47 | exact is set to False but g is not set, the efficient algorithm 48 | will still behave like the exact algorithm, since each node is 49 | put in its own group. 50 | 51 | Returns 52 | ------- 53 | 54 | dist (float) 55 | the distance between G1 and G2. 56 | 57 | References 58 | ---------- 59 | 60 | .. [1] Koutra, Danai, Joshua T. Vogelstein, and Christos 61 | Faloutsos. 2013. "Deltacon: A Principled Massive-Graph 62 | Similarity Function." In Proceedings of the 2013 SIAM 63 | International Conference on Data Mining, 162–70. Society for 64 | Industrial and Applied 65 | Mathematics. https://doi.org/10.1137/1.9781611972832.18. 66 | 67 | """ 68 | assert G1.number_of_nodes() == G2.number_of_nodes() 69 | N = G1.number_of_nodes() 70 | 71 | if not exact and g is None: 72 | g = N 73 | 74 | A1 = nx.to_numpy_array(G1) 75 | L1 = nx.laplacian_matrix(G1).toarray() 76 | D1 = L1 + A1 77 | 78 | A2 = nx.to_numpy_array(G2) 79 | L2 = nx.laplacian_matrix(G2).toarray() 80 | D2 = L2 + A2 81 | 82 | eps_1 = 1 / (1 + np.max(D1)) 83 | eps_2 = 1 / (1 + np.max(D2)) 84 | 85 | if exact: 86 | S1 = np.linalg.inv(np.eye(N) + (eps_1**2) * D1 - eps_1 * A1) 87 | S2 = np.linalg.inv(np.eye(N) + (eps_2**2) * D2 - eps_2 * A2) 88 | else: 89 | raise NotImplementedError( 90 | "The efficient algorithm is not " 91 | "implemented. Please use the exact " 92 | "algorithm." 93 | ) 94 | 95 | def matusita_dist(X, Y): 96 | r"""Return the Matusita distance 97 | 98 | .. math:: 99 | 100 | \sqrt{\sum_i \sum_j \left( \sqrt{X_{ij}} - \sqrt{Y_{ij}} \right)^{2}} 101 | 102 | 103 | between X and Y. 104 | """ 105 | return np.sqrt(np.sum(np.square(np.sqrt(X) - np.sqrt(Y)))) 106 | 107 | dist = matusita_dist(S1, S2) 108 | 109 | self.results['belief_matrix_1'] = S1 110 | self.results['belief_matrix_2'] = S2 111 | 112 | self.results['dist'] = dist 113 | return dist 114 | -------------------------------------------------------------------------------- /netrd/reconstruction/ou_inference.py: -------------------------------------------------------------------------------- 1 | """ 2 | 0) 58 | Y = TS[index, :][0] 59 | 60 | yCovariance = np.cov(Y) 61 | index_pair = np.array([(i, j) for i in index for j in index]) 62 | weights = inverse_method(-yCovariance, temperatures) 63 | self.results['covariance_matrix'] = np.zeros([N, N]) 64 | self.results['covariance_matrix'][index_pair] = yCovariance 65 | 66 | self.results['weights_matrix'] = np.zeros([N, N]) 67 | self.results['weights_matrix'][index_pair] = weights 68 | 69 | # threshold the network 70 | W_thresh = threshold(self.results['weights_matrix'], threshold_type, **kwargs) 71 | self.results['thresholded_matrix'] = W_thresh 72 | 73 | # construct the network 74 | self.results['graph'] = create_graph(W_thresh) 75 | G = self.results['graph'] 76 | 77 | return G 78 | 79 | 80 | def inverse_method(covariance, temperatures): 81 | """This function finds the weights of a heterogeneous Ornstein-Uhlenbeck 82 | process, given the covariance matrix of the zero-mean signal. 83 | 84 | 85 | Parameters 86 | ---------- 87 | 88 | covariance (np.ndarray): Covariance matrix of the zero-mean signal. 89 | 90 | temperatures (np.ndarray): Diffusion coefficient of each of the signals.
91 | 92 | Returns 93 | ------- 94 | 95 | weights (np.ndarray): Coupling between nodes under the OU process assumption. 96 | 97 | """ 98 | 99 | if len(np.shape(temperatures)) == 1: 100 | T = np.diag(temperatures) 101 | elif len(np.shape(temperatures)) == 2: 102 | T = temperatures 103 | else: 104 | raise ValueError("temperature must either be a vector or a matrix.") 105 | 106 | n, m = np.shape(covariance) 107 | 108 | eig_val, eig_vec = eig(-covariance) 109 | eig_val = np.diag(eig_val) 110 | 111 | e_mat = np.matmul(eig_vec.T, np.matmul(T, eig_vec)) 112 | 113 | eig_val = np.matmul(np.ones([n, n]), eig_val) 114 | eig_val = (eig_val + eig_val.T) ** (-1) 115 | eig_val = eig_val.real 116 | weights = -np.matmul(eig_vec, np.matmul(2 * eig_val * e_mat, eig_vec.T)) 117 | 118 | return weights 119 | -------------------------------------------------------------------------------- /netrd/distance/communicability_jsd.py: -------------------------------------------------------------------------------- 1 | """ 2 | communicability_jsd.py 3 | -------------------------- 4 | 5 | Distance measure based on the Jensen-Shannon Divergence 6 | between the communicability sequence of two graphs as 7 | defined in: 8 | 9 | Chen, D., Shi, D. D., Qin, M., Xu, S. M., & Pan, G. J. (2018). 10 | Complex network comparison based on communicability 11 | sequence entropy. Physical Review E, 98(1), 012319. 12 | 13 | https://journals.aps.org/pre/abstract/10.1103/PhysRevE.98.012319 14 | 15 | author: Brennan Klein 16 | email: brennanjamesklein@gmail.com 17 | Submitted as part of the 2019 NetSI Collabathon. 18 | 19 | """ 20 | 21 | import networkx as nx 22 | import numpy as np 23 | from .base import BaseDistance 24 | from ..utilities import entropy, undirected, unweighted 25 | 26 | 27 | class CommunicabilityJSD(BaseDistance): 28 | """Jensen-Shannon divergence between communicability sequences.""" 29 | 30 | @undirected 31 | @unweighted 32 | def dist(self, G1, G2): 33 | r"""Compares the communicability matrix of two graphs. 34 | 35 | This distance is based on the communicability matrix, :math:`C`, of 36 | a graph, whose elements :math:`c_{ij}` are weighted sums over the 37 | walks of every length :math:`k` between 38 | nodes :math:`i` and :math:`j`. 39 | 40 | The communicability matrix is symmetric, which means the 41 | communicability sequence is formed by flattening the upper 42 | triangular of :math:`C`, which is then normalized to create the 43 | communicability sequence, :math:`P`. 44 | 45 | The communicability sequence entropy distance between two graphs, 46 | `G1` and `G2`, is the Jensen-Shannon divergence between these 47 | communicability sequence distributions, :math:`P1` and :math:`P2` 48 | of the two graphs. 49 | 50 | Parameters 51 | ---------- 52 | 53 | G1, G2 (nx.Graph) 54 | two graphs 55 | 56 | Returns 57 | ------- 58 | 59 | dist (float) 60 | between zero and one, this is the communicability sequence 61 | distance between `G1` and `G2`. 62 | 63 | Notes 64 | ----- 65 | 66 | This function uses the networkx approximation of the 67 | communicability of a graph, `nx.communicability_exp`, which 68 | requires `G1` and `G2` to be simple undirected networks. In 69 | addition to the final distance scalar, `self.results` stores the 70 | two communicability sequence vectors :math:`P1` and :math:`P2`. 71 | 72 | 73 | 74 | References 75 | ---------- 76 | 77 | .. [1] Estrada, E., & Hatano, N. (2008). Communicability in complex 78 | networks.
Physical Review E, 77(3), 036111. 79 | https://journals.aps.org/pre/abstract/10.1103/PhysRevE.77.036111 80 | 81 | .. [2] Chen, D., Shi, D. D., Qin, M., Xu, S. M., & Pan, 82 | G. J. (2018). Complex network comparison based on 83 | communicability sequence entropy. Physical Review E, 98(1), 84 | 012319. 85 | 86 | """ 87 | 88 | N1 = G1.number_of_nodes() 89 | N2 = G2.number_of_nodes() 90 | 91 | C1 = nx.communicability_exp(G1) 92 | C2 = nx.communicability_exp(G2) 93 | 94 | Ca1 = np.zeros((N1, N1)) 95 | Ca2 = np.zeros((N2, N2)) 96 | 97 | for i in range(Ca1.shape[0]): 98 | Ca1[i] = np.array(list(C1[i].values())) 99 | for i in range(Ca2.shape[0]): 100 | Ca2[i] = np.array(list(C2[i].values())) 101 | 102 | lil_sigma1 = np.triu(Ca1).flatten() 103 | lil_sigma2 = np.triu(Ca2).flatten() 104 | 105 | big_sigma1 = sum(lil_sigma1[np.nonzero(lil_sigma1)[0]]) 106 | big_sigma2 = sum(lil_sigma2[np.nonzero(lil_sigma2)[0]]) 107 | 108 | P1 = lil_sigma1 / big_sigma1 109 | P2 = lil_sigma2 / big_sigma2 110 | P1 = np.array(sorted(P1)) 111 | P2 = np.array(sorted(P2)) 112 | 113 | dist = entropy.js_divergence(P1, P2) 114 | 115 | self.results['P1'] = P1 116 | self.results['P2'] = P2 117 | self.results['dist'] = dist 118 | 119 | return dist 120 | -------------------------------------------------------------------------------- /netrd/reconstruction/thouless_anderson_palmer.py: -------------------------------------------------------------------------------- 1 | """ 2 | thouless_anderson_palmer.py 3 | --------------------- 4 | Reconstruction of graphs using a Thouless-Anderson-Palmer 5 | mean field approximation 6 | author: Brennan Klein 7 | email: brennanjamesklein at gmail dot com 8 | submitted as part of the 2019 NetSI Collabathon 9 | """ 10 | from .base import BaseReconstructor 11 | import numpy as np 12 | from scipy import linalg 13 | from ..utilities import create_graph, threshold 14 | 15 | 16 | class ThoulessAndersonPalmer(BaseReconstructor): 17 | """Uses Thouless-Anderson-Palmer mean field approximation.""" 18 | 19 | def fit(self, TS, threshold_type='range', **kwargs): 20 | """Infer inter-node coupling weights using a Thouless-Anderson-Palmer mean 21 | field approximation. 22 | 23 | From the paper: "Similar to naive mean field, TAP works well only 24 | in the regime of large sample sizes and small coupling variability. 25 | However, this method leads to poor inference results in the regime 26 | of small sample sizes and/or large coupling variability." For 27 | details see [1]_. 28 | 29 | The results dictionary also stores the weight matrix as 30 | `'weights_matrix'` and the thresholded version of the weight matrix 31 | as `'thresholded_matrix'`. 32 | 33 | Parameters 34 | ---------- 35 | 36 | TS (np.ndarray) 37 | Array consisting of :math:`L` observations from :math:`N` 38 | sensors. 39 | 40 | threshold_type (str) 41 | Which thresholding function to use on the matrix of 42 | weights. See `netrd.utilities.threshold.py` for 43 | documentation. Pass additional arguments to the thresholder 44 | using ``**kwargs``. 45 | 46 | Returns 47 | ------- 48 | G (nx.Graph or nx.DiGraph) 49 | a reconstructed graph. 50 | 51 | References 52 | ----------- 53 | 54 | .. 
[1] https://github.com/nihcompmed/network-inference/blob/master/sphinx/codesource/inference.py 55 | 56 | """ 57 | 58 | N, L = np.shape(TS) # N nodes, length L 59 | m = np.mean(TS, axis=1) # empirical value 60 | 61 | # A matrix 62 | A = 1 - m**2 63 | A_inv = np.diag(1 / A) 64 | A = np.diag(A) 65 | ds = TS.T - m # equal time correlation 66 | C = np.cov(ds, rowvar=False, bias=True) 67 | C_inv = linalg.inv(C) 68 | 69 | s1 = TS[:, 1:] # one-step-delayed correlation 70 | 71 | ds1 = s1.T - np.mean(s1, axis=1) 72 | D = cross_cov(ds1, ds[:-1]) 73 | 74 | # predict naive mean field W: 75 | B = np.dot(D, C_inv) 76 | W_NMF = np.dot(A_inv, B) 77 | 78 | # TAP part: solving for F_i in the following equation: 79 | # F_i (1 - F_i)**2 = (1 - m_i**2) * sum_j W_NMF[i, j]**2 * (1 - m_j**2), with 0 < F_i < 1 80 | 81 | step = 0.001 82 | nloop1 = int(0.33 / step) + 2 83 | 84 | W_NMF2 = W_NMF**2 85 | temp = np.empty(N) 86 | F = np.empty(N) 87 | for i in range(N): 88 | temp[i] = (1 - m[i] ** 2) * np.sum(W_NMF2[i, :] * (1 - m[:] ** 2)) 89 | 90 | y = -1.0 91 | iloop = 0 92 | # scan the increasing branch of x(1-x)**2 until it reaches temp[i] 93 | while y < 0 and iloop < nloop1: 94 | x = iloop * step 95 | y = x * (1 - x) ** 2 - temp[i] 96 | iloop += 1 97 | 98 | F[i] = x 99 | 100 | # A_TAP matrix: Onsager-corrected diagonal, A_ii (1 - F_i) 101 | A_TAP = np.empty(N) 102 | for i in range(N): 103 | A_TAP[i] = A[i, i] * (1 - F[i]) 104 | A_TAP_inv = np.diag(1 / A_TAP) 105 | 106 | W = np.dot(A_TAP_inv, B) 107 | 108 | # threshold the network 109 | W_thresh = threshold(W, threshold_type, **kwargs) 110 | 111 | # construct the network 112 | self.results['graph'] = create_graph(W_thresh) 113 | self.results['weights_matrix'] = W 114 | self.results['thresholded_matrix'] = W_thresh 115 | G = self.results['graph'] 116 | 117 | return G 118 | 119 | 120 | def cross_cov(a, b): 121 | """ 122 | cross_covariance 123 | a, b --> <(a - <a>)(b - <b>)> (axis=0) 124 | """ 125 | da = a - np.mean(a, axis=0) 126 | db = b - np.mean(b, axis=0) 127 | 128 | return np.matmul(da.T, db) / a.shape[0] 129 | -------------------------------------------------------------------------------- /netrd/reconstruction/granger_causality.py: -------------------------------------------------------------------------------- 1 | """ 2 | granger_causality.py 3 | -------------- 4 | 5 | Graph reconstruction algorithm based on [1]. 6 | 7 | [1] P. Desrosiers, S. Labrecque, M. Tremblay, M. Bélanger, B. De Dorlodot, 8 | D. C. Côté, "Network inference from functional experimental data", Proc. SPIE 9 | 9690, Clinical and Translational Neurophotonics; Neural Imaging and Sensing; 10 | and Optogenetics and Optical Manipulation, 969019 (2016); 11 | 12 | author: Charles Murphy 13 | email: charles.murphy.1@ulaval.ca 14 | Submitted as part of the 2019 NetSI Collabathon. 15 | """ 16 | 17 | import numpy as np 18 | 19 | from .base import BaseReconstructor 20 | from sklearn.linear_model import LinearRegression 21 | from ..utilities import create_graph, threshold 22 | 23 | 24 | class GrangerCausality(BaseReconstructor): 25 | """Uses the Granger causality between nodes.""" 26 | 27 | def fit(self, TS, lag=1, threshold_type="range", **kwargs): 28 | r"""Reconstruct a network based on the Granger causality. To evaluate 29 | the effect of a time series :math:`j` over another, :math:`i`, it first 30 | evaluates the error :math:`e_1` given by an autoregressive model fit 31 | with :math:`i` alone. Then, it evaluates another error :math:`e_2` 32 | given by an autoregressive model trained to correlate the future of 33 | :math:`i` with the past of :math:`i` and :math:`j`. The Granger 34 | causality of node :math:`j` over :math:`i` is simply given by 35 | :math:`\log(var(e_1) / var(e_2))`. 36 | 37 | It reconstructs the network by calculating the Granger 38 | causality for each pair of nodes. 39 | 40 | Parameters 41 | ---------- 42 | 43 | TS (np.ndarray) 44 | Array consisting of :math:`L` observations from :math:`N` 45 | sensors. 46 | 47 | lag (int) 48 | Time lag to consider. 49 | 50 | threshold_type (str) 51 | Which thresholding function to use on the matrix of 52 | weights. See `netrd.utilities.threshold.py` for 53 | documentation. Pass additional arguments to the thresholder 54 | using ``**kwargs``. 55 | 56 | Returns 57 | ------- 58 | 59 | G (nx.Graph) 60 | A reconstructed graph with :math:`N` nodes.
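Examples
--------
A brief usage sketch; the random array below is an assumed stand-in for real time series data:

.. code:: python

    TS = np.random.random((10, 300))
    G = GrangerCausality().fit(TS, lag=1, threshold_type='quantile', quantile=0.9)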
61 | 62 | """ 63 | 64 | n = TS.shape[0] 65 | W = np.zeros([n, n]) 66 | 67 | for i in range(n): 68 | xi, yi = GrangerCausality.split_data(TS[i, :], lag) 69 | 70 | for j in range(n): 71 | xj, yj = GrangerCausality.split_data(TS[j, :], lag) 72 | xij = np.concatenate([xi, xj], axis=-1) 73 | reg1 = LinearRegression().fit(xi, yi) 74 | reg2 = LinearRegression().fit(xij, yi) 75 | err1 = yi - reg1.predict(xi) 76 | err2 = yi - reg2.predict(xij) 77 | 78 | std_i = np.std(err1) 79 | std_ij = np.std(err2) 80 | 81 | if std_i == 0: # sentinel values mark degenerate fits 82 | W[j, i] = -99999999 83 | elif std_ij == 0: 84 | W[j, i] = 99999999 85 | else: 86 | W[j, i] = np.log(std_i) - np.log(std_ij) 87 | 88 | self.results["weights_matrix"] = W 89 | # threshold the network 90 | W_thresh = threshold(W, threshold_type, **kwargs) 91 | self.results["thresholded_matrix"] = W_thresh 92 | 93 | # construct the network 94 | self.results["graph"] = create_graph(W_thresh) 95 | G = self.results["graph"] 96 | 97 | return G 98 | 99 | @staticmethod 100 | def split_data(TS, lag): 101 | """From a single node time series, return a training dataset with 102 | corresponding targets. 103 | 104 | Parameters 105 | ---------- 106 | 107 | TS (np.ndarray) 108 | 1D array consisting of :math:`L` observations from a single 109 | sensor. 110 | 111 | lag (int) 112 | Time lag to consider. 113 | 114 | Returns 115 | ------- 116 | 117 | inputs (np.ndarray) 118 | Training data for the inputs. 119 | 120 | targets (np.ndarray) 121 | Training data for the targets. 122 | 123 | """ 124 | T = len(TS) 125 | inputs = np.zeros([T - lag - 1, lag]) 126 | targets = np.zeros(T - lag - 1) 127 | 128 | for t in range(T - lag - 1): 129 | inputs[t, :] = TS[t : lag + t] 130 | targets[t] = TS[t + lag] 131 | 132 | return inputs, targets 133 | -------------------------------------------------------------------------------- /netrd/dynamics/kuramoto.py: -------------------------------------------------------------------------------- 1 | """ 2 | kuramoto.py 3 | ----------- 4 | Kuramoto model of oscillators. 5 | 6 | author: Harrison Hartle 7 | """ 8 | 9 | from .base import BaseDynamics 10 | import networkx as nx 11 | import numpy as np 12 | import scipy.integrate as it 13 | from ..utilities import unweighted 14 | 15 | 16 | class Kuramoto(BaseDynamics): 17 | """Kuramoto model of oscillators.""" 18 | 19 | @unweighted 20 | def simulate(self, G, L, dt=0.01, strength=1, phases=None, freqs=None): 21 | r"""Simulate Kuramoto model on a ground truth network. 22 | 23 | Kuramoto oscillators model synchronization processes. At each time 24 | step, each node adjusts its phase :math:`\theta_i` according to the 25 | equation 26 | 27 | .. math:: 28 | \dot{\theta}_i = \omega_i + \frac{\lambda}{N}\sum_{j=1}^{N}\sin\left(\theta_j - \theta_i\right), 29 | 30 | 31 | where :math:`\lambda` is a coupling `strength` parameter and each node 32 | has an internal frequency :math:`\omega_i`; the `freqs` function 33 | parameter provides the option to initialize these frequencies with 34 | user-defined values (or leave as `None` to randomly initialize). Each 35 | node's initial phase :math:`\theta_{i0}` can be randomly initialized 36 | (the default behavior) or set by specifying the `phases` parameter. 37 | 38 | The results dictionary also stores the ground truth network as 39 | `'ground_truth'` and the internal frequencies of the process as 40 | `'internal_frequencies'`. 41 | 42 | For more information on the Kuramoto model, see the review essay 43 | included below.
44 | 45 | Parameters 46 | ---------- 47 | 48 | G (nx.Graph) 49 | the input (ground-truth) graph with :math:`N` nodes. 50 | 51 | L (int) 52 | the length of the desired time series. 53 | 54 | dt (float) 55 | size of timestep for numerical integration. 56 | 57 | strength (float) 58 | coupling strength (prefactor for interaction terms). 59 | 60 | phases (np.ndarray) 61 | an :math:`N \times 1` array of initial phases. 62 | 63 | freqs (np.ndarray) 64 | an :math:`N \times 1` array of internal frequencies. 65 | 66 | Returns 67 | ------- 68 | 69 | TS (np.ndarray) 70 | an :math:`N \times L` array of synthetic time series data. 71 | 72 | Examples 73 | -------- 74 | 75 | .. code:: python 76 | 77 | G = nx.ring_of_cliques(4,16) 78 | N = G.number_of_nodes() 79 | L = int(1e4) 80 | omega = np.random.uniform(0.95, 1.05, N) 81 | dynamics = Kuramoto() 82 | TS = dynamics.simulate(G, L, dt=0.01, strength=0.3, freqs=omega) 83 | 84 | References 85 | ---------- 86 | .. [1] F. Rodrigues, T. Peron, P. Ji, J. Kurths. 87 | The Kuramoto model in complex networks. 88 | https://arxiv.org/abs/1511.07139 89 | 90 | """ 91 | A = nx.to_numpy_array(G) 92 | N = G.number_of_nodes() 93 | 94 | try: 95 | if phases is not None: 96 | assert len(phases) == N 97 | theta_0 = phases 98 | else: 99 | theta_0 = 2 * np.pi * np.random.rand(N) 100 | 101 | if freqs is not None: 102 | assert len(freqs) == N 103 | omega = freqs 104 | else: 105 | omega = np.random.uniform(0.9, 1.1, N) 106 | 107 | except AssertionError: 108 | raise ValueError("Initial conditions must be None or lists of length N.") 109 | 110 | t = np.linspace(dt, L * dt, L) # time-vector 111 | one = np.ones(N) 112 | 113 | # define a rate-of-change function 114 | def ddt_theta(theta, t, g, strength, A): 115 | prefactor = strength / N 116 | first = np.outer(one, theta) 117 | second = np.outer(theta, one) 118 | 119 | return g + prefactor * (A * np.sin(first - second)).dot(one) 120 | 121 | # integrate the equations of motion numerically 122 | args = (omega, strength, A) 123 | TS_T = it.odeint(ddt_theta, theta_0, t, args=args) 124 | 125 | # odeint returns an L x N array (time by node); transpose so that 126 | # rows are nodes, in the same order as the nodes of G. 127 | TS = TS_T.T 128 | 129 | # adjust phases 130 | TS = TS % (2 * np.pi) 131 | 132 | self.results["internal_frequencies"] = omega 133 | self.results["ground_truth"] = G 134 | self.results["TS"] = TS 135 | 136 | return TS 137 | -------------------------------------------------------------------------------- /netrd/distance/resistance_perturbation.py: -------------------------------------------------------------------------------- 1 | """ 2 | resistance_perturbation.py 3 | -------------------------- 4 | 5 | Graph distance based on resistance perturbation (https://arxiv.org/abs/1605.01091v2) 6 | 7 | author: Ryan J. Gallagher & Jessica T. Davis 8 | 9 | Submitted as part of the 2019 NetSI Collabathon. 10 | 11 | """ 12 | import numpy as np 13 | import networkx as nx 14 | from .base import BaseDistance 15 | from ..utilities import undirected 16 | 17 | 18 | class ResistancePerturbation(BaseDistance): 19 | """Compares the resistance matrices.""" 20 | 21 | @undirected 22 | def dist(self, G1, G2, p=2): 23 | r"""The p-norm of the difference between two graph resistance matrices. 24 | 25 | The resistance perturbation distance changes if either graph is 26 | relabeled (it is not invariant under graph isomorphism), so node 27 | labels should be consistent between the two graphs being 28 | compared. The distance is not normalized.
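For example, if the two graphs describe the same system but use
different node identifiers, align the labels before comparing. A
minimal sketch, assuming a ``mapping`` dict from `G2`'s labels to
`G1`'s labels is available:

.. code:: python

    import networkx as nx
    # mapping is hypothetical here: {label_in_G2: label_in_G1, ...}
    G2_aligned = nx.relabel_nodes(G2, mapping)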
29 | 30 | The resistance matrix of a graph :math:`G` is calculated as 31 | :math:`R = \text{diag}(L_i) 1^T + 1 \text{diag}(L_i)^T - 2L_i`, 32 | where :math:`L_i` is the Moore-Penrose pseudoinverse of the 33 | Laplacian of :math:`G`. 34 | 35 | The resistance perturbation distance between :math:`G_1` and 36 | :math:`G_2` is calculated as the :math:`p`-norm of the difference 37 | in their resistance matrices, 38 | 39 | .. math:: 40 | d_{r(p)} = \| R^{(1)} - R^{(2)} \|_p = \left( \sum_{i,j \in V} | R^{(1)}_{i,j} - R^{(2)}_{i,j} |^p \right)^{1/p}, 41 | 42 | where :math:`R^{(1)}` and :math:`R^{(2)}` are the resistance 43 | matrices of :math:`G_1` and :math:`G_2` respectively. When :math:`p 44 | = \infty`, we have 45 | 46 | .. math:: 47 | d_{r(\infty)} = \max_{i,j \in V} |R^{(1)}_{i,j} - R^{(2)}_{i,j}|. 48 | 49 | 50 | This method assumes that the input graphs are undirected; if 51 | directed graphs are used, it will coerce them to undirected graphs 52 | and emit a RuntimeWarning. 53 | 54 | The results dictionary also stores a 2-tuple of the underlying 55 | resistance matrices in the key `'resistance_matrices'`. 56 | 57 | Parameters 58 | ---------- 59 | 60 | G1, G2 (nx.Graph) 61 | two networkx graphs to be compared. 62 | 63 | p (float or str, optional) 64 | :math:`p`-norm to take of the difference between the resistance 65 | matrices. Specify ``np.inf`` to take the :math:`\infty`-norm. 66 | 67 | Returns 68 | ------- 69 | dist (float) 70 | the distance between G1 and G2. 71 | 72 | References 73 | ---------- 74 | 75 | .. [1] https://arxiv.org/abs/1605.01091v2 76 | 77 | """ 78 | # Check for connected graphs 79 | if not nx.is_connected(G1) or not nx.is_connected(G2): 80 | raise ValueError( 81 | "Resistance perturbation is undefined for disconnected graphs." 82 | ) 83 | 84 | # Get resistance matrices 85 | R1 = get_resistance_matrix(G1) 86 | R2 = get_resistance_matrix(G2) 87 | self.results['resistance_matrices'] = R1, R2 88 | 89 | # Get resistance perturbation distance 90 | if not np.isinf(p): 91 | dist = np.power(np.sum(np.power(np.abs(R1 - R2), p)), 1 / p) 92 | else: 93 | dist = np.amax(np.abs(R1 - R2)) 94 | self.results['dist'] = dist 95 | 96 | return dist 97 | 98 | 99 | def get_resistance_matrix(G): 100 | r"""Get the resistance matrix of a networkx graph. 101 | 102 | The resistance matrix of a graph :math:`G` is calculated as 103 | :math:`R = \text{diag}(L_i) 1^T + 1 \text{diag}(L_i)^T - 2L_i`, 104 | where :math:`L_i` is the Moore-Penrose pseudoinverse of the Laplacian of :math:`G`.
105 | 106 | Parameters 107 | ---------- 108 | G (nx.Graph): networkx graph from which to get its resistance matrix 109 | 110 | Returns 111 | ------- 112 | R (np.array): resistance matrix of G 113 | 114 | """ 115 | # Get adjacency matrix 116 | n = len(G.nodes()) 117 | A = nx.to_numpy_array(G) 118 | # Get Laplacian 119 | D = np.diag(A.sum(axis=0)) 120 | L = D - A 121 | # Get Moore-Penrose pseudoinverse of Laplacian 122 | # Note: converts to dense matrix and introduces n^2 operation here 123 | I = np.eye(n) 124 | J = (1 / n) * np.ones((n, n)) 125 | L_i = np.linalg.solve(L + J, I) - J 126 | # Get resistance matrix 127 | ones = np.ones(n) 128 | ones = ones.reshape((1, n)) 129 | L_i_diag = np.diag(L_i) 130 | L_i_diag = L_i_diag.reshape((n, 1)) 131 | R = np.dot(L_i_diag, ones) + np.dot(ones.T, L_i_diag.T) - 2 * L_i 132 | return R 133 | -------------------------------------------------------------------------------- /netrd/reconstruction/correlation_spanning_tree.py: -------------------------------------------------------------------------------- 1 | """ 2 | correlation_spanning_tree.py 3 | ---------------------------- 4 | 5 | Graph reconstruction algorithm based on Mantegna, R. N. (1999). Hierarchical structure in 6 | financial markets. The European Physical Journal B-Condensed Matter and Complex Systems, 7 | 11(1), 193-197. DOI https://doi.org/10.1007/s100510050929 8 | https://link.springer.com/article/10.1007/s100510050929 9 | 10 | author: Matteo Chinazzi 11 | Submitted as part of the 2019 NetSI Collabathon. 12 | """ 13 | 14 | from .base import BaseReconstructor 15 | import numpy as np 16 | from scipy.sparse.csgraph import minimum_spanning_tree 17 | 18 | try: 19 | from networkx import from_scipy_sparse_array as from_sparse 20 | except ImportError: 21 | from networkx import from_scipy_sparse_matrix as from_sparse 22 | 23 | 24 | class CorrelationSpanningTree(BaseReconstructor): 25 | """Minimum spanning tree connecting the sensors.""" 26 | 27 | def fit(self, TS, distance='root_inv', **kwargs): 28 | r"""Create a minimum spanning tree connecting the sensors. 29 | 30 | The empirical correlation matrix is used to first compute a 31 | distance matrix and then to create a minimum spanning tree 32 | connecting all the sensors in the data. This method implements the 33 | methodology described in [1]_ and applied in the context of creating 34 | a graph connecting the stocks of a portfolio, generated by 35 | looking at the correlations between the daily time series of stock 36 | prices. 37 | 38 | The results dictionary also stores the distance matrix (computed 39 | from the correlations) as `'distance_matrix'`. 40 | 41 | Parameters 42 | ---------- 43 | 44 | TS (np.ndarray) 45 | :math:`N \times L` array consisting of :math:`L` observations 46 | from :math:`N` sensors. 47 | 48 | distance (str) 49 | 'inv_square' calculates distance as :math:`1-corr_{ij}^2` 50 | as in [1]_. 'root_inv' calculates distance as 51 | :math:`\sqrt{2 (1-corr_{ij})}` [2]_. 52 | 53 | Returns 54 | ------- 55 | 56 | G (nx.Graph) 57 | A reconstructed graph with :math:`N` nodes. 58 | 59 | Examples 60 | -------- 61 | .. 
code:: python 62 | 63 | import numpy as np 64 | import networkx as nx 65 | from matplotlib import pyplot as plt 66 | from netrd.reconstruction import CorrelationSpanningTree 67 | 68 | N = 25 69 | T = 300 70 | M = np.random.normal(size=(N,T)) 71 | 72 | print('Create correlated time series') 73 | market_mode = 0.4*np.random.normal(size=(1,T)) 74 | M += market_mode 75 | 76 | sector_modes = {d: 0.5*np.random.normal(size=(1,T)) for d in range(5)} 77 | for sector_mode, vals in sector_modes.items(): 78 | M[sector_mode*5:(sector_mode+1)*5,:] += vals 79 | 80 | print('Link node colors to sectors') 81 | colors = ['b','r','g','y','m'] 82 | node_colors = [color for color in colors for __ in range(5)] 83 | 84 | print('Network reconstruction step') 85 | cst_net = CorrelationSpanningTree() 86 | G = cst_net.fit(M) 87 | 88 | print('Plot reconstructed spanning tree') 89 | fig, ax = plt.subplots() 90 | nx.draw(G, ax=ax, node_color=node_colors) 91 | 92 | 93 | References 94 | ---------- 95 | 96 | .. [1] Mantegna, R. N. (1999). Hierarchical structure in financial 97 | markets. The European Physical Journal B-Condensed Matter 98 | and Complex Systems, 11(1), 193-197. DOI 99 | https://doi.org/10.1007/s100510050929 100 | https://link.springer.com/article/10.1007/s100510050929 101 | 102 | .. [2] Bonanno, G., Caldarelli, G., Lillo, F. & Mantegna, 103 | R. N. (2003) Topology of correlation-based minimal spanning 104 | trees in real and model markets. Physical Review E 68. 105 | 106 | .. [3] Vandewalle, N., Brisbois, F. & Tordoir, X. (2001) Non-random 107 | topology of stock markets. Quantitative Finance 1, 372–374. 108 | 109 | """ 110 | C = np.corrcoef(TS) # Empirical correlation matrix 111 | 112 | D = ( 113 | np.sqrt(2 * (1 - C)) if distance == 'root_inv' else 1 - np.square(C) 114 | ) # Distance matrix 115 | 116 | self.results['distance_matrix'] = D 117 | 118 | MST = minimum_spanning_tree(D) # Minimum Spanning Tree 119 | 120 | G = from_sparse(MST) 121 | 122 | self.results['graph'] = G 123 | 124 | return G 125 | -------------------------------------------------------------------------------- /netrd/distance/dk_series.py: -------------------------------------------------------------------------------- 1 | """ 2 | dk_series.py 3 | -------------------------- 4 | 5 | Graph distance based on the dk-series. 6 | 7 | author: Brennan Klein & Stefan McCabe 8 | email: brennanjamesklein@gmail.com 9 | Submitted as part of the 2019 NetSI Collabathon. 10 | 11 | """ 12 | 13 | 14 | import networkx as nx 15 | import numpy as np 16 | from scipy.sparse import coo_matrix 17 | from collections import defaultdict 18 | from .base import BaseDistance 19 | from ..utilities import entropy, undirected, unweighted 20 | 21 | 22 | class dkSeries(BaseDistance): 23 | """Compare graphs based on their :math:`dk`-series.""" 24 | 25 | @unweighted 26 | @undirected 27 | def dist(self, G1, G2, d=2): 28 | r"""Compute the distance between two graphs by using the Jensen-Shannon 29 | divergence between the :math:`dk`-series of the graphs. 30 | 31 | The :math:`dk`-series of a graph is the collection of distributions of 32 | size :math:`d` subgraphs, where nodes are labelled by degrees. For 33 | simplicity, we currently consider only the :math:`1k`-series, i.e., the 34 | degree distribution, or the :math:`2k`-series, i.e., the 35 | distribution of edges between nodes of degree :math:`(k_i, k_j)`. The 36 | distance between these :math:`dk`-series is calculated using the 37 | Jensen-Shannon divergence. 
38 | 39 | Parameters 40 | ---------- 41 | 42 | G1, G2 (nx.Graph) 43 | two networkx graphs to be compared 44 | 45 | d (int) 46 | the size of the subgraph to consider 47 | 48 | Returns 49 | ------- 50 | 51 | dist (float) 52 | the distance between `G1` and `G2`. 53 | 54 | References 55 | ---------- 56 | 57 | .. [1] Orsini, Chiara, Marija M. Dankulov, Pol Colomer-de-Simón, 58 | Almerima Jamakovic, Priya Mahadevan, Amin Vahdat, Kevin E. 59 | Bassler, et al. 2015. “Quantifying Randomness in Real Networks.” 60 | Nature Communications 6 (1). https://doi.org/10.1038/ncomms9627. 61 | 62 | """ 63 | 64 | N = max(len(G1), len(G2)) 65 | 66 | if d == 1: 67 | from .degree_divergence import DegreeDivergence 68 | 69 | degdiv = DegreeDivergence() 70 | dist = degdiv.dist(G1, G2) 71 | 72 | # the 2k-distance stores the distribution in a sparse matrix, 73 | # so here we take the output of DegreeDivergence and 74 | # produce a comparable object 75 | hist1, hist2 = degdiv.results["degree_histograms"] 76 | hist1 /= len(G1) 77 | hist2 /= len(G2) 78 | hist1 = coo_matrix(hist1) 79 | hist2 = coo_matrix(hist2) 80 | 81 | self.results["dk_distributions"] = hist1, hist2 82 | 83 | elif d == 2: 84 | D1 = dk2_series(G1, N) 85 | D2 = dk2_series(G2, N) 86 | 87 | # store the 2K-distributions 88 | self.results["dk_distributions"] = D1, D2 89 | 90 | # flatten matrices. this is safe because we've padded to the same size 91 | G1_dk_normed = D1.toarray()[np.triu_indices(N)].flatten() 92 | G2_dk_normed = D2.toarray()[np.triu_indices(N)].flatten() 93 | 94 | assert np.isclose(G1_dk_normed.sum(), 1) 95 | assert np.isclose(G2_dk_normed.sum(), 1) 96 | 97 | dist = entropy.js_divergence(G1_dk_normed, G2_dk_normed) 98 | else: 99 | raise NotImplementedError("the dk-series distance is only implemented for d = 1 or d = 2") 100 | 101 | self.results["dist"] = dist 102 | return dist 103 | 104 | 105 | def dk2_series(G, N=None): 106 | """ 107 | Calculate the 2k-series (i.e. the number of edges between 108 | degree-labelled nodes) for G. 109 | """ 110 | 111 | if N is None: 112 | N = len(G) 113 | 114 | k_dict = dict(nx.degree(G)) 115 | dk2 = defaultdict(int) 116 | 117 | for i, j in G.edges: 118 | k_i = k_dict[i] 119 | k_j = k_dict[j] 120 | 121 | # We're enforcing order here because at the end we're going to 122 | # leverage that all the information can be stored in the upper 123 | # triangular for convenience.
124 | if k_i <= k_j: 125 | dk2[(k_i, k_j)] += 1 126 | else: 127 | dk2[(k_j, k_i)] += 1 128 | 129 | # every edge should be counted once 130 | assert sum(list(dk2.values())) == G.size() 131 | 132 | # convert from dict to sparse matrix 133 | row = [i for (i, j) in dk2.keys()] 134 | col = [j for (i, j) in dk2.keys()] 135 | data = [x for x in dk2.values()] 136 | 137 | D = coo_matrix((data, (row, col)), shape=(N, N)) 138 | 139 | # this should be normalized by the number of edges 140 | D = D / G.size() 141 | 142 | return D 143 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![DOI](https://joss.theoj.org/papers/10.21105/joss.02990/status.svg)](https://doi.org/10.21105/joss.02990) 2 | [![PyPI version](https://badge.fury.io/py/netrd.svg)](https://badge.fury.io/py/netrd) 3 | [![ReadTheDocs](https://img.shields.io/readthedocs/netrd.svg)]( 4 | https://netrd.readthedocs.io) 5 | ![CI](https://github.com/netsiphd/netrd/workflows/build/badge.svg) 6 | 7 | # netrd: A library for network {reconstruction, distances, dynamics} 8 | 9 | This library provides a consistent, NetworkX-based interface to various 10 | utilities for graph distances, graph reconstruction from time series data, and 11 | simulated dynamics on networks. 12 | 13 | Some resources that may be of interest: 14 | 15 | * A [tutorial](https://netrd.readthedocs.io/en/latest/tutorial.html) on how to use the library 16 | * The API [reference](https://netrd.readthedocs.io/en/latest/) 17 | * A [notebook](https://nbviewer.jupyter.org/github/netsiphd/netrd/blob/master/notebooks/example.ipynb) showing advanced usage 18 | 19 | # Installation 20 | 21 | `netrd` is easy to install through pip: 22 | 23 | ``` 24 | pip install netrd 25 | ``` 26 | 27 | If you are thinking about contributing to `netrd`, you can install a 28 | development version by executing 29 | 30 | ``` 31 | git clone https://github.com/netsiphd/netrd 32 | cd netrd 33 | pip install . 34 | ``` 35 | 36 | # Usage 37 | 38 | ## Reconstructing a graph 39 | 40 | 

41 | ![example reconstruction](netrd_reconstruction_example.png) 42 | 

43 | 44 | The basic usage of a graph reconstruction algorithm is as follows: 45 | 46 | ```python 47 | from netrd.reconstruction import CorrelationMatrix 48 | import numpy as np 49 | # 100 nodes, 1000 observations 50 | TS = np.random.random((100, 1000)) 51 | 52 | reconstructor = CorrelationMatrix() 53 | G = reconstructor.fit(TS, threshold_type='degree', avg_k=15) 54 | # or alternately, G = reconstructor.results['graph'] 55 | ``` 56 | 57 | Here, `TS` is an N x L numpy array consisting of L 58 | observations for each of N sensors. This constrains the graphs 59 | to have integer-valued nodes. 60 | 61 | The `results` dict object, in addition to containing the graph 62 | object, may also contain objects created as a side effect of 63 | reconstructing the network, which may be useful for debugging or 64 | considering goodness of fit. What is returned will vary between 65 | reconstruction algorithms. 66 | 67 | Many reconstruction algorithms create a dense matrix of weights and 68 | use additional parameters to describe how to create a sparse graph; the 69 | [tutorial](https://netrd.readthedocs.io/en/latest/tutorial.html) has more 70 | details on these parameters. 71 | 72 | 73 | ## Distances between graphs 74 | 75 |

76 | ![example distance](netrd_distance_example.png) 77 | 

78 | 79 | The basic usage of a distance algorithm is as follows: 80 | 81 | ```python 82 | from netrd.distance import QuantumJSD 83 | import networkx as nx 84 | G1 = nx.fast_gnp_random_graph(1000, .1) 85 | G2 = nx.fast_gnp_random_graph(1000, .1) 86 | 87 | dist_obj = QuantumJSD() 88 | distance = dist_obj.dist(G1, G2) 89 | # or alternatively: distance = dist_obj.results['dist'] 90 | ``` 91 | 92 | Here, `G1` and `G2` are `nx.Graph` objects (or subclasses such as 93 | `nx.DiGraph`). The results dictionary holds the distance value, as 94 | well as any other values that were computed as a side effect. 95 | 96 | ## Dynamics on graphs 97 | 98 |

99 | ![example dynamics](netrd_dynamics_example.png) 100 | 

101 | 102 | The basic usage of a dynamics algorithm is as follows: 103 | 104 | ```python 105 | from netrd.dynamics import VoterModel 106 | import networkx as nx 107 | ground_truth = nx.karate_club_graph() 108 | 109 | dynamics_model = VoterModel() 110 | synthetic_TS = dynamics_model.simulate(ground_truth, 1000) 111 | # this is the same structure as the input data to a reconstructor 112 | # G = CorrelationMatrix().fit(synthetic_TS) 113 | ``` 114 | 115 | This produces a numpy array of time series data. 116 | 117 | 118 | # Contributing 119 | 120 | Contributing guidelines can be found in [CONTRIBUTING.md](CONTRIBUTING.md). 121 | 122 | 123 | # Publications 124 | 125 | * McCabe, S., Torres, L., LaRock, T., Haque, S. A., Yang, C.-H., Hartle, H., and 126 | Klein, B. (2021). netrd: A library for network reconstruction and graph 127 | distances. *Journal of Open Source Software* 6(62): 2990. 128 | doi: [10.21105/joss.02990](https://doi.org/10.21105/joss.02990). 129 | arXiv: [2010.16019](https://arxiv.org/abs/2010.16019). 130 | + paper detailing the methods used in this package 131 | 132 | * Hartle H., Klein B., McCabe S., Daniels A., St-Onge G., Murphy C., and 133 | Hébert-Dufresne L. (2020). Network comparison and the within-ensemble graph 134 | distance. *Proceedings of the Royal Society A* 476: 20190744. 135 | doi: [10.1098/rspa.2019.0744](http://doi.org/10.1098/rspa.2019.0744). 136 | arXiv: [2008.02415](https://arxiv.org/abs/2008.02415). 137 | + recent work introducing a baseline measure for comparing graph distances 138 | -------------------------------------------------------------------------------- /netrd/utilities/entropy.py: -------------------------------------------------------------------------------- 1 | """ 2 | entropy.py 3 | ---------- 4 | 5 | Utility functions computing entropy of variables in time series data. 6 | 7 | author: Chia-Hung Yang 8 | 9 | Submitted as part of the 2019 NetSI Collabathon. 10 | """ 11 | 12 | from collections import defaultdict 13 | import numpy as np 14 | from scipy.stats import entropy as sp_entropy 15 | 16 | 17 | def js_divergence(P, Q): 18 | """Jensen-Shannon divergence between `P` and `Q`. 19 | 20 | Parameters 21 | ---------- 22 | 23 | P, Q (np.ndarray) 24 | Two discrete distributions represented as 1D arrays. They are 25 | assumed to have the same support 26 | 27 | Returns 28 | ------- 29 | 30 | float 31 | The Jensen-Shannon divergence between `P` and `Q`. 32 | 33 | """ 34 | M = 0.5 * (P + Q) 35 | jsd = 0.5 * (sp_entropy(P, M, base=2) + sp_entropy(Q, M, base=2)) 36 | 37 | # If the input distributions are identical, floating-point error in the 38 | # construction of the mixture matrix can result in negative values that are 39 | # very close to zero. If one wants to compute the root-JSD metric, these 40 | # negative values lead to undesirable nans. 41 | if np.isclose(jsd, 0.0): 42 | return 0 43 | else: 44 | return jsd 45 | 46 | 47 | def entropy_from_seq(var): 48 | r"""Return the Shannon entropy of a variable. This differs from 49 | Scipy's entropy by taking a sequence of observations as input 50 | rather than a histogram or probability distribution. 51 | 52 | Parameters 53 | ---------- 54 | 55 | var (ndarray) 56 | 1D array of observations of the variable. 57 | 58 | Notes 59 | ----- 60 | 61 | 1. :math:`H(X) = - \sum p(X) \log_2(p(X))` 62 | 2. Data of the variable must be categorical. 63 | 64 | """ 65 | return joint_entropy(var[:, np.newaxis]) 66 | 67 | 68 | def joint_entropy(data): 69 | r"""Joint entropy of all variables in the data. 
70 | 71 | Parameters 72 | ---------- 73 | data (np.ndarray) 74 | Array of data with variables as columns and observations as rows. 75 | 76 | Returns 77 | ------- 78 | float 79 | Joint entropy of the variables of interest. 80 | 81 | Notes 82 | ----- 83 | 1. :math:`H(\{X_i\}) = - \sum p(\{X_i\}) \log_2(p(\{X_i\}))` 84 | 2. The data of variables must be categorical. 85 | 86 | """ 87 | # Entropy is computed through summing contribution of states with 88 | # non-zero empirical probability in the data 89 | count = defaultdict(int) 90 | for state in data: 91 | key = tuple(state) 92 | count[key] += 1 93 | 94 | return sp_entropy(list(count.values()), base=2) 95 | 96 | 97 | def conditional_entropy(data, given): 98 | r"""Conditional entropy of variables in the data conditioned on 99 | a given set of variables. 100 | 101 | Parameters 102 | ---------- 103 | data (np.ndarray) 104 | Array of data with variables of interest as columns and 105 | observations as rows. 106 | 107 | given (np.ndarray) 108 | Array of data with the conditioned variables as columns and 109 | observations as rows. 110 | 111 | Returns 112 | ------- 113 | float 114 | Conditional entropy of the variables :math:`\{X_i\}` of interest 115 | conditioned on variables :math:`\{Y_j\}`. 116 | 117 | Notes 118 | ----- 119 | 1. :math:`H(\{X_i\}|\{Y_j\}) = - \sum p(\{X_i\}\cup\{Y_j\}) \log_2(p(\{X_i\}|\{Y_j\}))` 120 | 2. The data of variables must be categorical. 121 | 122 | """ 123 | joint = np.hstack((data, given)) 124 | entrp = joint_entropy(joint) - joint_entropy(given) 125 | 126 | return entrp 127 | 128 | 129 | def categorized_data(raw, n_bins): 130 | """Categorize data. 131 | 132 | An entry in the returned array is the index of the bin of the 133 | linearly-binned raw continuous data. 134 | 135 | Parameters 136 | ---------- 137 | raw (np.ndarray) 138 | Array of raw continuous data. 139 | n_bins (int) 140 | A universal number of bins for all the variables. 141 | 142 | Returns 143 | ------- 144 | np.ndarray 145 | Array of bin indices after categorizing the raw data. 146 | 147 | """ 148 | bins = linear_bins(raw, n_bins) 149 | data = np.ones(raw.shape, dtype=int) 150 | 151 | # Find the index of bins each element in the raw data array belongs to 152 | for (i, j), val in np.ndenumerate(raw): 153 | data[i, j] = np.argmax(bins[1:, j] >= val) 154 | 155 | return data 156 | 157 | 158 | def linear_bins(raw, n_bins): 159 | r"""Separators of linear bins for each variable in the raw data. 160 | 161 | Parameters 162 | ---------- 163 | raw (np.ndarray) 164 | Array of raw continuous data. 165 | 166 | n_bins (int) 167 | A universal number of bins for all the variables. 168 | 169 | Returns 170 | ------- 171 | np.ndarray 172 | Array where a column is the separators of bins for a variable. 173 | 174 | Notes 175 | ----- 176 | The bins are :math:`B_0 = [b_0, b_1]`, :math:`B_i = (b_i, b_{i+1}]`, 177 | where the :math:`b_i` are the separators of the bins.
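Examples
--------

A small sketch of the binning for two variables and two bins; the
returned array holds each variable's separators in a column:

.. code:: python

    raw = np.array([[0.0, 10.0],
                    [1.0, 20.0],
                    [2.0, 30.0]])
    linear_bins(raw, n_bins=2)
    # column 0 separators: [0., 1., 2.]
    # column 1 separators: [10., 20., 30.]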
178 | 179 | """ 180 | _min = raw.min(axis=0) 181 | _max = raw.max(axis=0) 182 | bins = np.array( 183 | [np.linspace(start, end, num=n_bins + 1) for start, end in zip(_min, _max)] 184 | ) 185 | return bins.T 186 | -------------------------------------------------------------------------------- /netrd/utilities/threshold.py: -------------------------------------------------------------------------------- 1 | """ 2 | threshold.py 3 | ------------ 4 | 5 | Utilities for thresholding matrices based on different criteria 6 | 7 | author: Stefan McCabe (stefanmccabe at gmail dot com) 8 | 9 | Submitted as part of the 2019 NetSI Collabathon. 10 | 11 | """ 12 | import numpy as np 13 | import warnings 14 | 15 | 16 | def threshold_in_range(mat, **kwargs): 17 | r"""Threshold by setting values not within a list of ranges to zero. 18 | 19 | Parameters 20 | ---------- 21 | mat (np.ndarray) 22 | A numpy array. 23 | 24 | cutoffs (list of tuples) 25 | When thresholding, include only edges whose correlations fall 26 | within a given range or set of ranges. The lower value must come 27 | first in each tuple. For example, to keep those values whose 28 | absolute value is between :math:`0.5` and :math:`1`, pass 29 | ``cutoffs=[(-1, -0.5), (0.5, 1)]``. 30 | 31 | Returns 32 | ------- 33 | thresholded_mat (np.ndarray) 34 | the thresholded numpy array 35 | 36 | """ 37 | if 'cutoffs' in kwargs: 38 | cutoffs = kwargs['cutoffs'] 39 | else: 40 | warnings.warn( 41 | "Setting 'cutoffs' argument is strongly encouraged. Using cutoff range of (-1, 1).", 42 | RuntimeWarning, 43 | ) 44 | cutoffs = [(-1, 1)] 45 | 46 | mask_function = np.vectorize( 47 | lambda x: any([x >= cutoff[0] and x <= cutoff[1] for cutoff in cutoffs]) 48 | ) 49 | mask = mask_function(mat) 50 | 51 | thresholded_mat = mat * mask 52 | 53 | if kwargs.get('binary', False): 54 | thresholded_mat = np.abs(np.sign(thresholded_mat)) 55 | 56 | if kwargs.get('remove_self_loops', True): 57 | np.fill_diagonal(thresholded_mat, 0) 58 | 59 | return thresholded_mat 60 | 61 | 62 | def threshold_on_quantile(mat, **kwargs): 63 | """Threshold by setting values below a given quantile to zero. 64 | 65 | Parameters 66 | ---------- 67 | 68 | mat (np.ndarray) 69 | A numpy array. 70 | 71 | quantile (float) 72 | The threshold above which to keep an element of the array, e.g., 73 | set to zero elements below the 0.9 quantile (90th percentile) of the array. 74 | 75 | Returns 76 | ------- 77 | thresholded_mat 78 | the thresholded numpy array 79 | 80 | """ 81 | if 'quantile' in kwargs: 82 | quantile = kwargs['quantile'] 83 | else: 84 | warnings.warn( 85 | "Setting 'quantile' argument is strongly recommended. Using target quantile of 0.9 for thresholding.", 86 | RuntimeWarning, 87 | ) 88 | quantile = 0.9 89 | 90 | if kwargs.get('remove_self_loops', True): 91 | np.fill_diagonal(mat, 0) 92 | 93 | if quantile != 0: 94 | thresholded_mat = mat * (mat > np.percentile(mat, quantile * 100)) 95 | else: 96 | thresholded_mat = mat 97 | 98 | if kwargs.get('binary', False): 99 | thresholded_mat = np.abs(np.sign(thresholded_mat)) 100 | 101 | return thresholded_mat 102 | 103 | 104 | def threshold_on_degree(mat, **kwargs): 105 | """Threshold by setting the smallest values to zero until a target average degree is reached. 106 | 107 | Parameters 108 | ---------- 109 | 110 | mat (np.ndarray) 111 | A numpy array. 112 | 113 | avg_k (float) 114 | The average degree to target when thresholding the matrix.
115 | 116 | Returns 117 | ------- 118 | thresholded_mat 119 | the thresholded numpy array 120 | 121 | """ 122 | 123 | if 'avg_k' in kwargs: 124 | avg_k = kwargs['avg_k'] 125 | else: 126 | warnings.warn( 127 | "Setting 'avg_k' argument is strongly encouraged. Using average " 128 | "degree of 1 for thresholding.", 129 | RuntimeWarning, 130 | ) 131 | avg_k = 1 132 | 133 | n = len(mat) 134 | A = np.ones((n, n)) 135 | 136 | if kwargs.get('remove_self_loops', True): 137 | np.fill_diagonal(A, 0) 138 | np.fill_diagonal(mat, 0) 139 | 140 | if np.mean(np.sum(A, 1)) <= avg_k: 141 | # degenerate case: threshold the whole matrix 142 | thresholded_mat = mat 143 | else: 144 | for m in sorted(mat.flatten()): 145 | A[mat == m] = 0 146 | if np.mean(np.sum(A, 1)) <= avg_k: 147 | break 148 | thresholded_mat = mat * (mat > m) 149 | 150 | if kwargs.get('binary', False): 151 | thresholded_mat = np.abs(np.sign(thresholded_mat)) 152 | 153 | return thresholded_mat 154 | 155 | 156 | def threshold(mat, rule, **kwargs): 157 | """A flexible interface to other thresholding functions. 158 | 159 | Parameters 160 | ---------- 161 | 162 | mat (np.ndarray) 163 | A numpy array. 164 | 165 | rule (str) 166 | A string indicating which thresholding function to invoke. 167 | 168 | kwargs (dict) 169 | Named arguments to pass to the underlying threshold function. 170 | 171 | Returns 172 | ------- 173 | thresholded_mat 174 | the thresholded numpy array 175 | 176 | """ 177 | try: 178 | if rule == 'degree': 179 | return threshold_on_degree(mat, **kwargs) 180 | elif rule == 'range': 181 | return threshold_in_range(mat, **kwargs) 182 | elif rule == 'quantile': 183 | return threshold_on_quantile(mat, **kwargs) 184 | elif rule == 'custom': 185 | return kwargs['custom_thresholder'](mat) 186 | else: 187 | raise ValueError("unrecognized thresholding rule: %s" % rule) 188 | except KeyError: 189 | raise ValueError("missing threshold parameter") 190 | -------------------------------------------------------------------------------- /netrd/reconstruction/mean_field.py: -------------------------------------------------------------------------------- 1 | """ 2 | mean_field.py 3 | ------------- 4 | Reconstruction of graphs using the exact mean field 5 | author: Brennan Klein 6 | email: brennanjamesklein at gmail dot com 7 | submitted as part of the 2019 NetSI Collabathon 8 | """ 9 | from .base import BaseReconstructor 10 | import numpy as np 11 | from scipy import linalg 12 | from scipy.integrate import quad 13 | from scipy.optimize import fsolve 14 | from ..utilities import create_graph, threshold 15 | 16 | 17 | class MeanField(BaseReconstructor): 18 | def fit( 19 | self, TS, exact=True, stop_criterion=True, threshold_type='range', **kwargs 20 | ): 21 | """Infer inter-node coupling weights using a mean field approximation. 22 | 23 | From the paper: "Exact mean field (eMF) is another mean field 24 | approximation, similar to naive mean field and Thouless-Anderson- 25 | Palmer (TAP). We can improve the performance of this method by adding our 26 | stopping criterion. In general, eMF outperforms nMF and TAP, but it 27 | is still worse than FEM and MLE, especially in the limit of small 28 | sample sizes and large coupling variability." For details see [1]_. 29 | 30 | The results dictionary also stores the weight matrix as 31 | `'weights_matrix'` and the thresholded version of the weight matrix 32 | as `'thresholded_matrix'`. 33 | 34 | Parameters 35 | ---------- 36 | 37 | TS (np.ndarray) 38 | Array consisting of :math:`L` observations from :math:`N` sensors.
39 | 40 | exact (bool) 41 | If True, use the exact mean field approximation. If False, use the 42 | naive mean field approximation. 43 | 44 | stop_criterion (bool) 45 | If True, prevent overly-long runtimes. Only applies for exact mean 46 | field. 47 | 48 | threshold_type (str) 49 | Which thresholding function to use on the matrix of 50 | weights. See `netrd.utilities.threshold.py` for 51 | documentation. Pass additional arguments to the thresholder 52 | using ``**kwargs``. 53 | 54 | Returns 55 | ------- 56 | 57 | G (nx.Graph or nx.DiGraph) 58 | a reconstructed graph. 59 | 60 | References 61 | ---------- 62 | 63 | .. [1] https://github.com/nihcompmed/network-inference/blob/master/sphinx/codesource/inference.py 64 | 65 | """ 66 | N, L = np.shape(TS) # N nodes, length L 67 | m = np.mean(TS, axis=1) # empirical value 68 | 69 | # A matrix 70 | A = 1 - m**2 71 | A_inv = np.diag(1 / A) 72 | A = np.diag(A) 73 | 74 | ds = TS.T - m # equal time correlation 75 | C = np.cov(ds, rowvar=False, bias=True) 76 | C_inv = linalg.inv(C) 77 | 78 | s1 = TS[:, 1:] # one-step-delayed correlation 79 | ds1 = s1.T - np.mean(s1, axis=1) 80 | D = cross_cov(ds1, ds[:-1]) 81 | 82 | # predict naive mean field W: 83 | B = np.dot(D, C_inv) 84 | 85 | if exact: 86 | # --------------------------------------------------------------- 87 | fun1 = ( 88 | lambda x, H: (1 / np.sqrt(2 * np.pi)) 89 | * np.exp(-(x**2) / 2) 90 | * np.tanh(H + x * np.sqrt(delta)) 91 | ) 92 | 93 | fun2 = ( 94 | lambda x: (1 / np.sqrt(2 * np.pi)) 95 | * np.exp(-(x**2) / 2) 96 | * (1 - np.square(np.tanh(H + x * np.sqrt(delta)))) 97 | ) 98 | 99 | W = np.empty((N, N)) 100 | 101 | nloop = 100 102 | 103 | for i0 in range(N): 104 | cost = np.zeros(nloop + 1) 105 | delta = 1.0 106 | 107 | def integrand(H): 108 | """ 109 | Return the integrand of this function 110 | """ 111 | y, err = quad(fun1, -np.inf, np.inf, args=(H,)) 112 | 113 | return y - m[i0] 114 | 115 | for iloop in range(1, nloop): 116 | H = fsolve(integrand, 0.0) 117 | H = float(H) 118 | 119 | a, err = quad(fun2, -np.inf, np.inf) 120 | a = float(a) 121 | 122 | if a != 0: 123 | delta = (1 / (a**2)) * np.sum( 124 | (B[i0, :] ** 2) * (1 - m[:] ** 2) 125 | ) 126 | W_temp = B[i0, :] / a 127 | 128 | H_temp = np.dot(TS[:, :-1].T, W_temp) 129 | cost[iloop] = np.mean((s1.T[:, i0] - np.tanh(H_temp)) ** 2) 130 | 131 | if stop_criterion and cost[iloop] >= cost[iloop - 1]: 132 | break 133 | 134 | W[i0, :] = W_temp[:] 135 | else: 136 | W = np.dot(A_inv, B) 137 | 138 | # threshold the network 139 | W_thresh = threshold(W, threshold_type, **kwargs) 140 | 141 | # construct the network 142 | 143 | self.results['graph'] = create_graph(W_thresh) 144 | self.results['weights_matrix'] = W 145 | self.results['thresholded_matrix'] = W_thresh 146 | G = self.results['graph'] 147 | 148 | return G 149 | 150 | 151 | def cross_cov(a, b): 152 | """ 153 | cross_covariance 154 | a,b --> <(a -
<a>)(b - <b>)> (axis=0) 155 | """ 156 | da = a - np.mean(a, axis=0) 157 | db = b - np.mean(b, axis=0) 158 | 159 | return np.matmul(da.T, db) / a.shape[0] 160 | -------------------------------------------------------------------------------- /netrd/reconstruction/naive_transfer_entropy.py: -------------------------------------------------------------------------------- 1 | """ 2 | naive_transfer_entropy.py 3 | ------------------------- 4 | Graph reconstruction algorithm based on 5 | Schreiber, T. (2000). Measuring information transfer. 6 | Physical Review Letters, 85(2):461–464 7 | https://journals.aps.org/prl/abstract/10.1103/PhysRevLett.85.461 8 | 9 | author: Chia-Hung Yang and Brennan Klein 10 | email: yang.chi[at]husky[dot]neu[dot]edu and klein.br@husky.neu.edu 11 | Submitted as part of the 2019 NetSI Collabathon. 12 | """ 13 | 14 | from .base import BaseReconstructor 15 | import numpy as np 16 | from itertools import permutations 17 | from ..utilities import create_graph, threshold 18 | from ..utilities.entropy import conditional_entropy, categorized_data 19 | 20 | 21 | class NaiveTransferEntropy(BaseReconstructor): 22 | """Uses transfer entropy between sensors.""" 23 | 24 | def fit(self, TS, delay_max=1, n_bins=2, threshold_type='range', **kwargs): 25 | r"""Calculates the transfer entropy from i --> j. 26 | 27 | The resulting network is asymmetric, and each element 28 | :math:`TE_{ij}` represents the additional information about the 29 | future states of :math:`j` gained by knowing the past states of 30 | :math:`i`, beyond what the past states of :math:`j` already 31 | provide. Presumably, if time series :math:`j` does not depend on 32 | :math:`i`, knowing the past of :math:`i` does not increase your 33 | certainty about the next state of :math:`j`. 34 | 35 | The reason that this method is referred to as "naive" transfer 36 | entropy is because it appears there are much more complicated 37 | conditional mutual informations that need to be calculated in order 38 | for this method to be true to the notion of information 39 | transfer. These are implemented in state of the art algorithms, as 40 | in the Java Information Dynamics Toolkit [1]_. 41 | 42 | The results dictionary also stores the weight matrix as 43 | `'weights_matrix'` and the thresholded version of the weight matrix 44 | as `'thresholded_matrix'`. 45 | 46 | Parameters 47 | ---------- 48 | 49 | TS (np.ndarray) 50 | array consisting of :math:`L` observations from :math:`N` 51 | sensors. 52 | 53 | delay_max (int) 54 | the number of timesteps in the past to aggregate and average in 55 | order to get :math:`TE_{ij}` 56 | 57 | n_bins (int) 58 | the number of bins to turn values in the time series to categorical 59 | data, which is a pre-processing step to compute entropy. 60 | 61 | threshold_type (str) 62 | Which thresholding function to use on the matrix of 63 | weights. See `netrd.utilities.threshold.py` for 64 | documentation. Pass additional arguments to the thresholder 65 | using ``**kwargs``. 66 | 67 | Returns 68 | ------- 69 | 70 | G (nx.Graph) 71 | a reconstructed graph with :math:`N` nodes. 72 | 73 | References 74 | ---------- 75 | 76 | .. 
[1] https://github.com/jlizier/jidt 77 | 78 | """ 79 | N, L = TS.shape # Get the shape and length of the time series 80 | data = TS.T # Transpose the time series to make observations the rows 81 | if delay_max >= L: 82 | raise ValueError('Max steps of delay exceeds time series length.') 83 | 84 | # Transform the data into its binned categorical version, 85 | # which is a pre-processing before computing entropy 86 | data = categorized_data(data, n_bins) 87 | 88 | # Compute the transfer entropy of every tuple of nodes 89 | TE = np.zeros((N, N)) # Initialize a matrix for transfer entropy 90 | for i, j in permutations(range(N), 2): 91 | # Check several delay values and average them together 92 | # This average is naive, but appears to be sufficient in 93 | # some circumstances 94 | te_list = [ 95 | transfer_entropy(data[:, i], data[:, j], delay) 96 | for delay in range(1, delay_max + 1) 97 | ] 98 | TE[i, j] = np.mean(te_list) 99 | 100 | self.results['weights_matrix'] = TE 101 | 102 | # threshold the network 103 | TE_thresh = threshold(TE, threshold_type, **kwargs) 104 | self.results['thresholded_matrix'] = TE_thresh 105 | 106 | # construct the network 107 | self.results['graph'] = create_graph(TE_thresh) 108 | G = self.results['graph'] 109 | 110 | return G 111 | 112 | 113 | def transfer_entropy(X, Y, delay): 114 | """ 115 | This is a TE implementation: an asymmetric statistic measuring the 116 | reduction in uncertainty for the dynamics of Y given the history of X, 117 | that is, the amount of information transferred from X to Y. The 118 | calculation is done via conditional mutual information. 119 | 120 | Parameters 121 | ---------- 122 | X (np.ndarray): time series of categorical values from node :math:`i` 123 | Y (np.ndarray): time series of categorical values from node :math:`j` 124 | delay (int): number of steps by which node :math:`i`'s past state is lagged 125 | 126 | Returns 127 | ------- 128 | te (float): the transfer entropy from node i to node j 129 | 130 | """ 131 | X_past = X[:-delay, np.newaxis] 132 | Y_past = Y[:-delay, np.newaxis] 133 | joint_past = np.hstack((Y_past, X_past)) 134 | Y_future = Y[delay:, np.newaxis] 135 | 136 | te = conditional_entropy(Y_future, Y_past) 137 | te -= conditional_entropy(Y_future, joint_past) 138 | 139 | return te 140 | -------------------------------------------------------------------------------- /netrd/dynamics/lotka_volterra.py: -------------------------------------------------------------------------------- 1 | """ 2 | lotka_volterra.py 3 | ----------------- 4 | 5 | Implementation to simulate a Lotka-Volterra model on a network. 6 | 7 | author: Chia-Hung Yang 8 | Submitted as part of the 2019 NetSI Collabathon. 9 | """ 10 | 11 | from netrd.dynamics import BaseDynamics 12 | import numpy as np 13 | import networkx as nx 14 | from numpy.random import uniform, normal 15 | from scipy.integrate import ode 16 | from ..utilities import unweighted 17 | 18 | 19 | class LotkaVolterra(BaseDynamics): 20 | """Lotka-Volterra dynamics of species abundance.""" 21 | 22 | @unweighted 23 | def simulate( 24 | self, 25 | G, 26 | L, 27 | init=None, 28 | gr=None, 29 | cap=None, 30 | inter=None, 31 | dt=1e-2, 32 | stochastic=True, 33 | pertb=None, 34 | ): 35 | r"""Simulate time series on a network from the Lotka-Volterra model. 36 | 37 | The Lotka-Volterra model was designed to describe dynamics of 38 | species abundances in an ecosystem.
Species :math:`i`'s abundance 39 | change per time is :math:`\frac{d X_i}{d t} = r_i X_i \left(1 - 40 | \frac{X_i}{K_i} + \sum_{j \neq i} W_{ij} \frac{X_j}{K_i}\right)`, 41 | where :math:`r_i` and :math:`K_i` are the growth rate and the 42 | carrying capacity of species :math:`i` respectively, and 43 | :math:`W_{ij}` is the relative interaction strength of species 44 | :math:`j` on :math:`i`. 45 | 46 | The results dictionary also stores the ground truth network as 47 | `'ground_truth'` and the intermediate time steps as `'time_steps'`. 48 | 49 | Parameters 50 | ---------- 51 | 52 | G (nx.Graph) 53 | Underlying ground-truth network of simulated time series which 54 | has :math:`N` nodes. 55 | 56 | L (int) 57 | Length of time series. 58 | 59 | init (np.ndarray) 60 | Length-:math:`N` 1D array of nodes' initial condition. If not 61 | specified, an initial condition is uniformly generated from 0 to 62 | the nodes' carrying capacity. 63 | 64 | gr (np.ndarray) 65 | Length-:math:`N` 1D array of nodes' growth rate. If not 66 | specified, default to 1 for all nodes. 67 | 68 | cap (np.ndarray) 69 | Length-:math:`N` 1D array of nodes' carrying capacity. If not 70 | specified, default to 1 for all nodes. 71 | 72 | inter (np.ndarray) 73 | :math:`N \times N` array of interaction weights between 74 | nodes. If not specified, default to a zero-diagonal matrix 75 | whose [i, j] entry is :math:`\frac{sign(j - i)}{N - 1}`. 76 | 77 | dt (float or np.ndarray) 78 | Sizes of time steps when simulating the continuous-time 79 | dynamics. 80 | 81 | stochastic (bool) 82 | Whether to simulate the stochastic or deterministic dynamics. 83 | 84 | pertb (np.ndarray) 85 | Length-:math:`N` 1D array of perturbation magnitude of nodes' 86 | growth. If not specified, default to 0.01 for all nodes. 87 | 88 | Returns 89 | ------- 90 | 91 | TS (np.ndarray) 92 | :math:`N \times L` array of `L` observations on :math:`N` nodes. 93 | 94 | Notes 95 | ----- 96 | 97 | The deterministic dynamics are simulated with an adaptive Runge-Kutta 98 | integrator (SciPy's ``dopri5``), and the stochastic dynamics through 99 | multiplicative noise with the Euler-Maruyama method. 100 | 101 | The ground-truth network, time steps and the time series can be 102 | found in results['ground_truth'], results['time_steps'] and 103 | results['TS'] respectively.
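Examples
--------

A minimal sketch of a stochastic simulation on a random graph (the
graph and all parameter values here are arbitrary):

.. code:: python

    import networkx as nx
    from netrd.dynamics import LotkaVolterra

    G = nx.erdos_renyi_graph(20, 0.2)
    dynamics = LotkaVolterra()
    TS = dynamics.simulate(G, 500, dt=1e-2, stochastic=True)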
104 | 105 | """ 106 | 107 | N = G.number_of_nodes() 108 | adjmat = nx.to_numpy_array(G) 109 | 110 | # Initialize the model's parameters if not specified 111 | if gr is None: 112 | gr = np.ones(N, dtype=float) 113 | if cap is None: 114 | cap = np.ones(N, dtype=float) 115 | if inter is None: 116 | wei = 1 / (N - 1) 117 | full = np.full((N, N), wei, dtype=float) 118 | inter = np.zeros((N, N), dtype=float) 119 | inter += np.triu(full) - np.tril(full) 120 | 121 | if stochastic and pertb is None: 122 | pertb = 1e-2 * np.ones(N, dtype=float) 123 | 124 | # Randomly initialize an initial condition if not specified 125 | TS = np.zeros((N, L), dtype=float) 126 | if init is None: 127 | init = uniform(low=0, high=cap) 128 | TS[:, 0] = init 129 | 130 | # Define the function of dynamics 131 | mat = np.where(adjmat == 1, inter, 0.0) + np.diag(-np.ones(N)) 132 | mat /= cap[:, np.newaxis] 133 | 134 | def dyn(t, state): 135 | return state * (gr + np.dot(mat, state)) 136 | 137 | # Simulate the time series 138 | if isinstance(dt, float): 139 | dt = dt * np.ones(L - 1) 140 | 141 | # Deterministic dynamics 142 | if not stochastic: 143 | integrator = ode(dyn).set_integrator('dopri5') 144 | integrator.set_initial_value(init, 0.0) 145 | for t in range(L - 1): 146 | if integrator.successful(): 147 | TS[:, t + 1] = integrator.integrate(integrator.t + dt[t]) 148 | else: 149 | message = 'Integration not successful. ' 150 | message += 'Change sizes of time steps or the parameters.' 151 | raise RuntimeError(message) 152 | 153 | # Stochastic dynamics 154 | else: 155 | for t in range(L - 1): 156 | state = TS[:, t].copy() 157 | _next = state + dyn(t, state) * dt[t] 158 | _next += state * normal(scale=pertb) * np.sqrt(dt[t]) 159 | TS[:, t + 1] = _next 160 | 161 | # Store the results 162 | self.results['ground_truth'] = G 163 | self.results['time_steps'] = np.cumsum(dt) 164 | self.results['TS'] = TS 165 | 166 | return TS 167 | -------------------------------------------------------------------------------- /netrd/distance/netsimile.py: -------------------------------------------------------------------------------- 1 | """ 2 | netsimile.py 3 | ------------ 4 | 5 | Graph distance based on: 6 | Berlingerio, M., Koutra, D., Eliassi-Rad, T. & Faloutsos, C. NetSimile: A Scalable Approach to Size-Independent Network Similarity. arXiv (2012) 7 | 8 | author: Alex Gates 9 | email: ajgates42@gmail.com (optional) 10 | Submitted as part of the 2019 NetSI Collabathon. 11 | 12 | """ 13 | import networkx as nx 14 | import numpy as np 15 | from scipy.spatial.distance import canberra 16 | from scipy.stats import skew, kurtosis 17 | 18 | from .base import BaseDistance 19 | from ..utilities import undirected, unweighted 20 | 21 | 22 | class NetSimile(BaseDistance): 23 | """Compares node signature distributions.""" 24 | 25 | @undirected 26 | @unweighted 27 | def dist(self, G1, G2): 28 | """A scalable approach to network similarity. 29 | 30 | A network similarity measure based on node signature distributions. 31 | 32 | The results dictionary includes the underlying feature matrices in 33 | `'feature_matrices'` and the underlying signature vectors in 34 | `'signature_vectors'`. 35 | 36 | Parameters 37 | ---------- 38 | 39 | G1, G2 (nx.Graph) 40 | two undirected networkx graphs to be compared. 41 | 42 | Returns 43 | ------- 44 | 45 | dist (float) 46 | the distance between `G1` and `G2`. 47 | 48 | References 49 | ---------- 50 | 51 | .. 
[1] Michele Berlingerio, Danai Koutra, Tina Eliassi-Rad, 52 | Christos Faloutsos: NetSimile: A Scalable Approach to 53 | Size-Independent Network Similarity. CoRR abs/1209.2684 54 | (2012) 55 | 56 | """ 57 | 58 | # find the graph node feature matrices 59 | G1_node_features = feature_extraction(G1) 60 | G2_node_features = feature_extraction(G2) 61 | 62 | # get the graph signature vectors 63 | G1_signature = graph_signature(G1_node_features) 64 | G2_signature = graph_signature(G2_node_features) 65 | 66 | # the final distance is the absolute canberra distance 67 | dist = abs(canberra(G1_signature, G2_signature)) 68 | 69 | self.results['feature_matrices'] = G1_node_features, G2_node_features 70 | self.results['signature_vectors'] = G1_signature, G2_signature 71 | self.results['dist'] = dist 72 | 73 | return dist 74 | 75 | 76 | def feature_extraction(G): 77 | """Node feature extraction. 78 | 79 | Parameters 80 | ---------- 81 | 82 | G (nx.Graph): a networkx graph. 83 | 84 | Returns 85 | ------- 86 | 87 | node_features (float): the Nx7 matrix of node features.""" 88 | 89 | # necessary data structures 90 | node_features = np.zeros(shape=(G.number_of_nodes(), 7)) 91 | node_list = sorted(G.nodes()) 92 | node_degree_dict = dict(G.degree()) 93 | node_clustering_dict = dict(nx.clustering(G)) 94 | egonets = {n: nx.ego_graph(G, n) for n in node_list} 95 | 96 | # node degrees 97 | degs = [node_degree_dict[n] for n in node_list] 98 | 99 | # clustering coefficient 100 | clusts = [node_clustering_dict[n] for n in node_list] 101 | 102 | # average degree of neighborhood 103 | neighbor_degs = [ 104 | np.mean([node_degree_dict[m] for m in egonets[n].nodes if m != n]) 105 | if node_degree_dict[n] > 0 106 | else 0 107 | for n in node_list 108 | ] 109 | 110 | # average clustering coefficient of neighborhood 111 | neighbor_clusts = [ 112 | np.mean([node_clustering_dict[m] for m in egonets[n].nodes if m != n]) 113 | if node_degree_dict[n] > 0 114 | else 0 115 | for n in node_list 116 | ] 117 | 118 | # number of edges in the neighborhood 119 | neighbor_edges = [ 120 | egonets[n].number_of_edges() if node_degree_dict[n] > 0 else 0 121 | for n in node_list 122 | ] 123 | 124 | # number of outgoing edges from the neighborhood 125 | # the sum of neighborhood degrees = 2*(internal edges) + external edges 126 | # node_features[:,5] = node_features[:,0] * node_features[:,2] - 2*node_features[:,4] 127 | neighbor_outgoing_edges = [ 128 | len( 129 | [ 130 | edge 131 | for edge in set.union(*[set(G.edges(j)) for j in egonets[i].nodes]) 132 | if not egonets[i].has_edge(*edge) 133 | ] 134 | ) 135 | for i in node_list 136 | ] 137 | 138 | # number of neighbors of neighbors (not in neighborhood) 139 | neighbors_of_neighbors = [ 140 | len( 141 | set([p for m in G.neighbors(n) for p in G.neighbors(m)]) 142 | - set(G.neighbors(n)) 143 | - set([n]) 144 | ) 145 | if node_degree_dict[n] > 0 146 | else 0 147 | for n in node_list 148 | ] 149 | 150 | # assembling the features 151 | node_features[:, 0] = degs 152 | node_features[:, 1] = clusts 153 | node_features[:, 2] = neighbor_degs 154 | node_features[:, 3] = neighbor_clusts 155 | node_features[:, 4] = neighbor_edges 156 | node_features[:, 5] = neighbor_outgoing_edges 157 | node_features[:, 6] = neighbors_of_neighbors 158 | 159 | return np.nan_to_num(node_features) 160 | 161 | 162 | def graph_signature(node_features): 163 | signature_vec = np.zeros(7 * 5) 164 | 165 | # for each of the 7 features 166 | for k in range(7): 167 | # find the mean 168 | signature_vec[k * 5] = node_features[:, 
k].mean() 169 | # find the median 170 | signature_vec[k * 5 + 1] = np.median(node_features[:, k]) 171 | # find the std 172 | signature_vec[k * 5 + 2] = node_features[:, k].std() 173 | # find the skew 174 | signature_vec[k * 5 + 3] = skew(node_features[:, k]) 175 | # find the kurtosis 176 | signature_vec[k * 5 + 4] = kurtosis(node_features[:, k]) 177 | 178 | return signature_vec 179 | 180 | 181 | """ 182 | # sample usage 183 | >>>from netrd.distance import NetSimile 184 | >>>G1 = nx.karate_club_graph() 185 | >>>G2 = nx.krackhardt_kite_graph() 186 | 187 | >>>test = NetSimile() 188 | >>>print(test.dist(G1, G2)) 189 | 20.180783067167326 190 | """ 191 | -------------------------------------------------------------------------------- /netrd/reconstruction/partial_correlation_matrix.py: -------------------------------------------------------------------------------- 1 | """ 2 | partial_correlation_matrix.py 3 | --------------------- 4 | 5 | Reconstruction of graphs using the partial correlation matrix. 6 | 7 | author: Stefan McCabe 8 | email: stefanmccabe at gmail dot com 9 | Submitted as part of the 2019 NetSI Collabathon 10 | 11 | """ 12 | from .base import BaseReconstructor 13 | import numpy as np 14 | from scipy import stats, linalg 15 | from ..utilities import create_graph, threshold 16 | 17 | 18 | class PartialCorrelationMatrix(BaseReconstructor): 19 | """Uses a regularized form of the precision matrix.""" 20 | 21 | def fit( 22 | self, 23 | TS, 24 | index=None, 25 | drop_index=True, 26 | of_residuals=False, 27 | threshold_type="range", 28 | **kwargs 29 | ): 30 | """Uses a regularized form of the precision matrix. 31 | 32 | The results dictionary also stores the weight matrix as 33 | `'weights_matrix'` and the thresholded version of the weight matrix 34 | as `'thresholded_matrix'`. For details see [1]_. 35 | 36 | Parameters 37 | ---------- 38 | 39 | index (int, array of ints, or None) 40 | Take the partial correlations of each pair of elements holding 41 | constant an index variable or set of index variables. If None, 42 | take the partial correlations of the variables holding constant 43 | all other variables. 44 | 45 | drop_index (bool) 46 | If True, drop the index variables after calculating the partial 47 | correlations. 48 | 49 | of_residuals (bool) 50 | If True, after calculating the partial correlations (presumably 51 | using a dropped index variable), recalculate the partial 52 | correlations between each variable, holding constant all other 53 | variables. 54 | 55 | threshold_type (str) 56 | Which thresholding function to use on the matrix of 57 | weights. See `netrd.utilities.threshold.py` for 58 | documentation. Pass additional arguments to the thresholder 59 | using ``**kwargs``. 60 | 61 | Returns 62 | ------- 63 | 64 | G (nx.Graph) 65 | a reconstructed graph. 66 | 67 | References 68 | ---------- 69 | 70 | .. 
[1] https://bwlewis.github.io/correlation-regularization/ 71 | 72 | """ 73 | 74 | p_cor = partial_corr(TS, index=index) 75 | 76 | if drop_index and index is not None: 77 | p_cor = np.delete(p_cor, index, axis=0) 78 | p_cor = np.delete(p_cor, index, axis=1) 79 | 80 | if of_residuals: 81 | p_cor = partial_corr(p_cor, index=None) 82 | 83 | self.results["weights_matrix"] = p_cor 84 | 85 | # threshold the network 86 | W_thresh = threshold(p_cor, threshold_type, **kwargs) 87 | 88 | # construct the network 89 | self.results["graph"] = create_graph(W_thresh) 90 | self.results["thresholded_matrix"] = W_thresh 91 | 92 | G = self.results["graph"] 93 | 94 | return G 95 | 96 | 97 | # This partial correlation function is adapted from Fabian Pedregosa-Izquierdo's 98 | # implementation of partial correlation in Python, found at [this gist]( 99 | # https://gist.github.com/fabianp/9396204419c7b638d38f) 100 | """ 101 | Partial Correlation in Python (clone of Matlab's partialcorr) 102 | 103 | This uses the linear regression approach to compute the partial 104 | correlation (might be slow for a huge number of variables). The 105 | algorithm is detailed here: 106 | 107 | http://en.wikipedia.org/wiki/Partial_correlation#Using_linear_regression 108 | 109 | Taking X and Y as the two variables of interest and Z as the matrix of all the variables minus {X, Y}, 110 | the algorithm can be summarized as 111 | 112 | 1) perform a normal linear least-squares regression with X as the target and Z as the predictor 113 | 2) calculate the residuals in Step #1 114 | 3) perform a normal linear least-squares regression with Y as the target and Z as the predictor 115 | 4) calculate the residuals in Step #3 116 | 5) calculate the correlation coefficient between the residuals from Steps #2 and #4; 117 | 118 | The result is the partial correlation between X and Y while controlling for the effect of Z. 119 | 120 | 121 | Date: Nov 2014 122 | Author: Fabian Pedregosa-Izquierdo, f@bianp.net 123 | Testing: Valentina Borghesani, valentinaborghesani@gmail.com 124 | """ 125 | 126 | 127 | def partial_corr(C, index=None): 128 | """Returns the sample linear partial correlation coefficients between pairs of 129 | variables in C, controlling for the remaining variables in C. 130 | 131 | 132 | Parameters 133 | -------------- 134 | C : array-like, shape (p, n) 135 | Array with the different variables. Each row of C is taken as a variable. 136 | 137 | Returns 138 | ------- 139 | P : array-like, shape (p, p) 140 | P[i, j] contains the partial correlation of C[:, i] and C[:, j] 141 | controlling for the remaining variables in C. 142 | 143 | """ 144 | 145 | C = np.asarray(C).T 146 | p = C.shape[1] 147 | P_corr = np.zeros((p, p), dtype=np.float64) 148 | 149 | for i in range(p): 150 | P_corr[i, i] = 1 151 | for j in range(i + 1, p): 152 | if index is None: 153 | idx = np.ones(p, dtype=bool) 154 | idx[i] = False 155 | idx[j] = False 156 | elif type(index) is int or ( 157 | isinstance(index, np.ndarray) and issubclass(index.dtype.type, np.int_) 158 | ): 159 | idx = np.zeros(p, dtype=bool) 160 | idx[index] = True 161 | else: 162 | raise ValueError( 163 | "Index must be an integer, an array of " "integers, or None." 
164 | ) 165 | 166 | beta_i = linalg.lstsq(C[:, idx], C[:, j])[0] 167 | beta_j = linalg.lstsq(C[:, idx], C[:, i])[0] 168 | 169 | res_j = C[:, j] - C[:, idx].dot(beta_i) 170 | res_i = C[:, i] - C[:, idx].dot(beta_j) 171 | 172 | corr = stats.pearsonr(res_i, res_j)[0] 173 | P_corr[i, j] = corr 174 | P_corr[j, i] = corr 175 | 176 | return P_corr 177 | -------------------------------------------------------------------------------- /netrd/distance/graph_diffusion.py: -------------------------------------------------------------------------------- 1 | """ 2 | graph_diffusion.py 3 | -------------------------- 4 | 5 | Graph diffusion distance, from 6 | 7 | Hammond, D. K., Gur, Y., & Johnson, C. R. (2013, December). Graph diffusion 8 | distance: A difference measure for weighted graphs based on the graph Laplacian 9 | exponential kernel. In Global Conference on Signal and Information Processing, 10 | 2013 IEEE (pp 419-422). IEEE. https://doi.org/10.1109/GlobalSIP.2013.6736904 11 | 12 | This implementation is adapted from the authors' MATLAB code, available at 13 | https://rb.gy/txbfrh, and available under an MIT license with the authors' 14 | permission. 15 | 16 | author: Brennan Klein 17 | email: brennanjamesklein at gmail dot com 18 | Submitted as part of the 2019 NetSI Collabathon. 19 | 20 | """ 21 | 22 | import numpy as np 23 | import networkx as nx 24 | from scipy.sparse.csgraph import laplacian 25 | from .base import BaseDistance 26 | from ..utilities import undirected 27 | 28 | 29 | class GraphDiffusion(BaseDistance): 30 | """Find the maximally dissimilar diffusion kernels between two graphs.""" 31 | 32 | @undirected 33 | def dist(self, G1, G2, thresh=1e-08, resolution=1000): 34 | r"""The graph diffusion distance between two graphs, :math:`G` and :math:`G'`, 35 | is a distance measure based on the notion of flow within each graph. As 36 | such, this measure uses the unnormalized Laplacian matrices of both 37 | graphs, :math:`\mathcal{L}` and :math:`\mathcal{L}'`, and uses them to 38 | construct time-varying Laplacian exponential diffusion kernels, 39 | :math:`e^{-t\mathcal{L}}` and :math:`e^{-t\mathcal{L}'}`, by 40 | effectively simulating a diffusion process for :math:`t` timesteps, 41 | creating a column vector of node-level activity at each timestep. The 42 | distance :math:`d_\texttt{GDD}(G, G')` is defined as the Frobenius norm 43 | between the two diffusion kernels at the timestep :math:`t^{*}` where 44 | the two kernels are maximally different. That is, we compute the 45 | Frobenius norms and their differences for each timestep, and return the 46 | maximum difference. 47 | 48 | .. math:: 49 | D_{GDD}(G,G') = \sqrt{||e^{-t^{*}\mathcal{L}}-e^{-t^{*}\mathcal{L}'}||} 50 | 51 | The results dictionary also stores a 2-tuple of the underlying 52 | adjacency matrices in `adjacency_matrices`, the Laplacian matrices in 53 | `laplacian_matrices`, and the output of the optimization process 54 | (`peak_diffusion_time` and `peak_deviation`). 55 | 56 | Adapted from the authors' MATLAB code, available at: https://rb.gy/txbfrh 57 | 58 | 59 | Parameters 60 | ---------- 61 | 62 | G1, G2 (nx.Graph) 63 | two networkx graphs to be compared. 64 | 65 | thresh (float) 66 | minimum value above which the eigenvalues will be considered. 67 | 68 | resolution (int) 69 | number of :math:`t` values to span through. 70 | 71 | Returns 72 | ------- 73 | dist (float) 74 | the distance between `G1` and `G2`. 75 | 76 | References 77 | ---------- 78 | 79 | .. [1] Hammond, D. K., Gur, Y., & Johnson, C. R. (2013, December). 
80 | Graph diffusion distance: A difference measure for weighted graphs based on the 81 | graph Laplacian exponential kernel. In Global Conference on Signal and 82 | Information Processing, 2013 IEEE (pp 419-422). IEEE. 83 | https://doi.org/10.1109/GlobalSIP.2013.6736904 84 | 85 | """ 86 | 87 | A1 = nx.to_numpy_array(G1) 88 | A2 = nx.to_numpy_array(G2) 89 | 90 | L1 = laplacian(A1) 91 | L2 = laplacian(A2) 92 | 93 | def sort_eigs(eigs): 94 | vals, vecs = eigs 95 | idx = np.argsort(abs(vals)) 96 | return vals[idx], vecs[:, idx] 97 | 98 | vals1, vecs1 = sort_eigs(np.linalg.eig(L1)) 99 | vals2, vecs2 = sort_eigs(np.linalg.eig(L2)) 100 | 101 | eigs = np.hstack((np.diag(vals1), np.diag(vals2)))  # embed the eigenvalue vectors in diagonal matrices 102 | eigs = eigs[np.where(eigs > thresh)]  # flattens, keeping only the (diagonal) eigenvalues above thresh 103 | eigs = np.sort(eigs) 104 | 105 | if len(eigs) == 0: 106 | dist = 0 107 | self.results["dist"] = dist 108 | return dist 109 | 110 | t_upperbound = np.real(1.0 / eigs[0]) 111 | ts = np.linspace(0, t_upperbound, resolution) 112 | 113 | # Find the Frobenius norms between all the diffusion kernels at 114 | # different times. Return the value and where this vector is minimized. 115 | E = -exponential_diffusion_diff(vecs1, vals1, vecs2, vals2, ts) 116 | f_val, t_star = (np.nanmin(E), np.argmin(E)) 117 | 118 | dist = np.sqrt(-f_val) 119 | 120 | self.results["adjacency_matrices"] = A1, A2 121 | self.results["laplacian_matrices"] = L1, L2 122 | self.results["peak_diffusion_time"] = t_star 123 | self.results["peak_deviation"] = f_val 124 | 125 | self.results["dist"] = dist 126 | 127 | return dist 128 | 129 | 130 | def exponential_diffusion_diff(vecs1, vals1, vecs2, vals2, ts): 131 | """ 132 | Computes Frobenius norm of difference of Laplacian exponential diffusion 133 | kernels, at specified timepoints. 134 | 135 | Parameters 136 | ---------- 137 | 138 | vecs1, vecs2 (np.array) 139 | eigenvectors of the Laplacians of `G1` and `G2` 140 | 141 | vals1, vals2 (np.array) 142 | eigenvalues of the Laplacians of `G1` and `G2` 143 | 144 | ts (np.array) 145 | times at which to compute the difference in Frobenius norms 146 | 147 | Returns 148 | ------- 149 | 150 | diffs (np.array) 151 | same shape as :math:`t`, contains differences of Frobenius norms 152 | 153 | """ 154 | 155 | diffs = np.zeros(len(ts)) 156 | 157 | for kt, t in enumerate(ts): 158 | exp_diag_1 = np.diag(np.exp(-t * np.diag(vals1))) 159 | exp_diag_2 = np.diag(np.exp(-t * np.diag(vals2))) 160 | 161 | # multiply the eigenvectors element-wise by the appropriate diffusion value 162 | # before left-multiplying the eigenvectors again. 163 | norm1 = vecs1.dot(np.multiply(exp_diag_1, vecs1).T) 164 | norm2 = vecs2.dot(np.multiply(exp_diag_2, vecs2).T) 165 | diff = norm1 - norm2 166 | 167 | diffs[kt] = (diff**2).sum() 168 | 169 | return diffs 170 | -------------------------------------------------------------------------------- /netrd/distance/quantum_jsd.py: -------------------------------------------------------------------------------- 1 | """ 2 | quantum_jsd.py 3 | -------------------------- 4 | 5 | Graph distance based on the quantum $q$-Jensen-Shannon divergence. 6 | 7 | De Domenico, Manlio, and Jacob Biamonte. 2016. “Spectral Entropies as 8 | Information-Theoretic Tools for Complex Network Comparison.” Physical Review X 9 | 6 (4). https://doi.org/10.1103/PhysRevX.6.041062. 10 | 11 | 12 | author: Stefan McCabe & Brennan Klein 13 | email: 14 | Submitted as part of the 2019 NetSI Collabathon. 
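A minimal usage sketch (the two graphs below are arbitrary illustrative choices; any pair of networkx graphs works):

>>> import networkx as nx
>>> from netrd.distance import QuantumJSD
>>> G1, G2 = nx.karate_club_graph(), nx.cycle_graph(34)
>>> d = QuantumJSD().dist(G1, G2, beta=0.1, q=2)  # q=2 selects the collision entropy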
15 | 16 | """ 17 | 18 | import warnings 19 | import networkx as nx 20 | import numpy as np 21 | from scipy.linalg import expm 22 | from .base import BaseDistance 23 | from ..utilities import undirected, unweighted 24 | 25 | 26 | class QuantumJSD(BaseDistance): 27 | """Compares the spectral entropies of the density matrices.""" 28 | 29 | @undirected 30 | @unweighted 31 | def dist(self, G1, G2, beta=0.1, q=None): 32 | r"""Square root of the quantum :math:`q`-Jensen-Shannon divergence between two 33 | graphs. 34 | 35 | The generalized Jensen-Shannon divergence compares two graphs by the 36 | spectral entropies of their quantum-statistical-mechanical density 37 | matrices. It can be written as 38 | 39 | .. math:: 40 | \mathcal{J}_q(\mathbf{\rho} || \mathbf{\sigma}) = 41 | S_q\left( \frac{\mathbf{\rho} + \mathbf{\sigma}}{2} \right) - 42 | \frac{1}{2} [S_q(\mathbf{\rho}) + S_q(\mathbf{\sigma})], 43 | 44 | 45 | where :math:`\mathbf{\rho}` and :math:`\mathbf{\sigma}` are density 46 | matrices and :math:`q` is the order parameter. 47 | 48 | The density matrix 49 | 50 | .. math:: 51 | \mathbf{\rho} = \frac{e^{-\beta\mathbf{L}}}{Z}, 52 | 53 | 54 | where 55 | 56 | .. math:: 57 | Z = \sum_{i=1}^{N}e^{-\beta\lambda_i(\mathbf{L})} 58 | 59 | 60 | and :math:`\lambda_i(\mathbf{L})` is the :math:`i`th eigenvalue of the Laplacian 61 | matrix :math:`\mathbf{L}`, represents an imaginary diffusion process over the network 62 | with time parameter :math:`\beta > 0`. 63 | 64 | For these density matrices and the mixture matrix, we calculate the 65 | Rényi entropy of order :math:`q` 66 | 67 | .. math:: 68 | S_q = \frac{1}{1-q} \log_2 \sum_{i=1}^{N}\lambda_i(\mathbf{\rho})^q, 69 | 70 | 71 | or, if :math:`q=1`, the Von Neumann entropy 72 | 73 | .. math:: 74 | S_1 = - \sum_{i=1}^{N}\lambda_i(\mathbf{\rho})\log_2\lambda_i(\mathbf{\rho}). 75 | 76 | 77 | Note that this implementation is not exact because the matrix 78 | exponentiation is performed using the Padé approximation and 79 | because of imprecision in the calculation of the eigenvalues of the 80 | density matrix. 81 | 82 | Parameters 83 | ---------- 84 | 85 | G1, G2 (nx.Graph) 86 | two networkx graphs to be compared 87 | 88 | beta (float) 89 | time parameter for diffusion propagator 90 | 91 | q (float) 92 | order parameter for Rényi entropy. If None or 1, use the Von 93 | Neumann entropy (i.e., Shannon entropy) instead. 94 | 95 | Returns 96 | ------- 97 | 98 | dist (float) 99 | the distance between `G1` and `G2`. 100 | 101 | References 102 | ---------- 103 | 104 | .. [1] De Domenico, Manlio, and Jacob Biamonte. 2016. "Spectral 105 | Entropies as Information-Theoretic Tools for Complex Network 106 | Comparison." Physical Review X 6 107 | (4). https://doi.org/10.1103/PhysRevX.6.041062. 108 | 109 | """ 110 | if beta <= 0: 111 | raise ValueError("beta must be positive.") 112 | 113 | if q and q >= 2: 114 | warnings.warn("JSD is only a metric for 0 ≤ q < 2.", RuntimeWarning) 115 | 116 | def density_matrix(A, beta): 117 | """ 118 | Create the density matrix encoding probabilities for entropies. 119 | This is done using a fictive diffusion process with time parameter 120 | :math:`beta`. 121 | """ 122 | L = np.diag(np.sum(A, axis=1)) - A 123 | rho = expm(-1 * beta * L) 124 | rho = rho / np.trace(rho) 125 | 126 | return rho 127 | 128 | def renyi_entropy(X, q=None): 129 | """ 130 | Calculate the Rényi entropy with order :math:`q`, or the Von Neumann 131 | entropy if :math:`q` is `None` or 1. 
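For example, with :math:`q = 2` (the collision entropy) this reduces to :math:`S_2 = -\log_2 \sum_i \lambda_i^2`, so the maximally mixed two-state density matrix :math:`\mathrm{diag}(1/2, 1/2)` has entropy :math:`-\log_2(1/4 + 1/4) = 1` bit.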
132 | """ 133 | # Note that where there are many zero eigenvalues (i.e., large 134 | # values of beta) in the density matrix, floating-point precision 135 | # issues mean that there will be negative eigenvalues and the 136 | # eigenvalues will not sum to precisely one. To avoid encountering 137 | # `nan`s in `np.log2`, we remove all eigenvalues that are close 138 | # to zero within 1e-6 tolerance. As for the eigenvalues not summing 139 | # to exactly one, this is a small source of error in the 140 | # calculation. 141 | eigs = np.linalg.eigvalsh(X) 142 | zero_eigenvalues = np.isclose(np.abs(eigs), 0, atol=1e-6) 143 | eigs = eigs[np.logical_not(zero_eigenvalues)] 144 | 145 | if q is None or q == 1: 146 | # plain Von Neumann entropy 147 | H = -1 * np.sum(eigs * np.log2(eigs)) 148 | else: 149 | prefactor = 1 / (1 - q) 150 | H = prefactor * np.log2((eigs**q).sum()) 151 | return H 152 | 153 | A1 = nx.to_numpy_array(G1) 154 | A2 = nx.to_numpy_array(G2) 155 | 156 | rho1 = density_matrix(A1, beta) 157 | rho2 = density_matrix(A2, beta) 158 | mix = (rho1 + rho2) / 2 159 | 160 | H0 = renyi_entropy(mix, q) 161 | H1 = renyi_entropy(rho1, q) 162 | H2 = renyi_entropy(rho2, q) 163 | 164 | dist = np.sqrt(H0 - 0.5 * (H1 + H2)) 165 | 166 | self.results['density_matrix_1'] = rho1 167 | self.results['density_matrix_2'] = rho2 168 | self.results['mixture_matrix'] = mix 169 | self.results['entropy_1'] = H1 170 | self.results['entropy_2'] = H2 171 | self.results['entropy_mixture'] = H0 172 | self.results['dist'] = dist 173 | return dist 174 | -------------------------------------------------------------------------------- /netrd/reconstruction/partial_correlation_influence.py: -------------------------------------------------------------------------------- 1 | """ 2 | partial_correlation_influence.py 3 | -------------------------------- 4 | 5 | Reconstruction of graphs using the partial correlation influence, as defined in: 6 | 7 | Kenett, D. Y. et al. Dominating clasp of the financial sector revealed by 8 | partial correlation analysis of the stock market. PLoS ONE 5, e15032 (2010). 9 | 10 | The index variable option as in: 11 | 12 | Kenett, D. Y., Huang, X., Vodenska, I., Havlin, S. & Stanley, H. E. Partial correlation 13 | analysis: applications for financial markets. Quantitative Finance 15, 569–578 (2015). 14 | 15 | 16 | author: Carolina Mattsson and Chia-Hung Yang 17 | email: mattsson dot c at northeastern dot edu 18 | Submitted as part of the 2019 NetSI Collabathon 19 | """ 20 | from .base import BaseReconstructor 21 | import numpy as np 22 | from scipy import linalg 23 | from ..utilities import create_graph, threshold 24 | 25 | 26 | class PartialCorrelationInfluence(BaseReconstructor): 27 | """Uses average effect from a sensor to all others.""" 28 | 29 | def fit(self, TS, index=None, threshold_type='range', **kwargs): 30 | r"""Uses the average effect of a series :math:`Z` on the correlation between 31 | a series :math:`X` and all other series. 32 | 33 | The partial correlation influence: 34 | 35 | .. math:: 36 | 37 | d(X:Z) = \langle d(X,Y:Z) \rangle_{Y \neq X}, 38 | 39 | where :math:`d(X,Y:Z) = \rho(X,Y) - \rho(X,Y:Z)` 40 | 41 | 42 | If an index is given, both terms become partial correlations: 43 | 44 | .. math:: 45 | 46 | d(X,Y:Z) \equiv \rho(X,Y:M) - \rho(X,Y:M,Z) 47 | 48 | 49 | The results dictionary also stores the matrix of partial 50 | correlations as `'weights_matrix'` and the thresholded version of 51 | the partial correlation matrix as `'thresholded_matrix'`. 
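A minimal usage sketch (synthetic data chosen for illustration; all arguments are left at the defaults shown in the signature):

.. code:: python

    import numpy as np
    from netrd.reconstruction import PartialCorrelationInfluence

    TS = np.random.normal(size=(10, 500))  # N = 10 sensors, L = 500 observations
    G = PartialCorrelationInfluence().fit(TS)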
52 | 53 | Parameters 54 | ---------- 55 | TS (np.ndarray) 56 | Array consisting of :math:`L` observations from :math:`N` sensors. 57 | 58 | index (int, array of ints, or None) 59 | An index variable or set of index variables, which are assumed to 60 | be confounders of all other variables. They are held constant when 61 | calculating the partial correlations. Defaults to None. 62 | 63 | threshold_type (str) 64 | Which thresholding function to use on the matrix of 65 | weights. See `netrd.utilities.threshold.py` for 66 | documentation. Pass additional arguments to the thresholder 67 | using ``**kwargs``. 68 | 69 | Returns 70 | ------- 71 | 72 | G (nx.Graph) 73 | a reconstructed graph. 74 | 75 | References 76 | ----------- 77 | 78 | .. [1] Kenett, D. Y. et al. Dominating clasp of the financial 79 | sector revealed by partial correlation analysis of the stock 80 | market. PLoS ONE 5, e15032 (2010). 81 | 82 | .. [2] Kenett, D. Y., Huang, X., Vodenska, I., Havlin, S. & 83 | Stanley, H. E. Partial correlation analysis: applications 84 | for financial markets. Quantitative Finance 15, 569–578 85 | (2015). 86 | 87 | """ 88 | data = TS.T 89 | N = data.shape[1] 90 | 91 | # Create masks to separate variables of interest from the pre-included 92 | # index variables 93 | mask = np.ones(N, dtype=bool) 94 | if index is not None: 95 | mask[index] = False 96 | 97 | # Compute partial correlations with the index variables held constant 98 | p_corr = np.full((N, N), np.nan) 99 | p_corr[np.ix_(mask, mask)] = partial_corr(data[:, mask], data[:, ~mask]) 100 | 101 | # For every non-index variable Z, compute partial correlation influence 102 | # between other variables when Z is also held constant 103 | p_corr_inf = np.full((N, N, N), np.nan) 104 | for z in np.arange(N)[mask]: 105 | m_new = mask.copy() # New mask that additionally holds variable Z constant 106 | m_new[z] = False 107 | 108 | diff = p_corr[np.ix_(m_new, m_new)] 109 | diff -= partial_corr(data[:, m_new], data[:, ~m_new]) 110 | p_corr_inf[np.ix_(m_new, m_new, [z])] = diff[:, :, np.newaxis] 111 | 112 | # Exclude the cases of Y = X 113 | np.fill_diagonal(p_corr_inf[:, :, z], np.nan) 114 | # Set PCI for X = Z to 0 for consistency after averaging 115 | p_corr_inf[z, :, z] = 0 116 | 117 | # Obtain the average partial correlation influence 118 | influence = np.zeros((N, N)) # Default self-influence by zero 119 | influence[mask, mask] = np.nanmean(p_corr_inf[mask, mask], axis=1) 120 | 121 | influence[~mask, :] = np.inf # Index variables influence all others 122 | influence[:, ~mask] = 0 # but no one influences the index variables 123 | 124 | self.results['weights_matrix'] = influence 125 | 126 | # threshold the network 127 | W_thresh = threshold(influence, threshold_type, **kwargs) 128 | 129 | # construct the network 130 | self.results['graph'] = create_graph(W_thresh) 131 | self.results['thresholded_matrix'] = W_thresh 132 | 133 | G = self.results['graph'] 134 | 135 | return G 136 | 137 | 138 | def partial_corr(_vars, idx_vars): 139 | """ 140 | Return the partial correlations between pairs of variables, given a set of 141 | index variables held constant. 142 | 143 | Parameters 144 | ---------- 145 | _vars (numpy.ndarray) 146 | Variables of interest (which are columns of the array). 147 | 148 | idx_vars (numpy.ndarray) 149 | Index variables to be held constant (which are columns of the array). 150 | If the array has zero size, namely no index variable, return the 151 | Pearson correlations between variables. 
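A quick sanity check of that degenerate case (illustrative only):

>>> x = np.random.normal(size=(100, 3))
>>> np.allclose(partial_corr(x, np.empty((100, 0))), np.corrcoef(x, rowvar=False))
True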
152 | 153 | Returns 154 | ------- 155 | p_corr (numpy.ndarray) 156 | Square array of pairwise partial correlations between variables. 157 | 158 | Note 159 | ---- 160 | Precondition: The index variables should not contain or synchronize with 161 | a variable of interest. 162 | 163 | """ 164 | if idx_vars.size == 0: 165 | return np.corrcoef(_vars, rowvar=False) 166 | else: 167 | coef = linalg.lstsq(idx_vars, _vars)[0] # Coefficients of regression 168 | resid = _vars - idx_vars.dot(coef) # Residuals 169 | return np.corrcoef(resid, rowvar=False) 170 | -------------------------------------------------------------------------------- /tests/test_distance.py: -------------------------------------------------------------------------------- 1 | """ 2 | test_distance.py 3 | ---------------- 4 | 5 | Test distance algorithms. 6 | 7 | """ 8 | 9 | import warnings 10 | import numpy as np 11 | import networkx as nx 12 | from netrd import distance 13 | from netrd.distance import BaseDistance 14 | 15 | 16 | def test_same_graph(): 17 | """The distance between two equal graphs must be zero.""" 18 | G = nx.barbell_graph(10, 5) 19 | 20 | for label, obj in distance.__dict__.items(): 21 | if isinstance(obj, type) and BaseDistance in obj.__bases__: 22 | dist = obj().dist(G, G) 23 | assert np.isclose(dist, 0.0), f"{label} fails same-graph test" 24 | 25 | 26 | def test_different_graphs(): 27 | """The distance between two different graphs must be nonzero.""" 28 | ## NOTE: This test is not totally rigorous. For example, two different 29 | ## networks may have the same eigenvalues, thus a method that compares 30 | ## their eigenvalues would result in distance 0. However, this is very 31 | ## unlikely in the constructed case, so we rely on it for now. 32 | G1 = nx.fast_gnp_random_graph(100, 0.3) 33 | G2 = nx.barabasi_albert_graph(100, 5) 34 | 35 | for label, obj in distance.__dict__.items(): 36 | if isinstance(obj, type) and BaseDistance in obj.__bases__: 37 | dist = obj().dist(G1, G2) 38 | assert dist > 0.0, f"{label} not nonzero" 39 | 40 | 41 | def test_symmetry(): 42 | """The distance between two graphs must be symmetric.""" 43 | G1 = nx.barabasi_albert_graph(100, 4) 44 | G2 = nx.fast_gnp_random_graph(100, 0.3) 45 | 46 | for label, obj in distance.__dict__.items(): 47 | if isinstance(obj, type) and BaseDistance in obj.__bases__: 48 | dist1 = obj().dist(G1, G2) 49 | dist2 = obj().dist(G2, G1) 50 | assert np.isclose(dist1, dist2), f"{label} not symmetric" 51 | 52 | 53 | def test_quantum_jsd(): 54 | """Run the above tests again using the collision entropy instead of the 55 | Von Neumann entropy to ensure that all the logic of the JSD implementation 56 | is tested. 
57 | """ 58 | 59 | with warnings.catch_warnings(): 60 | warnings.filterwarnings("ignore", message="JSD is only a metric for 0 ≤ q < 2.") 61 | JSD = distance.QuantumJSD() 62 | G = nx.barbell_graph(10, 5) 63 | dist = JSD.dist(G, G, beta=0.1, q=2) 64 | assert np.isclose(dist, 0.0), "collision entropy fails same-graph test" 65 | 66 | G1 = nx.fast_gnp_random_graph(100, 0.3) 67 | G2 = nx.barabasi_albert_graph(100, 5) 68 | dist = JSD.dist(G1, G2, beta=0.1, q=2) 69 | assert dist > 0.0, "collision entropy not nonzero" 70 | 71 | G1 = nx.barabasi_albert_graph(100, 4) 72 | G2 = nx.fast_gnp_random_graph(100, 0.3) 73 | dist1 = JSD.dist(G1, G2, beta=0.1, q=2) 74 | dist2 = JSD.dist(G2, G1, beta=0.1, q=2) 75 | assert np.isclose(dist1, dist2), "collision entropy not symmetric" 76 | 77 | 78 | def test_directed_input(): 79 | with warnings.catch_warnings(): 80 | warnings.filterwarnings( 81 | "ignore", message="Coercing directed graph to undirected." 82 | ) 83 | G = nx.fast_gnp_random_graph(100, 0.3, directed=True) 84 | 85 | for label, obj in distance.__dict__.items(): 86 | if isinstance(obj, type) and BaseDistance in obj.__bases__: 87 | dist = obj().dist(G, G) 88 | assert np.isclose(dist, 0.0), f"{label} not deterministic" 89 | 90 | G1 = nx.fast_gnp_random_graph(100, 0.3, directed=True) 91 | G2 = nx.fast_gnp_random_graph(100, 0.3, directed=True) 92 | 93 | for label, obj in distance.__dict__.items(): 94 | if isinstance(obj, type) and BaseDistance in obj.__bases__: 95 | dist1 = obj().dist(G1, G2) 96 | dist2 = obj().dist(G2, G1) 97 | assert np.isclose(dist1, dist2), f"{label} not symmetric" 98 | 99 | for obj in distance.__dict__.values(): 100 | if isinstance(obj, type) and BaseDistance in obj.__bases__: 101 | dist = obj().dist(G1, G2) 102 | assert dist > 0.0, f"{label} not nonzero" 103 | 104 | 105 | def test_weighted_input(): 106 | G1 = nx.barbell_graph(10, 5) 107 | G2 = nx.barbell_graph(10, 5) 108 | rand = np.random.RandomState(seed=42) 109 | edge_weights = {e: rand.randint(0, 1000) for e in G2.edges} 110 | nx.set_edge_attributes(G2, edge_weights, "weight") 111 | assert nx.is_isomorphic(G1, G2) 112 | 113 | for label, obj in distance.__dict__.items(): 114 | with warnings.catch_warnings(record=True) as w: 115 | warnings.simplefilter("always") 116 | if isinstance(obj, type) and BaseDistance in obj.__bases__: 117 | dist = obj().dist(G1, G2) 118 | warning_triggered = False 119 | for warning in w: 120 | if "weighted" in str(warning.message): 121 | warning_triggered = True 122 | if not warning_triggered: 123 | assert not np.isclose(dist, 0.0), f"{label} = 0" 124 | else: 125 | assert np.isclose(dist, 0.0), f"{label} != 0" 126 | 127 | 128 | def test_isomorphic_input(): 129 | G1 = nx.fast_gnp_random_graph(150, 0.10) 130 | 131 | N = G1.order() 132 | new_nodes = [(i + 5) % N for i in G1.nodes] 133 | 134 | # create G1 by permuting the adjacency matrix 135 | new_adj_mat = nx.to_numpy_array(G1, nodelist=new_nodes) 136 | G2 = nx.from_numpy_array(new_adj_mat) 137 | 138 | assert nx.is_isomorphic(G1, G2) 139 | 140 | # not all distances should be invariant under isomorphism 141 | # document those here 142 | EXCLUDED_DISTANCES = [ 143 | "Hamming", 144 | "Frobenius", 145 | "JaccardDistance", 146 | "HammingIpsenMikhailov", 147 | "ResistancePerturbation", 148 | "LaplacianSpectral", 149 | "PolynomialDissimilarity", 150 | "DeltaCon", 151 | "QuantumJSD", 152 | "DistributionalNBD", 153 | "NonBacktrackingSpectral", 154 | "GraphDiffusion", 155 | ] 156 | 157 | for label, obj in distance.__dict__.items(): 158 | print(label) 159 | if ( 160 | 
isinstance(obj, type) 161 | and BaseDistance in obj.__bases__ 162 | and label not in EXCLUDED_DISTANCES 163 | ): 164 | dist = obj().dist(G1, G2) 165 | assert np.isclose( 166 | dist, 0.0, atol=1e-3 167 | ), f"{label} not invariant under isomorphism" 168 | -------------------------------------------------------------------------------- /netrd/reconstruction/marchenko_pastur.py: -------------------------------------------------------------------------------- 1 | """ 2 | marchenko_pastur.py 3 | -------------- 4 | 5 | Graph reconstruction algorithm based on Marchenko, V. A., & Pastur, L. A. (1967). 6 | Distribution of eigenvalues for some sets of random matrices. Matematicheskii 7 | Sbornik, 114(4), 507-536. 8 | 9 | author: Matteo Chinazzi 10 | Submitted as part of the 2019 NetSI Collabathon. 11 | """ 12 | 13 | from .base import BaseReconstructor 14 | import numpy as np 15 | import networkx as nx 16 | from ..utilities import create_graph, threshold 17 | 18 | 19 | class MarchenkoPastur(BaseReconstructor): 20 | """Uses the Marchenko-Pastur law to remove noise.""" 21 | 22 | def fit( 23 | self, 24 | TS, 25 | remove_largest=False, 26 | metric_distance=False, 27 | threshold_type='range', 28 | **kwargs 29 | ): 30 | r"""Create a correlation-based graph using the Marchenko-Pastur law to remove noise. 31 | 32 | A signed graph is built by constructing a projection of the 33 | empirical correlation matrix generated from the time series data 34 | after having removed noisy components. This method combines the 35 | results presented in [1]_, [2]_, and [3]_. 36 | 37 | The results dictionary also stores the weight matrix as 38 | `'weights_matrix'` and the thresholded version of the weight matrix 39 | as `'thresholded_matrix'`. 40 | 41 | Parameters 42 | ---------- 43 | 44 | TS (np.ndarray) 45 | :math:`N \times L` array consisting of :math:`L` observations 46 | from :math:`N` sensors. 47 | 48 | remove_largest (bool), optional 49 | If ``False``, all the eigenvectors associated with the significant 50 | eigenvalues will be used to reconstruct the de-noised empirical 51 | correlation matrix. If ``True``, the eigenvector associated with 52 | the largest eigenvalue (normally known as the ``market`` mode, [2]) 53 | is excluded from the reconstruction step. 54 | 55 | metric_distance (bool), optional 56 | If ``False``, a signed graph is obtained. The weights associated 57 | with the edges represent the de-noised correlation coefficient 58 | :math:`\rho_{i,j}` between time series :math:`i` and :math:`j`. 59 | If ``True``, the correlation is transformed by defining a metric 60 | distance between each pair of nodes where :math:`d_{i,j} = 61 | \sqrt{2(1-\rho_{i,j})}` as proposed in [3]. 62 | 63 | threshold_type (str) 64 | Which thresholding function to use on the matrix of weights. See `netrd.utilities.threshold.py` for documentation. Pass additional arguments to the thresholder using ``**kwargs``. 65 | 66 | Returns 67 | ------- 68 | 69 | G (nx.Graph) 70 | A reconstructed graph with :math:`N` nodes. 71 | 72 | Examples 73 | -------- 74 | .. 
code:: python 75 | 76 | import numpy as np 77 | import networkx as nx 78 | from matplotlib import pyplot as plt 79 | from netrd.reconstruction import MarchenkoPastur 80 | 81 | N = 250 82 | T = 300 83 | M = np.random.normal(size=(N,T)) 84 | 85 | print('Create correlated time series') 86 | market_mode = 0.4*np.random.normal(size=(1,T)) 87 | M += market_mode 88 | 89 | sector_modes = {d: 0.5*np.random.normal(size=(1,T)) for d in range(5)} 90 | for sector_mode, vals in sector_modes.items(): 91 | M[sector_mode*50:(sector_mode+1)*50,:] += vals 92 | 93 | print('Network reconstruction step') 94 | mp_net = MarchenkoPastur() 95 | G = mp_net.fit(M, only_positive=True) 96 | G_no_market = mp_net.fit(M, only_positive=True, remove_largest=True) 97 | 98 | print('Observed noisy correlation') 99 | C = np.corrcoef(M) 100 | C[C<0] = 0 # remove negative values 101 | np.fill_diagonal(C,0) # remove self-loops 102 | G_noisy = nx.from_numpy_array(C) # create graph 103 | 104 | print('Plot observed noisy correlation graph') 105 | fig, ax = plt.subplots() 106 | nx.draw(G_noisy, ax=ax) 107 | 108 | print('Plot reconstructed correlation graph') 109 | fig, ax = plt.subplots() 110 | nx.draw(G, ax=ax) 111 | 112 | print('Plot reconstructed correlation graph without market mode') 113 | fig, ax = plt.subplots() 114 | nx.draw(G_no_market, ax=ax) 115 | 116 | 117 | References 118 | ---------- 119 | .. [1] Marchenko, V. A., & Pastur, L. A. (1967). Distribution of 120 | eigenvalues for some sets of random 121 | matrices. Matematicheskii Sbornik, 114(4), 507-536. 122 | http://www.mathnet.ru/links/a8d2a49dec161f50c944d9a96298c35a/sm4101.pdf 123 | 124 | .. [2] Laloux, L., Cizeau, P., Bouchaud, J. P., & Potters, 125 | M. (1999). Noise dressing of financial correlation 126 | matrices. Physical review letters, 83(7), 1467. 127 | https://journals.aps.org/prl/abstract/10.1103/PhysRevLett.83.1467 128 | 129 | .. [3] Bonanno, G., Caldarelli, G., Lillo, F., Micciche, S., 130 | Vandewalle, N., & Mantegna, R. N. (2004). Networks of 131 | equities in financial markets. The European Physical Journal 132 | B, 38(2), 363-371. 
133 | https://link.springer.com/article/10.1140/epjb/e2004-00129-6 134 | 135 | """ 136 | N, L = TS.shape 137 | if N > L: 138 | raise ValueError("L must be greater than or equal to N.") 139 | 140 | Q = L / N 141 | C = np.corrcoef(TS) # Empirical correlation matrix 142 | 143 | w, v = np.linalg.eigh(C) # Spectral decomposition of C 144 | 145 | w_min = 1 + 1 / Q - 2 * np.sqrt(1 / Q) 146 | w_max = 1 + 1 / Q + 2 * np.sqrt(1 / Q) 147 | 148 | selected = (w < w_min) | (w > w_max) 149 | 150 | if selected.sum() == 0: 151 | G = nx.empty_graph(n=N) 152 | self.results['graph'] = G 153 | return G 154 | 155 | if remove_largest: 156 | selected[-1] = False 157 | 158 | w_signal = w[selected] 159 | v_signal = v[:, selected] 160 | 161 | C_signal = v_signal.dot(np.diag(w_signal)).dot(v_signal.T) 162 | 163 | if metric_distance: 164 | C_signal = np.sqrt(2 * (1 - C_signal)) 165 | 166 | self.results['weights_matrix'] = C_signal 167 | 168 | # threshold signal matrix 169 | 170 | self.results['thresholded_matrix'] = threshold( 171 | C_signal, threshold_type, **kwargs 172 | ) 173 | 174 | G = create_graph(self.results['thresholded_matrix']) 175 | 176 | self.results['graph'] = G 177 | return G 178 | -------------------------------------------------------------------------------- /netrd/distance/distributional_nbd.py: -------------------------------------------------------------------------------- 1 | """ 2 | distributional_nbd.py 3 | ------ 4 | 5 | Distributional Non-backtracking Spectral Distance. 6 | 7 | """ 8 | 9 | import numpy as np 10 | import networkx as nx 11 | import scipy.sparse as sp 12 | from scipy.spatial.distance import euclidean, chebyshev 13 | from ..utilities.graph import unweighted 14 | 15 | from .base import BaseDistance 16 | 17 | 18 | class DistributionalNBD(BaseDistance): 19 | """ 20 | Distributional Non-backtracking Spectral Distance. 21 | 22 | Computes the distance between two graphs using the empirical spectral density 23 | of the non-backtracking operator. 24 | 25 | See: 26 | "Graph Comparison via the Non-backtracking Spectrum" 27 | A. Mellor & A. Grusovin 28 | arXiv:1812.05457 / 10.1103/PhysRevE.99.052309 29 | 30 | """ 31 | 32 | VECTOR_DISTANCES = {'euclidean': euclidean, 'chebyshev': chebyshev} 33 | 34 | @unweighted 35 | def dist( 36 | self, 37 | G1, 38 | G2, 39 | sparse=False, 40 | shave=True, 41 | keep_evals=True, 42 | k=None, 43 | vector_distance='euclidean', 44 | **kwargs 45 | ): 46 | """ 47 | Distributional Non-backtracking Spectral Distance. 48 | 49 | Parameters 50 | ---------- 51 | 52 | G1, G2 (nx.Graph) 53 | The two graphs to compare. 54 | 55 | sparse (bool) 56 | If True, matrices and eigenvalues are found using sparse methods. 57 | If True, parameter 'k' should also be specified. 58 | Default: False 59 | 60 | k (int) 61 | The number of largest eigenvalues to be calculated for the 62 | spectral density. 63 | 64 | vector_distance (str) 65 | The distance measure used to compare two empirical distributions. 66 | Currently available are 'euclidean' and 'chebyshev', implemented 67 | using SciPy. 
68 | Default: 'euclidean' 69 | 70 | keep_evals (bool) 71 | If True, stores the eigenvalues of the reduced non-backtracking 72 | matrix in self.results['eigenvalues'] 73 | Default: True 74 | 75 | 76 | Returns 77 | ------- 78 | float 79 | The distance between `G1` and `G2` 80 | 81 | """ 82 | B1 = reduced_hashimoto(G1, shave=shave, sparse=sparse, **kwargs) 83 | B2 = reduced_hashimoto(G2, shave=shave, sparse=sparse, **kwargs) 84 | 85 | # Find spectrum 86 | evals1 = nb_eigenvalues(B1, k=k) 87 | evals2 = nb_eigenvalues(B2, k=k) 88 | 89 | # Save spectrum 90 | if keep_evals: 91 | self.results['eigenvalues'] = (evals1, evals2) 92 | 93 | # Find rescaled spectral density 94 | distribution_1 = spectral_distribution(evals1) 95 | distribution_2 = spectral_distribution(evals2) 96 | 97 | # Compute distance 98 | distance_metric = self.__class__.VECTOR_DISTANCES[vector_distance] 99 | 100 | return distance_metric(distribution_1, distribution_2) 101 | 102 | 103 | def shave_graph(graph): 104 | """ 105 | Returns the two-core of a graph. 106 | 107 | Iteratively remove the nodes of degree 0 or 1, until all nodes have 108 | degree at least 2. 109 | 110 | NOTE: duplicated from "nbd.py" to avoid excessive imports. 111 | 112 | """ 113 | core = graph.copy() 114 | while True: 115 | to_remove = [node for node, neighbors in core.adj.items() if len(neighbors) < 2] 116 | core.remove_nodes_from(to_remove) 117 | if len(to_remove) == 0: 118 | break 119 | return core 120 | 121 | 122 | def pseudo_hashimoto(graph): 123 | """ 124 | Return the pseudo-Hashimoto matrix. 125 | 126 | The pseudo Hashimoto matrix of a graph is the block matrix defined as 127 | B' = [0 D-I] 128 | [-I A ] 129 | 130 | Where D is the degree-diagonal matrix, I is the identity matrix and A 131 | is the adjacency matrix. The eigenvalues of B' are always eigenvalues 132 | of B, the non-backtracking or Hashimoto matrix. 133 | 134 | Parameters 135 | ---------- 136 | 137 | graph (nx.Graph): A NetworkX graph object. 138 | 139 | Returns 140 | ------- 141 | 142 | A sparse matrix in csr format. 143 | 144 | NOTE: duplicated from "nbd.py" to avoid excessive imports. 145 | 146 | """ 147 | # Note: the rows of nx.adjacency_matrix(graph) are in the same order as 148 | # the list returned by graph.nodes(). 149 | degrees = graph.degree() 150 | degrees = sp.diags([degrees[n] for n in graph.nodes()]) 151 | adj = nx.adjacency_matrix(graph) 152 | ident = sp.eye(graph.order()) 153 | pseudo = sp.bmat([[None, degrees - ident], [-ident, adj]]) 154 | return pseudo.asformat('csr') 155 | 156 | 157 | def reduced_hashimoto(graph, shave=True, sparse=True, **kwargs): 158 | """ 159 | Return the pseudo-Hashimoto matrix of a graph, optionally reduced to its two-core. 160 | 161 | Parameters 162 | ---------- 163 | 164 | shave (bool) 165 | If True, first reduce the graph to its two-core. 166 | Otherwise the graph is processed in its entirety. 167 | 168 | sparse (bool) 169 | If True, returned matrix will be sparse, 170 | else it will be dense. 171 | 172 | Returns 173 | ------- 174 | 175 | np.ndarray/sp.csr_matrix 176 | The reduced Hashimoto Matrix. 177 | 178 | """ 179 | 180 | if shave: 181 | graph = shave_graph(graph) 182 | if len(graph) == 0: 183 | # We can provide a workaround for this case, however it is best 184 | # that it is brought to the attention of the user. 185 | raise NotImplementedError( 186 | "Graph two-core is empty: non-backtracking methods unsuitable." 187 | ) 188 | 189 | B = pseudo_hashimoto(graph) 190 | 191 | if not sparse: 192 | B = B.todense() 193 | 194 | return B 195 | 196 | 197 | def nb_eigenvalues(B, k=None, **kwargs): 198 | """ 199 | Calculates the eigenvalues of a matrix B. 
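For example (a sketch; the graph and the choice of ``k`` are arbitrary, with ``B`` built via ``reduced_hashimoto`` above):

>>> B = reduced_hashimoto(nx.karate_club_graph(), sparse=True)
>>> evals = nb_eigenvalues(B, k=10)  # ten largest-magnitude eigenvalues via ARPACK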
200 | 201 | Detects whether B is sparse/dense and uses the appropriate method. 202 | If B is sparse then parameter 'k' should be provided. 203 | """ 204 | 205 | if isinstance(B, np.ndarray): 206 | return np.linalg.eigvals(B) 207 | 208 | elif isinstance(B, sp.csr_matrix): 209 | random_state = np.random.RandomState( 210 | 1 211 | ) # Ensures that eigenvalue calculation is deterministic. 212 | return sp.linalg.eigs( 213 | B, k=k, v0=random_state.random(B.shape[0]), return_eigenvectors=False 214 | ) 215 | else: 216 | raise Exception("Matrix must be of type np.ndarray or scipy.sparse.csr") 217 | 218 | 219 | def logr(r, rmax): 220 | """ 221 | Logarithm of r to the base rmax. 222 | 223 | NOTE: Maps zero to zero as a special case. 224 | """ 225 | 226 | if r == 0: 227 | return 0 228 | return np.log(r) / np.log(rmax) 229 | 230 | 231 | def spectral_distribution(points, cumulative=True): 232 | """ 233 | Returns the distribution of complex values (in r,theta-space). 234 | """ 235 | 236 | points = np.array([(np.abs(z), np.angle(z)) for z in points]) 237 | r, theta = np.split(points, 2, axis=1) 238 | 239 | r = np.array([logr(x, r.max()) for x in r]) 240 | 241 | Z, R, THETA = np.histogram2d( 242 | x=r[:, 0], 243 | y=theta[:, 0], 244 | bins=(np.linspace(0, 1, 101), np.linspace(0, np.pi, 101)), 245 | ) 246 | 247 | if cumulative: 248 | Z = Z.cumsum(axis=0).cumsum(axis=1) 249 | Z = Z / Z.max() 250 | 251 | return Z.flatten() 252 | --------------------------------------------------------------------------------