├── graphs
    ├── tests
    │   ├── __init__.py
    │   └── test_reorder.py
    ├── base
    │   ├── tests
    │   │   ├── __init__.py
    │   │   ├── test_adj.py
    │   │   ├── test_pairs.py
    │   │   └── test_static.py
    │   ├── __init__.py
    │   ├── base.py
    │   ├── pairs.py
    │   └── adj.py
    ├── datasets
    │   ├── tests
    │   │   ├── __init__.py
    │   │   ├── test_mountain_car.py
    │   │   ├── test_shapes.py
    │   │   └── test_swiss_roll.py
    │   ├── __init__.py
    │   ├── swiss_roll.py
    │   ├── shapes.py
    │   └── mountain_car.py
    ├── construction
    │   ├── tests
    │   │   ├── __init__.py
    │   │   ├── test_directed.py
    │   │   ├── test_downsample.py
    │   │   ├── test_msg.py
    │   │   ├── test_spanning_tree.py
    │   │   ├── test_b_matching.py
    │   │   ├── test_incremental.py
    │   │   ├── test_saffron.py
    │   │   ├── test_neighbors.py
    │   │   ├── test_geometric.py
    │   │   └── test_regularized.py
    │   ├── __init__.py
    │   ├── directed.py
    │   ├── incremental.py
    │   ├── spanning_tree.py
    │   ├── downsample.py
    │   ├── geometric.py
    │   ├── saffron.py
    │   ├── b_matching.py
    │   ├── neighbors.py
    │   ├── _fast_paths.pyx
    │   └── regularized.py
    ├── generators
    │   ├── tests
    │   │   ├── __init__.py
    │   │   ├── test_rand.py
    │   │   ├── test_structured.py
    │   │   └── test_trajectories.py
    │   ├── __init__.py
    │   ├── rand.py
    │   ├── trajectories.py
    │   └── structured.py
    ├── _version.py
    ├── mixins
    │   ├── __init__.py
    │   ├── _betweenness_helper.pyxbld
    │   ├── tests
    │   │   ├── test_betweenness.py
    │   │   ├── test_viz.py
    │   │   ├── test_analysis.py
    │   │   ├── test_label.py
    │   │   ├── test_embed.py
    │   │   └── test_transformation.py
    │   ├── _betweenness.py
    │   ├── _betweenness_helper.pyx
    │   ├── analysis.py
    │   ├── embed.py
    │   ├── label.py
    │   └── transformation.py
    ├── mini_six.py
    ├── __init__.py
    └── reorder.py
├── benchmarks
    ├── benchmarks
    │   ├── __init__.py
    │   ├── basic.py
    │   ├── construction.py
    │   └── mixins.py
    ├── .gitignore
    └── asv.conf.json
├── .gitignore
├── run_tests.sh
├── .landscape.yml
├── .travis.yml
├── .coveragerc
├── examples
    ├── interactive.py
    ├── short_circuit.py
    └── swiss_roll.py
├── setup.py
├── LICENSE
└── README.md

/graphs/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /graphs/base/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /graphs/datasets/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /benchmarks/benchmarks/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /graphs/construction/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /graphs/generators/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /graphs/_version.py: -------------------------------------------------------------------------------- 1 | __version__ = '0.1.3' 2 | -------------------------------------------------------------------------------- /benchmarks/.gitignore: -------------------------------------------------------------------------------- 1 | env 2 | graphs 3 | results 4 | html -------------------------------------------------------------------------------- /.gitignore:
-------------------------------------------------------------------------------- 1 | *.pyc 2 | *.pyxbldc 3 | htmlcov/ 4 | .coverage 5 | build/ 6 | dist/ 7 | *.egg-info/ 8 | *.swp 9 | *.c 10 | *.cpp 11 | *.so 12 | -------------------------------------------------------------------------------- /run_tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | test_dirs=$(find graphs/ -type d -name tests | xargs) 4 | nosetests --with-cov --cov-report html --cov=graphs/ $test_dirs \ 5 | && coverage report 6 | 7 | -------------------------------------------------------------------------------- /graphs/mixins/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from .analysis import AnalysisMixin 4 | from .embed import EmbedMixin 5 | from .label import LabelMixin 6 | from .transformation import TransformMixin 7 | from .viz import VizMixin 8 | -------------------------------------------------------------------------------- /.landscape.yml: -------------------------------------------------------------------------------- 1 | strictness: medium 2 | pep8: 3 | disable: 4 | - E111 5 | - E114 6 | - E231 7 | - E225 8 | - E402 9 | - W503 10 | pylint: 11 | disable: 12 | - bad-indentation 13 | - invalid-name 14 | - too-many-arguments 15 | ignore-paths: 16 | - benchmarks/ 17 | -------------------------------------------------------------------------------- /graphs/mini_six.py: -------------------------------------------------------------------------------- 1 | '''Py3k compatibility hacks.''' 2 | 3 | __all__ = ['range', 'zip', 'zip_longest'] 4 | 5 | # If we're on Python 2, use xrange instead of range, etc 6 | if type(range(1)) is list: 7 | range = xrange 8 | from itertools import izip_longest as zip_longest, izip as zip 9 | else: 10 | range = range 11 | zip = zip 12 | from itertools import zip_longest 13 | -------------------------------------------------------------------------------- /graphs/mixins/_betweenness_helper.pyxbld: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def make_ext(modname, pyxfilename): 4 | from distutils.extension import Extension 5 | return Extension( 6 | name=modname, 7 | sources=[pyxfilename], 8 | extra_compile_args=['-O2'], 9 | libraries=['stdc++'], 10 | language='c++') 11 | 12 | def make_setup_args(): 13 | return {'include_dirs': np.get_include()} 14 | 15 | -------------------------------------------------------------------------------- /graphs/generators/__init__.py: -------------------------------------------------------------------------------- 1 | '''Graph generation helper functions. 2 | 3 | trajectories : helpers for working with trajectory data 4 | structured : functions for generating chain/lattice graphs 5 | rand : functions for generating graphs with random edges 6 | ''' 7 | from __future__ import absolute_import 8 | 9 | from . import trajectories 10 | from .structured import chain_graph, lattice_graph 11 | from .rand import random_graph 12 | -------------------------------------------------------------------------------- /graphs/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | '''Dataset generation functions. 
2 | 3 | mountain_car : the "Mountain Car" toy domain from reinforcement learning 4 | shapes : various parameterized shapes 5 | swiss_roll : the "Swiss Roll" toy domain from manifold learning 6 | ''' 7 | from __future__ import absolute_import 8 | 9 | from .mountain_car import mountain_car_trajectories 10 | from .shapes import MobiusStrip, FigureEight, SCurve 11 | from .swiss_roll import swiss_roll 12 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | sudo: false 3 | python: 4 | - "2.7" 5 | - "3.4" 6 | - "3.5" 7 | cache: pip 8 | before_install: 9 | - pip install --upgrade pip 10 | - pip install wheel 11 | - pip install numpy scipy Cython scikit-learn matplotlib coveralls nose-cov 12 | script: 13 | - nosetests --with-cov --cov=graphs/ graphs/tests/ graphs/base/tests/ graphs/construction/tests/ graphs/datasets/tests/ graphs/generators/tests/ graphs/mixins/tests/ 14 | after_success: coveralls 15 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | branch = true 3 | omit = 4 | *.pyxbld* 5 | 6 | [report] 7 | # Regexes for lines to exclude from consideration 8 | exclude_lines = 9 | # Have to re-enable the standard pragma 10 | pragma: no cover 11 | 12 | # Don't complain if tests don't hit defensive assertion code: 13 | raise AssertionError 14 | raise NotImplementedError 15 | 16 | # Don't complain if non-runnable code isn't run: 17 | if False: 18 | if __name__ == .__main__.: 19 | 20 | # Don't complain about import-guarded code 21 | except ImportError: 22 | -------------------------------------------------------------------------------- /examples/interactive.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import numpy as np 3 | from matplotlib import pyplot as plt 4 | from graphs.construction import neighbor_graph 5 | 6 | if hasattr(__builtins__, 'raw_input'): 7 | input = raw_input 8 | 9 | 10 | def main(): 11 | print("Select coordinates for graph vertices:") 12 | plt.plot([]) 13 | coords = np.array(plt.ginput(n=-1, timeout=-1)) 14 | 15 | k = int(input("Number of nearest neighbors: ")) 16 | g = neighbor_graph(coords, k=k) 17 | 18 | print("Resulting graph:") 19 | g.plot(coords, vertex_style='ro')() 20 | 21 | if __name__ == '__main__': 22 | main() 23 | -------------------------------------------------------------------------------- /graphs/generators/tests/test_rand.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import numpy as np 3 | import unittest 4 | from numpy.testing import assert_array_equal 5 | 6 | from ..rand import random_graph 7 | 8 | 9 | class TestRandomGraph(unittest.TestCase): 10 | def test_random_graph(self): 11 | for degree in (np.ones(5), [1,2,2], [1,0,0,1]): 12 | G = random_graph(degree) 13 | assert_array_equal(degree, G.degree(kind='out')) 14 | self.assertEqual(1, G.edge_weights().max()) 15 | 16 | # Check that degrees >= n will throw an error 17 | self.assertRaises(ValueError, random_graph, [1,2,3]) 18 | 19 | 20 | if __name__ == '__main__': 21 | unittest.main() 22 | -------------------------------------------------------------------------------- /graphs/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 |
Graphs: a library for efficiently manipulating graphs. 3 | 4 | Graph -- the base class for all graph objects. 5 | construction -- a module for constructing graphs from data. 6 | generators -- a module for generating graphs with desired properties. 7 | datasets -- a module providing sample datasets. 8 | reorder -- a module for reordering graph vertices. 9 | 10 | To create a Graph object, use the static constructors: 11 | `Graph.from_adj_matrix` or `Graph.from_edge_pairs`. 12 | ''' 13 | from __future__ import absolute_import 14 | 15 | from ._version import __version__ 16 | from .base import Graph 17 | from . import construction, generators, datasets, reorder 18 | -------------------------------------------------------------------------------- /benchmarks/benchmarks/basic.py: -------------------------------------------------------------------------------- 1 | import matplotlib 2 | matplotlib.use('template') 3 | import scipy.sparse as ss 4 | from graphs import Graph 5 | 6 | 7 | class BasicOperations(object): 8 | params = ['dense', 'coo', 'csr'] 9 | param_names = ['adj_format'] 10 | 11 | def setup(self, adj_format): 12 | n = 1500 13 | density = 0.2 14 | adj = ss.rand(n, n, density=density) 15 | if adj_format == 'dense': 16 | self.adj = adj.A 17 | else: 18 | self.adj = adj.asformat(adj_format) 19 | self.G = Graph.from_adj_matrix(self.adj) 20 | 21 | def time_construction(self, adj_format): 22 | Graph.from_adj_matrix(self.adj) 23 | 24 | def time_num_edges(self, adj_format): 25 | self.G.num_edges() 26 | 27 | def time_num_vertices(self, adj_format): 28 | self.G.num_vertices() 29 | -------------------------------------------------------------------------------- /graphs/construction/tests/test_directed.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import numpy as np 3 | import unittest 4 | 5 | from ...datasets.shapes import SCurve 6 | from .. import directed_graph 7 | 8 | 9 | class TestDirected(unittest.TestCase): 10 | 11 | def test_directed_graph(self): 12 | # XXX: This kind of testing isn't exactly reproducible across versions 13 | np.random.seed(1234) 14 | traj = SCurve().trajectories(5, 20) 15 | G, X = directed_graph(traj, k=5, pruning_thresh=0, return_coords=True) 16 | P = directed_graph(traj, k=5, pruning_thresh=0.1) 17 | self.assertEqual(X.shape, (100, 3)) 18 | self.assertEqual(G.num_edges(), 500) 19 | # Results may vary slightly, so just check that we're <500 20 | self.assertLess(P.num_edges(), 500) 21 | 22 | 23 | if __name__ == '__main__': 24 | unittest.main() 25 | -------------------------------------------------------------------------------- /graphs/datasets/tests/test_mountain_car.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import numpy as np 3 | import unittest 4 | from matplotlib import pyplot 5 | pyplot.switch_backend('template') 6 | 7 | from ... import Graph 8 | from .. 
import mountain_car as mcar 9 | 10 | 11 | class TestMountainCar(unittest.TestCase): 12 | 13 | def test_traj_sampling(self): 14 | traj, traces = mcar.mountain_car_trajectories(3) 15 | self.assertEqual(len(traces), 3) 16 | self.assertEqual(len(traj), 3) 17 | self.assertEqual(traj[0].shape[1], 2) 18 | self.assertEqual(traj[1].shape[1], 2) 19 | self.assertEqual(traj[2].shape[1], 2) 20 | 21 | def test_basis_plotting(self): 22 | pts = np.random.random((5, 2)) 23 | G = Graph.from_adj_matrix(np.random.random((5,5))) 24 | mcar.plot_mcar_basis(G, pts) 25 | 26 | 27 | if __name__ == '__main__': 28 | unittest.main() 29 | -------------------------------------------------------------------------------- /benchmarks/benchmarks/construction.py: -------------------------------------------------------------------------------- 1 | import matplotlib 2 | matplotlib.use('template') 3 | import numpy as np 4 | from sklearn.metrics import pairwise_distances 5 | import graphs.construction as gc 6 | 7 | 8 | class Neighbors(object): 9 | n = 500 10 | params = [[None, 0.25, 0.5], [None, 1, 100], ['none', 'binary']] 11 | param_names = ['epsilon', 'k', 'weighting'] 12 | 13 | def setup(self, epsilon, k, weighting): 14 | if epsilon is None and k is None: 15 | raise NotImplementedError() 16 | self.X = np.random.random((self.n, 3)) 17 | self.D = pairwise_distances(self.X) 18 | 19 | def time_neighbor_graph(self, epsilon, k, weighting): 20 | gc.neighbor_graph(self.X, k=k, epsilon=epsilon, weighting=weighting) 21 | 22 | def time_neighbor_graph_precomputed(self, epsilon, k, weighting): 23 | gc.neighbor_graph(self.D, k=k, epsilon=epsilon, weighting=weighting, 24 | precomputed=True) 25 | -------------------------------------------------------------------------------- /graphs/generators/rand.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import numpy as np 3 | from scipy.sparse import coo_matrix 4 | 5 | from .. import Graph 6 | 7 | __all__ = ['random_graph'] 8 | 9 | 10 | def random_graph(out_degree): 11 | '''Random graph generator. Does not generate self-edges. 12 | out_degree : array-like of ints, controlling the out degree of each vertex. 13 | ''' 14 | n = len(out_degree) 15 | out_degree = np.asarray(out_degree, dtype=int) 16 | if (out_degree >= n).any(): 17 | raise ValueError('Cannot have degree >= num_vertices') 18 | row = np.repeat(np.arange(n), out_degree) 19 | weights = np.ones_like(row, dtype=float) 20 | # Generate random edges from 0 to n-2, then shift by one to avoid self-edges. 
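# (For row i, any sampled value v >= i becomes v + 1, so column i is never produced and every other column in [0, n-1] remains reachable.)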
21 | col = np.concatenate([np.random.choice(n-1, d, replace=False) 22 | for d in out_degree]) 23 | col[col >= row] += 1 24 | adj = coo_matrix((weights, (row, col)), shape=(n, n)) 25 | return Graph.from_adj_matrix(adj) 26 | -------------------------------------------------------------------------------- /graphs/mixins/tests/test_betweenness.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | from numpy.testing import assert_array_equal 4 | 5 | # Test the non-Cython version specifically 6 | from graphs.mixins._betweenness import _betweenness 7 | 8 | ADJ = np.array([[0,1,2,0], 9 | [1,0,0,3], 10 | [2,0,0,1], 11 | [0,3,1,0]]) 12 | 13 | 14 | class TestBetweenness(unittest.TestCase): 15 | 16 | def test_betweenness_edge_unweighted(self): 17 | res = _betweenness(ADJ, False, False) 18 | assert_array_equal(res, [2,2,2,2,2,2,2,2]) 19 | 20 | def test_betweenness_edge_weighted(self): 21 | res = _betweenness(ADJ, True, False) 22 | assert_array_equal(res, [2,3,2,1,3,2,1,2]) 23 | 24 | def test_betweenness_vertex_unweighted(self): 25 | res = _betweenness(ADJ, False, True) 26 | assert_array_equal(res, [1,1,1,1]) 27 | 28 | def test_betweenness_vertex_weighted(self): 29 | res = _betweenness(ADJ, True, True) 30 | assert_array_equal(res, [2,0,2,0]) 31 | 32 | 33 | if __name__ == '__main__': 34 | unittest.main() 35 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from setuptools import setup, find_packages, Extension 3 | 4 | try: 5 | from Cython.Build import cythonize 6 | import numpy as np 7 | except ImportError: 8 | use_cython = False 9 | else: 10 | use_cython = True 11 | 12 | version = open('graphs/_version.py').read().strip().split('=', 1)[1].strip(" '") 13 | 14 | setup_kwargs = dict( 15 | name='graphs', 16 | version=version, 17 | author='CJ Carey', 18 | author_email='perimosocordiae@gmail.com', 19 | description='A library for graph-based machine learning.', 20 | url='http://github.com/all-umass/graphs', 21 | license='MIT', 22 | packages=find_packages(exclude=['tests']), 23 | package_data={'': ['*.pyx']}, 24 | install_requires=[ 25 | 'numpy >= 1.8', 26 | 'scipy >= 0.14', 27 | 'scikit-learn >= 0.15', 28 | 'matplotlib >= 1.3.1', 29 | 'Cython >= 0.21', 30 | ], 31 | ) 32 | if use_cython: 33 | exts = [Extension('*', ['graphs/*/*.pyx'], include_dirs=[np.get_include()])] 34 | setup_kwargs['ext_modules'] = cythonize(exts) 35 | 36 | setup(**setup_kwargs) 37 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 ALL @ UMass 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /graphs/datasets/swiss_roll.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | __all__ = ['swiss_roll', 'error_ratio'] 4 | 5 | 6 | def swiss_roll(radians, num_points, radius=1.0, 7 | theta_noise=0.1, radius_noise=0.01, 8 | return_theta=False): 9 | theta = np.linspace(1, radians, num_points) 10 | if theta_noise > 0: 11 | theta += np.random.normal(scale=theta_noise, size=theta.shape) 12 | r = np.sqrt(np.linspace(0, radius*radius, num_points)) 13 | if radius_noise > 0: 14 | r += np.random.normal(scale=radius_noise, size=r.shape) 15 | roll = np.empty((num_points, 3)) 16 | roll[:,0] = r * np.sin(theta) 17 | roll[:,2] = r * np.cos(theta) 18 | roll[:,1] = np.random.uniform(-1,1,num_points) 19 | if return_theta: 20 | return roll, theta 21 | return roll 22 | 23 | 24 | def error_ratio(G, GT_points, max_delta_theta=0.1, return_tuple=False): 25 | theta_edges = GT_points[G.pairs(),0] 26 | delta_theta = np.abs(np.diff(theta_edges)) 27 | err_edges = np.count_nonzero(delta_theta > max_delta_theta) 28 | tot_edges = delta_theta.shape[0] 29 | if return_tuple: 30 | return err_edges, tot_edges 31 | return err_edges / float(tot_edges) 32 | -------------------------------------------------------------------------------- /graphs/datasets/tests/test_shapes.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import unittest 3 | 4 | from .. import shapes 5 | 6 | 7 | class TestShapes(unittest.TestCase): 8 | 9 | def test_mobius(self): 10 | S = shapes.MobiusStrip(radius=1.0, max_width=1.0) 11 | X = S.point_cloud(25) 12 | T = S.trajectories(2, 10) 13 | self.assertEqual(X.shape, (25, 3)) 14 | self.assertEqual(len(T), 2) 15 | self.assertEqual(T[0].shape, (10, 3)) 16 | self.assertEqual(T[1].shape, (10, 3)) 17 | 18 | def test_s_curve(self): 19 | S = shapes.SCurve(radius=1.0) 20 | X = S.point_cloud(25) 21 | T = S.trajectories(2, 10) 22 | self.assertEqual(X.shape, (25, 3)) 23 | self.assertEqual(len(T), 2) 24 | self.assertEqual(T[0].shape, (10, 3)) 25 | self.assertEqual(T[1].shape, (10, 3)) 26 | 27 | def test_figure_eight(self): 28 | for d in (2,3): 29 | S = shapes.FigureEight(radius=1.0, dimension=d) 30 | X = S.point_cloud(25) 31 | T = S.trajectories(2, 10) 32 | self.assertEqual(X.shape, (25, d)) 33 | self.assertEqual(len(T), 2) 34 | self.assertEqual(T[0].shape, (10, d)) 35 | self.assertEqual(T[1].shape, (10, d)) 36 | 37 | if __name__ == '__main__': 38 | unittest.main() 39 | -------------------------------------------------------------------------------- /graphs/datasets/tests/test_swiss_roll.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import numpy as np 3 | import unittest 4 | 5 | from ... 
import Graph 6 | from ..swiss_roll import swiss_roll, error_ratio 7 | 8 | 9 | class TestSwissRoll(unittest.TestCase): 10 | 11 | def test_swiss_roll(self): 12 | X = swiss_roll(6, 10) 13 | self.assertEqual(X.shape, (10, 3)) 14 | X, theta = swiss_roll(3.0, 25, theta_noise=0, radius_noise=0, 15 | return_theta=True) 16 | self.assertEqual(X.shape, (25, 3)) 17 | self.assertEqual(theta.shape, (25,)) 18 | self.assertAlmostEqual(theta.max(), 3.0) 19 | 20 | def test_error_ratio(self): 21 | adj = np.diag(np.ones(3), k=1) 22 | G = Graph.from_adj_matrix(adj + adj.T) 23 | GT = np.tile(np.linspace(0, 1, adj.shape[0])**2, (2,1)).T 24 | err_edges, tot_edges = error_ratio(G, GT, return_tuple=True) 25 | self.assertEqual(err_edges, 6) 26 | self.assertEqual(tot_edges, 6) 27 | self.assertEqual(error_ratio(G, GT, max_delta_theta=0.2), 4/6.) 28 | self.assertEqual(error_ratio(G, GT, max_delta_theta=0.5), 2/6.) 29 | self.assertEqual(error_ratio(G, GT, max_delta_theta=1), 0.0) 30 | 31 | 32 | if __name__ == '__main__': 33 | unittest.main() 34 | -------------------------------------------------------------------------------- /examples/short_circuit.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import numpy as np 3 | from matplotlib import pyplot as plt 4 | 5 | from graphs.datasets import swiss_roll 6 | from graphs.construction import neighbor_graph 7 | 8 | 9 | def main(): 10 | np.random.seed(1234) 11 | X, theta = swiss_roll(8, 300, return_theta=True, radius=0.5) 12 | GT = np.column_stack((theta, X[:,1])) 13 | g = neighbor_graph(X, k=6) 14 | g = g.from_adj_matrix(g.matrix('dense')) 15 | ct = 12 16 | 17 | _, axes = plt.subplots(nrows=2, ncols=2, figsize=(8, 8), 18 | sharex=True, sharey=True) 19 | _plot_diff(axes[0,0], GT, g, g.minimum_spanning_subtree(), title='MST') 20 | _plot_diff(axes[0,1], GT, g, g.circle_tear(cycle_len_thresh=ct), 21 | title='Circle Tear (%d)' % ct) 22 | _plot_diff(axes[1,0], GT, g, g.cycle_cut(cycle_len_thresh=ct), 23 | title='Cycle Cut (%d)' % ct) 24 | _plot_diff(axes[1,1], GT, g, g.isograph(), title='Isograph') 25 | plt.show() 26 | 27 | 28 | def _plot_diff(ax, x, g1, g2, title=''): 29 | g1.plot(x, ax=ax, edge_style='y-', vertex_style='k.') 30 | g2.plot(x, ax=ax, edge_style='b-', vertex_style='k.') 31 | ax.set_title(title) 32 | 33 | 34 | if __name__ == '__main__': 35 | main() 36 | -------------------------------------------------------------------------------- /graphs/construction/tests/test_downsample.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import numpy as np 3 | import unittest 4 | from numpy.testing import assert_array_equal 5 | 6 | from ...datasets.shapes import SCurve 7 | from .. 
import downsample as d 8 | 9 | 10 | class TestDownsample(unittest.TestCase): 11 | 12 | def test_epsilon_net(self): 13 | pts = np.array([[0,0],[1,2],[3,2],[-1,0]]) 14 | sample = d.epsilon_net(pts, 1.7) 15 | self.assertTupleEqual(tuple(sample), (0,1,2)) 16 | traj = [pts[:2], pts[2:]] 17 | sample = d.downsample_trajectories(traj, d.epsilon_net, 1.7) 18 | assert_array_equal(sample[0], pts[:2]) 19 | assert_array_equal(sample[1], pts[2:3]) 20 | 21 | def test_fuzzy_c_means(self): 22 | pts = np.array([[0,0],[1,2],[3,2],[-1,0]]) 23 | sample = np.sort(d.fuzzy_c_means(pts, 2)) 24 | assert_array_equal(sample, [0, 2]) 25 | 26 | def test_downsample_trajectories(self): 27 | traj = SCurve().trajectories(5, 20) 28 | pts = np.vstack(traj) 29 | ds_traj = d.downsample_trajectories(traj, d.epsilon_net, 0.05) 30 | ds_pts = pts[d.epsilon_net(pts, 0.05)] 31 | assert_array_equal(np.vstack(ds_traj), ds_pts) 32 | 33 | 34 | if __name__ == '__main__': 35 | unittest.main() 36 | -------------------------------------------------------------------------------- /graphs/generators/trajectories.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from graphs import Graph 3 | 4 | __all__ = ['chunk_up', 'concat_trajectories'] 5 | 6 | 7 | def chunk_up(trajectories, chunk_size=None, directed=False): 8 | if chunk_size is None: 9 | chunk_lengths = list(map(len, trajectories)) 10 | else: 11 | chunk_lengths = [] 12 | for t in trajectories: 13 | chunk_lengths.extend(_chunk_traj_idxs(len(t), chunk_size)) 14 | return concat_trajectories(chunk_lengths, directed=directed) 15 | 16 | 17 | def concat_trajectories(traj_lengths, directed=False): 18 | P = [] 19 | last_idx = 0 20 | for tl in traj_lengths: 21 | P.append(last_idx + _traj_pair_idxs(tl)) 22 | last_idx += tl 23 | return Graph.from_edge_pairs(np.vstack(P), num_vertices=last_idx, 24 | symmetric=(not directed)) 25 | 26 | 27 | def _traj_pair_idxs(traj_len): 28 | ii = np.arange(traj_len) 29 | pairs = np.transpose((ii[:-1], ii[1:])) 30 | return pairs 31 | 32 | 33 | def _chunk_traj_idxs(traj_len, chunk_size): 34 | num_chunks, extra = divmod(traj_len, chunk_size) 35 | if num_chunks == 0: 36 | return [extra] 37 | c = [chunk_size] * num_chunks 38 | c[-1] += extra # Add any leftovers to the last chunk. 39 | return c 40 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Graphs 2 | 3 | [![PyPI version](https://badge.fury.io/py/graphs.svg)](http://badge.fury.io/py/graphs) 4 | [![Build Status](https://travis-ci.org/all-umass/graphs.svg?branch=master)](https://travis-ci.org/all-umass/graphs) 5 | [![Coverage Status](https://coveralls.io/repos/all-umass/graphs/badge.svg?branch=master&service=github)](https://coveralls.io/github/all-umass/graphs?branch=master) 6 | 7 | A library for graph-based learning in Python. 8 | 9 | Provides several types of graph container objects, 10 | with a unified API for visualization, analysis, transformation, 11 | and embedding. 
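Graph objects can be backed by dense adjacency matrices, scipy.sparse matrices, or lists of edge pairs; all are built with the static constructors `Graph.from_adj_matrix` and `Graph.from_edge_pairs`.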
12 | 13 | ## Usage example 14 | 15 | ```python 16 | from graphs.generators import random_graph 17 | 18 | G = random_graph([2,3,1,3,2,1,2]) 19 | 20 | print(G.num_vertices())  # 7 21 | print(G.num_edges())  # 14 22 | 23 | G.symmetrize(method='max') 24 | X = G.isomap(num_dims=2) 25 | 26 | G.plot(X, title='isomap embedding')() 27 | ``` 28 | 29 | ## Requirements 30 | 31 | Requires recent versions of: 32 | 33 | * numpy 34 | * scipy 35 | * scikit-learn 36 | * matplotlib 37 | * Cython 38 | 39 | Optional dependencies: 40 | 41 | * python-igraph 42 | * graphtool 43 | * networkx 44 | 45 | Testing requires: 46 | 47 | * nose 48 | * nose-cov 49 | 50 | Run the test suite: 51 | 52 | ``` 53 | ./run_tests.sh 54 | ``` 55 | -------------------------------------------------------------------------------- /graphs/construction/tests/test_msg.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import numpy as np 3 | import unittest 4 | from numpy.testing import assert_array_equal 5 | 6 | from ...datasets.swiss_roll import swiss_roll, error_ratio 7 | from ..msg import manifold_spanning_graph 8 | 9 | 10 | class TestMSG(unittest.TestCase): 11 | 12 | def test_swiss_roll(self): 13 | np.random.seed(1234) 14 | X, theta = swiss_roll(6, 120, radius=4.8, return_theta=True) 15 | GT = np.hstack((theta[:,None], X[:,1:2])) 16 | GT -= GT.min(axis=0) 17 | GT /= GT.max(axis=0) 18 | 19 | G = manifold_spanning_graph(X, 2) 20 | self.assertEqual(error_ratio(G, GT), 0.0) 21 | 22 | def test_two_moons(self): 23 | np.random.seed(1234) 24 | n1,n2 = 55,75 25 | theta = np.hstack((np.random.uniform(0, 1, size=n1), 26 | np.random.uniform(1, 2, size=n2))) * np.pi 27 | r = 1.3 + 0.12 * np.random.randn(n1+n2)[:,None] 28 | X = r * np.hstack((np.cos(theta), np.sin(theta))).reshape((-1,2), order='F') 29 | X[:n1] += np.array([[0, -0.2]]) 30 | X[n1:] += np.array([[0.9, 0.25]]) 31 | 32 | G = manifold_spanning_graph(X, 2, num_ccs=2) 33 | num_ccs, labels = G.connected_components() 34 | self.assertEqual(num_ccs, 2) 35 | assert_array_equal(labels[:n1], np.zeros(n1)) 36 | assert_array_equal(labels[n1:], np.ones(n2)) 37 | 38 | if __name__ == '__main__': 39 | unittest.main() 40 | -------------------------------------------------------------------------------- /graphs/construction/__init__.py: -------------------------------------------------------------------------------- 1 | '''Graph construction algorithms, including: 2 | 3 | - k-nearest and epsilon-close neighbors, with incremental variants 4 | - b-matching 5 | - directed graph construction 6 | - Delaunay and Gabriel graphs 7 | - Relative Neighborhood graphs 8 | - Manifold Spanning graphs 9 | - Sparse Regularized graphs 10 | - traditional, perturbed, and disjoint Minimum Spanning Trees 11 | 12 | Each construction function returns a Graph object.
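For example (a minimal sketch; `X` stands for any (num_points, num_dims) coordinate array):

>>> from graphs.construction import neighbor_graph
>>> G = neighbor_graph(X, k=5)          # k-nearest neighbor graph
>>> G = neighbor_graph(X, epsilon=0.3)  # epsilon-close neighbor graph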
13 | ''' 14 | from __future__ import absolute_import 15 | 16 | from .b_matching import * 17 | from .directed import * 18 | from .downsample import * 19 | from .geometric import * 20 | from .incremental import * 21 | from .msg import * 22 | from .neighbors import * 23 | from .regularized import * 24 | from .saffron import * 25 | from .spanning_tree import * 26 | 27 | __all__ = [ 28 | # b_matching 29 | 'b_matching', 30 | # directed 31 | 'directed_graph', 32 | # downsample 33 | 'downsample_trajectories', 'epsilon_net', 'fuzzy_c_means', 34 | # geometric 35 | 'delaunay_graph', 'gabriel_graph', 'relative_neighborhood_graph', 36 | # incremental 37 | 'incremental_neighbor_graph', 38 | # msg 39 | 'manifold_spanning_graph', 40 | # neighbors 41 | 'neighbor_graph', 'nearest_neighbors', 42 | # regularized 43 | 'sparse_regularized_graph', 44 | # saffron 45 | 'saffron', 46 | # spanning_tree 47 | 'mst', 'perturbed_mst', 'disjoint_mst', 48 | ] 49 | -------------------------------------------------------------------------------- /graphs/base/tests/test_adj.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | from numpy.testing import assert_array_equal 4 | from scipy.sparse import csr_matrix 5 | 6 | from graphs.base.adj import ( 7 | DenseAdjacencyMatrixGraph, SparseAdjacencyMatrixGraph) 8 | 9 | PAIRS = np.array([[0,1],[0,2],[1,1],[2,1],[3,3]]) 10 | ADJ = [[0,1,1,0], 11 | [0,1,0,0], 12 | [0,1,0,0], 13 | [0,0,0,1]] 14 | 15 | 16 | class TestAdjacencyMatrixGraphs(unittest.TestCase): 17 | def setUp(self): 18 | self.G = DenseAdjacencyMatrixGraph(ADJ) 19 | self.S = SparseAdjacencyMatrixGraph(csr_matrix(ADJ)) 20 | 21 | def test_pairs(self): 22 | assert_array_equal(self.G.pairs(), PAIRS) 23 | assert_array_equal(self.S.pairs(), PAIRS) 24 | 25 | def test_matrix(self): 26 | M = self.G.matrix() 27 | assert_array_equal(M, ADJ) 28 | M = self.G.matrix('csr') 29 | self.assertEqual(M.format, 'csr') 30 | assert_array_equal(M.toarray(), ADJ) 31 | M = self.S.matrix() 32 | self.assertEqual(M.format, 'csr') 33 | assert_array_equal(M.toarray(), ADJ) 34 | 35 | def test_matrix_copy(self): 36 | M = self.G.matrix('dense', copy=False) 37 | assert_array_equal(M, ADJ) 38 | M2 = self.G.matrix('dense', copy=True) 39 | assert_array_equal(M, M2) 40 | self.assertIsNot(M, M2) 41 | # Sparse case 42 | M = self.S.matrix('csr', copy=False) 43 | assert_array_equal(M.toarray(), ADJ) 44 | M2 = self.S.matrix('csr', copy=True) 45 | assert_array_equal(M.toarray(), M2.toarray()) 46 | self.assertIsNot(M, M2) 47 | 48 | if __name__ == '__main__': 49 | unittest.main() 50 | -------------------------------------------------------------------------------- /graphs/generators/tests/test_structured.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import numpy as np 3 | import unittest 4 | from numpy.testing import assert_array_equal 5 | 6 | from .. 
import chain_graph, lattice_graph 7 | 8 | 9 | class TestStructured(unittest.TestCase): 10 | 11 | def test_chain_graph(self): 12 | expected = np.diag([1,1], k=1) 13 | g = chain_graph(3, directed=True) 14 | assert_array_equal(g.matrix('dense'), expected) 15 | 16 | expected += np.diag([1,1], k=-1) 17 | g = chain_graph(3, wraparound=False) 18 | assert_array_equal(g.matrix('dense'), expected) 19 | 20 | expected[0,2] = 1 21 | expected[2,0] = 1 22 | g = chain_graph(3, wraparound=True) 23 | assert_array_equal(g.matrix('dense'), expected) 24 | 25 | def test_lattice_graph(self): 26 | self.assertRaises(ValueError, lattice_graph, []) 27 | 28 | expected = np.diag([1,1], k=1) + np.diag([1,1], k=-1) 29 | g = lattice_graph((3,), wraparound=False) 30 | assert_array_equal(g.matrix('dense'), expected) 31 | 32 | expected = np.diag([1,1,0,1,1], k=1) + np.diag([1,1,0,1,1], k=-1) 33 | expected += np.diag([1,1,1], k=3) + np.diag([1,1,1], k=-3) 34 | g = lattice_graph((3,2), wraparound=False) 35 | assert_array_equal(g.matrix('dense'), expected) 36 | 37 | expected[[0,3],[2,5]] = 1 38 | expected[[2,5],[0,3]] = 1 39 | g = lattice_graph((3,2), wraparound=True) 40 | assert_array_equal(g.matrix('dense'), expected) 41 | 42 | expected = np.diag([1,1,1], k=1) + np.diag([1,1,1], k=-1) 43 | g = lattice_graph((1,4), wraparound=False) 44 | assert_array_equal(g.matrix('dense'), expected) 45 | 46 | 47 | if __name__ == '__main__': 48 | unittest.main() 49 | -------------------------------------------------------------------------------- /graphs/construction/directed.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function 2 | 3 | import numpy as np 4 | from sklearn.metrics.pairwise import paired_distances 5 | from graphs import Graph 6 | from .neighbors import neighbor_graph 7 | 8 | __all__ = ['directed_graph'] 9 | 10 | 11 | def directed_graph(trajectories, k=5, verbose=False, pruning_thresh=0, 12 | return_coords=False): 13 | '''Directed graph construction alg. from Johns & Mahadevan, ICML '07. 
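Builds a k-nearest neighbor graph over all trajectory states; when pruning_thresh > 0, edges whose direction disagrees (by cosine distance) with the local direction of motion along the trajectory are removed.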
14 | trajectories: list of NxD arrays of ordered states 15 | ''' 16 | X = np.vstack(trajectories) 17 | G = neighbor_graph(X, k=k) 18 | if pruning_thresh > 0: 19 | traj_len = map(len, trajectories) 20 | G = _prune_edges(G, X, traj_len, pruning_thresh, verbose=verbose) 21 | if return_coords: 22 | return G, X 23 | return G 24 | 25 | 26 | def _prune_edges(G, X, traj_lengths, pruning_thresh=0.1, verbose=False): 27 | '''Prune edges in graph G via cosine distance with trajectory edges.''' 28 | W = G.matrix('dense', copy=True) 29 | degree = G.degree(kind='out', weighted=False) 30 | i = 0 31 | num_bad = 0 32 | for n in traj_lengths: 33 | s, t = np.nonzero(W[i:i+n-1]) 34 | graph_edges = X[t] - X[s+i] 35 | traj_edges = np.diff(X[i:i+n], axis=0) 36 | traj_edges = np.repeat(traj_edges, degree[i:i+n-1], axis=0) 37 | theta = paired_distances(graph_edges, traj_edges, 'cosine') 38 | bad_edges = theta > pruning_thresh 39 | s, t = s[bad_edges], t[bad_edges] 40 | if verbose: # pragma: no cover 41 | num_bad += np.count_nonzero(W[s,t]) 42 | W[s,t] = 0 43 | i += n 44 | if verbose: # pragma: no cover 45 | print('removed %d bad edges' % num_bad) 46 | return Graph.from_adj_matrix(W) 47 | -------------------------------------------------------------------------------- /graphs/generators/structured.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import numpy as np 3 | import scipy.sparse as ss 4 | 5 | from .. import Graph 6 | 7 | 8 | def chain_graph(num_vertices, wraparound=False, directed=False, weights=None): 9 | if wraparound: 10 | ii = np.arange(num_vertices) 11 | jj = ii + 1 12 | jj[-1] = 0 13 | else: 14 | ii = np.arange(num_vertices-1) 15 | jj = ii + 1 16 | pairs = np.column_stack((ii, jj)) 17 | return Graph.from_edge_pairs(pairs, num_vertices=num_vertices, 18 | symmetric=(not directed), weights=weights) 19 | 20 | 21 | def lattice_graph(dims, wraparound=False): 22 | dims = [d for d in dims if d > 1] 23 | if len(dims) == 0: 24 | raise ValueError('Must supply at least one dimension >= 2') 25 | if len(dims) == 1: 26 | return chain_graph(dims[0], wraparound=wraparound) 27 | if len(dims) > 2: # pragma: no cover 28 | raise NotImplementedError('NYI: len(dims) > 2') 29 | 30 | # 2d case 31 | m, n = dims 32 | num_vertices = m * n 33 | if wraparound: 34 | offsets = [-m*(n-1), -m, -m+1, -1, 1, m-1, m, m*(n-1)] 35 | data = np.ones((8, num_vertices), dtype=int) 36 | data[[2,5], :] = 0 37 | data[2, ::m] = 1 38 | data[3, m-1::m] = 0 39 | data[4, ::m] = 0 40 | data[5, m-1::m] = 1 41 | # handle edge cases where offsets are duplicated 42 | offsets, idx = np.unique(offsets, return_index=True) 43 | data = data[idx] 44 | else: 45 | offsets = [-m, -1, 1, m] 46 | data = np.ones((4, num_vertices), dtype=int) 47 | data[1, m-1::m] = 0 48 | data[2, 0::m] = 0 49 | adj = ss.dia_matrix((data, offsets), shape=(num_vertices, num_vertices)) 50 | return Graph.from_adj_matrix(adj) 51 | -------------------------------------------------------------------------------- /graphs/generators/tests/test_trajectories.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import numpy as np 3 | import unittest 4 | from numpy.testing import assert_array_equal 5 | 6 | from .. 
import trajectories as traj 7 | 8 | 9 | class TestTrajectories(unittest.TestCase): 10 | 11 | def test_concat_trajectories(self): 12 | expected = [[0,1,0,0,0],[1,0,0,0,0],[0,0,0,1,0],[0,0,1,0,1],[0,0,0,1,0]] 13 | G = traj.concat_trajectories([2, 3], directed=False) 14 | assert_array_equal(G.matrix('dense'), expected) 15 | 16 | def test_chunk_up(self): 17 | T = [np.zeros(4), np.zeros(4)] 18 | expected = [[0,1,0,0,0,0,0,0], 19 | [1,0,1,0,0,0,0,0], 20 | [0,1,0,1,0,0,0,0], 21 | [0,0,1,0,0,0,0,0], 22 | [0,0,0,0,0,1,0,0], 23 | [0,0,0,0,1,0,1,0], 24 | [0,0,0,0,0,1,0,1], 25 | [0,0,0,0,0,0,1,0]] 26 | G = traj.chunk_up(T, directed=False) 27 | assert_array_equal(G.matrix('dense'), expected) 28 | expected = [[0,1,0,0,0,0,0,0], 29 | [1,0,0,0,0,0,0,0], 30 | [0,0,0,1,0,0,0,0], 31 | [0,0,1,0,0,0,0,0], 32 | [0,0,0,0,0,1,0,0], 33 | [0,0,0,0,1,0,0,0], 34 | [0,0,0,0,0,0,0,1], 35 | [0,0,0,0,0,0,1,0]] 36 | G = traj.chunk_up(T, chunk_size=2, directed=False) 37 | assert_array_equal(G.matrix('dense'), expected) 38 | # test case where chunk overflows 39 | T = [np.zeros(3), np.zeros(2)] 40 | expected = [[0,1,0,0,0], 41 | [1,0,1,0,0], 42 | [0,1,0,0,0], 43 | [0,0,0,0,1], 44 | [0,0,0,1,0]] 45 | G = traj.chunk_up(T, chunk_size=3, directed=False) 46 | assert_array_equal(G.matrix('dense'), expected) 47 | 48 | 49 | if __name__ == '__main__': 50 | unittest.main() 51 | -------------------------------------------------------------------------------- /graphs/tests/test_reorder.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import unittest 3 | from numpy.testing import assert_array_equal 4 | from graphs import Graph, reorder 5 | 6 | 7 | class TestReorder(unittest.TestCase): 8 | def setUp(self): 9 | ii = np.array([0, 0, 1, 2, 2, 3, 3, 3, 4, 5]) 10 | jj = np.array([1, 2, 3, 4, 5, 6, 7, 8, 7, 7]) 11 | adj = np.zeros((9,9), dtype=int) 12 | adj[ii,jj] = 1 13 | adj[jj,ii] = 1 14 | self.G = Graph.from_adj_matrix(adj) 15 | 16 | def test_cuthill_mckee(self): 17 | # Many orderings are "correct". Just ensure minimal bandwidth. 
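# (with the identity ordering, this graph has bandwidth 5, due to the 3-8 edge)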
18 | expected_b = 3 19 | # test default version (probably scipy) 20 | cm = reorder.cuthill_mckee(self.G) 21 | self.assertEqual(cm.bandwidth(), expected_b) 22 | # test the non-scipy version 23 | cm = reorder._cuthill_mckee(self.G) 24 | self.assertEqual(cm.bandwidth(), expected_b) 25 | 26 | def test_node_centroid_hill_climbing(self): 27 | np.random.seed(1234) 28 | nchc = reorder.node_centroid_hill_climbing(self.G, relax=1) 29 | expected = np.array([[0,1],[0,2],[0,3],[0,4],[1,0],[2,0],[2,5],[3,0],[3,6], 30 | [3,7],[4,0],[5,2],[5,8],[6,3],[6,8],[7,3],[7,8],[8,5], 31 | [8,6],[8,7]]) 32 | assert_array_equal(nchc.pairs(), expected) 33 | # test with relax < 1 34 | nchc2 = reorder.node_centroid_hill_climbing(self.G, relax=0.99) 35 | expected = np.array([[0,1],[1,0],[1,2],[1,3],[1,4],[2,1],[2,5],[3,1],[3,6], 36 | [3,7],[4,1],[5,2],[5,8],[6,3],[6,8],[7,3],[7,8],[8,5], 37 | [8,6],[8,7]]) 38 | assert_array_equal(nchc2.pairs(), expected) 39 | 40 | def test_laplacian_reordering(self): 41 | lap = reorder.laplacian_reordering(self.G) 42 | self.assertEqual(lap.bandwidth(), 3) 43 | 44 | 45 | if __name__ == '__main__': 46 | unittest.main() 47 | -------------------------------------------------------------------------------- /benchmarks/benchmarks/mixins.py: -------------------------------------------------------------------------------- 1 | import matplotlib 2 | matplotlib.use('template') 3 | import scipy.sparse as ss 4 | import numpy as np 5 | from graphs import Graph 6 | 7 | 8 | class _RandomFormatsBase(object): 9 | n = 500 10 | density = 0.05 11 | params = ['dense', 'coo', 'csr'] 12 | param_names = ['adj_format'] 13 | 14 | def setup(self, adj_format, *args): 15 | adj = ss.rand(self.n, self.n, density=self.density, random_state=1234) 16 | if adj_format == 'dense': 17 | adj = adj.A 18 | else: 19 | adj = adj.asformat(adj_format) 20 | self.G = Graph.from_adj_matrix(adj) 21 | self.G.symmetrize() 22 | 23 | 24 | class Labeling(_RandomFormatsBase): 25 | def time_greedy_coloring(self, *args): 26 | self.G.greedy_coloring() 27 | 28 | def time_spectral_clustering(self, *args): 29 | self.G.spectral_clustering(2) 30 | 31 | 32 | class LabelSpreading(_RandomFormatsBase): 33 | params = [['dense', 'coo', 'csr'], ['rbf', 'none', 'binary']] 34 | param_names = ['adj_format', 'kernel'] 35 | 36 | def setup(self, *args): 37 | _RandomFormatsBase.setup(self, *args) 38 | np.random.seed(1234) 39 | self.y = np.random.randint(5, size=self.n) 40 | self.y[np.random.random(self.n) > 0.5] = -1 41 | 42 | def time_spread_labels(self, _, k): 43 | self.G.spread_labels(self.y, kernel=k) 44 | 45 | 46 | class Regression(_RandomFormatsBase): 47 | params = [['dense', 'coo', 'csr'], ['rbf', 'none', 'binary'], [0, 1e-3]] 48 | param_names = ['adj_format', 'kernel', 'smoothness_penalty'] 49 | 50 | def setup(self, *args): 51 | _RandomFormatsBase.setup(self, *args) 52 | self.y = np.random.random((self.n//2, 1)) 53 | self.mask = slice(None, None, 2) 54 | 55 | def time_regression(self, _, k, s): 56 | self.G.regression(self.y, self.mask, smoothness_penalty=s, kernel=k) 57 | -------------------------------------------------------------------------------- /graphs/construction/incremental.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import numpy as np 4 | from sklearn.metrics import pairwise_distances 5 | 6 | from graphs import Graph 7 | 8 | __all__ = ['incremental_neighbor_graph'] 9 | 10 | 11 | def incremental_neighbor_graph(X, precomputed=False, k=None, epsilon=None, 
12 | weighting='none'): 13 | '''See neighbor_graph.''' 14 | assert ((k is not None) or (epsilon is not None) 15 | ), "Must provide `k` or `epsilon`" 16 | assert (_issequence(k) ^ _issequence(epsilon) 17 | ), "Exactly one of `k` or `epsilon` must be a sequence." 18 | assert weighting in ('binary','none'), "Invalid weighting param: " + weighting 19 | is_weighted = weighting == 'none' 20 | 21 | if precomputed: 22 | D = X 23 | else: 24 | D = pairwise_distances(X, metric='euclidean') 25 | # pre-sort for efficiency 26 | order = np.argsort(D)[:,1:] 27 | 28 | if k is None: 29 | k = D.shape[0] 30 | 31 | # generate the sequence of graphs 32 | # TODO: convert the core of these loops to Cython for speed 33 | W = np.zeros_like(D) 34 | I = np.arange(D.shape[0]) 35 | if _issequence(k): 36 | # varied k, fixed epsilon 37 | if epsilon is not None: 38 | D[D > epsilon] = 0 39 | old_k = 0 40 | for new_k in k: 41 | idx = order[:, old_k:new_k] 42 | dist = D[I, idx.T] 43 | W[I, idx.T] = dist if is_weighted else 1 44 | yield Graph.from_adj_matrix(W) 45 | old_k = new_k 46 | else: 47 | # varied epsilon, fixed k 48 | idx = order[:,:k] 49 | dist = D[I, idx.T].T 50 | old_i = np.zeros(D.shape[0], dtype=int) 51 | for eps in epsilon: 52 | for i, row in enumerate(dist): 53 | oi = old_i[i] 54 | ni = oi + np.searchsorted(row[oi:], eps) 55 | rr = row[oi:ni] 56 | W[i, idx[i,oi:ni]] = rr if is_weighted else 1 57 | old_i[i] = ni 58 | yield Graph.from_adj_matrix(W) 59 | 60 | 61 | def _issequence(x): 62 | # Note: isinstance(x, collections.Sequence) fails for numpy arrays 63 | return hasattr(x, '__len__') 64 | -------------------------------------------------------------------------------- /graphs/construction/tests/test_spanning_tree.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import unittest 3 | from numpy.testing import assert_array_almost_equal 4 | from sklearn.metrics import pairwise_distances 5 | 6 | from graphs.construction import mst, perturbed_mst, disjoint_mst 7 | 8 | 9 | class TestSpanningTree(unittest.TestCase): 10 | def setUp(self): 11 | self.pts = np.array([[0,0],[1,2],[3,2],[-1,0]]) 12 | 13 | def test_mst(self): 14 | expected = [[0, 2.236,0, 1], 15 | [2.236,0, 2, 0], 16 | [0, 2, 0, 0], 17 | [1, 0, 0, 0]] 18 | G = mst(self.pts) 19 | assert_array_almost_equal(G.matrix('dense'), expected, decimal=3) 20 | # Check precomputed metric. 
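# (mst() must leave the precomputed matrix untouched; we compare against a copy below)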
21 | D = pairwise_distances(self.pts) 22 | D_copy = D.copy() 23 | G = mst(D, metric='precomputed') 24 | assert_array_almost_equal(G.matrix('dense'), expected, decimal=3) 25 | assert_array_almost_equal(D, D_copy) 26 | 27 | def test_perturbed_mst(self): 28 | np.random.seed(1234) 29 | expected = [[0,0.71428571,0.23809524,1.00000000], 30 | [0.71428571,0,0.85714286,0.14285714], 31 | [0.23809524,0.85714286,0,0.04761905], 32 | [1.00000000,0.14285714,0.04761905,0]] 33 | G = perturbed_mst(self.pts) 34 | assert_array_almost_equal(G.matrix('dense'), expected) 35 | 36 | def test_disjoint_mst(self): 37 | expected = [[0,2.23606798,3.60555128,1], 38 | [2.23606798,0,2,2.82842712], 39 | [3.60555128,2,0,4.47213595], 40 | [1,2.82842712,4.47213595,0]] 41 | G = disjoint_mst(self.pts) 42 | assert_array_almost_equal(G.matrix('dense'), expected) 43 | 44 | # check precomputed case, especially that we don't overwrite D 45 | D = pairwise_distances(self.pts) 46 | D_copy = D.copy() 47 | G = disjoint_mst(D, metric='precomputed') 48 | assert_array_almost_equal(G.matrix('dense'), expected) 49 | assert_array_almost_equal(D, D_copy) 50 | 51 | if __name__ == '__main__': 52 | unittest.main() 53 | -------------------------------------------------------------------------------- /graphs/base/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import numpy as np 4 | import scipy.sparse as ss 5 | 6 | from .adj import SparseAdjacencyMatrixGraph, DenseAdjacencyMatrixGraph 7 | from .base import Graph 8 | from .pairs import EdgePairGraph, SymmEdgePairGraph 9 | 10 | __all__ = ['Graph'] 11 | 12 | 13 | def from_edge_pairs(pairs, num_vertices=None, symmetric=False, weights=None): 14 | '''Constructor for Graph objects based on edges given as pairs of vertices. 15 | pairs : integer array-like with shape (num_edges, 2) 16 | ''' 17 | if not symmetric: 18 | if weights is None: 19 | return EdgePairGraph(pairs, num_vertices=num_vertices) 20 | row, col = np.asarray(pairs).T 21 | row, weights = np.broadcast_arrays(row, weights) 22 | shape = None if num_vertices is None else (num_vertices, num_vertices) 23 | adj = ss.coo_matrix((weights, (row, col)), shape=shape) 24 | return SparseAdjacencyMatrixGraph(adj) 25 | # symmetric case 26 | G = SymmEdgePairGraph(pairs, num_vertices=num_vertices) 27 | if weights is None: 28 | return G 29 | # Convert to sparse adj graph with provided edge weights 30 | s = G.matrix('coo').astype(float) 31 | # shenanigans to assign edge weights in the right order 32 | flat_idx = np.ravel_multi_index(s.nonzero(), s.shape) 33 | r, c = np.transpose(pairs) 34 | rc_idx = np.ravel_multi_index((r,c), s.shape) 35 | cr_idx = np.ravel_multi_index((c,r), s.shape) 36 | order = np.argsort(flat_idx) 37 | flat_idx = flat_idx[order] 38 | s.data[order[np.searchsorted(flat_idx, rc_idx)]] = weights 39 | s.data[order[np.searchsorted(flat_idx, cr_idx)]] = weights 40 | return SparseAdjacencyMatrixGraph(s) 41 | 42 | 43 | def from_adj_matrix(adj): 44 | '''Constructor for Graph objects based on a given adjacency matrix. 45 | adj : scipy.sparse matrix or array-like, shape (num_vertices, num_vertices) 46 | ''' 47 | if ss.issparse(adj): 48 | return SparseAdjacencyMatrixGraph(adj) 49 | return DenseAdjacencyMatrixGraph(adj) 50 | 51 | # Add static methods to the Graph class. 
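# Attaching them here (rather than defining them in base.py) lets the constructors reference the concrete subclasses above without a circular import.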
52 | Graph.from_edge_pairs = staticmethod(from_edge_pairs) 53 | Graph.from_adj_matrix = staticmethod(from_adj_matrix) 54 | -------------------------------------------------------------------------------- /graphs/construction/spanning_tree.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import numpy as np 4 | from scipy.sparse.csgraph import minimum_spanning_tree 5 | from sklearn.metrics.pairwise import pairwise_distances 6 | from graphs import Graph 7 | from ..mini_six import range 8 | 9 | __all__ = ['mst', 'perturbed_mst', 'disjoint_mst'] 10 | 11 | 12 | def mst(X, metric='euclidean'): 13 | D = pairwise_distances(X, metric=metric) 14 | mst = minimum_spanning_tree(D, overwrite=(metric!='precomputed')) 15 | return Graph.from_adj_matrix(mst + mst.T) 16 | 17 | 18 | def perturbed_mst(X, num_perturbations=20, metric='euclidean', jitter=None): 19 | '''Builds a graph as the union of several MSTs on perturbed data. 20 | Reference: http://ecovision.mit.edu/~sloop/shao.pdf, page 8 21 | jitter refers to the scale of the gaussian noise added for each perturbation. 22 | When jitter is None, it defaults to the 5th percentile interpoint distance. 23 | Note that metric cannot be 'precomputed', as multiple MSTs are computed.''' 24 | assert metric != 'precomputed' 25 | D = pairwise_distances(X, metric=metric) 26 | if jitter is None: 27 | jitter = np.percentile(D[D>0], 5) 28 | W = minimum_spanning_tree(D) 29 | W = W + W.T 30 | W.data[:] = 1.0 # binarize 31 | for i in range(num_perturbations): 32 | pX = X + np.random.normal(scale=jitter, size=X.shape) 33 | pW = minimum_spanning_tree(pairwise_distances(pX, metric=metric)) 34 | pW = pW + pW.T 35 | pW.data[:] = 1.0 36 | W = W + pW 37 | # final graph is the average over all pertubed MSTs + the original 38 | W.data /= (num_perturbations + 1.0) 39 | return Graph.from_adj_matrix(W) 40 | 41 | 42 | def disjoint_mst(X, num_spanning_trees=3, metric='euclidean'): 43 | '''Builds a graph as the union of several spanning trees, 44 | each time removing any edges present in previously-built trees. 
45 | Reference: http://ecovision.mit.edu/~sloop/shao.pdf, page 9.''' 46 | D = pairwise_distances(X, metric=metric) 47 | if metric == 'precomputed': 48 | D = D.copy() 49 | mst = minimum_spanning_tree(D) 50 | W = mst.copy() 51 | for i in range(1, num_spanning_trees): 52 | ii,jj = mst.nonzero() 53 | D[ii,jj] = np.inf 54 | D[jj,ii] = np.inf 55 | mst = minimum_spanning_tree(D) 56 | W = W + mst 57 | # MSTs are all one-sided, so we symmetrize here 58 | return Graph.from_adj_matrix(W + W.T) 59 | -------------------------------------------------------------------------------- /graphs/base/tests/test_pairs.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | from numpy.testing import assert_array_equal 4 | 5 | from graphs.base.pairs import EdgePairGraph, SymmEdgePairGraph 6 | 7 | PAIRS = np.array([[0,1],[0,2],[1,1],[2,1],[3,3]]) 8 | ADJ = [[0,1,1,0], 9 | [0,1,0,0], 10 | [0,1,0,0], 11 | [0,0,0,1]] 12 | 13 | 14 | class TestEdgePairGraph(unittest.TestCase): 15 | def setUp(self): 16 | self.epg = EdgePairGraph(PAIRS) 17 | 18 | def test_pairs(self): 19 | self.assertIs(self.epg.pairs(copy=False), PAIRS) 20 | P = self.epg.pairs(copy=True) 21 | self.assertIsNot(P, PAIRS) 22 | assert_array_equal(P, PAIRS) 23 | # test the directed case 24 | P = self.epg.pairs(directed=False) 25 | assert_array_equal(P, [[0,1],[0,2],[1,1],[1,2],[3,3]]) 26 | 27 | def test_matrix(self): 28 | M = self.epg.matrix() 29 | assert_array_equal(M.toarray(), ADJ) 30 | M = self.epg.matrix('dense') 31 | assert_array_equal(M, ADJ) 32 | M = self.epg.matrix('csr') 33 | self.assertEqual(M.format, 'csr') 34 | assert_array_equal(M.toarray(), ADJ) 35 | 36 | def test_self_edges(self): 37 | self.epg.add_self_edges() 38 | expected = self.epg.pairs() 39 | # Ensure that calling it again does the right thing.
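# (i.e., add_self_edges should be idempotent and must not duplicate self-loop pairs)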
40 | self.epg.add_self_edges() 41 | assert_array_equal(self.epg.pairs(), expected) 42 | 43 | def test_symmetrize(self): 44 | # Check that copy=True doesn't change anything 45 | self.epg.symmetrize(copy=True) 46 | assert_array_equal(self.epg.matrix('dense'), ADJ) 47 | 48 | 49 | class TestSymmEdgePairGraph(unittest.TestCase): 50 | def setUp(self): 51 | self.G = SymmEdgePairGraph(PAIRS) 52 | 53 | def test_copy(self): 54 | gg = self.G.copy() 55 | self.assertIsNot(gg, self.G) 56 | assert_array_equal(gg.matrix('dense'), self.G.matrix('dense')) 57 | assert_array_equal(gg.pairs(), self.G.pairs()) 58 | 59 | def test_pairs(self): 60 | expected = [[0,1], [0,2], [1,0], [1,1], [1,2], [2,0], [2,1], [3,3]] 61 | P = self.G.pairs() 62 | assert_array_equal(sorted(P.tolist()), expected) 63 | # test the directed case 64 | P = self.G.pairs(directed=False) 65 | assert_array_equal(P, [[0,1],[0,2],[1,1],[1,2],[3,3]]) 66 | 67 | def test_symmetrize(self): 68 | self.assertIs(self.G.symmetrize(copy=False), self.G) 69 | S = self.G.symmetrize(copy=True) 70 | self.assertIsNot(S, self.G) 71 | assert_array_equal(S.matrix('dense'), self.G.matrix('dense')) 72 | 73 | if __name__ == '__main__': 74 | unittest.main() 75 | -------------------------------------------------------------------------------- /graphs/construction/tests/test_b_matching.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import unittest 3 | import warnings 4 | from numpy.testing import assert_array_equal 5 | from sklearn.metrics.pairwise import pairwise_distances 6 | 7 | from graphs.construction import b_matching 8 | 9 | 10 | class TestBMatching(unittest.TestCase): 11 | def setUp(self): 12 | pts = np.array([ 13 | [0.192,0.622],[0.438,0.785],[0.780,0.273],[0.276,0.802],[0.958,0.876], 14 | [0.358,0.501],[0.683,0.713],[0.370,0.561],[0.503,0.014],[0.773,0.883]]) 15 | self.dists = pairwise_distances(pts) 16 | 17 | def test_standard(self): 18 | # Generated with the bdmatch binary (b=2,damp=0.5) 19 | expected = np.array([ 20 | [0, 1, 0, 1, 0, 0, 0, 1, 0, 0], 21 | [1, 0, 0, 1, 0, 0, 0, 0, 0, 0], 22 | [0, 0, 0, 0, 1, 0, 1, 0, 1, 0], 23 | [1, 1, 0, 0, 0, 0, 0, 0, 0, 0], 24 | [0, 0, 0, 0, 0, 0, 0, 0, 0, 1], 25 | [0, 0, 0, 0, 0, 0, 0, 1, 1, 0], 26 | [0, 0, 1, 0, 0, 0, 0, 0, 0, 1], 27 | [0, 0, 0, 0, 0, 1, 0, 0, 0, 0], 28 | [0, 0, 1, 0, 0, 1, 0, 0, 0, 0], 29 | [0, 0, 0, 0, 1, 0, 1, 0, 0, 0]]).T 30 | G = b_matching(self.dists, 2, damping=0.5) 31 | assert_array_equal(G.matrix('dense').astype(int), expected) 32 | 33 | def test_warn_nonconvergence(self): 34 | with warnings.catch_warnings(record=True) as w: 35 | b_matching(self.dists, 2, max_iter=2) 36 | self.assertEqual(len(w), 1) 37 | self.assertEqual(str(w[0].message), 38 | 'Hit iteration limit (2) before converging') 39 | 40 | def test_oscillation(self): 41 | # Generated with the bdmatch binary (b=2,damp=1) 42 | expected = np.array([ 43 | [0, 0, 0, 1, 0, 0, 0, 1, 0, 0], 44 | [0, 0, 0, 1, 0, 0, 0, 1, 0, 0], 45 | [0, 0, 0, 0, 1, 0, 0, 0, 1, 0], 46 | [1, 1, 0, 0, 0, 0, 0, 0, 0, 0], 47 | [0, 0, 0, 0, 0, 0, 1, 0, 0, 1], 48 | [0, 0, 0, 0, 0, 0, 0, 1, 1, 0], 49 | [0, 0, 0, 0, 1, 0, 0, 0, 0, 1], 50 | [1, 0, 0, 0, 0, 1, 0, 0, 0, 0], 51 | [0, 0, 1, 0, 0, 1, 0, 0, 0, 0], 52 | [0, 0, 0, 0, 1, 0, 1, 0, 0, 0]]) 53 | G = b_matching(self.dists, 2, damping=1) 54 | assert_array_equal(G.matrix('dense').astype(int), expected) 55 | 56 | def test_array_b(self): 57 | b = np.zeros(10, dtype=int) 58 | b[5:] = 20 59 | expected = 1 - np.eye(10, dtype=int) 60 | expected[:5] = 0 61 | G = 
b_matching(self.dists, b) 62 | assert_array_equal(G.matrix('dense').astype(int), expected) 63 | 64 | if __name__ == '__main__': 65 | unittest.main() 66 | -------------------------------------------------------------------------------- /examples/swiss_roll.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import numpy as np 3 | from matplotlib import pyplot as plt 4 | from mpl_toolkits.mplot3d import Axes3D 5 | from sklearn.metrics import pairwise_distances 6 | from time import time 7 | 8 | from graphs.datasets.swiss_roll import swiss_roll 9 | from graphs.construction import ( 10 | neighbor_graph, b_matching, gabriel_graph, 11 | relative_neighborhood_graph, manifold_spanning_graph, 12 | sparse_regularized_graph, smce_graph, saffron, mst, disjoint_mst 13 | ) 14 | 15 | 16 | def main(): 17 | X, theta = swiss_roll(8, 500, return_theta=True) 18 | D = pairwise_distances(X) 19 | graph_info = [ 20 | _c('5-NN', neighbor_graph, D, k=6, precomputed=True), 21 | _c('b-matching', b_matching, D, 6), 22 | _c('gabriel', gabriel_graph, X), 23 | _c('rel. neighborhood', relative_neighborhood_graph,D,metric='precomputed'), 24 | _c('manifold spanning', manifold_spanning_graph, X, 2), 25 | _c('L1', sparse_regularized_graph, X, kmax=10, sparsity_param=0.0005), 26 | _c('SMCE', _smce_symm_dist, X, kmax=25, sparsity_param=5), 27 | _c('SAFFRON', saffron, X, q=15, k=5, tangent_dim=2), 28 | _c('MST', mst, D, metric='precomputed'), 29 | _c('dMST', disjoint_mst, D, metric='precomputed'), 30 | ] 31 | 32 | print('Plotting graphs & embeddings') 33 | fig1, axes1 = plt.subplots(nrows=3, ncols=3, subplot_kw=dict(projection='3d')) 34 | fig2, axes2 = plt.subplots(nrows=3, ncols=3) 35 | fig1.suptitle('Original Coordinates') 36 | fig2.suptitle('Isomap Embeddings') 37 | 38 | for ax1, ax2, info in zip(axes1.flat, axes2.flat, graph_info): 39 | label, G, gg, emb, mask = info 40 | G.plot(X, ax=ax1, title=label, vertex_style=dict(c=theta)) 41 | gg.plot(emb, ax=ax2, title=label, vertex_style=dict(c=theta[mask])) 42 | ax1.view_init(elev=5, azim=70) 43 | ax1.set_axis_off() 44 | ax2.set_axis_off() 45 | plt.show() 46 | 47 | 48 | def _smce_symm_dist(X, **kwargs): 49 | g = smce_graph(X, **kwargs) 50 | # SMCE produces asymmetric similarity weights, so we have to convert it. 51 | return g.symmetrize('max').reweight_by_distance(X) 52 | 53 | 54 | def _c(label, fn, *args, **kwargs): 55 | print('Constructing', label, 'graph:') 56 | tic = time() 57 | G = fn(*args, **kwargs) 58 | print(' -> took %.3f secs' % (time() - tic)) 59 | num_ccs, labels = G.connected_components(directed=False) 60 | if num_ccs == 1: 61 | mask = Ellipsis 62 | gg = G 63 | else: 64 | mask = labels == np.bincount(labels).argmax() 65 | gg = G.subgraph(mask) 66 | emb = gg.isomap(num_dims=2, directed=False) 67 | return label, G, gg, emb, mask 68 | 69 | 70 | if __name__ == '__main__': 71 | main() 72 | -------------------------------------------------------------------------------- /benchmarks/asv.conf.json: -------------------------------------------------------------------------------- 1 | { 2 | // The version of the config file format. Do not change, unless 3 | // you know what you are doing. 
4 | "version": 1, 5 | 6 | // The name of the project being benchmarked 7 | "project": "graphs", 8 | 9 | // The project's homepage 10 | "project_url": "https://github.com/all-umass/graphs", 11 | 12 | // The URL or local path of the source code repository for the 13 | // project being benchmarked 14 | "repo": "..", 15 | "dvcs": "git", 16 | "branches": ["master"], 17 | 18 | // The tool to use to create environments. May be "conda", 19 | // "virtualenv" or other value depending on the plugins in use. 20 | // If missing or the empty string, the tool will be automatically 21 | // determined by looking for tools on the PATH environment 22 | // variable. 23 | "environment_type": "virtualenv", 24 | 25 | // the base URL to show a commit for the project. 26 | "show_commit_url": "http://github.com/all-umass/graphs/commit/", 27 | 28 | // The Pythons you'd like to test against. If not provided, defaults 29 | // to the current version of Python used to run `asv`. 30 | // "pythons": ["2.7", "3.3"], 31 | 32 | // The matrix of dependencies to test. Each key is the name of a 33 | // package (in PyPI) and the values are version numbers. An empty 34 | // list indicates to just test against the default (latest) 35 | // version. 36 | "matrix": { 37 | "numpy": ["1.10.4"], 38 | "scipy": ["0.17"], 39 | "matplotlib": ["1.5.1"], 40 | "scikit-learn": ["0.17"] 41 | }, 42 | 43 | // The directory (relative to the current directory) that benchmarks are 44 | // stored in. If not provided, defaults to "benchmarks" 45 | // "benchmark_dir": "benchmarks", 46 | 47 | // The directory (relative to the current directory) to cache the Python 48 | // environments in. If not provided, defaults to "env" 49 | // "env_dir": "env", 50 | 51 | 52 | // The directory (relative to the current directory) that raw benchmark 53 | // results are stored in. If not provided, defaults to "results". 54 | // "results_dir": "results", 55 | 56 | // The directory (relative to the current directory) that the html tree 57 | // should be written to. If not provided, defaults to "html". 58 | // "html_dir": "html", 59 | 60 | // The number of characters to retain in the commit hashes. 61 | // "hash_length": 8, 62 | 63 | // `asv` will cache wheels of the recent builds in each 64 | // environment, making them faster to install next time. This is 65 | // number of builds to keep, per environment. 
66 | "wheel_cache_size": 2 67 | } 68 | -------------------------------------------------------------------------------- /graphs/construction/tests/test_incremental.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | from numpy.testing import assert_array_almost_equal 4 | from sklearn.metrics import pairwise_distances 5 | 6 | from graphs.construction import neighbor_graph 7 | from graphs.construction.incremental import incremental_neighbor_graph 8 | from graphs.mini_six import zip_longest, range 9 | 10 | np.set_printoptions(precision=3, suppress=True) 11 | 12 | 13 | def ngraph(*a, **k): 14 | return neighbor_graph(*a,**k).matrix('dense') 15 | 16 | 17 | class TestNeighbors(unittest.TestCase): 18 | def setUp(self): 19 | self.pts = np.array([[0,0],[1,2],[3,2.5],[-1,0],[.5,.2],[3,.6],[-2,-0.5]]) 20 | 21 | def test_k_range(self): 22 | k_range = range(1, 5) 23 | incr_gen = incremental_neighbor_graph(self.pts, k=k_range) 24 | for k, G in zip_longest(k_range, incr_gen): 25 | expected = ngraph(self.pts, k=k) 26 | assert_array_almost_equal(G.matrix('dense'), expected) 27 | 28 | # non-uniform steps 29 | k_range = [1, 3, 6] 30 | incr_gen = incremental_neighbor_graph(self.pts, k=k_range) 31 | for k, G in zip_longest(k_range, incr_gen): 32 | expected = ngraph(self.pts, k=k) 33 | assert_array_almost_equal(G.matrix('dense'), expected) 34 | 35 | def test_eps_range(self): 36 | eps_range = np.linspace(0.1, 5.5, 5) 37 | incr_gen = incremental_neighbor_graph(self.pts, epsilon=eps_range) 38 | for eps, G in zip_longest(eps_range, incr_gen): 39 | expected = ngraph(self.pts, epsilon=eps) 40 | assert_array_almost_equal(G.matrix('dense'), expected) 41 | 42 | def test_k_eps_range(self): 43 | # varied k with fixed epsilon 44 | k_range = range(1, 5) 45 | incr_gen = incremental_neighbor_graph(self.pts, k=k_range, epsilon=3.) 46 | for k, G in zip_longest(k_range, incr_gen): 47 | expected = ngraph(self.pts, k=k, epsilon=3.) 
48 | assert_array_almost_equal(G.matrix('dense'), expected) 49 | 50 | # varied eps with fixed k 51 | eps_range = np.linspace(0.1, 5.5, 5) 52 | incr_gen = incremental_neighbor_graph(self.pts, k=3, epsilon=eps_range) 53 | for eps, G in zip_longest(eps_range, incr_gen): 54 | expected = ngraph(self.pts, k=3, epsilon=eps) 55 | assert_array_almost_equal(G.matrix('dense'), expected) 56 | 57 | def test_l1_precomputed(self): 58 | dist = pairwise_distances(self.pts, metric='l1') 59 | k_range = range(1, 5) 60 | incr_gen = incremental_neighbor_graph(dist, precomputed=True, k=k_range) 61 | for k, G in zip_longest(k_range, incr_gen): 62 | expected = ngraph(dist, precomputed=True, k=k) 63 | assert_array_almost_equal(G.matrix('dense'), expected) 64 | 65 | 66 | if __name__ == '__main__': 67 | unittest.main() 68 | -------------------------------------------------------------------------------- /graphs/construction/tests/test_saffron.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import unittest 3 | from numpy.testing import assert_array_almost_equal 4 | 5 | from graphs.construction import saffron 6 | 7 | 8 | class TestSaffron(unittest.TestCase): 9 | 10 | def test_x(self): 11 | theta = np.concatenate((np.linspace(-0.25, 0.3, 8), 12 | np.linspace(2.86, 3.4, 8))) 13 | n = theta.shape[0] 14 | X = np.column_stack((np.sin(theta), np.sin(theta) * np.cos(theta))) 15 | 16 | G = saffron(X, q=5, k=2, tangent_dim=1, curv_thresh=0.9, decay_rate=0.5) 17 | 18 | expected_ii = [0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 19 | 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15] 20 | expected_jj = [2, 1, 3, 2, 4, 1, 5, 2, 6, 3, 4, 6, 5, 7, 5, 6, 10, 9, 11, 21 | 10, 12, 9, 9, 13, 11, 14, 15, 12, 13, 15, 13, 14] 22 | expected_w = [0.214, 0.105, 0.219, 0.109, 0.222, 0.109, 0.221, 0.111, 0.216, 23 | 0.111, 0.11, 0.107, 0.107, 0.102, 0.208, 0.102, 0.207, 0.101, 24 | 0.213, 0.105, 0.217, 0.105, 0.213, 0.217, 0.109, 0.215, 0.209, 25 | 0.108, 0.106, 0.103, 0.209, 0.103] 26 | exp = np.zeros((n, n), dtype=float) 27 | exp[expected_ii, expected_jj] = expected_w 28 | 29 | assert_array_almost_equal(G.matrix('dense'), exp, decimal=3) 30 | 31 | def test_intersecting_planes(self): 32 | n1 = np.array([-0.25, -1, 1]) 33 | n2 = np.array([0.5, 0.75, 1.25]) 34 | x1, y1 = map(np.ravel, np.meshgrid(np.linspace(-0.75, 1.5, 10), 35 | np.linspace(-1, 1, 9))) 36 | z1 = (-n1[0]*x1 - n1[1]*y1) / n1[2] 37 | x2, y2 = map(np.ravel, np.meshgrid(np.linspace(-1, 1, 8), 38 | np.linspace(-1.2, 0.9, 9))) 39 | z2 = (-n2[0]*x2 - n2[1]*y2) / n2[2] 40 | X = np.vstack((np.c_[x1, y1, z1], np.c_[x2, y2, z2])) 41 | 42 | # just a smoke test for now, to test the tangent_dim > 1 case 43 | saffron(X, q=16, k=3, tangent_dim=2, decay_rate=0.75, max_iter=30) 44 | 45 | # XXX: This test doesn't pass, though it's unclear if that's due to a bug. 
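# (The test below is disabled by wrapping it in a module-level string literal,
# so unittest never collects it. Note that re-enabling it would also require
# importing assert_array_equal, which this file doesn't currently do.)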
46 | ''' 47 | def test_helix(self): 48 | # attempt to replicate the squashed helix example from the paper 49 | t = np.linspace(0, 7*np.pi, 439) 50 | X = np.column_stack((np.sin(t), np.cos(t), 0.001*t)) 51 | G = saffron(X, q=32, k=4, tangent_dim=1, curv_thresh=0.95, decay_rate=0.9, 52 | max_iter=100) 53 | # check that G doesn't short circuit across loops of the helix 54 | ii, jj = G.pairs().T 55 | diag_offsets = np.unique(np.abs(ii - jj)) 56 | assert_array_equal(diag_offsets, [1, 2])''' 57 | 58 | 59 | if __name__ == '__main__': 60 | unittest.main() 61 | -------------------------------------------------------------------------------- /graphs/base/tests/test_static.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | from numpy.testing import assert_array_equal, assert_array_almost_equal 4 | from scipy.sparse import csr_matrix 5 | 6 | from graphs.base import Graph 7 | 8 | PAIRS = np.array([[0,1],[0,2],[1,1],[2,1],[3,3]]) 9 | ADJ = [[0,1,1,0], 10 | [0,1,0,0], 11 | [0,1,0,0], 12 | [0,0,0,1]] 13 | 14 | 15 | class TestStaticConstructors(unittest.TestCase): 16 | def test_from_pairs(self): 17 | g = Graph.from_edge_pairs(PAIRS) 18 | self.assertEqual(g.num_edges(), 5) 19 | self.assertEqual(g.num_vertices(), 4) 20 | g = Graph.from_edge_pairs(PAIRS, num_vertices=10) 21 | self.assertEqual(g.num_edges(), 5) 22 | self.assertEqual(g.num_vertices(), 10) 23 | g = Graph.from_edge_pairs(PAIRS, symmetric=True) 24 | self.assertEqual(g.num_edges(), 8) 25 | self.assertEqual(g.num_vertices(), 4) 26 | 27 | def test_from_pairs_empty(self): 28 | g = Graph.from_edge_pairs([]) 29 | self.assertEqual(g.num_edges(), 0) 30 | self.assertEqual(g.num_vertices(), 0) 31 | ii, jj = g.pairs().T 32 | assert_array_equal(ii, []) 33 | assert_array_equal(jj, []) 34 | # Make sure ii and jj have indexable dtypes 35 | PAIRS[ii,jj] 36 | # Make sure num_vertices is set correctly 37 | g = Graph.from_edge_pairs([], num_vertices=5) 38 | self.assertEqual(g.num_edges(), 0) 39 | self.assertEqual(g.num_vertices(), 5) 40 | 41 | def test_from_pairs_floating(self): 42 | g = Graph.from_edge_pairs(PAIRS.astype(float)) 43 | p = g.pairs() 44 | self.assertTrue(np.can_cast(p, PAIRS.dtype, casting='same_kind'), 45 | "Expected integral dtype, got %s" % p.dtype) 46 | assert_array_equal(p, PAIRS) 47 | 48 | def test_from_pairs_weighted(self): 49 | w = np.array([1,1,0.1,2,1,2,3.1,4]) 50 | p = [[0,1],[1,2],[2,3],[3,4],[1,0],[2,1],[3,2],[4,3]] 51 | expected = [[0,1,0,0,0],[1,0,1,0,0],[0,2,0,0.1,0],[0,0,3.1,0,2],[0,0,0,4,0]] 52 | G = Graph.from_edge_pairs(p, weights=w, num_vertices=5) 53 | assert_array_almost_equal(G.matrix('dense'), expected) 54 | 55 | # weighted + symmetric 56 | w = np.arange(1, 6) 57 | expected = [[0,1,2,0],[1,3,4,0],[2,4,0,0],[0,0,0,5]] 58 | G = Graph.from_edge_pairs(PAIRS, symmetric=True, weights=w) 59 | assert_array_equal(G.matrix('dense'), expected) 60 | G = Graph.from_edge_pairs(PAIRS[::-1], symmetric=True, weights=w[::-1]) 61 | assert_array_equal(G.matrix('dense'), expected) 62 | 63 | def test_from_adj(self): 64 | m = Graph.from_adj_matrix(ADJ) 65 | self.assertEqual(m.num_edges(), 5) 66 | self.assertEqual(m.num_vertices(), 4) 67 | m = Graph.from_adj_matrix(csr_matrix(ADJ)) 68 | self.assertEqual(m.num_edges(), 5) 69 | self.assertEqual(m.num_vertices(), 4) 70 | 71 | if __name__ == '__main__': 72 | unittest.main() 73 | -------------------------------------------------------------------------------- /graphs/construction/downsample.py: 
-------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import numpy as np 4 | import warnings 5 | from sklearn.metrics.pairwise import pairwise_distances 6 | 7 | from ..mini_six import range 8 | from .neighbors import nearest_neighbors 9 | 10 | __all__ = [ 11 | 'downsample_trajectories', 'epsilon_net', 'fuzzy_c_means' 12 | ] 13 | 14 | 15 | def downsample_trajectories(trajectories, downsampler, *args, **kwargs): 16 | '''Downsamples all points together, then re-splits into original trajectories. 17 | 18 | trajectories : list of 2-d arrays, each representing a trajectory 19 | downsampler(X, *args, **kwargs) : callable that returns indices into X 20 | ''' 21 | X = np.vstack(trajectories) 22 | traj_lengths = list(map(len, trajectories)) 23 | inds = np.sort(downsampler(X, *args, **kwargs)) 24 | new_traj = [] 25 | for stop in np.cumsum(traj_lengths): 26 | n = np.searchsorted(inds, stop) 27 | new_traj.append(X[inds[:n]]) 28 | inds = inds[n:] 29 | return new_traj 30 | 31 | 32 | def epsilon_net(points, close_distance): 33 | '''Selects a subset of `points` to preserve graph structure while minimizing 34 | the number of points used, by removing points within `close_distance`. 35 | Returns the downsampled indices.''' 36 | num_points = points.shape[0] 37 | indices = set(range(num_points)) 38 | selected = [] 39 | while indices: 40 | idx = indices.pop() 41 | nn_inds, = nearest_neighbors(points[idx], points, epsilon=close_distance) 42 | indices.difference_update(nn_inds) 43 | selected.append(idx) 44 | return selected 45 | 46 | 47 | def fuzzy_c_means(points, num_centers, m=2., tol=1e-4, max_iter=100, 48 | verbose=False): 49 | '''Uses Fuzzy C-Means to downsample `points`. 50 | m : aggregation parameter >1, larger implies smoother clusters 51 | Returns indices of downsampled points. 52 | ''' 53 | num_points = points.shape[0] 54 | if num_centers >= num_points: 55 | return np.arange(num_points) 56 | # randomly initialize cluster assignments matrix 57 | assn = np.random.random((points.shape[0], num_centers)) 58 | # iterate assignments until they converge 59 | for i in range(max_iter): 60 | # compute centers 61 | w = assn ** m 62 | w /= w.sum(axis=0) 63 | centers = w.T.dot(points) 64 | # calculate new assignments 65 | d = pairwise_distances(points, centers) 66 | d **= 2. / (m - 1) 67 | np.maximum(d, 1e-10, out=d) 68 | new_assn = 1. 
/ np.einsum('ik,ij->ik', d, 1./d) 69 | # check for convergence 70 | change = np.linalg.norm(new_assn - assn) 71 | if verbose: 72 | print('At iteration %d: change = %g' % (i+1, change)) 73 | if change < tol: 74 | break 75 | assn = new_assn 76 | else: 77 | warnings.warn("fuzzy_c_means didn't converge in %d iterations" % max_iter) 78 | # find points closest to the selected cluster centers 79 | return d.argmin(axis=0) 80 | -------------------------------------------------------------------------------- /graphs/construction/tests/test_neighbors.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | from numpy.testing import assert_array_equal, assert_array_almost_equal 4 | from sklearn.metrics.pairwise import pairwise_distances 5 | 6 | from graphs.construction import neighbors 7 | 8 | 9 | def ngraph(*a, **k): 10 | return neighbors.neighbor_graph(*a,**k).matrix('dense') 11 | 12 | 13 | class TestNeighbors(unittest.TestCase): 14 | def setUp(self): 15 | self.pts = np.array([[0,0],[1,2],[3,2],[-1,0]]) 16 | self.bin_adj = np.array([[0,1,0,1],[1,0,1,0],[1,1,0,0],[1,1,0,0]]) 17 | self.l2_adj = np.sqrt([[0,5,0,1],[5,0,4,0],[13,4,0,0],[1,8,0,0]]) 18 | 19 | def test_neighbor_graph(self): 20 | self.assertRaises(ValueError, ngraph, self.pts) 21 | 22 | def test_binary_weighting(self): 23 | assert_array_equal(ngraph(self.pts, weighting='binary', k=2), self.bin_adj) 24 | assert_array_equal(ngraph(self.pts, weighting='binary', k=2, epsilon=100), 25 | self.bin_adj) 26 | # Add extra values for e-ball 27 | self.bin_adj[0,2] = 1 28 | self.bin_adj[1,3] = 1 29 | assert_array_equal(ngraph(self.pts, weighting='binary', epsilon=3.61), 30 | self.bin_adj) 31 | 32 | def test_no_weighting(self): 33 | assert_array_almost_equal(ngraph(self.pts, k=2), self.l2_adj) 34 | # Add extra values for e-ball 35 | self.l2_adj[0,2] = np.sqrt(13) 36 | self.l2_adj[1,3] = np.sqrt(8) 37 | assert_array_almost_equal(ngraph(self.pts, epsilon=3.61), self.l2_adj) 38 | 39 | def test_precomputed(self): 40 | D = pairwise_distances(self.pts, metric='l2') 41 | actual = ngraph(D, metric='precomputed', k=2) 42 | assert_array_almost_equal(actual, self.l2_adj, decimal=4) 43 | actual = ngraph(D, metric='precomputed', k=2, weighting='binary') 44 | assert_array_almost_equal(actual, self.bin_adj, decimal=4) 45 | 46 | def test_nearest_neighbors(self): 47 | nns = neighbors.nearest_neighbors 48 | pt = np.zeros(2) 49 | self.assertRaises(ValueError, nns, pt, self.pts) 50 | assert_array_equal(nns(pt, self.pts, k=2), [[0,3]]) 51 | assert_array_equal(nns(pt, self.pts, epsilon=2), [[0,3]]) 52 | assert_array_equal(nns(pt, self.pts, k=2, epsilon=10), [[0,3]]) 53 | # Check return_dists 54 | dists, inds = nns(pt, self.pts, k=2, return_dists=True) 55 | assert_array_equal(inds, [[0,3]]) 56 | assert_array_almost_equal(dists, [[0, 1]]) 57 | dists, inds = nns(pt, self.pts, epsilon=2, return_dists=True) 58 | assert_array_equal(inds, [[0,3]]) 59 | assert_array_almost_equal(dists, [[0, 1]]) 60 | # Check precomputed 61 | D = pairwise_distances(pt[None], self.pts, metric='l1') 62 | self.assertRaises(ValueError, nns, pt, self.pts, metric='precomputed', k=2) 63 | assert_array_equal(nns(D, metric='precomputed', k=2), [[0,3]]) 64 | # Check 2d query shape 65 | pt = [[0,0]] 66 | assert_array_equal(nns(pt, self.pts, k=2), [[0,3]]) 67 | # Check all-pairs mode 68 | assert_array_equal(nns(self.pts, k=2), [[0,3],[1,2],[2,1],[3,0]]) 69 | 70 | 71 | if __name__ == '__main__': 72 | unittest.main() 73 | 
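The tests above double as a compact reference for the neighbors API: neighbor_graph builds a k-nearest-neighbor and/or epsilon-ball graph over a point set (or a precomputed distance matrix), while nearest_neighbors queries neighbors of individual points. A minimal usage sketch, reusing the toy points from the tests (assumes the graphs package is importable):

import numpy as np
from graphs.construction import neighbor_graph
from graphs.construction.neighbors import nearest_neighbors

pts = np.array([[0,0],[1,2],[3,2],[-1,0]])
# k-nearest-neighbor graph with binary (0/1) edge weights
G = neighbor_graph(pts, k=2, weighting='binary')
# epsilon-ball graph; edges keep their euclidean lengths by default
H = neighbor_graph(pts, epsilon=3.61)
# neighbors of a single query point, with distances
dists, inds = nearest_neighbors(np.zeros(2), pts, k=2, return_dists=True)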
-------------------------------------------------------------------------------- /graphs/construction/geometric.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function 2 | 3 | import numpy as np 4 | from scipy.spatial import Delaunay 5 | from sklearn.metrics.pairwise import ( 6 | pairwise_distances, paired_distances, pairwise_distances_argmin_min) 7 | from graphs import Graph 8 | from ..mini_six import range 9 | 10 | __all__ = [ 11 | 'delaunay_graph', 'urquhart_graph', 'gabriel_graph', 12 | 'relative_neighborhood_graph' 13 | ] 14 | 15 | 16 | def delaunay_graph(X, weighted=False): 17 | '''Delaunay triangulation graph. 18 | ''' 19 | e1, e2 = _delaunay_edges(X) 20 | pairs = np.column_stack((e1, e2)) 21 | w = paired_distances(X[e1], X[e2]) if weighted else None 22 | return Graph.from_edge_pairs(pairs, num_vertices=X.shape[0], symmetric=True, 23 | weights=w) 24 | 25 | 26 | def urquhart_graph(X, weighted=False): 27 | '''Urquhart graph: made from the 2 shortest edges of each Delaunay triangle. 28 | ''' 29 | e1, e2 = _delaunay_edges(X) 30 | w = paired_distances(X[e1], X[e2]) 31 | mask = np.ones_like(w, dtype=bool) 32 | bad_inds = w.reshape((-1, 3)).argmax(axis=1) + np.arange(0, len(e1), 3) 33 | mask[bad_inds] = False 34 | 35 | weights = w[mask] if weighted else None 36 | pairs = np.column_stack((e1[mask], e2[mask])) 37 | return Graph.from_edge_pairs(pairs, num_vertices=X.shape[0], symmetric=True, 38 | weights=weights) 39 | 40 | 41 | def gabriel_graph(X, metric='euclidean', weighted=False): 42 | n = X.shape[0] 43 | a, b = np.triu_indices(n, k=1) 44 | midpoints = (X[a] + X[b]) / 2 45 | _, Dmid = pairwise_distances_argmin_min(midpoints, X, metric=metric) 46 | Dedge = paired_distances(X[a], X[b], metric=metric) 47 | mask = (Dedge - Dmid * 2) < 1e-10 48 | pairs = np.column_stack((a[mask], b[mask])) 49 | w = Dedge[mask] if weighted else None 50 | return Graph.from_edge_pairs(pairs, num_vertices=n, symmetric=True, weights=w) 51 | 52 | 53 | def relative_neighborhood_graph(X, metric='euclidean', weighted=False): 54 | D = pairwise_distances(X, metric=metric) 55 | n = D.shape[0] 56 | pairs = np.asarray(find_relative_neighbors(D)) 57 | w = D[pairs[:,0],pairs[:,1]] if weighted else None 58 | return Graph.from_edge_pairs(pairs, num_vertices=n, symmetric=True, weights=w) 59 | 60 | 61 | def _delaunay_edges(X): 62 | tri = Delaunay(X) 63 | e1 = tri.simplices.ravel() 64 | e2 = np.roll(tri.simplices, 1, axis=1).ravel() 65 | return e1, e2 66 | 67 | 68 | def _find_relative_neighbors(D): 69 | # Naive algorithm, but it's generic to any D (doesn't depend on delaunay). 
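# An edge (r,c) survives iff no third point i falls inside the "lune" of r and
# c, i.e. iff there is no i with both D[r,i] < D[r,c] and D[c,i] < D[r,c].
# Checking every (r,c,i) triple makes this O(n^3) overall.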
70 | n = D.shape[0] 71 | pairs = [] 72 | for r in range(n-1): 73 | for c in range(r+1, n): 74 | d = D[r,c] 75 | for i in range(n): 76 | if i == r or i == c: 77 | continue 78 | if D[r,i] < d and D[c,i] < d: 79 | break # Point in lune, this is not an edge 80 | else: 81 | pairs.append((r,c)) 82 | return pairs 83 | 84 | 85 | try: 86 | from ._fast_paths import find_relative_neighbors 87 | except ImportError: 88 | try: 89 | import pyximport 90 | pyximport.install(setup_args={'include_dirs': np.get_include()}) 91 | from ._fast_paths import find_relative_neighbors 92 | except ImportError: 93 | find_relative_neighbors = _find_relative_neighbors 94 | -------------------------------------------------------------------------------- /graphs/mixins/_betweenness.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, absolute_import, print_function 2 | from collections import deque 3 | from heapq import heappush, heappop 4 | import numpy as np 5 | import scipy.sparse as ss 6 | from ..mini_six import range 7 | 8 | 9 | def _betweenness(adj, weighted, vertex): 10 | fn = _brandes if vertex else _brandes_edges 11 | return fn(adj, weighted) 12 | 13 | 14 | def _brandes(adj, weighted): 15 | # Brandes algorithm for vertex betweenness 16 | # sigma[v]: number of shortest paths from s->v 17 | # delta[v]: dependency of s on v 18 | sssp = _sssp_weighted if weighted else _sssp_unweighted 19 | n = adj.shape[0] 20 | btw = np.zeros(n) 21 | for s in range(n): 22 | S, pred, sigma = sssp(adj, s) 23 | delta = np.zeros(n) 24 | while S: 25 | w = S.pop() 26 | coeff = (1 + delta[w]) / sigma[w] 27 | for v in pred.get(w, []): 28 | delta[v] += sigma[v] * coeff 29 | if w != s: 30 | btw[w] += delta[w] 31 | return btw 32 | 33 | 34 | def _brandes_edges(adj, weighted): 35 | sssp = _sssp_weighted if weighted else _sssp_unweighted 36 | n = adj.shape[0] 37 | # set up betweenness container with correct sparsity pattern 38 | btw = ss.csr_matrix(adj, dtype=float, copy=True) 39 | btw.eliminate_zeros() 40 | btw.data[:] = 0 41 | for s in range(n): 42 | S, pred, sigma = sssp(adj, s) 43 | delta = np.zeros(n) 44 | while S: 45 | w = S.pop() 46 | coeff = (1 + delta[w]) / sigma[w] 47 | for v in pred.get(w, []): 48 | c = sigma[v] * coeff 49 | btw[v,w] += c 50 | delta[v] += c 51 | return btw.data 52 | 53 | 54 | def _sssp_unweighted(adj, s): 55 | n = adj.shape[0] 56 | S = [] 57 | pred = {} 58 | sigma = np.zeros(n) 59 | sigma[s] = 1 60 | dist = sigma + np.inf 61 | dist[s] = 0 62 | Q = deque([s]) 63 | while Q: 64 | v = Q.popleft() 65 | S.append(v) 66 | new_weight = dist[v] + 1 67 | neighbors = adj[v].nonzero()[-1] 68 | for w in neighbors: 69 | if np.isinf(dist[w]): 70 | pred[w] = [v] 71 | sigma[w] = sigma[v] 72 | dist[w] = new_weight 73 | Q.append(w) 74 | elif dist[w] == new_weight: 75 | pred[w].append(v) 76 | sigma[w] += sigma[v] 77 | return S, pred, sigma 78 | 79 | 80 | def _sssp_weighted(adj, s): 81 | n = adj.shape[0] 82 | S = set() 83 | pred = {} 84 | sigma = np.zeros(n) 85 | sigma[s] = 1 86 | dist = sigma + np.inf 87 | dist[s] = 0 88 | Q = [(0,s)] 89 | while Q: 90 | dist_v, v = heappop(Q) 91 | S.add(v) 92 | neighbors = adj[v].nonzero()[-1] 93 | for w in neighbors: 94 | new_weight = dist_v + adj[v,w] 95 | if dist[w] > new_weight: 96 | pred[w] = [v] 97 | sigma[w] = sigma[v] 98 | dist[w] = new_weight 99 | heappush(Q, (new_weight, w)) 100 | elif dist[w] == new_weight: 101 | pred[w].append(v) 102 | sigma[w] += sigma[v] 103 | S = sorted(S, key=lambda v: dist[v]) 104 | return S, pred, sigma 105 
| 106 | 107 | try: 108 | from ._betweenness_helper import betweenness 109 | except ImportError: 110 | try: 111 | import pyximport 112 | pyximport.install(setup_args={'include_dirs': np.get_include()}) 113 | from ._betweenness_helper import betweenness 114 | except ImportError: 115 | betweenness = _betweenness 116 | -------------------------------------------------------------------------------- /graphs/datasets/shapes.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | __all__ = ['MobiusStrip', 'FigureEight', 'SCurve'] 4 | 5 | 6 | class ParameterizedShape(object): 7 | def __init__(self, **param_info): 8 | for name,(lb,ub,is_monotone) in param_info.items(): 9 | assert lb <= ub, 'Lower bound must be <= upper bound for %s' % name 10 | assert (bool(is_monotone) == is_monotone 11 | ), 'monotonicity must be boolean for %s' % name 12 | self.param_info = param_info 13 | 14 | def evaluate(self, **param_values): 15 | raise NotImplementedError('subclasses must implement this') 16 | 17 | def point_cloud(self, num_points): 18 | param_values = {} 19 | for name,(lb,ub,is_monotone) in self.param_info.items(): 20 | if is_monotone: 21 | vals = np.linspace(lb, ub, num_points) 22 | else: 23 | vals = np.random.uniform(lb, ub, size=num_points) 24 | param_values[name] = vals 25 | return self.evaluate(**param_values) 26 | 27 | def trajectories(self, num_traj, points_per_traj): 28 | param_values = {} 29 | for name,(lb,ub,is_monotone) in self.param_info.items(): 30 | step = float(ub-lb)/points_per_traj 31 | shape = (num_traj, points_per_traj) 32 | if is_monotone: 33 | vals = np.random.normal(loc=step, scale=step/3, size=shape) 34 | else: 35 | vals = np.random.normal(loc=0, scale=step, size=shape) 36 | param_values[name] = np.cumsum(vals, axis=1) 37 | #TODO: random offsets for starting vals? 38 | return self.evaluate(**param_values) 39 | 40 | 41 | class MobiusStrip(ParameterizedShape): 42 | def __init__(self, radius=1.0, max_width=1.0): 43 | ParameterizedShape.__init__(self, 44 | theta=(0, 2*np.pi, True), 45 | width=(-max_width/2, max_width/2, False)) 46 | self.radius = radius 47 | 48 | def evaluate(self, theta=None, width=None): 49 | tmp = self.radius + width * np.cos(theta/2) 50 | X = np.empty(theta.shape + (3,)) 51 | X[...,0] = tmp * np.cos(theta) 52 | X[...,1] = tmp * np.sin(theta) 53 | X[...,2] = width * np.sin(theta/2) 54 | return X 55 | 56 | 57 | class FigureEight(ParameterizedShape): 58 | def __init__(self, radius=1.0, dimension=2): 59 | ParameterizedShape.__init__(self, 60 | theta=(0, 2*np.pi, True), 61 | width=(0, 1, False)) # width is only used in the 3d case 62 | self.radius = radius 63 | assert dimension in (2,3) 64 | self.dim = dimension 65 | 66 | def evaluate(self, theta=None, width=None): 67 | X = np.empty(theta.shape + (self.dim,)) 68 | X[...,0] = self.radius * np.sin(theta) 69 | # The only difference from a circle is this extra sin(theta) term. 
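# With x = r*sin(theta), we get y = r*sin(theta)*cos(theta) = (r/2)*sin(2*theta),
# so the curve passes through the origin twice per period, tracing a figure eight.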
70 | X[...,1] = X[...,0] * np.cos(theta) 71 | if self.dim == 3: 72 | X[...,2] = width 73 | return X 74 | 75 | 76 | class SCurve(ParameterizedShape): 77 | def __init__(self, radius=1.0): 78 | ParameterizedShape.__init__(self, 79 | theta=(-np.pi-1, np.pi+1, True), 80 | width=(-1, 1, False)) 81 | self.radius = radius 82 | 83 | def evaluate(self, theta=None, width=None): 84 | X = np.empty(theta.shape + (3,)) 85 | X[...,0] = np.sin(theta) 86 | X[...,2] = np.cos(theta) 87 | X[...,1] = width 88 | first_half = slice(0, theta.shape[-1]//2) 89 | X[...,first_half,2] = 2 + -X[...,first_half,2] 90 | return X 91 | -------------------------------------------------------------------------------- /graphs/mixins/tests/test_viz.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | from io import StringIO 4 | from scipy.sparse import csr_matrix 5 | from matplotlib import pyplot 6 | pyplot.switch_backend('template') 7 | 8 | from graphs import Graph 9 | 10 | 11 | class TestPlot(unittest.TestCase): 12 | def setUp(self): 13 | pairs = np.array([[0,1],[0,2],[1,2],[3,4]]) 14 | adj = [[0,1,2,0,0], 15 | [0,0,3,0,0], 16 | [0,0,0,0,0], 17 | [0,0,0,0,4], 18 | [0,0,0,0,0]] 19 | self.graphs = [ 20 | Graph.from_edge_pairs(pairs), 21 | Graph.from_edge_pairs(pairs, symmetric=True), 22 | Graph.from_adj_matrix(adj), 23 | Graph.from_adj_matrix(csr_matrix(adj)), 24 | ] 25 | self.coords = np.random.random((5, 3)) 26 | 27 | def test_plot_default(self): 28 | for G in self.graphs: 29 | G.plot(self.coords[:,:1]) # 1d plotting 30 | G.plot(self.coords[:,:2]) # 2d plotting 31 | G.plot(self.coords) # 3d plotting 32 | 33 | def test_plot_direction(self): 34 | for G in self.graphs: 35 | G.plot(self.coords[:,:2], directed=True) 36 | G.plot(self.coords[:,:2], directed=False) 37 | G.plot(self.coords, directed=True) 38 | G.plot(self.coords, directed=False) 39 | 40 | def test_plot_weighting(self): 41 | for G in self.graphs: 42 | G.plot(self.coords[:,:2], weighted=True) 43 | G.plot(self.coords[:,:2], weighted=False) 44 | G.plot(self.coords, weighted=True) 45 | G.plot(self.coords, weighted=False) 46 | 47 | def test_plot_styles(self): 48 | x = self.coords[:,:2] # use 2d coords, 3d _get_axis is slow 49 | for G in self.graphs: 50 | G.plot(x, edge_style='r--') 51 | G.plot(x, edge_style=dict(colors=[(0.5,1,0)]*4, linestyles=':')) 52 | G.plot(x, vertex_style='rx') 53 | G.plot(x, vertex_style=dict(c=[(0,0,0),(1,1,1)], marker='o')) 54 | G.plot(x, edge_style='k') 55 | G.plot(x, directed=True, edge_style='1') 56 | G.plot(x, edge_style='01') 57 | G.plot(x, directed=True, edge_style=' x') 58 | G.plot(x, edge_style='-.') 59 | G.plot(x, edge_style='k-') 60 | # Make sure we break with bogus styles 61 | with self.assertRaises(ValueError): 62 | G.plot(x, edge_style='z') 63 | with self.assertRaises(ValueError): 64 | G.plot(x, edge_style='::') 65 | with self.assertRaises(ValueError): 66 | G.plot(x, edge_style='oo') 67 | with self.assertRaises(ValueError): 68 | G.plot(x, edge_style='kk') 69 | 70 | def test_plot_fig(self): 71 | for G in self.graphs: 72 | G.plot(self.coords[:,:2], fig='new') 73 | G.plot(self.coords[:,:2], fig='current') 74 | 75 | def test_to_html(self): 76 | for G in self.graphs: 77 | buf = StringIO() 78 | # just make sure no exceptions are thrown 79 | G.to_html(buf, directed=False) 80 | buf.truncate(0) 81 | 82 | c = np.arange(5) 83 | G.to_html(buf, vertex_ids=c, directed=False, title='Test Page') 84 | buf.truncate(0) 85 | G.to_html(buf, vertex_colors=c, directed=False) 86 | 
buf.truncate(0) 87 | G.to_html(buf, vertex_labels=c, directed=False) 88 | buf.truncate(0) 89 | with self.assertRaises(ValueError): 90 | G.to_html(buf, vertex_colors=c, vertex_labels=c, directed=False) 91 | with self.assertRaises(ValueError): 92 | G.to_html(buf, vertex_ids=c[:2], directed=False) 93 | with self.assertRaises(ValueError): 94 | G.to_html(buf, vertex_colors=c[:2], directed=False) 95 | with self.assertRaises(ValueError): 96 | G.to_html(buf, vertex_labels=c[:2], directed=False) 97 | 98 | 99 | if __name__ == '__main__': 100 | unittest.main() 101 | -------------------------------------------------------------------------------- /graphs/datasets/mountain_car.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from matplotlib import pyplot as plt 3 | from scipy.interpolate import griddata 4 | 5 | __all__ = ['plot_mcar_basis', 'mountain_car_trajectories'] 6 | 7 | 8 | def plot_mcar_basis(G, X, title='Mountain Car graph'): 9 | _, axes = plt.subplots(nrows=2, ncols=2) 10 | G.plot(X, title=title, ax=axes[0,0]) 11 | 12 | emb = G.laplacian_eigenmaps(num_dims=3) 13 | 14 | x, y = X.T 15 | # Set up grids for a contour plot 16 | x_range = (x.min(), x.max()) 17 | y_range = (y.min(), y.max()) 18 | pad_x = 0.05 * -np.subtract.reduce(x_range) 19 | pad_y = 0.05 * -np.subtract.reduce(y_range) 20 | grid_x = np.linspace(x_range[0] - pad_x, x_range[1] + pad_x, 100) 21 | grid_y = np.linspace(y_range[0] - pad_y, y_range[1] + pad_y, 100) 22 | for i,(ax,z) in enumerate(zip(axes.flat[1:], emb.T)): 23 | grid_z = griddata((x, y), z, (grid_x[None], grid_y[:,None]), 24 | method='nearest') 25 | ax.contourf(grid_x, grid_y, grid_z) 26 | ax.plot(x, y, 'k,') 27 | ax.set_title('Basis %d' % (i+1)) 28 | return plt.show 29 | 30 | 31 | def mountain_car_trajectories(num_traj): 32 | '''Collect data using random hard-coded policies on MountainCar. 33 | 34 | num_traj : int, number of trajectories to collect 35 | 36 | Returns (trajectories, traces) 37 | ''' 38 | domain = MountainCar() 39 | slopes = np.random.normal(0, 0.01, size=num_traj) 40 | v0s = np.random.normal(0, 0.005, size=num_traj) 41 | trajectories = [] 42 | traces = [] 43 | norm = np.array((domain.MAX_POS-domain.MIN_POS, 44 | domain.MAX_VEL-domain.MIN_VEL)) 45 | for m,b in zip(slopes, v0s): 46 | mcar_policy = lambda s: 0 if s[0]*m + s[1] + b > 0 else 2 47 | start = (np.random.uniform(domain.MIN_POS,domain.MAX_POS), 48 | np.random.uniform(domain.MIN_VEL,domain.MAX_VEL)) 49 | samples = _run_episode(mcar_policy, domain, start, max_iters=40) 50 | # normalize 51 | samples.state /= norm 52 | samples.next_state /= norm 53 | traces.append(samples) 54 | if samples.reward[-1] == 0: 55 | # Don't include the warp to the final state. 
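# (reward_for returns 0 only at the goal, so a final reward of 0 marks an
# episode that ended by reaching the goal state.)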
56 | trajectories.append(samples.state[:-1]) 57 | else: 58 | trajectories.append(samples.state) 59 | 60 | return trajectories, traces 61 | 62 | 63 | def _run_episode(policy_action, domain, state, max_iters=1e100): 64 | action = policy_action(state) 65 | samples = [] 66 | while not domain.finished(state): 67 | # get new state and action 68 | new_state = domain.take_action(state, action) 69 | new_action = policy_action(new_state) 70 | # update histories 71 | reward = domain.reward_for(state) 72 | samples.append((state, action, reward, new_state, new_action)) 73 | if len(samples) >= max_iters: 74 | break 75 | state = new_state 76 | action = new_action 77 | ds = len(state) 78 | names = ('state','action','reward','next_state','next_action') 79 | formats = (('f',(ds,)),int,float,('f',(ds,)),int) 80 | dtype = dict(names=names, formats=formats) 81 | return np.array(samples, dtype).view(np.recarray) 82 | 83 | 84 | class MountainCar(object): 85 | # directions: fwd neu rev 86 | action_dirs = [1, 0, -1] 87 | NUM_ACTIONS = 3 88 | 89 | GOAL_POS = 0.5 90 | DT = 0.001 91 | 92 | MIN_POS = -1.2 93 | MAX_POS = 0.5 94 | MIN_VEL = -0.07 95 | MAX_VEL = 0.07 96 | 97 | def __init__(self, gravity=-0.0025): 98 | self.gravity = gravity 99 | 100 | def reward_for(self, state): 101 | return 0 if state[0] >= MountainCar.GOAL_POS else -1 102 | 103 | def finished(self, state): 104 | return self.reward_for(state) == 0 105 | 106 | def take_action(self, state, action): 107 | p,v = state 108 | a = MountainCar.action_dirs[action] 109 | new_v = v + (MountainCar.DT*a) + (self.gravity*np.cos(3*p)) 110 | new_v = min(MountainCar.MAX_VEL, max(MountainCar.MIN_VEL, new_v)) 111 | new_p = p + new_v 112 | if new_p < MountainCar.MIN_POS: 113 | new_p = MountainCar.MIN_POS 114 | new_v = 0 115 | elif new_p > MountainCar.MAX_POS: 116 | new_p = MountainCar.MAX_POS 117 | new_v = 0 118 | return new_p, new_v 119 | -------------------------------------------------------------------------------- /graphs/mixins/_betweenness_helper.pyx: -------------------------------------------------------------------------------- 1 | #distutils: language = c++ 2 | #cython: boundscheck=False, wraparound=True, cdivision=True 3 | cimport numpy as np 4 | import numpy as np 5 | import scipy.sparse as ss 6 | from libcpp.deque cimport deque 7 | from libcpp.stack cimport stack 8 | from libcpp.pair cimport pair 9 | from libcpp.queue cimport priority_queue 10 | 11 | cdef double INF = float('inf') 12 | ctypedef np.int_t intc 13 | ctypedef dict (*sssp_fn)(object, intc, intc[::1], double[::1], stack[intc]&) 14 | 15 | cpdef betweenness(adj, bint weighted, bint vertex): 16 | cdef sssp_fn sssp 17 | if weighted: 18 | sssp = &_sssp_weighted 19 | else: 20 | sssp = &_sssp_unweighted 21 | # sigma[v]: number of shortest paths from s->v 22 | # delta[v]: dependency of s on v 23 | cdef intc s, w, v, n = adj.shape[0] 24 | cdef double[::1] delta = np.zeros(n) 25 | cdef double[::1] dist = np.zeros(n) 26 | cdef intc[::1] sigma = np.zeros(n, dtype=np.int) 27 | cdef double coeff 28 | cdef stack[intc] S 29 | cdef dict pred 30 | cdef double[::1] vbtw 31 | if vertex: 32 | # Brandes algorithm for vertex betweenness 33 | vbtw = np.zeros(n) 34 | for s in range(n): 35 | pred = sssp(adj, s, sigma, dist, S) 36 | delta[:] = 0 37 | while not S.empty(): 38 | w = S.top() 39 | coeff = (1.0 + delta[w]) / sigma[w] 40 | for v in pred.get(w, []): 41 | delta[v] += sigma[v] * coeff 42 | if w != s: 43 | vbtw[w] += delta[w] 44 | S.pop() 45 | return np.array(vbtw, dtype=float) 46 | # Brandes variant for edge 
betweenness 47 | # set up betweenness container with correct sparsity pattern 48 | ebtw = ss.csr_matrix(adj, dtype=float, copy=True) 49 | ebtw.eliminate_zeros() 50 | ebtw.data[:] = 0 51 | for s in range(n): 52 | pred = sssp(adj, s, sigma, dist, S) 53 | delta[:] = 0 54 | while not S.empty(): 55 | w = S.top() 56 | coeff = (1.0 + delta[w]) / sigma[w] 57 | for v in pred.get(w, []): 58 | c = sigma[v] * coeff 59 | ebtw[v,w] += c 60 | delta[v] += c 61 | S.pop() 62 | return ebtw.data 63 | 64 | 65 | cdef dict _sssp_unweighted(adj, intc s, intc[::1] sigma, double[::1] dist, stack[intc]& S): 66 | cdef intc v, w, i, j, widx 67 | cdef double new_weight 68 | cdef dict pred = {} 69 | sigma[:] = 0 70 | sigma[s] = 1 71 | dist[:] = INF 72 | dist[s] = 0 73 | cdef deque[intc] Q 74 | Q.push_back(s) 75 | while not Q.empty(): 76 | v = Q.front() 77 | Q.pop_front() 78 | S.push(v) 79 | new_weight = dist[v] + 1 80 | i = adj.indptr[v] 81 | j = adj.indptr[v+1] 82 | for widx in range(i, j): 83 | w = adj.indices[widx] 84 | if dist[w] > new_weight: 85 | pred[w] = [v] 86 | sigma[w] = sigma[v] 87 | dist[w] = new_weight 88 | Q.push_back(w) 89 | elif dist[w] == new_weight: 90 | pred[w].append(v) 91 | sigma[w] += sigma[v] 92 | return pred 93 | 94 | 95 | cdef dict _sssp_weighted(adj, intc s, intc[::1] sigma, double[::1] dist, stack[intc]& S): 96 | cdef intc v, w, i, j, widx 97 | cdef double dist_v, new_weight, d 98 | cdef set SS = set() 99 | cdef dict pred = {} 100 | sigma[:] = 0 101 | sigma[s] = 1 102 | dist[:] = INF 103 | dist[s] = 0 104 | cdef priority_queue[pair[double,intc]] Q 105 | Q.push(pair[double,intc](0.,s)) 106 | while not Q.empty(): 107 | tmp = Q.top() 108 | Q.pop() 109 | dist_v = tmp.first 110 | v = tmp.second 111 | SS.add(v) 112 | i = adj.indptr[v] 113 | j = adj.indptr[v+1] 114 | for widx in range(i, j): 115 | w = adj.indices[widx] 116 | d = adj.data[widx] 117 | new_weight = dist_v + d 118 | if dist[w] > new_weight: 119 | pred[w] = [v] 120 | sigma[w] = sigma[v] 121 | dist[w] = new_weight 122 | Q.push(pair[double,intc](new_weight, w)) 123 | elif dist[w] == new_weight: 124 | pred[w].append(v) 125 | sigma[w] += sigma[v] 126 | # XXX: ugly workaround: using lambdas/comprehensions in cdef -> segfault 127 | cdef list foo = [(dist[v], v) for v in SS] 128 | for _,w in sorted(foo): 129 | S.push(w) 130 | return pred 131 | -------------------------------------------------------------------------------- /graphs/mixins/tests/test_analysis.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | import warnings 4 | from numpy.testing import assert_array_equal, assert_array_almost_equal 5 | from scipy.sparse import coo_matrix 6 | from graphs import Graph 7 | 8 | PAIRS = np.array([[0,1],[0,2],[1,2],[2,0],[3,4],[4,3]]) 9 | ADJ = [[0,1,1,0,0], 10 | [0,0,1,0,0], 11 | [1,0,0,0,0], 12 | [0,0,0,0,1], 13 | [0,0,0,1,0]] 14 | 15 | 16 | class TestAnalysis(unittest.TestCase): 17 | def setUp(self): 18 | self.graphs = [ 19 | Graph.from_edge_pairs(PAIRS), 20 | Graph.from_adj_matrix(ADJ), 21 | Graph.from_adj_matrix(coo_matrix(ADJ)), 22 | ] 23 | 24 | def test_connected_components(self): 25 | for G in self.graphs: 26 | n, labels = G.connected_components() 27 | self.assertEqual(2, n) 28 | assert_array_equal(labels, [0,0,0,1,1]) 29 | 30 | def test_ave_laplacian(self): 31 | g = Graph.from_adj_matrix([[0,1,2],[1,0,0],[2,0,0]]) 32 | expected = np.array([[1,-0.5,0],[-0.5,1,0],[0,0,1]]) 33 | assert_array_almost_equal(g.ave_laplacian(), expected) 34 | 35 | def 
test_directed_laplacian(self): 36 | expected = np.array([ 37 | [0.239519, -0.05988, -0.179839, 0, 0], 38 | [-0.05988, 0.120562,-0.060281, 0, 0], 39 | [-0.179839,-0.060281, 0.239919, 0, 0], 40 | [0, 0, 0, 0.2,-0.2], 41 | [0, 0, 0, -0.2, 0.2]]) 42 | for G in self.graphs: 43 | L = G.directed_laplacian() 44 | assert_array_almost_equal(L, expected) 45 | 46 | # test non-convergence case 47 | with warnings.catch_warnings(record=True) as w: 48 | self.graphs[0].directed_laplacian(max_iter=2) 49 | self.assertEqual(len(w), 1) 50 | self.assertEqual(str(w[0].message), 51 | 'phi failed to converge after 2 iterations') 52 | 53 | def test_bandwidth(self): 54 | for G in self.graphs: 55 | self.assertEqual(G.bandwidth(), 2) 56 | 57 | def test_profile(self): 58 | for G in self.graphs: 59 | self.assertEqual(G.profile(), 1) 60 | 61 | def test_betweenness(self): 62 | for G in self.graphs: 63 | G.symmetrize(copy=False) 64 | _test_btw(G, 'vertex', False, False, np.zeros(5)) 65 | _test_btw(G, 'vertex', False, True, np.zeros(5)) 66 | _test_btw(G, 'edge', False, False, np.ones(8)/2.) 67 | _test_btw(G, 'edge', False, True, np.ones(8)) 68 | if G.is_weighted(): 69 | _test_btw(G, 'vertex', True, False, [0,0.5,0,0,0]) 70 | _test_btw(G, 'vertex', True, True, [0,1,0,0,0]) 71 | _test_btw(G, 'edge', True, False, np.array([3,1,3,3,1,3,2,2])/4.) 72 | _test_btw(G, 'edge', True, True, np.array([3,1,3,3,1,3,2,2])/2.) 73 | 74 | def test_betweenness_weighted(self): 75 | # test a weighted graph with different kinds of weights 76 | G = Graph.from_adj_matrix([[0,1,2,0],[1,0,0,3],[2,0,0,1],[0,3,1,0]]) 77 | _test_btw(G, 'vertex', False, False, [0.5]*4) 78 | _test_btw(G, 'vertex', False, True, [1]*4) 79 | _test_btw(G, 'vertex', True, False, [1,0,1,0]) 80 | _test_btw(G, 'vertex', True, True, [2,0,2,0]) 81 | _test_btw(G, 'edge', False, False, [1,1,1,1,1,1,1,1]) 82 | _test_btw(G, 'edge', False, True, [2,2,2,2,2,2,2,2]) 83 | _test_btw(G, 'edge', True, False, np.array([2,3,2,1,3,2,1,2])/2.) 
84 | _test_btw(G, 'edge', True, True, [2,3,2,1,3,2,1,2]) 85 | 86 | def test_eccentricity(self): 87 | for G in self.graphs: 88 | # unconnected graphs have infinite eccentricity 89 | assert_array_equal(G.eccentricity(), np.inf+np.ones(5)) 90 | g = Graph.from_adj_matrix([[0,1,2],[1,0,0],[2,0,0]]) 91 | assert_array_equal(g.eccentricity(), [2,3,3]) 92 | 93 | def test_diameter(self): 94 | for G in self.graphs: 95 | # unconnected graphs have infinite diameter 96 | self.assertEqual(G.diameter(), np.inf) 97 | g = Graph.from_adj_matrix([[0,1,2],[1,0,0],[2,0,0]]) 98 | self.assertEqual(g.diameter(), 3) 99 | 100 | def test_radius(self): 101 | for G in self.graphs: 102 | # unconnected graphs have infinite radius 103 | self.assertEqual(G.radius(), np.inf) 104 | g = Graph.from_adj_matrix([[0,1,2],[1,0,0],[2,0,0]]) 105 | self.assertEqual(g.radius(), 2) 106 | 107 | 108 | def _test_btw(G, k, w, d, exp): 109 | assert_array_equal(G.betweenness(kind=k, weighted=w, directed=d), exp) 110 | 111 | if __name__ == '__main__': 112 | unittest.main() 113 | -------------------------------------------------------------------------------- /graphs/mixins/tests/test_label.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | from numpy.testing import assert_array_equal 4 | from scipy.sparse import coo_matrix 5 | from sklearn.metrics.cluster import adjusted_rand_score 6 | from graphs import Graph 7 | from graphs.construction import neighbor_graph 8 | 9 | 10 | class TestLabel(unittest.TestCase): 11 | 12 | def _make_blob_graphs(self, k=11): 13 | pts = np.random.random(size=(20, 2)) 14 | pts[10:] += 2 15 | labels = np.zeros(20) 16 | labels[10:] = 1 17 | G_sparse = neighbor_graph(pts, k=k).symmetrize() 18 | G_dense = Graph.from_adj_matrix(G_sparse.matrix('dense')) 19 | return (G_sparse, G_dense), labels 20 | 21 | def test_greedy_coloring(self): 22 | pairs = np.array([[0,1],[0,2],[1,0],[1,2],[2,0],[2,1],[3,4],[4,3]]) 23 | adj = [[0,1,1,0,0], 24 | [1,0,1,0,0], 25 | [1,1,0,0,0], 26 | [0,0,0,0,1], 27 | [0,0,0,1,0]] 28 | test_cases = [ 29 | Graph.from_edge_pairs(pairs), 30 | Graph.from_adj_matrix(adj), 31 | Graph.from_adj_matrix(coo_matrix(adj)), 32 | ] 33 | for G in test_cases: 34 | assert_array_equal([1,2,3,1,2], G.color_greedy()) 35 | 36 | def test_bicolor_spectral(self): 37 | pairs = np.array([[0,1],[0,2],[1,0],[1,2],[2,0],[2,1],[2,3],[3,2]]) 38 | adj = [[0,1,1,0], 39 | [1,0,1,0], 40 | [1,1,0,1], 41 | [0,0,1,0]] 42 | test_cases = [ 43 | Graph.from_edge_pairs(pairs), 44 | Graph.from_adj_matrix(adj), 45 | Graph.from_adj_matrix(coo_matrix(adj)), 46 | ] 47 | expected = np.array([1,1,0,1], dtype=bool) 48 | for G in test_cases: 49 | assert_array_equal(expected, G.bicolor_spectral()) 50 | 51 | def test_spectral_clustering(self): 52 | blob_graphs, expected = self._make_blob_graphs(k=11) 53 | 54 | for g in blob_graphs: 55 | labels = g.cluster_spectral(2, kernel='rbf') 56 | self.assertGreater(adjusted_rand_score(expected, labels), 0.95) 57 | 58 | def test_nn_classifier(self): 59 | blob_graphs, expected = self._make_blob_graphs(k=4) 60 | partial = expected.copy() 61 | partial[1:-1] = -1 62 | 63 | for g in blob_graphs: 64 | labels = g.classify_nearest(partial) 65 | self.assertGreater(adjusted_rand_score(expected, labels), 0.95) 66 | 67 | def test_lgc_classifier(self): 68 | blob_graphs, expected = self._make_blob_graphs(k=11) 69 | partial = expected.copy() 70 | partial[1:-1] = -1 71 | 72 | for g in blob_graphs: 73 | labels = g.classify_lgc(partial, kernel='rbf', 
alpha=0.2, tol=1e-3, 74 | max_iter=30) 75 | self.assertGreater(adjusted_rand_score(expected, labels), 0.95) 76 | 77 | def test_harmonic_classifier(self): 78 | blob_graphs, expected = self._make_blob_graphs(k=4) 79 | partial = expected.copy() 80 | partial[1:-1] = -1 81 | 82 | for g in blob_graphs: 83 | labels = g.classify_harmonic(partial, use_CMN=True) 84 | self.assertGreater(adjusted_rand_score(expected, labels), 0.95) 85 | 86 | def test_regression(self): 87 | t = np.linspace(0, 1, 31) 88 | pts = np.column_stack((np.sin(t), np.cos(t))) 89 | G = neighbor_graph(pts, k=3).symmetrize() 90 | y_mask = slice(None, None, 2) 91 | 92 | # test the interpolated case 93 | x = G.regression(t[y_mask], y_mask) 94 | assert_array_equal(t, np.linspace(0, 1, 31)) # ensure t hasn't changed 95 | self.assertLess(np.linalg.norm(t - x), 0.15) 96 | 97 | # test the boolean mask case 98 | y_mask = np.zeros_like(t, dtype=bool) 99 | y_mask[::2] = True 100 | x = G.regression(t[y_mask], y_mask) 101 | self.assertLess(np.linalg.norm(t - x), 0.15) 102 | 103 | # test the penalized case 104 | x = G.regression(t[y_mask], y_mask, smoothness_penalty=1e-4) 105 | self.assertLess(np.linalg.norm(t - x), 0.15) 106 | 107 | # test no kernel + dense laplacian case 108 | dG = Graph.from_adj_matrix(G.matrix('dense')) 109 | x = dG.regression(t[y_mask], y_mask, kernel='none') 110 | self.assertLess(np.linalg.norm(t - x), 0.25) 111 | x = dG.regression(t[y_mask], y_mask, smoothness_penalty=1e-4, kernel='none') 112 | self.assertLess(np.linalg.norm(t - x), 0.25) 113 | 114 | # test the multidimensional regression case 115 | tt = np.column_stack((t, t[::-1])) 116 | x = G.regression(tt[y_mask], y_mask) 117 | self.assertLess(np.linalg.norm(tt - x), 0.2) 118 | 119 | # check for bad inputs 120 | with self.assertRaisesRegexp(ValueError, r'^Invalid shape of y array'): 121 | G.regression([], y_mask) 122 | 123 | if __name__ == '__main__': 124 | unittest.main() 125 | -------------------------------------------------------------------------------- /graphs/construction/saffron.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function 2 | 3 | import numpy as np 4 | import scipy.sparse as ss 5 | import warnings 6 | from sklearn.metrics.pairwise import pairwise_distances 7 | from sklearn.preprocessing import normalize 8 | 9 | from graphs import Graph 10 | from ..mini_six import range 11 | from .neighbors import neighbor_graph 12 | 13 | __all__ = ['saffron'] 14 | 15 | 16 | def saffron(X, q=32, k=4, tangent_dim=1, curv_thresh=0.95, decay_rate=0.9, 17 | max_iter=15, verbose=False): 18 | ''' 19 | SAFFRON graph construction method. 20 | 21 | X : (n,d)-array of coordinates 22 | q : int, median number of candidate friends per vertex 23 | k : int, number of friends to select per vertex, k < q 24 | tangent_dim : int, dimensionality of manifold tangent space 25 | curv_thresh : float, tolerance to curvature, lambda in the paper 26 | decay_rate : float, controls step size per iteration, between 0 and 1 27 | max_iter : int, cap on number of iterations 28 | verbose : bool, print goodness measure per iteration when True 29 | 30 | From "Tangent Space Guided Intelligent Neighbor Finding", 31 | by Gashler & Martinez, 2011. 
32 | See http://axon.cs.byu.edu/papers/gashler2011ijcnn1.pdf 33 | ''' 34 | n = len(X) 35 | dist = pairwise_distances(X) 36 | idx = np.argpartition(dist, q)[:, q] 37 | # radius for finding candidate friends: median distance to qth neighbor 38 | r = np.median(dist[np.arange(n), idx]) 39 | 40 | # make candidate graph + weights 41 | W = neighbor_graph(dist, precomputed=True, epsilon=r).matrix('csr') 42 | # NOTE: this differs from the paper, where W.data[:] = 1 initially 43 | W.data[:] = 1 / W.data 44 | # row normalize 45 | normalize(W, norm='l1', axis=1, copy=False) 46 | # XXX: hacky densify 47 | W = W.toarray() 48 | 49 | # iterate to learn optimal weights 50 | prev_goodness = 1e-12 51 | for it in range(max_iter): 52 | goodness = 0 53 | S = _estimate_tangent_spaces(X, W, tangent_dim) 54 | # find aligned candidates 55 | for i, row in enumerate(W): 56 | nbrs = row.nonzero()[-1] 57 | 58 | # compute alignment scores 59 | edges = X[nbrs] - X[i] 60 | edge_norms = (edges**2).sum(axis=1) 61 | a1 = (edges.dot(S[i])**2).sum(axis=1) / edge_norms 62 | a2 = (np.einsum('ij,ijk->ik', edges, S[nbrs])**2).sum(axis=1) / edge_norms 63 | a3 = _principal_angle(S[i], S[nbrs]) ** 2 64 | x = (np.minimum(curv_thresh, a1) * 65 | np.minimum(curv_thresh, a2) * 66 | np.minimum(curv_thresh, a3)) 67 | 68 | # decay weight of least-aligned candidates 69 | excess = x.shape[0] - k 70 | if excess > 0: 71 | bad_idx = np.argpartition(x, excess-1)[:excess] 72 | W[i, nbrs[bad_idx]] *= decay_rate 73 | W[i] /= W[i].sum() 74 | 75 | # update goodness measure (weighted alignment) 76 | goodness += x.dot(W[i,nbrs]) 77 | 78 | if verbose: # pragma: no cover 79 | goodness /= n 80 | print(it, goodness, goodness / prev_goodness) 81 | if goodness / prev_goodness <= 1.0001: 82 | break 83 | prev_goodness = goodness 84 | else: 85 | warnings.warn('Failed to converge after %d iterations.' % max_iter) 86 | 87 | # use the largest k weights for each row of W, weighted by original distance 88 | indptr, indices, data = [0], [], [] 89 | for i, row in enumerate(W): 90 | nbrs = row.nonzero()[-1] 91 | if len(nbrs) > k: 92 | nbrs = nbrs[np.argpartition(row[nbrs], len(nbrs)-k)[-k:]] 93 | indices.extend(nbrs) 94 | indptr.append(len(nbrs)) 95 | data.extend(dist[i, nbrs]) 96 | indptr = np.cumsum(indptr) 97 | data = np.array(data) 98 | indices = np.array(indices) 99 | W = ss.csr_matrix((data, indices, indptr), shape=W.shape) 100 | return Graph.from_adj_matrix(W) 101 | 102 | 103 | def _estimate_tangent_spaces(X, W, dim): 104 | # compute many PCAs in batch 105 | covs = np.empty(X.shape + (X.shape[1],)) 106 | for i, row in enumerate(W): 107 | nbrs = row.nonzero()[-1] 108 | xx = X[nbrs] * row[nbrs,None] # weight samples by W 109 | xx -= xx.mean(axis=0) 110 | covs[i] = xx.T.dot(xx) 111 | # compute all the PCs at once 112 | _, vecs = np.linalg.eigh(covs) 113 | return vecs[:,:,-dim:] 114 | 115 | 116 | def _principal_angle(a, B): 117 | '''a is (d,t), B is (k,d,t)''' 118 | # TODO: check case for t = d-1 119 | if a.shape[1] == 1: 120 | return a.T.dot(B)[0,:,0] 121 | 122 | # find normals that maximize distance when projected 123 | x1 = np.einsum('abc,adc->abd', B, B).dot(a) - a # b.dot(b.T).dot(a) - a 124 | x2 = np.einsum('ab,cad->cbd', a.dot(a.T), B) - B # a.dot(a.T).dot(b) - b 125 | xx = np.vstack((x1, x2)) 126 | 127 | # batch PCA (1st comp. 
only) 128 | xx -= xx.mean(axis=1)[:,None] 129 | c = np.einsum('abc,abd->acd', xx, xx) 130 | _, vecs = np.linalg.eigh(c) 131 | fpc = vecs[:,:,-1] 132 | fpc1 = fpc[:len(x1)] 133 | fpc2 = fpc[len(x1):] 134 | 135 | # a.dot(fpc1).dot(b.dot(fpc2)) 136 | lhs = a.dot(fpc1.T).T 137 | rhs = np.einsum('abc,ac->ab', B, fpc2) 138 | return np.einsum('ij,ij->i', lhs, rhs) 139 | -------------------------------------------------------------------------------- /graphs/mixins/analysis.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, absolute_import, print_function 2 | import numpy as np 3 | import scipy.sparse as ss 4 | import scipy.sparse.csgraph as ssc 5 | import warnings 6 | from ..mini_six import range 7 | from ._betweenness import betweenness 8 | 9 | 10 | class AnalysisMixin(object): 11 | 12 | # scipy.sparse.csgraph wrappers 13 | def connected_components(self, **kwargs): 14 | '''Mirrors the scipy.sparse.csgraph function of the same name: 15 | connected_components(G, directed=True, connection='weak', 16 | return_labels=True) 17 | ''' 18 | return ssc.connected_components(self.matrix(), **kwargs) 19 | 20 | def laplacian(self, **kwargs): 21 | '''Mirrors the scipy.sparse.csgraph function of the same name: 22 | laplacian(G, normed=False, return_diag=False, use_out_degree=False) 23 | ''' 24 | return ssc.laplacian(self.matrix(), **kwargs) 25 | 26 | def shortest_path(self, directed=None, weighted=None, method='auto', 27 | return_predecessors=False, limit=np.inf, indices=None): 28 | '''Mirrors the scipy.sparse.csgraph function of the same name.''' 29 | d = directed if directed is not None else self.is_directed() 30 | w = weighted if weighted is not None else self.is_weighted() 31 | 32 | adj = self.matrix('dense', 'csr', 'csc') 33 | if not ss.issparse(adj): 34 | adj = np.ascontiguousarray(adj) 35 | 36 | # dispatch based on presence of limit and/or indices 37 | if np.isinf(limit) and indices is None: 38 | overwrite = not (hasattr(self, '_adj') and self._adj is adj) 39 | return ssc.shortest_path(adj, method=method, directed=d, 40 | return_predecessors=return_predecessors, 41 | unweighted=(not w), overwrite=overwrite) 42 | return ssc.dijkstra(adj, directed=d, indices=indices, 43 | return_predecessors=return_predecessors, 44 | unweighted=(not w), limit=limit) 45 | 46 | def ave_laplacian(self): 47 | '''Another kind of laplacian normalization, used in the matlab PVF code. 48 | Uses the formula: L = I - D^{-1} * W''' 49 | W = self.matrix('dense') 50 | # calculate -inv(D) 51 | Dinv = W.sum(axis=0) 52 | mask = Dinv!=0 53 | Dinv[mask] = -1./Dinv[mask] 54 | # calculate -inv(D) * W 55 | lap = (Dinv * W.T).T 56 | # add I 57 | lap.flat[::W.shape[0]+1] += 1 58 | # symmetrize 59 | return (lap + lap.T) / 2.0 60 | 61 | def directed_laplacian(self, D=None, eta=0.99, tol=1e-12, max_iter=500): 62 | '''Computes the directed combinatorial graph laplacian. 
63 | http://www-all.cs.umass.edu/pubs/2007/johns_m_ICML07.pdf 64 | 65 | D: (optional) N-array of degrees 66 | eta: probability of not teleporting (see the paper) 67 | tol, max_iter: convergence params for Perron vector calculation 68 | ''' 69 | W = self.matrix('dense') 70 | n = W.shape[0] 71 | if D is None: 72 | D = W.sum(axis=1) 73 | # compute probability transition matrix 74 | with np.errstate(invalid='ignore', divide='ignore'): 75 | P = W.astype(float) / D[:,None] 76 | P[D==0] = 0 77 | # start at the uniform distribution Perron vector (phi) 78 | old_phi = np.ones(n) / n 79 | # iterate to the fixed point (teleporting random walk) 80 | for _ in range(max_iter): 81 | phi = eta * old_phi.dot(P) + (1-eta)/n 82 | if np.abs(phi - old_phi).max() < tol: 83 | break 84 | old_phi = phi 85 | else: 86 | warnings.warn("phi failed to converge after %d iterations" % max_iter) 87 | # L = Phi - (Phi P + P' Phi)/2 88 | return np.diag(phi) - ((phi * P.T).T + P.T * phi)/2 89 | 90 | def bandwidth(self): 91 | """Computes the 'bandwidth' of a graph.""" 92 | return np.abs(np.diff(self.pairs(), axis=1)).max() 93 | 94 | def profile(self): 95 | """Measure of bandedness, also known as 'envelope size'.""" 96 | leftmost_idx = np.argmax(self.matrix('dense').astype(bool), axis=0) 97 | return (np.arange(self.num_vertices()) - leftmost_idx).sum() 98 | 99 | def betweenness(self, kind='vertex', directed=None, weighted=None): 100 | '''Computes the betweenness centrality of a graph. 101 | kind : string, either 'vertex' (default) or 'edge' 102 | directed : bool, defaults to self.is_directed() 103 | weighted : bool, defaults to self.is_weighted() 104 | ''' 105 | assert kind in ('vertex', 'edge'), 'Invalid kind argument: ' + kind 106 | weighted = weighted is not False and self.is_weighted() 107 | directed = directed if directed is not None else self.is_directed() 108 | adj = self.matrix('csr') 109 | btw = betweenness(adj, weighted, kind=='vertex') 110 | # normalize if undirected 111 | if not directed: 112 | btw /= 2. 113 | return btw 114 | 115 | def eccentricity(self, directed=None, weighted=None): 116 | '''Maximum distance from each vertex to any other vertex.''' 117 | sp = self.shortest_path(directed=directed, weighted=weighted) 118 | return sp.max(axis=0) 119 | 120 | def diameter(self, directed=None, weighted=None): 121 | '''Finds the length of the longest shortest path, 122 | a.k.a. 
the maximum graph eccentricity.''' 123 | return self.eccentricity(directed, weighted).max() 124 | 125 | def radius(self, directed=None, weighted=None): 126 | '''minimum graph eccentricity''' 127 | return self.eccentricity(directed, weighted).min() 128 | -------------------------------------------------------------------------------- /graphs/construction/tests/test_geometric.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import unittest 3 | from numpy.testing import assert_array_equal, assert_array_almost_equal 4 | from sklearn.metrics import pairwise_distances 5 | 6 | from graphs.construction import ( 7 | delaunay_graph, urquhart_graph, gabriel_graph, relative_neighborhood_graph) 8 | from graphs.construction.geometric import _find_relative_neighbors 9 | 10 | 11 | class TestGeometric(unittest.TestCase): 12 | def setUp(self): 13 | self.pts = np.array([ 14 | [0.192,0.622],[0.438,0.785],[0.780,0.273],[0.276,0.802],[0.958,0.876], 15 | [0.358,0.501],[0.683,0.713],[0.370,0.561],[0.503,0.014],[0.773,0.883]]) 16 | 17 | def test_delaunay(self): 18 | expected = np.array([ 19 | [0, 0, 0, 1, 0, 1, 0, 1, 1, 0], 20 | [0, 0, 0, 1, 0, 0, 1, 1, 0, 1], 21 | [0, 0, 0, 0, 1, 1, 1, 0, 1, 0], 22 | [1, 1, 0, 0, 0, 0, 0, 1, 0, 1], 23 | [0, 0, 1, 0, 0, 0, 1, 0, 0, 1], 24 | [1, 0, 1, 0, 0, 0, 1, 1, 1, 0], 25 | [0, 1, 1, 0, 1, 1, 0, 1, 0, 1], 26 | [1, 1, 0, 1, 0, 1, 1, 0, 0, 0], 27 | [1, 0, 1, 0, 0, 1, 0, 0, 0, 0], 28 | [0, 1, 0, 1, 1, 0, 1, 0, 0, 0]], dtype=float) 29 | G = delaunay_graph(self.pts) 30 | assert_array_equal(G.matrix('dense'), expected) 31 | 32 | # with edge weights 33 | G = delaunay_graph(self.pts, weighted=True) 34 | expected[expected!=0] = [ 35 | 0.198635, 0.205419, 0.188162, 0.682924, 0.16289, 0.255361, 36 | 0.234094, 0.34904, 0.628723, 0.479654, 0.450565, 0.379223, 37 | 0.198635, 0.16289, 0.258683, 0.503557, 0.628723, 0.319678, 38 | 0.185132, 0.205419, 0.479654, 0.388032, 0.061188, 0.508128, 39 | 0.255361, 0.450565, 0.319678, 0.388032, 0.347955, 0.192354, 40 | 0.188162, 0.234094, 0.258683, 0.061188, 0.347955, 0.682924, 41 | 0.379223, 0.508128, 0.34904, 0.503557, 0.185132, 0.192354] 42 | assert_array_almost_equal(G.matrix('dense'), expected) 43 | 44 | def test_urquhart(self): 45 | expected = np.array([ 46 | [0, 0, 0, 1, 0, 1, 0, 1, 0, 0], 47 | [0, 0, 0, 1, 0, 0, 1, 1, 0, 1], 48 | [0, 0, 0, 0, 0, 1, 1, 0, 1, 0], 49 | [1, 1, 0, 0, 0, 0, 0, 0, 0, 0], 50 | [0, 0, 0, 0, 0, 0, 1, 0, 0, 1], 51 | [1, 0, 1, 0, 0, 0, 1, 1, 1, 0], 52 | [0, 1, 1, 0, 1, 1, 0, 1, 0, 1], 53 | [1, 1, 0, 0, 0, 1, 1, 0, 0, 0], 54 | [0, 0, 1, 0, 0, 1, 0, 0, 0, 0], 55 | [0, 1, 0, 0, 1, 0, 1, 0, 0, 0]], dtype=float) 56 | G = urquhart_graph(self.pts) 57 | assert_array_equal(G.matrix('dense'), expected) 58 | 59 | # with edge weights 60 | G = urquhart_graph(self.pts, weighted=True) 61 | expected[expected!=0] = [ 62 | 0.198635, 0.205419, 0.188162, 0.16289, 0.255361, 0.234094, 63 | 0.34904, 0.479654, 0.450565, 0.379223, 0.198635, 0.16289, 64 | 0.319678, 0.185132, 0.205419, 0.479654, 0.388032, 0.061188, 65 | 0.508128, 0.255361, 0.450565, 0.319678, 0.388032, 0.347955, 66 | 0.192354, 0.188162, 0.234094, 0.061188, 0.347955, 0.379223, 67 | 0.508128, 0.34904, 0.185132, 0.192354] 68 | assert_array_almost_equal(G.matrix('dense'), expected) 69 | 70 | def test_gabriel(self): 71 | expected = np.array([ 72 | [0,3], [0,7], [1,3], [1,6], [1,7], [2,5], [2,6], [2,8], [3,7], [4,9], 73 | [5,7], [5,8], [6,9]]) 74 | expected = np.vstack((expected, expected[:,::-1])) 75 | G = 
gabriel_graph(self.pts) 76 | assert_array_equal(G.pairs(), expected) 77 | 78 | # with edge weights 79 | G = gabriel_graph(self.pts, weighted=True) 80 | adj = np.zeros((10,10)) 81 | idx = [3,7,13,16,17,25,26,28,30,31,37,49,52,57,58, 82 | 61,62,69,70,71,73,75,82,85,94,96] 83 | adj.flat[idx] = [ 84 | 0.198635, 0.188162, 0.16289, 0.255361, 0.234094, 0.479654, 85 | 0.450565, 0.379223, 0.198635, 0.16289, 0.258683, 0.185132, 86 | 0.479654, 0.061188, 0.508128, 0.255361, 0.450565, 0.192354, 87 | 0.188162, 0.234094, 0.258683, 0.061188, 0.379223, 0.508128, 88 | 0.185132, 0.192354] 89 | assert_array_almost_equal(G.matrix('dense'), adj) 90 | 91 | def test_relative_neighborhood(self): 92 | dist = pairwise_distances(self.pts) 93 | expected = np.array([ 94 | [0,3], [0,7], [1,3], [1,6], [1,7], [2,6], [2,8], [4,9], [5,7], [6,9]]) 95 | 96 | pairs = np.asarray(_find_relative_neighbors(dist)) 97 | assert_array_equal(pairs, expected) 98 | 99 | expected = np.vstack((expected, expected[:,::-1])) 100 | G = relative_neighborhood_graph(self.pts) 101 | assert_array_equal(G.pairs(), expected) 102 | 103 | # with metric='precomputed' 104 | G = relative_neighborhood_graph(dist, metric='precomputed') 105 | assert_array_equal(G.pairs(), expected) 106 | 107 | # with edge weights 108 | G = relative_neighborhood_graph(self.pts, weighted=True) 109 | adj = np.zeros((10,10)) 110 | idx = [3,7,13,16,17,26,28,30,31,49,57,61,62,69,70,71,75,82,94,96] 111 | adj.flat[idx] = [ 112 | 0.198635, 0.188162, 0.16289, 0.255361, 0.234094, 0.450565, 113 | 0.379223, 0.198635, 0.16289, 0.185132, 0.061188, 0.255361, 114 | 0.450565, 0.192354, 0.188162, 0.234094, 0.061188, 0.379223, 115 | 0.185132, 0.192354] 116 | assert_array_almost_equal(G.matrix('dense'), adj) 117 | 118 | if __name__ == '__main__': 119 | unittest.main() 120 | -------------------------------------------------------------------------------- /graphs/construction/b_matching.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function 2 | import numpy as np 3 | import warnings 4 | from graphs import Graph 5 | from ..mini_six import range 6 | 7 | __all__ = ['b_matching'] 8 | 9 | 10 | def b_matching(D, k, max_iter=1000, damping=1, conv_thresh=1e-4, 11 | weighted=False, verbose=False): 12 | ''' 13 | "Belief-Propagation for Weighted b-Matchings on Arbitrary Graphs 14 | and its Relation to Linear Programs with Integer Solutions" 15 | Bayati et al. 16 | 17 | Finds the minimal weight perfect b-matching using min-sum loopy-BP. 
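    Illustrative call (D here is hypothetical): b_matching(D, k=3) returns a
    Graph in which every vertex is incident to 3 edges, whenever such a
    matching is feasible.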
18 | 19 | @param D pairwise distance matrix 20 | @param k number of neighbors per vertex (scalar or array-like) 21 | 22 | Based on the code at http://www.cs.columbia.edu/~bert/code/bmatching/bdmatch 23 | ''' 24 | INTERVAL = 2 25 | oscillation = 10 26 | cbuff = np.zeros(100, dtype=float) 27 | cbuffpos = 0 28 | N = D.shape[0] 29 | assert D.shape[1] == N, 'Input distance matrix must be square' 30 | mask = ~np.eye(N, dtype=bool) # Assume all nonzero except for diagonal 31 | W = -D[mask].reshape((N, -1)).astype(float) 32 | degrees = np.clip(np.atleast_1d(k), 0, N-1) 33 | if degrees.size == 1: # broadcast scalar up to length-N array 34 | degrees = np.repeat(degrees, N) 35 | else: 36 | assert degrees.shape == (N,), 'Input degrees must have length N' 37 | # TODO: remove these later 38 | inds = np.tile(np.arange(N), (N, 1)) 39 | backinds = inds.copy() 40 | inds = inds[mask].reshape((N, -1)) 41 | backinds = backinds.T.ravel()[:(N*(N-1))].reshape((N, -1)) 42 | 43 | # Run Belief Revision 44 | change = 1.0 45 | B = W.copy() 46 | for n_iter in range(1, max_iter+1): 47 | oldB = B.copy() 48 | update_belief(oldB, B, W, degrees, damping, inds, backinds) 49 | 50 | # check for convergence 51 | if n_iter % INTERVAL == 0: 52 | # track changes 53 | c = np.abs(B[:,0]).sum() 54 | # c may be infinite here, and that's ok 55 | with np.errstate(invalid='ignore'): 56 | if np.any(np.abs(c - cbuff) < conv_thresh): 57 | oscillation -= 1 58 | cbuff[cbuffpos] = c 59 | cbuffpos = (cbuffpos + 1) % len(cbuff) 60 | 61 | change = diff_belief(B, oldB) 62 | if np.isnan(change): 63 | warnings.warn("change is NaN! " 64 | "BP will quit but solution could be invalid. " 65 | "Problem may be infeasible.") 66 | break 67 | if change < conv_thresh or oscillation < 1: 68 | break 69 | else: 70 | warnings.warn("Hit iteration limit (%d) before converging" % max_iter) 71 | 72 | if verbose: # pragma: no cover 73 | if change < conv_thresh: 74 | print("Converged to stable beliefs in %d iterations" % n_iter) 75 | elif oscillation < 1: 76 | print("Stopped after reaching oscillation in %d iterations" % n_iter) 77 | print("No feasible solution found or there are multiple maxima.") 78 | print("Outputting best approximate solution. 
Try damping.") 79 | 80 | # recover result from B 81 | thresholds = np.zeros(N) 82 | for i,d in enumerate(degrees): 83 | Brow = B[i] 84 | if d >= N - 1: 85 | thresholds[i] = -np.inf 86 | elif d < 1: 87 | thresholds[i] = np.inf 88 | else: 89 | thresholds[i] = Brow[quickselect(-Brow, d-1)] 90 | 91 | ii,jj = np.where(B >= thresholds[:,None]) 92 | pairs = np.column_stack((ii, inds[ii,jj])) 93 | w = D[ii, pairs[:,1]] if weighted else None 94 | return Graph.from_edge_pairs(pairs, num_vertices=N, weights=w) 95 | 96 | 97 | def _update_change(B, oldB): # pragma: no cover 98 | expB = np.exp(B) 99 | expB[np.isinf(expB)] = 0 100 | rowsums = expB.sum(axis=1) 101 | expOldB = np.exp(oldB) 102 | expOldB[np.isinf(expOldB)] = 0 103 | oldrowsums = expOldB.sum(axis=1) 104 | 105 | change = 0 106 | rowsums[rowsums==0] = 1 107 | oldrowsums[oldrowsums==0] = 1 108 | for i in range(B.shape[0]): 109 | row = expB[i] 110 | oldrow = expOldB[i] 111 | rmask = row == 0 112 | ormask = oldrow == 0 113 | change += np.count_nonzero(np.logical_xor(rmask, ormask)) 114 | mask = ~np.logical_and(rmask, ormask) 115 | change += np.abs(oldrow[mask]/oldrowsums[i] - 116 | row[mask]/rowsums[i]).sum() 117 | return change 118 | 119 | 120 | def _quickselect(B_row, *ks): # pragma: no cover 121 | order = np.argpartition(B_row, ks) 122 | if len(ks) == 1: 123 | return order[ks[0]] 124 | return [order[k] for k in ks] 125 | 126 | 127 | def _updateB(oldB, B, W, degrees, damping, inds, backinds): # pragma: no cover 128 | '''belief update function.''' 129 | for j,d in enumerate(degrees): 130 | kk = inds[j] 131 | bk = backinds[j] 132 | 133 | if d == 0: 134 | B[kk,bk] = -np.inf 135 | continue 136 | 137 | belief = W[kk,bk] + W[j] 138 | oldBj = oldB[j] 139 | if d == oldBj.shape[0]: 140 | bth = quickselect(-oldBj, d-1) 141 | bplus = -1 142 | else: 143 | bth,bplus = quickselect(-oldBj, d-1, d) 144 | 145 | belief -= np.where(oldBj >= oldBj[bth], oldBj[bplus], oldBj[bth]) 146 | B[kk,bk] = damping*belief + (1-damping)*oldB[kk,bk] 147 | 148 | 149 | try: 150 | from ._fast_paths import quickselect, update_belief, diff_belief 151 | except ImportError: 152 | try: 153 | import pyximport 154 | pyximport.install(setup_args={'include_dirs': np.get_include()}) 155 | from ._fast_paths import quickselect, update_belief, diff_belief 156 | except ImportError: 157 | quickselect = _quickselect 158 | update_belief = _updateB 159 | diff_belief = _update_change 160 | -------------------------------------------------------------------------------- /graphs/construction/neighbors.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import numpy as np 4 | from sklearn.neighbors import NearestNeighbors 5 | from sklearn.metrics.pairwise import pairwise_distances 6 | 7 | try: 8 | from bottleneck import argpartsort 9 | except ImportError: 10 | argpartsort = lambda arr, k: np.argpartition(arr, k-1) 11 | 12 | from graphs import Graph 13 | 14 | __all__ = ['neighbor_graph', 'nearest_neighbors'] 15 | 16 | 17 | def neighbor_graph(X, metric='euclidean', k=None, epsilon=None, 18 | weighting='none', precomputed=False): 19 | '''Build a neighbor graph from pairwise distance information. 20 | 21 | X : two-dimensional array-like 22 | Shape must either be (num_pts, num_dims) or (num_pts, num_pts). 
23 | k : int, maximum number of nearest neighbors 24 | epsilon : float, maximum distance to a neighbor 25 | metric : str, type of distance metric (see sklearn.metrics) 26 | When metric='precomputed', X is a symmetric distance matrix. 27 | weighting : str, one of {'binary', 'none'} 28 | When weighting='binary', all edge weights == 1. 29 | ''' 30 | if k is None and epsilon is None: 31 | raise ValueError('Must provide `k` or `epsilon`.') 32 | if weighting not in ('binary', 'none'): 33 | raise ValueError('Invalid weighting param: %r' % weighting) 34 | 35 | # TODO: deprecate the precomputed kwarg 36 | precomputed = precomputed or (metric == 'precomputed') 37 | binary = weighting == 'binary' 38 | 39 | # Try the fast path, if possible. 40 | if not precomputed and epsilon is None: 41 | return _sparse_neighbor_graph(X, k, binary, metric) 42 | 43 | if precomputed: 44 | D = X 45 | else: 46 | D = pairwise_distances(X, metric=metric) 47 | return _slow_neighbor_graph(D, k, epsilon, binary) 48 | 49 | 50 | def nearest_neighbors(query_pts, target_pts=None, metric='euclidean', 51 | k=None, epsilon=None, return_dists=False, 52 | precomputed=False): 53 | '''Find nearest neighbors of query points from a matrix of target points. 54 | 55 | Returns a list of indices of neighboring points, one list per query. 56 | If no target_pts are specified, distances are calculated within query_pts. 57 | When return_dists is True, returns two lists: (distances, indices) 58 | ''' 59 | if k is None and epsilon is None: 60 | raise ValueError('Must provide `k` or `epsilon`.') 61 | 62 | # TODO: deprecate the precomputed kwarg 63 | precomputed = precomputed or (metric == 'precomputed') 64 | 65 | if precomputed and target_pts is not None: 66 | raise ValueError('`target_pts` cannot be used with precomputed distances') 67 | 68 | query_pts = np.array(query_pts) 69 | if len(query_pts.shape) == 1: 70 | query_pts = query_pts.reshape((1,-1)) # ensure that the query is a 1xD row 71 | 72 | if precomputed: 73 | dists = query_pts.copy() 74 | else: 75 | dists = pairwise_distances(query_pts, Y=target_pts, metric=metric) 76 | 77 | if epsilon is not None: 78 | if k is not None: 79 | # kNN filtering 80 | _, not_nn = _min_k_indices(dists, k, inv_ind=True) 81 | dists[np.arange(dists.shape[0]), not_nn.T] = np.inf 82 | # epsilon-ball 83 | is_close = dists <= epsilon 84 | if return_dists: 85 | nnis,nnds = [],[] 86 | for i,row in enumerate(is_close): 87 | nns = np.nonzero(row)[0] 88 | nnis.append(nns) 89 | nnds.append(dists[i,nns]) 90 | return nnds, nnis 91 | return np.array([np.nonzero(row)[0] for row in is_close]) 92 | 93 | # knn 94 | nns = _min_k_indices(dists,k) 95 | if return_dists: 96 | # index each row of dists by each row of nns 97 | row_inds = np.arange(len(nns))[:,np.newaxis] 98 | nn_dists = dists[row_inds, nns] 99 | return nn_dists, nns 100 | return nns 101 | 102 | 103 | def _slow_neighbor_graph(dist, k, epsilon, binary): 104 | num_pts = dist.shape[0] 105 | 106 | if k is not None: 107 | k = min(k+1, num_pts) 108 | nn, not_nn = _min_k_indices(dist, k, inv_ind=True) 109 | I = np.arange(num_pts) 110 | 111 | if epsilon is not None: 112 | mask = dist <= epsilon 113 | if k is not None: 114 | mask[I, not_nn.T] = False 115 | if binary: 116 | np.fill_diagonal(mask, False) 117 | W = mask.astype(float) 118 | else: 119 | W = np.where(mask, dist, 0) 120 | else: 121 | inv_mask = np.eye(num_pts, dtype=bool) 122 | inv_mask[I, not_nn.T] = True 123 | if binary: 124 | W = 1.0 - inv_mask 125 | else: 126 | W = np.where(inv_mask, 0, dist) 127 | 128 | # W = 
scipy.sparse.csr_matrix(W) 129 | return Graph.from_adj_matrix(W) 130 | 131 | 132 | def _min_k_indices(arr, k, inv_ind=False): 133 | psorted = argpartsort(arr, k) 134 | if inv_ind: 135 | return psorted[...,:k], psorted[...,k:] 136 | return psorted[...,:k] 137 | 138 | 139 | def _sparse_neighbor_graph(X, k, binary=False, metric='l2'): 140 | '''Construct a sparse adj matrix from a matrix of points (one per row). 141 | Non-zeros are unweighted/binary distance values, depending on the binary arg. 142 | Doesn't include self-edges.''' 143 | knn = NearestNeighbors(n_neighbors=k, metric=metric).fit(X) 144 | mode = 'connectivity' if binary else 'distance' 145 | try: 146 | adj = knn.kneighbors_graph(None, mode=mode) 147 | except IndexError: 148 | # XXX: we must be running an old (<0.16) version of sklearn 149 | # We have to hack around an old bug: 150 | if binary: 151 | adj = knn.kneighbors_graph(X, k+1, mode=mode) 152 | adj.setdiag(0) 153 | else: 154 | adj = knn.kneighbors_graph(X, k, mode=mode) 155 | return Graph.from_adj_matrix(adj) 156 | -------------------------------------------------------------------------------- /graphs/construction/_fast_paths.pyx: -------------------------------------------------------------------------------- 1 | # cython: boundscheck=False 2 | # cython: wraparound=False 3 | # cython: nonecheck=False 4 | # cython: cdivision=True 5 | import numpy as np 6 | cimport numpy as np 7 | cimport cython 8 | from libcpp cimport bool 9 | from sklearn.metrics import pairwise_distances 10 | 11 | cdef extern from "math.h": 12 | float INFINITY 13 | 14 | IDX_DTYPE = np.intp 15 | ctypedef Py_ssize_t IDX_DTYPE_t 16 | 17 | 18 | def find_relative_neighbors(D): 19 | cdef IDX_DTYPE_t n = D.shape[0] 20 | cdef IDX_DTYPE_t max_num_pairs = n * (n-1) // 2 21 | pairs = np.empty((max_num_pairs, 2), dtype=IDX_DTYPE) 22 | cdef IDX_DTYPE_t end_idx = _fill_rn_pairs(D, n, pairs) 23 | return pairs[:end_idx] 24 | 25 | 26 | cdef IDX_DTYPE_t _fill_rn_pairs(double[:,::1] D, 27 | IDX_DTYPE_t n, 28 | IDX_DTYPE_t[:,::1] pairs): 29 | cdef IDX_DTYPE_t idx = 0 30 | cdef IDX_DTYPE_t r, c, i 31 | cdef double d 32 | for r in range(n-1): 33 | for c in range(r+1, n): 34 | d = D[r,c] 35 | for i in range(n): 36 | if i == r or i == c: 37 | continue 38 | if D[r,i] < d and D[c,i] < d: 39 | break # Point in lune, this is not an edge 40 | else: 41 | pairs[idx,0] = r 42 | pairs[idx,1] = c 43 | idx += 1 44 | return idx 45 | 46 | 47 | def inter_cluster_distance(X, num_clusters, cluster_labels): 48 | # compute shortest distances between clusters 49 | Dx = pairwise_distances(X, metric='sqeuclidean') 50 | Dc = np.zeros((num_clusters,num_clusters), dtype=np.float64) 51 | edges = np.zeros((num_clusters,num_clusters,2), dtype=IDX_DTYPE) 52 | _fill_Dc_edges(num_clusters, cluster_labels, Dx, Dc, edges) 53 | return Dc, edges 54 | 55 | 56 | cdef void _fill_Dc_edges(IDX_DTYPE_t num_clusters, 57 | int[::1] cluster_labels, 58 | double[:,::1] Dx, 59 | double[:,::1] Dc, 60 | IDX_DTYPE_t[:,:,::1] edges): 61 | cdef IDX_DTYPE_t i, j, k, l, r, c, ik, il, ii_n, jj_n 62 | cdef double min_val 63 | cdef double INF = np.inf 64 | cdef IDX_DTYPE_t n = Dx.shape[0] 65 | cdef bool[:,::1] masks 66 | cdef IDX_DTYPE_t[::1] ii, jj 67 | cdef list indices = [] 68 | for i in range(num_clusters): 69 | indices.append(where_eq(cluster_labels, i)) 70 | for i in range(num_clusters-1): 71 | ii = indices[i] 72 | ii_n = ii.shape[0] 73 | for j in range(i+1, num_clusters): 74 | jj = indices[j] 75 | jj_n = jj.shape[0] 76 | r = 0 77 | c = 0 78 | min_val = INF 79 | for ik in 
range(ii_n): 80 | k = ii[ik] 81 | for il in range(jj_n): 82 | l = jj[il] 83 | if Dx[k,l] < min_val: 84 | min_val = Dx[k,l] 85 | # Transposed index 86 | r = k 87 | c = l 88 | edges[i,j,0] = r 89 | edges[i,j,1] = c 90 | edges[j,i,0] = r 91 | edges[j,i,1] = c 92 | Dc[i,j] = min_val 93 | Dc[j,i] = min_val 94 | 95 | cdef IDX_DTYPE_t[::1] where_eq(int[::1] x, IDX_DTYPE_t val): 96 | # return np.where(x == val)[0] 97 | cdef IDX_DTYPE_t n = x.shape[0] 98 | cdef IDX_DTYPE_t i, n_inds 99 | cdef list inds = [] 100 | for i in range(n): 101 | if x[i] == val: 102 | inds.append(i) 103 | n_inds = len(inds) 104 | cdef IDX_DTYPE_t[::1] result = np.empty(n_inds, dtype=IDX_DTYPE) 105 | for i in range(n_inds): 106 | result[i] = inds[i] 107 | return result 108 | 109 | 110 | cpdef IDX_DTYPE_t quickselect(B_row, IDX_DTYPE_t k): 111 | cdef IDX_DTYPE_t[::1] order = np.argpartition(B_row, k) 112 | return order[k] 113 | 114 | 115 | def diff_belief(B, oldB): 116 | cdef IDX_DTYPE_t i, j, N = B.shape[0] 117 | cdef double rs, ors, change = 0 118 | cdef double[::1] rowsums, oldrowsums, row, oldrow 119 | expB = np.exp(B) 120 | expOldB = np.exp(oldB) 121 | expB[np.isinf(expB)] = 0 122 | expOldB[np.isinf(expOldB)] = 0 123 | rowsums = expB.sum(axis=1) 124 | oldrowsums = expOldB.sum(axis=1) 125 | 126 | for i in range(N): 127 | rs = rowsums[i] 128 | ors = oldrowsums[i] 129 | if rs == 0: 130 | rs = 1 131 | if ors == 0: 132 | ors = 1 133 | row = expB[i] 134 | oldrow = expOldB[i] 135 | for j in range(N-1): 136 | if (row[j] == 0 and oldrow[j] != 0) or (row[j] != 0 and oldrow[j] == 0): 137 | change += 1 138 | if row[j] != 0 and oldrow[j] != 0: 139 | change += abs(oldrow[j]/ors - row[j]/rs) 140 | return change 141 | 142 | 143 | def update_belief(oldB, double[:,::1] B, double[:,::1] W, 144 | IDX_DTYPE_t[::1] degrees, double damping, 145 | IDX_DTYPE_t[:,::1] inds, IDX_DTYPE_t[:,::1] backinds): 146 | cdef IDX_DTYPE_t j, d, kkk, bkk, n = degrees.shape[0] 147 | cdef IDX_DTYPE_t[::1] kk, bk, order 148 | cdef IDX_DTYPE_t bth, bplus 149 | cdef double[::1] oldBj 150 | cdef double[:,::1] oldBview = oldB 151 | cdef double belief 152 | for j in range(n): 153 | d = degrees[j] 154 | kk = inds[j] 155 | bk = backinds[j] 156 | 157 | if d == 0: 158 | for k in range(n-1): 159 | kkk = kk[k] 160 | bkk = bk[k] 161 | B[kkk,bkk] = -INFINITY 162 | continue 163 | 164 | oldBj = oldBview[j] 165 | if d == n-1: 166 | bth = quickselect(-oldB[j], d-1) 167 | bplus = -1 168 | else: 169 | order = np.argpartition(-oldB[j], (d-1, d)) 170 | bth = order[d-1] 171 | bplus = order[d] 172 | 173 | for k in range(n-1): 174 | kkk = kk[k] 175 | bkk = bk[k] 176 | belief = W[kkk,bkk] + W[j,k] 177 | 178 | if oldBj[k] >= oldBj[bth]: 179 | belief -= oldBj[bplus] 180 | else: 181 | belief -= oldBj[bth] 182 | B[kkk,bkk] = damping*belief + (1-damping)*oldBview[kkk,bkk] 183 | -------------------------------------------------------------------------------- /graphs/mixins/tests/test_embed.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | from numpy.testing import assert_array_almost_equal 4 | from scipy.sparse import csr_matrix 5 | from sklearn.decomposition import PCA 6 | from sklearn.manifold import locally_linear_embedding 7 | from graphs import Graph 8 | from graphs.construction import neighbor_graph 9 | 10 | 11 | def assert_signless_array_almost_equal(a, b, **kwargs): 12 | a = np.asarray(a) 13 | b = np.asarray(b) 14 | if (a.flat[0] < 0 and b.flat[0] > 0) or (a.flat[0] > 0 and b.flat[0] < 0): 15 | 
assert_array_almost_equal(a, -b, **kwargs) 16 | else: 17 | assert_array_almost_equal(a, b, **kwargs) 18 | 19 | 20 | class TestEmbeddings(unittest.TestCase): 21 | def test_isomap(self): 22 | expected = [-np.sqrt(8), -np.sqrt(2), 0, np.sqrt(2), np.sqrt(8)] 23 | G = Graph.from_adj_matrix([[0, np.sqrt(2), 2.82842712, 0, 0], 24 | [np.sqrt(2), 0, np.sqrt(2), 0, 0], 25 | [0, np.sqrt(2), 0, np.sqrt(2), 0], 26 | [0, 0, np.sqrt(2), 0, np.sqrt(2)], 27 | [0, 0, 2.82842712, np.sqrt(2), 0]]) 28 | Y = G.isomap(num_dims=1) 29 | self.assertEqual(Y.shape, (5, 1)) 30 | assert_array_almost_equal(Y[:,0], expected) 31 | 32 | def test_laplacian_eigenmaps(self): 33 | # Test a simple chain graph 34 | expected = np.array([0.5, 0.5, 0., -0.5, -0.5]) 35 | W = np.zeros((5,5)) + np.diag(np.ones(4), k=1) + np.diag(np.ones(4), k=-1) 36 | G = Graph.from_adj_matrix(W) 37 | Y = G.laplacian_eigenmaps(num_dims=1) 38 | self.assertEqual(Y.shape, (5, 1)) 39 | assert_signless_array_almost_equal(Y[:,0], expected) 40 | # Test num_dims=None case 41 | Y = G.laplacian_eigenmaps() 42 | self.assertEqual(Y.shape, (5, 4)) 43 | assert_signless_array_almost_equal(Y[:,0], expected) 44 | # Test sparse case 45 | G = Graph.from_adj_matrix(csr_matrix(W)) 46 | Y = G.laplacian_eigenmaps(num_dims=1) 47 | self.assertEqual(Y.shape, (5, 1)) 48 | assert_signless_array_almost_equal(Y[:,0], expected) 49 | 50 | def test_locality_preserving_projections(self): 51 | X = np.array([[1,2],[2,1],[3,1.5],[4,0.5],[5,1]]) 52 | G = Graph.from_adj_matrix([[0, 1, 1, 0, 0], 53 | [1, 0, 1, 0, 0], 54 | [1, 1, 0, 1, 1], 55 | [0, 0, 1, 0, 1], 56 | [0, 0, 1, 1, 0]]) 57 | proj = G.locality_preserving_projections(X, num_dims=1) 58 | assert_array_almost_equal(proj, np.array([[-0.95479113],[0.29727749]])) 59 | # test case with bigger d than n 60 | X = np.hstack((X, X))[:3] 61 | G = Graph.from_adj_matrix(G.matrix()[:3,:3]) 62 | proj = G.locality_preserving_projections(X, num_dims=1) 63 | assert_array_almost_equal(proj, np.array([[0.9854859,0.1697574,0,0]]).T) 64 | 65 | def test_locally_linear_embedding(self): 66 | np.random.seed(1234) 67 | pts = np.random.random((5, 3)) 68 | expected = locally_linear_embedding(pts, 3, 1)[0] 69 | G = neighbor_graph(pts, k=3).barycenter_edge_weights(pts, copy=False) 70 | actual = G.locally_linear_embedding(num_dims=1) 71 | assert_signless_array_almost_equal(expected, actual) 72 | 73 | def test_neighborhood_preserving_embedding(self): 74 | X = np.array([[1,2],[2,1],[3,1.5],[4,0.5],[5,1]]) 75 | G = Graph.from_adj_matrix([[0, 1, 1, 0, 0], 76 | [1, 0, 1, 0, 0], 77 | [1, 1, 0, 1, 1], 78 | [0, 0, 1, 0, 1], 79 | [0, 0, 1, 1, 0]]) 80 | proj = G.neighborhood_preserving_embedding(X, num_dims=1) 81 | assert_signless_array_almost_equal(proj, [[0.99763], [0.068804]]) 82 | 83 | def test_laplacian_pca(self): 84 | X = np.array([[1,2],[2,1],[3,1.5],[4,0.5],[5,1]]) 85 | G = Graph.from_adj_matrix([[0, 1, 1, 0, 0], 86 | [1, 0, 1, 0, 0], 87 | [1, 1, 0, 1, 1], 88 | [0, 0, 1, 0, 1], 89 | [0, 0, 1, 1, 0]]) 90 | # check that beta=0 gets the (roughly) the same answer as PCA 91 | mX = X - X.mean(axis=0) 92 | expected = PCA(n_components=1).fit_transform(mX) 93 | actual = G.laplacian_pca(mX, num_dims=1, beta=0)[:,:1] 94 | self.assertEqual(expected.shape, actual.shape) 95 | assert_signless_array_almost_equal(expected[:,0], actual[:,0], decimal=1) 96 | 97 | def test_circular_layout(self): 98 | G = Graph.from_edge_pairs([], num_vertices=4) 99 | expected = np.array([[1,0],[0,1],[-1,0],[0,-1]]) 100 | assert_array_almost_equal(G.layout_circle(), expected) 101 | # edge cases 102 | 
for nv in (0, 1): 103 | G = Graph.from_edge_pairs([], num_vertices=nv) 104 | X = G.layout_circle() 105 | self.assertEqual(X.shape, (nv, 2)) 106 | 107 | def test_spring_layout(self): 108 | np.random.seed(1234) 109 | w = np.array([1,2,0.1,1,1,2,0.1,1]) 110 | p = [[0,1],[1,2],[2,3],[3,4],[1,0],[2,1],[3,2],[4,3]] 111 | G = Graph.from_edge_pairs(p, weights=w, num_vertices=5) 112 | expected = np.array([ 113 | [-1.12951518, 0.44975598], 114 | [-0.42574481, 0.51702804], 115 | [0.58946761, 0.61403187], 116 | [0.96513010, 0.64989485], 117 | [1.67011322, 0.71714073]]) 118 | assert_array_almost_equal(G.layout_spring(), expected) 119 | # Test initial_layout kwarg 120 | X = np.arange(10).reshape((5,2)) 121 | expected = np.array([ 122 | [1.837091, 2.837091], 123 | [2.996882, 3.996882], 124 | [4.472791, 5.472791], 125 | [5.014210, 6.014210], 126 | [6.162909, 7.162909]]) 127 | assert_array_almost_equal(G.layout_spring(initial_layout=X), expected) 128 | 129 | if __name__ == '__main__': 130 | unittest.main() 131 | -------------------------------------------------------------------------------- /graphs/reorder.py: -------------------------------------------------------------------------------- 1 | '''Sparse symmetric matrix reordering to reduce bandwidth/diagonalness. 2 | Methods: 3 | - cuthill_mckee 4 | - node_centroid_hill_climbing 5 | - laplacian_reordering 6 | References: 7 | - ftp://ftp.numerical.rl.ac.uk/pub/talks/jas.ala06.24VII06.pdf 8 | - http://www.jstor.org/stable/2156090 (profile defn, NYI RCM improvements) 9 | - https://www.cs.purdue.edu/homes/apothen/env3.pdf (laplacian, NYI sloan alg) 10 | ''' 11 | from __future__ import absolute_import, print_function 12 | from collections import deque 13 | import numpy as np 14 | import scipy.sparse.csgraph as ssc 15 | from graphs import Graph 16 | from .mini_six import range 17 | 18 | __all__ = [ 19 | 'permute_graph', 'cuthill_mckee', 'node_centroid_hill_climbing', 20 | 'laplacian_reordering' 21 | ] 22 | 23 | 24 | def permute_graph(G, order): 25 | '''Reorder the graph's vertices, returning a copy of the input graph. 26 | order : integer array-like, some permutation of range(G.num_vertices()). 27 | ''' 28 | adj = G.matrix('dense') 29 | adj = adj[np.ix_(order, order)] 30 | return Graph.from_adj_matrix(adj) 31 | 32 | 33 | def _cuthill_mckee(G): 34 | n = G.num_vertices() 35 | queue = deque([]) 36 | result = [] 37 | degree = G.degree() 38 | remaining = dict(enumerate(degree)) 39 | adj = G.matrix('dense', 'csr') 40 | while len(result) != n: 41 | queue.append(min(remaining, key=remaining.get)) 42 | while queue: 43 | p = queue.popleft() 44 | if p not in remaining: 45 | continue 46 | result.append(p) 47 | del remaining[p] 48 | nbrs = [c for c in np.where(adj[p])[0] if c in remaining] 49 | queue.extend(sorted(nbrs, key=remaining.get)) 50 | return permute_graph(G, np.array(result)) 51 | 52 | 53 | if hasattr(ssc, 'reverse_cuthill_mckee'): # pragma: no cover 54 | def cuthill_mckee(G): 55 | sG = G.matrix('csr') 56 | order = ssc.reverse_cuthill_mckee(sG, symmetric_mode=True) 57 | return permute_graph(G, order) 58 | else: # pragma: no cover 59 | cuthill_mckee = _cuthill_mckee 60 | 61 | cuthill_mckee.__doc__ = 'Reorder vertices using the Cuthill-McKee algorithm.' 
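
# Illustrative usage (a sketch, not part of the public API): for any Graph G,
#   H = cuthill_mckee(G)
# returns a new graph with permuted vertices, leaving G unchanged. RCM is a
# heuristic, so H.bandwidth() is usually, though not always, below G.bandwidth().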
62 | 63 | 64 | def laplacian_reordering(G): 65 | '''Reorder vertices using the eigenvector of the graph Laplacian corresponding 66 | to the first positive eigenvalue.''' 67 | L = G.laplacian() 68 | vals, vecs = np.linalg.eigh(L) 69 | min_positive_idx = np.argmax(vals == vals[vals>0].min()) 70 | vec = vecs[:, min_positive_idx] 71 | return permute_graph(G, np.argsort(vec)) 72 | 73 | 74 | def node_centroid_hill_climbing(G, relax=1, num_centerings=20, verbose=False): 75 | '''Iterative reordering method based on alternating rounds of node-centering 76 | and hill-climbing search.''' 77 | # Initialize order with BFS from a random start node. 78 | order = _breadth_first_order(G) 79 | for it in range(num_centerings): 80 | B = permute_graph(G, order).bandwidth() 81 | nc_order = _node_center(G, order, relax=relax) 82 | nc_B = permute_graph(G, nc_order).bandwidth() 83 | if nc_B < B: 84 | if verbose: # pragma: no cover 85 | print('post-center', B, nc_B) 86 | order = nc_order 87 | order = _hill_climbing(G, order, verbose=verbose) 88 | return permute_graph(G, order) 89 | 90 | 91 | def _breadth_first_order(G): 92 | inds = np.arange(G.num_vertices()) 93 | adj = G.matrix('dense', 'csr') 94 | total_order = [] 95 | while len(inds) > 0: 96 | order = ssc.breadth_first_order(adj, np.random.choice(inds), 97 | return_predecessors=False) 98 | inds = np.setdiff1d(inds, order, assume_unique=True) 99 | total_order = np.append(total_order, order) 100 | return total_order.astype(int) 101 | 102 | 103 | def _critical_vertices(G, order, relax=1, bw=None): 104 | go = permute_graph(G, order) 105 | if bw is None: 106 | bw = go.bandwidth() 107 | adj = go.matrix('dense') 108 | if relax == 1: 109 | for i in np.where(np.diag(adj, -bw))[0]: 110 | yield bw + i, i 111 | else: 112 | crit = relax * bw 113 | for u, v in np.transpose(np.where(np.tril(adj, -np.floor(crit)))): 114 | if np.abs(u-v) >= crit: 115 | yield u, v 116 | 117 | 118 | def _node_center(G, order, relax=0.99): 119 | weights = order.copy().astype(float) 120 | counts = np.ones_like(order) 121 | inv_order = np.argsort(order) 122 | for i, j in _critical_vertices(G, order, relax): 123 | u = inv_order[i] 124 | v = inv_order[j] 125 | weights[u] += j # order[v] 126 | counts[u] += 1 127 | weights[v] += i # order[u] 128 | counts[v] += 1 129 | weights /= counts 130 | return np.argsort(weights) 131 | 132 | 133 | def _hill_climbing(G, order, verbose=False): 134 | B = permute_graph(G, order).bandwidth() 135 | while True: 136 | inv_order = np.argsort(order) 137 | for i, j in _critical_vertices(G, order, bw=B): 138 | u = inv_order[i] 139 | v = inv_order[j] 140 | for w,k in enumerate(order): 141 | if not (k < i or k > j): 142 | continue 143 | new_order = order.copy() 144 | if k < i: 145 | new_order[[u,w]] = new_order[[w,u]] 146 | elif k > j: 147 | new_order[[v,w]] = new_order[[w,v]] 148 | 149 | new_B = permute_graph(G, new_order).bandwidth() 150 | if new_B < B: 151 | order = new_order 152 | if verbose: # pragma: no cover 153 | print('improved B', B, new_B) 154 | B = new_B 155 | break 156 | elif new_B == B: 157 | nc = sum(1 for _ in _critical_vertices(G, order, bw=B)) 158 | new_nc = sum(1 for _ in _critical_vertices(G, new_order, bw=B)) 159 | if new_nc < nc: 160 | order = new_order 161 | if verbose: # pragma: no cover 162 | print('improved nc', nc, new_nc) 163 | break 164 | else: 165 | continue 166 | break 167 | else: 168 | break 169 | return order 170 | -------------------------------------------------------------------------------- /graphs/mixins/tests/test_transformation.py: 
-------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | from numpy.testing import assert_array_equal, assert_array_almost_equal 4 | from scipy.sparse import coo_matrix, csr_matrix 5 | from graphs import Graph 6 | from graphs.construction import neighbor_graph 7 | 8 | PAIRS = np.array([[0,1],[0,2],[1,0],[1,2],[2,0],[2,1],[3,4],[4,3]]) 9 | ADJ = [[0,1,1,0,0], 10 | [1,0,1,0,0], 11 | [1,1,0,0,0], 12 | [0,0,0,0,1], 13 | [0,0,0,1,0]] 14 | 15 | # fixed "random" data in 2 dimensions 16 | X = np.column_stack(( 17 | [0.192, 0.438, 0.78, 0.276, 0.958, 0.358, 0.683, 0.37, 0.503, 0.773, 18 | 0.365, 0.075, 0.933, 0.397, 0.317, 0.869, 0.802, 0.704, 0.219, 0.442], 19 | [0.622, 0.785, 0.273, 0.802, 0.876, 0.501, 0.713, 0.561, 0.014, 0.883, 20 | 0.615, 0.369, 0.651, 0.789, 0.568, 0.436, 0.144, 0.705, 0.925, 0.909] 21 | )) 22 | 23 | 24 | class TestTransformation(unittest.TestCase): 25 | 26 | def test_kernelize(self): 27 | graphs = [ 28 | Graph.from_edge_pairs(PAIRS), 29 | Graph.from_adj_matrix(ADJ), 30 | Graph.from_adj_matrix(coo_matrix(ADJ)), 31 | Graph.from_adj_matrix(csr_matrix(ADJ)), 32 | ] 33 | for G in graphs: 34 | for kernel in ('none', 'binary'): 35 | K = G.kernelize(kernel) 36 | assert_array_equal(K.matrix('dense'), ADJ) 37 | self.assertRaises(ValueError, G.kernelize, 'foobar') 38 | 39 | def test_connected_subgraphs(self): 40 | G = Graph.from_edge_pairs(PAIRS) 41 | subgraphs = list(G.connected_subgraphs(directed=False, ordered=False)) 42 | self.assertEqual(len(subgraphs), 2) 43 | assert_array_equal(subgraphs[0].pairs(), PAIRS[:6]) 44 | assert_array_equal(subgraphs[1].pairs(), [[0,1],[1,0]]) 45 | 46 | G = neighbor_graph(X, k=2) 47 | subgraphs = list(G.connected_subgraphs(directed=True, ordered=True)) 48 | self.assertEqual(len(subgraphs), 3) 49 | self.assertEqual([g.num_vertices() for g in subgraphs], [9,6,5]) 50 | 51 | def test_shortest_path_subtree(self): 52 | n = X.shape[0] 53 | G = neighbor_graph(X, k=4) 54 | e_data = [0.163, 0.199, 0.079, 0.188, 0.173, 0.122, 0.136, 0.136, 0.197] 55 | e_row = [3, 0, 14, 0, 0, 3, 0, 3, 3] 56 | e_col = [1, 3, 5, 7, 10, 13, 14, 18, 19] 57 | expected = np.zeros((n,n)) 58 | expected[e_row, e_col] = e_data 59 | 60 | spt = G.shortest_path_subtree(0, directed=True) 61 | assert_array_almost_equal(spt.matrix('dense'), expected, decimal=3) 62 | 63 | # test undirected case 64 | G.symmetrize(method='max', copy=False) 65 | e_data = [0.185,0.379,0.199,0.32,0.205,0.255,0.188,0.508,0.192,0.173,0.279, 66 | 0.258,0.122,0.136,0.316,0.326,0.278,0.136,0.197,0.185,0.379,0.199, 67 | 0.32,0.205,0.255,0.188,0.508,0.192,0.173,0.279,0.258,0.122,0.136, 68 | 0.316,0.326,0.278,0.136,0.197] 69 | e_row = [10,8,0,6,0,1,0,5,6,0,0,6,3,0,17,8,1,3,3,1,2,3,4,5,6,7,8,9,10,11,12, 70 | 13,14,15,16,17,18,19] 71 | e_col = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,10,8,0,6,0,1,0,5,6, 72 | 0,0,6,3,0,17,8,1,3,3] 73 | expected[:] = 0 74 | expected[e_row, e_col] = e_data 75 | 76 | spt = G.shortest_path_subtree(0, directed=False) 77 | assert_array_almost_equal(spt.matrix('dense'), expected, decimal=3) 78 | 79 | def test_minimum_spanning_subtree(self): 80 | n = X.shape[0] 81 | G = neighbor_graph(X, k=4) 82 | e_data = [0.279,0.136,0.255,0.041,0.124,0.186,0.131,0.122,0.136,0.185,0.226, 83 | 0.061,0.255,0.022,0.061,0.054,0.053,0.326,0.185,0.191,0.054,0.177, 84 | 0.279,0.226,0.224,0.041,0.122,0.177,0.136,0.053,0.186,0.224,0.131, 85 | 0.326,0.022,0.191,0.136,0.124] 86 | e_row = 
[0,0,1,1,1,2,2,3,3,4,4,5,6,6,7,7,7,8,9,9,10,10,11,12,12,13,13,13,14, 87 | 14,15,15,16,16,17,17,18,19] 88 | e_col = [11,14,6,13,19,15,16,13,18,9,12,7,1,17,5,10,14,16,4,17,7,13,0,4,15, 89 | 1,3,10,0,7,2,12,2,8,6,9,3,1] 90 | expected = np.zeros((n,n)) 91 | expected[e_row, e_col] = e_data 92 | 93 | mst = G.minimum_spanning_subtree() 94 | assert_array_almost_equal(mst.matrix('dense'), expected, decimal=3) 95 | 96 | def test_neighborhood_subgraph(self): 97 | G = neighbor_graph(X, k=4) 98 | 99 | # simple 1-neighbor subgraph 100 | g, mask = G.neighborhood_subgraph(0, radius=1, weighted=False, 101 | return_mask=True) 102 | assert_array_equal(mask.nonzero()[0], [0,3,7,10,14]) 103 | self.assertEqual(g.num_vertices(), 5) 104 | self.assertEqual(g.num_edges(), 13) 105 | 106 | # distance-based subgraph 107 | g, mask = G.neighborhood_subgraph(12, radius=0.5, return_mask=True) 108 | assert_array_equal(mask.nonzero()[0], [2,4,6,9,12,15,17]) 109 | self.assertEqual(g.num_vertices(), 7) 110 | self.assertEqual(g.num_edges(), 23) 111 | 112 | def test_isograph(self): 113 | # make roughly U-shaped data 114 | theta = np.linspace(0, 2*np.pi, 10)[1:] 115 | data = np.column_stack((np.sin(theta)*2, np.cos(theta))) 116 | G = neighbor_graph(data, k=2) 117 | 118 | g = G.isograph() 119 | self.assertIsNot(g, G) 120 | diff = G.matrix('dense') - g.matrix('dense') 121 | ii, jj = np.nonzero(diff) 122 | assert_array_equal(ii, [3, 4]) 123 | assert_array_equal(jj, [4, 3]) 124 | 125 | # test case with large epsilon 126 | g = G.isograph(min_weight=999) 127 | self.assertIsNot(g, G) 128 | assert_array_equal(g.matrix('dense'), G.matrix('dense')) 129 | 130 | def test_circle_tear(self): 131 | G = neighbor_graph(X, k=4).symmetrize(method='max', copy=False) 132 | 133 | # test MST start 134 | res = G.circle_tear(spanning_tree='mst', cycle_len_thresh=5) 135 | diff = G.matrix('dense') - res.matrix('dense') 136 | ii, jj = np.nonzero(diff) 137 | assert_array_equal(ii, [5,8,8,11]) 138 | assert_array_equal(jj, [8,5,11,8]) 139 | 140 | # test SPT start with a fixed starting vertex 141 | res = G.circle_tear(spanning_tree='spt', cycle_len_thresh=5, spt_idx=8) 142 | diff = G.matrix('dense') - res.matrix('dense') 143 | ii, jj = np.nonzero(diff) 144 | assert_array_equal(ii, [1,1,6,17]) 145 | assert_array_equal(jj, [6,17,1,1]) 146 | 147 | def test_cycle_cut(self): 148 | G = neighbor_graph(X, k=4).symmetrize(method='max', copy=False) 149 | 150 | # hack: the atomic cycle finder chooses a random vertex to start from 151 | np.random.seed(1234) 152 | res = G.cycle_cut(cycle_len_thresh=5, directed=False) 153 | diff = G.matrix('dense') - res.matrix('dense') 154 | ii, jj = np.nonzero(diff) 155 | assert_array_equal(ii, [1,1,6,17]) 156 | assert_array_equal(jj, [6,17,1,1]) 157 | 158 | if __name__ == '__main__': 159 | unittest.main() 160 | -------------------------------------------------------------------------------- /graphs/construction/regularized.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division 2 | 3 | import numpy as np 4 | import scipy.sparse as ss 5 | import warnings 6 | from scipy.linalg import cho_factor, get_lapack_funcs 7 | from sklearn import linear_model 8 | from sklearn.metrics import pairwise_distances 9 | 10 | from graphs import Graph 11 | from ..mini_six import range 12 | from .neighbors import nearest_neighbors 13 | 14 | __all__ = ['sparse_regularized_graph', 'smce_graph'] 15 | 16 | # For quickly running cho_solve without lots of checking 17 | potrs = 
get_lapack_funcs('potrs') 18 | 19 | # TODO: implement NNLRS next 20 | # http://www.cis.pku.edu.cn/faculty/vision/zlin/Publications/2012-CVPR-NNLRS.pdf 21 | 22 | 23 | def smce_graph(X, metric='l2', sparsity_param=10, kmax=None, keep_ratio=0.95): 24 | '''Sparse graph construction from the SMCE paper. 25 | 26 | X : 2-dimensional array-like 27 | metric : str, optional 28 | sparsity_param : float, optional 29 | kmax : int, optional 30 | keep_ratio : float, optional 31 | When <1, keep edges up to (keep_ratio * total weight) 32 | 33 | Returns a graph with asymmetric similarity weights. 34 | Call .symmetrize() and .kernelize('rbf') to convert to symmetric distances. 35 | 36 | SMCE: "Sparse Manifold Clustering and Embedding" 37 | Elhamifar & Vidal, NIPS 2011 38 | ''' 39 | n = X.shape[0] 40 | if kmax is None: 41 | kmax = min(n-1, max(5, n // 10)) 42 | 43 | nn_dists, nn_inds = nearest_neighbors(X, metric=metric, k=kmax+1, 44 | return_dists=True) 45 | W = np.zeros((n, n)) 46 | 47 | # optimize each point separately 48 | for i, pt in enumerate(X): 49 | nbr_inds = nn_inds[i] 50 | mask = nbr_inds != i # remove self-edge 51 | nbr_inds = nbr_inds[mask] 52 | nbr_dist = nn_dists[i,mask] 53 | Y = (X[nbr_inds] - pt) / nbr_dist[:,None] 54 | # solve sparse optimization with ADMM 55 | c = _solve_admm(Y, nbr_dist/nbr_dist.sum(), sparsity_param) 56 | c = np.abs(c / nbr_dist) 57 | W[i,nbr_inds] = c / c.sum() 58 | 59 | W = ss.csr_matrix(W) 60 | if keep_ratio < 1: 61 | for i in range(n): 62 | row_data = W.data[W.indptr[i]:W.indptr[i+1]] 63 | order = np.argsort(row_data)[::-1] 64 | stop_idx = np.searchsorted(np.cumsum(row_data[order]), keep_ratio) + 1 65 | bad_inds = order[stop_idx:] 66 | row_data[bad_inds] = 0 67 | W.eliminate_zeros() 68 | 69 | return Graph.from_adj_matrix(W) 70 | 71 | 72 | def _solve_admm(Y, q, alpha=10, mu=10, max_iter=10000): 73 | n = Y.shape[0] 74 | alpha_q = alpha * q 75 | # solve (YYt + mu*I + mu) Z = (mu*C - lambda + gamma + mu) 76 | A, lower = cho_factor(Y.dot(Y.T) + mu*(np.eye(n) + 1), overwrite_a=True) 77 | C = np.zeros(n) 78 | Z_old = 0 # shape (n,) 79 | lmbda = np.zeros(n) 80 | gamma = 0 81 | # ADMM iteration 82 | for i in range(max_iter): 83 | # call the guts of cho_solve directly for speed 84 | Z, _ = potrs(A, gamma + mu + mu*C - lmbda, lower=lower, overwrite_b=True) 85 | 86 | tmp = mu*Z + lmbda 87 | C[:] = np.abs(tmp) 88 | C -= alpha_q 89 | np.maximum(C, 0, out=C) 90 | C *= np.sign(tmp) 91 | C /= mu 92 | 93 | d_ZC = Z - C 94 | d_1Z = 1 - Z.sum() 95 | lmbda += mu * d_ZC 96 | gamma += mu * d_1Z 97 | 98 | if ((abs(d_1Z) / n < 1e-6) 99 | and (np.abs(d_ZC).mean() < 1e-6) 100 | and (np.abs(Z - Z_old).mean() < 1e-5)): 101 | break 102 | Z_old = Z 103 | else: 104 | warnings.warn('ADMM failed to converge after %d iterations.' % max_iter) 105 | return C 106 | 107 | 108 | def sparse_regularized_graph(X, positive=False, sparsity_param=None, kmax=None): 109 | '''Sparse Regularized Graph Construction, commonly known as an l1-graph. 110 | 111 | positive : bool, optional 112 | When True, computes the Sparse Probability Graph (SPG). 113 | sparsity_param : float, optional 114 | Controls sparsity cost in the LASSO optimization. 115 | When None, uses cross-validation to find sparsity parameters. 116 | This is very slow, but it gets good results. 117 | kmax : int, optional 118 | When None, allow all points to be edges. Otherwise, restrict to kNN set. 
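    Illustrative call (X is hypothetical data of shape (n, d)):
    sparse_regularized_graph(X, kmax=20) solves one LASSO problem per point,
    over that point's 20 nearest neighbors; with sparsity_param=None, the
    slower cross-validated solvers are used.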
119 | 
120 |   l1-graph: "Semi-supervised Learning by Sparse Representation"
121 |   Yan & Wang, SDM 2009
122 |   http://epubs.siam.org/doi/pdf/10.1137/1.9781611972795.68
123 | 
124 |   SPG: "Nonnegative Sparse Coding for Discriminative Semi-supervised Learning"
125 |   He et al., CVPR 2011
126 |   '''
127 |   clf, X = _l1_graph_setup(X, positive, sparsity_param)
128 |   if kmax is None:
129 |     W = _l1_graph_solve_full(clf, X)
130 |   else:
131 |     W = _l1_graph_solve_k(clf, X, kmax)
132 |   return Graph.from_adj_matrix(W)
133 | 
134 | 
135 | def _l1_graph_solve_full(clf, X):
136 |   n, d = X.shape
137 |   # Solve for each row of W
138 |   W = []
139 |   B = np.vstack((X[1:], np.eye(d)))
140 |   for i, x in enumerate(X):
141 |     # Solve min ||B'a - x|| + |a|
142 |     clf.fit(B.T, x)
143 |     # Set up B for next time
144 |     B[i] = x
145 |     # Extract edge weights (first n-1 coefficients)
146 |     a = ss.csr_matrix(clf.coef_[:n-1])
147 |     a = abs(a)
148 |     a /= a.sum()
149 |     # Add a zero on the diagonal
150 |     a.indices[np.searchsorted(a.indices, i):] += 1
151 |     a._shape = (1, n)  # XXX: hack around lack of csr.resize()
152 |     W.append(a)
153 |   return ss.vstack(W)
154 | 
155 | 
156 | def _l1_graph_solve_k(clf, X, k):
157 |   n, d = X.shape
158 |   nn_inds = nearest_neighbors(X, k=k+1)  # self-edges included
159 |   # Solve for each row of W
160 |   W = []
161 |   B = np.empty((k+d, d))
162 |   B[k:] = np.eye(d)
163 |   for i, x in enumerate(X):
164 |     # Set up B with neighbors of x
165 |     idx = nn_inds[i]
166 |     idx = idx[idx!=i]  # remove self-edge
167 |     B[:k] = X[idx]
168 |     # Solve min ||B'a - x|| + |a|
169 |     clf.fit(B.T, x)
170 |     # Extract edge weights (first k coefficients)
171 |     a = ss.csr_matrix((clf.coef_[:k], idx, [0, k]), shape=(1, n))
172 |     a.eliminate_zeros()  # some of the first k might be zeros
173 |     a = abs(a)
174 |     a /= a.sum()
175 |     W.append(a)
176 |   return ss.vstack(W)
177 | 
178 | 
179 | def _l1_graph_setup(X, positive, alpha):
180 |   n, d = X.shape
181 |   # Choose an efficient Lasso solver
182 |   if alpha is not None:
183 |     if positive or d < n:
184 |       clf = linear_model.Lasso(positive=positive, alpha=alpha)
185 |     else:
186 |       clf = linear_model.LassoLars(alpha=alpha)
187 |   else:
188 |     cv = min(d, 3)
189 |     if positive or d < n:
190 |       clf = linear_model.LassoCV(positive=positive, cv=cv)
191 |     else:
192 |       clf = linear_model.LassoLarsCV(cv=cv)
193 |   # Normalize all samples
194 |   X = X / np.linalg.norm(X, ord=2, axis=1)[:,None]
195 |   return clf, X
196 | 
--------------------------------------------------------------------------------
/graphs/base/base.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | import numpy as np
3 | import scipy.sparse as ss
4 | import warnings
5 | from sklearn.metrics.pairwise import paired_distances
6 | 
7 | from ..mini_six import zip
8 | from ..mixins import (
9 |     AnalysisMixin, EmbedMixin, LabelMixin, TransformMixin, VizMixin)
10 | 
11 | 
12 | class Graph(AnalysisMixin, EmbedMixin, LabelMixin, TransformMixin, VizMixin):
13 | 
14 |   def __init__(self, *args, **kwargs):
15 |     raise NotImplementedError('Graph should not be instantiated directly')
16 | 
17 |   def pairs(self, copy=False, directed=True):
18 |     '''Returns a (num_edges,2)-array of vertex indices (s,t).
19 |     When directed=False, only pairs with s <= t are returned.'''
20 |     raise NotImplementedError()
21 | 
22 |   def matrix(self, *formats, **kwargs):
23 |     '''Returns a (num_vertices,num_vertices) array or sparse matrix, M,
24 |     where M[s,t] is the weight of edge (s,t).
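    Passing several formats, e.g. matrix('dense', 'csr'), allows the graph to
    return whichever listed format is cheapest for its internal representation.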
25 | 26 | formats: sequence of {'dense','csr','csc','coo'} 27 | copy (kwarg): may share memory if copy=False 28 | ''' 29 | raise NotImplementedError() 30 | 31 | def edge_weights(self, copy=False, directed=True): 32 | '''Returns a (num_edges,)-array of edge weights. 33 | Weights correspond to the (s,t) pairs returned by pairs(). 34 | When directed=False, only weights with s <= t are returned.''' 35 | raise NotImplementedError() 36 | 37 | def num_edges(self): 38 | raise NotImplementedError() 39 | 40 | def num_vertices(self): 41 | raise NotImplementedError() 42 | 43 | def symmetrize(self, method='sum', copy=False): 44 | '''Symmetrizes with the given method. {sum,max,avg} 45 | Returns a copy if copy=True.''' 46 | raise NotImplementedError() 47 | 48 | def add_edges(self, from_idx, to_idx, weight=1, symmetric=False, copy=False): 49 | '''Adds all from->to edges. weight may be a scalar or 1d array. 50 | If symmetric=True, also adds to->from edges with the same weights.''' 51 | raise NotImplementedError() 52 | 53 | def remove_edges(self, from_idx, to_idx, symmetric=False, copy=False): 54 | '''Removes all from->to edges, without making sure they already exist. 55 | If symmetric=True, also removes to->from edges.''' 56 | raise NotImplementedError() 57 | 58 | def _update_edges(self, weights, copy=False): 59 | raise NotImplementedError() 60 | 61 | def subgraph(self, mask): 62 | '''Returns the subgraph with vertices V[mask]. 63 | mask : boolean mask, index, or slice''' 64 | raise NotImplementedError() 65 | 66 | def copy(self): 67 | raise NotImplementedError() 68 | 69 | def is_weighted(self): 70 | '''Returns True if edges have associated weights.''' 71 | return False 72 | 73 | def is_directed(self): 74 | '''Returns True if edges *may be* asymmetric.''' 75 | return True 76 | 77 | def add_self_edges(self, weight=None, copy=False): 78 | '''Adds all i->i edges. weight may be a scalar or 1d array.''' 79 | ii = np.arange(self.num_vertices()) 80 | return self.add_edges(ii, ii, weight=weight, symmetric=False, copy=copy) 81 | 82 | def reweight(self, weight, edges=None, copy=False): 83 | '''Replaces existing edge weights. weight may be a scalar or 1d array. 84 | edges is a mask or index array that specifies a subset of edges to modify''' 85 | if not self.is_weighted(): 86 | warnings.warn('Cannot supply weights for unweighted graph; ' 87 | 'ignoring call to reweight') 88 | return self 89 | if edges is None: 90 | return self._update_edges(weight, copy=copy) 91 | ii, jj = self.pairs()[edges].T 92 | return self.add_edges(ii, jj, weight=weight, symmetric=False, copy=copy) 93 | 94 | def reweight_by_distance(self, coords, metric='l2', copy=False): 95 | '''Replaces existing edge weights by distances between connected vertices. 96 | The new weight of edge (i,j) is given by: metric(coords[i], coords[j]). 
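    For example, G.reweight_by_distance(X) with the default metric sets each
    edge weight to the l2 (Euclidean) distance between the endpoints' rows of
    a (hypothetical) coordinate array X.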
97 | coords : (num_vertices x d) array of coordinates, in vertex order 98 | metric : str or callable, see sklearn.metrics.pairwise.paired_distances''' 99 | if not self.is_weighted(): 100 | warnings.warn('Cannot supply weights for unweighted graph; ' 101 | 'ignoring call to reweight_by_distance') 102 | return self 103 | # TODO: take advantage of symmetry of metric function 104 | ii, jj = self.pairs().T 105 | if metric == 'precomputed': 106 | assert coords.ndim == 2 and coords.shape[0] == coords.shape[1] 107 | d = coords[ii,jj] 108 | else: 109 | d = paired_distances(coords[ii], coords[jj], metric=metric) 110 | return self._update_edges(d, copy=copy) 111 | 112 | def adj_list(self): 113 | '''Generates a sequence of lists of neighbor indices: 114 | an adjacency list representation.''' 115 | adj = self.matrix('dense', 'csr') 116 | for row in adj: 117 | yield row.nonzero()[-1] 118 | 119 | def degree(self, kind='out', weighted=True): 120 | '''Returns an array of vertex degrees. 121 | kind : either 'in' or 'out', useful for directed graphs 122 | weighted : controls whether to count edges or sum their weights 123 | ''' 124 | if kind == 'out': 125 | axis = 1 126 | adj = self.matrix('dense', 'csc') 127 | else: 128 | axis = 0 129 | adj = self.matrix('dense', 'csr') 130 | 131 | if not weighted and self.is_weighted(): 132 | # With recent numpy and a dense matrix, could do: 133 | # d = np.count_nonzero(adj, axis=axis) 134 | d = (adj!=0).sum(axis=axis) 135 | else: 136 | d = adj.sum(axis=axis) 137 | return np.asarray(d).ravel() 138 | 139 | def to_igraph(self, weighted=None): 140 | '''Converts this Graph object to an igraph-compatible object. 141 | Requires the python-igraph library.''' 142 | # Import here to avoid ImportErrors when igraph isn't available. 143 | import igraph 144 | ig = igraph.Graph(n=self.num_vertices(), edges=self.pairs().tolist(), 145 | directed=self.is_directed()) 146 | if weighted is not False and self.is_weighted(): 147 | ig.es['weight'] = self.edge_weights() 148 | return ig 149 | 150 | def to_graph_tool(self): 151 | '''Converts this Graph object to a graph_tool-compatible object. 152 | Requires the graph_tool library. 153 | Note that the internal ordering of graph_tool seems to be column-major.''' 154 | # Import here to avoid ImportErrors when graph_tool isn't available. 155 | import graph_tool 156 | gt = graph_tool.Graph(directed=self.is_directed()) 157 | gt.add_edge_list(self.pairs()) 158 | if self.is_weighted(): 159 | weights = gt.new_edge_property('double') 160 | for e,w in zip(gt.edges(), self.edge_weights()): 161 | weights[e] = w 162 | gt.edge_properties['weight'] = weights 163 | return gt 164 | 165 | def to_networkx(self, directed=None): 166 | '''Converts this Graph object to a networkx-compatible object. 
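    Produces an nx.DiGraph when the graph is directed, else an nx.Graph.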
167 | Requires the networkx library.''' 168 | import networkx as nx 169 | directed = directed if directed is not None else self.is_directed() 170 | cls = nx.DiGraph if directed else nx.Graph 171 | adj = self.matrix() 172 | if ss.issparse(adj): 173 | return nx.from_scipy_sparse_matrix(adj, create_using=cls()) 174 | return nx.from_numpy_matrix(adj, create_using=cls()) 175 | -------------------------------------------------------------------------------- /graphs/base/pairs.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import numpy as np 4 | import scipy.sparse as ss 5 | import warnings 6 | 7 | from .base import Graph 8 | 9 | 10 | class EdgePairGraph(Graph): 11 | def __init__(self, pairs, num_vertices=None): 12 | self._pairs = np.atleast_2d(pairs) 13 | # Handle empty-input case 14 | if self._pairs.size == 0: 15 | self._pairs.shape = (0, 2) 16 | self._pairs = self._pairs.astype(np.intp, copy=False) 17 | self._num_vertices = num_vertices if num_vertices is not None else 0 18 | return 19 | # Validate shape and dtype 20 | assert self._pairs.shape[1] == 2 21 | if not np.can_cast(self._pairs, np.intp, casting='same_kind'): 22 | self._pairs = self._pairs.astype(np.intp) 23 | # Set self._num_vertices 24 | if num_vertices is not None: 25 | self._num_vertices = num_vertices 26 | else: 27 | self._num_vertices = self._pairs.max() + 1 28 | 29 | def pairs(self, copy=False, directed=True): 30 | if not directed: 31 | canonical = np.sort(self._pairs, axis=1) 32 | n = self._num_vertices 33 | _, uniq_idx = np.unique(np.ravel_multi_index(canonical.T, (n,n)), 34 | return_index=True) 35 | return canonical[uniq_idx] 36 | if copy: 37 | return self._pairs.copy() 38 | return self._pairs 39 | 40 | def matrix(self, *formats, **kwargs): 41 | kwargs.pop('copy', False) 42 | if kwargs: 43 | raise ValueError('Unexpected kwargs for matrix(): %s' % kwargs) 44 | n = self._num_vertices 45 | row, col = self.pairs().T 46 | data = np.ones(len(row), dtype=np.intp) 47 | adj = ss.coo_matrix((data, (row,col)), shape=(n,n)) 48 | if not formats or 'coo' in formats: 49 | return adj 50 | for fmt in formats: 51 | if fmt != 'dense': 52 | return adj.asformat(fmt) 53 | if 'dense' in formats: 54 | return adj.toarray() 55 | raise NotImplementedError('Unknown matrix format(s): %s' % (formats,)) 56 | 57 | def copy(self): 58 | return EdgePairGraph(self._pairs.copy(), num_vertices=self._num_vertices) 59 | 60 | def num_edges(self): 61 | return len(self._pairs) 62 | 63 | def num_vertices(self): 64 | return self._num_vertices 65 | 66 | def add_edges(self, from_idx, to_idx, 67 | weight=None, symmetric=False, copy=False): 68 | if weight is not None: 69 | warnings.warn('Cannot supply weights for unweighted graph; ' 70 | 'ignoring weight argument') 71 | to_add = np.column_stack((from_idx, to_idx)) 72 | if symmetric: 73 | # add reversed edges, excluding diagonals 74 | diag_mask = np.not_equal(*to_add.T) 75 | rev = to_add[diag_mask,::-1] 76 | to_add = np.vstack((to_add, rev)) 77 | # select only those edges that are not already present 78 | flattener = (self._num_vertices, 1) 79 | flat_inds = self._pairs.dot(flattener) 80 | flat_add = to_add.dot(flattener) 81 | to_add = to_add[np.in1d(flat_add, flat_inds, invert=True)] 82 | # add the new edges 83 | res = self.copy() if copy else self 84 | if len(to_add) > 0: 85 | res._pairs = np.vstack((self._pairs, to_add)) 86 | return res 87 | 88 | def remove_edges(self, from_idx, to_idx, symmetric=False, copy=False): 89 | from_idx, 
to_idx = np.atleast_1d(from_idx, to_idx) 90 | flat_inds = self._pairs.dot((self._num_vertices, 1)) 91 | to_remove = from_idx * self._num_vertices + to_idx 92 | if symmetric: 93 | to_remove = np.concatenate((to_remove, 94 | to_idx * self._num_vertices + from_idx)) 95 | mask = np.in1d(flat_inds, to_remove, invert=True) 96 | res = self.copy() if copy else self 97 | res._pairs = res._pairs[mask] 98 | return res 99 | 100 | def symmetrize(self, method=None, copy=False): 101 | '''Symmetrizes (ignores method). Returns a copy if copy=True.''' 102 | if copy: 103 | return SymmEdgePairGraph(self._pairs.copy(), 104 | num_vertices=self._num_vertices) 105 | shape = (self._num_vertices, self._num_vertices) 106 | flat_inds = np.union1d(np.ravel_multi_index(self._pairs.T, shape), 107 | np.ravel_multi_index(self._pairs.T[::-1], shape)) 108 | self._pairs = np.transpose(np.unravel_index(flat_inds, shape)) 109 | return self 110 | 111 | def subgraph(self, mask): 112 | nv = self.num_vertices() 113 | idx = np.arange(nv)[mask] 114 | idx_map = np.full(nv, -1) 115 | idx_map[idx] = np.arange(len(idx)) 116 | pairs = idx_map[self._pairs] 117 | pairs = pairs[(pairs >= 0).all(axis=1)] 118 | return EdgePairGraph(pairs, num_vertices=len(idx)) 119 | 120 | pairs.__doc__ = Graph.pairs.__doc__ 121 | matrix.__doc__ = Graph.matrix.__doc__ 122 | add_edges.__doc__ = Graph.add_edges.__doc__ 123 | remove_edges.__doc__ = Graph.remove_edges.__doc__ 124 | subgraph.__doc__ = Graph.subgraph.__doc__ 125 | 126 | 127 | class SymmEdgePairGraph(EdgePairGraph): 128 | def __init__(self, pairs, num_vertices=None, ensure_format=True): 129 | EdgePairGraph.__init__(self, pairs, num_vertices=num_vertices) 130 | if ensure_format: 131 | # push all edges to upper triangle 132 | self._pairs.sort() 133 | # remove any duplicates 134 | shape = (self._num_vertices, self._num_vertices) 135 | _, idx = np.unique(np.ravel_multi_index(self._pairs.T, shape), 136 | return_index=True) 137 | self._pairs = self._pairs[idx] 138 | self._offdiag_mask = np.not_equal(*self._pairs.T) 139 | 140 | def pairs(self, copy=False, directed=True): 141 | if directed: 142 | return np.vstack((self._pairs[self._offdiag_mask], self._pairs[:,::-1])) 143 | if copy: 144 | return self._pairs.copy() 145 | return self._pairs 146 | 147 | def num_edges(self): 148 | num_offdiag_edges = np.count_nonzero(self._offdiag_mask) 149 | return len(self._pairs) + num_offdiag_edges 150 | 151 | def copy(self): 152 | return SymmEdgePairGraph(self._pairs.copy(), 153 | num_vertices=self._num_vertices, 154 | ensure_format=False) 155 | 156 | def remove_edges(self, from_idx, to_idx, symmetric=False, copy=False): 157 | '''Removes all from->to and to->from edges. 
158 |     Note: the symmetric kwarg is unused.'''
159 |     flat_inds = self._pairs.dot((self._num_vertices, 1))
160 |     # convert to sorted order and flatten
161 |     to_remove = (np.minimum(from_idx, to_idx) * self._num_vertices
162 |                  + np.maximum(from_idx, to_idx))
163 |     mask = np.in1d(flat_inds, to_remove, invert=True)
164 |     res = self.copy() if copy else self
165 |     res._pairs = res._pairs[mask]
166 |     res._offdiag_mask = res._offdiag_mask[mask]
167 |     return res
168 | 
169 |   def symmetrize(self, method=None, copy=False):
170 |     '''No-op: already symmetric. Returns self, or a shallow copy if copy=True.'''
171 |     if not copy:
172 |       return self
173 |     return SymmEdgePairGraph(self._pairs, num_vertices=self._num_vertices,
174 |                              ensure_format=False)
175 | 
176 |   def subgraph(self, mask):
177 |     g = EdgePairGraph.subgraph(self, mask)
178 |     return SymmEdgePairGraph(g._pairs, num_vertices=g._num_vertices,
179 |                              ensure_format=False)
180 | 
181 |   pairs.__doc__ = Graph.pairs.__doc__
182 |   subgraph.__doc__ = Graph.subgraph.__doc__
183 | 
--------------------------------------------------------------------------------
/graphs/mixins/embed.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import warnings
3 | from scipy.sparse import issparse
4 | from scipy.sparse.linalg import eigsh
5 | from scipy.linalg import eig, eigh
6 | from sklearn.decomposition import KernelPCA
7 | 
8 | 
9 | class EmbedMixin(object):
10 | 
11 |   def isomap(self, num_dims=None, directed=None):
12 |     '''Isomap embedding.
13 | 
14 |     num_dims : dimension of embedded coordinates, defaults to input dimension
15 |     directed : used for .shortest_path() calculation
16 |     '''
17 |     W = -0.5 * self.shortest_path(directed=directed) ** 2
18 |     kpca = KernelPCA(n_components=num_dims, kernel='precomputed')
19 |     return kpca.fit_transform(W)
20 | 
21 |   def laplacian_eigenmaps(self, num_dims=None, normed=True, val_thresh=1e-8):
22 |     '''Laplacian Eigenmaps embedding.
23 | 
24 |     num_dims : dimension of embedded coordinates, defaults to input dimension
25 |     normed : used for .laplacian() calculation
26 |     val_thresh : threshold for omitting vectors with near-zero eigenvalues
27 |     '''
28 |     L = self.laplacian(normed=normed)
29 |     return _null_space(L, num_dims, val_thresh, overwrite=True)
30 | 
31 |   def locality_preserving_projections(self, coordinates, num_dims=None):
32 |     '''Locality Preserving Projections (LPP, linearized Laplacian Eigenmaps).'''
33 |     X = np.atleast_2d(coordinates)  # n x d
34 |     L = self.laplacian(normed=True)  # n x n
35 |     u,s,_ = np.linalg.svd(X.T.dot(X))
36 |     Fplus = np.linalg.pinv(u * np.sqrt(s))  # d x d
37 |     n, d = X.shape
38 |     if n >= d:  # optimized order: F(X'LX)F'
39 |       T = Fplus.dot(X.T.dot(L.dot(X))).dot(Fplus.T)
40 |     else:  # optimized order: (FX')L(XF')
41 |       T = Fplus.dot(X.T).dot(L.dot(X.dot(Fplus.T)))
42 |     L = 0.5*(T+T.T)
43 |     return _null_space(L, num_vecs=num_dims, overwrite=True)
44 | 
45 |   def locally_linear_embedding(self, num_dims=None):
46 |     '''Locally Linear Embedding (LLE).
47 |     Note: may need to call barycenter_edge_weights() before this!
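   |     (_null_space below recovers the bottom eigenvectors of M = (I-W)'(I-W),
   |     which yields the LLE embedding only when each row of W holds
   |     reconstruction weights summing to 1 -- the weighting that
   |     barycenter_edge_weights produces.)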
48 | ''' 49 | W = self.matrix() 50 | # compute M = (I-W)'(I-W) 51 | M = W.T.dot(W) - W.T - W 52 | if issparse(M): 53 | M = M.toarray() 54 | M.flat[::M.shape[0] + 1] += 1 55 | return _null_space(M, num_vecs=num_dims, overwrite=True) 56 | 57 | def neighborhood_preserving_embedding(self, X, num_dims=None, reweight=True): 58 | '''Neighborhood Preserving Embedding (NPE, linearized LLE).''' 59 | if reweight: 60 | W = self.barycenter_edge_weights(X).matrix() 61 | else: 62 | W = self.matrix() 63 | # compute M = (I-W)'(I-W) as in LLE 64 | M = W.T.dot(W) - W.T - W 65 | if issparse(M): 66 | M = M.toarray() 67 | M.flat[::M.shape[0] + 1] += 1 68 | # solve generalized eig problem: X'MXa = \lambda X'Xa 69 | vals, vecs = eig(X.T.dot(M).dot(X), X.T.dot(X), overwrite_a=True, 70 | overwrite_b=True) 71 | if num_dims is None: 72 | return vecs 73 | return vecs[:,:num_dims] 74 | 75 | def laplacian_pca(self, coordinates, num_dims=None, beta=0.5): 76 | '''Graph-Laplacian PCA (CVPR 2013). 77 | coordinates : (n,d) array-like, assumed to be mean-centered. 78 | beta : float in [0,1], scales how much PCA/LapEig contributes. 79 | Returns an approximation of input coordinates, ala PCA.''' 80 | X = np.atleast_2d(coordinates) 81 | L = self.laplacian(normed=True) 82 | kernel = X.dot(X.T) 83 | kernel /= eigsh(kernel, k=1, which='LM', return_eigenvectors=False) 84 | L /= eigsh(L, k=1, which='LM', return_eigenvectors=False) 85 | W = (1-beta)*(np.identity(kernel.shape[0]) - kernel) + beta*L 86 | if num_dims is None: 87 | vals, vecs = np.linalg.eigh(W) 88 | else: 89 | vals, vecs = eigh(W, eigvals=(0, num_dims-1), overwrite_a=True) 90 | return X.T.dot(vecs).dot(vecs.T).T 91 | 92 | def layout_circle(self): 93 | '''Position vertices evenly around a circle.''' 94 | n = self.num_vertices() 95 | t = np.linspace(0, 2*np.pi, n+1)[:n] 96 | return np.column_stack((np.cos(t), np.sin(t))) 97 | 98 | def layout_spring(self, num_dims=2, spring_constant=None, iterations=50, 99 | initial_temp=0.1, initial_layout=None): 100 | '''Position vertices using the Fruchterman-Reingold (spring) algorithm. 101 | 102 | num_dims : int (default=2) 103 | Number of dimensions to embed vertices in. 104 | 105 | spring_constant : float (default=None) 106 | Optimal distance between nodes. If None the distance is set to 107 | 1/sqrt(n) where n is the number of nodes. Increase this value 108 | to move nodes farther apart. 109 | 110 | iterations : int (default=50) 111 | Number of iterations of spring-force relaxation 112 | 113 | initial_temp : float (default=0.1) 114 | Largest step-size allowed in the dynamics, decays linearly. 115 | Must be positive, should probably be less than 1. 116 | 117 | initial_layout : array-like of shape (n, num_dims) 118 | If provided, serves as the initial placement of vertex coordinates. 119 | ''' 120 | if initial_layout is None: 121 | X = np.random.random((self.num_vertices(), num_dims)) 122 | else: 123 | X = np.array(initial_layout, dtype=float, copy=True) 124 | assert X.shape == (self.num_vertices(), num_dims) 125 | if spring_constant is None: 126 | # default to sqrt(area_of_viewport / num_vertices) 127 | spring_constant = X.shape[0] ** -0.5 128 | S = self.matrix('csr', 'csc', 'coo', copy=True) 129 | S.data[:] = 1. 
/ S.data # Convert to similarity 130 | ii,jj = S.nonzero() # cache nonzero indices 131 | # simple cooling scheme, linearly steps down 132 | cooling_scheme = np.linspace(initial_temp, 0, iterations+2)[:-2] 133 | # this is still O(V^2) 134 | # could use multilevel methods to speed this up significantly 135 | for t in cooling_scheme: 136 | delta = X[:,None] - X[None] 137 | distance = _bounded_norm(delta, 1e-8) 138 | # repulsion from all vertices 139 | force = spring_constant**2 / distance 140 | # attraction from connected vertices 141 | force[ii,jj] -= S.data * distance[ii,jj]**2 / spring_constant 142 | displacement = np.einsum('ijk,ij->ik', delta, force) 143 | # update positions 144 | length = _bounded_norm(displacement, 1e-2) 145 | X += displacement * t / length[:,None] 146 | return X 147 | 148 | 149 | def _null_space(X, num_vecs=None, val_thresh=1e-8, overwrite=False): 150 | if issparse(X): 151 | # This is a bit of a hack. Make sure we end up with enough eigenvectors. 152 | k = X.shape[0] - 1 if num_vecs is None else num_vecs + 1 153 | try: 154 | # TODO: try using shift-invert mode (sigma=0?) for speed here. 155 | vals,vecs = eigsh(X, k, which='SM') 156 | except: 157 | warnings.warn('Sparse eigsh failed, falling back to dense version') 158 | X = X.toarray() 159 | overwrite = True 160 | if not issparse(X): 161 | vals,vecs = eigh(X, overwrite_a=overwrite) 162 | # vals are not guaranteed to be in sorted order 163 | idx = np.argsort(vals) 164 | vecs = vecs.real[:,idx] 165 | vals = vals.real[idx] 166 | # discard any with really small eigenvalues 167 | i = np.searchsorted(vals, val_thresh) 168 | if num_vecs is None: 169 | # take all of them 170 | num_vecs = vals.shape[0] - i 171 | return vecs[:,i:i+num_vecs] 172 | 173 | 174 | def _bounded_norm(X, min_length): 175 | length = np.linalg.norm(X, ord=2, axis=-1) 176 | np.maximum(length, min_length, out=length) 177 | return length 178 | -------------------------------------------------------------------------------- /graphs/construction/tests/test_regularized.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | np.set_printoptions(suppress=True, precision=3) 3 | import unittest 4 | import warnings 5 | from numpy.testing import assert_array_almost_equal 6 | try: 7 | from sklearn.exceptions import ConvergenceWarning 8 | except ImportError: 9 | from sklearn.utils import ConvergenceWarning 10 | 11 | from graphs.construction import sparse_regularized_graph, smce_graph 12 | 13 | 14 | class TestRegularized(unittest.TestCase): 15 | def setUp(self): 16 | np.random.seed(1234) 17 | self.pts = _gaussian_clusters(2, 5, 20) 18 | 19 | def test_L1_graph(self): 20 | expected = [ 21 | [0, 0.286,0.352,0.362,0, 0, 0, 0, 0, 0], 22 | [0.637,0, 0.209,0, 0.153,0, 0, 0, 0, 0], 23 | [0.446,0.133,0, 0, 0.421,0, 0, 0, 0, 0], 24 | [0.493,0, 0, 0, 0.507,0, 0, 0, 0, 0], 25 | [0, 0.062,0.493,0.444,0, 0, 0, 0, 0, 0], 26 | [0, 0, 0, 0, 0, 0, 0.906,0.055,0.039,0], 27 | [0, 0, 0, 0, 0, 0.603,0, 0, 0.136,0.261], 28 | [0, 0, 0, 0, 0, 0.172,0, 0, 0.332,0.496], 29 | [0, 0, 0, 0, 0, 0.007,0.576,0.278,0, 0.139], 30 | [0, 0, 0, 0, 0, 0, 0.441,0.395,0.164,0] 31 | ] 32 | G = sparse_regularized_graph(self.pts, sparsity_param=0.005) 33 | assert_array_almost_equal(G.matrix('dense'), expected, decimal=3) 34 | 35 | def test_L1_knn_graph(self): 36 | expected = [ 37 | [0, 0.286,0.352,0.362,0, 0, 0, 0, 0, 0], 38 | [0.637,0, 0.209,0, 0.153,0, 0, 0, 0, 0], 39 | [0.446,0.133,0, 0, 0.421,0, 0, 0, 0, 0], 40 | [0.493,0, 0, 0, 0.507,0, 0, 0, 0, 0], 41 
| [0, 0, 0.535,0.465,0, 0, 0, 0, 0, 0], 42 | [0, 0, 0, 0, 0, 0, 0.924,0, 0.076,0], 43 | [0, 0, 0, 0, 0, 0.603,0, 0, 0.136,0.261], 44 | [0, 0, 0, 0, 0, 0, 0, 0, 0.454,0.546], 45 | [0, 0, 0, 0, 0, 0.138,0.520,0, 0, 0.343], 46 | [0, 0, 0, 0, 0, 0, 0.441,0.395,0.164,0] 47 | ] 48 | G = sparse_regularized_graph(self.pts, sparsity_param=0.005, kmax=3) 49 | assert_array_almost_equal(G.matrix('dense'), expected, decimal=3) 50 | 51 | def test_L1_graph_cv(self): 52 | expected = [ 53 | [0, 0.231,0.372,0.397,0, 0, 0, 0, 0, 0], 54 | [0.670,0, 0.205,0, 0.124,0, 0, 0, 0, 0], 55 | [0.437,0.138,0, 0.012,0.413,0, 0, 0, 0, 0], 56 | [0.503,0, 0, 0, 0.497,0, 0, 0, 0, 0], 57 | [0, 0.053,0.509,0.438,0, 0, 0, 0, 0, 0], 58 | [0, 0, 0, 0, 0, 0, 0.914,0.061,0.025,0], 59 | [0, 0, 0, 0, 0, 0.597,0, 0, 0.139,0.264], 60 | [0, 0, 0, 0, 0, 0.311,0, 0, 0.391,0.297], 61 | [0, 0, 0, 0, 0, 0.043,0.544,0.310,0, 0.103], 62 | [0, 0, 0, 0, 0, 0, 0.428,0.399,0.173,0] 63 | ] 64 | with warnings.catch_warnings(): 65 | warnings.filterwarnings('ignore', category=ConvergenceWarning) 66 | G = sparse_regularized_graph(self.pts) 67 | assert_array_almost_equal(G.matrix('dense'), expected, decimal=3) 68 | 69 | def test_SPD_graph(self): 70 | expected = [ 71 | [0, 0.216,0.380,0.404,0, 0, 0, 0, 0, 0], 72 | [0.676,0, 0.123,0, 0.202,0, 0, 0, 0, 0], 73 | [0.377,0.140,0, 0, 0.483,0, 0, 0, 0, 0], 74 | [0.506,0, 0, 0, 0.441,0, 0, 0, 0.053,0], 75 | [0.017,0.065,0.454,0.464,0, 0, 0, 0, 0, 0], 76 | [0, 0, 0, 0, 0, 0, 0.907,0.093,0, 0], 77 | [0, 0, 0, 0, 0, 0.575,0, 0, 0.117,0.308], 78 | [0, 0, 0, 0, 0, 0.295,0, 0, 0.319,0.386], 79 | [0, 0, 0, 0, 0, 0.010,0.599,0.274,0, 0.117], 80 | [0, 0, 0, 0, 0, 0, 0.440,0.386,0.174,0] 81 | ] 82 | G = sparse_regularized_graph(self.pts, positive=True, sparsity_param=0.002) 83 | assert_array_almost_equal(G.matrix('dense'), expected, decimal=3) 84 | 85 | def test_SPD_graph_cv(self): 86 | expected = [ 87 | [0, 0.230,0.380,0.390,0, 0, 0, 0, 0, 0], 88 | [0.603,0, 0.209,0, 0.188,0, 0, 0, 0, 0], 89 | [0.366,0.133,0, 0, 0.501,0, 0, 0, 0, 0], 90 | [0.414,0, 0.119,0, 0.383,0, 0, 0, 0.084,0], 91 | [0.002,0.062,0.482,0.454,0, 0, 0, 0, 0, 0], 92 | [0, 0, 0, 0, 0, 0, 0.921,0.079,0, 0], 93 | [0, 0, 0, 0, 0.006,0.584,0, 0, 0.088,0.322], 94 | [0, 0, 0, 0, 0, 0.286,0, 0, 0.288,0.426], 95 | [0, 0, 0, 0, 0, 0.052,0.541,0.254,0, 0.153], 96 | [0, 0, 0, 0, 0, 0, 0.458,0.408,0.134,0] 97 | ] 98 | G = sparse_regularized_graph(self.pts, positive=True) 99 | assert_array_almost_equal(G.matrix('dense'), expected, decimal=3) 100 | 101 | def test_smce_graph(self): 102 | expected = [ 103 | [0, 0.318,0.323,0.359,0, 0, 0, 0, 0, 0], 104 | [0.68, 0, 0.13, 0, 0.191,0, 0, 0, 0, 0], 105 | [0.537,0.047,0, 0, 0.417,0, 0, 0, 0, 0], 106 | [0.492,0, 0, 0, 0.508,0, 0, 0, 0, 0], 107 | [0.063,0.055,0.382,0.5, 0, 0, 0, 0, 0, 0], 108 | [0, 0, 0, 0, 0, 0, 0.768,0, 0.232,0], 109 | [0, 0, 0, 0, 0, 0.667,0, 0, 0.01, 0.323], 110 | [0, 0, 0, 0, 0, 0.031,0.125,0, 0.215,0.629], 111 | [0, 0, 0, 0, 0, 0.386,0.155,0.049,0, 0.41], 112 | [0, 0, 0, 0, 0, 0, 0.391,0.343,0.266,0] 113 | ] 114 | G = smce_graph(self.pts, keep_ratio=1) 115 | assert_array_almost_equal(G.matrix('dense'), expected, decimal=3) 116 | 117 | # use keep_ratio = 0.9 118 | expected = [ 119 | [0, 0.326,0.302,0.348,0, 0, 0, 0, 0, 0], 120 | [0.478,0, 0.222,0, 0.243,0, 0, 0, 0, 0], 121 | [0.376,0.197,0, 0, 0.376,0, 0, 0, 0, 0], 122 | [0.447,0, 0, 0, 0.474,0, 0, 0, 0, 0], 123 | [0, 0.188,0.348,0.443,0, 0, 0, 0, 0, 0], 124 | [0, 0, 0, 0, 0, 0, 0.604,0, 0.305,0], 125 | [0, 0, 0, 0, 0, 0.539,0, 0, 0.097,0.32], 126 | [0, 0, 
0, 0, 0, 0.176,0.1, 0, 0.227,0.496], 127 | [0, 0, 0, 0, 0, 0.374,0.111,0.179,0, 0.335], 128 | [0, 0, 0, 0, 0, 0, 0.353,0.364,0.283,0] 129 | ] 130 | G = smce_graph(self.pts, kmax=8, keep_ratio=0.9) 131 | assert_array_almost_equal(G.matrix('dense'), expected, decimal=3) 132 | 133 | 134 | def _gaussian_clusters(num_clusters, pts_per_cluster, dim): 135 | n = num_clusters * pts_per_cluster 136 | offsets = np.random.uniform(-9, 9, (num_clusters, dim)) 137 | return np.random.randn(n, dim) + np.repeat(offsets, pts_per_cluster, axis=0) 138 | 139 | 140 | if __name__ == '__main__': 141 | unittest.main() 142 | -------------------------------------------------------------------------------- /graphs/mixins/label.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import numpy as np 3 | import scipy.linalg as sl 4 | import scipy.sparse as ss 5 | import warnings 6 | from itertools import count 7 | from scipy.sparse.linalg import eigs 8 | from sklearn.cluster import spectral_clustering 9 | 10 | from ..mini_six import range 11 | 12 | 13 | class LabelMixin(object): 14 | 15 | def color_greedy(self): 16 | '''Returns a greedy vertex coloring as an array of ints.''' 17 | n = self.num_vertices() 18 | coloring = np.zeros(n, dtype=int) 19 | for i, nbrs in enumerate(self.adj_list()): 20 | nbr_colors = set(coloring[nbrs]) 21 | for c in count(1): 22 | if c not in nbr_colors: 23 | coloring[i] = c 24 | break 25 | return coloring 26 | 27 | def bicolor_spectral(self): 28 | '''Returns an approximate 2-coloring as an array of booleans. 29 | 30 | From "A Multiscale Pyramid Transform for Graph Signals" by Shuman et al. 31 | Note: Assumes a single connected component, and may fail otherwise. 32 | ''' 33 | lap = self.laplacian().astype(float) 34 | vals, vecs = eigs(lap, k=1, which='LM') 35 | vec = vecs[:,0].real 36 | return vec > 0 if vec[0] > 0 else vec < 0 37 | 38 | def cluster_spectral(self, num_clusters, kernel='rbf'): 39 | aff = self.kernelize(kernel).matrix() 40 | return spectral_clustering(aff, n_clusters=num_clusters) 41 | 42 | def classify_nearest(self, partial_labels): 43 | '''Simple semi-supervised classification, by assigning unlabeled vertices 44 | the label of nearest labeled vertex. 45 | 46 | partial_labels: (n,) array of integer labels, -1 for unlabeled. 47 | ''' 48 | labels = np.array(partial_labels, copy=True) 49 | unlabeled = labels == -1 50 | # compute geodesic distances from unlabeled vertices 51 | D_unlabeled = self.shortest_path(weighted=True)[unlabeled] 52 | # set distances to other unlabeled vertices to infinity 53 | D_unlabeled[:,unlabeled] = np.inf 54 | # find shortest distances to labeled vertices 55 | idx = D_unlabeled.argmin(axis=1) 56 | # apply the label of the closest vertex 57 | labels[unlabeled] = labels[idx] 58 | return labels 59 | 60 | def classify_lgc(self, partial_labels, kernel='rbf', alpha=0.2, tol=1e-3, 61 | max_iter=30): 62 | '''Iterative label spreading for semi-supervised classification. 63 | 64 | partial_labels: (n,) array of integer labels, -1 for unlabeled. 65 | kernel: one of {'none', 'rbf', 'binary'}, for reweighting edges. 66 | alpha: scalar, clamping factor. 67 | tol: scalar, convergence tolerance. 68 | max_iter: integer, cap on the number of iterations performed. 69 | 70 | From "Learning with local and global consistency" 71 | by Zhou et al. in 2004. 72 | 73 | Based on the LabelSpreading implementation in scikit-learn. 
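   |     Each iteration propagates the current label distributions one step
   |     through the (kernelized) graph, then clamps them back toward the known
   |     labels, with alpha controlling the propagation/clamping balance.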
74 | ''' 75 | # compute the gram matrix 76 | gram = -self.kernelize(kernel).laplacian(normed=True) 77 | if ss.issparse(gram): 78 | gram.data[gram.row == gram.col] = 0 79 | else: 80 | np.fill_diagonal(gram, 0) 81 | 82 | # initialize label distributions 83 | partial_labels = np.asarray(partial_labels) 84 | unlabeled = partial_labels == -1 85 | label_dists, classes = _onehot(partial_labels, mask=~unlabeled) 86 | 87 | # initialize clamping terms 88 | clamp_weights = np.where(unlabeled, alpha, 1)[:,None] 89 | y_static = label_dists * min(1 - alpha, 1) 90 | 91 | # iterate 92 | for it in range(max_iter): 93 | old_label_dists = label_dists 94 | label_dists = gram.dot(label_dists) 95 | label_dists *= clamp_weights 96 | label_dists += y_static 97 | # check convergence 98 | if np.abs(label_dists - old_label_dists).sum() <= tol: 99 | break 100 | else: 101 | warnings.warn("classify_lgc didn't converge in %d iterations" % max_iter) 102 | 103 | return classes[label_dists.argmax(axis=1)] 104 | 105 | def classify_local(self, partial_labels, C_l=10.0, C_u=1e-6): 106 | '''Local Learning Regularization for semi-supervised classification. 107 | 108 | partial_labels: (n,) array of integer labels, -1 for unlabeled. 109 | 110 | From "Transductive Classification via Local Learning Regularization" 111 | by Wu & Scholkopf in 2007. 112 | ''' 113 | raise NotImplementedError('NYI') 114 | 115 | def classify_harmonic(self, partial_labels, use_CMN=True): 116 | '''Harmonic function method for semi-supervised classification, 117 | also known as the Gaussian Mean Fields algorithm. 118 | 119 | partial_labels: (n,) array of integer labels, -1 for unlabeled. 120 | use_CMN : when True, apply Class Mass Normalization 121 | 122 | From "Semi-Supervised Learning Using Gaussian Fields and Harmonic Functions" 123 | by Zhu, Ghahramani, and Lafferty in 2003. 124 | 125 | Based on the matlab code at: 126 | http://pages.cs.wisc.edu/~jerryzhu/pub/harmonic_function.m 127 | ''' 128 | # prepare labels 129 | labels = np.array(partial_labels, copy=True) 130 | unlabeled = labels == -1 131 | 132 | # convert known labels to one-hot encoding 133 | fl, classes = _onehot(labels[~unlabeled]) 134 | 135 | L = self.laplacian(normed=False) 136 | if ss.issparse(L): 137 | L = L.tocsr()[unlabeled].toarray() 138 | else: 139 | L = L[unlabeled] 140 | 141 | Lul = L[:,~unlabeled] 142 | Luu = L[:,unlabeled] 143 | fu = -np.linalg.solve(Luu, Lul.dot(fl)) 144 | 145 | if use_CMN: 146 | scale = (1 + fl.sum(axis=0)) / fu.sum(axis=0) 147 | fu *= scale 148 | 149 | # assign new labels 150 | labels[unlabeled] = classes[fu.argmax(axis=1)] 151 | return labels 152 | 153 | def regression(self, y, y_mask, smoothness_penalty=0, kernel='rbf'): 154 | '''Perform vertex-valued regression, given partial labels. 155 | y : (n,d) array of known labels 156 | y_mask : index object such that all_labels[y_mask] == y 157 | 158 | From "Regularization and Semi-supervised Learning on Large Graphs" 159 | by Belkin, Matveeva, and Niyogi in 2004. 160 | Doesn't support multiple labels per vertex, unlike the paper's algorithm. 161 | To allow provided y values to change, use a (small) smoothness_penalty. 
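   |     smoothness_penalty == 0 solves the hard-constrained problem
   |     (Algorithm 2, "Interpolated Regularization" in the paper); a positive
   |     penalty solves the soft-constrained one (Algorithm 1, "Tikhonov
   |     Regularization").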
162 | ''' 163 | n = self.num_vertices() 164 | 165 | # input validation for y 166 | y = np.array(y, copy=True) 167 | ravel_f = False 168 | if y.ndim == 1: 169 | y = y[:,None] 170 | ravel_f = True 171 | if y.ndim != 2 or y.size == 0: 172 | raise ValueError('Invalid shape of y array: %s' % (y.shape,)) 173 | k, d = y.shape 174 | 175 | # input validation for y_mask 176 | if not hasattr(y_mask, 'dtype') or y_mask.dtype != 'bool': 177 | tmp = np.zeros(n, dtype=bool) 178 | tmp[y_mask] = True 179 | y_mask = tmp 180 | 181 | # mean-center known y for stability 182 | y_mean = y.mean(axis=0) 183 | y -= y_mean 184 | 185 | # use the normalized Laplacian for the smoothness matrix 186 | S = self.kernelize(kernel).laplacian(normed=True) 187 | if ss.issparse(S): 188 | S = S.tocsr() 189 | 190 | if smoothness_penalty == 0: 191 | # see Algorithm 2: Interpolated Regularization 192 | unlabeled_mask = ~y_mask 193 | S_23 = S[unlabeled_mask, :] 194 | S_3 = S_23[:, unlabeled_mask] 195 | rhs = S_23[:, y_mask].dot(y) 196 | if ss.issparse(S): 197 | f_unlabeled = ss.linalg.spsolve(S_3, rhs) 198 | if f_unlabeled.ndim == 1: 199 | f_unlabeled = f_unlabeled[:,None] 200 | else: 201 | f_unlabeled = sl.solve(S_3, rhs, sym_pos=True, overwrite_a=True, 202 | overwrite_b=True) 203 | f = np.zeros((n, d)) 204 | f[y_mask] = y 205 | f[unlabeled_mask] = -f_unlabeled 206 | else: 207 | # see Algorithm 1: Tikhonov Regularization in the paper 208 | y_hat = np.zeros((n, d)) 209 | y_hat[y_mask] = y 210 | I = y_mask.astype(float) # only one label per vertex 211 | lhs = k * smoothness_penalty * S 212 | if ss.issparse(lhs): 213 | lhs.setdiag(lhs.diagonal() + I) 214 | f = ss.linalg.lsqr(lhs, y_hat)[0] 215 | else: 216 | lhs.flat[::n+1] += I 217 | f = sl.solve(lhs, y_hat, sym_pos=True, overwrite_a=True, 218 | overwrite_b=True) 219 | 220 | # re-add the mean 221 | f += y_mean 222 | if ravel_f: 223 | return f.ravel() 224 | return f 225 | 226 | 227 | def _onehot(labels, mask=Ellipsis): 228 | classes = np.unique(labels[mask]) 229 | onehot = np.zeros((len(labels), len(classes)), dtype=int) 230 | for idx, label in enumerate(classes): 231 | onehot[labels==label, idx] = 1 232 | return onehot, classes 233 | -------------------------------------------------------------------------------- /graphs/base/adj.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import numpy as np 4 | import scipy.sparse as ss 5 | 6 | from .base import Graph 7 | 8 | 9 | class AdjacencyMatrixGraph(Graph): 10 | 11 | def copy(self): 12 | return self.__class__(self._adj.copy()) 13 | 14 | def num_vertices(self): 15 | return self._adj.shape[0] 16 | 17 | def is_weighted(self): 18 | return True 19 | 20 | def subgraph(self, mask): 21 | adj = self.matrix('dense', 'csr', 'csc') 22 | sub_adj = adj[mask][:,mask] 23 | return Graph.from_adj_matrix(sub_adj) 24 | 25 | subgraph.__doc__ = Graph.subgraph.__doc__ 26 | 27 | 28 | class DenseAdjacencyMatrixGraph(AdjacencyMatrixGraph): 29 | def __init__(self, adj): 30 | self._adj = np.atleast_2d(adj) 31 | assert self._adj.shape[0] == self._adj.shape[1] 32 | 33 | def pairs(self, copy=False, directed=True): 34 | adj = self._adj if directed else np.triu(self._adj) 35 | return np.transpose(np.nonzero(adj)) 36 | 37 | def matrix(self, *formats, **kwargs): 38 | copy = kwargs.pop('copy', False) 39 | if kwargs: 40 | raise ValueError('Unexpected kwargs for matrix(): %s' % kwargs) 41 | if not formats or 'dense' in formats: 42 | if copy: 43 | return self._adj.copy() 44 | return 
self._adj 45 | if 'csr' in formats: 46 | return ss.csr_matrix(self._adj) 47 | if 'csc' in formats: 48 | return ss.csc_matrix(self._adj) 49 | if 'coo' in formats: 50 | return ss.coo_matrix(self._adj) 51 | raise NotImplementedError('Unknown matrix format(s): %s' % (formats,)) 52 | 53 | def edge_weights(self, copy=False, directed=True): 54 | ii,jj = self.pairs(directed=directed).T 55 | return self._adj[ii,jj] 56 | 57 | def num_edges(self): 58 | return np.count_nonzero(self._adj) 59 | 60 | def add_edges(self, from_idx, to_idx, weight=1, symmetric=False, copy=False): 61 | weight = np.atleast_1d(1 if weight is None else weight) 62 | res_dtype = np.promote_types(weight.dtype, self._adj.dtype) 63 | adj = self._adj.astype(res_dtype, copy=copy) 64 | adj[from_idx, to_idx] = weight 65 | if symmetric: 66 | adj[to_idx, from_idx] = weight 67 | if copy: 68 | return DenseAdjacencyMatrixGraph(adj) 69 | self._adj = adj 70 | return self 71 | 72 | def remove_edges(self, from_idx, to_idx, symmetric=False, copy=False): 73 | adj = self._adj.copy() if copy else self._adj 74 | adj[from_idx, to_idx] = 0 75 | if symmetric: 76 | adj[to_idx, from_idx] = 0 77 | if copy: 78 | return DenseAdjacencyMatrixGraph(adj) 79 | self._adj = adj 80 | return self 81 | 82 | def _update_edges(self, weights, copy=False): 83 | weights = np.asarray(weights) 84 | res_dtype = np.promote_types(weights.dtype, self._adj.dtype) 85 | adj = self._adj.astype(res_dtype, copy=copy) 86 | adj[adj != 0] = weights 87 | if copy: 88 | return DenseAdjacencyMatrixGraph(adj) 89 | self._adj = adj 90 | return self 91 | 92 | def symmetrize(self, method='sum', copy=False): 93 | adj = _symmetrize(self._adj, method) 94 | if copy: 95 | return DenseAdjacencyMatrixGraph(adj) 96 | self._adj = adj 97 | return self 98 | 99 | pairs.__doc__ = Graph.pairs.__doc__ 100 | matrix.__doc__ = Graph.matrix.__doc__ 101 | edge_weights.__doc__ = Graph.edge_weights.__doc__ 102 | add_edges.__doc__ = Graph.add_edges.__doc__ 103 | remove_edges.__doc__ = Graph.remove_edges.__doc__ 104 | symmetrize.__doc__ = Graph.symmetrize.__doc__ 105 | 106 | 107 | class SparseAdjacencyMatrixGraph(AdjacencyMatrixGraph): 108 | def __init__(self, adj, may_have_zeros=True): 109 | assert ss.issparse(adj), 'SparseAdjacencyMatrixGraph input must be sparse' 110 | if adj.format not in ('coo', 'csr', 'csc'): 111 | adj = adj.tocsr() 112 | self._adj = adj 113 | assert self._adj.shape[0] == self._adj.shape[1] 114 | if may_have_zeros: 115 | # Things go wrong if we have explicit zeros in the graph. 
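   |       # (num_edges() reports nnz and edge_weights() returns .data directly,
   |       #  so any stored-but-zero entries would be miscounted as edges)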
116 | _eliminate_zeros(self._adj) 117 | 118 | def pairs(self, copy=False, directed=True): 119 | adj = self._adj if directed else ss.triu(self._adj) 120 | return np.transpose(adj.nonzero()) 121 | 122 | def matrix(self, *formats, **kwargs): 123 | copy = kwargs.pop('copy', False) 124 | if kwargs: 125 | raise ValueError('Unexpected kwargs for matrix(): %s' % kwargs) 126 | if not formats or self._adj.format in formats: 127 | if copy: 128 | return self._adj.copy() 129 | return self._adj 130 | for fmt in formats: 131 | if fmt != 'dense': 132 | return self._adj.asformat(fmt) 133 | if 'dense' in formats: 134 | return self._adj.toarray() 135 | raise NotImplementedError('Unknown matrix format(s): %s' % (formats,)) 136 | 137 | def edge_weights(self, copy=False, directed=True): 138 | if not directed: 139 | ii, jj = ss.triu(self._adj).nonzero() 140 | return np.asarray(self._adj[ii, jj]).ravel() 141 | # XXX: assumes correct internal ordering and no explicit zeros 142 | w = self._adj.data.ravel() 143 | if copy: 144 | return w.copy() 145 | return w 146 | 147 | def num_edges(self): 148 | return self._adj.nnz 149 | 150 | def add_edges(self, from_idx, to_idx, weight=1, symmetric=False, copy=False): 151 | adj = self._weightable_adj(weight, copy) 152 | if adj.format == 'coo': 153 | adj = adj.tocsr() 154 | adj[from_idx, to_idx] = weight 155 | if symmetric: 156 | adj[to_idx, from_idx] = weight 157 | return self._post_weighting(adj, weight, copy) 158 | 159 | def remove_edges(self, from_idx, to_idx, symmetric=False, copy=False): 160 | adj = self._adj.copy() if copy else self._adj 161 | if adj.format == 'coo': 162 | adj = adj.tocsr() 163 | adj[from_idx, to_idx] = 0 164 | if symmetric: 165 | adj[to_idx, from_idx] = 0 166 | return self._post_weighting(adj, 0, copy) 167 | 168 | def _update_edges(self, weights, copy=False): 169 | adj = self._weightable_adj(weights, copy) 170 | adj.data[:] = weights 171 | return self._post_weighting(adj, weights, copy) 172 | 173 | def add_self_edges(self, weight=1, copy=False): 174 | adj = self._weightable_adj(weight, copy) 175 | try: 176 | adj.setdiag(weight) 177 | except TypeError: # pragma: no cover 178 | # Older scipy doesn't support setdiag on everything. 
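   |       # (fall back to CSR, which implements setdiag, at the cost of a
   |       #  format conversion)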
179 | adj = adj.tocsr() 180 | adj.setdiag(weight) 181 | return self._post_weighting(adj, weight, copy) 182 | 183 | def reweight(self, weight, edges=None, copy=False): 184 | adj = self._weightable_adj(weight, copy) 185 | if edges is None: 186 | adj.data[:] = weight 187 | else: 188 | adj.data[edges] = weight 189 | return self._post_weighting(adj, weight, copy) 190 | 191 | def _weightable_adj(self, weight, copy): 192 | weight = np.atleast_1d(weight) 193 | adj = self._adj 194 | res_dtype = np.promote_types(weight.dtype, adj.dtype) 195 | if copy: 196 | adj = adj.copy() 197 | if res_dtype is not adj.dtype: 198 | adj.data = adj.data.astype(res_dtype) 199 | return adj 200 | 201 | def _post_weighting(self, adj, weight, copy): 202 | # Check if we might have changed the sparsity structure by adding zeros 203 | has_zeros = np.any(weight == 0) 204 | if copy: 205 | return SparseAdjacencyMatrixGraph(adj, may_have_zeros=has_zeros) 206 | self._adj = _eliminate_zeros(adj) if has_zeros else adj 207 | return self 208 | 209 | def symmetrize(self, method='sum', copy=False): 210 | adj = _symmetrize(self._adj.tocsr(), method) 211 | if copy: 212 | return SparseAdjacencyMatrixGraph(adj, may_have_zeros=False) 213 | self._adj = adj 214 | return self 215 | 216 | pairs.__doc__ = Graph.pairs.__doc__ 217 | matrix.__doc__ = Graph.matrix.__doc__ 218 | edge_weights.__doc__ = Graph.edge_weights.__doc__ 219 | add_edges.__doc__ = Graph.add_edges.__doc__ 220 | remove_edges.__doc__ = Graph.remove_edges.__doc__ 221 | symmetrize.__doc__ = Graph.symmetrize.__doc__ 222 | add_self_edges.__doc__ = Graph.add_self_edges.__doc__ 223 | reweight.__doc__ = Graph.reweight.__doc__ 224 | 225 | 226 | def _symmetrize(A, method): 227 | if method == 'sum': 228 | S = A + A.T 229 | elif method == 'max': 230 | if ss.issparse(A): 231 | S = A.maximum(A.T) 232 | else: 233 | S = np.maximum(A, A.T) 234 | else: 235 | S = (A + A.T) / 2.0 236 | return S 237 | 238 | 239 | def _eliminate_zeros(A): 240 | if hasattr(A, 'eliminate_zeros'): 241 | A.eliminate_zeros() 242 | elif A.format == 'coo': # pragma: no cover 243 | # old scipy doesn't provide coo_matrix.eliminate_zeros 244 | nz_mask = A.data != 0 245 | A.data = A.data[nz_mask] 246 | A.row = A.row[nz_mask] 247 | A.col = A.col[nz_mask] 248 | else: 249 | raise ValueError("Can't eliminate_zeros from type: %s" % type(A)) 250 | return A 251 | -------------------------------------------------------------------------------- /graphs/mixins/transformation.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, absolute_import, print_function 2 | import numpy as np 3 | import scipy.sparse as ss 4 | import scipy.sparse.csgraph as ssc 5 | from scipy.linalg import solve 6 | from collections import deque 7 | 8 | from ..mini_six import range 9 | 10 | 11 | class TransformMixin(object): 12 | 13 | def kernelize(self, kernel): 14 | '''Re-weight according to a specified kernel function. 
15 |     kernel : str, {none, binary, rbf}
16 |       none -> no reweighting
17 |       binary -> all edges are given weight 1
18 |       rbf -> applies a gaussian function to edge weights
19 |     '''
20 |     if kernel == 'none':
21 |       return self
22 |     if kernel == 'binary':
23 |       if self.is_weighted():
24 |         return self._update_edges(1, copy=True)
25 |       return self
26 |     if kernel == 'rbf':
27 |       w = self.edge_weights()
28 |       r = np.exp(-w / w.std())
29 |       return self._update_edges(r, copy=True)
30 |     raise ValueError('Invalid kernel type: %r' % kernel)
31 | 
32 |   def barycenter_edge_weights(self, X, copy=True, reg=1e-3):
33 |     '''Re-weight such that the sum of each vertex's edge weights is 1.
34 |     The resulting weighted graph is suitable for locally linear embedding.
35 |     reg : amount of regularization to keep the problem well-posed
36 |     '''
37 |     new_weights = []
38 |     for i, adj in enumerate(self.adj_list()):
39 |       C = X[adj] - X[i]
40 |       G = C.dot(C.T)
41 |       trace = np.trace(G)
42 |       r = reg * trace if trace > 0 else reg
43 |       G.flat[::G.shape[1] + 1] += r
44 |       w = solve(G, np.ones(G.shape[0]), sym_pos=True,
45 |                 overwrite_a=True, overwrite_b=True)
46 |       w /= w.sum()
47 |       new_weights.extend(w.tolist())
48 |     return self.reweight(new_weights, copy=copy)
49 | 
50 |   def connected_subgraphs(self, directed=True, ordered=False):
51 |     '''Generates connected components as subgraphs.
52 |     When ordered=True, subgraphs are ordered by descending number of vertices.
53 |     '''
54 |     num_ccs, labels = self.connected_components(directed=directed)
55 |     # check the trivial case first
56 |     if num_ccs == 1:
57 |       yield self
58 |       return  # (PEP 479: raising StopIteration inside a generator is an error)
59 |     if ordered:
60 |       # sort by descending size (num vertices)
61 |       order = np.argsort(np.bincount(labels))[::-1]
62 |     else:
63 |       order = range(num_ccs)
64 | 
65 |     # don't use self.subgraph() here, because we can reuse adj
66 |     adj = self.matrix('dense', 'csr', 'csc')
67 |     for c in order:
68 |       mask = labels == c
69 |       sub_adj = adj[mask][:,mask]
70 |       yield self.__class__.from_adj_matrix(sub_adj)
71 | 
72 |   def shortest_path_subtree(self, start_idx, directed=True):
73 |     '''Returns a subgraph containing only the shortest paths from start_idx to
74 |     every other vertex.
75 |     '''
76 |     adj = self.matrix()
77 |     _, pred = ssc.dijkstra(adj, directed=directed, indices=start_idx,
78 |                            return_predecessors=True)
79 |     adj = ssc.reconstruct_path(adj, pred, directed=directed)
80 |     if not directed:
81 |       adj = adj + adj.T
82 |     return self.__class__.from_adj_matrix(adj)
83 | 
84 |   def minimum_spanning_subtree(self):
85 |     '''Returns the (undirected) minimum spanning tree subgraph.'''
86 |     dist = self.matrix('dense', copy=True)
87 |     dist[dist==0] = np.inf
88 |     np.fill_diagonal(dist, 0)
89 |     mst = ssc.minimum_spanning_tree(dist)
90 |     return self.__class__.from_adj_matrix(mst + mst.T)
91 | 
92 |   def neighborhood_subgraph(self, start_idx, radius=1, weighted=True,
93 |                             directed=True, return_mask=False):
94 |     '''Returns a subgraph containing only vertices within a given
95 |     geodesic radius of start_idx.'''
96 |     adj = self.matrix('dense', 'csr', 'csc')
97 |     dist = ssc.dijkstra(adj, directed=directed, indices=start_idx,
98 |                         unweighted=(not weighted), limit=radius)
99 |     mask = np.isfinite(dist)
100 |     sub_adj = adj[mask][:,mask]
101 |     g = self.__class__.from_adj_matrix(sub_adj)
102 |     if return_mask:
103 |       return g, mask
104 |     return g
105 | 
106 |   def isograph(self, min_weight=None):
107 |     '''Remove short-circuit edges using the Isograph algorithm.
108 | 
109 |     min_weight : float, optional
110 |       Minimum weight of edges to consider removing.
Defaults to max(MST). 111 | 112 | From "Isograph: Neighbourhood Graph Construction Based On Geodesic Distance 113 | For Semi-Supervised Learning" by Ghazvininejad et al., 2011. 114 | Note: This uses the non-iterative algorithm which removes edges 115 | rather than reweighting them. 116 | ''' 117 | W = self.matrix('dense') 118 | # get candidate edges: all edges - MST edges 119 | tree = self.minimum_spanning_subtree() 120 | candidates = np.argwhere((W - tree.matrix('dense')) > 0) 121 | cand_weights = W[candidates[:,0], candidates[:,1]] 122 | # order by increasing edge weight 123 | order = np.argsort(cand_weights) 124 | cand_weights = cand_weights[order] 125 | # disregard edges shorter than a threshold 126 | if min_weight is None: 127 | min_weight = tree.edge_weights().max() 128 | idx = np.searchsorted(cand_weights, min_weight) 129 | cand_weights = cand_weights[idx:] 130 | candidates = candidates[order[idx:]] 131 | # check each candidate edge 132 | to_remove = np.zeros_like(cand_weights, dtype=bool) 133 | for i, (u,v) in enumerate(candidates): 134 | W_uv = np.where(W < cand_weights[i], W, 0) 135 | len_uv = ssc.dijkstra(W_uv, indices=u, unweighted=True, limit=2)[v] 136 | if len_uv > 2: 137 | to_remove[i] = True 138 | ii, jj = candidates[to_remove].T 139 | return self.remove_edges(ii, jj, copy=True) 140 | 141 | def circle_tear(self, spanning_tree='mst', cycle_len_thresh=5, spt_idx=None, 142 | copy=True): 143 | '''Circular graph tearing. 144 | 145 | spanning_tree: one of {'mst', 'spt'} 146 | cycle_len_thresh: int, length of longest allowable cycle 147 | spt_idx: int, start vertex for shortest_path_subtree, random if None 148 | 149 | From "How to project 'circular' manifolds using geodesic distances?" 150 | by Lee & Verleysen, ESANN 2004. 151 | 152 | See also: shortest_path_subtree, minimum_spanning_subtree 153 | ''' 154 | # make the initial spanning tree graph 155 | if spanning_tree == 'mst': 156 | tree = self.minimum_spanning_subtree().matrix() 157 | elif spanning_tree == 'spt': 158 | if spt_idx is None: 159 | spt_idx = np.random.choice(self.num_vertices()) 160 | tree = self.shortest_path_subtree(spt_idx, directed=False).matrix() 161 | 162 | # find edges in self but not in the tree 163 | potential_edges = np.argwhere(ss.triu(self.matrix() - tree)) 164 | 165 | # remove edges that induce large cycles 166 | ii, jj = _find_cycle_inducers(tree, potential_edges, cycle_len_thresh) 167 | return self.remove_edges(ii, jj, symmetric=True, copy=copy) 168 | 169 | def cycle_cut(self, cycle_len_thresh=12, directed=False, copy=True): 170 | '''CycleCut algorithm: removes bottleneck edges. 
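   |     Repeatedly samples atomic cycles longer than cycle_len_thresh and cuts
   |     their edges, then restores any cut edge that turns out not to induce a
   |     long cycle.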
171 | Paper DOI: 10.1.1.225.5335 172 | ''' 173 | symmetric = not directed 174 | adj = self.kernelize('binary').matrix('csr', 'dense', copy=True) 175 | if symmetric: 176 | adj = adj + adj.T 177 | 178 | removed_edges = [] 179 | while True: 180 | c = _atomic_cycle(adj, cycle_len_thresh, directed=directed) 181 | if c is None: 182 | break 183 | # remove edges in the cycle 184 | ii, jj = c.T 185 | adj[ii,jj] = 0 186 | if symmetric: 187 | adj[jj,ii] = 0 188 | removed_edges.extend(c) 189 | 190 | #XXX: if _atomic_cycle changes, may need to do this on each loop 191 | if ss.issparse(adj): 192 | adj.eliminate_zeros() 193 | 194 | # select only the necessary cuts 195 | ii, jj = _find_cycle_inducers(adj, removed_edges, cycle_len_thresh, 196 | directed=directed) 197 | # remove the bad edges 198 | return self.remove_edges(ii, jj, symmetric=symmetric, copy=copy) 199 | 200 | 201 | def _atomic_cycle(adj, length_thresh, directed=False): 202 | # TODO: make this more efficient 203 | start_vertex = np.random.choice(adj.shape[0]) 204 | # run BFS 205 | q = deque([start_vertex]) 206 | visited_vertices = set([start_vertex]) 207 | visited_edges = set() 208 | while q: 209 | a = q.popleft() 210 | nbrs = adj[a].nonzero()[-1] 211 | for b in nbrs: 212 | if b not in visited_vertices: 213 | q.append(b) 214 | visited_vertices.add(b) 215 | visited_edges.add((a,b)) 216 | if not directed: 217 | visited_edges.add((b,a)) 218 | continue 219 | # run an inner BFS 220 | inner_q = deque([b]) 221 | inner_visited = set([b]) 222 | parent_vertices = {b: -1} 223 | while inner_q: 224 | c = inner_q.popleft() 225 | inner_nbrs = adj[c].nonzero()[-1] 226 | for d in inner_nbrs: 227 | if d in inner_visited or (d,c) not in visited_edges: 228 | continue 229 | parent_vertices[d] = c 230 | inner_q.append(d) 231 | inner_visited.add(d) 232 | if d != a: 233 | continue 234 | # atomic cycle found 235 | cycle = [] 236 | while parent_vertices[d] != -1: 237 | x, d = d, parent_vertices[d] 238 | cycle.append((x, d)) 239 | cycle.append((d, a)) 240 | if len(cycle) >= length_thresh: 241 | return np.array(cycle) 242 | else: 243 | # abort the inner BFS 244 | inner_q.clear() 245 | break 246 | # finished considering edge a->b 247 | visited_edges.add((a,b)) 248 | if not directed: 249 | visited_edges.add((b,a)) 250 | # no cycles found 251 | return None 252 | 253 | 254 | def _find_cycle_inducers(adj, potential_edges, length_thresh, directed=False): 255 | # remove edges that induce large cycles 256 | path_dist = ssc.dijkstra(adj, directed=directed, return_predecessors=False, 257 | unweighted=True) 258 | remove_ii, remove_jj = [], [] 259 | for i,j in potential_edges: 260 | if length_thresh < path_dist[i,j] < np.inf: 261 | remove_ii.append(i) 262 | remove_jj.append(j) 263 | else: 264 | # keeping this edge: update path lengths 265 | tmp = (path_dist[:,i] + 1)[:,None] + path_dist[j,:] 266 | ii, jj = np.nonzero(tmp < path_dist) 267 | new_lengths = tmp[ii, jj] 268 | path_dist[ii,jj] = new_lengths 269 | if not directed: 270 | path_dist[jj,ii] = new_lengths 271 | return remove_ii, remove_jj 272 | --------------------------------------------------------------------------------
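A minimal usage sketch (hypothetical; not a file from this repo). It assumes
EdgePairGraph is importable from graphs.base.pairs as the layout above
suggests, and it exercises only methods visible in this dump: degree() and
adj_list() from the Graph base class, and symmetrize() from pairs.py.

import numpy as np

from graphs.base.pairs import EdgePairGraph  # assumed import path

# A directed 4-cycle: 0 -> 1 -> 2 -> 3 -> 0.
g = EdgePairGraph(np.array([[0, 1], [1, 2], [2, 3], [3, 0]]))

print(g.num_vertices())                      # 4 (inferred as max index + 1)
print(g.num_edges())                         # 4
print(g.degree(kind='out', weighted=False))  # [1 1 1 1]

# symmetrize(copy=True) returns a SymmEdgePairGraph, whose num_edges()
# counts each off-diagonal edge in both directions.
sg = g.symmetrize(copy=True)
print(sg.num_edges())                        # 8
print([list(nbrs) for nbrs in sg.adj_list()])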