├── graphs
    ├── tests
    │   ├── __init__.py
    │   └── test_reorder.py
    ├── base
    │   ├── tests
    │   │   ├── __init__.py
    │   │   ├── test_adj.py
    │   │   ├── test_pairs.py
    │   │   └── test_static.py
    │   ├── __init__.py
    │   ├── base.py
    │   ├── pairs.py
    │   └── adj.py
    ├── datasets
    │   ├── tests
    │   │   ├── __init__.py
    │   │   ├── test_mountain_car.py
    │   │   ├── test_shapes.py
    │   │   └── test_swiss_roll.py
    │   ├── __init__.py
    │   ├── swiss_roll.py
    │   ├── shapes.py
    │   └── mountain_car.py
    ├── construction
    │   ├── tests
    │   │   ├── __init__.py
    │   │   ├── test_directed.py
    │   │   ├── test_downsample.py
    │   │   ├── test_msg.py
    │   │   ├── test_spanning_tree.py
    │   │   ├── test_b_matching.py
    │   │   ├── test_incremental.py
    │   │   ├── test_saffron.py
    │   │   ├── test_neighbors.py
    │   │   ├── test_geometric.py
    │   │   └── test_regularized.py
    │   ├── __init__.py
    │   ├── directed.py
    │   ├── incremental.py
    │   ├── spanning_tree.py
    │   ├── downsample.py
    │   ├── geometric.py
    │   ├── saffron.py
    │   ├── b_matching.py
    │   ├── neighbors.py
    │   ├── _fast_paths.pyx
    │   └── regularized.py
    ├── generators
    │   ├── tests
    │   │   ├── __init__.py
    │   │   ├── test_rand.py
    │   │   ├── test_structured.py
    │   │   └── test_trajectories.py
    │   ├── __init__.py
    │   ├── rand.py
    │   ├── trajectories.py
    │   └── structured.py
    ├── _version.py
    ├── mixins
    │   ├── __init__.py
    │   ├── _betweenness_helper.pyxbld
    │   ├── tests
    │   │   ├── test_betweenness.py
    │   │   ├── test_viz.py
    │   │   ├── test_analysis.py
    │   │   ├── test_label.py
    │   │   ├── test_embed.py
    │   │   └── test_transformation.py
    │   ├── _betweenness.py
    │   ├── _betweenness_helper.pyx
    │   ├── analysis.py
    │   ├── embed.py
    │   ├── label.py
    │   └── transformation.py
    ├── mini_six.py
    ├── __init__.py
    └── reorder.py
├── benchmarks
    ├── benchmarks
    │   ├── __init__.py
    │   ├── basic.py
    │   ├── construction.py
    │   └── mixins.py
    ├── .gitignore
    └── asv.conf.json
├── .gitignore
├── run_tests.sh
├── .landscape.yml
├── .travis.yml
├── .coveragerc
├── examples
    ├── interactive.py
    ├── short_circuit.py
    └── swiss_roll.py
├── setup.py
├── LICENSE
└── README.md

/graphs/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /graphs/base/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /graphs/datasets/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /benchmarks/benchmarks/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /graphs/construction/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /graphs/generators/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /graphs/_version.py: -------------------------------------------------------------------------------- 1 | __version__ = '0.1.3' 2 | -------------------------------------------------------------------------------- /benchmarks/.gitignore: -------------------------------------------------------------------------------- 1 | env 2 | graphs 3 | results 4 | html -------------------------------------------------------------------------------- /.gitignore:
-------------------------------------------------------------------------------- 1 | *.pyc 2 | *.pyxbldc 3 | htmlcov/ 4 | .coverage 5 | build/ 6 | dist/ 7 | *.egg-info/ 8 | *.swp 9 | *.c 10 | *.cpp 11 | *.so 12 | -------------------------------------------------------------------------------- /run_tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | test_dirs=$(find graphs/ -type d -name tests | xargs) 4 | nosetests --with-cov --cov-report html --cov=graphs/ $test_dirs \ 5 | && coverage report 6 | 7 | -------------------------------------------------------------------------------- /graphs/mixins/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from .analysis import AnalysisMixin 4 | from .embed import EmbedMixin 5 | from .label import LabelMixin 6 | from .transformation import TransformMixin 7 | from .viz import VizMixin 8 | -------------------------------------------------------------------------------- /.landscape.yml: -------------------------------------------------------------------------------- 1 | strictness: medium 2 | pep8: 3 | disable: 4 | - E111 5 | - E114 6 | - E231 7 | - E225 8 | - E402 9 | - W503 10 | pylint: 11 | disable: 12 | - bad-indentation 13 | - invalid-name 14 | - too-many-arguments 15 | ignore-paths: 16 | - benchmarks/ 17 | -------------------------------------------------------------------------------- /graphs/mini_six.py: -------------------------------------------------------------------------------- 1 | '''Py3k compatibility hacks.''' 2 | 3 | __all__ = ['range', 'zip', 'zip_longest'] 4 | 5 | # If we're on Python 2, use xrange instead of range, etc 6 | if type(range(1)) is list: 7 | range = xrange 8 | from itertools import izip_longest as zip_longest, izip as zip 9 | else: 10 | range = range 11 | zip = zip 12 | from itertools import zip_longest 13 | -------------------------------------------------------------------------------- /graphs/mixins/_betweenness_helper.pyxbld: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def make_ext(modname, pyxfilename): 4 | from distutils.extension import Extension 5 | return Extension( 6 | name=modname, 7 | sources=[pyxfilename], 8 | extra_compile_args=['-O2'], 9 | libraries=['stdc++'], 10 | language='c++') 11 | 12 | def make_setup_args(): 13 | return {'include_dirs': np.get_include()} 14 | 15 | -------------------------------------------------------------------------------- /graphs/generators/__init__.py: -------------------------------------------------------------------------------- 1 | '''Graph generation helper functions. 2 | 3 | trajectories : helpers for working with trajectory data 4 | structured : functions for generating chain/lattice graphs 5 | rand : functions for generating graphs with random edges 6 | ''' 7 | from __future__ import absolute_import 8 | 9 | from . import trajectories 10 | from .structured import chain_graph, lattice_graph 11 | from .rand import random_graph 12 | -------------------------------------------------------------------------------- /graphs/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | '''Dataset generation functions. 
2 | 3 | mountain_car : the "Mountain Car" toy domain from reinforcement learning 4 | shapes : various parameterized shapes 5 | swiss_roll : the "Swiss Roll" toy domain from manifold learning 6 | ''' 7 | from __future__ import absolute_import 8 | 9 | from .mountain_car import mountain_car_trajectories 10 | from .shapes import MobiusStrip, FigureEight, SCurve 11 | from .swiss_roll import swiss_roll 12 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | sudo: false 3 | python: 4 | - "2.7" 5 | - "3.4" 6 | - "3.5" 7 | cache: pip 8 | before_install: 9 | - pip install --upgrade pip 10 | - pip install wheel 11 | - pip install numpy scipy Cython scikit-learn matplotlib coveralls nose-cov 12 | script: 13 | - nosetests --with-cov --cov=graphs/ graphs/tests/ graphs/base/tests/ graphs/construction/tests/ graphs/datasets/tests/ graphs/generators/tests/ graphs/mixins/tests/ 14 | after_success: coveralls 15 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | branch = true 3 | omit = 4 | *.pyxbld* 5 | 6 | [report] 7 | # Regexes for lines to exclude from consideration 8 | exclude_lines = 9 | # Have to re-enable the standard pragma 10 | pragma: no cover 11 | 12 | # Don't complain if tests don't hit defensive assertion code: 13 | raise AssertionError 14 | raise NotImplementedError 15 | 16 | # Don't complain if non-runnable code isn't run: 17 | if False: 18 | if __name__ == .__main__.: 19 | 20 | # Don't complain about import-guarded code 21 | except ImportError: 22 | -------------------------------------------------------------------------------- /examples/interactive.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import numpy as np 3 | from matplotlib import pyplot as plt 4 | from graphs.construction import neighbor_graph 5 | 6 | if hasattr(__builtins__, 'raw_input'): 7 | input = raw_input 8 | 9 | 10 | def main(): 11 | print("Select coordinates for graph vertices:") 12 | plt.plot([]) 13 | coords = np.array(plt.ginput(n=-1, timeout=-1)) 14 | 15 | k = int(input("Number of nearest neighbors: ")) 16 | g = neighbor_graph(coords, k=k) 17 | 18 | print("Resulting graph:") 19 | g.plot(coords, vertex_style='ro')() 20 | 21 | if __name__ == '__main__': 22 | main() 23 | -------------------------------------------------------------------------------- /graphs/generators/tests/test_rand.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import numpy as np 3 | import unittest 4 | from numpy.testing import assert_array_equal 5 | 6 | from ..rand import random_graph 7 | 8 | 9 | class TestRandomGraph(unittest.TestCase): 10 | def test_random_graph(self): 11 | for degree in (np.ones(5), [1,2,2], [1,0,0,1]): 12 | G = random_graph(degree) 13 | assert_array_equal(degree, G.degree(kind='out')) 14 | self.assertEqual(1, G.edge_weights().max()) 15 | 16 | # Check that degrees >= n will throw an error 17 | self.assertRaises(ValueError, random_graph, [1,2,3]) 18 | 19 | 20 | if __name__ == '__main__': 21 | unittest.main() 22 | -------------------------------------------------------------------------------- /graphs/__init__.py: -------------------------------------------------------------------------------- 1 | ''' 2 |
Graphs: a library for efficiently manipulating graphs. 3 | 4 | Graph -- the base class for all graph objects. 5 | construction -- a module for constructing graphs from data. 6 | generators -- a module for generating graphs with desired properties. 7 | datasets -- a module providing sample datasets. 8 | reorder -- a module for reordering graph vertices. 9 | 10 | To create a Graph object, use the static constructors: 11 | `Graph.from_adj_matrix` or `Graph.from_edge_pairs`. 12 | ''' 13 | from __future__ import absolute_import 14 | 15 | from ._version import __version__ 16 | from .base import Graph 17 | from . import construction, generators, datasets, reorder 18 | -------------------------------------------------------------------------------- /benchmarks/benchmarks/basic.py: -------------------------------------------------------------------------------- 1 | import matplotlib 2 | matplotlib.use('template') 3 | import scipy.sparse as ss 4 | from graphs import Graph 5 | 6 | 7 | class BasicOperations(object): 8 | params = ['dense', 'coo', 'csr'] 9 | param_names = ['adj_format'] 10 | 11 | def setup(self, adj_format): 12 | n = 1500 13 | density = 0.2 14 | adj = ss.rand(n, n, density=density) 15 | if adj_format == 'dense': 16 | self.adj = adj.A 17 | else: 18 | self.adj = adj.asformat(adj_format) 19 | self.G = Graph.from_adj_matrix(self.adj) 20 | 21 | def time_construction(self, adj_format): 22 | Graph.from_adj_matrix(self.adj) 23 | 24 | def time_num_edges(self, adj_format): 25 | self.G.num_edges() 26 | 27 | def time_num_vertices(self, adj_format): 28 | self.G.num_vertices() 29 | -------------------------------------------------------------------------------- /graphs/construction/tests/test_directed.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import numpy as np 3 | import unittest 4 | 5 | from ...datasets.shapes import SCurve 6 | from .. import directed_graph 7 | 8 | 9 | class TestDirected(unittest.TestCase): 10 | 11 | def test_directed_graph(self): 12 | # XXX: This kind of testing isn't exactly reproducible across versions 13 | np.random.seed(1234) 14 | traj = SCurve().trajectories(5, 20) 15 | G, X = directed_graph(traj, k=5, pruning_thresh=0, return_coords=True) 16 | P = directed_graph(traj, k=5, pruning_thresh=0.1) 17 | self.assertEqual(X.shape, (100, 3)) 18 | self.assertEqual(G.num_edges(), 500) 19 | # Results may vary slightly, so just check that we're <500 20 | self.assertLess(P.num_edges(), 500) 21 | 22 | 23 | if __name__ == '__main__': 24 | unittest.main() 25 | -------------------------------------------------------------------------------- /graphs/datasets/tests/test_mountain_car.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import numpy as np 3 | import unittest 4 | from matplotlib import pyplot 5 | pyplot.switch_backend('template') 6 | 7 | from ... import Graph 8 | from .. 
import mountain_car as mcar 9 | 10 | 11 | class TestMountainCar(unittest.TestCase): 12 | 13 | def test_traj_sampling(self): 14 | traj, traces = mcar.mountain_car_trajectories(3) 15 | self.assertEqual(len(traces), 3) 16 | self.assertEqual(len(traj), 3) 17 | self.assertEqual(traj[0].shape[1], 2) 18 | self.assertEqual(traj[1].shape[1], 2) 19 | self.assertEqual(traj[2].shape[1], 2) 20 | 21 | def test_basis_plotting(self): 22 | pts = np.random.random((5, 2)) 23 | G = Graph.from_adj_matrix(np.random.random((5,5))) 24 | mcar.plot_mcar_basis(G, pts) 25 | 26 | 27 | if __name__ == '__main__': 28 | unittest.main() 29 | -------------------------------------------------------------------------------- /benchmarks/benchmarks/construction.py: -------------------------------------------------------------------------------- 1 | import matplotlib 2 | matplotlib.use('template') 3 | import numpy as np 4 | from sklearn.metrics import pairwise_distances 5 | import graphs.construction as gc 6 | 7 | 8 | class Neighbors(object): 9 | n = 500 10 | params = [[None, 0.25, 0.5], [None, 1, 100], ['none', 'binary']] 11 | param_names = ['epsilon', 'k', 'weighting'] 12 | 13 | def setup(self, epsilon, k, weighting): 14 | if epsilon is None and k is None: 15 | raise NotImplementedError() 16 | self.X = np.random.random((self.n, 3)) 17 | self.D = pairwise_distances(self.X) 18 | 19 | def time_neighbor_graph(self, epsilon, k, weighting): 20 | gc.neighbor_graph(self.X, k=k, epsilon=epsilon, weighting=weighting) 21 | 22 | def time_neighbor_graph_precomputed(self, epsilon, k, weighting): 23 | gc.neighbor_graph(self.D, k=k, epsilon=epsilon, weighting=weighting, 24 | precomputed=True) 25 | -------------------------------------------------------------------------------- /graphs/generators/rand.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import numpy as np 3 | from scipy.sparse import coo_matrix 4 | 5 | from .. import Graph 6 | 7 | __all__ = ['random_graph'] 8 | 9 | 10 | def random_graph(out_degree): 11 | '''Random graph generator. Does not generate self-edges. 12 | out_degree : array-like of ints, controlling the out degree of each vertex. 13 | ''' 14 | n = len(out_degree) 15 | out_degree = np.asarray(out_degree, dtype=int) 16 | if (out_degree >= n).any(): 17 | raise ValueError('Cannot have degree >= num_vertices') 18 | row = np.repeat(np.arange(n), out_degree) 19 | weights = np.ones_like(row, dtype=float) 20 | # Generate random edges from 0 to n-2, then shift by one to avoid self-edges. 
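# (For row i, any sampled value v >= i becomes v + 1, so column i is never produced and every other column in [0, n-1] remains reachable.)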
21 | col = np.concatenate([np.random.choice(n-1, d, replace=False) 22 | for d in out_degree]) 23 | col[col >= row] += 1 24 | adj = coo_matrix((weights, (row, col)), shape=(n, n)) 25 | return Graph.from_adj_matrix(adj) 26 | -------------------------------------------------------------------------------- /graphs/mixins/tests/test_betweenness.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | from numpy.testing import assert_array_equal 4 | 5 | # Test the non-Cython version specifically 6 | from graphs.mixins._betweenness import _betweenness 7 | 8 | ADJ = np.array([[0,1,2,0], 9 | [1,0,0,3], 10 | [2,0,0,1], 11 | [0,3,1,0]]) 12 | 13 | 14 | class TestBetweenness(unittest.TestCase): 15 | 16 | def test_betweenness_edge_unweighted(self): 17 | res = _betweenness(ADJ, False, False) 18 | assert_array_equal(res, [2,2,2,2,2,2,2,2]) 19 | 20 | def test_betweenness_edge_weighted(self): 21 | res = _betweenness(ADJ, True, False) 22 | assert_array_equal(res, [2,3,2,1,3,2,1,2]) 23 | 24 | def test_betweenness_vertex_unweighted(self): 25 | res = _betweenness(ADJ, False, True) 26 | assert_array_equal(res, [1,1,1,1]) 27 | 28 | def test_betweenness_vertex_weighted(self): 29 | res = _betweenness(ADJ, True, True) 30 | assert_array_equal(res, [2,0,2,0]) 31 | 32 | 33 | if __name__ == '__main__': 34 | unittest.main() 35 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from setuptools import setup, find_packages, Extension 3 | 4 | try: 5 | from Cython.Build import cythonize 6 | import numpy as np 7 | except ImportError: 8 | use_cython = False 9 | else: 10 | use_cython = True 11 | 12 | version = open('graphs/_version.py').read().strip().split('=', 1)[1].strip(" '") 13 | 14 | setup_kwargs = dict( 15 | name='graphs', 16 | version=version, 17 | author='CJ Carey', 18 | author_email='perimosocordiae@gmail.com', 19 | description='A library for graph-based machine learning.', 20 | url='http://github.com/all-umass/graphs', 21 | license='MIT', 22 | packages=find_packages(exclude=['tests']), 23 | package_data={'': ['*.pyx']}, 24 | install_requires=[ 25 | 'numpy >= 1.8', 26 | 'scipy >= 0.14', 27 | 'scikit-learn >= 0.15', 28 | 'matplotlib >= 1.3.1', 29 | 'Cython >= 0.21', 30 | ], 31 | ) 32 | if use_cython: 33 | exts = [Extension('*', ['graphs/*/*.pyx'], include_dirs=[np.get_include()])] 34 | setup_kwargs['ext_modules'] = cythonize(exts) 35 | 36 | setup(**setup_kwargs) 37 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 ALL @ UMass 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /graphs/datasets/swiss_roll.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | __all__ = ['swiss_roll', 'error_ratio'] 4 | 5 | 6 | def swiss_roll(radians, num_points, radius=1.0, 7 | theta_noise=0.1, radius_noise=0.01, 8 | return_theta=False): 9 | theta = np.linspace(1, radians, num_points) 10 | if theta_noise > 0: 11 | theta += np.random.normal(scale=theta_noise, size=theta.shape) 12 | r = np.sqrt(np.linspace(0, radius*radius, num_points)) 13 | if radius_noise > 0: 14 | r += np.random.normal(scale=radius_noise, size=r.shape) 15 | roll = np.empty((num_points, 3)) 16 | roll[:,0] = r * np.sin(theta) 17 | roll[:,2] = r * np.cos(theta) 18 | roll[:,1] = np.random.uniform(-1,1,num_points) 19 | if return_theta: 20 | return roll, theta 21 | return roll 22 | 23 | 24 | def error_ratio(G, GT_points, max_delta_theta=0.1, return_tuple=False): 25 | theta_edges = GT_points[G.pairs(),0] 26 | delta_theta = np.abs(np.diff(theta_edges)) 27 | err_edges = np.count_nonzero(delta_theta > max_delta_theta) 28 | tot_edges = delta_theta.shape[0] 29 | if return_tuple: 30 | return err_edges, tot_edges 31 | return err_edges / float(tot_edges) 32 | -------------------------------------------------------------------------------- /graphs/datasets/tests/test_shapes.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import unittest 3 | 4 | from .. import shapes 5 | 6 | 7 | class TestShapes(unittest.TestCase): 8 | 9 | def test_mobius(self): 10 | S = shapes.MobiusStrip(radius=1.0, max_width=1.0) 11 | X = S.point_cloud(25) 12 | T = S.trajectories(2, 10) 13 | self.assertEqual(X.shape, (25, 3)) 14 | self.assertEqual(len(T), 2) 15 | self.assertEqual(T[0].shape, (10, 3)) 16 | self.assertEqual(T[1].shape, (10, 3)) 17 | 18 | def test_s_curve(self): 19 | S = shapes.SCurve(radius=1.0) 20 | X = S.point_cloud(25) 21 | T = S.trajectories(2, 10) 22 | self.assertEqual(X.shape, (25, 3)) 23 | self.assertEqual(len(T), 2) 24 | self.assertEqual(T[0].shape, (10, 3)) 25 | self.assertEqual(T[1].shape, (10, 3)) 26 | 27 | def test_figure_eight(self): 28 | for d in (2,3): 29 | S = shapes.FigureEight(radius=1.0, dimension=d) 30 | X = S.point_cloud(25) 31 | T = S.trajectories(2, 10) 32 | self.assertEqual(X.shape, (25, d)) 33 | self.assertEqual(len(T), 2) 34 | self.assertEqual(T[0].shape, (10, d)) 35 | self.assertEqual(T[1].shape, (10, d)) 36 | 37 | if __name__ == '__main__': 38 | unittest.main() 39 | -------------------------------------------------------------------------------- /graphs/datasets/tests/test_swiss_roll.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import numpy as np 3 | import unittest 4 | 5 | from ... 
import Graph 6 | from ..swiss_roll import swiss_roll, error_ratio 7 | 8 | 9 | class TestSwissRoll(unittest.TestCase): 10 | 11 | def test_swiss_roll(self): 12 | X = swiss_roll(6, 10) 13 | self.assertEqual(X.shape, (10, 3)) 14 | X, theta = swiss_roll(3.0, 25, theta_noise=0, radius_noise=0, 15 | return_theta=True) 16 | self.assertEqual(X.shape, (25, 3)) 17 | self.assertEqual(theta.shape, (25,)) 18 | self.assertAlmostEqual(theta.max(), 3.0) 19 | 20 | def test_error_ratio(self): 21 | adj = np.diag(np.ones(3), k=1) 22 | G = Graph.from_adj_matrix(adj + adj.T) 23 | GT = np.tile(np.linspace(0, 1, adj.shape[0])**2, (2,1)).T 24 | err_edges, tot_edges = error_ratio(G, GT, return_tuple=True) 25 | self.assertEqual(err_edges, 6) 26 | self.assertEqual(tot_edges, 6) 27 | self.assertEqual(error_ratio(G, GT, max_delta_theta=0.2), 4/6.) 28 | self.assertEqual(error_ratio(G, GT, max_delta_theta=0.5), 2/6.) 29 | self.assertEqual(error_ratio(G, GT, max_delta_theta=1), 0.0) 30 | 31 | 32 | if __name__ == '__main__': 33 | unittest.main() 34 | -------------------------------------------------------------------------------- /examples/short_circuit.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import numpy as np 3 | from matplotlib import pyplot as plt 4 | 5 | from graphs.datasets import swiss_roll 6 | from graphs.construction import neighbor_graph 7 | 8 | 9 | def main(): 10 | np.random.seed(1234) 11 | X, theta = swiss_roll(8, 300, return_theta=True, radius=0.5) 12 | GT = np.column_stack((theta, X[:,1])) 13 | g = neighbor_graph(X, k=6) 14 | g = g.from_adj_matrix(g.matrix('dense')) 15 | ct = 12 16 | 17 | _, axes = plt.subplots(nrows=2, ncols=2, figsize=(8, 8), 18 | sharex=True, sharey=True) 19 | _plot_diff(axes[0,0], GT, g, g.minimum_spanning_subtree(), title='MST') 20 | _plot_diff(axes[0,1], GT, g, g.circle_tear(cycle_len_thresh=ct), 21 | title='Circle Tear (%d)' % ct) 22 | _plot_diff(axes[1,0], GT, g, g.cycle_cut(cycle_len_thresh=ct), 23 | title='Cycle Cut (%d)' % ct) 24 | _plot_diff(axes[1,1], GT, g, g.isograph(), title='Isograph') 25 | plt.show() 26 | 27 | 28 | def _plot_diff(ax, x, g1, g2, title=''): 29 | g1.plot(x, ax=ax, edge_style='y-', vertex_style='k.') 30 | g2.plot(x, ax=ax, edge_style='b-', vertex_style='k.') 31 | ax.set_title(title) 32 | 33 | 34 | if __name__ == '__main__': 35 | main() 36 | -------------------------------------------------------------------------------- /graphs/construction/tests/test_downsample.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import numpy as np 3 | import unittest 4 | from numpy.testing import assert_array_equal 5 | 6 | from ...datasets.shapes import SCurve 7 | from .. 
import downsample as d 8 | 9 | 10 | class TestDownsample(unittest.TestCase): 11 | 12 | def test_epsilon_net(self): 13 | pts = np.array([[0,0],[1,2],[3,2],[-1,0]]) 14 | sample = d.epsilon_net(pts, 1.7) 15 | self.assertTupleEqual(tuple(sample), (0,1,2)) 16 | traj = [pts[:2], pts[2:]] 17 | sample = d.downsample_trajectories(traj, d.epsilon_net, 1.7) 18 | assert_array_equal(sample[0], pts[:2]) 19 | assert_array_equal(sample[1], pts[2:3]) 20 | 21 | def test_fuzzy_c_means(self): 22 | pts = np.array([[0,0],[1,2],[3,2],[-1,0]]) 23 | sample = np.sort(d.fuzzy_c_means(pts, 2)) 24 | assert_array_equal(sample, [0, 2]) 25 | 26 | def test_downsample_trajectories(self): 27 | traj = SCurve().trajectories(5, 20) 28 | pts = np.vstack(traj) 29 | ds_traj = d.downsample_trajectories(traj, d.epsilon_net, 0.05) 30 | ds_pts = pts[d.epsilon_net(pts, 0.05)] 31 | assert_array_equal(np.vstack(ds_traj), ds_pts) 32 | 33 | 34 | if __name__ == '__main__': 35 | unittest.main() 36 | -------------------------------------------------------------------------------- /graphs/generators/trajectories.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from graphs import Graph 3 | 4 | __all__ = ['chunk_up', 'concat_trajectories'] 5 | 6 | 7 | def chunk_up(trajectories, chunk_size=None, directed=False): 8 | if chunk_size is None: 9 | chunk_lengths = list(map(len, trajectories)) 10 | else: 11 | chunk_lengths = [] 12 | for t in trajectories: 13 | chunk_lengths.extend(_chunk_traj_idxs(len(t), chunk_size)) 14 | return concat_trajectories(chunk_lengths, directed=directed) 15 | 16 | 17 | def concat_trajectories(traj_lengths, directed=False): 18 | P = [] 19 | last_idx = 0 20 | for tl in traj_lengths: 21 | P.append(last_idx + _traj_pair_idxs(tl)) 22 | last_idx += tl 23 | return Graph.from_edge_pairs(np.vstack(P), num_vertices=last_idx, 24 | symmetric=(not directed)) 25 | 26 | 27 | def _traj_pair_idxs(traj_len): 28 | ii = np.arange(traj_len) 29 | pairs = np.transpose((ii[:-1], ii[1:])) 30 | return pairs 31 | 32 | 33 | def _chunk_traj_idxs(traj_len, chunk_size): 34 | num_chunks, extra = divmod(traj_len, chunk_size) 35 | if num_chunks == 0: 36 | return [extra] 37 | c = [chunk_size] * num_chunks 38 | c[-1] += extra # Add any leftovers to the last chunk. 39 | return c 40 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Graphs 2 | 3 | [![PyPI version](https://badge.fury.io/py/graphs.svg)](http://badge.fury.io/py/graphs) 4 | [![Build Status](https://travis-ci.org/all-umass/graphs.svg?branch=master)](https://travis-ci.org/all-umass/graphs) 5 | [![Coverage Status](https://coveralls.io/repos/all-umass/graphs/badge.svg?branch=master&service=github)](https://coveralls.io/github/all-umass/graphs?branch=master) 6 | 7 | A library for graph-based learning in Python. 8 | 9 | Provides several types of graph container objects, 10 | with a unified API for visualization, analysis, transformation, 11 | and embedding. 
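Graph objects can be backed by dense adjacency matrices, scipy.sparse matrices, or lists of edge pairs; all are built with the static constructors `Graph.from_adj_matrix` and `Graph.from_edge_pairs`.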
12 | 13 | ## Usage example 14 | 15 | ```python 16 | from graphs.generators import random_graph 17 | 18 | G = random_graph([2,3,1,3,2,1,2]) 19 | 20 | print(G.num_vertices())  # 7 21 | print(G.num_edges())  # 14 22 | 23 | G.symmetrize(method='max') 24 | X = G.isomap(num_dims=2) 25 | 26 | G.plot(X, title='isomap embedding')() 27 | ``` 28 | 29 | ## Requirements 30 | 31 | Requires recent versions of: 32 | 33 | * numpy 34 | * scipy 35 | * scikit-learn 36 | * matplotlib 37 | * Cython 38 | 39 | Optional dependencies: 40 | 41 | * python-igraph 42 | * graphtool 43 | * networkx 44 | 45 | Testing requires: 46 | 47 | * nose 48 | * nose-cov 49 | 50 | Run the test suite: 51 | 52 | ``` 53 | ./run_tests.sh 54 | ``` 55 | -------------------------------------------------------------------------------- /graphs/construction/tests/test_msg.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import numpy as np 3 | import unittest 4 | from numpy.testing import assert_array_equal 5 | 6 | from ...datasets.swiss_roll import swiss_roll, error_ratio 7 | from ..msg import manifold_spanning_graph 8 | 9 | 10 | class TestMSG(unittest.TestCase): 11 | 12 | def test_swiss_roll(self): 13 | np.random.seed(1234) 14 | X, theta = swiss_roll(6, 120, radius=4.8, return_theta=True) 15 | GT = np.hstack((theta[:,None], X[:,1:2])) 16 | GT -= GT.min(axis=0) 17 | GT /= GT.max(axis=0) 18 | 19 | G = manifold_spanning_graph(X, 2) 20 | self.assertEqual(error_ratio(G, GT), 0.0) 21 | 22 | def test_two_moons(self): 23 | np.random.seed(1234) 24 | n1,n2 = 55,75 25 | theta = np.hstack((np.random.uniform(0, 1, size=n1), 26 | np.random.uniform(1, 2, size=n2))) * np.pi 27 | r = 1.3 + 0.12 * np.random.randn(n1+n2)[:,None] 28 | X = r * np.hstack((np.cos(theta), np.sin(theta))).reshape((-1,2), order='F') 29 | X[:n1] += np.array([[0, -0.2]]) 30 | X[n1:] += np.array([[0.9, 0.25]]) 31 | 32 | G = manifold_spanning_graph(X, 2, num_ccs=2) 33 | num_ccs, labels = G.connected_components() 34 | self.assertEqual(num_ccs, 2) 35 | assert_array_equal(labels[:n1], np.zeros(n1)) 36 | assert_array_equal(labels[n1:], np.ones(n2)) 37 | 38 | if __name__ == '__main__': 39 | unittest.main() 40 | -------------------------------------------------------------------------------- /graphs/construction/__init__.py: -------------------------------------------------------------------------------- 1 | '''Graph construction algorithms, including: 2 | 3 | - k-nearest and epsilon-close neighbors, with incremental variants 4 | - b-matching 5 | - directed graph construction 6 | - Delaunay and Gabriel graphs 7 | - Relative Neighborhood graphs 8 | - Manifold Spanning graphs 9 | - Sparse Regularized graphs 10 | - traditional, perturbed, and disjoint Minimum Spanning Trees 11 | 12 | Each construction function returns a Graph object.
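For example (a minimal sketch; `X` stands for any (num_points, num_dims) coordinate array):

>>> from graphs.construction import neighbor_graph
>>> G = neighbor_graph(X, k=5)          # k-nearest neighbor graph
>>> G = neighbor_graph(X, epsilon=0.3)  # epsilon-close neighbor graph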
13 | ''' 14 | from __future__ import absolute_import 15 | 16 | from .b_matching import * 17 | from .directed import * 18 | from .downsample import * 19 | from .geometric import * 20 | from .incremental import * 21 | from .msg import * 22 | from .neighbors import * 23 | from .regularized import * 24 | from .saffron import * 25 | from .spanning_tree import * 26 | 27 | __all__ = [ 28 | # b_matching 29 | 'b_matching', 30 | # directed 31 | 'directed_graph', 32 | # downsample 33 | 'downsample_trajectories', 'epsilon_net', 'fuzzy_c_means', 34 | # geometric 35 | 'delaunay_graph', 'gabriel_graph', 'relative_neighborhood_graph', 36 | # incremental 37 | 'incremental_neighbor_graph', 38 | # msg 39 | 'manifold_spanning_graph', 40 | # neighbors 41 | 'neighbor_graph', 'nearest_neighbors', 42 | # regularized 43 | 'sparse_regularized_graph', 44 | # saffron 45 | 'saffron', 46 | # spanning_tree 47 | 'mst', 'perturbed_mst', 'disjoint_mst', 48 | ] 49 | -------------------------------------------------------------------------------- /graphs/base/tests/test_adj.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | from numpy.testing import assert_array_equal 4 | from scipy.sparse import csr_matrix 5 | 6 | from graphs.base.adj import ( 7 | DenseAdjacencyMatrixGraph, SparseAdjacencyMatrixGraph) 8 | 9 | PAIRS = np.array([[0,1],[0,2],[1,1],[2,1],[3,3]]) 10 | ADJ = [[0,1,1,0], 11 | [0,1,0,0], 12 | [0,1,0,0], 13 | [0,0,0,1]] 14 | 15 | 16 | class TestAdjacencyMatrixGraphs(unittest.TestCase): 17 | def setUp(self): 18 | self.G = DenseAdjacencyMatrixGraph(ADJ) 19 | self.S = SparseAdjacencyMatrixGraph(csr_matrix(ADJ)) 20 | 21 | def test_pairs(self): 22 | assert_array_equal(self.G.pairs(), PAIRS) 23 | assert_array_equal(self.S.pairs(), PAIRS) 24 | 25 | def test_matrix(self): 26 | M = self.G.matrix() 27 | assert_array_equal(M, ADJ) 28 | M = self.G.matrix('csr') 29 | self.assertEqual(M.format, 'csr') 30 | assert_array_equal(M.toarray(), ADJ) 31 | M = self.S.matrix() 32 | self.assertEqual(M.format, 'csr') 33 | assert_array_equal(M.toarray(), ADJ) 34 | 35 | def test_matrix_copy(self): 36 | M = self.G.matrix('dense', copy=False) 37 | assert_array_equal(M, ADJ) 38 | M2 = self.G.matrix('dense', copy=True) 39 | assert_array_equal(M, M2) 40 | self.assertIsNot(M, M2) 41 | # Sparse case 42 | M = self.S.matrix('csr', copy=False) 43 | assert_array_equal(M.toarray(), ADJ) 44 | M2 = self.S.matrix('csr', copy=True) 45 | assert_array_equal(M.toarray(), M2.toarray()) 46 | self.assertIsNot(M, M2) 47 | 48 | if __name__ == '__main__': 49 | unittest.main() 50 | -------------------------------------------------------------------------------- /graphs/generators/tests/test_structured.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import numpy as np 3 | import unittest 4 | from numpy.testing import assert_array_equal 5 | 6 | from .. 
import chain_graph, lattice_graph 7 | 8 | 9 | class TestStructured(unittest.TestCase): 10 | 11 | def test_chain_graph(self): 12 | expected = np.diag([1,1], k=1) 13 | g = chain_graph(3, directed=True) 14 | assert_array_equal(g.matrix('dense'), expected) 15 | 16 | expected += np.diag([1,1], k=-1) 17 | g = chain_graph(3, wraparound=False) 18 | assert_array_equal(g.matrix('dense'), expected) 19 | 20 | expected[0,2] = 1 21 | expected[2,0] = 1 22 | g = chain_graph(3, wraparound=True) 23 | assert_array_equal(g.matrix('dense'), expected) 24 | 25 | def test_lattice_graph(self): 26 | self.assertRaises(ValueError, lattice_graph, []) 27 | 28 | expected = np.diag([1,1], k=1) + np.diag([1,1], k=-1) 29 | g = lattice_graph((3,), wraparound=False) 30 | assert_array_equal(g.matrix('dense'), expected) 31 | 32 | expected = np.diag([1,1,0,1,1], k=1) + np.diag([1,1,0,1,1], k=-1) 33 | expected += np.diag([1,1,1], k=3) + np.diag([1,1,1], k=-3) 34 | g = lattice_graph((3,2), wraparound=False) 35 | assert_array_equal(g.matrix('dense'), expected) 36 | 37 | expected[[0,3],[2,5]] = 1 38 | expected[[2,5],[0,3]] = 1 39 | g = lattice_graph((3,2), wraparound=True) 40 | assert_array_equal(g.matrix('dense'), expected) 41 | 42 | expected = np.diag([1,1,1], k=1) + np.diag([1,1,1], k=-1) 43 | g = lattice_graph((1,4), wraparound=False) 44 | assert_array_equal(g.matrix('dense'), expected) 45 | 46 | 47 | if __name__ == '__main__': 48 | unittest.main() 49 | -------------------------------------------------------------------------------- /graphs/construction/directed.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function 2 | 3 | import numpy as np 4 | from sklearn.metrics.pairwise import paired_distances 5 | from graphs import Graph 6 | from .neighbors import neighbor_graph 7 | 8 | __all__ = ['directed_graph'] 9 | 10 | 11 | def directed_graph(trajectories, k=5, verbose=False, pruning_thresh=0, 12 | return_coords=False): 13 | '''Directed graph construction alg. from Johns & Mahadevan, ICML '07. 
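Builds a k-nearest neighbor graph over all trajectory states; when pruning_thresh > 0, edges whose direction disagrees (by cosine distance) with the local direction of motion along the trajectory are removed.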
14 | trajectories: list of NxD arrays of ordered states 15 | ''' 16 | X = np.vstack(trajectories) 17 | G = neighbor_graph(X, k=k) 18 | if pruning_thresh > 0: 19 | traj_len = map(len, trajectories) 20 | G = _prune_edges(G, X, traj_len, pruning_thresh, verbose=verbose) 21 | if return_coords: 22 | return G, X 23 | return G 24 | 25 | 26 | def _prune_edges(G, X, traj_lengths, pruning_thresh=0.1, verbose=False): 27 | '''Prune edges in graph G via cosine distance with trajectory edges.''' 28 | W = G.matrix('dense', copy=True) 29 | degree = G.degree(kind='out', weighted=False) 30 | i = 0 31 | num_bad = 0 32 | for n in traj_lengths: 33 | s, t = np.nonzero(W[i:i+n-1]) 34 | graph_edges = X[t] - X[s+i] 35 | traj_edges = np.diff(X[i:i+n], axis=0) 36 | traj_edges = np.repeat(traj_edges, degree[i:i+n-1], axis=0) 37 | theta = paired_distances(graph_edges, traj_edges, 'cosine') 38 | bad_edges = theta > pruning_thresh 39 | s, t = s[bad_edges], t[bad_edges] 40 | if verbose: # pragma: no cover 41 | num_bad += np.count_nonzero(W[s,t]) 42 | W[s,t] = 0 43 | i += n 44 | if verbose: # pragma: no cover 45 | print('removed %d bad edges' % num_bad) 46 | return Graph.from_adj_matrix(W) 47 | -------------------------------------------------------------------------------- /graphs/generators/structured.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import numpy as np 3 | import scipy.sparse as ss 4 | 5 | from .. import Graph 6 | 7 | 8 | def chain_graph(num_vertices, wraparound=False, directed=False, weights=None): 9 | if wraparound: 10 | ii = np.arange(num_vertices) 11 | jj = ii + 1 12 | jj[-1] = 0 13 | else: 14 | ii = np.arange(num_vertices-1) 15 | jj = ii + 1 16 | pairs = np.column_stack((ii, jj)) 17 | return Graph.from_edge_pairs(pairs, num_vertices=num_vertices, 18 | symmetric=(not directed), weights=weights) 19 | 20 | 21 | def lattice_graph(dims, wraparound=False): 22 | dims = [d for d in dims if d > 1] 23 | if len(dims) == 0: 24 | raise ValueError('Must supply at least one dimension >= 2') 25 | if len(dims) == 1: 26 | return chain_graph(dims[0], wraparound=wraparound) 27 | if len(dims) > 2: # pragma: no cover 28 | raise NotImplementedError('NYI: len(dims) > 2') 29 | 30 | # 2d case 31 | m, n = dims 32 | num_vertices = m * n 33 | if wraparound: 34 | offsets = [-m*(n-1), -m, -m+1, -1, 1, m-1, m, m*(n-1)] 35 | data = np.ones((8, num_vertices), dtype=int) 36 | data[[2,5], :] = 0 37 | data[2, ::m] = 1 38 | data[3, m-1::m] = 0 39 | data[4, ::m] = 0 40 | data[5, m-1::m] = 1 41 | # handle edge cases where offsets are duplicated 42 | offsets, idx = np.unique(offsets, return_index=True) 43 | data = data[idx] 44 | else: 45 | offsets = [-m, -1, 1, m] 46 | data = np.ones((4, num_vertices), dtype=int) 47 | data[1, m-1::m] = 0 48 | data[2, 0::m] = 0 49 | adj = ss.dia_matrix((data, offsets), shape=(num_vertices, num_vertices)) 50 | return Graph.from_adj_matrix(adj) 51 | -------------------------------------------------------------------------------- /graphs/generators/tests/test_trajectories.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import numpy as np 3 | import unittest 4 | from numpy.testing import assert_array_equal 5 | 6 | from .. 
import trajectories as traj 7 | 8 | 9 | class TestTrajectories(unittest.TestCase): 10 | 11 | def test_concat_trajectories(self): 12 | expected = [[0,1,0,0,0],[1,0,0,0,0],[0,0,0,1,0],[0,0,1,0,1],[0,0,0,1,0]] 13 | G = traj.concat_trajectories([2, 3], directed=False) 14 | assert_array_equal(G.matrix('dense'), expected) 15 | 16 | def test_chunk_up(self): 17 | T = [np.zeros(4), np.zeros(4)] 18 | expected = [[0,1,0,0,0,0,0,0], 19 | [1,0,1,0,0,0,0,0], 20 | [0,1,0,1,0,0,0,0], 21 | [0,0,1,0,0,0,0,0], 22 | [0,0,0,0,0,1,0,0], 23 | [0,0,0,0,1,0,1,0], 24 | [0,0,0,0,0,1,0,1], 25 | [0,0,0,0,0,0,1,0]] 26 | G = traj.chunk_up(T, directed=False) 27 | assert_array_equal(G.matrix('dense'), expected) 28 | expected = [[0,1,0,0,0,0,0,0], 29 | [1,0,0,0,0,0,0,0], 30 | [0,0,0,1,0,0,0,0], 31 | [0,0,1,0,0,0,0,0], 32 | [0,0,0,0,0,1,0,0], 33 | [0,0,0,0,1,0,0,0], 34 | [0,0,0,0,0,0,0,1], 35 | [0,0,0,0,0,0,1,0]] 36 | G = traj.chunk_up(T, chunk_size=2, directed=False) 37 | assert_array_equal(G.matrix('dense'), expected) 38 | # test case where chunk overflows 39 | T = [np.zeros(3), np.zeros(2)] 40 | expected = [[0,1,0,0,0], 41 | [1,0,1,0,0], 42 | [0,1,0,0,0], 43 | [0,0,0,0,1], 44 | [0,0,0,1,0]] 45 | G = traj.chunk_up(T, chunk_size=3, directed=False) 46 | assert_array_equal(G.matrix('dense'), expected) 47 | 48 | 49 | if __name__ == '__main__': 50 | unittest.main() 51 | -------------------------------------------------------------------------------- /graphs/tests/test_reorder.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import unittest 3 | from numpy.testing import assert_array_equal 4 | from graphs import Graph, reorder 5 | 6 | 7 | class TestReorder(unittest.TestCase): 8 | def setUp(self): 9 | ii = np.array([0, 0, 1, 2, 2, 3, 3, 3, 4, 5]) 10 | jj = np.array([1, 2, 3, 4, 5, 6, 7, 8, 7, 7]) 11 | adj = np.zeros((9,9), dtype=int) 12 | adj[ii,jj] = 1 13 | adj[jj,ii] = 1 14 | self.G = Graph.from_adj_matrix(adj) 15 | 16 | def test_cuthill_mckee(self): 17 | # Many orderings are "correct". Just ensure minimal bandwidth. 
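# (with the identity ordering, this graph has bandwidth 5, due to the 3-8 edge)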
18 | expected_b = 3 19 | # test default version (probably scipy) 20 | cm = reorder.cuthill_mckee(self.G) 21 | self.assertEqual(cm.bandwidth(), expected_b) 22 | # test the non-scipy version 23 | cm = reorder._cuthill_mckee(self.G) 24 | self.assertEqual(cm.bandwidth(), expected_b) 25 | 26 | def test_node_centroid_hill_climbing(self): 27 | np.random.seed(1234) 28 | nchc = reorder.node_centroid_hill_climbing(self.G, relax=1) 29 | expected = np.array([[0,1],[0,2],[0,3],[0,4],[1,0],[2,0],[2,5],[3,0],[3,6], 30 | [3,7],[4,0],[5,2],[5,8],[6,3],[6,8],[7,3],[7,8],[8,5], 31 | [8,6],[8,7]]) 32 | assert_array_equal(nchc.pairs(), expected) 33 | # test with relax < 1 34 | nchc2 = reorder.node_centroid_hill_climbing(self.G, relax=0.99) 35 | expected = np.array([[0,1],[1,0],[1,2],[1,3],[1,4],[2,1],[2,5],[3,1],[3,6], 36 | [3,7],[4,1],[5,2],[5,8],[6,3],[6,8],[7,3],[7,8],[8,5], 37 | [8,6],[8,7]]) 38 | assert_array_equal(nchc2.pairs(), expected) 39 | 40 | def test_laplacian_reordering(self): 41 | lap = reorder.laplacian_reordering(self.G) 42 | self.assertEqual(lap.bandwidth(), 3) 43 | 44 | 45 | if __name__ == '__main__': 46 | unittest.main() 47 | -------------------------------------------------------------------------------- /benchmarks/benchmarks/mixins.py: -------------------------------------------------------------------------------- 1 | import matplotlib 2 | matplotlib.use('template') 3 | import scipy.sparse as ss 4 | import numpy as np 5 | from graphs import Graph 6 | 7 | 8 | class _RandomFormatsBase(object): 9 | n = 500 10 | density = 0.05 11 | params = ['dense', 'coo', 'csr'] 12 | param_names = ['adj_format'] 13 | 14 | def setup(self, adj_format, *args): 15 | adj = ss.rand(self.n, self.n, density=self.density, random_state=1234) 16 | if adj_format == 'dense': 17 | adj = adj.A 18 | else: 19 | adj = adj.asformat(adj_format) 20 | self.G = Graph.from_adj_matrix(adj) 21 | self.G.symmetrize() 22 | 23 | 24 | class Labeling(_RandomFormatsBase): 25 | def time_greedy_coloring(self, *args): 26 | self.G.greedy_coloring() 27 | 28 | def time_spectral_clustering(self, *args): 29 | self.G.spectral_clustering(2) 30 | 31 | 32 | class LabelSpreading(_RandomFormatsBase): 33 | params = [['dense', 'coo', 'csr'], ['rbf', 'none', 'binary']] 34 | param_names = ['adj_format', 'kernel'] 35 | 36 | def setup(self, *args): 37 | _RandomFormatsBase.setup(self, *args) 38 | np.random.seed(1234) 39 | self.y = np.random.randint(5, size=self.n) 40 | self.y[np.random.random(self.n) > 0.5] = -1 41 | 42 | def time_spread_labels(self, _, k): 43 | self.G.spread_labels(self.y, kernel=k) 44 | 45 | 46 | class Regression(_RandomFormatsBase): 47 | params = [['dense', 'coo', 'csr'], ['rbf', 'none', 'binary'], [0, 1e-3]] 48 | param_names = ['adj_format', 'kernel', 'smoothness_penalty'] 49 | 50 | def setup(self, *args): 51 | _RandomFormatsBase.setup(self, *args) 52 | self.y = np.random.random((self.n//2, 1)) 53 | self.mask = slice(None, None, 2) 54 | 55 | def time_regression(self, _, k, s): 56 | self.G.regression(self.y, self.mask, smoothness_penalty=s, kernel=k) 57 | -------------------------------------------------------------------------------- /graphs/construction/incremental.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import numpy as np 4 | from sklearn.metrics import pairwise_distances 5 | 6 | from graphs import Graph 7 | 8 | __all__ = ['incremental_neighbor_graph'] 9 | 10 | 11 | def incremental_neighbor_graph(X, precomputed=False, k=None, epsilon=None, 
12 | weighting='none'): 13 | '''See neighbor_graph.''' 14 | assert ((k is not None) or (epsilon is not None) 15 | ), "Must provide `k` or `epsilon`" 16 | assert (_issequence(k) ^ _issequence(epsilon) 17 | ), "Exactly one of `k` or `epsilon` must be a sequence." 18 | assert weighting in ('binary','none'), "Invalid weighting param: " + weighting 19 | is_weighted = weighting == 'none' 20 | 21 | if precomputed: 22 | D = X 23 | else: 24 | D = pairwise_distances(X, metric='euclidean') 25 | # pre-sort for efficiency 26 | order = np.argsort(D)[:,1:] 27 | 28 | if k is None: 29 | k = D.shape[0] 30 | 31 | # generate the sequence of graphs 32 | # TODO: convert the core of these loops to Cython for speed 33 | W = np.zeros_like(D) 34 | I = np.arange(D.shape[0]) 35 | if _issequence(k): 36 | # varied k, fixed epsilon 37 | if epsilon is not None: 38 | D[D > epsilon] = 0 39 | old_k = 0 40 | for new_k in k: 41 | idx = order[:, old_k:new_k] 42 | dist = D[I, idx.T] 43 | W[I, idx.T] = dist if is_weighted else 1 44 | yield Graph.from_adj_matrix(W) 45 | old_k = new_k 46 | else: 47 | # varied epsilon, fixed k 48 | idx = order[:,:k] 49 | dist = D[I, idx.T].T 50 | old_i = np.zeros(D.shape[0], dtype=int) 51 | for eps in epsilon: 52 | for i, row in enumerate(dist): 53 | oi = old_i[i] 54 | ni = oi + np.searchsorted(row[oi:], eps) 55 | rr = row[oi:ni] 56 | W[i, idx[i,oi:ni]] = rr if is_weighted else 1 57 | old_i[i] = ni 58 | yield Graph.from_adj_matrix(W) 59 | 60 | 61 | def _issequence(x): 62 | # Note: isinstance(x, collections.Sequence) fails for numpy arrays 63 | return hasattr(x, '__len__') 64 | -------------------------------------------------------------------------------- /graphs/construction/tests/test_spanning_tree.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import unittest 3 | from numpy.testing import assert_array_almost_equal 4 | from sklearn.metrics import pairwise_distances 5 | 6 | from graphs.construction import mst, perturbed_mst, disjoint_mst 7 | 8 | 9 | class TestSpanningTree(unittest.TestCase): 10 | def setUp(self): 11 | self.pts = np.array([[0,0],[1,2],[3,2],[-1,0]]) 12 | 13 | def test_mst(self): 14 | expected = [[0, 2.236,0, 1], 15 | [2.236,0, 2, 0], 16 | [0, 2, 0, 0], 17 | [1, 0, 0, 0]] 18 | G = mst(self.pts) 19 | assert_array_almost_equal(G.matrix('dense'), expected, decimal=3) 20 | # Check precomputed metric. 
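# (mst() must leave the precomputed matrix untouched; we compare against a copy below)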
21 | D = pairwise_distances(self.pts) 22 | D_copy = D.copy() 23 | G = mst(D, metric='precomputed') 24 | assert_array_almost_equal(G.matrix('dense'), expected, decimal=3) 25 | assert_array_almost_equal(D, D_copy) 26 | 27 | def test_perturbed_mst(self): 28 | np.random.seed(1234) 29 | expected = [[0,0.71428571,0.23809524,1.00000000], 30 | [0.71428571,0,0.85714286,0.14285714], 31 | [0.23809524,0.85714286,0,0.04761905], 32 | [1.00000000,0.14285714,0.04761905,0]] 33 | G = perturbed_mst(self.pts) 34 | assert_array_almost_equal(G.matrix('dense'), expected) 35 | 36 | def test_disjoint_mst(self): 37 | expected = [[0,2.23606798,3.60555128,1], 38 | [2.23606798,0,2,2.82842712], 39 | [3.60555128,2,0,4.47213595], 40 | [1,2.82842712,4.47213595,0]] 41 | G = disjoint_mst(self.pts) 42 | assert_array_almost_equal(G.matrix('dense'), expected) 43 | 44 | # check precomputed case, especially that we don't overwrite D 45 | D = pairwise_distances(self.pts) 46 | D_copy = D.copy() 47 | G = disjoint_mst(D, metric='precomputed') 48 | assert_array_almost_equal(G.matrix('dense'), expected) 49 | assert_array_almost_equal(D, D_copy) 50 | 51 | if __name__ == '__main__': 52 | unittest.main() 53 | -------------------------------------------------------------------------------- /graphs/base/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import numpy as np 4 | import scipy.sparse as ss 5 | 6 | from .adj import SparseAdjacencyMatrixGraph, DenseAdjacencyMatrixGraph 7 | from .base import Graph 8 | from .pairs import EdgePairGraph, SymmEdgePairGraph 9 | 10 | __all__ = ['Graph'] 11 | 12 | 13 | def from_edge_pairs(pairs, num_vertices=None, symmetric=False, weights=None): 14 | '''Constructor for Graph objects based on edges given as pairs of vertices. 15 | pairs : integer array-like with shape (num_edges, 2) 16 | ''' 17 | if not symmetric: 18 | if weights is None: 19 | return EdgePairGraph(pairs, num_vertices=num_vertices) 20 | row, col = np.asarray(pairs).T 21 | row, weights = np.broadcast_arrays(row, weights) 22 | shape = None if num_vertices is None else (num_vertices, num_vertices) 23 | adj = ss.coo_matrix((weights, (row, col)), shape=shape) 24 | return SparseAdjacencyMatrixGraph(adj) 25 | # symmetric case 26 | G = SymmEdgePairGraph(pairs, num_vertices=num_vertices) 27 | if weights is None: 28 | return G 29 | # Convert to sparse adj graph with provided edge weights 30 | s = G.matrix('coo').astype(float) 31 | # shenanigans to assign edge weights in the right order 32 | flat_idx = np.ravel_multi_index(s.nonzero(), s.shape) 33 | r, c = np.transpose(pairs) 34 | rc_idx = np.ravel_multi_index((r,c), s.shape) 35 | cr_idx = np.ravel_multi_index((c,r), s.shape) 36 | order = np.argsort(flat_idx) 37 | flat_idx = flat_idx[order] 38 | s.data[order[np.searchsorted(flat_idx, rc_idx)]] = weights 39 | s.data[order[np.searchsorted(flat_idx, cr_idx)]] = weights 40 | return SparseAdjacencyMatrixGraph(s) 41 | 42 | 43 | def from_adj_matrix(adj): 44 | '''Constructor for Graph objects based on a given adjacency matrix. 45 | adj : scipy.sparse matrix or array-like, shape (num_vertices, num_vertices) 46 | ''' 47 | if ss.issparse(adj): 48 | return SparseAdjacencyMatrixGraph(adj) 49 | return DenseAdjacencyMatrixGraph(adj) 50 | 51 | # Add static methods to the Graph class. 
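# Attaching them here (rather than defining them in base.py) lets the constructors reference the concrete subclasses above without a circular import.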
52 | Graph.from_edge_pairs = staticmethod(from_edge_pairs) 53 | Graph.from_adj_matrix = staticmethod(from_adj_matrix) 54 | -------------------------------------------------------------------------------- /graphs/construction/spanning_tree.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import numpy as np 4 | from scipy.sparse.csgraph import minimum_spanning_tree 5 | from sklearn.metrics.pairwise import pairwise_distances 6 | from graphs import Graph 7 | from ..mini_six import range 8 | 9 | __all__ = ['mst', 'perturbed_mst', 'disjoint_mst'] 10 | 11 | 12 | def mst(X, metric='euclidean'): 13 | D = pairwise_distances(X, metric=metric) 14 | mst = minimum_spanning_tree(D, overwrite=(metric!='precomputed')) 15 | return Graph.from_adj_matrix(mst + mst.T) 16 | 17 | 18 | def perturbed_mst(X, num_perturbations=20, metric='euclidean', jitter=None): 19 | '''Builds a graph as the union of several MSTs on perturbed data. 20 | Reference: http://ecovision.mit.edu/~sloop/shao.pdf, page 8 21 | jitter refers to the scale of the gaussian noise added for each perturbation. 22 | When jitter is None, it defaults to the 5th percentile interpoint distance. 23 | Note that metric cannot be 'precomputed', as multiple MSTs are computed.''' 24 | assert metric != 'precomputed' 25 | D = pairwise_distances(X, metric=metric) 26 | if jitter is None: 27 | jitter = np.percentile(D[D>0], 5) 28 | W = minimum_spanning_tree(D) 29 | W = W + W.T 30 | W.data[:] = 1.0 # binarize 31 | for i in range(num_perturbations): 32 | pX = X + np.random.normal(scale=jitter, size=X.shape) 33 | pW = minimum_spanning_tree(pairwise_distances(pX, metric=metric)) 34 | pW = pW + pW.T 35 | pW.data[:] = 1.0 36 | W = W + pW 37 | # final graph is the average over all pertubed MSTs + the original 38 | W.data /= (num_perturbations + 1.0) 39 | return Graph.from_adj_matrix(W) 40 | 41 | 42 | def disjoint_mst(X, num_spanning_trees=3, metric='euclidean'): 43 | '''Builds a graph as the union of several spanning trees, 44 | each time removing any edges present in previously-built trees. 
45 | Reference: http://ecovision.mit.edu/~sloop/shao.pdf, page 9.''' 46 | D = pairwise_distances(X, metric=metric) 47 | if metric == 'precomputed': 48 | D = D.copy() 49 | mst = minimum_spanning_tree(D) 50 | W = mst.copy() 51 | for i in range(1, num_spanning_trees): 52 | ii,jj = mst.nonzero() 53 | D[ii,jj] = np.inf 54 | D[jj,ii] = np.inf 55 | mst = minimum_spanning_tree(D) 56 | W = W + mst 57 | # MSTs are all one-sided, so we symmetrize here 58 | return Graph.from_adj_matrix(W + W.T) 59 | -------------------------------------------------------------------------------- /graphs/base/tests/test_pairs.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | from numpy.testing import assert_array_equal 4 | 5 | from graphs.base.pairs import EdgePairGraph, SymmEdgePairGraph 6 | 7 | PAIRS = np.array([[0,1],[0,2],[1,1],[2,1],[3,3]]) 8 | ADJ = [[0,1,1,0], 9 | [0,1,0,0], 10 | [0,1,0,0], 11 | [0,0,0,1]] 12 | 13 | 14 | class TestEdgePairGraph(unittest.TestCase): 15 | def setUp(self): 16 | self.epg = EdgePairGraph(PAIRS) 17 | 18 | def test_pairs(self): 19 | self.assertIs(self.epg.pairs(copy=False), PAIRS) 20 | P = self.epg.pairs(copy=True) 21 | self.assertIsNot(P, PAIRS) 22 | assert_array_equal(P, PAIRS) 23 | # test the directed case 24 | P = self.epg.pairs(directed=False) 25 | assert_array_equal(P, [[0,1],[0,2],[1,1],[1,2],[3,3]]) 26 | 27 | def test_matrix(self): 28 | M = self.epg.matrix() 29 | assert_array_equal(M.toarray(), ADJ) 30 | M = self.epg.matrix('dense') 31 | assert_array_equal(M, ADJ) 32 | M = self.epg.matrix('csr') 33 | self.assertEqual(M.format, 'csr') 34 | assert_array_equal(M.toarray(), ADJ) 35 | 36 | def test_self_edges(self): 37 | self.epg.add_self_edges() 38 | expected = self.epg.pairs() 39 | # Ensure that calling it again does the right thing.
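# (i.e., add_self_edges should be idempotent and must not duplicate self-loop pairs)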
40 | self.epg.add_self_edges() 41 | assert_array_equal(self.epg.pairs(), expected) 42 | 43 | def test_symmetrize(self): 44 | # Check that copy=True doesn't change anything 45 | self.epg.symmetrize(copy=True) 46 | assert_array_equal(self.epg.matrix('dense'), ADJ) 47 | 48 | 49 | class TestSymmEdgePairGraph(unittest.TestCase): 50 | def setUp(self): 51 | self.G = SymmEdgePairGraph(PAIRS) 52 | 53 | def test_copy(self): 54 | gg = self.G.copy() 55 | self.assertIsNot(gg, self.G) 56 | assert_array_equal(gg.matrix('dense'), self.G.matrix('dense')) 57 | assert_array_equal(gg.pairs(), self.G.pairs()) 58 | 59 | def test_pairs(self): 60 | expected = [[0,1], [0,2], [1,0], [1,1], [1,2], [2,0], [2,1], [3,3]] 61 | P = self.G.pairs() 62 | assert_array_equal(sorted(P.tolist()), expected) 63 | # test the directed case 64 | P = self.G.pairs(directed=False) 65 | assert_array_equal(P, [[0,1],[0,2],[1,1],[1,2],[3,3]]) 66 | 67 | def test_symmetrize(self): 68 | self.assertIs(self.G.symmetrize(copy=False), self.G) 69 | S = self.G.symmetrize(copy=True) 70 | self.assertIsNot(S, self.G) 71 | assert_array_equal(S.matrix('dense'), self.G.matrix('dense')) 72 | 73 | if __name__ == '__main__': 74 | unittest.main() 75 | -------------------------------------------------------------------------------- /graphs/construction/tests/test_b_matching.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import unittest 3 | import warnings 4 | from numpy.testing import assert_array_equal 5 | from sklearn.metrics.pairwise import pairwise_distances 6 | 7 | from graphs.construction import b_matching 8 | 9 | 10 | class TestBMatching(unittest.TestCase): 11 | def setUp(self): 12 | pts = np.array([ 13 | [0.192,0.622],[0.438,0.785],[0.780,0.273],[0.276,0.802],[0.958,0.876], 14 | [0.358,0.501],[0.683,0.713],[0.370,0.561],[0.503,0.014],[0.773,0.883]]) 15 | self.dists = pairwise_distances(pts) 16 | 17 | def test_standard(self): 18 | # Generated with the bdmatch binary (b=2,damp=0.5) 19 | expected = np.array([ 20 | [0, 1, 0, 1, 0, 0, 0, 1, 0, 0], 21 | [1, 0, 0, 1, 0, 0, 0, 0, 0, 0], 22 | [0, 0, 0, 0, 1, 0, 1, 0, 1, 0], 23 | [1, 1, 0, 0, 0, 0, 0, 0, 0, 0], 24 | [0, 0, 0, 0, 0, 0, 0, 0, 0, 1], 25 | [0, 0, 0, 0, 0, 0, 0, 1, 1, 0], 26 | [0, 0, 1, 0, 0, 0, 0, 0, 0, 1], 27 | [0, 0, 0, 0, 0, 1, 0, 0, 0, 0], 28 | [0, 0, 1, 0, 0, 1, 0, 0, 0, 0], 29 | [0, 0, 0, 0, 1, 0, 1, 0, 0, 0]]).T 30 | G = b_matching(self.dists, 2, damping=0.5) 31 | assert_array_equal(G.matrix('dense').astype(int), expected) 32 | 33 | def test_warn_nonconvergence(self): 34 | with warnings.catch_warnings(record=True) as w: 35 | b_matching(self.dists, 2, max_iter=2) 36 | self.assertEqual(len(w), 1) 37 | self.assertEqual(str(w[0].message), 38 | 'Hit iteration limit (2) before converging') 39 | 40 | def test_oscillation(self): 41 | # Generated with the bdmatch binary (b=2,damp=1) 42 | expected = np.array([ 43 | [0, 0, 0, 1, 0, 0, 0, 1, 0, 0], 44 | [0, 0, 0, 1, 0, 0, 0, 1, 0, 0], 45 | [0, 0, 0, 0, 1, 0, 0, 0, 1, 0], 46 | [1, 1, 0, 0, 0, 0, 0, 0, 0, 0], 47 | [0, 0, 0, 0, 0, 0, 1, 0, 0, 1], 48 | [0, 0, 0, 0, 0, 0, 0, 1, 1, 0], 49 | [0, 0, 0, 0, 1, 0, 0, 0, 0, 1], 50 | [1, 0, 0, 0, 0, 1, 0, 0, 0, 0], 51 | [0, 0, 1, 0, 0, 1, 0, 0, 0, 0], 52 | [0, 0, 0, 0, 1, 0, 1, 0, 0, 0]]) 53 | G = b_matching(self.dists, 2, damping=1) 54 | assert_array_equal(G.matrix('dense').astype(int), expected) 55 | 56 | def test_array_b(self): 57 | b = np.zeros(10, dtype=int) 58 | b[5:] = 20 59 | expected = 1 - np.eye(10, dtype=int) 60 | expected[:5] = 0 61 | G = 
b_matching(self.dists, b) 62 | assert_array_equal(G.matrix('dense').astype(int), expected) 63 | 64 | if __name__ == '__main__': 65 | unittest.main() 66 | -------------------------------------------------------------------------------- /examples/swiss_roll.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import numpy as np 3 | from matplotlib import pyplot as plt 4 | from mpl_toolkits.mplot3d import Axes3D 5 | from sklearn.metrics import pairwise_distances 6 | from time import time 7 | 8 | from graphs.datasets.swiss_roll import swiss_roll 9 | from graphs.construction import ( 10 | neighbor_graph, b_matching, gabriel_graph, 11 | relative_neighborhood_graph, manifold_spanning_graph, 12 | sparse_regularized_graph, smce_graph, saffron, mst, disjoint_mst 13 | ) 14 | 15 | 16 | def main(): 17 | X, theta = swiss_roll(8, 500, return_theta=True) 18 | D = pairwise_distances(X) 19 | graph_info = [ 20 | _c('5-NN', neighbor_graph, D, k=6, precomputed=True), 21 | _c('b-matching', b_matching, D, 6), 22 | _c('gabriel', gabriel_graph, X), 23 | _c('rel. neighborhood', relative_neighborhood_graph,D,metric='precomputed'), 24 | _c('manifold spanning', manifold_spanning_graph, X, 2), 25 | _c('L1', sparse_regularized_graph, X, kmax=10, sparsity_param=0.0005), 26 | _c('SMCE', _smce_symm_dist, X, kmax=25, sparsity_param=5), 27 | _c('SAFFRON', saffron, X, q=15, k=5, tangent_dim=2), 28 | _c('MST', mst, D, metric='precomputed'), 29 | _c('dMST', disjoint_mst, D, metric='precomputed'), 30 | ] 31 | 32 | print('Plotting graphs & embeddings') 33 | fig1, axes1 = plt.subplots(nrows=3, ncols=3, subplot_kw=dict(projection='3d')) 34 | fig2, axes2 = plt.subplots(nrows=3, ncols=3) 35 | fig1.suptitle('Original Coordinates') 36 | fig2.suptitle('Isomap Embeddings') 37 | 38 | for ax1, ax2, info in zip(axes1.flat, axes2.flat, graph_info): 39 | label, G, gg, emb, mask = info 40 | G.plot(X, ax=ax1, title=label, vertex_style=dict(c=theta)) 41 | gg.plot(emb, ax=ax2, title=label, vertex_style=dict(c=theta[mask])) 42 | ax1.view_init(elev=5, azim=70) 43 | ax1.set_axis_off() 44 | ax2.set_axis_off() 45 | plt.show() 46 | 47 | 48 | def _smce_symm_dist(X, **kwargs): 49 | g = smce_graph(X, **kwargs) 50 | # SMCE produces asymmetric similarity weights, so we have to convert it. 51 | return g.symmetrize('max').reweight_by_distance(X) 52 | 53 | 54 | def _c(label, fn, *args, **kwargs): 55 | print('Constructing', label, 'graph:') 56 | tic = time() 57 | G = fn(*args, **kwargs) 58 | print(' -> took %.3f secs' % (time() - tic)) 59 | num_ccs, labels = G.connected_components(directed=False) 60 | if num_ccs == 1: 61 | mask = Ellipsis 62 | gg = G 63 | else: 64 | mask = labels == np.bincount(labels).argmax() 65 | gg = G.subgraph(mask) 66 | emb = gg.isomap(num_dims=2, directed=False) 67 | return label, G, gg, emb, mask 68 | 69 | 70 | if __name__ == '__main__': 71 | main() 72 | -------------------------------------------------------------------------------- /benchmarks/asv.conf.json: -------------------------------------------------------------------------------- 1 | { 2 | // The version of the config file format. Do not change, unless 3 | // you know what you are doing. 
4 | "version": 1, 5 | 6 | // The name of the project being benchmarked 7 | "project": "graphs", 8 | 9 | // The project's homepage 10 | "project_url": "https://github.com/all-umass/graphs", 11 | 12 | // The URL or local path of the source code repository for the 13 | // project being benchmarked 14 | "repo": "..", 15 | "dvcs": "git", 16 | "branches": ["master"], 17 | 18 | // The tool to use to create environments. May be "conda", 19 | // "virtualenv" or other value depending on the plugins in use. 20 | // If missing or the empty string, the tool will be automatically 21 | // determined by looking for tools on the PATH environment 22 | // variable. 23 | "environment_type": "virtualenv", 24 | 25 | // the base URL to show a commit for the project. 26 | "show_commit_url": "http://github.com/all-umass/graphs/commit/", 27 | 28 | // The Pythons you'd like to test against. If not provided, defaults 29 | // to the current version of Python used to run `asv`. 30 | // "pythons": ["2.7", "3.3"], 31 | 32 | // The matrix of dependencies to test. Each key is the name of a 33 | // package (in PyPI) and the values are version numbers. An empty 34 | // list indicates to just test against the default (latest) 35 | // version. 36 | "matrix": { 37 | "numpy": ["1.10.4"], 38 | "scipy": ["0.17"], 39 | "matplotlib": ["1.5.1"], 40 | "scikit-learn": ["0.17"] 41 | }, 42 | 43 | // The directory (relative to the current directory) that benchmarks are 44 | // stored in. If not provided, defaults to "benchmarks" 45 | // "benchmark_dir": "benchmarks", 46 | 47 | // The directory (relative to the current directory) to cache the Python 48 | // environments in. If not provided, defaults to "env" 49 | // "env_dir": "env", 50 | 51 | 52 | // The directory (relative to the current directory) that raw benchmark 53 | // results are stored in. If not provided, defaults to "results". 54 | // "results_dir": "results", 55 | 56 | // The directory (relative to the current directory) that the html tree 57 | // should be written to. If not provided, defaults to "html". 58 | // "html_dir": "html", 59 | 60 | // The number of characters to retain in the commit hashes. 61 | // "hash_length": 8, 62 | 63 | // `asv` will cache wheels of the recent builds in each 64 | // environment, making them faster to install next time. This is 65 | // number of builds to keep, per environment. 
66 | "wheel_cache_size": 2 67 | } 68 | -------------------------------------------------------------------------------- /graphs/construction/tests/test_incremental.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | from numpy.testing import assert_array_almost_equal 4 | from sklearn.metrics import pairwise_distances 5 | 6 | from graphs.construction import neighbor_graph 7 | from graphs.construction.incremental import incremental_neighbor_graph 8 | from graphs.mini_six import zip_longest, range 9 | 10 | np.set_printoptions(precision=3, suppress=True) 11 | 12 | 13 | def ngraph(*a, **k): 14 | return neighbor_graph(*a,**k).matrix('dense') 15 | 16 | 17 | class TestNeighbors(unittest.TestCase): 18 | def setUp(self): 19 | self.pts = np.array([[0,0],[1,2],[3,2.5],[-1,0],[.5,.2],[3,.6],[-2,-0.5]]) 20 | 21 | def test_k_range(self): 22 | k_range = range(1, 5) 23 | incr_gen = incremental_neighbor_graph(self.pts, k=k_range) 24 | for k, G in zip_longest(k_range, incr_gen): 25 | expected = ngraph(self.pts, k=k) 26 | assert_array_almost_equal(G.matrix('dense'), expected) 27 | 28 | # non-uniform steps 29 | k_range = [1, 3, 6] 30 | incr_gen = incremental_neighbor_graph(self.pts, k=k_range) 31 | for k, G in zip_longest(k_range, incr_gen): 32 | expected = ngraph(self.pts, k=k) 33 | assert_array_almost_equal(G.matrix('dense'), expected) 34 | 35 | def test_eps_range(self): 36 | eps_range = np.linspace(0.1, 5.5, 5) 37 | incr_gen = incremental_neighbor_graph(self.pts, epsilon=eps_range) 38 | for eps, G in zip_longest(eps_range, incr_gen): 39 | expected = ngraph(self.pts, epsilon=eps) 40 | assert_array_almost_equal(G.matrix('dense'), expected) 41 | 42 | def test_k_eps_range(self): 43 | # varied k with fixed epsilon 44 | k_range = range(1, 5) 45 | incr_gen = incremental_neighbor_graph(self.pts, k=k_range, epsilon=3.) 46 | for k, G in zip_longest(k_range, incr_gen): 47 | expected = ngraph(self.pts, k=k, epsilon=3.) 
48 | assert_array_almost_equal(G.matrix('dense'), expected) 49 | 50 | # varied eps with fixed k 51 | eps_range = np.linspace(0.1, 5.5, 5) 52 | incr_gen = incremental_neighbor_graph(self.pts, k=3, epsilon=eps_range) 53 | for eps, G in zip_longest(eps_range, incr_gen): 54 | expected = ngraph(self.pts, k=3, epsilon=eps) 55 | assert_array_almost_equal(G.matrix('dense'), expected) 56 | 57 | def test_l1_precomputed(self): 58 | dist = pairwise_distances(self.pts, metric='l1') 59 | k_range = range(1, 5) 60 | incr_gen = incremental_neighbor_graph(dist, precomputed=True, k=k_range) 61 | for k, G in zip_longest(k_range, incr_gen): 62 | expected = ngraph(dist, precomputed=True, k=k) 63 | assert_array_almost_equal(G.matrix('dense'), expected) 64 | 65 | 66 | if __name__ == '__main__': 67 | unittest.main() 68 | -------------------------------------------------------------------------------- /graphs/construction/tests/test_saffron.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import unittest 3 | from numpy.testing import assert_array_almost_equal 4 | 5 | from graphs.construction import saffron 6 | 7 | 8 | class TestSaffron(unittest.TestCase): 9 | 10 | def test_x(self): 11 | theta = np.concatenate((np.linspace(-0.25, 0.3, 8), 12 | np.linspace(2.86, 3.4, 8))) 13 | n = theta.shape[0] 14 | X = np.column_stack((np.sin(theta), np.sin(theta) * np.cos(theta))) 15 | 16 | G = saffron(X, q=5, k=2, tangent_dim=1, curv_thresh=0.9, decay_rate=0.5) 17 | 18 | expected_ii = [0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 19 | 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15] 20 | expected_jj = [2, 1, 3, 2, 4, 1, 5, 2, 6, 3, 4, 6, 5, 7, 5, 6, 10, 9, 11, 21 | 10, 12, 9, 9, 13, 11, 14, 15, 12, 13, 15, 13, 14] 22 | expected_w = [0.214, 0.105, 0.219, 0.109, 0.222, 0.109, 0.221, 0.111, 0.216, 23 | 0.111, 0.11, 0.107, 0.107, 0.102, 0.208, 0.102, 0.207, 0.101, 24 | 0.213, 0.105, 0.217, 0.105, 0.213, 0.217, 0.109, 0.215, 0.209, 25 | 0.108, 0.106, 0.103, 0.209, 0.103] 26 | exp = np.zeros((n, n), dtype=float) 27 | exp[expected_ii, expected_jj] = expected_w 28 | 29 | assert_array_almost_equal(G.matrix('dense'), exp, decimal=3) 30 | 31 | def test_intersecting_planes(self): 32 | n1 = np.array([-0.25, -1, 1]) 33 | n2 = np.array([0.5, 0.75, 1.25]) 34 | x1, y1 = map(np.ravel, np.meshgrid(np.linspace(-0.75, 1.5, 10), 35 | np.linspace(-1, 1, 9))) 36 | z1 = (-n1[0]*x1 - n1[1]*y1) / n1[2] 37 | x2, y2 = map(np.ravel, np.meshgrid(np.linspace(-1, 1, 8), 38 | np.linspace(-1.2, 0.9, 9))) 39 | z2 = (-n2[0]*x2 - n2[1]*y2) / n2[2] 40 | X = np.vstack((np.c_[x1, y1, z1], np.c_[x2, y2, z2])) 41 | 42 | # just a smoke test for now, to test the tangent_dim > 1 case 43 | saffron(X, q=16, k=3, tangent_dim=2, decay_rate=0.75, max_iter=30) 44 | 45 | # XXX: This test doesn't pass, though it's unclear if that's due to a bug. 
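# (The test below is disabled by wrapping it in a module-level string literal,
# so unittest never collects it. Note that re-enabling it would also require
# importing assert_array_equal, which this file doesn't currently do.)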
46 | ''' 47 | def test_helix(self): 48 | # attempt to replicate the squashed helix example from the paper 49 | t = np.linspace(0, 7*np.pi, 439) 50 | X = np.column_stack((np.sin(t), np.cos(t), 0.001*t)) 51 | G = saffron(X, q=32, k=4, tangent_dim=1, curv_thresh=0.95, decay_rate=0.9, 52 | max_iter=100) 53 | # check that G doesn't short circuit across loops of the helix 54 | ii, jj = G.pairs().T 55 | diag_offsets = np.unique(np.abs(ii - jj)) 56 | assert_array_equal(diag_offsets, [1, 2])''' 57 | 58 | 59 | if __name__ == '__main__': 60 | unittest.main() 61 | -------------------------------------------------------------------------------- /graphs/base/tests/test_static.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | from numpy.testing import assert_array_equal, assert_array_almost_equal 4 | from scipy.sparse import csr_matrix 5 | 6 | from graphs.base import Graph 7 | 8 | PAIRS = np.array([[0,1],[0,2],[1,1],[2,1],[3,3]]) 9 | ADJ = [[0,1,1,0], 10 | [0,1,0,0], 11 | [0,1,0,0], 12 | [0,0,0,1]] 13 | 14 | 15 | class TestStaticConstructors(unittest.TestCase): 16 | def test_from_pairs(self): 17 | g = Graph.from_edge_pairs(PAIRS) 18 | self.assertEqual(g.num_edges(), 5) 19 | self.assertEqual(g.num_vertices(), 4) 20 | g = Graph.from_edge_pairs(PAIRS, num_vertices=10) 21 | self.assertEqual(g.num_edges(), 5) 22 | self.assertEqual(g.num_vertices(), 10) 23 | g = Graph.from_edge_pairs(PAIRS, symmetric=True) 24 | self.assertEqual(g.num_edges(), 8) 25 | self.assertEqual(g.num_vertices(), 4) 26 | 27 | def test_from_pairs_empty(self): 28 | g = Graph.from_edge_pairs([]) 29 | self.assertEqual(g.num_edges(), 0) 30 | self.assertEqual(g.num_vertices(), 0) 31 | ii, jj = g.pairs().T 32 | assert_array_equal(ii, []) 33 | assert_array_equal(jj, []) 34 | # Make sure ii and jj have indexable dtypes 35 | PAIRS[ii,jj] 36 | # Make sure num_vertices is set correctly 37 | g = Graph.from_edge_pairs([], num_vertices=5) 38 | self.assertEqual(g.num_edges(), 0) 39 | self.assertEqual(g.num_vertices(), 5) 40 | 41 | def test_from_pairs_floating(self): 42 | g = Graph.from_edge_pairs(PAIRS.astype(float)) 43 | p = g.pairs() 44 | self.assertTrue(np.can_cast(p, PAIRS.dtype, casting='same_kind'), 45 | "Expected integral dtype, got %s" % p.dtype) 46 | assert_array_equal(p, PAIRS) 47 | 48 | def test_from_pairs_weighted(self): 49 | w = np.array([1,1,0.1,2,1,2,3.1,4]) 50 | p = [[0,1],[1,2],[2,3],[3,4],[1,0],[2,1],[3,2],[4,3]] 51 | expected = [[0,1,0,0,0],[1,0,1,0,0],[0,2,0,0.1,0],[0,0,3.1,0,2],[0,0,0,4,0]] 52 | G = Graph.from_edge_pairs(p, weights=w, num_vertices=5) 53 | assert_array_almost_equal(G.matrix('dense'), expected) 54 | 55 | # weighted + symmetric 56 | w = np.arange(1, 6) 57 | expected = [[0,1,2,0],[1,3,4,0],[2,4,0,0],[0,0,0,5]] 58 | G = Graph.from_edge_pairs(PAIRS, symmetric=True, weights=w) 59 | assert_array_equal(G.matrix('dense'), expected) 60 | G = Graph.from_edge_pairs(PAIRS[::-1], symmetric=True, weights=w[::-1]) 61 | assert_array_equal(G.matrix('dense'), expected) 62 | 63 | def test_from_adj(self): 64 | m = Graph.from_adj_matrix(ADJ) 65 | self.assertEqual(m.num_edges(), 5) 66 | self.assertEqual(m.num_vertices(), 4) 67 | m = Graph.from_adj_matrix(csr_matrix(ADJ)) 68 | self.assertEqual(m.num_edges(), 5) 69 | self.assertEqual(m.num_vertices(), 4) 70 | 71 | if __name__ == '__main__': 72 | unittest.main() 73 | -------------------------------------------------------------------------------- /graphs/construction/downsample.py: 
-------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import numpy as np 4 | import warnings 5 | from sklearn.metrics.pairwise import pairwise_distances 6 | 7 | from ..mini_six import range 8 | from .neighbors import nearest_neighbors 9 | 10 | __all__ = [ 11 | 'downsample_trajectories', 'epsilon_net', 'fuzzy_c_means' 12 | ] 13 | 14 | 15 | def downsample_trajectories(trajectories, downsampler, *args, **kwargs): 16 | '''Downsamples all points together, then re-splits into original trajectories. 17 | 18 | trajectories : list of 2-d arrays, each representing a trajectory 19 | downsampler(X, *args, **kwargs) : callable that returns indices into X 20 | ''' 21 | X = np.vstack(trajectories) 22 | traj_lengths = list(map(len, trajectories)) 23 | inds = np.sort(downsampler(X, *args, **kwargs)) 24 | new_traj = [] 25 | for stop in np.cumsum(traj_lengths): 26 | n = np.searchsorted(inds, stop) 27 | new_traj.append(X[inds[:n]]) 28 | inds = inds[n:] 29 | return new_traj 30 | 31 | 32 | def epsilon_net(points, close_distance): 33 | '''Selects a subset of `points` to preserve graph structure while minimizing 34 | the number of points used, by removing points within `close_distance`. 35 | Returns the downsampled indices.''' 36 | num_points = points.shape[0] 37 | indices = set(range(num_points)) 38 | selected = [] 39 | while indices: 40 | idx = indices.pop() 41 | nn_inds, = nearest_neighbors(points[idx], points, epsilon=close_distance) 42 | indices.difference_update(nn_inds) 43 | selected.append(idx) 44 | return selected 45 | 46 | 47 | def fuzzy_c_means(points, num_centers, m=2., tol=1e-4, max_iter=100, 48 | verbose=False): 49 | '''Uses Fuzzy C-Means to downsample `points`. 50 | m : aggregation parameter >1, larger implies smoother clusters 51 | Returns indices of downsampled points. 52 | ''' 53 | num_points = points.shape[0] 54 | if num_centers >= num_points: 55 | return np.arange(num_points) 56 | # randomly initialize cluster assignments matrix 57 | assn = np.random.random((points.shape[0], num_centers)) 58 | # iterate assignments until they converge 59 | for i in range(max_iter): 60 | # compute centers 61 | w = assn ** m 62 | w /= w.sum(axis=0) 63 | centers = w.T.dot(points) 64 | # calculate new assignments 65 | d = pairwise_distances(points, centers) 66 | d **= 2. / (m - 1) 67 | np.maximum(d, 1e-10, out=d) 68 | new_assn = 1. 
/ np.einsum('ik,ij->ik', d, 1./d) 69 | # check for convergence 70 | change = np.linalg.norm(new_assn - assn) 71 | if verbose: 72 | print('At iteration %d: change = %g' % (i+1, change)) 73 | if change < tol: 74 | break 75 | assn = new_assn 76 | else: 77 | warnings.warn("fuzzy_c_means didn't converge in %d iterations" % max_iter) 78 | # find points closest to the selected cluster centers 79 | return d.argmin(axis=0) 80 | -------------------------------------------------------------------------------- /graphs/construction/tests/test_neighbors.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | from numpy.testing import assert_array_equal, assert_array_almost_equal 4 | from sklearn.metrics.pairwise import pairwise_distances 5 | 6 | from graphs.construction import neighbors 7 | 8 | 9 | def ngraph(*a, **k): 10 | return neighbors.neighbor_graph(*a,**k).matrix('dense') 11 | 12 | 13 | class TestNeighbors(unittest.TestCase): 14 | def setUp(self): 15 | self.pts = np.array([[0,0],[1,2],[3,2],[-1,0]]) 16 | self.bin_adj = np.array([[0,1,0,1],[1,0,1,0],[1,1,0,0],[1,1,0,0]]) 17 | self.l2_adj = np.sqrt([[0,5,0,1],[5,0,4,0],[13,4,0,0],[1,8,0,0]]) 18 | 19 | def test_neighbor_graph(self): 20 | self.assertRaises(ValueError, ngraph, self.pts) 21 | 22 | def test_binary_weighting(self): 23 | assert_array_equal(ngraph(self.pts, weighting='binary', k=2), self.bin_adj) 24 | assert_array_equal(ngraph(self.pts, weighting='binary', k=2, epsilon=100), 25 | self.bin_adj) 26 | # Add extra values for e-ball 27 | self.bin_adj[0,2] = 1 28 | self.bin_adj[1,3] = 1 29 | assert_array_equal(ngraph(self.pts, weighting='binary', epsilon=3.61), 30 | self.bin_adj) 31 | 32 | def test_no_weighting(self): 33 | assert_array_almost_equal(ngraph(self.pts, k=2), self.l2_adj) 34 | # Add extra values for e-ball 35 | self.l2_adj[0,2] = np.sqrt(13) 36 | self.l2_adj[1,3] = np.sqrt(8) 37 | assert_array_almost_equal(ngraph(self.pts, epsilon=3.61), self.l2_adj) 38 | 39 | def test_precomputed(self): 40 | D = pairwise_distances(self.pts, metric='l2') 41 | actual = ngraph(D, metric='precomputed', k=2) 42 | assert_array_almost_equal(actual, self.l2_adj, decimal=4) 43 | actual = ngraph(D, metric='precomputed', k=2, weighting='binary') 44 | assert_array_almost_equal(actual, self.bin_adj, decimal=4) 45 | 46 | def test_nearest_neighbors(self): 47 | nns = neighbors.nearest_neighbors 48 | pt = np.zeros(2) 49 | self.assertRaises(ValueError, nns, pt, self.pts) 50 | assert_array_equal(nns(pt, self.pts, k=2), [[0,3]]) 51 | assert_array_equal(nns(pt, self.pts, epsilon=2), [[0,3]]) 52 | assert_array_equal(nns(pt, self.pts, k=2, epsilon=10), [[0,3]]) 53 | # Check return_dists 54 | dists, inds = nns(pt, self.pts, k=2, return_dists=True) 55 | assert_array_equal(inds, [[0,3]]) 56 | assert_array_almost_equal(dists, [[0, 1]]) 57 | dists, inds = nns(pt, self.pts, epsilon=2, return_dists=True) 58 | assert_array_equal(inds, [[0,3]]) 59 | assert_array_almost_equal(dists, [[0, 1]]) 60 | # Check precomputed 61 | D = pairwise_distances(pt[None], self.pts, metric='l1') 62 | self.assertRaises(ValueError, nns, pt, self.pts, metric='precomputed', k=2) 63 | assert_array_equal(nns(D, metric='precomputed', k=2), [[0,3]]) 64 | # Check 2d query shape 65 | pt = [[0,0]] 66 | assert_array_equal(nns(pt, self.pts, k=2), [[0,3]]) 67 | # Check all-pairs mode 68 | assert_array_equal(nns(self.pts, k=2), [[0,3],[1,2],[2,1],[3,0]]) 69 | 70 | 71 | if __name__ == '__main__': 72 | unittest.main() 73 | 
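The tests above double as a compact reference for the neighbors API: neighbor_graph builds a k-nearest-neighbor and/or epsilon-ball graph over a point set (or a precomputed distance matrix), while nearest_neighbors queries neighbors of individual points. A minimal usage sketch, reusing the toy points from the tests (assumes the graphs package is importable):

import numpy as np
from graphs.construction import neighbor_graph
from graphs.construction.neighbors import nearest_neighbors

pts = np.array([[0,0],[1,2],[3,2],[-1,0]])
# k-nearest-neighbor graph with binary (0/1) edge weights
G = neighbor_graph(pts, k=2, weighting='binary')
# epsilon-ball graph; edges keep their euclidean lengths by default
H = neighbor_graph(pts, epsilon=3.61)
# neighbors of a single query point, with distances
dists, inds = nearest_neighbors(np.zeros(2), pts, k=2, return_dists=True)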
-------------------------------------------------------------------------------- /graphs/construction/geometric.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function 2 | 3 | import numpy as np 4 | from scipy.spatial import Delaunay 5 | from sklearn.metrics.pairwise import ( 6 | pairwise_distances, paired_distances, pairwise_distances_argmin_min) 7 | from graphs import Graph 8 | from ..mini_six import range 9 | 10 | __all__ = [ 11 | 'delaunay_graph', 'urquhart_graph', 'gabriel_graph', 12 | 'relative_neighborhood_graph' 13 | ] 14 | 15 | 16 | def delaunay_graph(X, weighted=False): 17 | '''Delaunay triangulation graph. 18 | ''' 19 | e1, e2 = _delaunay_edges(X) 20 | pairs = np.column_stack((e1, e2)) 21 | w = paired_distances(X[e1], X[e2]) if weighted else None 22 | return Graph.from_edge_pairs(pairs, num_vertices=X.shape[0], symmetric=True, 23 | weights=w) 24 | 25 | 26 | def urquhart_graph(X, weighted=False): 27 | '''Urquhart graph: made from the 2 shortest edges of each Delaunay triangle. 28 | ''' 29 | e1, e2 = _delaunay_edges(X) 30 | w = paired_distances(X[e1], X[e2]) 31 | mask = np.ones_like(w, dtype=bool) 32 | bad_inds = w.reshape((-1, 3)).argmax(axis=1) + np.arange(0, len(e1), 3) 33 | mask[bad_inds] = False 34 | 35 | weights = w[mask] if weighted else None 36 | pairs = np.column_stack((e1[mask], e2[mask])) 37 | return Graph.from_edge_pairs(pairs, num_vertices=X.shape[0], symmetric=True, 38 | weights=weights) 39 | 40 | 41 | def gabriel_graph(X, metric='euclidean', weighted=False): 42 | n = X.shape[0] 43 | a, b = np.triu_indices(n, k=1) 44 | midpoints = (X[a] + X[b]) / 2 45 | _, Dmid = pairwise_distances_argmin_min(midpoints, X, metric=metric) 46 | Dedge = paired_distances(X[a], X[b], metric=metric) 47 | mask = (Dedge - Dmid * 2) < 1e-10 48 | pairs = np.column_stack((a[mask], b[mask])) 49 | w = Dedge[mask] if weighted else None 50 | return Graph.from_edge_pairs(pairs, num_vertices=n, symmetric=True, weights=w) 51 | 52 | 53 | def relative_neighborhood_graph(X, metric='euclidean', weighted=False): 54 | D = pairwise_distances(X, metric=metric) 55 | n = D.shape[0] 56 | pairs = np.asarray(find_relative_neighbors(D)) 57 | w = D[pairs[:,0],pairs[:,1]] if weighted else None 58 | return Graph.from_edge_pairs(pairs, num_vertices=n, symmetric=True, weights=w) 59 | 60 | 61 | def _delaunay_edges(X): 62 | tri = Delaunay(X) 63 | e1 = tri.simplices.ravel() 64 | e2 = np.roll(tri.simplices, 1, axis=1).ravel() 65 | return e1, e2 66 | 67 | 68 | def _find_relative_neighbors(D): 69 | # Naive algorithm, but it's generic to any D (doesn't depend on delaunay). 
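# An edge (r,c) survives iff no third point i falls inside the "lune" of r and
# c, i.e. iff there is no i with both D[r,i] < D[r,c] and D[c,i] < D[r,c].
# Checking every (r,c,i) triple makes this O(n^3) overall.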
70 | n = D.shape[0] 71 | pairs = [] 72 | for r in range(n-1): 73 | for c in range(r+1, n): 74 | d = D[r,c] 75 | for i in range(n): 76 | if i == r or i == c: 77 | continue 78 | if D[r,i] < d and D[c,i] < d: 79 | break # Point in lune, this is not an edge 80 | else: 81 | pairs.append((r,c)) 82 | return pairs 83 | 84 | 85 | try: 86 | from ._fast_paths import find_relative_neighbors 87 | except ImportError: 88 | try: 89 | import pyximport 90 | pyximport.install(setup_args={'include_dirs': np.get_include()}) 91 | from ._fast_paths import find_relative_neighbors 92 | except ImportError: 93 | find_relative_neighbors = _find_relative_neighbors 94 | -------------------------------------------------------------------------------- /graphs/mixins/_betweenness.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, absolute_import, print_function 2 | from collections import deque 3 | from heapq import heappush, heappop 4 | import numpy as np 5 | import scipy.sparse as ss 6 | from ..mini_six import range 7 | 8 | 9 | def _betweenness(adj, weighted, vertex): 10 | fn = _brandes if vertex else _brandes_edges 11 | return fn(adj, weighted) 12 | 13 | 14 | def _brandes(adj, weighted): 15 | # Brandes algorithm for vertex betweenness 16 | # sigma[v]: number of shortest paths from s->v 17 | # delta[v]: dependency of s on v 18 | sssp = _sssp_weighted if weighted else _sssp_unweighted 19 | n = adj.shape[0] 20 | btw = np.zeros(n) 21 | for s in range(n): 22 | S, pred, sigma = sssp(adj, s) 23 | delta = np.zeros(n) 24 | while S: 25 | w = S.pop() 26 | coeff = (1 + delta[w]) / sigma[w] 27 | for v in pred.get(w, []): 28 | delta[v] += sigma[v] * coeff 29 | if w != s: 30 | btw[w] += delta[w] 31 | return btw 32 | 33 | 34 | def _brandes_edges(adj, weighted): 35 | sssp = _sssp_weighted if weighted else _sssp_unweighted 36 | n = adj.shape[0] 37 | # set up betweenness container with correct sparsity pattern 38 | btw = ss.csr_matrix(adj, dtype=float, copy=True) 39 | btw.eliminate_zeros() 40 | btw.data[:] = 0 41 | for s in range(n): 42 | S, pred, sigma = sssp(adj, s) 43 | delta = np.zeros(n) 44 | while S: 45 | w = S.pop() 46 | coeff = (1 + delta[w]) / sigma[w] 47 | for v in pred.get(w, []): 48 | c = sigma[v] * coeff 49 | btw[v,w] += c 50 | delta[v] += c 51 | return btw.data 52 | 53 | 54 | def _sssp_unweighted(adj, s): 55 | n = adj.shape[0] 56 | S = [] 57 | pred = {} 58 | sigma = np.zeros(n) 59 | sigma[s] = 1 60 | dist = sigma + np.inf 61 | dist[s] = 0 62 | Q = deque([s]) 63 | while Q: 64 | v = Q.popleft() 65 | S.append(v) 66 | new_weight = dist[v] + 1 67 | neighbors = adj[v].nonzero()[-1] 68 | for w in neighbors: 69 | if np.isinf(dist[w]): 70 | pred[w] = [v] 71 | sigma[w] = sigma[v] 72 | dist[w] = new_weight 73 | Q.append(w) 74 | elif dist[w] == new_weight: 75 | pred[w].append(v) 76 | sigma[w] += sigma[v] 77 | return S, pred, sigma 78 | 79 | 80 | def _sssp_weighted(adj, s): 81 | n = adj.shape[0] 82 | S = set() 83 | pred = {} 84 | sigma = np.zeros(n) 85 | sigma[s] = 1 86 | dist = sigma + np.inf 87 | dist[s] = 0 88 | Q = [(0,s)] 89 | while Q: 90 | dist_v, v = heappop(Q) 91 | S.add(v) 92 | neighbors = adj[v].nonzero()[-1] 93 | for w in neighbors: 94 | new_weight = dist_v + adj[v,w] 95 | if dist[w] > new_weight: 96 | pred[w] = [v] 97 | sigma[w] = sigma[v] 98 | dist[w] = new_weight 99 | heappush(Q, (new_weight, w)) 100 | elif dist[w] == new_weight: 101 | pred[w].append(v) 102 | sigma[w] += sigma[v] 103 | S = sorted(S, key=lambda v: dist[v]) 104 | return S, pred, sigma 105 
| 106 | 107 | try: 108 | from ._betweenness_helper import betweenness 109 | except ImportError: 110 | try: 111 | import pyximport 112 | pyximport.install(setup_args={'include_dirs': np.get_include()}) 113 | from ._betweenness_helper import betweenness 114 | except ImportError: 115 | betweenness = _betweenness 116 | -------------------------------------------------------------------------------- /graphs/datasets/shapes.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | __all__ = ['MobiusStrip', 'FigureEight', 'SCurve'] 4 | 5 | 6 | class ParameterizedShape(object): 7 | def __init__(self, **param_info): 8 | for name,(lb,ub,is_monotone) in param_info.items(): 9 | assert lb <= ub, 'Lower bound must be <= upper bound for %s' % name 10 | assert (bool(is_monotone) == is_monotone 11 | ), 'monotonicity must be boolean for %s' % name 12 | self.param_info = param_info 13 | 14 | def evaluate(self, **param_values): 15 | raise NotImplementedError('subclasses must implement this') 16 | 17 | def point_cloud(self, num_points): 18 | param_values = {} 19 | for name,(lb,ub,is_monotone) in self.param_info.items(): 20 | if is_monotone: 21 | vals = np.linspace(lb, ub, num_points) 22 | else: 23 | vals = np.random.uniform(lb, ub, size=num_points) 24 | param_values[name] = vals 25 | return self.evaluate(**param_values) 26 | 27 | def trajectories(self, num_traj, points_per_traj): 28 | param_values = {} 29 | for name,(lb,ub,is_monotone) in self.param_info.items(): 30 | step = float(ub-lb)/points_per_traj 31 | shape = (num_traj, points_per_traj) 32 | if is_monotone: 33 | vals = np.random.normal(loc=step, scale=step/3, size=shape) 34 | else: 35 | vals = np.random.normal(loc=0, scale=step, size=shape) 36 | param_values[name] = np.cumsum(vals, axis=1) 37 | #TODO: random offsets for starting vals? 38 | return self.evaluate(**param_values) 39 | 40 | 41 | class MobiusStrip(ParameterizedShape): 42 | def __init__(self, radius=1.0, max_width=1.0): 43 | ParameterizedShape.__init__(self, 44 | theta=(0, 2*np.pi, True), 45 | width=(-max_width/2, max_width/2, False)) 46 | self.radius = radius 47 | 48 | def evaluate(self, theta=None, width=None): 49 | tmp = self.radius + width * np.cos(theta/2) 50 | X = np.empty(theta.shape + (3,)) 51 | X[...,0] = tmp * np.cos(theta) 52 | X[...,1] = tmp * np.sin(theta) 53 | X[...,2] = width * np.sin(theta/2) 54 | return X 55 | 56 | 57 | class FigureEight(ParameterizedShape): 58 | def __init__(self, radius=1.0, dimension=2): 59 | ParameterizedShape.__init__(self, 60 | theta=(0, 2*np.pi, True), 61 | width=(0, 1, False)) # width is only used in the 3d case 62 | self.radius = radius 63 | assert dimension in (2,3) 64 | self.dim = dimension 65 | 66 | def evaluate(self, theta=None, width=None): 67 | X = np.empty(theta.shape + (self.dim,)) 68 | X[...,0] = self.radius * np.sin(theta) 69 | # The only difference from a circle is this extra sin(theta) term. 
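# With x = r*sin(theta), we get y = r*sin(theta)*cos(theta) = (r/2)*sin(2*theta),
# so the curve passes through the origin twice per period, tracing a figure eight.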
70 | X[...,1] = X[...,0] * np.cos(theta) 71 | if self.dim == 3: 72 | X[...,2] = width 73 | return X 74 | 75 | 76 | class SCurve(ParameterizedShape): 77 | def __init__(self, radius=1.0): 78 | ParameterizedShape.__init__(self, 79 | theta=(-np.pi-1, np.pi+1, True), 80 | width=(-1, 1, False)) 81 | self.radius = radius 82 | 83 | def evaluate(self, theta=None, width=None): 84 | X = np.empty(theta.shape + (3,)) 85 | X[...,0] = np.sin(theta) 86 | X[...,2] = np.cos(theta) 87 | X[...,1] = width 88 | first_half = slice(0, theta.shape[-1]//2) 89 | X[...,first_half,2] = 2 + -X[...,first_half,2] 90 | return X 91 | -------------------------------------------------------------------------------- /graphs/mixins/tests/test_viz.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | from io import StringIO 4 | from scipy.sparse import csr_matrix 5 | from matplotlib import pyplot 6 | pyplot.switch_backend('template') 7 | 8 | from graphs import Graph 9 | 10 | 11 | class TestPlot(unittest.TestCase): 12 | def setUp(self): 13 | pairs = np.array([[0,1],[0,2],[1,2],[3,4]]) 14 | adj = [[0,1,2,0,0], 15 | [0,0,3,0,0], 16 | [0,0,0,0,0], 17 | [0,0,0,0,4], 18 | [0,0,0,0,0]] 19 | self.graphs = [ 20 | Graph.from_edge_pairs(pairs), 21 | Graph.from_edge_pairs(pairs, symmetric=True), 22 | Graph.from_adj_matrix(adj), 23 | Graph.from_adj_matrix(csr_matrix(adj)), 24 | ] 25 | self.coords = np.random.random((5, 3)) 26 | 27 | def test_plot_default(self): 28 | for G in self.graphs: 29 | G.plot(self.coords[:,:1]) # 1d plotting 30 | G.plot(self.coords[:,:2]) # 2d plotting 31 | G.plot(self.coords) # 3d plotting 32 | 33 | def test_plot_direction(self): 34 | for G in self.graphs: 35 | G.plot(self.coords[:,:2], directed=True) 36 | G.plot(self.coords[:,:2], directed=False) 37 | G.plot(self.coords, directed=True) 38 | G.plot(self.coords, directed=False) 39 | 40 | def test_plot_weighting(self): 41 | for G in self.graphs: 42 | G.plot(self.coords[:,:2], weighted=True) 43 | G.plot(self.coords[:,:2], weighted=False) 44 | G.plot(self.coords, weighted=True) 45 | G.plot(self.coords, weighted=False) 46 | 47 | def test_plot_styles(self): 48 | x = self.coords[:,:2] # use 2d coords, 3d _get_axis is slow 49 | for G in self.graphs: 50 | G.plot(x, edge_style='r--') 51 | G.plot(x, edge_style=dict(colors=[(0.5,1,0)]*4, linestyles=':')) 52 | G.plot(x, vertex_style='rx') 53 | G.plot(x, vertex_style=dict(c=[(0,0,0),(1,1,1)], marker='o')) 54 | G.plot(x, edge_style='k') 55 | G.plot(x, directed=True, edge_style='1') 56 | G.plot(x, edge_style='01') 57 | G.plot(x, directed=True, edge_style=' x') 58 | G.plot(x, edge_style='-.') 59 | G.plot(x, edge_style='k-') 60 | # Make sure we break with bogus styles 61 | with self.assertRaises(ValueError): 62 | G.plot(x, edge_style='z') 63 | with self.assertRaises(ValueError): 64 | G.plot(x, edge_style='::') 65 | with self.assertRaises(ValueError): 66 | G.plot(x, edge_style='oo') 67 | with self.assertRaises(ValueError): 68 | G.plot(x, edge_style='kk') 69 | 70 | def test_plot_fig(self): 71 | for G in self.graphs: 72 | G.plot(self.coords[:,:2], fig='new') 73 | G.plot(self.coords[:,:2], fig='current') 74 | 75 | def test_to_html(self): 76 | for G in self.graphs: 77 | buf = StringIO() 78 | # just make sure no exceptions are thrown 79 | G.to_html(buf, directed=False) 80 | buf.truncate(0) 81 | 82 | c = np.arange(5) 83 | G.to_html(buf, vertex_ids=c, directed=False, title='Test Page') 84 | buf.truncate(0) 85 | G.to_html(buf, vertex_colors=c, directed=False) 86 | 
buf.truncate(0) 87 | G.to_html(buf, vertex_labels=c, directed=False) 88 | buf.truncate(0) 89 | with self.assertRaises(ValueError): 90 | G.to_html(buf, vertex_colors=c, vertex_labels=c, directed=False) 91 | with self.assertRaises(ValueError): 92 | G.to_html(buf, vertex_ids=c[:2], directed=False) 93 | with self.assertRaises(ValueError): 94 | G.to_html(buf, vertex_colors=c[:2], directed=False) 95 | with self.assertRaises(ValueError): 96 | G.to_html(buf, vertex_labels=c[:2], directed=False) 97 | 98 | 99 | if __name__ == '__main__': 100 | unittest.main() 101 | -------------------------------------------------------------------------------- /graphs/datasets/mountain_car.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from matplotlib import pyplot as plt 3 | from scipy.interpolate import griddata 4 | 5 | __all__ = ['plot_mcar_basis', 'mountain_car_trajectories'] 6 | 7 | 8 | def plot_mcar_basis(G, X, title='Mountain Car graph'): 9 | _, axes = plt.subplots(nrows=2, ncols=2) 10 | G.plot(X, title=title, ax=axes[0,0]) 11 | 12 | emb = G.laplacian_eigenmaps(num_dims=3) 13 | 14 | x, y = X.T 15 | # Set up grids for a contour plot 16 | x_range = (x.min(), x.max()) 17 | y_range = (y.min(), y.max()) 18 | pad_x = 0.05 * -np.subtract.reduce(x_range) 19 | pad_y = 0.05 * -np.subtract.reduce(y_range) 20 | grid_x = np.linspace(x_range[0] - pad_x, x_range[1] + pad_x, 100) 21 | grid_y = np.linspace(y_range[0] - pad_y, y_range[1] + pad_y, 100) 22 | for i,(ax,z) in enumerate(zip(axes.flat[1:], emb.T)): 23 | grid_z = griddata((x, y), z, (grid_x[None], grid_y[:,None]), 24 | method='nearest') 25 | ax.contourf(grid_x, grid_y, grid_z) 26 | ax.plot(x, y, 'k,') 27 | ax.set_title('Basis %d' % (i+1)) 28 | return plt.show 29 | 30 | 31 | def mountain_car_trajectories(num_traj): 32 | '''Collect data using random hard-coded policies on MountainCar. 33 | 34 | num_traj : int, number of trajectories to collect 35 | 36 | Returns (trajectories, traces) 37 | ''' 38 | domain = MountainCar() 39 | slopes = np.random.normal(0, 0.01, size=num_traj) 40 | v0s = np.random.normal(0, 0.005, size=num_traj) 41 | trajectories = [] 42 | traces = [] 43 | norm = np.array((domain.MAX_POS-domain.MIN_POS, 44 | domain.MAX_VEL-domain.MIN_VEL)) 45 | for m,b in zip(slopes, v0s): 46 | mcar_policy = lambda s: 0 if s[0]*m + s[1] + b > 0 else 2 47 | start = (np.random.uniform(domain.MIN_POS,domain.MAX_POS), 48 | np.random.uniform(domain.MIN_VEL,domain.MAX_VEL)) 49 | samples = _run_episode(mcar_policy, domain, start, max_iters=40) 50 | # normalize 51 | samples.state /= norm 52 | samples.next_state /= norm 53 | traces.append(samples) 54 | if samples.reward[-1] == 0: 55 | # Don't include the warp to the final state. 
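# (reward_for returns 0 only at the goal, so a final reward of 0 marks an
# episode that ended by reaching the goal state.)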
56 | trajectories.append(samples.state[:-1]) 57 | else: 58 | trajectories.append(samples.state) 59 | 60 | return trajectories, traces 61 | 62 | 63 | def _run_episode(policy_action, domain, state, max_iters=1e100): 64 | action = policy_action(state) 65 | samples = [] 66 | while not domain.finished(state): 67 | # get new state and action 68 | new_state = domain.take_action(state, action) 69 | new_action = policy_action(new_state) 70 | # update histories 71 | reward = domain.reward_for(state) 72 | samples.append((state, action, reward, new_state, new_action)) 73 | if len(samples) >= max_iters: 74 | break 75 | state = new_state 76 | action = new_action 77 | ds = len(state) 78 | names = ('state','action','reward','next_state','next_action') 79 | formats = (('f',(ds,)),int,float,('f',(ds,)),int) 80 | dtype = dict(names=names, formats=formats) 81 | return np.array(samples, dtype).view(np.recarray) 82 | 83 | 84 | class MountainCar(object): 85 | # directions: fwd neu rev 86 | action_dirs = [1, 0, -1] 87 | NUM_ACTIONS = 3 88 | 89 | GOAL_POS = 0.5 90 | DT = 0.001 91 | 92 | MIN_POS = -1.2 93 | MAX_POS = 0.5 94 | MIN_VEL = -0.07 95 | MAX_VEL = 0.07 96 | 97 | def __init__(self, gravity=-0.0025): 98 | self.gravity = gravity 99 | 100 | def reward_for(self, state): 101 | return 0 if state[0] >= MountainCar.GOAL_POS else -1 102 | 103 | def finished(self, state): 104 | return self.reward_for(state) == 0 105 | 106 | def take_action(self, state, action): 107 | p,v = state 108 | a = MountainCar.action_dirs[action] 109 | new_v = v + (MountainCar.DT*a) + (self.gravity*np.cos(3*p)) 110 | new_v = min(MountainCar.MAX_VEL, max(MountainCar.MIN_VEL, new_v)) 111 | new_p = p + new_v 112 | if new_p < MountainCar.MIN_POS: 113 | new_p = MountainCar.MIN_POS 114 | new_v = 0 115 | elif new_p > MountainCar.MAX_POS: 116 | new_p = MountainCar.MAX_POS 117 | new_v = 0 118 | return new_p, new_v 119 | -------------------------------------------------------------------------------- /graphs/mixins/_betweenness_helper.pyx: -------------------------------------------------------------------------------- 1 | #distutils: language = c++ 2 | #cython: boundscheck=False, wraparound=True, cdivision=True 3 | cimport numpy as np 4 | import numpy as np 5 | import scipy.sparse as ss 6 | from libcpp.deque cimport deque 7 | from libcpp.stack cimport stack 8 | from libcpp.pair cimport pair 9 | from libcpp.queue cimport priority_queue 10 | 11 | cdef double INF = float('inf') 12 | ctypedef np.int_t intc 13 | ctypedef dict (*sssp_fn)(object, intc, intc[::1], double[::1], stack[intc]&) 14 | 15 | cpdef betweenness(adj, bint weighted, bint vertex): 16 | cdef sssp_fn sssp 17 | if weighted: 18 | sssp = &_sssp_weighted 19 | else: 20 | sssp = &_sssp_unweighted 21 | # sigma[v]: number of shortest paths from s->v 22 | # delta[v]: dependency of s on v 23 | cdef intc s, w, v, n = adj.shape[0] 24 | cdef double[::1] delta = np.zeros(n) 25 | cdef double[::1] dist = np.zeros(n) 26 | cdef intc[::1] sigma = np.zeros(n, dtype=np.int) 27 | cdef double coeff 28 | cdef stack[intc] S 29 | cdef dict pred 30 | cdef double[::1] vbtw 31 | if vertex: 32 | # Brandes algorithm for vertex betweenness 33 | vbtw = np.zeros(n) 34 | for s in range(n): 35 | pred = sssp(adj, s, sigma, dist, S) 36 | delta[:] = 0 37 | while not S.empty(): 38 | w = S.top() 39 | coeff = (1.0 + delta[w]) / sigma[w] 40 | for v in pred.get(w, []): 41 | delta[v] += sigma[v] * coeff 42 | if w != s: 43 | vbtw[w] += delta[w] 44 | S.pop() 45 | return np.array(vbtw, dtype=float) 46 | # Brandes variant for edge 
betweenness 47 | # set up betweenness container with correct sparsity pattern 48 | ebtw = ss.csr_matrix(adj, dtype=float, copy=True) 49 | ebtw.eliminate_zeros() 50 | ebtw.data[:] = 0 51 | for s in range(n): 52 | pred = sssp(adj, s, sigma, dist, S) 53 | delta[:] = 0 54 | while not S.empty(): 55 | w = S.top() 56 | coeff = (1.0 + delta[w]) / sigma[w] 57 | for v in pred.get(w, []): 58 | c = sigma[v] * coeff 59 | ebtw[v,w] += c 60 | delta[v] += c 61 | S.pop() 62 | return ebtw.data 63 | 64 | 65 | cdef dict _sssp_unweighted(adj, intc s, intc[::1] sigma, double[::1] dist, stack[intc]& S): 66 | cdef intc v, w, i, j, widx 67 | cdef double new_weight 68 | cdef dict pred = {} 69 | sigma[:] = 0 70 | sigma[s] = 1 71 | dist[:] = INF 72 | dist[s] = 0 73 | cdef deque[intc] Q 74 | Q.push_back(s) 75 | while not Q.empty(): 76 | v = Q.front() 77 | Q.pop_front() 78 | S.push(v) 79 | new_weight = dist[v] + 1 80 | i = adj.indptr[v] 81 | j = adj.indptr[v+1] 82 | for widx in range(i, j): 83 | w = adj.indices[widx] 84 | if dist[w] > new_weight: 85 | pred[w] = [v] 86 | sigma[w] = sigma[v] 87 | dist[w] = new_weight 88 | Q.push_back(w) 89 | elif dist[w] == new_weight: 90 | pred[w].append(v) 91 | sigma[w] += sigma[v] 92 | return pred 93 | 94 | 95 | cdef dict _sssp_weighted(adj, intc s, intc[::1] sigma, double[::1] dist, stack[intc]& S): 96 | cdef intc v, w, i, j, widx 97 | cdef double dist_v, new_weight, d 98 | cdef set SS = set() 99 | cdef dict pred = {} 100 | sigma[:] = 0 101 | sigma[s] = 1 102 | dist[:] = INF 103 | dist[s] = 0 104 | cdef priority_queue[pair[double,intc]] Q 105 | Q.push(pair[double,intc](0.,s)) 106 | while not Q.empty(): 107 | tmp = Q.top() 108 | Q.pop() 109 | dist_v = tmp.first 110 | v = tmp.second 111 | SS.add(v) 112 | i = adj.indptr[v] 113 | j = adj.indptr[v+1] 114 | for widx in range(i, j): 115 | w = adj.indices[widx] 116 | d = adj.data[widx] 117 | new_weight = dist_v + d 118 | if dist[w] > new_weight: 119 | pred[w] = [v] 120 | sigma[w] = sigma[v] 121 | dist[w] = new_weight 122 | Q.push(pair[double,intc](new_weight, w)) 123 | elif dist[w] == new_weight: 124 | pred[w].append(v) 125 | sigma[w] += sigma[v] 126 | # XXX: ugly workaround: using lambdas/comprehensions in cdef -> segfault 127 | cdef list foo = [(dist[v], v) for v in SS] 128 | for _,w in sorted(foo): 129 | S.push(w) 130 | return pred 131 | -------------------------------------------------------------------------------- /graphs/mixins/tests/test_analysis.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | import warnings 4 | from numpy.testing import assert_array_equal, assert_array_almost_equal 5 | from scipy.sparse import coo_matrix 6 | from graphs import Graph 7 | 8 | PAIRS = np.array([[0,1],[0,2],[1,2],[2,0],[3,4],[4,3]]) 9 | ADJ = [[0,1,1,0,0], 10 | [0,0,1,0,0], 11 | [1,0,0,0,0], 12 | [0,0,0,0,1], 13 | [0,0,0,1,0]] 14 | 15 | 16 | class TestAnalysis(unittest.TestCase): 17 | def setUp(self): 18 | self.graphs = [ 19 | Graph.from_edge_pairs(PAIRS), 20 | Graph.from_adj_matrix(ADJ), 21 | Graph.from_adj_matrix(coo_matrix(ADJ)), 22 | ] 23 | 24 | def test_connected_components(self): 25 | for G in self.graphs: 26 | n, labels = G.connected_components() 27 | self.assertEqual(2, n) 28 | assert_array_equal(labels, [0,0,0,1,1]) 29 | 30 | def test_ave_laplacian(self): 31 | g = Graph.from_adj_matrix([[0,1,2],[1,0,0],[2,0,0]]) 32 | expected = np.array([[1,-0.5,0],[-0.5,1,0],[0,0,1]]) 33 | assert_array_almost_equal(g.ave_laplacian(), expected) 34 | 35 | def 
test_directed_laplacian(self): 36 | expected = np.array([ 37 | [0.239519, -0.05988, -0.179839, 0, 0], 38 | [-0.05988, 0.120562,-0.060281, 0, 0], 39 | [-0.179839,-0.060281, 0.239919, 0, 0], 40 | [0, 0, 0, 0.2,-0.2], 41 | [0, 0, 0, -0.2, 0.2]]) 42 | for G in self.graphs: 43 | L = G.directed_laplacian() 44 | assert_array_almost_equal(L, expected) 45 | 46 | # test non-convergence case 47 | with warnings.catch_warnings(record=True) as w: 48 | self.graphs[0].directed_laplacian(max_iter=2) 49 | self.assertEqual(len(w), 1) 50 | self.assertEqual(str(w[0].message), 51 | 'phi failed to converge after 2 iterations') 52 | 53 | def test_bandwidth(self): 54 | for G in self.graphs: 55 | self.assertEqual(G.bandwidth(), 2) 56 | 57 | def test_profile(self): 58 | for G in self.graphs: 59 | self.assertEqual(G.profile(), 1) 60 | 61 | def test_betweenness(self): 62 | for G in self.graphs: 63 | G.symmetrize(copy=False) 64 | _test_btw(G, 'vertex', False, False, np.zeros(5)) 65 | _test_btw(G, 'vertex', False, True, np.zeros(5)) 66 | _test_btw(G, 'edge', False, False, np.ones(8)/2.) 67 | _test_btw(G, 'edge', False, True, np.ones(8)) 68 | if G.is_weighted(): 69 | _test_btw(G, 'vertex', True, False, [0,0.5,0,0,0]) 70 | _test_btw(G, 'vertex', True, True, [0,1,0,0,0]) 71 | _test_btw(G, 'edge', True, False, np.array([3,1,3,3,1,3,2,2])/4.) 72 | _test_btw(G, 'edge', True, True, np.array([3,1,3,3,1,3,2,2])/2.) 73 | 74 | def test_betweenness_weighted(self): 75 | # test a weighted graph with different kinds of weights 76 | G = Graph.from_adj_matrix([[0,1,2,0],[1,0,0,3],[2,0,0,1],[0,3,1,0]]) 77 | _test_btw(G, 'vertex', False, False, [0.5]*4) 78 | _test_btw(G, 'vertex', False, True, [1]*4) 79 | _test_btw(G, 'vertex', True, False, [1,0,1,0]) 80 | _test_btw(G, 'vertex', True, True, [2,0,2,0]) 81 | _test_btw(G, 'edge', False, False, [1,1,1,1,1,1,1,1]) 82 | _test_btw(G, 'edge', False, True, [2,2,2,2,2,2,2,2]) 83 | _test_btw(G, 'edge', True, False, np.array([2,3,2,1,3,2,1,2])/2.) 
84 | _test_btw(G, 'edge', True, True, [2,3,2,1,3,2,1,2]) 85 | 86 | def test_eccentricity(self): 87 | for G in self.graphs: 88 | # unconnected graphs have infinite eccentricity 89 | assert_array_equal(G.eccentricity(), np.inf+np.ones(5)) 90 | g = Graph.from_adj_matrix([[0,1,2],[1,0,0],[2,0,0]]) 91 | assert_array_equal(g.eccentricity(), [2,3,3]) 92 | 93 | def test_diameter(self): 94 | for G in self.graphs: 95 | # unconnected graphs have infinite diameter 96 | self.assertEqual(G.diameter(), np.inf) 97 | g = Graph.from_adj_matrix([[0,1,2],[1,0,0],[2,0,0]]) 98 | self.assertEqual(g.diameter(), 3) 99 | 100 | def test_radius(self): 101 | for G in self.graphs: 102 | # unconnected graphs have infinite radius 103 | self.assertEqual(G.radius(), np.inf) 104 | g = Graph.from_adj_matrix([[0,1,2],[1,0,0],[2,0,0]]) 105 | self.assertEqual(g.radius(), 2) 106 | 107 | 108 | def _test_btw(G, k, w, d, exp): 109 | assert_array_equal(G.betweenness(kind=k, weighted=w, directed=d), exp) 110 | 111 | if __name__ == '__main__': 112 | unittest.main() 113 | -------------------------------------------------------------------------------- /graphs/mixins/tests/test_label.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | from numpy.testing import assert_array_equal 4 | from scipy.sparse import coo_matrix 5 | from sklearn.metrics.cluster import adjusted_rand_score 6 | from graphs import Graph 7 | from graphs.construction import neighbor_graph 8 | 9 | 10 | class TestLabel(unittest.TestCase): 11 | 12 | def _make_blob_graphs(self, k=11): 13 | pts = np.random.random(size=(20, 2)) 14 | pts[10:] += 2 15 | labels = np.zeros(20) 16 | labels[10:] = 1 17 | G_sparse = neighbor_graph(pts, k=k).symmetrize() 18 | G_dense = Graph.from_adj_matrix(G_sparse.matrix('dense')) 19 | return (G_sparse, G_dense), labels 20 | 21 | def test_greedy_coloring(self): 22 | pairs = np.array([[0,1],[0,2],[1,0],[1,2],[2,0],[2,1],[3,4],[4,3]]) 23 | adj = [[0,1,1,0,0], 24 | [1,0,1,0,0], 25 | [1,1,0,0,0], 26 | [0,0,0,0,1], 27 | [0,0,0,1,0]] 28 | test_cases = [ 29 | Graph.from_edge_pairs(pairs), 30 | Graph.from_adj_matrix(adj), 31 | Graph.from_adj_matrix(coo_matrix(adj)), 32 | ] 33 | for G in test_cases: 34 | assert_array_equal([1,2,3,1,2], G.color_greedy()) 35 | 36 | def test_bicolor_spectral(self): 37 | pairs = np.array([[0,1],[0,2],[1,0],[1,2],[2,0],[2,1],[2,3],[3,2]]) 38 | adj = [[0,1,1,0], 39 | [1,0,1,0], 40 | [1,1,0,1], 41 | [0,0,1,0]] 42 | test_cases = [ 43 | Graph.from_edge_pairs(pairs), 44 | Graph.from_adj_matrix(adj), 45 | Graph.from_adj_matrix(coo_matrix(adj)), 46 | ] 47 | expected = np.array([1,1,0,1], dtype=bool) 48 | for G in test_cases: 49 | assert_array_equal(expected, G.bicolor_spectral()) 50 | 51 | def test_spectral_clustering(self): 52 | blob_graphs, expected = self._make_blob_graphs(k=11) 53 | 54 | for g in blob_graphs: 55 | labels = g.cluster_spectral(2, kernel='rbf') 56 | self.assertGreater(adjusted_rand_score(expected, labels), 0.95) 57 | 58 | def test_nn_classifier(self): 59 | blob_graphs, expected = self._make_blob_graphs(k=4) 60 | partial = expected.copy() 61 | partial[1:-1] = -1 62 | 63 | for g in blob_graphs: 64 | labels = g.classify_nearest(partial) 65 | self.assertGreater(adjusted_rand_score(expected, labels), 0.95) 66 | 67 | def test_lgc_classifier(self): 68 | blob_graphs, expected = self._make_blob_graphs(k=11) 69 | partial = expected.copy() 70 | partial[1:-1] = -1 71 | 72 | for g in blob_graphs: 73 | labels = g.classify_lgc(partial, kernel='rbf', 
alpha=0.2, tol=1e-3, 74 | max_iter=30) 75 | self.assertGreater(adjusted_rand_score(expected, labels), 0.95) 76 | 77 | def test_harmonic_classifier(self): 78 | blob_graphs, expected = self._make_blob_graphs(k=4) 79 | partial = expected.copy() 80 | partial[1:-1] = -1 81 | 82 | for g in blob_graphs: 83 | labels = g.classify_harmonic(partial, use_CMN=True) 84 | self.assertGreater(adjusted_rand_score(expected, labels), 0.95) 85 | 86 | def test_regression(self): 87 | t = np.linspace(0, 1, 31) 88 | pts = np.column_stack((np.sin(t), np.cos(t))) 89 | G = neighbor_graph(pts, k=3).symmetrize() 90 | y_mask = slice(None, None, 2) 91 | 92 | # test the interpolated case 93 | x = G.regression(t[y_mask], y_mask) 94 | assert_array_equal(t, np.linspace(0, 1, 31)) # ensure t hasn't changed 95 | self.assertLess(np.linalg.norm(t - x), 0.15) 96 | 97 | # test the boolean mask case 98 | y_mask = np.zeros_like(t, dtype=bool) 99 | y_mask[::2] = True 100 | x = G.regression(t[y_mask], y_mask) 101 | self.assertLess(np.linalg.norm(t - x), 0.15) 102 | 103 | # test the penalized case 104 | x = G.regression(t[y_mask], y_mask, smoothness_penalty=1e-4) 105 | self.assertLess(np.linalg.norm(t - x), 0.15) 106 | 107 | # test no kernel + dense laplacian case 108 | dG = Graph.from_adj_matrix(G.matrix('dense')) 109 | x = dG.regression(t[y_mask], y_mask, kernel='none') 110 | self.assertLess(np.linalg.norm(t - x), 0.25) 111 | x = dG.regression(t[y_mask], y_mask, smoothness_penalty=1e-4, kernel='none') 112 | self.assertLess(np.linalg.norm(t - x), 0.25) 113 | 114 | # test the multidimensional regression case 115 | tt = np.column_stack((t, t[::-1])) 116 | x = G.regression(tt[y_mask], y_mask) 117 | self.assertLess(np.linalg.norm(tt - x), 0.2) 118 | 119 | # check for bad inputs 120 | with self.assertRaisesRegexp(ValueError, r'^Invalid shape of y array'): 121 | G.regression([], y_mask) 122 | 123 | if __name__ == '__main__': 124 | unittest.main() 125 | -------------------------------------------------------------------------------- /graphs/construction/saffron.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function 2 | 3 | import numpy as np 4 | import scipy.sparse as ss 5 | import warnings 6 | from sklearn.metrics.pairwise import pairwise_distances 7 | from sklearn.preprocessing import normalize 8 | 9 | from graphs import Graph 10 | from ..mini_six import range 11 | from .neighbors import neighbor_graph 12 | 13 | __all__ = ['saffron'] 14 | 15 | 16 | def saffron(X, q=32, k=4, tangent_dim=1, curv_thresh=0.95, decay_rate=0.9, 17 | max_iter=15, verbose=False): 18 | ''' 19 | SAFFRON graph construction method. 20 | 21 | X : (n,d)-array of coordinates 22 | q : int, median number of candidate friends per vertex 23 | k : int, number of friends to select per vertex, k < q 24 | tangent_dim : int, dimensionality of manifold tangent space 25 | curv_thresh : float, tolerance to curvature, lambda in the paper 26 | decay_rate : float, controls step size per iteration, between 0 and 1 27 | max_iter : int, cap on number of iterations 28 | verbose : bool, print goodness measure per iteration when True 29 | 30 | From "Tangent Space Guided Intelligent Neighbor Finding", 31 | by Gashler & Martinez, 2011. 
32 | See http://axon.cs.byu.edu/papers/gashler2011ijcnn1.pdf 33 | ''' 34 | n = len(X) 35 | dist = pairwise_distances(X) 36 | idx = np.argpartition(dist, q)[:, q] 37 | # radius for finding candidate friends: median distance to qth neighbor 38 | r = np.median(dist[np.arange(n), idx]) 39 | 40 | # make candidate graph + weights 41 | W = neighbor_graph(dist, precomputed=True, epsilon=r).matrix('csr') 42 | # NOTE: this differs from the paper, where W.data[:] = 1 initially 43 | W.data[:] = 1 / W.data 44 | # row normalize 45 | normalize(W, norm='l1', axis=1, copy=False) 46 | # XXX: hacky densify 47 | W = W.toarray() 48 | 49 | # iterate to learn optimal weights 50 | prev_goodness = 1e-12 51 | for it in range(max_iter): 52 | goodness = 0 53 | S = _estimate_tangent_spaces(X, W, tangent_dim) 54 | # find aligned candidates 55 | for i, row in enumerate(W): 56 | nbrs = row.nonzero()[-1] 57 | 58 | # compute alignment scores 59 | edges = X[nbrs] - X[i] 60 | edge_norms = (edges**2).sum(axis=1) 61 | a1 = (edges.dot(S[i])**2).sum(axis=1) / edge_norms 62 | a2 = (np.einsum('ij,ijk->ik', edges, S[nbrs])**2).sum(axis=1) / edge_norms 63 | a3 = _principal_angle(S[i], S[nbrs]) ** 2 64 | x = (np.minimum(curv_thresh, a1) * 65 | np.minimum(curv_thresh, a2) * 66 | np.minimum(curv_thresh, a3)) 67 | 68 | # decay weight of least-aligned candidates 69 | excess = x.shape[0] - k 70 | if excess > 0: 71 | bad_idx = np.argpartition(x, excess-1)[:excess] 72 | W[i, nbrs[bad_idx]] *= decay_rate 73 | W[i] /= W[i].sum() 74 | 75 | # update goodness measure (weighted alignment) 76 | goodness += x.dot(W[i,nbrs]) 77 | 78 | if verbose: # pragma: no cover 79 | goodness /= n 80 | print(it, goodness, goodness / prev_goodness) 81 | if goodness / prev_goodness <= 1.0001: 82 | break 83 | prev_goodness = goodness 84 | else: 85 | warnings.warn('Failed to converge after %d iterations.' % max_iter) 86 | 87 | # use the largest k weights for each row of W, weighted by original distance 88 | indptr, indices, data = [0], [], [] 89 | for i, row in enumerate(W): 90 | nbrs = row.nonzero()[-1] 91 | if len(nbrs) > k: 92 | nbrs = nbrs[np.argpartition(row[nbrs], len(nbrs)-k)[-k:]] 93 | indices.extend(nbrs) 94 | indptr.append(len(nbrs)) 95 | data.extend(dist[i, nbrs]) 96 | indptr = np.cumsum(indptr) 97 | data = np.array(data) 98 | indices = np.array(indices) 99 | W = ss.csr_matrix((data, indices, indptr), shape=W.shape) 100 | return Graph.from_adj_matrix(W) 101 | 102 | 103 | def _estimate_tangent_spaces(X, W, dim): 104 | # compute many PCAs in batch 105 | covs = np.empty(X.shape + (X.shape[1],)) 106 | for i, row in enumerate(W): 107 | nbrs = row.nonzero()[-1] 108 | xx = X[nbrs] * row[nbrs,None] # weight samples by W 109 | xx -= xx.mean(axis=0) 110 | covs[i] = xx.T.dot(xx) 111 | # compute all the PCs at once 112 | _, vecs = np.linalg.eigh(covs) 113 | return vecs[:,:,-dim:] 114 | 115 | 116 | def _principal_angle(a, B): 117 | '''a is (d,t), B is (k,d,t)''' 118 | # TODO: check case for t = d-1 119 | if a.shape[1] == 1: 120 | return a.T.dot(B)[0,:,0] 121 | 122 | # find normals that maximize distance when projected 123 | x1 = np.einsum('abc,adc->abd', B, B).dot(a) - a # b.dot(b.T).dot(a) - a 124 | x2 = np.einsum('ab,cad->cbd', a.dot(a.T), B) - B # a.dot(a.T).dot(b) - b 125 | xx = np.vstack((x1, x2)) 126 | 127 | # batch PCA (1st comp. 
only) 128 | xx -= xx.mean(axis=1)[:,None] 129 | c = np.einsum('abc,abd->acd', xx, xx) 130 | _, vecs = np.linalg.eigh(c) 131 | fpc = vecs[:,:,-1] 132 | fpc1 = fpc[:len(x1)] 133 | fpc2 = fpc[len(x1):] 134 | 135 | # a.dot(fpc1).dot(b.dot(fpc2)) 136 | lhs = a.dot(fpc1.T).T 137 | rhs = np.einsum('abc,ac->ab', B, fpc2) 138 | return np.einsum('ij,ij->i', lhs, rhs) 139 | -------------------------------------------------------------------------------- /graphs/mixins/analysis.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, absolute_import, print_function 2 | import numpy as np 3 | import scipy.sparse as ss 4 | import scipy.sparse.csgraph as ssc 5 | import warnings 6 | from ..mini_six import range 7 | from ._betweenness import betweenness 8 | 9 | 10 | class AnalysisMixin(object): 11 | 12 | # scipy.sparse.csgraph wrappers 13 | def connected_components(self, **kwargs): 14 | '''Mirrors the scipy.sparse.csgraph function of the same name: 15 | connected_components(G, directed=True, connection='weak', 16 | return_labels=True) 17 | ''' 18 | return ssc.connected_components(self.matrix(), **kwargs) 19 | 20 | def laplacian(self, **kwargs): 21 | '''Mirrors the scipy.sparse.csgraph function of the same name: 22 | laplacian(G, normed=False, return_diag=False, use_out_degree=False) 23 | ''' 24 | return ssc.laplacian(self.matrix(), **kwargs) 25 | 26 | def shortest_path(self, directed=None, weighted=None, method='auto', 27 | return_predecessors=False, limit=np.inf, indices=None): 28 | '''Mirrors the scipy.sparse.csgraph function of the same name.''' 29 | d = directed if directed is not None else self.is_directed() 30 | w = weighted if weighted is not None else self.is_weighted() 31 | 32 | adj = self.matrix('dense', 'csr', 'csc') 33 | if not ss.issparse(adj): 34 | adj = np.ascontiguousarray(adj) 35 | 36 | # dispatch based on presence of limit and/or indices 37 | if np.isinf(limit) and indices is None: 38 | overwrite = not (hasattr(self, '_adj') and self._adj is adj) 39 | return ssc.shortest_path(adj, method=method, directed=d, 40 | return_predecessors=return_predecessors, 41 | unweighted=(not w), overwrite=overwrite) 42 | return ssc.dijkstra(adj, directed=d, indices=indices, 43 | return_predecessors=return_predecessors, 44 | unweighted=(not w), limit=limit) 45 | 46 | def ave_laplacian(self): 47 | '''Another kind of laplacian normalization, used in the matlab PVF code. 48 | Uses the formula: L = I - D^{-1} * W''' 49 | W = self.matrix('dense') 50 | # calculate -inv(D) 51 | Dinv = W.sum(axis=0) 52 | mask = Dinv!=0 53 | Dinv[mask] = -1./Dinv[mask] 54 | # calculate -inv(D) * W 55 | lap = (Dinv * W.T).T 56 | # add I 57 | lap.flat[::W.shape[0]+1] += 1 58 | # symmetrize 59 | return (lap + lap.T) / 2.0 60 | 61 | def directed_laplacian(self, D=None, eta=0.99, tol=1e-12, max_iter=500): 62 | '''Computes the directed combinatorial graph laplacian. 
63 | http://www-all.cs.umass.edu/pubs/2007/johns_m_ICML07.pdf 64 | 65 | D: (optional) N-array of degrees 66 | eta: probability of not teleporting (see the paper) 67 | tol, max_iter: convergence params for Perron vector calculation 68 | ''' 69 | W = self.matrix('dense') 70 | n = W.shape[0] 71 | if D is None: 72 | D = W.sum(axis=1) 73 | # compute probability transition matrix 74 | with np.errstate(invalid='ignore', divide='ignore'): 75 | P = W.astype(float) / D[:,None] 76 | P[D==0] = 0 77 | # start at the uniform distribution Perron vector (phi) 78 | old_phi = np.ones(n) / n 79 | # iterate to the fixed point (teleporting random walk) 80 | for _ in range(max_iter): 81 | phi = eta * old_phi.dot(P) + (1-eta)/n 82 | if np.abs(phi - old_phi).max() < tol: 83 | break 84 | old_phi = phi 85 | else: 86 | warnings.warn("phi failed to converge after %d iterations" % max_iter) 87 | # L = Phi - (Phi P + P' Phi)/2 88 | return np.diag(phi) - ((phi * P.T).T + P.T * phi)/2 89 | 90 | def bandwidth(self): 91 | """Computes the 'bandwidth' of a graph.""" 92 | return np.abs(np.diff(self.pairs(), axis=1)).max() 93 | 94 | def profile(self): 95 | """Measure of bandedness, also known as 'envelope size'.""" 96 | leftmost_idx = np.argmax(self.matrix('dense').astype(bool), axis=0) 97 | return (np.arange(self.num_vertices()) - leftmost_idx).sum() 98 | 99 | def betweenness(self, kind='vertex', directed=None, weighted=None): 100 | '''Computes the betweenness centrality of a graph. 101 | kind : string, either 'vertex' (default) or 'edge' 102 | directed : bool, defaults to self.is_directed() 103 | weighted : bool, defaults to self.is_weighted() 104 | ''' 105 | assert kind in ('vertex', 'edge'), 'Invalid kind argument: ' + kind 106 | weighted = weighted is not False and self.is_weighted() 107 | directed = directed if directed is not None else self.is_directed() 108 | adj = self.matrix('csr') 109 | btw = betweenness(adj, weighted, kind=='vertex') 110 | # normalize if undirected 111 | if not directed: 112 | btw /= 2. 113 | return btw 114 | 115 | def eccentricity(self, directed=None, weighted=None): 116 | '''Maximum distance from each vertex to any other vertex.''' 117 | sp = self.shortest_path(directed=directed, weighted=weighted) 118 | return sp.max(axis=0) 119 | 120 | def diameter(self, directed=None, weighted=None): 121 | '''Finds the length of the longest shortest path, 122 | a.k.a. 
the maximum graph eccentricity.''' 123 | return self.eccentricity(directed, weighted).max() 124 | 125 | def radius(self, directed=None, weighted=None): 126 | '''minimum graph eccentricity''' 127 | return self.eccentricity(directed, weighted).min() 128 | -------------------------------------------------------------------------------- /graphs/construction/tests/test_geometric.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import unittest 3 | from numpy.testing import assert_array_equal, assert_array_almost_equal 4 | from sklearn.metrics import pairwise_distances 5 | 6 | from graphs.construction import ( 7 | delaunay_graph, urquhart_graph, gabriel_graph, relative_neighborhood_graph) 8 | from graphs.construction.geometric import _find_relative_neighbors 9 | 10 | 11 | class TestGeometric(unittest.TestCase): 12 | def setUp(self): 13 | self.pts = np.array([ 14 | [0.192,0.622],[0.438,0.785],[0.780,0.273],[0.276,0.802],[0.958,0.876], 15 | [0.358,0.501],[0.683,0.713],[0.370,0.561],[0.503,0.014],[0.773,0.883]]) 16 | 17 | def test_delaunay(self): 18 | expected = np.array([ 19 | [0, 0, 0, 1, 0, 1, 0, 1, 1, 0], 20 | [0, 0, 0, 1, 0, 0, 1, 1, 0, 1], 21 | [0, 0, 0, 0, 1, 1, 1, 0, 1, 0], 22 | [1, 1, 0, 0, 0, 0, 0, 1, 0, 1], 23 | [0, 0, 1, 0, 0, 0, 1, 0, 0, 1], 24 | [1, 0, 1, 0, 0, 0, 1, 1, 1, 0], 25 | [0, 1, 1, 0, 1, 1, 0, 1, 0, 1], 26 | [1, 1, 0, 1, 0, 1, 1, 0, 0, 0], 27 | [1, 0, 1, 0, 0, 1, 0, 0, 0, 0], 28 | [0, 1, 0, 1, 1, 0, 1, 0, 0, 0]], dtype=float) 29 | G = delaunay_graph(self.pts) 30 | assert_array_equal(G.matrix('dense'), expected) 31 | 32 | # with edge weights 33 | G = delaunay_graph(self.pts, weighted=True) 34 | expected[expected!=0] = [ 35 | 0.198635, 0.205419, 0.188162, 0.682924, 0.16289, 0.255361, 36 | 0.234094, 0.34904, 0.628723, 0.479654, 0.450565, 0.379223, 37 | 0.198635, 0.16289, 0.258683, 0.503557, 0.628723, 0.319678, 38 | 0.185132, 0.205419, 0.479654, 0.388032, 0.061188, 0.508128, 39 | 0.255361, 0.450565, 0.319678, 0.388032, 0.347955, 0.192354, 40 | 0.188162, 0.234094, 0.258683, 0.061188, 0.347955, 0.682924, 41 | 0.379223, 0.508128, 0.34904, 0.503557, 0.185132, 0.192354] 42 | assert_array_almost_equal(G.matrix('dense'), expected) 43 | 44 | def test_urquhart(self): 45 | expected = np.array([ 46 | [0, 0, 0, 1, 0, 1, 0, 1, 0, 0], 47 | [0, 0, 0, 1, 0, 0, 1, 1, 0, 1], 48 | [0, 0, 0, 0, 0, 1, 1, 0, 1, 0], 49 | [1, 1, 0, 0, 0, 0, 0, 0, 0, 0], 50 | [0, 0, 0, 0, 0, 0, 1, 0, 0, 1], 51 | [1, 0, 1, 0, 0, 0, 1, 1, 1, 0], 52 | [0, 1, 1, 0, 1, 1, 0, 1, 0, 1], 53 | [1, 1, 0, 0, 0, 1, 1, 0, 0, 0], 54 | [0, 0, 1, 0, 0, 1, 0, 0, 0, 0], 55 | [0, 1, 0, 0, 1, 0, 1, 0, 0, 0]], dtype=float) 56 | G = urquhart_graph(self.pts) 57 | assert_array_equal(G.matrix('dense'), expected) 58 | 59 | # with edge weights 60 | G = urquhart_graph(self.pts, weighted=True) 61 | expected[expected!=0] = [ 62 | 0.198635, 0.205419, 0.188162, 0.16289, 0.255361, 0.234094, 63 | 0.34904, 0.479654, 0.450565, 0.379223, 0.198635, 0.16289, 64 | 0.319678, 0.185132, 0.205419, 0.479654, 0.388032, 0.061188, 65 | 0.508128, 0.255361, 0.450565, 0.319678, 0.388032, 0.347955, 66 | 0.192354, 0.188162, 0.234094, 0.061188, 0.347955, 0.379223, 67 | 0.508128, 0.34904, 0.185132, 0.192354] 68 | assert_array_almost_equal(G.matrix('dense'), expected) 69 | 70 | def test_gabriel(self): 71 | expected = np.array([ 72 | [0,3], [0,7], [1,3], [1,6], [1,7], [2,5], [2,6], [2,8], [3,7], [4,9], 73 | [5,7], [5,8], [6,9]]) 74 | expected = np.vstack((expected, expected[:,::-1])) 75 | G = 
gabriel_graph(self.pts) 76 | assert_array_equal(G.pairs(), expected) 77 | 78 | # with edge weights 79 | G = gabriel_graph(self.pts, weighted=True) 80 | adj = np.zeros((10,10)) 81 | idx = [3,7,13,16,17,25,26,28,30,31,37,49,52,57,58, 82 | 61,62,69,70,71,73,75,82,85,94,96] 83 | adj.flat[idx] = [ 84 | 0.198635, 0.188162, 0.16289, 0.255361, 0.234094, 0.479654, 85 | 0.450565, 0.379223, 0.198635, 0.16289, 0.258683, 0.185132, 86 | 0.479654, 0.061188, 0.508128, 0.255361, 0.450565, 0.192354, 87 | 0.188162, 0.234094, 0.258683, 0.061188, 0.379223, 0.508128, 88 | 0.185132, 0.192354] 89 | assert_array_almost_equal(G.matrix('dense'), adj) 90 | 91 | def test_relative_neighborhood(self): 92 | dist = pairwise_distances(self.pts) 93 | expected = np.array([ 94 | [0,3], [0,7], [1,3], [1,6], [1,7], [2,6], [2,8], [4,9], [5,7], [6,9]]) 95 | 96 | pairs = np.asarray(_find_relative_neighbors(dist)) 97 | assert_array_equal(pairs, expected) 98 | 99 | expected = np.vstack((expected, expected[:,::-1])) 100 | G = relative_neighborhood_graph(self.pts) 101 | assert_array_equal(G.pairs(), expected) 102 | 103 | # with metric='precomputed' 104 | G = relative_neighborhood_graph(dist, metric='precomputed') 105 | assert_array_equal(G.pairs(), expected) 106 | 107 | # with edge weights 108 | G = relative_neighborhood_graph(self.pts, weighted=True) 109 | adj = np.zeros((10,10)) 110 | idx = [3,7,13,16,17,26,28,30,31,49,57,61,62,69,70,71,75,82,94,96] 111 | adj.flat[idx] = [ 112 | 0.198635, 0.188162, 0.16289, 0.255361, 0.234094, 0.450565, 113 | 0.379223, 0.198635, 0.16289, 0.185132, 0.061188, 0.255361, 114 | 0.450565, 0.192354, 0.188162, 0.234094, 0.061188, 0.379223, 115 | 0.185132, 0.192354] 116 | assert_array_almost_equal(G.matrix('dense'), adj) 117 | 118 | if __name__ == '__main__': 119 | unittest.main() 120 | -------------------------------------------------------------------------------- /graphs/construction/b_matching.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, print_function 2 | import numpy as np 3 | import warnings 4 | from graphs import Graph 5 | from ..mini_six import range 6 | 7 | __all__ = ['b_matching'] 8 | 9 | 10 | def b_matching(D, k, max_iter=1000, damping=1, conv_thresh=1e-4, 11 | weighted=False, verbose=False): 12 | ''' 13 | "Belief-Propagation for Weighted b-Matchings on Arbitrary Graphs 14 | and its Relation to Linear Programs with Integer Solutions" 15 | Bayati et al. 16 | 17 | Finds the minimal weight perfect b-matching using min-sum loopy-BP. 
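    Illustrative call (D here is hypothetical): b_matching(D, k=3) returns a
    Graph in which every vertex is incident to 3 edges, whenever such a
    matching is feasible.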
18 | 19 | @param D pairwise distance matrix 20 | @param k number of neighbors per vertex (scalar or array-like) 21 | 22 | Based on the code at http://www.cs.columbia.edu/~bert/code/bmatching/bdmatch 23 | ''' 24 | INTERVAL = 2 25 | oscillation = 10 26 | cbuff = np.zeros(100, dtype=float) 27 | cbuffpos = 0 28 | N = D.shape[0] 29 | assert D.shape[1] == N, 'Input distance matrix must be square' 30 | mask = ~np.eye(N, dtype=bool) # Assume all nonzero except for diagonal 31 | W = -D[mask].reshape((N, -1)).astype(float) 32 | degrees = np.clip(np.atleast_1d(k), 0, N-1) 33 | if degrees.size == 1: # broadcast scalar up to length-N array 34 | degrees = np.repeat(degrees, N) 35 | else: 36 | assert degrees.shape == (N,), 'Input degrees must have length N' 37 | # TODO: remove these later 38 | inds = np.tile(np.arange(N), (N, 1)) 39 | backinds = inds.copy() 40 | inds = inds[mask].reshape((N, -1)) 41 | backinds = backinds.T.ravel()[:(N*(N-1))].reshape((N, -1)) 42 | 43 | # Run Belief Revision 44 | change = 1.0 45 | B = W.copy() 46 | for n_iter in range(1, max_iter+1): 47 | oldB = B.copy() 48 | update_belief(oldB, B, W, degrees, damping, inds, backinds) 49 | 50 | # check for convergence 51 | if n_iter % INTERVAL == 0: 52 | # track changes 53 | c = np.abs(B[:,0]).sum() 54 | # c may be infinite here, and that's ok 55 | with np.errstate(invalid='ignore'): 56 | if np.any(np.abs(c - cbuff) < conv_thresh): 57 | oscillation -= 1 58 | cbuff[cbuffpos] = c 59 | cbuffpos = (cbuffpos + 1) % len(cbuff) 60 | 61 | change = diff_belief(B, oldB) 62 | if np.isnan(change): 63 | warnings.warn("change is NaN! " 64 | "BP will quit but solution could be invalid. " 65 | "Problem may be infeasible.") 66 | break 67 | if change < conv_thresh or oscillation < 1: 68 | break 69 | else: 70 | warnings.warn("Hit iteration limit (%d) before converging" % max_iter) 71 | 72 | if verbose: # pragma: no cover 73 | if change < conv_thresh: 74 | print("Converged to stable beliefs in %d iterations" % n_iter) 75 | elif oscillation < 1: 76 | print("Stopped after reaching oscillation in %d iterations" % n_iter) 77 | print("No feasible solution found or there are multiple maxima.") 78 | print("Outputting best approximate solution. 
Try damping.") 79 | 80 | # recover result from B 81 | thresholds = np.zeros(N) 82 | for i,d in enumerate(degrees): 83 | Brow = B[i] 84 | if d >= N - 1: 85 | thresholds[i] = -np.inf 86 | elif d < 1: 87 | thresholds[i] = np.inf 88 | else: 89 | thresholds[i] = Brow[quickselect(-Brow, d-1)] 90 | 91 | ii,jj = np.where(B >= thresholds[:,None]) 92 | pairs = np.column_stack((ii, inds[ii,jj])) 93 | w = D[ii, pairs[:,1]] if weighted else None 94 | return Graph.from_edge_pairs(pairs, num_vertices=N, weights=w) 95 | 96 | 97 | def _update_change(B, oldB): # pragma: no cover 98 | expB = np.exp(B) 99 | expB[np.isinf(expB)] = 0 100 | rowsums = expB.sum(axis=1) 101 | expOldB = np.exp(oldB) 102 | expOldB[np.isinf(expOldB)] = 0 103 | oldrowsums = expOldB.sum(axis=1) 104 | 105 | change = 0 106 | rowsums[rowsums==0] = 1 107 | oldrowsums[oldrowsums==0] = 1 108 | for i in range(B.shape[0]): 109 | row = expB[i] 110 | oldrow = expOldB[i] 111 | rmask = row == 0 112 | ormask = oldrow == 0 113 | change += np.count_nonzero(np.logical_xor(rmask, ormask)) 114 | mask = ~np.logical_and(rmask, ormask) 115 | change += np.abs(oldrow[mask]/oldrowsums[i] - 116 | row[mask]/rowsums[i]).sum() 117 | return change 118 | 119 | 120 | def _quickselect(B_row, *ks): # pragma: no cover 121 | order = np.argpartition(B_row, ks) 122 | if len(ks) == 1: 123 | return order[ks[0]] 124 | return [order[k] for k in ks] 125 | 126 | 127 | def _updateB(oldB, B, W, degrees, damping, inds, backinds): # pragma: no cover 128 | '''belief update function.''' 129 | for j,d in enumerate(degrees): 130 | kk = inds[j] 131 | bk = backinds[j] 132 | 133 | if d == 0: 134 | B[kk,bk] = -np.inf 135 | continue 136 | 137 | belief = W[kk,bk] + W[j] 138 | oldBj = oldB[j] 139 | if d == oldBj.shape[0]: 140 | bth = quickselect(-oldBj, d-1) 141 | bplus = -1 142 | else: 143 | bth,bplus = quickselect(-oldBj, d-1, d) 144 | 145 | belief -= np.where(oldBj >= oldBj[bth], oldBj[bplus], oldBj[bth]) 146 | B[kk,bk] = damping*belief + (1-damping)*oldB[kk,bk] 147 | 148 | 149 | try: 150 | from ._fast_paths import quickselect, update_belief, diff_belief 151 | except ImportError: 152 | try: 153 | import pyximport 154 | pyximport.install(setup_args={'include_dirs': np.get_include()}) 155 | from ._fast_paths import quickselect, update_belief, diff_belief 156 | except ImportError: 157 | quickselect = _quickselect 158 | update_belief = _updateB 159 | diff_belief = _update_change 160 | -------------------------------------------------------------------------------- /graphs/construction/neighbors.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import numpy as np 4 | from sklearn.neighbors import NearestNeighbors 5 | from sklearn.metrics.pairwise import pairwise_distances 6 | 7 | try: 8 | from bottleneck import argpartsort 9 | except ImportError: 10 | argpartsort = lambda arr, k: np.argpartition(arr, k-1) 11 | 12 | from graphs import Graph 13 | 14 | __all__ = ['neighbor_graph', 'nearest_neighbors'] 15 | 16 | 17 | def neighbor_graph(X, metric='euclidean', k=None, epsilon=None, 18 | weighting='none', precomputed=False): 19 | '''Build a neighbor graph from pairwise distance information. 20 | 21 | X : two-dimensional array-like 22 | Shape must either be (num_pts, num_dims) or (num_pts, num_pts). 
23 | k : int, maximum number of nearest neighbors 24 | epsilon : float, maximum distance to a neighbor 25 | metric : str, type of distance metric (see sklearn.metrics) 26 | When metric='precomputed', X is a symmetric distance matrix. 27 | weighting : str, one of {'binary', 'none'} 28 | When weighting='binary', all edge weights == 1. 29 | ''' 30 | if k is None and epsilon is None: 31 | raise ValueError('Must provide `k` or `epsilon`.') 32 | if weighting not in ('binary', 'none'): 33 | raise ValueError('Invalid weighting param: %r' % weighting) 34 | 35 | # TODO: deprecate the precomputed kwarg 36 | precomputed = precomputed or (metric == 'precomputed') 37 | binary = weighting == 'binary' 38 | 39 | # Try the fast path, if possible. 40 | if not precomputed and epsilon is None: 41 | return _sparse_neighbor_graph(X, k, binary, metric) 42 | 43 | if precomputed: 44 | D = X 45 | else: 46 | D = pairwise_distances(X, metric=metric) 47 | return _slow_neighbor_graph(D, k, epsilon, binary) 48 | 49 | 50 | def nearest_neighbors(query_pts, target_pts=None, metric='euclidean', 51 | k=None, epsilon=None, return_dists=False, 52 | precomputed=False): 53 | '''Find nearest neighbors of query points from a matrix of target points. 54 | 55 | Returns a list of indices of neighboring points, one list per query. 56 | If no target_pts are specified, distances are calculated within query_pts. 57 | When return_dists is True, returns two lists: (distances, indices) 58 | ''' 59 | if k is None and epsilon is None: 60 | raise ValueError('Must provide `k` or `epsilon`.') 61 | 62 | # TODO: deprecate the precomputed kwarg 63 | precomputed = precomputed or (metric == 'precomputed') 64 | 65 | if precomputed and target_pts is not None: 66 | raise ValueError('`target_pts` cannot be used with precomputed distances') 67 | 68 | query_pts = np.array(query_pts) 69 | if len(query_pts.shape) == 1: 70 | query_pts = query_pts.reshape((1,-1)) # ensure that the query is a 1xD row 71 | 72 | if precomputed: 73 | dists = query_pts.copy() 74 | else: 75 | dists = pairwise_distances(query_pts, Y=target_pts, metric=metric) 76 | 77 | if epsilon is not None: 78 | if k is not None: 79 | # kNN filtering 80 | _, not_nn = _min_k_indices(dists, k, inv_ind=True) 81 | dists[np.arange(dists.shape[0]), not_nn.T] = np.inf 82 | # epsilon-ball 83 | is_close = dists <= epsilon 84 | if return_dists: 85 | nnis,nnds = [],[] 86 | for i,row in enumerate(is_close): 87 | nns = np.nonzero(row)[0] 88 | nnis.append(nns) 89 | nnds.append(dists[i,nns]) 90 | return nnds, nnis 91 | return np.array([np.nonzero(row)[0] for row in is_close]) 92 | 93 | # knn 94 | nns = _min_k_indices(dists,k) 95 | if return_dists: 96 | # index each row of dists by each row of nns 97 | row_inds = np.arange(len(nns))[:,np.newaxis] 98 | nn_dists = dists[row_inds, nns] 99 | return nn_dists, nns 100 | return nns 101 | 102 | 103 | def _slow_neighbor_graph(dist, k, epsilon, binary): 104 | num_pts = dist.shape[0] 105 | 106 | if k is not None: 107 | k = min(k+1, num_pts) 108 | nn, not_nn = _min_k_indices(dist, k, inv_ind=True) 109 | I = np.arange(num_pts) 110 | 111 | if epsilon is not None: 112 | mask = dist <= epsilon 113 | if k is not None: 114 | mask[I, not_nn.T] = False 115 | if binary: 116 | np.fill_diagonal(mask, False) 117 | W = mask.astype(float) 118 | else: 119 | W = np.where(mask, dist, 0) 120 | else: 121 | inv_mask = np.eye(num_pts, dtype=bool) 122 | inv_mask[I, not_nn.T] = True 123 | if binary: 124 | W = 1.0 - inv_mask 125 | else: 126 | W = np.where(inv_mask, 0, dist) 127 | 128 | # W = 
scipy.sparse.csr_matrix(W) 129 | return Graph.from_adj_matrix(W) 130 | 131 | 132 | def _min_k_indices(arr, k, inv_ind=False): 133 | psorted = argpartsort(arr, k) 134 | if inv_ind: 135 | return psorted[...,:k], psorted[...,k:] 136 | return psorted[...,:k] 137 | 138 | 139 | def _sparse_neighbor_graph(X, k, binary=False, metric='l2'): 140 | '''Construct a sparse adj matrix from a matrix of points (one per row). 141 | Non-zeros are unweighted/binary distance values, depending on the binary arg. 142 | Doesn't include self-edges.''' 143 | knn = NearestNeighbors(n_neighbors=k, metric=metric).fit(X) 144 | mode = 'connectivity' if binary else 'distance' 145 | try: 146 | adj = knn.kneighbors_graph(None, mode=mode) 147 | except IndexError: 148 | # XXX: we must be running an old (<0.16) version of sklearn 149 | # We have to hack around an old bug: 150 | if binary: 151 | adj = knn.kneighbors_graph(X, k+1, mode=mode) 152 | adj.setdiag(0) 153 | else: 154 | adj = knn.kneighbors_graph(X, k, mode=mode) 155 | return Graph.from_adj_matrix(adj) 156 | -------------------------------------------------------------------------------- /graphs/construction/_fast_paths.pyx: -------------------------------------------------------------------------------- 1 | # cython: boundscheck=False 2 | # cython: wraparound=False 3 | # cython: nonecheck=False 4 | # cython: cdivision=True 5 | import numpy as np 6 | cimport numpy as np 7 | cimport cython 8 | from libcpp cimport bool 9 | from sklearn.metrics import pairwise_distances 10 | 11 | cdef extern from "math.h": 12 | float INFINITY 13 | 14 | IDX_DTYPE = np.intp 15 | ctypedef Py_ssize_t IDX_DTYPE_t 16 | 17 | 18 | def find_relative_neighbors(D): 19 | cdef IDX_DTYPE_t n = D.shape[0] 20 | cdef IDX_DTYPE_t max_num_pairs = n * (n-1) // 2 21 | pairs = np.empty((max_num_pairs, 2), dtype=IDX_DTYPE) 22 | cdef IDX_DTYPE_t end_idx = _fill_rn_pairs(D, n, pairs) 23 | return pairs[:end_idx] 24 | 25 | 26 | cdef IDX_DTYPE_t _fill_rn_pairs(double[:,::1] D, 27 | IDX_DTYPE_t n, 28 | IDX_DTYPE_t[:,::1] pairs): 29 | cdef IDX_DTYPE_t idx = 0 30 | cdef IDX_DTYPE_t r, c, i 31 | cdef double d 32 | for r in range(n-1): 33 | for c in range(r+1, n): 34 | d = D[r,c] 35 | for i in range(n): 36 | if i == r or i == c: 37 | continue 38 | if D[r,i] < d and D[c,i] < d: 39 | break # Point in lune, this is not an edge 40 | else: 41 | pairs[idx,0] = r 42 | pairs[idx,1] = c 43 | idx += 1 44 | return idx 45 | 46 | 47 | def inter_cluster_distance(X, num_clusters, cluster_labels): 48 | # compute shortest distances between clusters 49 | Dx = pairwise_distances(X, metric='sqeuclidean') 50 | Dc = np.zeros((num_clusters,num_clusters), dtype=np.float64) 51 | edges = np.zeros((num_clusters,num_clusters,2), dtype=IDX_DTYPE) 52 | _fill_Dc_edges(num_clusters, cluster_labels, Dx, Dc, edges) 53 | return Dc, edges 54 | 55 | 56 | cdef void _fill_Dc_edges(IDX_DTYPE_t num_clusters, 57 | int[::1] cluster_labels, 58 | double[:,::1] Dx, 59 | double[:,::1] Dc, 60 | IDX_DTYPE_t[:,:,::1] edges): 61 | cdef IDX_DTYPE_t i, j, k, l, r, c, ik, il, ii_n, jj_n 62 | cdef double min_val 63 | cdef double INF = np.inf 64 | cdef IDX_DTYPE_t n = Dx.shape[0] 65 | cdef bool[:,::1] masks 66 | cdef IDX_DTYPE_t[::1] ii, jj 67 | cdef list indices = [] 68 | for i in range(num_clusters): 69 | indices.append(where_eq(cluster_labels, i)) 70 | for i in range(num_clusters-1): 71 | ii = indices[i] 72 | ii_n = ii.shape[0] 73 | for j in range(i+1, num_clusters): 74 | jj = indices[j] 75 | jj_n = jj.shape[0] 76 | r = 0 77 | c = 0 78 | min_val = INF 79 | for ik in 
range(ii_n): 80 | k = ii[ik] 81 | for il in range(jj_n): 82 | l = jj[il] 83 | if Dx[k,l] < min_val: 84 | min_val = Dx[k,l] 85 | # Transposed index 86 | r = k 87 | c = l 88 | edges[i,j,0] = r 89 | edges[i,j,1] = c 90 | edges[j,i,0] = r 91 | edges[j,i,1] = c 92 | Dc[i,j] = min_val 93 | Dc[j,i] = min_val 94 | 95 | cdef IDX_DTYPE_t[::1] where_eq(int[::1] x, IDX_DTYPE_t val): 96 | # return np.where(x == val)[0] 97 | cdef IDX_DTYPE_t n = x.shape[0] 98 | cdef IDX_DTYPE_t i, n_inds 99 | cdef list inds = [] 100 | for i in range(n): 101 | if x[i] == val: 102 | inds.append(i) 103 | n_inds = len(inds) 104 | cdef IDX_DTYPE_t[::1] result = np.empty(n_inds, dtype=IDX_DTYPE) 105 | for i in range(n_inds): 106 | result[i] = inds[i] 107 | return result 108 | 109 | 110 | cpdef IDX_DTYPE_t quickselect(B_row, IDX_DTYPE_t k): 111 | cdef IDX_DTYPE_t[::1] order = np.argpartition(B_row, k) 112 | return order[k] 113 | 114 | 115 | def diff_belief(B, oldB): 116 | cdef IDX_DTYPE_t i, j, N = B.shape[0] 117 | cdef double rs, ors, change = 0 118 | cdef double[::1] rowsums, oldrowsums, row, oldrow 119 | expB = np.exp(B) 120 | expOldB = np.exp(oldB) 121 | expB[np.isinf(expB)] = 0 122 | expOldB[np.isinf(expOldB)] = 0 123 | rowsums = expB.sum(axis=1) 124 | oldrowsums = expOldB.sum(axis=1) 125 | 126 | for i in range(N): 127 | rs = rowsums[i] 128 | ors = oldrowsums[i] 129 | if rs == 0: 130 | rs = 1 131 | if ors == 0: 132 | ors = 1 133 | row = expB[i] 134 | oldrow = expOldB[i] 135 | for j in range(N-1): 136 | if (row[j] == 0 and oldrow[j] != 0) or (row[j] != 0 and oldrow[j] == 0): 137 | change += 1 138 | if row[j] != 0 and oldrow[j] != 0: 139 | change += abs(oldrow[j]/ors - row[j]/rs) 140 | return change 141 | 142 | 143 | def update_belief(oldB, double[:,::1] B, double[:,::1] W, 144 | IDX_DTYPE_t[::1] degrees, double damping, 145 | IDX_DTYPE_t[:,::1] inds, IDX_DTYPE_t[:,::1] backinds): 146 | cdef IDX_DTYPE_t j, d, kkk, bkk, n = degrees.shape[0] 147 | cdef IDX_DTYPE_t[::1] kk, bk, order 148 | cdef IDX_DTYPE_t bth, bplus 149 | cdef double[::1] oldBj 150 | cdef double[:,::1] oldBview = oldB 151 | cdef double belief 152 | for j in range(n): 153 | d = degrees[j] 154 | kk = inds[j] 155 | bk = backinds[j] 156 | 157 | if d == 0: 158 | for k in range(n-1): 159 | kkk = kk[k] 160 | bkk = bk[k] 161 | B[kkk,bkk] = -INFINITY 162 | continue 163 | 164 | oldBj = oldBview[j] 165 | if d == n-1: 166 | bth = quickselect(-oldB[j], d-1) 167 | bplus = -1 168 | else: 169 | order = np.argpartition(-oldB[j], (d-1, d)) 170 | bth = order[d-1] 171 | bplus = order[d] 172 | 173 | for k in range(n-1): 174 | kkk = kk[k] 175 | bkk = bk[k] 176 | belief = W[kkk,bkk] + W[j,k] 177 | 178 | if oldBj[k] >= oldBj[bth]: 179 | belief -= oldBj[bplus] 180 | else: 181 | belief -= oldBj[bth] 182 | B[kkk,bkk] = damping*belief + (1-damping)*oldBview[kkk,bkk] 183 | -------------------------------------------------------------------------------- /graphs/mixins/tests/test_embed.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | from numpy.testing import assert_array_almost_equal 4 | from scipy.sparse import csr_matrix 5 | from sklearn.decomposition import PCA 6 | from sklearn.manifold import locally_linear_embedding 7 | from graphs import Graph 8 | from graphs.construction import neighbor_graph 9 | 10 | 11 | def assert_signless_array_almost_equal(a, b, **kwargs): 12 | a = np.asarray(a) 13 | b = np.asarray(b) 14 | if (a.flat[0] < 0 and b.flat[0] > 0) or (a.flat[0] > 0 and b.flat[0] < 0): 15 | 
assert_array_almost_equal(a, -b, **kwargs) 16 | else: 17 | assert_array_almost_equal(a, b, **kwargs) 18 | 19 | 20 | class TestEmbeddings(unittest.TestCase): 21 | def test_isomap(self): 22 | expected = [-np.sqrt(8), -np.sqrt(2), 0, np.sqrt(2), np.sqrt(8)] 23 | G = Graph.from_adj_matrix([[0, np.sqrt(2), 2.82842712, 0, 0], 24 | [np.sqrt(2), 0, np.sqrt(2), 0, 0], 25 | [0, np.sqrt(2), 0, np.sqrt(2), 0], 26 | [0, 0, np.sqrt(2), 0, np.sqrt(2)], 27 | [0, 0, 2.82842712, np.sqrt(2), 0]]) 28 | Y = G.isomap(num_dims=1) 29 | self.assertEqual(Y.shape, (5, 1)) 30 | assert_array_almost_equal(Y[:,0], expected) 31 | 32 | def test_laplacian_eigenmaps(self): 33 | # Test a simple chain graph 34 | expected = np.array([0.5, 0.5, 0., -0.5, -0.5]) 35 | W = np.zeros((5,5)) + np.diag(np.ones(4), k=1) + np.diag(np.ones(4), k=-1) 36 | G = Graph.from_adj_matrix(W) 37 | Y = G.laplacian_eigenmaps(num_dims=1) 38 | self.assertEqual(Y.shape, (5, 1)) 39 | assert_signless_array_almost_equal(Y[:,0], expected) 40 | # Test num_dims=None case 41 | Y = G.laplacian_eigenmaps() 42 | self.assertEqual(Y.shape, (5, 4)) 43 | assert_signless_array_almost_equal(Y[:,0], expected) 44 | # Test sparse case 45 | G = Graph.from_adj_matrix(csr_matrix(W)) 46 | Y = G.laplacian_eigenmaps(num_dims=1) 47 | self.assertEqual(Y.shape, (5, 1)) 48 | assert_signless_array_almost_equal(Y[:,0], expected) 49 | 50 | def test_locality_preserving_projections(self): 51 | X = np.array([[1,2],[2,1],[3,1.5],[4,0.5],[5,1]]) 52 | G = Graph.from_adj_matrix([[0, 1, 1, 0, 0], 53 | [1, 0, 1, 0, 0], 54 | [1, 1, 0, 1, 1], 55 | [0, 0, 1, 0, 1], 56 | [0, 0, 1, 1, 0]]) 57 | proj = G.locality_preserving_projections(X, num_dims=1) 58 | assert_array_almost_equal(proj, np.array([[-0.95479113],[0.29727749]])) 59 | # test case with bigger d than n 60 | X = np.hstack((X, X))[:3] 61 | G = Graph.from_adj_matrix(G.matrix()[:3,:3]) 62 | proj = G.locality_preserving_projections(X, num_dims=1) 63 | assert_array_almost_equal(proj, np.array([[0.9854859,0.1697574,0,0]]).T) 64 | 65 | def test_locally_linear_embedding(self): 66 | np.random.seed(1234) 67 | pts = np.random.random((5, 3)) 68 | expected = locally_linear_embedding(pts, 3, 1)[0] 69 | G = neighbor_graph(pts, k=3).barycenter_edge_weights(pts, copy=False) 70 | actual = G.locally_linear_embedding(num_dims=1) 71 | assert_signless_array_almost_equal(expected, actual) 72 | 73 | def test_neighborhood_preserving_embedding(self): 74 | X = np.array([[1,2],[2,1],[3,1.5],[4,0.5],[5,1]]) 75 | G = Graph.from_adj_matrix([[0, 1, 1, 0, 0], 76 | [1, 0, 1, 0, 0], 77 | [1, 1, 0, 1, 1], 78 | [0, 0, 1, 0, 1], 79 | [0, 0, 1, 1, 0]]) 80 | proj = G.neighborhood_preserving_embedding(X, num_dims=1) 81 | assert_signless_array_almost_equal(proj, [[0.99763], [0.068804]]) 82 | 83 | def test_laplacian_pca(self): 84 | X = np.array([[1,2],[2,1],[3,1.5],[4,0.5],[5,1]]) 85 | G = Graph.from_adj_matrix([[0, 1, 1, 0, 0], 86 | [1, 0, 1, 0, 0], 87 | [1, 1, 0, 1, 1], 88 | [0, 0, 1, 0, 1], 89 | [0, 0, 1, 1, 0]]) 90 | # check that beta=0 gets the (roughly) the same answer as PCA 91 | mX = X - X.mean(axis=0) 92 | expected = PCA(n_components=1).fit_transform(mX) 93 | actual = G.laplacian_pca(mX, num_dims=1, beta=0)[:,:1] 94 | self.assertEqual(expected.shape, actual.shape) 95 | assert_signless_array_almost_equal(expected[:,0], actual[:,0], decimal=1) 96 | 97 | def test_circular_layout(self): 98 | G = Graph.from_edge_pairs([], num_vertices=4) 99 | expected = np.array([[1,0],[0,1],[-1,0],[0,-1]]) 100 | assert_array_almost_equal(G.layout_circle(), expected) 101 | # edge cases 102 | 
for nv in (0, 1): 103 | G = Graph.from_edge_pairs([], num_vertices=nv) 104 | X = G.layout_circle() 105 | self.assertEqual(X.shape, (nv, 2)) 106 | 107 | def test_spring_layout(self): 108 | np.random.seed(1234) 109 | w = np.array([1,2,0.1,1,1,2,0.1,1]) 110 | p = [[0,1],[1,2],[2,3],[3,4],[1,0],[2,1],[3,2],[4,3]] 111 | G = Graph.from_edge_pairs(p, weights=w, num_vertices=5) 112 | expected = np.array([ 113 | [-1.12951518, 0.44975598], 114 | [-0.42574481, 0.51702804], 115 | [0.58946761, 0.61403187], 116 | [0.96513010, 0.64989485], 117 | [1.67011322, 0.71714073]]) 118 | assert_array_almost_equal(G.layout_spring(), expected) 119 | # Test initial_layout kwarg 120 | X = np.arange(10).reshape((5,2)) 121 | expected = np.array([ 122 | [1.837091, 2.837091], 123 | [2.996882, 3.996882], 124 | [4.472791, 5.472791], 125 | [5.014210, 6.014210], 126 | [6.162909, 7.162909]]) 127 | assert_array_almost_equal(G.layout_spring(initial_layout=X), expected) 128 | 129 | if __name__ == '__main__': 130 | unittest.main() 131 | -------------------------------------------------------------------------------- /graphs/reorder.py: -------------------------------------------------------------------------------- 1 | '''Sparse symmetric matrix reordering to reduce bandwidth/diagonalness. 2 | Methods: 3 | - cuthill_mckee 4 | - node_centroid_hill_climbing 5 | - laplacian_reordering 6 | References: 7 | - ftp://ftp.numerical.rl.ac.uk/pub/talks/jas.ala06.24VII06.pdf 8 | - http://www.jstor.org/stable/2156090 (profile defn, NYI RCM improvements) 9 | - https://www.cs.purdue.edu/homes/apothen/env3.pdf (laplacian, NYI sloan alg) 10 | ''' 11 | from __future__ import absolute_import, print_function 12 | from collections import deque 13 | import numpy as np 14 | import scipy.sparse.csgraph as ssc 15 | from graphs import Graph 16 | from .mini_six import range 17 | 18 | __all__ = [ 19 | 'permute_graph', 'cuthill_mckee', 'node_centroid_hill_climbing', 20 | 'laplacian_reordering' 21 | ] 22 | 23 | 24 | def permute_graph(G, order): 25 | '''Reorder the graph's vertices, returning a copy of the input graph. 26 | order : integer array-like, some permutation of range(G.num_vertices()). 27 | ''' 28 | adj = G.matrix('dense') 29 | adj = adj[np.ix_(order, order)] 30 | return Graph.from_adj_matrix(adj) 31 | 32 | 33 | def _cuthill_mckee(G): 34 | n = G.num_vertices() 35 | queue = deque([]) 36 | result = [] 37 | degree = G.degree() 38 | remaining = dict(enumerate(degree)) 39 | adj = G.matrix('dense', 'csr') 40 | while len(result) != n: 41 | queue.append(min(remaining, key=remaining.get)) 42 | while queue: 43 | p = queue.popleft() 44 | if p not in remaining: 45 | continue 46 | result.append(p) 47 | del remaining[p] 48 | nbrs = [c for c in np.where(adj[p])[0] if c in remaining] 49 | queue.extend(sorted(nbrs, key=remaining.get)) 50 | return permute_graph(G, np.array(result)) 51 | 52 | 53 | if hasattr(ssc, 'reverse_cuthill_mckee'): # pragma: no cover 54 | def cuthill_mckee(G): 55 | sG = G.matrix('csr') 56 | order = ssc.reverse_cuthill_mckee(sG, symmetric_mode=True) 57 | return permute_graph(G, order) 58 | else: # pragma: no cover 59 | cuthill_mckee = _cuthill_mckee 60 | 61 | cuthill_mckee.__doc__ = 'Reorder vertices using the Cuthill-McKee algorithm.' 
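
# Illustrative usage (a sketch, not part of the public API): for any Graph G,
#   H = cuthill_mckee(G)
# returns a new graph with permuted vertices, leaving G unchanged. RCM is a
# heuristic, so H.bandwidth() is usually, though not always, below G.bandwidth().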
62 | 63 | 64 | def laplacian_reordering(G): 65 | '''Reorder vertices using the eigenvector of the graph Laplacian corresponding 66 | to the first positive eigenvalue.''' 67 | L = G.laplacian() 68 | vals, vecs = np.linalg.eigh(L) 69 | min_positive_idx = np.argmax(vals == vals[vals>0].min()) 70 | vec = vecs[:, min_positive_idx] 71 | return permute_graph(G, np.argsort(vec)) 72 | 73 | 74 | def node_centroid_hill_climbing(G, relax=1, num_centerings=20, verbose=False): 75 | '''Iterative reordering method based on alternating rounds of node-centering 76 | and hill-climbing search.''' 77 | # Initialize order with BFS from a random start node. 78 | order = _breadth_first_order(G) 79 | for it in range(num_centerings): 80 | B = permute_graph(G, order).bandwidth() 81 | nc_order = _node_center(G, order, relax=relax) 82 | nc_B = permute_graph(G, nc_order).bandwidth() 83 | if nc_B < B: 84 | if verbose: # pragma: no cover 85 | print('post-center', B, nc_B) 86 | order = nc_order 87 | order = _hill_climbing(G, order, verbose=verbose) 88 | return permute_graph(G, order) 89 | 90 | 91 | def _breadth_first_order(G): 92 | inds = np.arange(G.num_vertices()) 93 | adj = G.matrix('dense', 'csr') 94 | total_order = [] 95 | while len(inds) > 0: 96 | order = ssc.breadth_first_order(adj, np.random.choice(inds), 97 | return_predecessors=False) 98 | inds = np.setdiff1d(inds, order, assume_unique=True) 99 | total_order = np.append(total_order, order) 100 | return total_order.astype(int) 101 | 102 | 103 | def _critical_vertices(G, order, relax=1, bw=None): 104 | go = permute_graph(G, order) 105 | if bw is None: 106 | bw = go.bandwidth() 107 | adj = go.matrix('dense') 108 | if relax == 1: 109 | for i in np.where(np.diag(adj, -bw))[0]: 110 | yield bw + i, i 111 | else: 112 | crit = relax * bw 113 | for u, v in np.transpose(np.where(np.tril(adj, -np.floor(crit)))): 114 | if np.abs(u-v) >= crit: 115 | yield u, v 116 | 117 | 118 | def _node_center(G, order, relax=0.99): 119 | weights = order.copy().astype(float) 120 | counts = np.ones_like(order) 121 | inv_order = np.argsort(order) 122 | for i, j in _critical_vertices(G, order, relax): 123 | u = inv_order[i] 124 | v = inv_order[j] 125 | weights[u] += j # order[v] 126 | counts[u] += 1 127 | weights[v] += i # order[u] 128 | counts[v] += 1 129 | weights /= counts 130 | return np.argsort(weights) 131 | 132 | 133 | def _hill_climbing(G, order, verbose=False): 134 | B = permute_graph(G, order).bandwidth() 135 | while True: 136 | inv_order = np.argsort(order) 137 | for i, j in _critical_vertices(G, order, bw=B): 138 | u = inv_order[i] 139 | v = inv_order[j] 140 | for w,k in enumerate(order): 141 | if not (k < i or k > j): 142 | continue 143 | new_order = order.copy() 144 | if k < i: 145 | new_order[[u,w]] = new_order[[w,u]] 146 | elif k > j: 147 | new_order[[v,w]] = new_order[[w,v]] 148 | 149 | new_B = permute_graph(G, new_order).bandwidth() 150 | if new_B < B: 151 | order = new_order 152 | if verbose: # pragma: no cover 153 | print('improved B', B, new_B) 154 | B = new_B 155 | break 156 | elif new_B == B: 157 | nc = sum(1 for _ in _critical_vertices(G, order, bw=B)) 158 | new_nc = sum(1 for _ in _critical_vertices(G, new_order, bw=B)) 159 | if new_nc < nc: 160 | order = new_order 161 | if verbose: # pragma: no cover 162 | print('improved nc', nc, new_nc) 163 | break 164 | else: 165 | continue 166 | break 167 | else: 168 | break 169 | return order 170 | -------------------------------------------------------------------------------- /graphs/mixins/tests/test_transformation.py: 
-------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | from numpy.testing import assert_array_equal, assert_array_almost_equal 4 | from scipy.sparse import coo_matrix, csr_matrix 5 | from graphs import Graph 6 | from graphs.construction import neighbor_graph 7 | 8 | PAIRS = np.array([[0,1],[0,2],[1,0],[1,2],[2,0],[2,1],[3,4],[4,3]]) 9 | ADJ = [[0,1,1,0,0], 10 | [1,0,1,0,0], 11 | [1,1,0,0,0], 12 | [0,0,0,0,1], 13 | [0,0,0,1,0]] 14 | 15 | # fixed "random" data in 2 dimensions 16 | X = np.column_stack(( 17 | [0.192, 0.438, 0.78, 0.276, 0.958, 0.358, 0.683, 0.37, 0.503, 0.773, 18 | 0.365, 0.075, 0.933, 0.397, 0.317, 0.869, 0.802, 0.704, 0.219, 0.442], 19 | [0.622, 0.785, 0.273, 0.802, 0.876, 0.501, 0.713, 0.561, 0.014, 0.883, 20 | 0.615, 0.369, 0.651, 0.789, 0.568, 0.436, 0.144, 0.705, 0.925, 0.909] 21 | )) 22 | 23 | 24 | class TestTransformation(unittest.TestCase): 25 | 26 | def test_kernelize(self): 27 | graphs = [ 28 | Graph.from_edge_pairs(PAIRS), 29 | Graph.from_adj_matrix(ADJ), 30 | Graph.from_adj_matrix(coo_matrix(ADJ)), 31 | Graph.from_adj_matrix(csr_matrix(ADJ)), 32 | ] 33 | for G in graphs: 34 | for kernel in ('none', 'binary'): 35 | K = G.kernelize(kernel) 36 | assert_array_equal(K.matrix('dense'), ADJ) 37 | self.assertRaises(ValueError, G.kernelize, 'foobar') 38 | 39 | def test_connected_subgraphs(self): 40 | G = Graph.from_edge_pairs(PAIRS) 41 | subgraphs = list(G.connected_subgraphs(directed=False, ordered=False)) 42 | self.assertEqual(len(subgraphs), 2) 43 | assert_array_equal(subgraphs[0].pairs(), PAIRS[:6]) 44 | assert_array_equal(subgraphs[1].pairs(), [[0,1],[1,0]]) 45 | 46 | G = neighbor_graph(X, k=2) 47 | subgraphs = list(G.connected_subgraphs(directed=True, ordered=True)) 48 | self.assertEqual(len(subgraphs), 3) 49 | self.assertEqual([g.num_vertices() for g in subgraphs], [9,6,5]) 50 | 51 | def test_shortest_path_subtree(self): 52 | n = X.shape[0] 53 | G = neighbor_graph(X, k=4) 54 | e_data = [0.163, 0.199, 0.079, 0.188, 0.173, 0.122, 0.136, 0.136, 0.197] 55 | e_row = [3, 0, 14, 0, 0, 3, 0, 3, 3] 56 | e_col = [1, 3, 5, 7, 10, 13, 14, 18, 19] 57 | expected = np.zeros((n,n)) 58 | expected[e_row, e_col] = e_data 59 | 60 | spt = G.shortest_path_subtree(0, directed=True) 61 | assert_array_almost_equal(spt.matrix('dense'), expected, decimal=3) 62 | 63 | # test undirected case 64 | G.symmetrize(method='max', copy=False) 65 | e_data = [0.185,0.379,0.199,0.32,0.205,0.255,0.188,0.508,0.192,0.173,0.279, 66 | 0.258,0.122,0.136,0.316,0.326,0.278,0.136,0.197,0.185,0.379,0.199, 67 | 0.32,0.205,0.255,0.188,0.508,0.192,0.173,0.279,0.258,0.122,0.136, 68 | 0.316,0.326,0.278,0.136,0.197] 69 | e_row = [10,8,0,6,0,1,0,5,6,0,0,6,3,0,17,8,1,3,3,1,2,3,4,5,6,7,8,9,10,11,12, 70 | 13,14,15,16,17,18,19] 71 | e_col = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,10,8,0,6,0,1,0,5,6, 72 | 0,0,6,3,0,17,8,1,3,3] 73 | expected[:] = 0 74 | expected[e_row, e_col] = e_data 75 | 76 | spt = G.shortest_path_subtree(0, directed=False) 77 | assert_array_almost_equal(spt.matrix('dense'), expected, decimal=3) 78 | 79 | def test_minimum_spanning_subtree(self): 80 | n = X.shape[0] 81 | G = neighbor_graph(X, k=4) 82 | e_data = [0.279,0.136,0.255,0.041,0.124,0.186,0.131,0.122,0.136,0.185,0.226, 83 | 0.061,0.255,0.022,0.061,0.054,0.053,0.326,0.185,0.191,0.054,0.177, 84 | 0.279,0.226,0.224,0.041,0.122,0.177,0.136,0.053,0.186,0.224,0.131, 85 | 0.326,0.022,0.191,0.136,0.124] 86 | e_row = 
[0,0,1,1,1,2,2,3,3,4,4,5,6,6,7,7,7,8,9,9,10,10,11,12,12,13,13,13,14, 87 | 14,15,15,16,16,17,17,18,19] 88 | e_col = [11,14,6,13,19,15,16,13,18,9,12,7,1,17,5,10,14,16,4,17,7,13,0,4,15, 89 | 1,3,10,0,7,2,12,2,8,6,9,3,1] 90 | expected = np.zeros((n,n)) 91 | expected[e_row, e_col] = e_data 92 | 93 | mst = G.minimum_spanning_subtree() 94 | assert_array_almost_equal(mst.matrix('dense'), expected, decimal=3) 95 | 96 | def test_neighborhood_subgraph(self): 97 | G = neighbor_graph(X, k=4) 98 | 99 | # simple 1-neighbor subgraph 100 | g, mask = G.neighborhood_subgraph(0, radius=1, weighted=False, 101 | return_mask=True) 102 | assert_array_equal(mask.nonzero()[0], [0,3,7,10,14]) 103 | self.assertEqual(g.num_vertices(), 5) 104 | self.assertEqual(g.num_edges(), 13) 105 | 106 | # distance-based subgraph 107 | g, mask = G.neighborhood_subgraph(12, radius=0.5, return_mask=True) 108 | assert_array_equal(mask.nonzero()[0], [2,4,6,9,12,15,17]) 109 | self.assertEqual(g.num_vertices(), 7) 110 | self.assertEqual(g.num_edges(), 23) 111 | 112 | def test_isograph(self): 113 | # make roughly U-shaped data 114 | theta = np.linspace(0, 2*np.pi, 10)[1:] 115 | data = np.column_stack((np.sin(theta)*2, np.cos(theta))) 116 | G = neighbor_graph(data, k=2) 117 | 118 | g = G.isograph() 119 | self.assertIsNot(g, G) 120 | diff = G.matrix('dense') - g.matrix('dense') 121 | ii, jj = np.nonzero(diff) 122 | assert_array_equal(ii, [3, 4]) 123 | assert_array_equal(jj, [4, 3]) 124 | 125 | # test case with large epsilon 126 | g = G.isograph(min_weight=999) 127 | self.assertIsNot(g, G) 128 | assert_array_equal(g.matrix('dense'), G.matrix('dense')) 129 | 130 | def test_circle_tear(self): 131 | G = neighbor_graph(X, k=4).symmetrize(method='max', copy=False) 132 | 133 | # test MST start 134 | res = G.circle_tear(spanning_tree='mst', cycle_len_thresh=5) 135 | diff = G.matrix('dense') - res.matrix('dense') 136 | ii, jj = np.nonzero(diff) 137 | assert_array_equal(ii, [5,8,8,11]) 138 | assert_array_equal(jj, [8,5,11,8]) 139 | 140 | # test SPT start with a fixed starting vertex 141 | res = G.circle_tear(spanning_tree='spt', cycle_len_thresh=5, spt_idx=8) 142 | diff = G.matrix('dense') - res.matrix('dense') 143 | ii, jj = np.nonzero(diff) 144 | assert_array_equal(ii, [1,1,6,17]) 145 | assert_array_equal(jj, [6,17,1,1]) 146 | 147 | def test_cycle_cut(self): 148 | G = neighbor_graph(X, k=4).symmetrize(method='max', copy=False) 149 | 150 | # hack: the atomic cycle finder chooses a random vertex to start from 151 | np.random.seed(1234) 152 | res = G.cycle_cut(cycle_len_thresh=5, directed=False) 153 | diff = G.matrix('dense') - res.matrix('dense') 154 | ii, jj = np.nonzero(diff) 155 | assert_array_equal(ii, [1,1,6,17]) 156 | assert_array_equal(jj, [6,17,1,1]) 157 | 158 | if __name__ == '__main__': 159 | unittest.main() 160 | -------------------------------------------------------------------------------- /graphs/construction/regularized.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division 2 | 3 | import numpy as np 4 | import scipy.sparse as ss 5 | import warnings 6 | from scipy.linalg import cho_factor, get_lapack_funcs 7 | from sklearn import linear_model 8 | from sklearn.metrics import pairwise_distances 9 | 10 | from graphs import Graph 11 | from ..mini_six import range 12 | from .neighbors import nearest_neighbors 13 | 14 | __all__ = ['sparse_regularized_graph', 'smce_graph'] 15 | 16 | # For quickly running cho_solve without lots of checking 17 | potrs = 
get_lapack_funcs('potrs') 18 | 19 | # TODO: implement NNLRS next 20 | # http://www.cis.pku.edu.cn/faculty/vision/zlin/Publications/2012-CVPR-NNLRS.pdf 21 | 22 | 23 | def smce_graph(X, metric='l2', sparsity_param=10, kmax=None, keep_ratio=0.95): 24 | '''Sparse graph construction from the SMCE paper. 25 | 26 | X : 2-dimensional array-like 27 | metric : str, optional 28 | sparsity_param : float, optional 29 | kmax : int, optional 30 | keep_ratio : float, optional 31 | When <1, keep edges up to (keep_ratio * total weight) 32 | 33 | Returns a graph with asymmetric similarity weights. 34 | Call .symmetrize() and .kernelize('rbf') to convert to symmetric distances. 35 | 36 | SMCE: "Sparse Manifold Clustering and Embedding" 37 | Elhamifar & Vidal, NIPS 2011 38 | ''' 39 | n = X.shape[0] 40 | if kmax is None: 41 | kmax = min(n-1, max(5, n // 10)) 42 | 43 | nn_dists, nn_inds = nearest_neighbors(X, metric=metric, k=kmax+1, 44 | return_dists=True) 45 | W = np.zeros((n, n)) 46 | 47 | # optimize each point separately 48 | for i, pt in enumerate(X): 49 | nbr_inds = nn_inds[i] 50 | mask = nbr_inds != i # remove self-edge 51 | nbr_inds = nbr_inds[mask] 52 | nbr_dist = nn_dists[i,mask] 53 | Y = (X[nbr_inds] - pt) / nbr_dist[:,None] 54 | # solve sparse optimization with ADMM 55 | c = _solve_admm(Y, nbr_dist/nbr_dist.sum(), sparsity_param) 56 | c = np.abs(c / nbr_dist) 57 | W[i,nbr_inds] = c / c.sum() 58 | 59 | W = ss.csr_matrix(W) 60 | if keep_ratio < 1: 61 | for i in range(n): 62 | row_data = W.data[W.indptr[i]:W.indptr[i+1]] 63 | order = np.argsort(row_data)[::-1] 64 | stop_idx = np.searchsorted(np.cumsum(row_data[order]), keep_ratio) + 1 65 | bad_inds = order[stop_idx:] 66 | row_data[bad_inds] = 0 67 | W.eliminate_zeros() 68 | 69 | return Graph.from_adj_matrix(W) 70 | 71 | 72 | def _solve_admm(Y, q, alpha=10, mu=10, max_iter=10000): 73 | n = Y.shape[0] 74 | alpha_q = alpha * q 75 | # solve (YYt + mu*I + mu) Z = (mu*C - lambda + gamma + mu) 76 | A, lower = cho_factor(Y.dot(Y.T) + mu*(np.eye(n) + 1), overwrite_a=True) 77 | C = np.zeros(n) 78 | Z_old = 0 # shape (n,) 79 | lmbda = np.zeros(n) 80 | gamma = 0 81 | # ADMM iteration 82 | for i in range(max_iter): 83 | # call the guts of cho_solve directly for speed 84 | Z, _ = potrs(A, gamma + mu + mu*C - lmbda, lower=lower, overwrite_b=True) 85 | 86 | tmp = mu*Z + lmbda 87 | C[:] = np.abs(tmp) 88 | C -= alpha_q 89 | np.maximum(C, 0, out=C) 90 | C *= np.sign(tmp) 91 | C /= mu 92 | 93 | d_ZC = Z - C 94 | d_1Z = 1 - Z.sum() 95 | lmbda += mu * d_ZC 96 | gamma += mu * d_1Z 97 | 98 | if ((abs(d_1Z) / n < 1e-6) 99 | and (np.abs(d_ZC).mean() < 1e-6) 100 | and (np.abs(Z - Z_old).mean() < 1e-5)): 101 | break 102 | Z_old = Z 103 | else: 104 | warnings.warn('ADMM failed to converge after %d iterations.' % max_iter) 105 | return C 106 | 107 | 108 | def sparse_regularized_graph(X, positive=False, sparsity_param=None, kmax=None): 109 | '''Sparse Regularized Graph Construction, commonly known as an l1-graph. 110 | 111 | positive : bool, optional 112 | When True, computes the Sparse Probability Graph (SPG). 113 | sparsity_param : float, optional 114 | Controls sparsity cost in the LASSO optimization. 115 | When None, uses cross-validation to find sparsity parameters. 116 | This is very slow, but it gets good results. 117 | kmax : int, optional 118 | When None, allow all points to be edges. Otherwise, restrict to kNN set. 
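    Illustrative call (X is hypothetical data of shape (n, d)):
    sparse_regularized_graph(X, kmax=20) solves one LASSO problem per point,
    over that point's 20 nearest neighbors; with sparsity_param=None, the
    slower cross-validated solvers are used.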
119 | 
120 |   l1-graph: "Semi-supervised Learning by Sparse Representation"
121 |   Yan & Wang, SDM 2009
122 |   http://epubs.siam.org/doi/pdf/10.1137/1.9781611972795.68
123 | 
124 |   SPG: "Nonnegative Sparse Coding for Discriminative Semi-supervised Learning"
125 |   He et al., CVPR 2011
126 |   '''
127 |   clf, X = _l1_graph_setup(X, positive, sparsity_param)
128 |   if kmax is None:
129 |     W = _l1_graph_solve_full(clf, X)
130 |   else:
131 |     W = _l1_graph_solve_k(clf, X, kmax)
132 |   return Graph.from_adj_matrix(W)
133 | 
134 | 
135 | def _l1_graph_solve_full(clf, X):
136 |   n, d = X.shape
137 |   # Solve for each row of W
138 |   W = []
139 |   B = np.vstack((X[1:], np.eye(d)))
140 |   for i, x in enumerate(X):
141 |     # Solve min ||B'a - x|| + |a|
142 |     clf.fit(B.T, x)
143 |     # Set up B for next time
144 |     B[i] = x
145 |     # Extract edge weights (first n-1 coefficients)
146 |     a = ss.csr_matrix(clf.coef_[:n-1])
147 |     a = abs(a)
148 |     a /= a.sum()
149 |     # Add a zero on the diagonal
150 |     a.indices[np.searchsorted(a.indices, i):] += 1
151 |     a._shape = (1, n)  # XXX: hack around lack of csr.resize()
152 |     W.append(a)
153 |   return ss.vstack(W)
154 | 
155 | 
156 | def _l1_graph_solve_k(clf, X, k):
157 |   n, d = X.shape
158 |   nn_inds = nearest_neighbors(X, k=k+1)  # self-edges included
159 |   # Solve for each row of W
160 |   W = []
161 |   B = np.empty((k+d, d))
162 |   B[k:] = np.eye(d)
163 |   for i, x in enumerate(X):
164 |     # Set up B with neighbors of x
165 |     idx = nn_inds[i]
166 |     idx = idx[idx!=i]  # remove self-edge
167 |     B[:k] = X[idx]
168 |     # Solve min ||B'a - x|| + |a|
169 |     clf.fit(B.T, x)
170 |     # Extract edge weights (first k coefficients)
171 |     a = ss.csr_matrix((clf.coef_[:k], idx, [0, k]), shape=(1, n))
172 |     a.eliminate_zeros()  # some of the first k might be zeros
173 |     a = abs(a)
174 |     a /= a.sum()
175 |     W.append(a)
176 |   return ss.vstack(W)
177 | 
178 | 
179 | def _l1_graph_setup(X, positive, alpha):
180 |   n, d = X.shape
181 |   # Choose an efficient Lasso solver
182 |   if alpha is not None:
183 |     if positive or d < n:
184 |       clf = linear_model.Lasso(positive=positive, alpha=alpha)
185 |     else:
186 |       clf = linear_model.LassoLars(alpha=alpha)
187 |   else:
188 |     cv = min(d, 3)
189 |     if positive or d < n:
190 |       clf = linear_model.LassoCV(positive=positive, cv=cv)
191 |     else:
192 |       clf = linear_model.LassoLarsCV(cv=cv)
193 |   # Normalize all samples
194 |   X = X / np.linalg.norm(X, ord=2, axis=1)[:,None]
195 |   return clf, X
196 | 
--------------------------------------------------------------------------------
/graphs/base/base.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | import numpy as np
3 | import scipy.sparse as ss
4 | import warnings
5 | from sklearn.metrics.pairwise import paired_distances
6 | 
7 | from ..mini_six import zip
8 | from ..mixins import (
9 |     AnalysisMixin, EmbedMixin, LabelMixin, TransformMixin, VizMixin)
10 | 
11 | 
12 | class Graph(AnalysisMixin, EmbedMixin, LabelMixin, TransformMixin, VizMixin):
13 | 
14 |   def __init__(self, *args, **kwargs):
15 |     raise NotImplementedError('Graph should not be instantiated directly')
16 | 
17 |   def pairs(self, copy=False, directed=True):
18 |     '''Returns a (num_edges,2)-array of vertex indices (s,t).
19 |     When directed=False, only pairs with s <= t are returned.'''
20 |     raise NotImplementedError()
21 | 
22 |   def matrix(self, *formats, **kwargs):
23 |     '''Returns a (num_vertices,num_vertices) array or sparse matrix, M,
24 |     where M[s,t] is the weight of edge (s,t).
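    Passing several formats, e.g. matrix('dense', 'csr'), allows the graph to
    return whichever listed format is cheapest for its internal representation.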
25 | 26 | formats: sequence of {'dense','csr','csc','coo'} 27 | copy (kwarg): may share memory if copy=False 28 | ''' 29 | raise NotImplementedError() 30 | 31 | def edge_weights(self, copy=False, directed=True): 32 | '''Returns a (num_edges,)-array of edge weights. 33 | Weights correspond to the (s,t) pairs returned by pairs(). 34 | When directed=False, only weights with s <= t are returned.''' 35 | raise NotImplementedError() 36 | 37 | def num_edges(self): 38 | raise NotImplementedError() 39 | 40 | def num_vertices(self): 41 | raise NotImplementedError() 42 | 43 | def symmetrize(self, method='sum', copy=False): 44 | '''Symmetrizes with the given method. {sum,max,avg} 45 | Returns a copy if copy=True.''' 46 | raise NotImplementedError() 47 | 48 | def add_edges(self, from_idx, to_idx, weight=1, symmetric=False, copy=False): 49 | '''Adds all from->to edges. weight may be a scalar or 1d array. 50 | If symmetric=True, also adds to->from edges with the same weights.''' 51 | raise NotImplementedError() 52 | 53 | def remove_edges(self, from_idx, to_idx, symmetric=False, copy=False): 54 | '''Removes all from->to edges, without making sure they already exist. 55 | If symmetric=True, also removes to->from edges.''' 56 | raise NotImplementedError() 57 | 58 | def _update_edges(self, weights, copy=False): 59 | raise NotImplementedError() 60 | 61 | def subgraph(self, mask): 62 | '''Returns the subgraph with vertices V[mask]. 63 | mask : boolean mask, index, or slice''' 64 | raise NotImplementedError() 65 | 66 | def copy(self): 67 | raise NotImplementedError() 68 | 69 | def is_weighted(self): 70 | '''Returns True if edges have associated weights.''' 71 | return False 72 | 73 | def is_directed(self): 74 | '''Returns True if edges *may be* asymmetric.''' 75 | return True 76 | 77 | def add_self_edges(self, weight=None, copy=False): 78 | '''Adds all i->i edges. weight may be a scalar or 1d array.''' 79 | ii = np.arange(self.num_vertices()) 80 | return self.add_edges(ii, ii, weight=weight, symmetric=False, copy=copy) 81 | 82 | def reweight(self, weight, edges=None, copy=False): 83 | '''Replaces existing edge weights. weight may be a scalar or 1d array. 84 | edges is a mask or index array that specifies a subset of edges to modify''' 85 | if not self.is_weighted(): 86 | warnings.warn('Cannot supply weights for unweighted graph; ' 87 | 'ignoring call to reweight') 88 | return self 89 | if edges is None: 90 | return self._update_edges(weight, copy=copy) 91 | ii, jj = self.pairs()[edges].T 92 | return self.add_edges(ii, jj, weight=weight, symmetric=False, copy=copy) 93 | 94 | def reweight_by_distance(self, coords, metric='l2', copy=False): 95 | '''Replaces existing edge weights by distances between connected vertices. 96 | The new weight of edge (i,j) is given by: metric(coords[i], coords[j]). 
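    For example, G.reweight_by_distance(X) with the default metric sets each
    edge weight to the l2 (Euclidean) distance between the endpoints' rows of
    a (hypothetical) coordinate array X.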
97 | coords : (num_vertices x d) array of coordinates, in vertex order 98 | metric : str or callable, see sklearn.metrics.pairwise.paired_distances''' 99 | if not self.is_weighted(): 100 | warnings.warn('Cannot supply weights for unweighted graph; ' 101 | 'ignoring call to reweight_by_distance') 102 | return self 103 | # TODO: take advantage of symmetry of metric function 104 | ii, jj = self.pairs().T 105 | if metric == 'precomputed': 106 | assert coords.ndim == 2 and coords.shape[0] == coords.shape[1] 107 | d = coords[ii,jj] 108 | else: 109 | d = paired_distances(coords[ii], coords[jj], metric=metric) 110 | return self._update_edges(d, copy=copy) 111 | 112 | def adj_list(self): 113 | '''Generates a sequence of lists of neighbor indices: 114 | an adjacency list representation.''' 115 | adj = self.matrix('dense', 'csr') 116 | for row in adj: 117 | yield row.nonzero()[-1] 118 | 119 | def degree(self, kind='out', weighted=True): 120 | '''Returns an array of vertex degrees. 121 | kind : either 'in' or 'out', useful for directed graphs 122 | weighted : controls whether to count edges or sum their weights 123 | ''' 124 | if kind == 'out': 125 | axis = 1 126 | adj = self.matrix('dense', 'csc') 127 | else: 128 | axis = 0 129 | adj = self.matrix('dense', 'csr') 130 | 131 | if not weighted and self.is_weighted(): 132 | # With recent numpy and a dense matrix, could do: 133 | # d = np.count_nonzero(adj, axis=axis) 134 | d = (adj!=0).sum(axis=axis) 135 | else: 136 | d = adj.sum(axis=axis) 137 | return np.asarray(d).ravel() 138 | 139 | def to_igraph(self, weighted=None): 140 | '''Converts this Graph object to an igraph-compatible object. 141 | Requires the python-igraph library.''' 142 | # Import here to avoid ImportErrors when igraph isn't available. 143 | import igraph 144 | ig = igraph.Graph(n=self.num_vertices(), edges=self.pairs().tolist(), 145 | directed=self.is_directed()) 146 | if weighted is not False and self.is_weighted(): 147 | ig.es['weight'] = self.edge_weights() 148 | return ig 149 | 150 | def to_graph_tool(self): 151 | '''Converts this Graph object to a graph_tool-compatible object. 152 | Requires the graph_tool library. 153 | Note that the internal ordering of graph_tool seems to be column-major.''' 154 | # Import here to avoid ImportErrors when graph_tool isn't available. 155 | import graph_tool 156 | gt = graph_tool.Graph(directed=self.is_directed()) 157 | gt.add_edge_list(self.pairs()) 158 | if self.is_weighted(): 159 | weights = gt.new_edge_property('double') 160 | for e,w in zip(gt.edges(), self.edge_weights()): 161 | weights[e] = w 162 | gt.edge_properties['weight'] = weights 163 | return gt 164 | 165 | def to_networkx(self, directed=None): 166 | '''Converts this Graph object to a networkx-compatible object. 
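    Produces an nx.DiGraph when the graph is directed, else an nx.Graph.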
167 | Requires the networkx library.''' 168 | import networkx as nx 169 | directed = directed if directed is not None else self.is_directed() 170 | cls = nx.DiGraph if directed else nx.Graph 171 | adj = self.matrix() 172 | if ss.issparse(adj): 173 | return nx.from_scipy_sparse_matrix(adj, create_using=cls()) 174 | return nx.from_numpy_matrix(adj, create_using=cls()) 175 | -------------------------------------------------------------------------------- /graphs/base/pairs.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import numpy as np 4 | import scipy.sparse as ss 5 | import warnings 6 | 7 | from .base import Graph 8 | 9 | 10 | class EdgePairGraph(Graph): 11 | def __init__(self, pairs, num_vertices=None): 12 | self._pairs = np.atleast_2d(pairs) 13 | # Handle empty-input case 14 | if self._pairs.size == 0: 15 | self._pairs.shape = (0, 2) 16 | self._pairs = self._pairs.astype(np.intp, copy=False) 17 | self._num_vertices = num_vertices if num_vertices is not None else 0 18 | return 19 | # Validate shape and dtype 20 | assert self._pairs.shape[1] == 2 21 | if not np.can_cast(self._pairs, np.intp, casting='same_kind'): 22 | self._pairs = self._pairs.astype(np.intp) 23 | # Set self._num_vertices 24 | if num_vertices is not None: 25 | self._num_vertices = num_vertices 26 | else: 27 | self._num_vertices = self._pairs.max() + 1 28 | 29 | def pairs(self, copy=False, directed=True): 30 | if not directed: 31 | canonical = np.sort(self._pairs, axis=1) 32 | n = self._num_vertices 33 | _, uniq_idx = np.unique(np.ravel_multi_index(canonical.T, (n,n)), 34 | return_index=True) 35 | return canonical[uniq_idx] 36 | if copy: 37 | return self._pairs.copy() 38 | return self._pairs 39 | 40 | def matrix(self, *formats, **kwargs): 41 | kwargs.pop('copy', False) 42 | if kwargs: 43 | raise ValueError('Unexpected kwargs for matrix(): %s' % kwargs) 44 | n = self._num_vertices 45 | row, col = self.pairs().T 46 | data = np.ones(len(row), dtype=np.intp) 47 | adj = ss.coo_matrix((data, (row,col)), shape=(n,n)) 48 | if not formats or 'coo' in formats: 49 | return adj 50 | for fmt in formats: 51 | if fmt != 'dense': 52 | return adj.asformat(fmt) 53 | if 'dense' in formats: 54 | return adj.toarray() 55 | raise NotImplementedError('Unknown matrix format(s): %s' % (formats,)) 56 | 57 | def copy(self): 58 | return EdgePairGraph(self._pairs.copy(), num_vertices=self._num_vertices) 59 | 60 | def num_edges(self): 61 | return len(self._pairs) 62 | 63 | def num_vertices(self): 64 | return self._num_vertices 65 | 66 | def add_edges(self, from_idx, to_idx, 67 | weight=None, symmetric=False, copy=False): 68 | if weight is not None: 69 | warnings.warn('Cannot supply weights for unweighted graph; ' 70 | 'ignoring weight argument') 71 | to_add = np.column_stack((from_idx, to_idx)) 72 | if symmetric: 73 | # add reversed edges, excluding diagonals 74 | diag_mask = np.not_equal(*to_add.T) 75 | rev = to_add[diag_mask,::-1] 76 | to_add = np.vstack((to_add, rev)) 77 | # select only those edges that are not already present 78 | flattener = (self._num_vertices, 1) 79 | flat_inds = self._pairs.dot(flattener) 80 | flat_add = to_add.dot(flattener) 81 | to_add = to_add[np.in1d(flat_add, flat_inds, invert=True)] 82 | # add the new edges 83 | res = self.copy() if copy else self 84 | if len(to_add) > 0: 85 | res._pairs = np.vstack((self._pairs, to_add)) 86 | return res 87 | 88 | def remove_edges(self, from_idx, to_idx, symmetric=False, copy=False): 89 | from_idx, 
to_idx = np.atleast_1d(from_idx, to_idx) 90 | flat_inds = self._pairs.dot((self._num_vertices, 1)) 91 | to_remove = from_idx * self._num_vertices + to_idx 92 | if symmetric: 93 | to_remove = np.concatenate((to_remove, 94 | to_idx * self._num_vertices + from_idx)) 95 | mask = np.in1d(flat_inds, to_remove, invert=True) 96 | res = self.copy() if copy else self 97 | res._pairs = res._pairs[mask] 98 | return res 99 | 100 | def symmetrize(self, method=None, copy=False): 101 | '''Symmetrizes (ignores method). Returns a copy if copy=True.''' 102 | if copy: 103 | return SymmEdgePairGraph(self._pairs.copy(), 104 | num_vertices=self._num_vertices) 105 | shape = (self._num_vertices, self._num_vertices) 106 | flat_inds = np.union1d(np.ravel_multi_index(self._pairs.T, shape), 107 | np.ravel_multi_index(self._pairs.T[::-1], shape)) 108 | self._pairs = np.transpose(np.unravel_index(flat_inds, shape)) 109 | return self 110 | 111 | def subgraph(self, mask): 112 | nv = self.num_vertices() 113 | idx = np.arange(nv)[mask] 114 | idx_map = np.full(nv, -1) 115 | idx_map[idx] = np.arange(len(idx)) 116 | pairs = idx_map[self._pairs] 117 | pairs = pairs[(pairs >= 0).all(axis=1)] 118 | return EdgePairGraph(pairs, num_vertices=len(idx)) 119 | 120 | pairs.__doc__ = Graph.pairs.__doc__ 121 | matrix.__doc__ = Graph.matrix.__doc__ 122 | add_edges.__doc__ = Graph.add_edges.__doc__ 123 | remove_edges.__doc__ = Graph.remove_edges.__doc__ 124 | subgraph.__doc__ = Graph.subgraph.__doc__ 125 | 126 | 127 | class SymmEdgePairGraph(EdgePairGraph): 128 | def __init__(self, pairs, num_vertices=None, ensure_format=True): 129 | EdgePairGraph.__init__(self, pairs, num_vertices=num_vertices) 130 | if ensure_format: 131 | # push all edges to upper triangle 132 | self._pairs.sort() 133 | # remove any duplicates 134 | shape = (self._num_vertices, self._num_vertices) 135 | _, idx = np.unique(np.ravel_multi_index(self._pairs.T, shape), 136 | return_index=True) 137 | self._pairs = self._pairs[idx] 138 | self._offdiag_mask = np.not_equal(*self._pairs.T) 139 | 140 | def pairs(self, copy=False, directed=True): 141 | if directed: 142 | return np.vstack((self._pairs[self._offdiag_mask], self._pairs[:,::-1])) 143 | if copy: 144 | return self._pairs.copy() 145 | return self._pairs 146 | 147 | def num_edges(self): 148 | num_offdiag_edges = np.count_nonzero(self._offdiag_mask) 149 | return len(self._pairs) + num_offdiag_edges 150 | 151 | def copy(self): 152 | return SymmEdgePairGraph(self._pairs.copy(), 153 | num_vertices=self._num_vertices, 154 | ensure_format=False) 155 | 156 | def remove_edges(self, from_idx, to_idx, symmetric=False, copy=False): 157 | '''Removes all from->to and to->from edges. 
158 |     Note: the symmetric kwarg is unused.'''
159 |     flat_inds = self._pairs.dot((self._num_vertices, 1))
160 |     # convert to sorted order and flatten
161 |     to_remove = (np.minimum(from_idx, to_idx) * self._num_vertices
162 |                  + np.maximum(from_idx, to_idx))
163 |     mask = np.in1d(flat_inds, to_remove, invert=True)
164 |     res = self.copy() if copy else self
165 |     res._pairs = res._pairs[mask]
166 |     res._offdiag_mask = res._offdiag_mask[mask]
167 |     return res
168 | 
169 |   def symmetrize(self, method=None, copy=False):
170 |     '''No-op: already symmetric. Returns self, or a shallow copy if copy=True.'''
171 |     if not copy:
172 |       return self
173 |     return SymmEdgePairGraph(self._pairs, num_vertices=self._num_vertices,
174 |                              ensure_format=False)
175 | 
176 |   def subgraph(self, mask):
177 |     g = EdgePairGraph.subgraph(self, mask)
178 |     return SymmEdgePairGraph(g._pairs, num_vertices=g._num_vertices,
179 |                              ensure_format=False)
180 | 
181 |   pairs.__doc__ = Graph.pairs.__doc__
182 |   subgraph.__doc__ = Graph.subgraph.__doc__
183 | 
--------------------------------------------------------------------------------
/graphs/mixins/embed.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import warnings
3 | from scipy.sparse import issparse
4 | from scipy.sparse.linalg import eigsh
5 | from scipy.linalg import eig, eigh
6 | from sklearn.decomposition import KernelPCA
7 | 
8 | 
9 | class EmbedMixin(object):
10 | 
11 |   def isomap(self, num_dims=None, directed=None):
12 |     '''Isomap embedding.
13 | 
14 |     num_dims : dimension of embedded coordinates, defaults to input dimension
15 |     directed : used for .shortest_path() calculation
16 |     '''
17 |     W = -0.5 * self.shortest_path(directed=directed) ** 2
18 |     kpca = KernelPCA(n_components=num_dims, kernel='precomputed')
19 |     return kpca.fit_transform(W)
20 | 
21 |   def laplacian_eigenmaps(self, num_dims=None, normed=True, val_thresh=1e-8):
22 |     '''Laplacian Eigenmaps embedding.
23 | 
24 |     num_dims : dimension of embedded coordinates, defaults to input dimension
25 |     normed : used for .laplacian() calculation
26 |     val_thresh : threshold for omitting vectors with near-zero eigenvalues
27 |     '''
28 |     L = self.laplacian(normed=normed)
29 |     return _null_space(L, num_dims, val_thresh, overwrite=True)
30 | 
31 |   def locality_preserving_projections(self, coordinates, num_dims=None):
32 |     '''Locality Preserving Projections (LPP, linearized Laplacian Eigenmaps).'''
33 |     X = np.atleast_2d(coordinates)  # n x d
34 |     L = self.laplacian(normed=True)  # n x n
35 |     u,s,_ = np.linalg.svd(X.T.dot(X))
36 |     Fplus = np.linalg.pinv(u * np.sqrt(s))  # d x d
37 |     n, d = X.shape
38 |     if n >= d:  # optimized order: F(X'LX)F'
39 |       T = Fplus.dot(X.T.dot(L.dot(X))).dot(Fplus.T)
40 |     else:  # optimized order: (FX')L(XF')
41 |       T = Fplus.dot(X.T).dot(L.dot(X.dot(Fplus.T)))
42 |     L = 0.5*(T+T.T)
43 |     return _null_space(L, num_vecs=num_dims, overwrite=True)
44 | 
45 |   def locally_linear_embedding(self, num_dims=None):
46 |     '''Locally Linear Embedding (LLE).
47 |     Note: may need to call barycenter_edge_weights() before this!
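   |     (_null_space below recovers the bottom eigenvectors of M = (I-W)'(I-W),
   |     which yields the LLE embedding only when each row of W holds
   |     reconstruction weights summing to 1 -- the weighting that
   |     barycenter_edge_weights produces.)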
48 | ''' 49 | W = self.matrix() 50 | # compute M = (I-W)'(I-W) 51 | M = W.T.dot(W) - W.T - W 52 | if issparse(M): 53 | M = M.toarray() 54 | M.flat[::M.shape[0] + 1] += 1 55 | return _null_space(M, num_vecs=num_dims, overwrite=True) 56 | 57 | def neighborhood_preserving_embedding(self, X, num_dims=None, reweight=True): 58 | '''Neighborhood Preserving Embedding (NPE, linearized LLE).''' 59 | if reweight: 60 | W = self.barycenter_edge_weights(X).matrix() 61 | else: 62 | W = self.matrix() 63 | # compute M = (I-W)'(I-W) as in LLE 64 | M = W.T.dot(W) - W.T - W 65 | if issparse(M): 66 | M = M.toarray() 67 | M.flat[::M.shape[0] + 1] += 1 68 | # solve generalized eig problem: X'MXa = \lambda X'Xa 69 | vals, vecs = eig(X.T.dot(M).dot(X), X.T.dot(X), overwrite_a=True, 70 | overwrite_b=True) 71 | if num_dims is None: 72 | return vecs 73 | return vecs[:,:num_dims] 74 | 75 | def laplacian_pca(self, coordinates, num_dims=None, beta=0.5): 76 | '''Graph-Laplacian PCA (CVPR 2013). 77 | coordinates : (n,d) array-like, assumed to be mean-centered. 78 | beta : float in [0,1], scales how much PCA/LapEig contributes. 79 | Returns an approximation of input coordinates, ala PCA.''' 80 | X = np.atleast_2d(coordinates) 81 | L = self.laplacian(normed=True) 82 | kernel = X.dot(X.T) 83 | kernel /= eigsh(kernel, k=1, which='LM', return_eigenvectors=False) 84 | L /= eigsh(L, k=1, which='LM', return_eigenvectors=False) 85 | W = (1-beta)*(np.identity(kernel.shape[0]) - kernel) + beta*L 86 | if num_dims is None: 87 | vals, vecs = np.linalg.eigh(W) 88 | else: 89 | vals, vecs = eigh(W, eigvals=(0, num_dims-1), overwrite_a=True) 90 | return X.T.dot(vecs).dot(vecs.T).T 91 | 92 | def layout_circle(self): 93 | '''Position vertices evenly around a circle.''' 94 | n = self.num_vertices() 95 | t = np.linspace(0, 2*np.pi, n+1)[:n] 96 | return np.column_stack((np.cos(t), np.sin(t))) 97 | 98 | def layout_spring(self, num_dims=2, spring_constant=None, iterations=50, 99 | initial_temp=0.1, initial_layout=None): 100 | '''Position vertices using the Fruchterman-Reingold (spring) algorithm. 101 | 102 | num_dims : int (default=2) 103 | Number of dimensions to embed vertices in. 104 | 105 | spring_constant : float (default=None) 106 | Optimal distance between nodes. If None the distance is set to 107 | 1/sqrt(n) where n is the number of nodes. Increase this value 108 | to move nodes farther apart. 109 | 110 | iterations : int (default=50) 111 | Number of iterations of spring-force relaxation 112 | 113 | initial_temp : float (default=0.1) 114 | Largest step-size allowed in the dynamics, decays linearly. 115 | Must be positive, should probably be less than 1. 116 | 117 | initial_layout : array-like of shape (n, num_dims) 118 | If provided, serves as the initial placement of vertex coordinates. 119 | ''' 120 | if initial_layout is None: 121 | X = np.random.random((self.num_vertices(), num_dims)) 122 | else: 123 | X = np.array(initial_layout, dtype=float, copy=True) 124 | assert X.shape == (self.num_vertices(), num_dims) 125 | if spring_constant is None: 126 | # default to sqrt(area_of_viewport / num_vertices) 127 | spring_constant = X.shape[0] ** -0.5 128 | S = self.matrix('csr', 'csc', 'coo', copy=True) 129 | S.data[:] = 1. 
/ S.data # Convert to similarity 130 | ii,jj = S.nonzero() # cache nonzero indices 131 | # simple cooling scheme, linearly steps down 132 | cooling_scheme = np.linspace(initial_temp, 0, iterations+2)[:-2] 133 | # this is still O(V^2) 134 | # could use multilevel methods to speed this up significantly 135 | for t in cooling_scheme: 136 | delta = X[:,None] - X[None] 137 | distance = _bounded_norm(delta, 1e-8) 138 | # repulsion from all vertices 139 | force = spring_constant**2 / distance 140 | # attraction from connected vertices 141 | force[ii,jj] -= S.data * distance[ii,jj]**2 / spring_constant 142 | displacement = np.einsum('ijk,ij->ik', delta, force) 143 | # update positions 144 | length = _bounded_norm(displacement, 1e-2) 145 | X += displacement * t / length[:,None] 146 | return X 147 | 148 | 149 | def _null_space(X, num_vecs=None, val_thresh=1e-8, overwrite=False): 150 | if issparse(X): 151 | # This is a bit of a hack. Make sure we end up with enough eigenvectors. 152 | k = X.shape[0] - 1 if num_vecs is None else num_vecs + 1 153 | try: 154 | # TODO: try using shift-invert mode (sigma=0?) for speed here. 155 | vals,vecs = eigsh(X, k, which='SM') 156 | except: 157 | warnings.warn('Sparse eigsh failed, falling back to dense version') 158 | X = X.toarray() 159 | overwrite = True 160 | if not issparse(X): 161 | vals,vecs = eigh(X, overwrite_a=overwrite) 162 | # vals are not guaranteed to be in sorted order 163 | idx = np.argsort(vals) 164 | vecs = vecs.real[:,idx] 165 | vals = vals.real[idx] 166 | # discard any with really small eigenvalues 167 | i = np.searchsorted(vals, val_thresh) 168 | if num_vecs is None: 169 | # take all of them 170 | num_vecs = vals.shape[0] - i 171 | return vecs[:,i:i+num_vecs] 172 | 173 | 174 | def _bounded_norm(X, min_length): 175 | length = np.linalg.norm(X, ord=2, axis=-1) 176 | np.maximum(length, min_length, out=length) 177 | return length 178 | -------------------------------------------------------------------------------- /graphs/construction/tests/test_regularized.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | np.set_printoptions(suppress=True, precision=3) 3 | import unittest 4 | import warnings 5 | from numpy.testing import assert_array_almost_equal 6 | try: 7 | from sklearn.exceptions import ConvergenceWarning 8 | except ImportError: 9 | from sklearn.utils import ConvergenceWarning 10 | 11 | from graphs.construction import sparse_regularized_graph, smce_graph 12 | 13 | 14 | class TestRegularized(unittest.TestCase): 15 | def setUp(self): 16 | np.random.seed(1234) 17 | self.pts = _gaussian_clusters(2, 5, 20) 18 | 19 | def test_L1_graph(self): 20 | expected = [ 21 | [0, 0.286,0.352,0.362,0, 0, 0, 0, 0, 0], 22 | [0.637,0, 0.209,0, 0.153,0, 0, 0, 0, 0], 23 | [0.446,0.133,0, 0, 0.421,0, 0, 0, 0, 0], 24 | [0.493,0, 0, 0, 0.507,0, 0, 0, 0, 0], 25 | [0, 0.062,0.493,0.444,0, 0, 0, 0, 0, 0], 26 | [0, 0, 0, 0, 0, 0, 0.906,0.055,0.039,0], 27 | [0, 0, 0, 0, 0, 0.603,0, 0, 0.136,0.261], 28 | [0, 0, 0, 0, 0, 0.172,0, 0, 0.332,0.496], 29 | [0, 0, 0, 0, 0, 0.007,0.576,0.278,0, 0.139], 30 | [0, 0, 0, 0, 0, 0, 0.441,0.395,0.164,0] 31 | ] 32 | G = sparse_regularized_graph(self.pts, sparsity_param=0.005) 33 | assert_array_almost_equal(G.matrix('dense'), expected, decimal=3) 34 | 35 | def test_L1_knn_graph(self): 36 | expected = [ 37 | [0, 0.286,0.352,0.362,0, 0, 0, 0, 0, 0], 38 | [0.637,0, 0.209,0, 0.153,0, 0, 0, 0, 0], 39 | [0.446,0.133,0, 0, 0.421,0, 0, 0, 0, 0], 40 | [0.493,0, 0, 0, 0.507,0, 0, 0, 0, 0], 41 
| [0, 0, 0.535,0.465,0, 0, 0, 0, 0, 0], 42 | [0, 0, 0, 0, 0, 0, 0.924,0, 0.076,0], 43 | [0, 0, 0, 0, 0, 0.603,0, 0, 0.136,0.261], 44 | [0, 0, 0, 0, 0, 0, 0, 0, 0.454,0.546], 45 | [0, 0, 0, 0, 0, 0.138,0.520,0, 0, 0.343], 46 | [0, 0, 0, 0, 0, 0, 0.441,0.395,0.164,0] 47 | ] 48 | G = sparse_regularized_graph(self.pts, sparsity_param=0.005, kmax=3) 49 | assert_array_almost_equal(G.matrix('dense'), expected, decimal=3) 50 | 51 | def test_L1_graph_cv(self): 52 | expected = [ 53 | [0, 0.231,0.372,0.397,0, 0, 0, 0, 0, 0], 54 | [0.670,0, 0.205,0, 0.124,0, 0, 0, 0, 0], 55 | [0.437,0.138,0, 0.012,0.413,0, 0, 0, 0, 0], 56 | [0.503,0, 0, 0, 0.497,0, 0, 0, 0, 0], 57 | [0, 0.053,0.509,0.438,0, 0, 0, 0, 0, 0], 58 | [0, 0, 0, 0, 0, 0, 0.914,0.061,0.025,0], 59 | [0, 0, 0, 0, 0, 0.597,0, 0, 0.139,0.264], 60 | [0, 0, 0, 0, 0, 0.311,0, 0, 0.391,0.297], 61 | [0, 0, 0, 0, 0, 0.043,0.544,0.310,0, 0.103], 62 | [0, 0, 0, 0, 0, 0, 0.428,0.399,0.173,0] 63 | ] 64 | with warnings.catch_warnings(): 65 | warnings.filterwarnings('ignore', category=ConvergenceWarning) 66 | G = sparse_regularized_graph(self.pts) 67 | assert_array_almost_equal(G.matrix('dense'), expected, decimal=3) 68 | 69 | def test_SPD_graph(self): 70 | expected = [ 71 | [0, 0.216,0.380,0.404,0, 0, 0, 0, 0, 0], 72 | [0.676,0, 0.123,0, 0.202,0, 0, 0, 0, 0], 73 | [0.377,0.140,0, 0, 0.483,0, 0, 0, 0, 0], 74 | [0.506,0, 0, 0, 0.441,0, 0, 0, 0.053,0], 75 | [0.017,0.065,0.454,0.464,0, 0, 0, 0, 0, 0], 76 | [0, 0, 0, 0, 0, 0, 0.907,0.093,0, 0], 77 | [0, 0, 0, 0, 0, 0.575,0, 0, 0.117,0.308], 78 | [0, 0, 0, 0, 0, 0.295,0, 0, 0.319,0.386], 79 | [0, 0, 0, 0, 0, 0.010,0.599,0.274,0, 0.117], 80 | [0, 0, 0, 0, 0, 0, 0.440,0.386,0.174,0] 81 | ] 82 | G = sparse_regularized_graph(self.pts, positive=True, sparsity_param=0.002) 83 | assert_array_almost_equal(G.matrix('dense'), expected, decimal=3) 84 | 85 | def test_SPD_graph_cv(self): 86 | expected = [ 87 | [0, 0.230,0.380,0.390,0, 0, 0, 0, 0, 0], 88 | [0.603,0, 0.209,0, 0.188,0, 0, 0, 0, 0], 89 | [0.366,0.133,0, 0, 0.501,0, 0, 0, 0, 0], 90 | [0.414,0, 0.119,0, 0.383,0, 0, 0, 0.084,0], 91 | [0.002,0.062,0.482,0.454,0, 0, 0, 0, 0, 0], 92 | [0, 0, 0, 0, 0, 0, 0.921,0.079,0, 0], 93 | [0, 0, 0, 0, 0.006,0.584,0, 0, 0.088,0.322], 94 | [0, 0, 0, 0, 0, 0.286,0, 0, 0.288,0.426], 95 | [0, 0, 0, 0, 0, 0.052,0.541,0.254,0, 0.153], 96 | [0, 0, 0, 0, 0, 0, 0.458,0.408,0.134,0] 97 | ] 98 | G = sparse_regularized_graph(self.pts, positive=True) 99 | assert_array_almost_equal(G.matrix('dense'), expected, decimal=3) 100 | 101 | def test_smce_graph(self): 102 | expected = [ 103 | [0, 0.318,0.323,0.359,0, 0, 0, 0, 0, 0], 104 | [0.68, 0, 0.13, 0, 0.191,0, 0, 0, 0, 0], 105 | [0.537,0.047,0, 0, 0.417,0, 0, 0, 0, 0], 106 | [0.492,0, 0, 0, 0.508,0, 0, 0, 0, 0], 107 | [0.063,0.055,0.382,0.5, 0, 0, 0, 0, 0, 0], 108 | [0, 0, 0, 0, 0, 0, 0.768,0, 0.232,0], 109 | [0, 0, 0, 0, 0, 0.667,0, 0, 0.01, 0.323], 110 | [0, 0, 0, 0, 0, 0.031,0.125,0, 0.215,0.629], 111 | [0, 0, 0, 0, 0, 0.386,0.155,0.049,0, 0.41], 112 | [0, 0, 0, 0, 0, 0, 0.391,0.343,0.266,0] 113 | ] 114 | G = smce_graph(self.pts, keep_ratio=1) 115 | assert_array_almost_equal(G.matrix('dense'), expected, decimal=3) 116 | 117 | # use keep_ratio = 0.9 118 | expected = [ 119 | [0, 0.326,0.302,0.348,0, 0, 0, 0, 0, 0], 120 | [0.478,0, 0.222,0, 0.243,0, 0, 0, 0, 0], 121 | [0.376,0.197,0, 0, 0.376,0, 0, 0, 0, 0], 122 | [0.447,0, 0, 0, 0.474,0, 0, 0, 0, 0], 123 | [0, 0.188,0.348,0.443,0, 0, 0, 0, 0, 0], 124 | [0, 0, 0, 0, 0, 0, 0.604,0, 0.305,0], 125 | [0, 0, 0, 0, 0, 0.539,0, 0, 0.097,0.32], 126 | [0, 0, 
0, 0, 0, 0.176,0.1, 0, 0.227,0.496], 127 | [0, 0, 0, 0, 0, 0.374,0.111,0.179,0, 0.335], 128 | [0, 0, 0, 0, 0, 0, 0.353,0.364,0.283,0] 129 | ] 130 | G = smce_graph(self.pts, kmax=8, keep_ratio=0.9) 131 | assert_array_almost_equal(G.matrix('dense'), expected, decimal=3) 132 | 133 | 134 | def _gaussian_clusters(num_clusters, pts_per_cluster, dim): 135 | n = num_clusters * pts_per_cluster 136 | offsets = np.random.uniform(-9, 9, (num_clusters, dim)) 137 | return np.random.randn(n, dim) + np.repeat(offsets, pts_per_cluster, axis=0) 138 | 139 | 140 | if __name__ == '__main__': 141 | unittest.main() 142 | -------------------------------------------------------------------------------- /graphs/mixins/label.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import numpy as np 3 | import scipy.linalg as sl 4 | import scipy.sparse as ss 5 | import warnings 6 | from itertools import count 7 | from scipy.sparse.linalg import eigs 8 | from sklearn.cluster import spectral_clustering 9 | 10 | from ..mini_six import range 11 | 12 | 13 | class LabelMixin(object): 14 | 15 | def color_greedy(self): 16 | '''Returns a greedy vertex coloring as an array of ints.''' 17 | n = self.num_vertices() 18 | coloring = np.zeros(n, dtype=int) 19 | for i, nbrs in enumerate(self.adj_list()): 20 | nbr_colors = set(coloring[nbrs]) 21 | for c in count(1): 22 | if c not in nbr_colors: 23 | coloring[i] = c 24 | break 25 | return coloring 26 | 27 | def bicolor_spectral(self): 28 | '''Returns an approximate 2-coloring as an array of booleans. 29 | 30 | From "A Multiscale Pyramid Transform for Graph Signals" by Shuman et al. 31 | Note: Assumes a single connected component, and may fail otherwise. 32 | ''' 33 | lap = self.laplacian().astype(float) 34 | vals, vecs = eigs(lap, k=1, which='LM') 35 | vec = vecs[:,0].real 36 | return vec > 0 if vec[0] > 0 else vec < 0 37 | 38 | def cluster_spectral(self, num_clusters, kernel='rbf'): 39 | aff = self.kernelize(kernel).matrix() 40 | return spectral_clustering(aff, n_clusters=num_clusters) 41 | 42 | def classify_nearest(self, partial_labels): 43 | '''Simple semi-supervised classification, by assigning unlabeled vertices 44 | the label of nearest labeled vertex. 45 | 46 | partial_labels: (n,) array of integer labels, -1 for unlabeled. 47 | ''' 48 | labels = np.array(partial_labels, copy=True) 49 | unlabeled = labels == -1 50 | # compute geodesic distances from unlabeled vertices 51 | D_unlabeled = self.shortest_path(weighted=True)[unlabeled] 52 | # set distances to other unlabeled vertices to infinity 53 | D_unlabeled[:,unlabeled] = np.inf 54 | # find shortest distances to labeled vertices 55 | idx = D_unlabeled.argmin(axis=1) 56 | # apply the label of the closest vertex 57 | labels[unlabeled] = labels[idx] 58 | return labels 59 | 60 | def classify_lgc(self, partial_labels, kernel='rbf', alpha=0.2, tol=1e-3, 61 | max_iter=30): 62 | '''Iterative label spreading for semi-supervised classification. 63 | 64 | partial_labels: (n,) array of integer labels, -1 for unlabeled. 65 | kernel: one of {'none', 'rbf', 'binary'}, for reweighting edges. 66 | alpha: scalar, clamping factor. 67 | tol: scalar, convergence tolerance. 68 | max_iter: integer, cap on the number of iterations performed. 69 | 70 | From "Learning with local and global consistency" 71 | by Zhou et al. in 2004. 72 | 73 | Based on the LabelSpreading implementation in scikit-learn. 
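   |     Each iteration propagates the current label distributions one step
   |     through the (kernelized) graph, then clamps them back toward the known
   |     labels, with alpha controlling the propagation/clamping balance.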
74 | ''' 75 | # compute the gram matrix 76 | gram = -self.kernelize(kernel).laplacian(normed=True) 77 | if ss.issparse(gram): 78 | gram.data[gram.row == gram.col] = 0 79 | else: 80 | np.fill_diagonal(gram, 0) 81 | 82 | # initialize label distributions 83 | partial_labels = np.asarray(partial_labels) 84 | unlabeled = partial_labels == -1 85 | label_dists, classes = _onehot(partial_labels, mask=~unlabeled) 86 | 87 | # initialize clamping terms 88 | clamp_weights = np.where(unlabeled, alpha, 1)[:,None] 89 | y_static = label_dists * min(1 - alpha, 1) 90 | 91 | # iterate 92 | for it in range(max_iter): 93 | old_label_dists = label_dists 94 | label_dists = gram.dot(label_dists) 95 | label_dists *= clamp_weights 96 | label_dists += y_static 97 | # check convergence 98 | if np.abs(label_dists - old_label_dists).sum() <= tol: 99 | break 100 | else: 101 | warnings.warn("classify_lgc didn't converge in %d iterations" % max_iter) 102 | 103 | return classes[label_dists.argmax(axis=1)] 104 | 105 | def classify_local(self, partial_labels, C_l=10.0, C_u=1e-6): 106 | '''Local Learning Regularization for semi-supervised classification. 107 | 108 | partial_labels: (n,) array of integer labels, -1 for unlabeled. 109 | 110 | From "Transductive Classification via Local Learning Regularization" 111 | by Wu & Scholkopf in 2007. 112 | ''' 113 | raise NotImplementedError('NYI') 114 | 115 | def classify_harmonic(self, partial_labels, use_CMN=True): 116 | '''Harmonic function method for semi-supervised classification, 117 | also known as the Gaussian Mean Fields algorithm. 118 | 119 | partial_labels: (n,) array of integer labels, -1 for unlabeled. 120 | use_CMN : when True, apply Class Mass Normalization 121 | 122 | From "Semi-Supervised Learning Using Gaussian Fields and Harmonic Functions" 123 | by Zhu, Ghahramani, and Lafferty in 2003. 124 | 125 | Based on the matlab code at: 126 | http://pages.cs.wisc.edu/~jerryzhu/pub/harmonic_function.m 127 | ''' 128 | # prepare labels 129 | labels = np.array(partial_labels, copy=True) 130 | unlabeled = labels == -1 131 | 132 | # convert known labels to one-hot encoding 133 | fl, classes = _onehot(labels[~unlabeled]) 134 | 135 | L = self.laplacian(normed=False) 136 | if ss.issparse(L): 137 | L = L.tocsr()[unlabeled].toarray() 138 | else: 139 | L = L[unlabeled] 140 | 141 | Lul = L[:,~unlabeled] 142 | Luu = L[:,unlabeled] 143 | fu = -np.linalg.solve(Luu, Lul.dot(fl)) 144 | 145 | if use_CMN: 146 | scale = (1 + fl.sum(axis=0)) / fu.sum(axis=0) 147 | fu *= scale 148 | 149 | # assign new labels 150 | labels[unlabeled] = classes[fu.argmax(axis=1)] 151 | return labels 152 | 153 | def regression(self, y, y_mask, smoothness_penalty=0, kernel='rbf'): 154 | '''Perform vertex-valued regression, given partial labels. 155 | y : (n,d) array of known labels 156 | y_mask : index object such that all_labels[y_mask] == y 157 | 158 | From "Regularization and Semi-supervised Learning on Large Graphs" 159 | by Belkin, Matveeva, and Niyogi in 2004. 160 | Doesn't support multiple labels per vertex, unlike the paper's algorithm. 161 | To allow provided y values to change, use a (small) smoothness_penalty. 
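   |     smoothness_penalty == 0 solves the hard-constrained problem
   |     (Algorithm 2, "Interpolated Regularization" in the paper); a positive
   |     penalty solves the soft-constrained one (Algorithm 1, "Tikhonov
   |     Regularization").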
162 | ''' 163 | n = self.num_vertices() 164 | 165 | # input validation for y 166 | y = np.array(y, copy=True) 167 | ravel_f = False 168 | if y.ndim == 1: 169 | y = y[:,None] 170 | ravel_f = True 171 | if y.ndim != 2 or y.size == 0: 172 | raise ValueError('Invalid shape of y array: %s' % (y.shape,)) 173 | k, d = y.shape 174 | 175 | # input validation for y_mask 176 | if not hasattr(y_mask, 'dtype') or y_mask.dtype != 'bool': 177 | tmp = np.zeros(n, dtype=bool) 178 | tmp[y_mask] = True 179 | y_mask = tmp 180 | 181 | # mean-center known y for stability 182 | y_mean = y.mean(axis=0) 183 | y -= y_mean 184 | 185 | # use the normalized Laplacian for the smoothness matrix 186 | S = self.kernelize(kernel).laplacian(normed=True) 187 | if ss.issparse(S): 188 | S = S.tocsr() 189 | 190 | if smoothness_penalty == 0: 191 | # see Algorithm 2: Interpolated Regularization 192 | unlabeled_mask = ~y_mask 193 | S_23 = S[unlabeled_mask, :] 194 | S_3 = S_23[:, unlabeled_mask] 195 | rhs = S_23[:, y_mask].dot(y) 196 | if ss.issparse(S): 197 | f_unlabeled = ss.linalg.spsolve(S_3, rhs) 198 | if f_unlabeled.ndim == 1: 199 | f_unlabeled = f_unlabeled[:,None] 200 | else: 201 | f_unlabeled = sl.solve(S_3, rhs, sym_pos=True, overwrite_a=True, 202 | overwrite_b=True) 203 | f = np.zeros((n, d)) 204 | f[y_mask] = y 205 | f[unlabeled_mask] = -f_unlabeled 206 | else: 207 | # see Algorithm 1: Tikhonov Regularization in the paper 208 | y_hat = np.zeros((n, d)) 209 | y_hat[y_mask] = y 210 | I = y_mask.astype(float) # only one label per vertex 211 | lhs = k * smoothness_penalty * S 212 | if ss.issparse(lhs): 213 | lhs.setdiag(lhs.diagonal() + I) 214 | f = ss.linalg.lsqr(lhs, y_hat)[0] 215 | else: 216 | lhs.flat[::n+1] += I 217 | f = sl.solve(lhs, y_hat, sym_pos=True, overwrite_a=True, 218 | overwrite_b=True) 219 | 220 | # re-add the mean 221 | f += y_mean 222 | if ravel_f: 223 | return f.ravel() 224 | return f 225 | 226 | 227 | def _onehot(labels, mask=Ellipsis): 228 | classes = np.unique(labels[mask]) 229 | onehot = np.zeros((len(labels), len(classes)), dtype=int) 230 | for idx, label in enumerate(classes): 231 | onehot[labels==label, idx] = 1 232 | return onehot, classes 233 | -------------------------------------------------------------------------------- /graphs/base/adj.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import numpy as np 4 | import scipy.sparse as ss 5 | 6 | from .base import Graph 7 | 8 | 9 | class AdjacencyMatrixGraph(Graph): 10 | 11 | def copy(self): 12 | return self.__class__(self._adj.copy()) 13 | 14 | def num_vertices(self): 15 | return self._adj.shape[0] 16 | 17 | def is_weighted(self): 18 | return True 19 | 20 | def subgraph(self, mask): 21 | adj = self.matrix('dense', 'csr', 'csc') 22 | sub_adj = adj[mask][:,mask] 23 | return Graph.from_adj_matrix(sub_adj) 24 | 25 | subgraph.__doc__ = Graph.subgraph.__doc__ 26 | 27 | 28 | class DenseAdjacencyMatrixGraph(AdjacencyMatrixGraph): 29 | def __init__(self, adj): 30 | self._adj = np.atleast_2d(adj) 31 | assert self._adj.shape[0] == self._adj.shape[1] 32 | 33 | def pairs(self, copy=False, directed=True): 34 | adj = self._adj if directed else np.triu(self._adj) 35 | return np.transpose(np.nonzero(adj)) 36 | 37 | def matrix(self, *formats, **kwargs): 38 | copy = kwargs.pop('copy', False) 39 | if kwargs: 40 | raise ValueError('Unexpected kwargs for matrix(): %s' % kwargs) 41 | if not formats or 'dense' in formats: 42 | if copy: 43 | return self._adj.copy() 44 | return 
self._adj 45 | if 'csr' in formats: 46 | return ss.csr_matrix(self._adj) 47 | if 'csc' in formats: 48 | return ss.csc_matrix(self._adj) 49 | if 'coo' in formats: 50 | return ss.coo_matrix(self._adj) 51 | raise NotImplementedError('Unknown matrix format(s): %s' % (formats,)) 52 | 53 | def edge_weights(self, copy=False, directed=True): 54 | ii,jj = self.pairs(directed=directed).T 55 | return self._adj[ii,jj] 56 | 57 | def num_edges(self): 58 | return np.count_nonzero(self._adj) 59 | 60 | def add_edges(self, from_idx, to_idx, weight=1, symmetric=False, copy=False): 61 | weight = np.atleast_1d(1 if weight is None else weight) 62 | res_dtype = np.promote_types(weight.dtype, self._adj.dtype) 63 | adj = self._adj.astype(res_dtype, copy=copy) 64 | adj[from_idx, to_idx] = weight 65 | if symmetric: 66 | adj[to_idx, from_idx] = weight 67 | if copy: 68 | return DenseAdjacencyMatrixGraph(adj) 69 | self._adj = adj 70 | return self 71 | 72 | def remove_edges(self, from_idx, to_idx, symmetric=False, copy=False): 73 | adj = self._adj.copy() if copy else self._adj 74 | adj[from_idx, to_idx] = 0 75 | if symmetric: 76 | adj[to_idx, from_idx] = 0 77 | if copy: 78 | return DenseAdjacencyMatrixGraph(adj) 79 | self._adj = adj 80 | return self 81 | 82 | def _update_edges(self, weights, copy=False): 83 | weights = np.asarray(weights) 84 | res_dtype = np.promote_types(weights.dtype, self._adj.dtype) 85 | adj = self._adj.astype(res_dtype, copy=copy) 86 | adj[adj != 0] = weights 87 | if copy: 88 | return DenseAdjacencyMatrixGraph(adj) 89 | self._adj = adj 90 | return self 91 | 92 | def symmetrize(self, method='sum', copy=False): 93 | adj = _symmetrize(self._adj, method) 94 | if copy: 95 | return DenseAdjacencyMatrixGraph(adj) 96 | self._adj = adj 97 | return self 98 | 99 | pairs.__doc__ = Graph.pairs.__doc__ 100 | matrix.__doc__ = Graph.matrix.__doc__ 101 | edge_weights.__doc__ = Graph.edge_weights.__doc__ 102 | add_edges.__doc__ = Graph.add_edges.__doc__ 103 | remove_edges.__doc__ = Graph.remove_edges.__doc__ 104 | symmetrize.__doc__ = Graph.symmetrize.__doc__ 105 | 106 | 107 | class SparseAdjacencyMatrixGraph(AdjacencyMatrixGraph): 108 | def __init__(self, adj, may_have_zeros=True): 109 | assert ss.issparse(adj), 'SparseAdjacencyMatrixGraph input must be sparse' 110 | if adj.format not in ('coo', 'csr', 'csc'): 111 | adj = adj.tocsr() 112 | self._adj = adj 113 | assert self._adj.shape[0] == self._adj.shape[1] 114 | if may_have_zeros: 115 | # Things go wrong if we have explicit zeros in the graph. 
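   |       # (num_edges() reports nnz and edge_weights() returns .data directly,
   |       #  so any stored-but-zero entries would be miscounted as edges)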
116 | _eliminate_zeros(self._adj) 117 | 118 | def pairs(self, copy=False, directed=True): 119 | adj = self._adj if directed else ss.triu(self._adj) 120 | return np.transpose(adj.nonzero()) 121 | 122 | def matrix(self, *formats, **kwargs): 123 | copy = kwargs.pop('copy', False) 124 | if kwargs: 125 | raise ValueError('Unexpected kwargs for matrix(): %s' % kwargs) 126 | if not formats or self._adj.format in formats: 127 | if copy: 128 | return self._adj.copy() 129 | return self._adj 130 | for fmt in formats: 131 | if fmt != 'dense': 132 | return self._adj.asformat(fmt) 133 | if 'dense' in formats: 134 | return self._adj.toarray() 135 | raise NotImplementedError('Unknown matrix format(s): %s' % (formats,)) 136 | 137 | def edge_weights(self, copy=False, directed=True): 138 | if not directed: 139 | ii, jj = ss.triu(self._adj).nonzero() 140 | return np.asarray(self._adj[ii, jj]).ravel() 141 | # XXX: assumes correct internal ordering and no explicit zeros 142 | w = self._adj.data.ravel() 143 | if copy: 144 | return w.copy() 145 | return w 146 | 147 | def num_edges(self): 148 | return self._adj.nnz 149 | 150 | def add_edges(self, from_idx, to_idx, weight=1, symmetric=False, copy=False): 151 | adj = self._weightable_adj(weight, copy) 152 | if adj.format == 'coo': 153 | adj = adj.tocsr() 154 | adj[from_idx, to_idx] = weight 155 | if symmetric: 156 | adj[to_idx, from_idx] = weight 157 | return self._post_weighting(adj, weight, copy) 158 | 159 | def remove_edges(self, from_idx, to_idx, symmetric=False, copy=False): 160 | adj = self._adj.copy() if copy else self._adj 161 | if adj.format == 'coo': 162 | adj = adj.tocsr() 163 | adj[from_idx, to_idx] = 0 164 | if symmetric: 165 | adj[to_idx, from_idx] = 0 166 | return self._post_weighting(adj, 0, copy) 167 | 168 | def _update_edges(self, weights, copy=False): 169 | adj = self._weightable_adj(weights, copy) 170 | adj.data[:] = weights 171 | return self._post_weighting(adj, weights, copy) 172 | 173 | def add_self_edges(self, weight=1, copy=False): 174 | adj = self._weightable_adj(weight, copy) 175 | try: 176 | adj.setdiag(weight) 177 | except TypeError: # pragma: no cover 178 | # Older scipy doesn't support setdiag on everything. 
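   |       # (fall back to CSR, which implements setdiag, at the cost of a
   |       #  format conversion)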
179 | adj = adj.tocsr() 180 | adj.setdiag(weight) 181 | return self._post_weighting(adj, weight, copy) 182 | 183 | def reweight(self, weight, edges=None, copy=False): 184 | adj = self._weightable_adj(weight, copy) 185 | if edges is None: 186 | adj.data[:] = weight 187 | else: 188 | adj.data[edges] = weight 189 | return self._post_weighting(adj, weight, copy) 190 | 191 | def _weightable_adj(self, weight, copy): 192 | weight = np.atleast_1d(weight) 193 | adj = self._adj 194 | res_dtype = np.promote_types(weight.dtype, adj.dtype) 195 | if copy: 196 | adj = adj.copy() 197 | if res_dtype is not adj.dtype: 198 | adj.data = adj.data.astype(res_dtype) 199 | return adj 200 | 201 | def _post_weighting(self, adj, weight, copy): 202 | # Check if we might have changed the sparsity structure by adding zeros 203 | has_zeros = np.any(weight == 0) 204 | if copy: 205 | return SparseAdjacencyMatrixGraph(adj, may_have_zeros=has_zeros) 206 | self._adj = _eliminate_zeros(adj) if has_zeros else adj 207 | return self 208 | 209 | def symmetrize(self, method='sum', copy=False): 210 | adj = _symmetrize(self._adj.tocsr(), method) 211 | if copy: 212 | return SparseAdjacencyMatrixGraph(adj, may_have_zeros=False) 213 | self._adj = adj 214 | return self 215 | 216 | pairs.__doc__ = Graph.pairs.__doc__ 217 | matrix.__doc__ = Graph.matrix.__doc__ 218 | edge_weights.__doc__ = Graph.edge_weights.__doc__ 219 | add_edges.__doc__ = Graph.add_edges.__doc__ 220 | remove_edges.__doc__ = Graph.remove_edges.__doc__ 221 | symmetrize.__doc__ = Graph.symmetrize.__doc__ 222 | add_self_edges.__doc__ = Graph.add_self_edges.__doc__ 223 | reweight.__doc__ = Graph.reweight.__doc__ 224 | 225 | 226 | def _symmetrize(A, method): 227 | if method == 'sum': 228 | S = A + A.T 229 | elif method == 'max': 230 | if ss.issparse(A): 231 | S = A.maximum(A.T) 232 | else: 233 | S = np.maximum(A, A.T) 234 | else: 235 | S = (A + A.T) / 2.0 236 | return S 237 | 238 | 239 | def _eliminate_zeros(A): 240 | if hasattr(A, 'eliminate_zeros'): 241 | A.eliminate_zeros() 242 | elif A.format == 'coo': # pragma: no cover 243 | # old scipy doesn't provide coo_matrix.eliminate_zeros 244 | nz_mask = A.data != 0 245 | A.data = A.data[nz_mask] 246 | A.row = A.row[nz_mask] 247 | A.col = A.col[nz_mask] 248 | else: 249 | raise ValueError("Can't eliminate_zeros from type: %s" % type(A)) 250 | return A 251 | -------------------------------------------------------------------------------- /graphs/mixins/transformation.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, absolute_import, print_function 2 | import numpy as np 3 | import scipy.sparse as ss 4 | import scipy.sparse.csgraph as ssc 5 | from scipy.linalg import solve 6 | from collections import deque 7 | 8 | from ..mini_six import range 9 | 10 | 11 | class TransformMixin(object): 12 | 13 | def kernelize(self, kernel): 14 | '''Re-weight according to a specified kernel function. 
15 |     kernel : str, {none, binary, rbf}
16 |       none -> no reweighting
17 |       binary -> all edges are given weight 1
18 |       rbf -> applies a gaussian function to edge weights
19 |     '''
20 |     if kernel == 'none':
21 |       return self
22 |     if kernel == 'binary':
23 |       if self.is_weighted():
24 |         return self._update_edges(1, copy=True)
25 |       return self
26 |     if kernel == 'rbf':
27 |       w = self.edge_weights()
28 |       r = np.exp(-w / w.std())
29 |       return self._update_edges(r, copy=True)
30 |     raise ValueError('Invalid kernel type: %r' % kernel)
31 | 
32 |   def barycenter_edge_weights(self, X, copy=True, reg=1e-3):
33 |     '''Re-weight such that the sum of each vertex's edge weights is 1.
34 |     The resulting weighted graph is suitable for locally linear embedding.
35 |     reg : amount of regularization to keep the problem well-posed
36 |     '''
37 |     new_weights = []
38 |     for i, adj in enumerate(self.adj_list()):
39 |       C = X[adj] - X[i]
40 |       G = C.dot(C.T)
41 |       trace = np.trace(G)
42 |       r = reg * trace if trace > 0 else reg
43 |       G.flat[::G.shape[1] + 1] += r
44 |       w = solve(G, np.ones(G.shape[0]), sym_pos=True,
45 |                 overwrite_a=True, overwrite_b=True)
46 |       w /= w.sum()
47 |       new_weights.extend(w.tolist())
48 |     return self.reweight(new_weights, copy=copy)
49 | 
50 |   def connected_subgraphs(self, directed=True, ordered=False):
51 |     '''Generates connected components as subgraphs.
52 |     When ordered=True, subgraphs are ordered by descending number of vertices.
53 |     '''
54 |     num_ccs, labels = self.connected_components(directed=directed)
55 |     # check the trivial case first
56 |     if num_ccs == 1:
57 |       yield self
58 |       return  # (PEP 479: raising StopIteration inside a generator is an error)
59 |     if ordered:
60 |       # sort by descending size (num vertices)
61 |       order = np.argsort(np.bincount(labels))[::-1]
62 |     else:
63 |       order = range(num_ccs)
64 | 
65 |     # don't use self.subgraph() here, because we can reuse adj
66 |     adj = self.matrix('dense', 'csr', 'csc')
67 |     for c in order:
68 |       mask = labels == c
69 |       sub_adj = adj[mask][:,mask]
70 |       yield self.__class__.from_adj_matrix(sub_adj)
71 | 
72 |   def shortest_path_subtree(self, start_idx, directed=True):
73 |     '''Returns a subgraph containing only the shortest paths from start_idx to
74 |     every other vertex.
75 |     '''
76 |     adj = self.matrix()
77 |     _, pred = ssc.dijkstra(adj, directed=directed, indices=start_idx,
78 |                            return_predecessors=True)
79 |     adj = ssc.reconstruct_path(adj, pred, directed=directed)
80 |     if not directed:
81 |       adj = adj + adj.T
82 |     return self.__class__.from_adj_matrix(adj)
83 | 
84 |   def minimum_spanning_subtree(self):
85 |     '''Returns the (undirected) minimum spanning tree subgraph.'''
86 |     dist = self.matrix('dense', copy=True)
87 |     dist[dist==0] = np.inf
88 |     np.fill_diagonal(dist, 0)
89 |     mst = ssc.minimum_spanning_tree(dist)
90 |     return self.__class__.from_adj_matrix(mst + mst.T)
91 | 
92 |   def neighborhood_subgraph(self, start_idx, radius=1, weighted=True,
93 |                             directed=True, return_mask=False):
94 |     '''Returns a subgraph containing only vertices within a given
95 |     geodesic radius of start_idx.'''
96 |     adj = self.matrix('dense', 'csr', 'csc')
97 |     dist = ssc.dijkstra(adj, directed=directed, indices=start_idx,
98 |                         unweighted=(not weighted), limit=radius)
99 |     mask = np.isfinite(dist)
100 |     sub_adj = adj[mask][:,mask]
101 |     g = self.__class__.from_adj_matrix(sub_adj)
102 |     if return_mask:
103 |       return g, mask
104 |     return g
105 | 
106 |   def isograph(self, min_weight=None):
107 |     '''Remove short-circuit edges using the Isograph algorithm.
108 | 
109 |     min_weight : float, optional
110 |       Minimum weight of edges to consider removing.
Defaults to max(MST). 111 | 112 | From "Isograph: Neighbourhood Graph Construction Based On Geodesic Distance 113 | For Semi-Supervised Learning" by Ghazvininejad et al., 2011. 114 | Note: This uses the non-iterative algorithm which removes edges 115 | rather than reweighting them. 116 | ''' 117 | W = self.matrix('dense') 118 | # get candidate edges: all edges - MST edges 119 | tree = self.minimum_spanning_subtree() 120 | candidates = np.argwhere((W - tree.matrix('dense')) > 0) 121 | cand_weights = W[candidates[:,0], candidates[:,1]] 122 | # order by increasing edge weight 123 | order = np.argsort(cand_weights) 124 | cand_weights = cand_weights[order] 125 | # disregard edges shorter than a threshold 126 | if min_weight is None: 127 | min_weight = tree.edge_weights().max() 128 | idx = np.searchsorted(cand_weights, min_weight) 129 | cand_weights = cand_weights[idx:] 130 | candidates = candidates[order[idx:]] 131 | # check each candidate edge 132 | to_remove = np.zeros_like(cand_weights, dtype=bool) 133 | for i, (u,v) in enumerate(candidates): 134 | W_uv = np.where(W < cand_weights[i], W, 0) 135 | len_uv = ssc.dijkstra(W_uv, indices=u, unweighted=True, limit=2)[v] 136 | if len_uv > 2: 137 | to_remove[i] = True 138 | ii, jj = candidates[to_remove].T 139 | return self.remove_edges(ii, jj, copy=True) 140 | 141 | def circle_tear(self, spanning_tree='mst', cycle_len_thresh=5, spt_idx=None, 142 | copy=True): 143 | '''Circular graph tearing. 144 | 145 | spanning_tree: one of {'mst', 'spt'} 146 | cycle_len_thresh: int, length of longest allowable cycle 147 | spt_idx: int, start vertex for shortest_path_subtree, random if None 148 | 149 | From "How to project 'circular' manifolds using geodesic distances?" 150 | by Lee & Verleysen, ESANN 2004. 151 | 152 | See also: shortest_path_subtree, minimum_spanning_subtree 153 | ''' 154 | # make the initial spanning tree graph 155 | if spanning_tree == 'mst': 156 | tree = self.minimum_spanning_subtree().matrix() 157 | elif spanning_tree == 'spt': 158 | if spt_idx is None: 159 | spt_idx = np.random.choice(self.num_vertices()) 160 | tree = self.shortest_path_subtree(spt_idx, directed=False).matrix() 161 | 162 | # find edges in self but not in the tree 163 | potential_edges = np.argwhere(ss.triu(self.matrix() - tree)) 164 | 165 | # remove edges that induce large cycles 166 | ii, jj = _find_cycle_inducers(tree, potential_edges, cycle_len_thresh) 167 | return self.remove_edges(ii, jj, symmetric=True, copy=copy) 168 | 169 | def cycle_cut(self, cycle_len_thresh=12, directed=False, copy=True): 170 | '''CycleCut algorithm: removes bottleneck edges. 
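   |     Repeatedly samples atomic cycles longer than cycle_len_thresh and cuts
   |     their edges, then restores any cut edge that turns out not to induce a
   |     long cycle.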
171 | Paper DOI: 10.1.1.225.5335 172 | ''' 173 | symmetric = not directed 174 | adj = self.kernelize('binary').matrix('csr', 'dense', copy=True) 175 | if symmetric: 176 | adj = adj + adj.T 177 | 178 | removed_edges = [] 179 | while True: 180 | c = _atomic_cycle(adj, cycle_len_thresh, directed=directed) 181 | if c is None: 182 | break 183 | # remove edges in the cycle 184 | ii, jj = c.T 185 | adj[ii,jj] = 0 186 | if symmetric: 187 | adj[jj,ii] = 0 188 | removed_edges.extend(c) 189 | 190 | #XXX: if _atomic_cycle changes, may need to do this on each loop 191 | if ss.issparse(adj): 192 | adj.eliminate_zeros() 193 | 194 | # select only the necessary cuts 195 | ii, jj = _find_cycle_inducers(adj, removed_edges, cycle_len_thresh, 196 | directed=directed) 197 | # remove the bad edges 198 | return self.remove_edges(ii, jj, symmetric=symmetric, copy=copy) 199 | 200 | 201 | def _atomic_cycle(adj, length_thresh, directed=False): 202 | # TODO: make this more efficient 203 | start_vertex = np.random.choice(adj.shape[0]) 204 | # run BFS 205 | q = deque([start_vertex]) 206 | visited_vertices = set([start_vertex]) 207 | visited_edges = set() 208 | while q: 209 | a = q.popleft() 210 | nbrs = adj[a].nonzero()[-1] 211 | for b in nbrs: 212 | if b not in visited_vertices: 213 | q.append(b) 214 | visited_vertices.add(b) 215 | visited_edges.add((a,b)) 216 | if not directed: 217 | visited_edges.add((b,a)) 218 | continue 219 | # run an inner BFS 220 | inner_q = deque([b]) 221 | inner_visited = set([b]) 222 | parent_vertices = {b: -1} 223 | while inner_q: 224 | c = inner_q.popleft() 225 | inner_nbrs = adj[c].nonzero()[-1] 226 | for d in inner_nbrs: 227 | if d in inner_visited or (d,c) not in visited_edges: 228 | continue 229 | parent_vertices[d] = c 230 | inner_q.append(d) 231 | inner_visited.add(d) 232 | if d != a: 233 | continue 234 | # atomic cycle found 235 | cycle = [] 236 | while parent_vertices[d] != -1: 237 | x, d = d, parent_vertices[d] 238 | cycle.append((x, d)) 239 | cycle.append((d, a)) 240 | if len(cycle) >= length_thresh: 241 | return np.array(cycle) 242 | else: 243 | # abort the inner BFS 244 | inner_q.clear() 245 | break 246 | # finished considering edge a->b 247 | visited_edges.add((a,b)) 248 | if not directed: 249 | visited_edges.add((b,a)) 250 | # no cycles found 251 | return None 252 | 253 | 254 | def _find_cycle_inducers(adj, potential_edges, length_thresh, directed=False): 255 | # remove edges that induce large cycles 256 | path_dist = ssc.dijkstra(adj, directed=directed, return_predecessors=False, 257 | unweighted=True) 258 | remove_ii, remove_jj = [], [] 259 | for i,j in potential_edges: 260 | if length_thresh < path_dist[i,j] < np.inf: 261 | remove_ii.append(i) 262 | remove_jj.append(j) 263 | else: 264 | # keeping this edge: update path lengths 265 | tmp = (path_dist[:,i] + 1)[:,None] + path_dist[j,:] 266 | ii, jj = np.nonzero(tmp < path_dist) 267 | new_lengths = tmp[ii, jj] 268 | path_dist[ii,jj] = new_lengths 269 | if not directed: 270 | path_dist[jj,ii] = new_lengths 271 | return remove_ii, remove_jj 272 | --------------------------------------------------------------------------------
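A minimal usage sketch (hypothetical; not a file from this repo). It assumes
EdgePairGraph is importable from graphs.base.pairs as the layout above
suggests, and it exercises only methods visible in this dump: degree() and
adj_list() from the Graph base class, and symmetrize() from pairs.py.

import numpy as np

from graphs.base.pairs import EdgePairGraph  # assumed import path

# A directed 4-cycle: 0 -> 1 -> 2 -> 3 -> 0.
g = EdgePairGraph(np.array([[0, 1], [1, 2], [2, 3], [3, 0]]))

print(g.num_vertices())                      # 4 (inferred as max index + 1)
print(g.num_edges())                         # 4
print(g.degree(kind='out', weighted=False))  # [1 1 1 1]

# symmetrize(copy=True) returns a SymmEdgePairGraph, whose num_edges()
# counts each off-diagonal edge in both directions.
sg = g.symmetrize(copy=True)
print(sg.num_edges())                        # 8
print([list(nbrs) for nbrs in sg.adj_list()])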