├── tests-require.txt
├── docs
└── img
│ ├── simple_graph.png
│ ├── bipartite_graph.png
│ ├── directed_selfloop_graph.png
│ ├── directed_antiparallel_graph.png
│ └── xswap.svg
├── .gitignore
├── ci
├── build-wheels.sh
└── deploy.sh
├── xswap
├── __init__.py
├── src
│ ├── xswap.h
│ ├── xswap.cpp
│ ├── xswap_wrapper.cpp
│ └── bitset.cpp
├── network_formats.py
├── permute.py
├── preprocessing.py
├── prior.py
└── lib
│ └── roaring.hh
├── LICENSE
├── tests
├── test_time.py
├── test_permute.py
├── test_formats.py
├── test_roaring.cpp
├── test_prior.py
└── test_bitset.cpp
├── setup.py
├── .travis.yml
└── README.md
/tests-require.txt:
--------------------------------------------------------------------------------
1 | numpy
2 | pandas
3 | pytest
4 | requests
5 | scipy
6 | setuptools
7 |
--------------------------------------------------------------------------------
/docs/img/simple_graph.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hetio/xswap/HEAD/docs/img/simple_graph.png
--------------------------------------------------------------------------------
/docs/img/bipartite_graph.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hetio/xswap/HEAD/docs/img/bipartite_graph.png
--------------------------------------------------------------------------------
/docs/img/directed_selfloop_graph.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hetio/xswap/HEAD/docs/img/directed_selfloop_graph.png
--------------------------------------------------------------------------------
/docs/img/directed_antiparallel_graph.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hetio/xswap/HEAD/docs/img/directed_antiparallel_graph.png
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | docs/output/
2 | tests/permutation_stats.txt
3 | tests/*.o
4 | build/
5 | dist/
6 | .vscode/
7 | __pycache__/
8 | .pytest_cache/
9 | **.so
10 | xswap.egg-info/
11 |
--------------------------------------------------------------------------------
/ci/build-wheels.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Build xswap wheels for several CPython versions and repair them with
# auditwheel. The CI configuration runs this inside a manylinux container with
# the repository mounted at /io and PLAT passed in through the environment.

# Stop at the first failing command and treat an unset variable (e.g. PLAT) as
# an error, so a broken build cannot go on to produce incomplete wheels.
set -o errexit -o nounset

# Compile wheels
/opt/python/cp35-cp35m/bin/pip wheel /io/ -w wheelhouse/
/opt/python/cp36-cp36m/bin/pip wheel /io/ -w wheelhouse/
/opt/python/cp37-cp37m/bin/pip wheel /io/ -w wheelhouse/

# Bundle external shared libraries into the wheels
for whl in wheelhouse/**.whl; do
    auditwheel repair "$whl" --plat "$PLAT" -w /io/wheelhouse/
done
12 |
--------------------------------------------------------------------------------
/xswap/__init__.py:
--------------------------------------------------------------------------------
"""Public interface of the xswap package."""
from xswap import network_formats
from xswap import preprocessing
from xswap import prior
from xswap.permute import permute_edge_list

__version__ = '0.0.2'

# Every entry of `__all__` must be the name of an attribute of this module:
# `from xswap import *` looks each entry up on the module, so dotted paths such
# as 'prior.compute_xswap_priors' raise AttributeError. Exporting the submodules
# keeps their functions reachable as `<submodule>.<function>`.
__all__ = [
    'network_formats',
    'permute_edge_list',
    'preprocessing',
    'prior',
]
19 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | BSD 2-Clause License
2 |
3 | Copyright (c) 2018, Greene Laboratory
4 | All rights reserved.
5 |
6 | Redistribution and use in source and binary forms, with or without
7 | modification, are permitted provided that the following conditions are met:
8 |
9 | * Redistributions of source code must retain the above copyright notice, this
10 | list of conditions and the following disclaimer.
11 |
12 | * Redistributions in binary form must reproduce the above copyright notice,
13 | this list of conditions and the following disclaimer in the documentation
14 | and/or other materials provided with the distribution.
15 |
16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
20 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 |
--------------------------------------------------------------------------------
/tests/test_time.py:
--------------------------------------------------------------------------------
1 | import os
2 | import time
3 |
4 | import requests
5 |
6 | import xswap
7 |
8 | test_directory = os.path.dirname(os.path.realpath(__file__)) + '/'
9 |
10 |
def load_edges():
    """
    Download the benchmark edge list and parse it into integer pairs.

    Returns
    -------
    edges : List[Tuple[int, int]]
        One `(source, target)` tuple for every comma-separated line of the
        downloaded file.

    Raises
    ------
    requests.HTTPError
        If the server answers with an unsuccessful status code.
    """
    edges_url = "https://github.com/greenelab/xswap/raw/{}/{}".format(
        "8c31b4cbdbbf2cfa5018b1277bbd0e9f6263e573", "graphs/GiG_edges_reduced.txt")
    response = requests.get(edges_url)
    # Fail with a clear HTTP error rather than while parsing an error page
    response.raise_for_status()
    edges = []
    for line in response.iter_lines():
        source, target = line.decode('utf-8').split(',')
        edges.append((int(source), int(target)))
    return edges
21 |
22 |
def test_time():
    """
    Check that permuting the benchmark network changes the edge list and
    finishes in under five seconds, then write a one-line summary to
    `permutation_stats.txt` in the test directory.
    """
    edges = load_edges()
    # perf_counter is monotonic, so the measured duration cannot be distorted
    # by adjustments of the system clock
    start = time.perf_counter()
    new_edges, stats = xswap.permute_edge_list(edges)
    time_diff = time.perf_counter() - start
    print("{:.4f} seconds elapsed.".format(time_diff))
    assert edges != new_edges
    assert time_diff < 5

    # Number of distinct original edges that are still present afterwards
    num_repeats = len(set(edges) & set(new_edges))
    # Scaled to 0-100 so the value agrees with the "percent" label below
    percent_unchanged = 100 * num_repeats / len(edges)
    with open(test_directory + 'permutation_stats.txt', 'w') as f:
        # 10 * len(edges): the call above relies on the default multiplier
        f.write('Runtime: {:.3f} sec. {:.3f} percent unchanged of {} total edges after '
                '{} swap attempts\n'.format(time_diff, percent_unchanged, len(edges),
                                            10*len(edges)))
43 |
--------------------------------------------------------------------------------
/ci/deploy.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

## deploy.sh: run during a Travis CI build to deploy output directory to the gh-pages branch on GitHub.
## References
## - https://github.com/manubot/rootstock/blob/ddb0288895cd5bc5dab117fb366c52216a717d0e/ci/deploy.sh
## - https://github.com/wp-cli/wp-cli/issues/3798
## - https://github.com/manubot/catalog/blob/fd0ef6a999cca38890023eb65f19d1b87e96e83c/deploy.sh#L1-L45

# Set options for extra caution & debugging
set -o errexit \
    -o nounset \
    -o pipefail

eval "$(ssh-agent -s)"
# Ensure command traces are disabled while dealing with the private key
[[ "$SHELLOPTS" =~ xtrace ]] && XTRACE_ON=1
[[ "${XTRACE_ON:-}" ]] && set +o xtrace && echo "xtrace disabled"
base64 --decode <<< "$GITHUB_DEPLOY_PRIVATE_KEY" | ssh-add -
[[ "${XTRACE_ON:-}" ]] && set -o xtrace && echo "xtrace reenabled"

# Configure git
git config --global push.default simple
git config --global user.name "Travis CI"
git config --global user.email "deploy@travis-ci.com"
git checkout "$TRAVIS_BRANCH"
git remote set-url origin "git@github.com:$TRAVIS_REPO_SLUG.git"

# Fetch and create gh-pages branch
# Travis does a shallow and single branch git clone
git remote set-branches --add origin gh-pages
git fetch origin gh-pages:gh-pages

# The deployed directory (docs/output/xswap) holds the generated documentation,
# so the commit message names it as such.
commit_message="\
Generate documentation on $(date --iso --utc)

built by $TRAVIS_JOB_WEB_URL
based on https://github.com/$TRAVIS_REPO_SLUG/commit/$TRAVIS_COMMIT
[skip ci]
"
# echo >&2 "$commit_message"

ghp-import \
  --push --no-jekyll \
  --message="$commit_message" \
  docs/output/xswap
46 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
import os
import pathlib
import re

import setuptools

# Compile the extension sources with g++
os.environ["CC"] = "g++"

directory = pathlib.Path(__file__).parent.resolve()

# version: read from xswap/__init__.py so it is defined in one place only
init_path = directory.joinpath('xswap', '__init__.py')
# Decode explicitly as UTF-8 so the build does not depend on the locale
text = init_path.read_text(encoding='utf-8')
pattern = re.compile(r"^__version__ = ['\"]([^'\"]*)['\"]", re.MULTILINE)
version_match = pattern.search(text)
if version_match is None:
    raise RuntimeError('Unable to find __version__ in {}'.format(init_path))
version = version_match.group(1)

# long_description
readme_path = directory.joinpath('README.md')
long_description = readme_path.read_text(encoding='utf-8')

xswap_cpp_extension = setuptools.Extension(
    'xswap._xswap_backend',
    sources=['xswap/src/xswap_wrapper.cpp', 'xswap/src/bitset.cpp', 'xswap/src/xswap.cpp', 'xswap/lib/roaring.c'],
    extra_compile_args=["-std=c++11"],
)

setuptools.setup(
    # Package details
    name='xswap',
    version=version,
    url='https://github.com/greenelab/xswap',
    project_urls={
        'Documentation': 'https://hetio.github.io/xswap/',
        'Source': 'https://github.com/hetio/xswap',
        'Tracker': 'https://github.com/hetio/xswap/issues',
        'Publication': 'https://greenelab.github.io/xswap-manuscript/',
    },
    description='Python-wrapped C/C++ library for degree-preserving network randomization',
    long_description_content_type='text/markdown',
    long_description=long_description,
    license='BSD 2-Clause',

    # Author details
    author='Michael Zietz',
    author_email='michael.zietz@gmail.com',

    # Specify python version
    python_requires='>=3.5',

    ext_modules=[xswap_cpp_extension],
    packages=setuptools.find_packages(),
)
53 |
--------------------------------------------------------------------------------
/docs/img/xswap.svg:
--------------------------------------------------------------------------------
1 |
61 |
--------------------------------------------------------------------------------
/tests/test_permute.py:
--------------------------------------------------------------------------------
1 | import tempfile
2 |
3 | import pytest
4 | import requests
5 |
6 | import xswap
7 |
8 |
@pytest.mark.parametrize('edges,permutable', [
    ([(0, 0), (1, 1), (1, 2), (2, 3)], True),
    ([(0, 0)], False),
])
def test_xswap_changes_edges(edges, permutable):
    """
    A permutable edge list must come back changed and a non-permutable one
    unchanged; in both cases the list passed in must not be modified.
    """
    snapshot = list(edges)
    permuted, _ = xswap.permute_edge_list(
        edges, allow_self_loops=True, allow_antiparallel=True)
    # The caller's list is left untouched
    assert edges == snapshot
    # The result differs from the input exactly when the network is permutable
    assert (permuted != edges) == permutable
26 |
27 |
def test_roaring_warning():
    """
    Check that a warning is given when using the much slower but far more general
    Roaring bitset rather than the faster fully uncompressed bitset.
    """
    import warnings

    edges_url = "https://github.com/greenelab/xswap/raw/{}/{}".format(
        "8c31b4cbdbbf2cfa5018b1277bbd0e9f6263e573", "graphs/GiG_edges_reduced.txt")
    response = requests.get(edges_url)
    # Fail with a clear HTTP error rather than while parsing an error page
    response.raise_for_status()
    with tempfile.NamedTemporaryFile() as tf:
        tf.write(response.content)
        # The file is re-opened by name below, so buffered bytes must reach
        # the disk first or the tail of the edge list could be missing
        tf.flush()
        edges = xswap.preprocessing.load_processed_edges(tf.name)

    # Run with a large allocation limit, recording any warnings without
    # asserting on them. This replaces the deprecated `pytest.warns(None)`.
    with warnings.catch_warnings(record=True):
        warnings.simplefilter("always")
        permuted_edges, stats = xswap.permute_edge_list(
            edges, allow_self_loops=True, allow_antiparallel=False,
            multiplier=0.1, seed=0, max_malloc=4000000000)

    # A tiny allocation limit must trigger the Roaring bitset warning
    with pytest.warns(RuntimeWarning, match="Using Roaring bitset because of the large number of edges."):
        permuted_edges, stats = xswap.permute_edge_list(
            edges, allow_self_loops=True, allow_antiparallel=False,
            multiplier=0.1, seed=0, max_malloc=10)
47 |
--------------------------------------------------------------------------------
/tests/test_formats.py:
--------------------------------------------------------------------------------
1 | import numpy
2 | import pytest
3 | import scipy.sparse
4 |
5 | import xswap.network_formats
6 |
7 |
@pytest.mark.parametrize('matrix,correct_edges,include_reverse_edges', [
    (numpy.array([[1,0,0,0],[0,0,1,0],[0,0,0,1]]), [(0, 0), (1, 2), (2, 3)], False),
    (numpy.array([[1,0,0],[0,0,1],[0,1,1]]), [(0, 0), (1, 2), (2, 2)], False),
    (numpy.array([[1,0,0],[0,0,1],[0,1,1]]), [(0, 0), (1, 2), (2, 1), (2, 2)], True),
])
def test_matrix_to_edges(matrix, correct_edges, include_reverse_edges):
    """
    The edges extracted from `matrix` must equal `correct_edges`, ignoring order.
    """
    found = xswap.network_formats.matrix_to_edges(matrix, include_reverse_edges)
    expected = sorted(correct_edges)
    assert sorted(found) == expected
16 |
17 |
@pytest.mark.parametrize('edges,correct_matrix,add_reverse_edges,shape,dtype,sparse', [
    (
        [(0, 1), (0, 3), (2, 2)],
        numpy.array([[0,1,0,1], [1,0,0,0], [0,0,1,0], [1,0,0,0]], dtype=int),
        True, (4, 4), int, False),
    (
        [(0, 1), (0, 3), (2, 2)],
        numpy.array([[0,1,0,1], [0,0,0,0], [0,0,1,0], [0,0,0,0]], dtype=int),
        False, (4, 4), int, False),
    (
        [(0, 1), (0, 3), (2, 2)],
        numpy.array([[0,1,0,1], [0,0,0,0], [0,0,1,0]], dtype=int),
        False, (3, 4), int, False),
    (
        [(0, 1), (0, 3), (2, 2)],
        numpy.array([[0,1,0,1], [0,0,0,0], [0,0,1,0]], dtype=float),
        False, (3, 4), float, False),
    (
        [(0, 1), (0, 3), (2, 2)],
        numpy.array([[0,1,0,1], [0,0,0,0], [0,0,1,0]], dtype=numpy.float32),
        False, (3, 4), numpy.float32, False),
    (
        [(0, 1), (0, 3), (2, 2)],
        scipy.sparse.csc_matrix([[0,1,0,1], [0,0,0,0], [0,0,1,0]], dtype=numpy.float32),
        False, (3, 4), numpy.float32, True),
])
def test_edges_to_matrix(edges, correct_matrix, add_reverse_edges, shape, dtype, sparse):
    """
    The matrix built from `edges` must have the requested dtype and sparsity
    and hold the same entries as `correct_matrix`.
    """
    result = xswap.network_formats.edges_to_matrix(
        edge_list=edges, add_reverse_edges=add_reverse_edges, shape=shape,
        dtype=dtype, sparse=sparse)

    assert result.dtype == dtype
    assert scipy.sparse.issparse(result) == sparse
    if not sparse:
        assert numpy.array_equal(result, correct_matrix)
    else:
        # Sparse comparison: no position may differ from the expected matrix
        differing = (result != correct_matrix).nnz
        assert differing == 0
55 |
--------------------------------------------------------------------------------
/tests/test_roaring.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | #include "../xswap/src/xswap.h"
3 |
4 |
5 | main(int argc, char const *argv[])
6 | {
7 | int counter, incorrect_contains, incorrect_doesnt_contain;
8 |
9 | // Create real edges to be added to the Roaring set
10 | int** real_edges = (int**)malloc(sizeof(int*) * 16);
11 | counter = 0;
12 | for (int i = 4; i < 8; i++) {
13 | for (int j = 4; j < 8; j++) {
14 | real_edges[counter] = (int*)malloc(sizeof(int) * 2);
15 | real_edges[counter][0] = i;
16 | real_edges[counter][1] = j;
17 | counter += 1;
18 | }
19 | }
20 |
21 | Edges edges;
22 | edges.edge_array = real_edges;
23 | edges.num_edges = 16;
24 | RoaringBitSet edges_set = RoaringBitSet(edges);
25 |
26 | // Check that edges added at the creation of the set are contained
27 | incorrect_doesnt_contain = 0;
28 | for (int i = 4; i < 8; i++) {
29 | for (int j = 4; j < 8; j++) {
30 | int edge[2] = {i, j};
31 | if (!edges_set.contains(edge)) {
32 | incorrect_doesnt_contain += 1;
33 | }
34 | }
35 | }
36 |
37 | // Create fake edges and check that they are not in the set
38 | counter = 0;
39 | incorrect_contains = 0;
40 | for (int i = 0; i < 4; i++) {
41 | for (int j = 0; j < 4; j++) {
42 | int fake_edge[2] = {i, j};
43 | // Check that this edge is not in the set
44 | if (edges_set.contains(fake_edge)) {
45 | incorrect_contains += 1;
46 | }
47 | // Add the edge and check that it was added
48 | edges_set.add(fake_edge);
49 | if (!edges_set.contains(fake_edge)) {
50 | incorrect_doesnt_contain += 1;
51 | }
52 | // Remove the edge and check that it is removed
53 | edges_set.remove(fake_edge);
54 | if (edges_set.contains(fake_edge)) {
55 | incorrect_contains += 1;
56 | }
57 | counter += 1;
58 | }
59 | }
60 |
61 | free(real_edges);
62 | if (incorrect_contains == 0 && incorrect_doesnt_contain == 0) {
63 | std::cout << "All tests passed" << "\n";
64 | return 0;
65 | } else {
66 | std::cout << "Tests failed " << incorrect_contains << " " << incorrect_doesnt_contain << "\n";
67 | return 1;
68 | }
69 | }
70 |
--------------------------------------------------------------------------------
/xswap/src/xswap.h:
--------------------------------------------------------------------------------
1 | #include
2 | #include "../lib/roaring.hh"
3 |
4 | extern int CHAR_BITS;
5 |
6 | struct Edges {
7 | int** edge_array;
8 | int num_edges;
9 | int max_id;
10 | };
11 |
12 | // Slower bitset
13 | class RoaringBitSet
14 | {
15 | public:
16 | RoaringBitSet() = default;
17 | RoaringBitSet(Edges edges);
18 | bool contains(int *edge);
19 | void add(int *edge);
20 | void remove(int *edge);
21 |
22 | private:
23 | Roaring bitmap;
24 | };
25 |
26 | // Faster edge bitset for smaller numbers of edges
27 | class UncompressedBitSet
28 | {
29 | public:
30 | UncompressedBitSet() = default;
31 | UncompressedBitSet(int max_id, unsigned long long int max_malloc);
32 | UncompressedBitSet(Edges edges, unsigned long long int max_malloc);
33 | bool contains(int *edge);
34 | void add(int *edge);
35 | void remove(int *edge);
36 | void free_array();
37 |
38 | private:
39 | char* bitset;
40 | size_t max_cantor;
41 | void create_bitset(size_t num_elements, unsigned long long int max_malloc);
42 | char get_bit(char word, char bit_position);
43 | void set_bit_true(char* word, char bit_position);
44 | void set_bit_false(char* word, char bit_position);
45 | };
46 |
47 | // Wrapper class for the two bitset implementations
48 | class BitSet
49 | {
50 | public:
51 | BitSet(Edges edges, unsigned long long int max_malloc);
52 | bool contains(int *edge);
53 | void add(int *edge);
54 | void remove(int *edge);
55 | void free_array();
56 | PyObject* runtime_warning_roaring(void);
57 | UncompressedBitSet uncompressed_set;
58 |
59 | private:
60 | bool use_compressed;
61 | RoaringBitSet compressed_set;
62 | };
63 |
64 | struct statsCounter {
65 | int num_swaps;
66 | int same_edge = 0;
67 | int self_loop = 0;
68 | int duplicate = 0;
69 | int undir_duplicate = 0;
70 | int excluded = 0;
71 | };
72 |
73 | struct Conditions {
74 | int seed;
75 | bool allow_antiparallel;
76 | bool allow_self_loop;
77 | Edges excluded_edges;
78 | };
79 |
80 | size_t cantor_pair(int* edge);
81 |
82 | void swap_edges(Edges edges, int num_swaps, Conditions cond, statsCounter *stats,
83 | unsigned long long int max_malloc);
84 |
85 | bool is_valid_edge(int *edge, BitSet edges_set, Conditions cond,
86 | statsCounter *stats);
87 |
88 | bool is_valid_swap(int **new_edges, BitSet edges_set, Conditions cond,
89 | statsCounter *stats);
90 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | setup_and_test: &setup_and_test
2 | stage: test
3 | language: python
4 | addons:
5 | apt:
6 | packages:
7 | - pkg-config
8 | - python3-dev
9 | before_install:
10 | - pip install -r tests-require.txt
11 | install:
12 | - pkg-config --cflags --libs python3
13 | - python setup.py build
14 | - pip install .
15 | script:
16 | - pytest tests/
17 | - >
18 | g++ tests/test_bitset.cpp xswap/src/xswap.h xswap/src/bitset.cpp
19 | xswap/lib/roaring.c -o tests/test_bitset.o -std=c++11
20 | `pkg-config --cflags --libs python3`
21 | - ./tests/test_bitset.o
22 | - >
23 | g++ tests/test_roaring.cpp xswap/src/xswap.h xswap/src/bitset.cpp
24 | xswap/lib/roaring.c -o tests/test_roaring.o -std=c++11
25 | `pkg-config --cflags --libs python3`
26 | - ./tests/test_roaring.o
27 |
28 | build_and_upload: &build_and_upload
29 | stage: deploy
30 | sudo: required
31 | if: tag IS present
32 | services:
33 | - docker
34 | install:
35 | - docker pull $DOCKER_IMAGE
36 | script:
37 | - docker run --rm -e PLAT=$PLAT -v `pwd`:/io $DOCKER_IMAGE /io/ci/build-wheels.sh
38 | - /opt/python/3.6/bin/pip install twine
39 | - /opt/python/3.6/bin/python -m twine upload -u zietzm -p $PYPI_PASSWORD --repository-url https://upload.pypi.org/legacy/ --skip-existing wheelhouse/*
40 |
41 | compiler:
42 | - g++
43 | matrix:
44 | include:
45 | - <<: *setup_and_test
46 | name: "Test 3.5 on Ubuntu"
47 | dist: xenial
48 | python: 3.5
49 | - <<: *setup_and_test
50 | name: "Test 3.6 on Ubuntu"
51 | dist: xenial
52 | python: 3.6
53 | - <<: *setup_and_test
54 | name: "Test 3.7 on Ubuntu"
55 | dist: xenial
56 | python: 3.7
57 | - <<: *build_and_upload
58 | name: "Build manylinux1_x86_64"
59 | env:
60 | - DOCKER_IMAGE=quay.io/pypa/manylinux1_x86_64
61 | - PLAT=manylinux1_x86_64
62 | - <<: *build_and_upload
63 | name: "Build manylinux1_i686"
64 | env:
65 | - DOCKER_IMAGE=quay.io/pypa/manylinux1_i686
66 | - PLAT=manylinux1_i686
67 | - <<: *build_and_upload
68 | name: "Build manylinux2010_x86_64"
69 | env:
70 | - DOCKER_IMAGE=quay.io/pypa/manylinux2010_x86_64
71 | - PLAT=manylinux2010_x86_64
72 | - name: "Build documentation"
73 | dist: xenial
74 | language: python
75 | python: 3.7
76 | install:
77 | - pip install --requirement tests-require.txt
78 | - pip install pdoc3~=0.7.0 ghp-import~=0.5.5
79 | script:
80 | - pdoc --force --html
81 | --config="git_link_template=\"https://github.com/$TRAVIS_REPO_SLUG/blob/{commit}/{path}#L{start_line}-L{end_line}\""
82 | --output-dir=docs/output
83 | xswap
84 | deploy:
85 | provider: script
86 | script: bash ci/deploy.sh
87 | skip_cleanup: true
88 | on:
89 | branch: master
90 | condition: $TRAVIS_EVENT_TYPE = "push"
91 |
--------------------------------------------------------------------------------
/xswap/src/xswap.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | #include "xswap.h"
3 |
4 | void swap_edges(Edges edges, int num_swaps, Conditions cond, statsCounter *stats,
5 | unsigned long long int max_malloc) {
6 | // Initialize bitset for possible edges
7 | BitSet edges_set = BitSet(edges, max_malloc);
8 |
9 | // Initialize unbiased random number generator
10 | std::mt19937 rng(cond.seed);
11 | std::uniform_int_distribution uni(0, edges.num_edges - 1);
12 |
13 | // Do XSwap
14 | for (int i = 0; i < num_swaps; i++) {
15 | // Draw edges randomly
16 | int edge_index_a = uni(rng);
17 | int edge_index_b = uni(rng);
18 |
19 | if (edge_index_a == edge_index_b) {
20 | stats->same_edge += 1;
21 | continue;
22 | }
23 |
24 | // Old edges
25 | int* edge_a = edges.edge_array[edge_index_a];
26 | int* edge_b = edges.edge_array[edge_index_b];
27 |
28 | // Form potential new edges
29 | int new_edge_a[2] = { edge_a[0], edge_b[1] };
30 | int new_edge_b[2] = { edge_b[0], edge_a[1] };
31 | int* new_edges[2] = { new_edge_a, new_edge_b };
32 |
33 | bool valid = is_valid_swap(new_edges, edges_set, cond, stats);
34 | if (valid) {
35 | edges_set.remove(edge_a);
36 | edges_set.remove(edge_b);
37 |
38 | int temp_target = edge_a[1];
39 | edge_a[1] = edge_b[1];
40 | edge_b[1] = temp_target;
41 |
42 | edges_set.add(new_edge_a);
43 | edges_set.add(new_edge_b);
44 | }
45 | }
46 | edges_set.free_array();
47 | }
48 |
49 | bool is_valid_edge(int *new_edge, BitSet edges_set, Conditions valid_conditions,
50 | statsCounter *stats) {
51 | // New edge would be a self-loop
52 | if (!valid_conditions.allow_self_loop && new_edge[0] == new_edge[1]) {
53 | stats->self_loop += 1;
54 | return false;
55 | }
56 | // New edge already exists
57 | if (edges_set.contains(new_edge)) {
58 | stats->duplicate += 1;
59 | return false;
60 | }
61 | // Undirected and reverse of new edge already exists
62 | int reversed[2] = { new_edge[1], new_edge[0] };
63 | if (!valid_conditions.allow_antiparallel && edges_set.contains(reversed)) {
64 | stats->undir_duplicate += 1;
65 | return false;
66 | }
67 | for (int i = 0; i < valid_conditions.excluded_edges.num_edges; i++) {
68 | if (valid_conditions.excluded_edges.edge_array[i][0] == new_edge[0] &&
69 | valid_conditions.excluded_edges.edge_array[i][1] == new_edge[1]) {
70 | stats->excluded += 1;
71 | return false;
72 | }
73 | }
74 | return true;
75 | }
76 |
77 | bool is_valid_swap(int **new_edges, BitSet edges_set, Conditions valid_conditions,
78 | statsCounter *stats) {
79 | for (int i = 0; i < 2; i++) {
80 | bool is_valid = is_valid_edge(new_edges[i], edges_set, valid_conditions, stats);
81 | if (!is_valid) {
82 | return false;
83 | }
84 | }
85 | return true;
86 | }
87 |
--------------------------------------------------------------------------------
/xswap/network_formats.py:
--------------------------------------------------------------------------------
1 | from typing import List, Tuple, TypeVar
2 |
3 | import numpy
4 | import scipy.sparse
5 |
6 |
def matrix_to_edges(matrix: numpy.ndarray, include_reverse_edges: bool=True):
    """
    Build an edge list from a (bi)adjacency matrix. Inverse of `edges_to_matrix`.

    Parameters
    ----------
    matrix : numpy.ndarray
        Adjacency matrix or biadjacency matrix of a network
    include_reverse_edges : bool
        Whether edges that are the reverse of other edges are kept, e.g.
        whether both (0, 1) and (1, 0) may be returned. If False, only edges
        with source <= target are returned. Use `True` for a biadjacency
        matrix, where row and column indices refer to separate nodes.

    Returns
    -------
    edge_list : List[Tuple[int, int]]
        One `(row, column)` pair for every nonzero entry kept, so
        `matrix[0, 2] == 1` produces the edge `(0, 2)`.
    """
    coo = scipy.sparse.coo_matrix(matrix)
    pairs = zip(coo.row, coo.col)

    if include_reverse_edges:
        return list(pairs)
    # Keep only the entries on or above the diagonal
    return [(source, target) for source, target in pairs if source <= target]
35 |
36 |
def edges_to_matrix(edge_list: List[Tuple[int, int]], add_reverse_edges: bool,
                    shape: Tuple[int, int], dtype: TypeVar=bool, sparse: bool=True):
    """
    Convert edge list to (bi)adjacency matrix. Inverse of `matrix_to_edges`.

    Parameters
    ----------
    edge_list : List[Tuple[int, int]]
        An edge list mapped such that node ids correspond to desired matrix
        positions. For example, (0, 0) will mean that the resulting matrix has
        a positive value of type `dtype` in that position. An empty list gives
        an all-zero matrix.
    add_reverse_edges : bool
        Whether to include the reverse of edges in the matrix. For example,
        if `edge_list = [(1, 0)]` and `add_reverse_edges = True`, then the
        returned matrix has `matrix[1, 0]` = `matrix[0, 1]` = 1. Else, the matrix
        only has `matrix[1, 0]` = 1. If a biadjacency matrix is desired, then
        set `add_reverse_edges = False`.
    shape : Tuple[int, int]
        Shape of the matrix to be returned. Allows edges to be converted to
        a matrix even when there are nodes without edges.
    dtype : data-type
        Dtype of the returned matrix. For example, `int`, `bool`, `float`, etc.
    sparse : bool
        Whether a sparse matrix should be returned. If `False`, returns a dense
        numpy.ndarray

    Returns
    -------
    matrix : scipy.sparse.csc_matrix or numpy.ndarray
    """
    if len(edge_list) > 0:
        rows, cols = zip(*edge_list)
    else:
        # zip(*[]) yields nothing to unpack, so an empty edge list needs
        # explicit empty index arrays
        rows = numpy.empty(0, dtype=numpy.intp)
        cols = numpy.empty(0, dtype=numpy.intp)

    matrix = scipy.sparse.csc_matrix(
        (numpy.ones(len(edge_list)), (rows, cols)), dtype=dtype, shape=shape,
    )

    if add_reverse_edges:
        # Symmetrise; `> 0` collapses positions present in both directions
        # back to a single indicator before restoring the requested dtype
        matrix = (matrix + matrix.T) > 0
        matrix = matrix.astype(dtype)

    if not sparse:
        matrix = matrix.toarray()

    return matrix
79 |
--------------------------------------------------------------------------------
/tests/test_prior.py:
--------------------------------------------------------------------------------
1 | import numpy
2 | import pandas
3 | import pytest
4 |
5 | import xswap
6 |
7 |
@pytest.mark.parametrize('edges,true_prior,num_swaps,shape', [
    ([(0, 0), (1, 1)], 0.5 * numpy.ones((2, 2)), 10000, (2, 2)),
    ([(0, 1), (1, 0)], 0.5 * numpy.ones((2, 2)), 10000, (2, 2)),
    ([(0, 0)], numpy.ones((1, 1)), 10, (1, 1)),
    ([(0, 1), (1, 2), (3, 4), (1, 0)], numpy.zeros((5, 5)), 0, (5, 5)),
    ([(0, 1), (1, 2), (3, 4), (1, 0)], numpy.zeros((4, 5)), 0, (4, 5)),
])
def test_prior_matrix(edges, true_prior, num_swaps, shape):
    """
    For very small networks whose correct prior is obvious, the occurrence
    matrix from `xswap.prior.compute_xswap_occurrence_matrix` must match it
    to within 0.01 once divided by the number of permutations.
    """
    occurrences = xswap.prior.compute_xswap_occurrence_matrix(
        edges, n_permutations=num_swaps, shape=shape, allow_self_loops=True,
        allow_antiparallel=True)
    # With zero permutations there is nothing to normalise by
    if not num_swaps:
        edge_prior = occurrences.toarray()
    else:
        edge_prior = (occurrences / num_swaps).toarray()
    largest_error = numpy.abs(edge_prior - true_prior).max()
    assert largest_error == pytest.approx(0, abs=0.01)
28 |
29 |
# `float` replaces the former `numpy.float` alias below: the alias was the
# builtin `float` and has been removed from NumPy.
@pytest.mark.parametrize('edges,dtypes,source_degrees,target_degrees,shape,allow_antiparallel', [
    (
        [(0, 2), (0, 3), (1, 2), (2, 3), (3, 4)],
        {'id': numpy.uint16, 'edge': bool, 'degree': numpy.uint32, 'xswap_prior': float},
        {0: 2, 1: 1, 2: 3, 3: 3, 4: 1}, {0: 2, 1: 1, 2: 3, 3: 3, 4: 1}, (5, 5), False
    ),
    (
        [(0, 2), (0, 3), (1, 2), (2, 3), (3, 4)],
        {'id': numpy.int8, 'edge': int, 'degree': float, 'xswap_prior': numpy.float64},
        {0: 2, 1: 1, 2: 3, 3: 3, 4: 1}, {0: 2, 1: 1, 2: 3, 3: 3, 4: 1}, (5, 5), False
    ),
    (
        [(0, 2), (0, 3), (1, 2), (1, 3)],
        {'id': numpy.float16, 'edge': float, 'degree': float, 'xswap_prior': numpy.float32},
        {0: 2, 1: 2, 2: 0, 3: 0}, {0: 0, 1: 0, 2: 2, 3: 2}, (4, 4), True
    ),
])
def test_prior_dataframe(edges, dtypes, source_degrees, target_degrees, shape, allow_antiparallel):
    """
    Check that `xswap.prior.compute_xswap_priors` returns a DataFrame with
    the expected columns, dtypes, degrees and edge indicators.
    """
    prior_df = xswap.prior.compute_xswap_priors(
        edges, n_permutations=1000, shape=shape, allow_self_loops=False,
        allow_antiparallel=allow_antiparallel, dtypes=dtypes)

    assert isinstance(prior_df, pandas.DataFrame)
    assert list(prior_df.columns) == ['source_id', 'target_id', 'edge', 'source_degree',
                                      'target_degree', 'xswap_prior']
    assert dict(prior_df.dtypes) == {
        'source_id': dtypes['id'], 'target_id': dtypes['id'], 'edge': dtypes['edge'],
        'source_degree': dtypes['degree'], 'target_degree': dtypes['degree'],
        'xswap_prior': dtypes['xswap_prior']
    }

    assert prior_df.set_index('source_id')['source_degree'].to_dict() == source_degrees
    assert prior_df.set_index('target_id')['target_degree'].to_dict() == target_degrees

    # Ensure that all the edges are accounted for in the dataframe
    for edge in edges:
        assert prior_df.query('source_id == {} & target_id == {}'.format(*edge))['edge'].values[0]

    # Whether directed-ness is correctly propagated through the pipeline
    if allow_antiparallel:
        assert prior_df['edge'].sum() == len(edges)
    else:
        assert prior_df['edge'].sum() == len(edges) * 2
75 |
--------------------------------------------------------------------------------
/xswap/permute.py:
--------------------------------------------------------------------------------
1 | from typing import List, Set, Tuple
2 |
3 |
def permute_edge_list(edge_list: List[Tuple[int, int]], allow_self_loops: bool = False,
                      allow_antiparallel: bool = False, multiplier: float = 10,
                      excluded_edges: Set[Tuple[int, int]] = set(), seed: int = 0,
                      max_malloc: int = 4000000000):
    """
    Permute the edges of a graph using the XSwap method given by Hanhijärvi,
    et al. (doi.org/f3mn58). XSwap is a degree-preserving network randomization
    technique that selects edges, checks the validity of the swap, and exchanges
    the target nodes between the edges. For guidance on choosing
    `allow_self_loops` and `allow_antiparallel` for bipartite, directed, and
    undirected networks, please see README.md.

    Parameters
    ----------
    edge_list : List[Tuple[int, int]]
        Edge list representing the graph to be randomized. Tuples can contain
        integer values representing nodes. No value should be greater than C++'s
        `INT_MAX`, in this case 2_147_483_647.
    allow_self_loops : bool
        Whether to allow edges like (0, 0). In the case of bipartite graphs,
        such an edge represents a connection between two distinct nodes, while
        in other graphs it may represent an edge from a node to itself, in which
        case an edge may or may not be meaningful depending on context.
    allow_antiparallel : bool
        Whether to allow simultaneous edges like (0, 1) and (1, 0). In the case
        of bipartite graphs, these edges represent two connections between four
        distinct nodes, while for other graphs, these may be connections between
        the same two nodes.
    multiplier : float
        The number of edge swap attempts is determined by the product of the
        number of existing edges and multiplier. For example, if five edges are
        passed and multiplier is set to 10, 50 swaps will be attempted. Non-integer
        products will be rounded down to the nearest integer.
    excluded_edges : Set[Tuple[int, int]]
        Specific edges which should never be created by the network randomization.
        The set is copied into a list before use and is never mutated here.
    seed : int
        Random seed that will be passed to the C++ Mersenne Twister 19937 random
        number generator.
    max_malloc : int (`unsigned long long int` in C)
        The maximum amount of memory to be allocated using `malloc` when making
        a bitset to hold edges. An uncompressed bitset is implemented for
        holding edges that is significantly faster than alternatives. However,
        it is memory-inefficient and will not be used if more memory is required
        than `max_malloc`. Above the threshold, a Roaring bitset will be used.

    Returns
    -------
    new_edges : List[Tuple[int, int]]
        Edge list of a permutation of the network given as `edge_list`
    stats : Dict[str, int]
        Information about the permutation performed. Gives the following information:
        `swap_attempts` - number of attempted swaps
        `same_edge` - number of swaps rejected because one edge was chosen twice
        `self_loop` - number of swaps rejected because new edge is a self-loop
        `duplicate` - number of swaps rejected because new edge already exists
        `undir_duplicate` - number of swaps rejected because the network is
            undirected and the reverse of the new edge already exists
        `excluded` - number of swaps rejected because new edge was among excluded

    Raises
    ------
    ValueError
        If `edge_list` is empty or contains duplicate edges.
    """
    # Validate the input first so that bad arguments are reported clearly and
    # without requiring the compiled backend to be importable.
    if len(edge_list) == 0:
        # Without this check the failure would surface below as the opaque
        # "max() arg is an empty sequence" ValueError.
        raise ValueError("Edge list is empty. At least one edge is required.")
    if len(edge_list) != len(set(edge_list)):
        raise ValueError("Edge list contained duplicate edges.")

    # The compiled extension is imported lazily, at call time
    import xswap._xswap_backend

    # Number of attempted XSwap swaps
    num_swaps = int(multiplier * len(edge_list))

    # Compute the maximum node ID (for creating the bitset)
    max_id = max(map(max, edge_list))

    new_edges, stats = xswap._xswap_backend._xswap(
        edge_list, list(excluded_edges), max_id, allow_self_loops,
        allow_antiparallel, num_swaps, seed, max_malloc)

    return new_edges, stats
77 |
--------------------------------------------------------------------------------
/xswap/preprocessing.py:
--------------------------------------------------------------------------------
1 | import csv
2 |
3 |
def load_str_edges(filename, node_delim=',', edge_delim='\n'):
    """
    Load edges from file into memory. Store edges as a list and store each edge
    as a tuple of strings. Used to load edges for preprocessing.

    filename : path of the file to read
    node_delim : str
        Delimiter separating the node names within one edge record.
    edge_delim : str
        Delimiter separating edge records. `csv.reader` always splits records
        on newlines and ignores `lineterminator`, so any other delimiter is
        handled here by splitting the file contents explicitly. This keeps
        the function symmetric with `write_edges`.

    Returns a list of tuples of str. Records with fewer than two fields
    (e.g. blank lines) are skipped.
    """
    with open(filename, 'r', newline='') as f:
        if edge_delim in ('\n', '\r', '\r\n'):
            # Newline-terminated records: csv.reader splits them natively
            reader = csv.reader(f, delimiter=node_delim, lineterminator=edge_delim)
            return [tuple(row) for row in reader if len(row) > 1]
        # Custom record delimiter: split manually. Stray newlines around a
        # record are stripped because csv.reader rejects them mid-record.
        records = [record.strip('\r\n') for record in f.read().split(edge_delim)]
    reader = csv.reader(records, delimiter=node_delim)
    return [tuple(row) for row in reader if len(row) > 1]
13 |
14 |
def load_processed_edges(filename):
    """
    Read an edge file and return its edges as tuples of two ints.

    "Processed" means the file is expected to already contain integer node
    ids ranging from zero to the number of unique nodes. This function only
    converts the first two fields of each record with `int()`; it does not
    check that range itself.
    """
    int_edges = []
    for str_edge in load_str_edges(filename):
        source, target = int(str_edge[0]), int(str_edge[1])
        int_edges.append((source, target))
    return int_edges
25 |
26 |
def write_edges(filename, edges, node_delim=',', edge_delim='\n'):
    """
    Write `edges` to `filename`, one record per edge. Fields within a record
    are joined by `node_delim` and every record is terminated by `edge_delim`.
    """
    with open(filename, 'w', newline='') as out_file:
        edge_writer = csv.writer(out_file, delimiter=node_delim,
                                 lineterminator=edge_delim)
        for edge in edges:
            edge_writer.writerow(edge)
31 |
32 |
def write_mapping(filename, mapping, delimiter=','):
    """
    Write a node mapping to `filename` as a delimited table with the header
    row `original, mapped` followed by one row per item of `mapping`.
    """
    header = ['original', 'mapped']
    with open(filename, 'w', newline='') as out_file:
        mapping_writer = csv.writer(out_file, delimiter=delimiter)
        mapping_writer.writerow(header)
        mapping_writer.writerows(
            [original, mapped] for original, mapped in mapping.items()
        )
39 |
40 |
41 | def _map_nodes_to_int(nodes):
42 | """
43 | Return a dict mapping a list of nodes to their sorted indices. Nodes should
44 | be a list of strings.
45 |
46 | Returns:
47 | --------
48 | Dict[str, int]
49 | """
50 | sorted_node_set = sorted(set(nodes))
51 | name_to_id = {name: i for i, name in enumerate(sorted_node_set)}
52 | return name_to_id
53 |
54 |
55 | def _apply_map(edges, source_mapping, target_mapping):
56 | """
57 | Maps edges according to new node names specified by source and target maps.
58 |
59 | edges : List[Tuple[str, str]]
60 | source_mapping : Dict[str, int]
61 | target_mapping : Dict[str, int]
62 | """
63 | source_nodes = [edge[0] for edge in edges]
64 | target_nodes = [edge[1] for edge in edges]
65 | mapped_nodes = [
66 | map(source_mapping.get, source_nodes),
67 | map(target_mapping.get, target_nodes),
68 | ]
69 | return list(zip(*mapped_nodes))
70 |
71 |
def map_str_edges(edges, bipartite):
    """
    Maps a list of edge tuples containing strings to a minimal set of
    integer edges.

    edges : List[Tuple[str, str]]
    bipartite : bool
        Whether source and target nodes are mapped separately. If
        `bipartite=True`, two independent mappings are built, one from the
        source names and one from the target names, so the same name on both
        sides refers to two different nodes (e.g. the single edge ('1', '1')
        maps to (0, 0), meaning source node 0 to target node 0). If
        `bipartite=False`, one shared mapping is built from all names so that
        a name is mapped consistently between source and target.

    Returns:
    --------
    Tuple[List[Tuple[int, int]], Dict[str, int], Dict[str, int]]
        The mapped edges, the source name -> id mapping, and the target
        name -> id mapping. For `bipartite=False` both mappings are the same
        dict object.

    Example:
    --------
    >>> map_str_edges([('a', 'b'), ('b', 'c')], bipartite=False)
    ([(0, 1), (1, 2)], {'a': 0, 'b': 1, 'c': 2}, {'a': 0, 'b': 1, 'c': 2})
    """
    source_nodes = [edge[0] for edge in edges]
    target_nodes = [edge[1] for edge in edges]

    if bipartite:
        # Two separate mappings to be used for source and target nodes
        source_map = _map_nodes_to_int(source_nodes)
        target_map = _map_nodes_to_int(target_nodes)
    else:
        # One single mapping to be used for both source and target nodes
        # (the helper de-duplicates, so the lists can simply be concatenated)
        source_map = target_map = _map_nodes_to_int(source_nodes + target_nodes)

    mapped_edges = _apply_map(edges, source_map, target_map)
    return (mapped_edges, source_map, target_map)
111 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # XSwap: Fast degree-preserving network permutation
2 |
3 | [Build status (Travis CI)](https://travis-ci.com/hetio/xswap)
4 | [xswap on PyPI](https://pypi.org/project/xswap/)
5 | [Issue tracker](https://github.com/hetio/xswap/issues)
6 |
7 | **Full documentation:**
8 |
9 |
10 |
11 | XSwap is an algorithm for degree-preserving network randomization (permutation) [1].
12 | Permuted networks can be used for a number of purposes in network analysis, including for generating counterfactual distributions of features when only the network's degree sequence is maintained or for computing a prior probability of an edge given only the network's degree sequence.
13 | Overall, permuted networks allow one to quantify the effects of degree on analysis and prediction methods.
14 | Understanding this effect is useful when a network's degree sequence is subject to biases.
15 | This implementation is a modified version of the algorithm due to Hanhijärvi et al. with two additional parameters (`allow_self_loops` and `allow_antiparallel`), which enable greater generalizability to bipartite, directed, and undirected networks.
16 |
17 | 1. **Randomization Techniques for Graphs**
18 | Sami Hanhijärvi, Gemma C. Garriga, Kai Puolamäki
19 | *Proceedings of the 2009 SIAM International Conference on Data Mining* (2009-04-30)
20 | DOI: [10.1137/1.9781611972795.67](https://doi.org/10.1137/1.9781611972795.67)
21 |
22 | ## Usage examples
23 |
24 | #### Permuting an edge list
25 |
26 | ```python
27 | >>> edges = [(0, 1), (1, 0)]
28 | >>> permuted_edges, permutation_statistics = xswap.permute_edge_list(
29 | edges, allow_self_loops=True, allow_antiparallel=True,
30 | multiplier=10)
31 | >>> permuted_edges
32 | [(0, 0), (1, 1)]
33 | >>> permutation_statistics
34 | {'swap_attempts': 20, 'same_edge': 10, 'self_loop': 0, 'duplicate': 1,
35 | 'undir_duplicate': 0, 'excluded': 0}
36 | ```
37 |
38 | #### Computing degree-sequence based prior probabilities of edges existing
39 |
40 | ```python
41 | >>> edges = [(0, 1), (1, 0)]
42 | >>> prior_prob_df = xswap.prior.compute_xswap_priors(
43 | edges, n_permutations=10000, shape=(2, 2), allow_self_loops=True,
44 | allow_antiparallel=True)
45 | >>> prior_prob_df
46 | source_id target_id edge source_degree target_degree xswap_prior
47 | 0 0 0 False 1 1 0.5
48 | 1 0 1 True 1 1 0.5
49 | 2 1 0 True 1 1 0.5
50 | 3 1 1 False 1 1 0.5
51 | ```
52 |
53 | ## Choice of parameters
54 |
55 | #### Bipartite networks
56 |
57 | Bipartite networks should be indexed using the bi-adjacency matrix, meaning that the edge `(0, 0)` is from source node 0 to target node 0, and is not a self-loop.
58 | Moreover, bipartite networks should be permuted using `allow_self_loops=False` and `allow_antiparallel=True`.
59 |
60 | #### Directed and undirected networks
61 |
62 | For non-bipartite networks, the decisions of `allow_self_loops` and `allow_antiparallel` are not always the same.
63 | For undirected networks, set `allow_antiparallel=False`, as otherwise the edges (1, 0) and (0, 1), which represent the same edge, will be treated as separate.
64 | Antiparallel edges may or may not be allowed for directed networks, depending on context.
65 | Similarly, self-loops may or may not be allowed for directed or undirected networks, depending on the specific network being permuted.
66 |
67 | ## Libraries
68 |
69 | The XSwap library includes [Roaring Bitmaps](https://github.com/RoaringBitmap/CRoaring), available under the [Apache 2.0 license](https://github.com/RoaringBitmap/CRoaring/blob/LICENSE).
70 |
71 | ## Acknowledgments
72 |
73 | Development of this project has largely taken place in the [Greene Lab](http://www.greenelab.com/) at the University of Pennsylvania. As an open source project under the `hetio` organization, this repository is grateful for its community of maintainers, contributors, and users.
74 |
75 | This work is funded in part by the Gordon and Betty Moore Foundation’s Data-Driven Discovery Initiative through Grants [GBMF4552](https://www.moore.org/grant-detail?grantId=GBMF4552) to Casey Greene, [GBMF4560](https://www.moore.org/grant-detail?grantId=GBMF4560) to Blair Sullivan, and the National Institutes of Health’s National Human Genome Research Institute [R01 HG010067](http://grantome.com/grant/NIH/R01-HG010067-02).
76 |
--------------------------------------------------------------------------------
/xswap/src/xswap_wrapper.cpp:
--------------------------------------------------------------------------------
1 | #include "xswap.h"
2 |
3 | #define XSWAP_MODULE
4 |
5 | static Edges py_list_to_edges(PyObject *py_list) {
6 | int num_edges = (int)PyList_Size(py_list);
7 | int** edges_array = (int**)malloc(sizeof(int*) * num_edges);
8 |
9 | for (int i = 0; i < num_edges; i++) {
10 | edges_array[i] = (int*)malloc(sizeof(int) * 2);
11 | PyObject* py_tuple = PyList_GetItem(py_list, i);
12 | for (int j = 0; j < 2; j++) {
13 | PyObject* temp = PyTuple_GetItem(py_tuple, j);
14 | int value = (int)PyLong_AsLong(temp);
15 | edges_array[i][j] = value;
16 | }
17 | }
18 | Edges return_object;
19 | return_object.edge_array = edges_array;
20 | return_object.num_edges = num_edges;
21 | return return_object;
22 | }
23 |
24 | static PyObject* edge_to_py_tuple(int *edge) {
25 | PyObject* edge_tuple = PyTuple_New(2);
26 | for (int j = 0; j < 2; j++) {
27 | PyObject* node_id = PyLong_FromLong(edge[j]);
28 | PyTuple_SET_ITEM(edge_tuple, j, node_id);
29 | }
30 | return edge_tuple;
31 | }
32 |
33 | static PyObject* edges_to_py_list(Edges edges) {
34 | int num_edges = edges.num_edges;
35 | PyObject* py_list = PyList_New(num_edges);
36 |
37 | for (int i = 0; i < num_edges; i++) {
38 | PyObject* edge_tuple = edge_to_py_tuple(edges.edge_array[i]);
39 | PyList_SET_ITEM(py_list, i, edge_tuple);
40 | }
41 | return py_list;
42 | }
43 |
44 | static PyObject* stats_to_py_dict(statsCounter& stats) {
45 | PyObject* py_num_swaps = PyLong_FromLong(stats.num_swaps);
46 | PyObject* py_same_edge = PyLong_FromLong(stats.same_edge);
47 | PyObject* py_self_loop = PyLong_FromLong(stats.self_loop);
48 | PyObject* py_duplicate = PyLong_FromLong(stats.duplicate);
49 | PyObject* py_undir_duplicate = PyLong_FromLong(stats.undir_duplicate);
50 | PyObject* py_excluded = PyLong_FromLong(stats.excluded);
51 |
52 | PyObject* dict = PyDict_New();
53 | PyDict_SetItemString(dict, "swap_attempts", py_num_swaps);
54 | PyDict_SetItemString(dict, "same_edge", py_same_edge);
55 | PyDict_SetItemString(dict, "self_loop", py_self_loop);
56 | PyDict_SetItemString(dict, "duplicate", py_duplicate);
57 | PyDict_SetItemString(dict, "undir_duplicate", py_undir_duplicate);
58 | PyDict_SetItemString(dict, "excluded", py_excluded);
59 | return dict;
60 | }
61 |
62 | static PyObject* wrap_xswap(PyObject *self, PyObject *args) {
63 | // Get arguments from python and compute quantities where needed
64 | PyObject *py_edges, *py_excluded_edges;
65 | int max_id, num_swaps, seed, allow_self_loop, allow_antiparallel;
66 | unsigned long long int max_malloc;
67 | int parsed_successfully = PyArg_ParseTuple(args, "OOippiiK", &py_edges,
68 | &py_excluded_edges, &max_id, &allow_self_loop,
69 | &allow_antiparallel, &num_swaps, &seed, &max_malloc);
70 | if (!parsed_successfully)
71 | return NULL;
72 |
73 | // Load edges from python list
74 | Edges edges = py_list_to_edges(py_edges);
75 | edges.max_id = max_id;
76 | Edges excluded_edges = py_list_to_edges(py_excluded_edges);
77 |
78 | // Set the conditions under which new edges are accepted
79 | Conditions valid_cond;
80 | valid_cond.seed = seed;
81 | valid_cond.allow_self_loop = allow_self_loop;
82 | valid_cond.allow_antiparallel = allow_antiparallel;
83 | valid_cond.excluded_edges = excluded_edges;
84 |
85 | // Initialize stats counters for failure reasons
86 | statsCounter stats;
87 | stats.num_swaps = num_swaps;
88 |
89 | // Perform XSwap
90 | swap_edges(edges, num_swaps, valid_cond, &stats, max_malloc);
91 |
92 | // Get new edges as python list
93 | PyObject* py_list = edges_to_py_list(edges);
94 |
95 | // Get stats as python dict
96 | PyObject* stats_py_dict = stats_to_py_dict(stats);
97 |
98 | // Create and return a python tuple of new_edges, stats
99 | PyObject* return_tuple = PyTuple_New(2);
100 | PyTuple_SET_ITEM(return_tuple, 0, py_list);
101 | PyTuple_SET_ITEM(return_tuple, 1, stats_py_dict);
102 | for (int i = 0; i < edges.num_edges; i++) {
103 | free(edges.edge_array[i]);
104 | }
105 | free(edges.edge_array);
106 | for (int i = 0; i < valid_cond.excluded_edges.num_edges; i++) {
107 | free(valid_cond.excluded_edges.edge_array[i]);
108 | }
109 | free(valid_cond.excluded_edges.edge_array);
110 | return return_tuple;
111 | }
112 |
113 | static PyMethodDef XSwapMethods[] = {
114 | {"_xswap", wrap_xswap, METH_VARARGS, "Backend for edge permutation"},
115 | {NULL, NULL, 0, NULL}
116 | };
117 |
118 | static struct PyModuleDef xswapmodule = {
119 | PyModuleDef_HEAD_INIT,
120 | "_xswap_backend", /* name of module */
121 | NULL, /* module documentation, NULL */
122 | -1, /* -1 since the module keeps state in global variables. */
123 | XSwapMethods
124 | };
125 |
126 | PyMODINIT_FUNC PyInit__xswap_backend(void) {
127 | return PyModule_Create(&xswapmodule);
128 | }
129 |
--------------------------------------------------------------------------------
/tests/test_bitset.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 | #include "../xswap/src/xswap.h"
6 |
// Report an unexpected exception captured in `eptr`. A null pointer is a
// no-op; only exceptions derived from std::exception are caught and printed.
void handle_eptr(std::exception_ptr eptr) {
    if (!eptr) {
        return;
    }
    try {
        std::rethrow_exception(eptr);
    } catch (const std::exception& error) {
        std::cout << "Unexpected exception while attempting bad element access " << error.what() << "\n";
    }
}
16 |
17 | bool test_add(UncompressedBitSet edges_set) {
18 | int edge_to_add[2] = {1, 1};
19 | edges_set.add(edge_to_add);
20 | int** fake_edges = (int**)malloc(sizeof(int*) * 16);
21 | int counter = 0;
22 | for (int i = 0; i < 4; i++) {
23 | for (int j = 0; j < 4; j++) {
24 | fake_edges[counter] = (int*)malloc(sizeof(int) * 2);
25 | fake_edges[counter][0] = i;
26 | fake_edges[counter][1] = j;
27 | counter += 1;
28 | }
29 | }
30 | bool correctly_contains = edges_set.contains(edge_to_add);
31 | int num_incorrect = 0;
32 | for (int i = 0; i < 16; i++) {
33 | bool incorrectly_contains = edges_set.contains(fake_edges[i]);
34 | bool was_added = (fake_edges[i][0] == edge_to_add[0] && fake_edges[i][1] == edge_to_add[1]);
35 | if (incorrectly_contains and !was_added) {
36 | num_incorrect += 1;
37 | std::printf("Incorrectly contained: (%d, %d)\n", fake_edges[i][0], fake_edges[i][1]);
38 | }
39 | }
40 | free(fake_edges);
41 | if (num_incorrect == 0 && correctly_contains == true) {
42 | return true;
43 | } else {
44 | return false;
45 | }
46 | }
47 |
48 | bool test_remove(UncompressedBitSet edges_set) {
49 | int edge_to_add[2] = {1, 1};
50 | edges_set.add(edge_to_add);
51 | bool was_added = edges_set.contains(edge_to_add);
52 | edges_set.remove(edge_to_add);
53 | bool was_removed = !edges_set.contains(edge_to_add);
54 | bool passed = was_added && was_removed;
55 | if (!was_added)
56 | std::printf("Did not add edge properly");
57 | if (!was_removed)
58 | std::printf("Did not remove edge properly");
59 | return passed;
60 | }
61 |
62 | bool test_oob_insert(UncompressedBitSet edges_set) {
63 |
64 | int edge_to_add[2] = {4, 4};
65 | std::exception_ptr eptr;
66 | try {
67 | edges_set.add(edge_to_add);
68 | } catch(std::out_of_range) {
69 | return true;
70 | } catch(...) {
71 | eptr = std::current_exception();
72 | handle_eptr(eptr);
73 | return true;
74 | }
75 | std::printf("No exception on OOB insert\n");
76 | return false;
77 | }
78 |
79 | bool test_oob_access(UncompressedBitSet edges_set) {
80 | int edge_to_access[2] = {4, 4};
81 | std::exception_ptr eptr;
82 | try {
83 | edges_set.add(edge_to_access);
84 | } catch(std::out_of_range) {
85 | return true;
86 | } catch(...) {
87 | eptr = std::current_exception();
88 | handle_eptr(eptr);
89 | return true;
90 | }
91 | std::printf("No exception on OOB access\n");
92 | return false;
93 | }
94 |
95 | bool test_oob_remove(UncompressedBitSet edges_set) {
96 | int edge_to_access[2] = {4, 4};
97 | std::exception_ptr eptr;
98 | try {
99 | edges_set.add(edge_to_access);
100 | } catch(std::out_of_range) {
101 | return true;
102 | } catch(...) {
103 | eptr = std::current_exception();
104 | handle_eptr(eptr);
105 | return true;
106 | }
107 | std::printf("No exception on OOB removal\n");
108 | return false;
109 | }
110 |
111 | bool test_remove_nonexistent(UncompressedBitSet edges_set) {
112 | int edge_to_access[2] = {2, 2};
113 | std::exception_ptr eptr;
114 | try {
115 | edges_set.remove(edge_to_access);
116 | } catch(std::logic_error) {
117 | return true;
118 | } catch(...) {
119 | eptr = std::current_exception();
120 | handle_eptr(eptr);
121 | return true;
122 | }
123 | std::printf("No exception on removal of nonexisting element\n");
124 | return false;
125 | }
126 |
127 | bool test_insert_existing(UncompressedBitSet edges_set) {
128 | int edge_to_access[2] = {2, 2};
129 | edges_set.add(edge_to_access);
130 | std::exception_ptr eptr;
131 | try {
132 | edges_set.add(edge_to_access);
133 | } catch(std::logic_error) {
134 | return true;
135 | } catch(...) {
136 | eptr = std::current_exception();
137 | handle_eptr(eptr);
138 | return true;
139 | }
140 | std::printf("No exception on addition of existing element\n");
141 | return false;
142 | }
143 |
144 | main(int argc, char const *argv[]) {
145 | unsigned long long int max_malloc = 4000000;
146 | int num_tests = 7;
147 | bool test_passed[num_tests];
148 |
149 | UncompressedBitSet edges_set = UncompressedBitSet(3, max_malloc);
150 | test_passed[0] = test_add(edges_set);
151 | edges_set = UncompressedBitSet(3, max_malloc); // Reset so functions don't interfere
152 | test_passed[1] = test_remove(edges_set);
153 | test_passed[2] = test_oob_insert(edges_set);
154 | test_passed[3] = test_oob_access(edges_set);
155 | test_passed[4] = test_oob_remove(edges_set);
156 | edges_set = UncompressedBitSet(3, max_malloc);
157 | test_passed[5] = test_remove_nonexistent(edges_set);
158 | edges_set = UncompressedBitSet(3, max_malloc);
159 | test_passed[6] = test_insert_existing(edges_set);
160 |
161 | bool all_tests_passed = true;
162 | for (int i = 0; i < num_tests; i++) {
163 | all_tests_passed &= test_passed[i];
164 | }
165 |
166 | if (all_tests_passed) {
167 | std::printf("All tests passed\n");
168 | return 0;
169 | } else {
170 | std::printf("Test failure\n");
171 | return 1;
172 | }
173 | edges_set.free_array();
174 | }
175 |
--------------------------------------------------------------------------------
/xswap/src/bitset.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include "xswap.h"
4 |
5 | int CHAR_BITS = 8*sizeof(char);
6 |
// Cantor pairing function: maps the edge (source, target) to the single
// index (source + target) * (source + target + 1) / 2 + target.
size_t cantor_pair(int* edge) {
    const size_t first = edge[0];
    const size_t second = edge[1];
    const size_t diagonal = first + second;
    return (diagonal * (diagonal + 1) / 2) + second;
}
12 |
13 | UncompressedBitSet::UncompressedBitSet(int max_id, unsigned long long int max_malloc) {
14 | int max_pair[2] = {max_id, max_id};
15 | max_cantor = cantor_pair(max_pair);
16 | create_bitset(max_cantor, max_malloc);
17 | }
18 |
19 | UncompressedBitSet::UncompressedBitSet(Edges edges, unsigned long long int max_malloc) {
20 | int max_pair[2] = {edges.max_id, edges.max_id};
21 | max_cantor = cantor_pair(max_pair);
22 | create_bitset(max_cantor, max_malloc);
23 | for (int i = 0; i < edges.num_edges; i++) {
24 | add(edges.edge_array[i]);
25 | }
26 | }
27 |
28 | bool UncompressedBitSet::contains(int *edge) {
29 | size_t edge_cantor = cantor_pair(edge);
30 | if (edge_cantor > max_cantor)
31 | throw std::out_of_range("Attempting to check membership for out-of-bounds element.");
32 | return (bool)get_bit(bitset[edge_cantor / CHAR_BITS], edge_cantor % CHAR_BITS);
33 | }
34 |
35 | void UncompressedBitSet::add(int *edge) {
36 | size_t edge_cantor = cantor_pair(edge);
37 | if (edge_cantor > max_cantor) {
38 | throw std::out_of_range("Attempting to add an out-of-bounds element to the bitset.");
39 | }
40 | if (get_bit(bitset[edge_cantor / CHAR_BITS], edge_cantor % CHAR_BITS)) {
41 | throw std::logic_error("Attempting to add an existing element.");
42 | }
43 | set_bit_true(&bitset[edge_cantor / CHAR_BITS], edge_cantor % CHAR_BITS);
44 | }
45 |
46 | void UncompressedBitSet::remove(int *edge) {
47 | size_t edge_cantor = cantor_pair(edge);
48 | if (edge_cantor > max_cantor)
49 | throw std::out_of_range("Attempting to remove an out-of-bounds element.");
50 | if (!get_bit(bitset[edge_cantor / CHAR_BITS], edge_cantor % CHAR_BITS))
51 | throw std::logic_error("Attempting to remove a nonexisting element.");
52 | set_bit_false(&bitset[edge_cantor / CHAR_BITS], edge_cantor % CHAR_BITS);
53 | }
54 |
55 | void UncompressedBitSet::free_array() {
56 | free(bitset);
57 | }
58 |
59 | // num_elements corresponds to the minimum number of bits that are needed
60 | void UncompressedBitSet::create_bitset(size_t num_elements,
61 | unsigned long long int max_malloc) {
62 | // Minimum sufficient number of bytes for the array "ceil(num_elements / CHAR_BITS)"
63 | size_t bytes_needed = (num_elements + CHAR_BITS - (num_elements % CHAR_BITS)) / CHAR_BITS;
64 | if (bytes_needed > max_malloc) {
65 | throw std::runtime_error("Bitset requires too much memory.");
66 | }
67 | bitset = (char*)calloc(bytes_needed, 1);
68 | }
69 |
70 | /* Gets the bit from byte `word` at position `bit_position`. In the array, bits
71 | correspond to cantor pair values 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, etc. To access
72 | the bit corresponding to cantor pair value 9, call `get_bit` with `word` equal
73 | to the second bit and `bit_position` equal to 1 (ie. the second bit).
74 | `word >> (7 - bit_position)` puts the selected bit in the least significant position */
75 | char UncompressedBitSet::get_bit(char word, char bit_position) {
76 | return (word >> (7 - bit_position)) & 0x1;
77 | }
78 |
79 | void UncompressedBitSet::set_bit_true(char* word, char bit_position) {
80 | *word |= (0x1 << (7 - bit_position));
81 | }
82 |
83 | void UncompressedBitSet::set_bit_false(char* word, char bit_position) {
84 | *word &= ~(0x1 << (7 - bit_position));
85 | }
86 |
87 | RoaringBitSet::RoaringBitSet(Edges edges) {
88 | for (int i = 0; i < edges.num_edges; i++) {
89 | add(edges.edge_array[i]);
90 | }
91 | }
92 |
93 | bool RoaringBitSet::contains(int *edge) {
94 | int edge_cantor = cantor_pair(edge);
95 | return bitmap.contains(edge_cantor);
96 | }
97 |
98 | void RoaringBitSet::add(int *edge) {
99 | int edge_cantor = cantor_pair(edge);
100 | bool success = bitmap.addChecked(edge_cantor);
101 | if (!success) {
102 | throw std::logic_error("Attempting to add an existing element.");
103 | }
104 | }
105 |
106 | void RoaringBitSet::remove(int *edge) {
107 | int edge_cantor = cantor_pair(edge);
108 | bool success = bitmap.removeChecked(edge_cantor);
109 | if (!success) {
110 | throw std::logic_error("Attempting to remove a nonexisting element.");
111 | }
112 | }
113 |
114 | BitSet::BitSet(Edges edges, unsigned long long int max_malloc) {
115 | int max_pair[2] = {edges.max_id, edges.max_id};
116 | size_t max_cantor = cantor_pair(max_pair);
117 |
118 | if (max_cantor < max_malloc) {
119 | use_compressed = false;
120 | uncompressed_set = UncompressedBitSet(edges, max_malloc);
121 | } else {
122 | runtime_warning_roaring();
123 | use_compressed = true;
124 | compressed_set = RoaringBitSet(edges);
125 | }
126 | }
127 |
128 | PyObject *BitSet::runtime_warning_roaring(void) {
129 | // Roaring bitset is significantly slower, but used because of large network sizes
130 | PyErr_WarnEx(PyExc_RuntimeWarning, "Using Roaring bitset because of the large number of edges.", 2);
131 | return NULL;
132 | }
133 |
134 | bool BitSet::contains(int *edge) {
135 | if (use_compressed) {
136 | return compressed_set.contains(edge);
137 | } else {
138 | return uncompressed_set.contains(edge);
139 | }
140 | }
141 |
142 | void BitSet::add(int *edge) {
143 | if (use_compressed) {
144 | return compressed_set.add(edge);
145 | } else {
146 | return uncompressed_set.add(edge);
147 | }
148 | }
149 |
150 | void BitSet::remove(int *edge) {
151 | if (use_compressed) {
152 | return compressed_set.remove(edge);
153 | } else {
154 | return uncompressed_set.remove(edge);
155 | }
156 | }
157 |
158 | void BitSet::free_array() {
159 | if (use_compressed) {
160 | return;
161 | } else {
162 | uncompressed_set.free_array();
163 | }
164 | }
165 |
--------------------------------------------------------------------------------
/xswap/prior.py:
--------------------------------------------------------------------------------
1 | from typing import List, Tuple
2 |
3 | import numpy
4 | import pandas
5 | import scipy.sparse
6 |
7 | import xswap.network_formats
8 |
9 |
def compute_xswap_occurrence_matrix(edge_list: List[Tuple[int, int]],
                                    n_permutations: int,
                                    shape: Tuple[int, int],
                                    allow_self_loops: bool = False,
                                    allow_antiparallel: bool = False,
                                    sparse: bool = True,
                                    swap_multiplier: float = 10,
                                    initial_seed: int = 0,
                                    max_malloc: int = 4000000000):
    """
    Count, for every node pair in a network, the number of degree-preserving
    permutations in which an edge between the pair appears. Dividing these
    counts by the number of permutations gives the empirical XSwap prior: the
    probability of an edge existing between two nodes given only the
    network's degree sequence.

    Parameters
    ----------
    edge_list : List[Tuple[int, int]]
        Edge list representing the graph whose XSwap edge priors are to be
        computed. Tuples contain integer values representing nodes. No value
        should be greater than C++'s `INT_MAX`, in this case 2_147_483_647.
        An adjacency matrix will be created assuming that a node's value is its
        index in the matrix. If not, map edges (identifiers can be string or
        otherwise) using `xswap.preprocessing.map_str_edges`.
    n_permutations : int
        The number of permuted networks used to compute the empirical XSwap prior
    shape : Tuple[int, int]
        The shape of the matrix to be returned. In other words, a tuple of the
        number of source and target nodes.
    allow_self_loops : bool
        Whether to allow edges like (0, 0). In the case of bipartite graphs,
        such an edge represents a connection between two distinct nodes, while
        in other graphs it may represent an edge from a node to itself, in which
        case an edge may or may not be meaningful depending on context.
    allow_antiparallel : bool
        Whether to allow simultaneous edges like (0, 1) and (1, 0). In the case
        of bipartite graphs, these edges represent two connections between four
        distinct nodes, while for other graphs, these may be connections between
        the same two nodes.
    sparse : bool
        Whether to use a sparse matrix when adding up edge occurrences across
        permutations. If large changes in sparsity are expected, a dense
        array may be preferable.
    swap_multiplier : float
        The number of edge swap attempts is determined by the product of the
        number of existing edges and multiplier. For example, if five edges are
        passed and multiplier is set to 10, 50 swaps will be attempted. Non-integer
        products will be rounded down to the nearest integer.
    initial_seed : int
        Random seed that will be passed to the C++ Mersenne Twister 19937 random
        number generator. `initial_seed` will be used for the first permutation,
        and the seed used for each subsequent permutation will be incremented by
        one. For example, if `initial_seed` is 0 and `n_permutations` is 2, then
        the two permutations will pass seeds 0 and 1, respectively.
    max_malloc : int (`unsigned long long int` in C)
        The maximum amount of memory to be allocated using `malloc` when making
        a bitset to hold edges. An uncompressed bitset is implemented for
        holding edges that is significantly faster than alternatives. However,
        it is memory-inefficient and will not be used if more memory is required
        than `max_malloc`. Above the threshold, a Roaring bitset will be used.

    Returns
    -------
    edge_counter : scipy.sparse matrix (if `sparse`) or numpy.ndarray
        Adjacency matrix with entries equal to the number of permutations in
        which a given edge appeared

    Raises
    ------
    ValueError
        If `edge_list` is empty or contains duplicate edges.
    """
    # Validate the input first so that bad arguments are reported clearly and
    # without requiring the compiled backend to be importable.
    if len(edge_list) == 0:
        # Without this check the failure would surface below as the opaque
        # "max() arg is an empty sequence" ValueError.
        raise ValueError("Edge list is empty. At least one edge is required.")
    if len(edge_list) != len(set(edge_list)):
        raise ValueError("Edge list contained duplicate edges. "
                         "XSwap does not support multigraphs.")

    # The compiled extension is imported lazily, at call time
    import xswap._xswap_backend

    # Number of swap attempts per permutation
    num_swaps = int(swap_multiplier * len(edge_list))

    # Maximum node ID (passed to the backend)
    max_id = max(map(max, edge_list))

    if sparse:
        edge_counter = scipy.sparse.csc_matrix(shape, dtype=int)
    else:
        edge_counter = numpy.zeros(shape, dtype=int)

    for i in range(n_permutations):
        # Every permutation starts from the original edges with its own seed
        permuted_edges, stats = xswap._xswap_backend._xswap(
            edge_list, [], max_id, allow_self_loops, allow_antiparallel,
            num_swaps, initial_seed + i, max_malloc)
        permuted_matrix = xswap.network_formats.edges_to_matrix(
            permuted_edges, add_reverse_edges=(not allow_antiparallel),
            shape=shape, dtype=int, sparse=sparse)
        edge_counter += permuted_matrix

    return edge_counter
102 |
103 |
def compute_xswap_priors(edge_list: List[Tuple[int, int]], n_permutations: int,
                         shape: Tuple[int, int], allow_self_loops: bool = False,
                         allow_antiparallel: bool = False, sparse: bool = True,
                         swap_multiplier: float = 10, initial_seed: int = 0,
                         max_malloc: int = 4000000000,
                         dtypes = {'id': numpy.uint16, 'degree': numpy.uint16,
                                   'edge': bool, 'xswap_prior': float},
                         ):
    """
    Compute the XSwap prior for every potential edge in the network. Uses
    degree-grouping to maximize the effective number of permutations for each
    node pair. That is, node pairs with the same source and target degrees can
    be grouped when computing the XSwap prior, allowing there to be more
    permutations for some node pairs than `n_permutations`.

    Note that the mechanics of this function are separated to minimize memory use.

    Parameters
    ----------
    edge_list : List[Tuple[int, int]]
        Edge list representing the graph whose XSwap edge priors are to be
        computed. Tuples contain integer values representing nodes. No value
        should be greater than C++'s `INT_MAX`, in this case 2_147_483_647.
        An adjacency matrix will be created assuming that a node's value is its
        index in the matrix. If not, map edges (identifiers can be string or
        otherwise) using `xswap.preprocessing.map_str_edges`.
    n_permutations : int
        The number of permuted networks used to compute the empirical XSwap prior
    shape : Tuple[int, int]
        The shape of the matrix to be returned. In other words, a tuple of the
        number of source and target nodes.
    allow_self_loops : bool
        Whether to allow edges like (0, 0). In the case of bipartite graphs,
        such an edge represents a connection between two distinct nodes, while
        in other graphs it may represent an edge from a node to itself, in which
        case an edge may or may not be meaningful depending on context.
    allow_antiparallel : bool
        Whether to allow simultaneous edges like (0, 1) and (1, 0). In the case
        of bipartite graphs, these edges represent two connections between four
        distinct nodes, while for other graphs, these may be connections between
        the same two nodes.
    sparse : bool
        Whether to use a sparse matrix when adding up edge occurrences across
        permutations. If large changes in sparsity are expected, a dense
        array may be preferable.
    swap_multiplier : float
        The number of edge swap attempts is determined by the product of the
        number of existing edges and multiplier. For example, if five edges are
        passed and multiplier is set to 10, 50 swaps will be attempted. Non-integer
        products will be rounded down to the nearest integer.
    initial_seed : int
        Random seed that will be passed to the C++ Mersenne Twister 19937 random
        number generator. `initial_seed` will be used for the first permutation,
        and the seed used for each subsequent permutation will be incremented by
        one. For example, if `initial_seed` is 0 and `n_permutations` is 2, then
        the two permutations will pass seeds 0 and 1, respectively.
    max_malloc : int (`unsigned long long int` in C)
        The maximum amount of memory to be allocated using `malloc` when making
        a bitset to hold edges. An uncompressed bitset is implemented for
        holding edges that is significantly faster than alternatives. However,
        it is memory-inefficient and will not be used if more memory is required
        than `max_malloc`. Above the threshold, a Roaring bitset will be used.
    dtypes : dict
        Dictionary mapping returned column types to dtypes. Keys should be
        `'id'`, `'degree'`, `'edge'`, and `'xswap_prior'`. `dtypes` need only
        be changed from its defaults if the values of `id` or `degree` are
        greater than the maxima in the default dtypes, or in cases where greater
        precision is desired. (`numpy.uint16` has a maximum value of 65535.)
        The dictionary is only read, never modified.

    Returns
    -------
    prior_df : pandas.DataFrame
        Columns are the following:
        [source_id, target_id, edge, source_degree, target_degree, xswap_prior]
    """
    # Compute the adjacency matrix of the original (unpermuted) network
    original_edges = xswap.network_formats.edges_to_matrix(
        edge_list, add_reverse_edges=(not allow_antiparallel), shape=shape,
        dtype=dtypes['edge'], sparse=True)

    # Setup DataFrame for recording prior data. One row per (source, target)
    # pair, in row-major order of the adjacency matrix.
    prior_df = pandas.DataFrame({
        'source_id': numpy.repeat(numpy.arange(shape[0], dtype=dtypes['id']), shape[1]),
        'target_id': numpy.tile(numpy.arange(shape[1], dtype=dtypes['id']), shape[0]),
        'edge': original_edges.toarray().flatten(),
    })
    del original_edges

    # Select the 'edge' column *before* aggregating so that only that column
    # is summed, and use the string aggregator name rather than the builtin.
    prior_df['source_degree'] = (
        prior_df
        .groupby('source_id')['edge']
        .transform('sum')
        .astype(dtypes['degree'])
    )
    # The id columns are dropped as soon as possible and rebuilt at the end
    # to keep peak memory low.
    del prior_df['source_id']

    prior_df['target_degree'] = (
        prior_df
        .groupby('target_id')['edge']
        .transform('sum')
        .astype(dtypes['degree'])
    )
    del prior_df['target_id']

    # Compute the number of occurrences of each edge across permutations
    edge_counter = compute_xswap_occurrence_matrix(
        edge_list=edge_list, n_permutations=n_permutations, shape=shape,
        allow_self_loops=allow_self_loops, allow_antiparallel=allow_antiparallel,
        sparse=sparse, swap_multiplier=swap_multiplier, initial_seed=initial_seed,
        max_malloc=max_malloc)

    # With `sparse=False` the occurrence counter is a dense array, which has
    # no `.toarray()` method; only densify when it is actually sparse.
    if scipy.sparse.issparse(edge_counter):
        edge_counter = edge_counter.toarray()
    prior_df['num_permuted_edges'] = numpy.asarray(edge_counter).ravel()
    del edge_counter

    # The number of edges that occurred across all node pairs with the same
    # `source_degree` and `target_degree`. This total is deliberately NOT cast
    # to `dtypes['degree']`: it grows with `n_permutations` and the group size
    # and would silently wrap around in a narrow integer type such as uint16.
    dgp_edge_count = (
        prior_df
        .groupby(['source_degree', 'target_degree'])['num_permuted_edges']
        .transform('sum')
        .values
    )
    del prior_df['num_permuted_edges']

    # The effective number of permutations for every node pair, incorporating
    # degree-grouping. 'edge' never holds missing values, so counting its
    # entries gives the number of node pairs in each degree group.
    num_dgp = (
        n_permutations
        * prior_df
        .groupby(['source_degree', 'target_degree'])['edge']
        .transform('count')
        .values
    )
    xswap_prior = (dgp_edge_count / num_dgp).astype(dtypes['xswap_prior'])
    del dgp_edge_count, num_dgp

    prior_df['xswap_prior'] = xswap_prior
    del xswap_prior

    # Restore the id columns and put the columns in their documented order
    prior_df = (
        prior_df
        .assign(
            source_id=numpy.repeat(numpy.arange(shape[0], dtype=dtypes['id']), shape[1]),
            target_id=numpy.tile(numpy.arange(shape[1], dtype=dtypes['id']), shape[0]),
        )
        .filter(items=['source_id', 'target_id', 'edge', 'source_degree',
                       'target_degree', 'xswap_prior'])
    )
    return prior_df
248 |
249 |
def approximate_xswap_prior(source_degree, target_degree, num_edges):
    """
    Approximate the XSwap prior by assuming that the XSwap Markov Chain is stationary.
    While this is not the case in reality, some networks' priors can be estimated
    very well using this equation.

    All arithmetic is carried out in floating point, so fixed-width integer
    inputs (for example `numpy.uint16` degree columns) cannot overflow in the
    degree product or wrap around in the `num_edges - source_degree -
    target_degree + 1` term.

    Parameters
    ----------
    source_degree : int, float, numpy.array, or pandas.Series
        The source degree for a single node pair or a number of source degrees.
        The type of object passed should match `target_degree`.
    target_degree : int, float, numpy.array, or pandas.Series
        The target degree for a single node pair or a number of target degrees.
        The type of object passed should match `source_degree`.
    num_edges : int or float
        The total number of edges in the network

    Returns
    -------
    approximate_prior : float, numpy.array, or pandas.Series
        Output type matches the types of `source_degree` and `target_degree`.
    """
    # Multiplying by 1.0 promotes scalars, arrays and Series alike to floating
    # point while preserving the container type (and a Series' index).
    source_degree = source_degree * 1.0
    target_degree = target_degree * 1.0

    degree_product = source_degree * target_degree
    remaining_edges = num_edges - source_degree - target_degree + 1
    return degree_product / (degree_product ** 2 + remaining_edges ** 2) ** 0.5
276 |
--------------------------------------------------------------------------------
/xswap/lib/roaring.hh:
--------------------------------------------------------------------------------
1 | /* auto-generated on Lun 14 jan 2019 11:35:33 EST. Do not edit! */
2 | #include "roaring.h"
3 | /* begin file /Users/dlemire/CVS/github/CRoaring/cpp/roaring.hh */
4 | /*
5 | A C++ header for Roaring Bitmaps.
6 | */
7 | #ifndef INCLUDE_ROARING_HH_
8 | #define INCLUDE_ROARING_HH_
9 |
10 | #include
11 |
12 | #include
13 | #include
14 | #include
15 | #include
16 |
17 | class RoaringSetBitForwardIterator;
18 |
19 | class Roaring {
20 | public:
21 | /**
22 | * Create an empty bitmap
23 | */
24 | Roaring() {
25 | ra_init(&roaring.high_low_container);
26 | roaring.copy_on_write = false;
27 | }
28 |
29 | /**
30 | * Construct a bitmap from a list of integer values.
31 | */
32 | Roaring(size_t n, const uint32_t *data) : Roaring() {
33 | roaring_bitmap_add_many(&roaring, n, data);
34 | }
35 |
36 | /**
37 | * Copy constructor
38 | */
39 | Roaring(const Roaring &r) {
40 | bool is_ok =
41 | ra_copy(&r.roaring.high_low_container, &roaring.high_low_container,
42 | r.roaring.copy_on_write);
43 | if (!is_ok) {
44 | throw std::runtime_error("failed memory alloc in constructor");
45 | }
46 | roaring.copy_on_write = r.roaring.copy_on_write;
47 | }
48 |
49 | /**
50 | * Move constructor. The moved object remains valid, i.e.
51 | * all methods can still be called on it.
52 | */
53 | Roaring(Roaring &&r) noexcept {
54 | roaring = std::move(r.roaring);
55 | r.roaring.copy_on_write = false;
56 | ra_init(&r.roaring.high_low_container);
57 | }
58 |
59 | /**
60 | * Construct a roaring object from the C struct.
61 | *
62 | * Passing a NULL point is unsafe.
63 | * the pointer to the C struct will be invalid after the call.
64 | */
65 | Roaring(roaring_bitmap_t *s) noexcept {
66 | // steal the interior struct
67 | roaring.high_low_container = s->high_low_container;
68 | roaring.copy_on_write = s->copy_on_write;
69 | // deallocate the old container
70 | free(s);
71 | }
72 |
73 | /**
74 | * Construct a bitmap from a list of integer values.
75 | */
76 | static Roaring bitmapOf(size_t n, ...) {
77 | Roaring ans;
78 | va_list vl;
79 | va_start(vl, n);
80 | for (size_t i = 0; i < n; i++) {
81 | ans.add(va_arg(vl, uint32_t));
82 | }
83 | va_end(vl);
84 | return ans;
85 | }
86 |
87 | /**
88 | * Add value x
89 | *
90 | */
91 | void add(uint32_t x) { roaring_bitmap_add(&roaring, x); }
92 |
93 | /**
94 | * Add value x
95 | * Returns true if a new value was added, false if the value was already existing.
96 | */
97 | bool addChecked(uint32_t x) {
98 | return roaring_bitmap_add_checked(&roaring, x);
99 | }
100 |
101 | /**
102 | * add if all values from x (included) to y (excluded)
103 | */
104 | void addRange(const uint64_t x, const uint64_t y) {
105 | return roaring_bitmap_add_range(&roaring, x, y);
106 | }
107 |
108 | /**
109 | * Add value n_args from pointer vals
110 | *
111 | */
112 | void addMany(size_t n_args, const uint32_t *vals) {
113 | roaring_bitmap_add_many(&roaring, n_args, vals);
114 | }
115 |
116 | /**
117 | * Remove value x
118 | *
119 | */
120 | void remove(uint32_t x) { roaring_bitmap_remove(&roaring, x); }
121 |
122 | /**
123 | * Remove value x
124 | * Returns true if a new value was removed, false if the value was not existing.
125 | */
126 | bool removeChecked(uint32_t x) {
127 | return roaring_bitmap_remove_checked(&roaring, x);
128 | }
129 |
130 | /**
131 | * Return the largest value (if not empty)
132 | *
133 | */
134 | uint32_t maximum() const { return roaring_bitmap_maximum(&roaring); }
135 |
136 | /**
137 | * Return the smallest value (if not empty)
138 | *
139 | */
140 | uint32_t minimum() const { return roaring_bitmap_minimum(&roaring); }
141 |
142 | /**
143 | * Check if value x is present
144 | */
145 | bool contains(uint32_t x) const {
146 | return roaring_bitmap_contains(&roaring, x);
147 | }
148 |
149 | /**
150 | * Check if all values from x (included) to y (excluded) are present
151 | */
152 | bool containsRange(const uint64_t x, const uint64_t y) const {
153 | return roaring_bitmap_contains_range(&roaring, x, y);
154 | }
155 |
156 | /**
157 | * Destructor
158 | */
159 | ~Roaring() { ra_clear(&roaring.high_low_container); }
160 |
161 | /**
162 | * Copies the content of the provided bitmap, and
163 | * discard the current content.
164 | */
165 | Roaring &operator=(const Roaring &r) {
166 | ra_clear(&roaring.high_low_container);
167 | bool is_ok =
168 | ra_copy(&r.roaring.high_low_container, &roaring.high_low_container,
169 | r.roaring.copy_on_write);
170 | if (!is_ok) {
171 | throw std::runtime_error("failed memory alloc in assignment");
172 | }
173 | roaring.copy_on_write = r.roaring.copy_on_write;
174 | return *this;
175 | }
176 |
177 | /**
178 | * Moves the content of the provided bitmap, and
179 | * discard the current content.
180 | */
181 | Roaring &operator=(Roaring &&r) noexcept {
182 | ra_clear(&roaring.high_low_container);
183 | roaring = std::move(r.roaring);
184 | r.roaring.copy_on_write = false;
185 | ra_init(&r.roaring.high_low_container);
186 | return *this;
187 | }
188 |
189 | /**
190 | * Compute the intersection between the current bitmap and the provided
191 | * bitmap,
192 | * writing the result in the current bitmap. The provided bitmap is not
193 | * modified.
194 | */
195 | Roaring &operator&=(const Roaring &r) {
196 | roaring_bitmap_and_inplace(&roaring, &r.roaring);
197 | return *this;
198 | }
199 |
200 | /**
201 | * Compute the difference between the current bitmap and the provided
202 | * bitmap,
203 | * writing the result in the current bitmap. The provided bitmap is not
204 | * modified.
205 | */
206 | Roaring &operator-=(const Roaring &r) {
207 | roaring_bitmap_andnot_inplace(&roaring, &r.roaring);
208 | return *this;
209 | }
210 |
211 | /**
212 | * Compute the union between the current bitmap and the provided bitmap,
213 | * writing the result in the current bitmap. The provided bitmap is not
214 | * modified.
215 | *
216 | * See also the fastunion function to aggregate many bitmaps more quickly.
217 | */
218 | Roaring &operator|=(const Roaring &r) {
219 | roaring_bitmap_or_inplace(&roaring, &r.roaring);
220 | return *this;
221 | }
222 |
223 | /**
224 | * Compute the symmetric union between the current bitmap and the provided
225 | * bitmap,
226 | * writing the result in the current bitmap. The provided bitmap is not
227 | * modified.
228 | */
229 | Roaring &operator^=(const Roaring &r) {
230 | roaring_bitmap_xor_inplace(&roaring, &r.roaring);
231 | return *this;
232 | }
233 |
234 | /**
235 | * Exchange the content of this bitmap with another.
236 | */
237 | void swap(Roaring &r) { std::swap(r.roaring, roaring); }
238 |
239 | /**
240 | * Get the cardinality of the bitmap (number of elements).
241 | */
242 | uint64_t cardinality() const {
243 | return roaring_bitmap_get_cardinality(&roaring);
244 | }
245 |
246 | /**
247 | * Returns true if the bitmap is empty (cardinality is zero).
248 | */
249 | bool isEmpty() const { return roaring_bitmap_is_empty(&roaring); }
250 |
251 | /**
252 | * Returns true if the bitmap is subset of the other.
253 | */
254 | bool isSubset(const Roaring &r) const {
255 | return roaring_bitmap_is_subset(&roaring, &r.roaring);
256 | }
257 |
258 | /**
259 | * Returns true if the bitmap is strict subset of the other.
260 | */
261 | bool isStrictSubset(const Roaring &r) const {
262 | return roaring_bitmap_is_strict_subset(&roaring, &r.roaring);
263 | }
264 |
265 | /**
266 | * Convert the bitmap to an array. Write the output to "ans",
267 | * caller is responsible to ensure that there is enough memory
268 | * allocated
269 | * (e.g., ans = new uint32[mybitmap.cardinality()];)
270 | */
271 | void toUint32Array(uint32_t *ans) const {
272 | roaring_bitmap_to_uint32_array(&roaring, ans);
273 | }
274 | /**
275 | * to int array with pagination
276 | *
277 | */
278 | void rangeUint32Array(uint32_t *ans, size_t offset, size_t limit) const {
279 | roaring_bitmap_range_uint32_array(&roaring, offset, limit, ans);
280 | }
281 |
282 | /**
283 | * Return true if the two bitmaps contain the same elements.
284 | */
285 | bool operator==(const Roaring &r) const {
286 | return roaring_bitmap_equals(&roaring, &r.roaring);
287 | }
288 |
289 | /**
290 | * compute the negation of the roaring bitmap within a specified interval.
291 | * areas outside the range are passed through unchanged.
292 | */
293 | void flip(uint64_t range_start, uint64_t range_end) {
294 | roaring_bitmap_flip_inplace(&roaring, range_start, range_end);
295 | }
296 |
297 | /**
298 | * Remove run-length encoding even when it is more space efficient
299 | * return whether a change was applied
300 | */
301 | bool removeRunCompression() {
302 | return roaring_bitmap_remove_run_compression(&roaring);
303 | }
304 |
305 | /** convert array and bitmap containers to run containers when it is more
306 | * efficient;
307 | * also convert from run containers when more space efficient. Returns
308 | * true if the result has at least one run container.
309 | * Additional savings might be possible by calling shrinkToFit().
310 | */
311 | bool runOptimize() { return roaring_bitmap_run_optimize(&roaring); }
312 |
313 | /**
314 | * If needed, reallocate memory to shrink the memory usage. Returns
315 | * the number of bytes saved.
316 | */
317 | size_t shrinkToFit() { return roaring_bitmap_shrink_to_fit(&roaring); }
318 |
319 | /**
320 | * Iterate over the bitmap elements. The function iterator is called once for
321 | * all the values with ptr (can be NULL) as the second parameter of each call.
322 | *
323 | * roaring_iterator is simply a pointer to a function that returns bool
324 | * (true means that the iteration should continue while false means that it
325 | * should stop), and takes (uint32_t,void*) as inputs.
326 | */
327 | void iterate(roaring_iterator iterator, void *ptr) const {
328 | roaring_iterate(&roaring, iterator, ptr);
329 | }
330 |
331 | /**
332 | * If the size of the roaring bitmap is strictly greater than rank, then
333 | * this function returns true and set element to the element of given rank.
334 | * Otherwise, it returns false.
335 | */
336 | bool select(uint32_t rnk, uint32_t *element) const {
337 | return roaring_bitmap_select(&roaring, rnk, element);
338 | }
339 |
340 | /**
341 | * Computes the size of the intersection between two bitmaps.
342 | *
343 | */
344 | uint64_t and_cardinality(const Roaring &r) const {
345 | return roaring_bitmap_and_cardinality(&roaring, &r.roaring);
346 | }
347 |
348 | /**
349 | * Check whether the two bitmaps intersect.
350 | *
351 | */
352 | bool intersect(const Roaring &r) const {
353 | return roaring_bitmap_intersect(&roaring, &r.roaring);
354 | }
355 |
356 | /**
357 | * Computes the Jaccard index between two bitmaps. (Also known as the
358 | * Tanimoto distance,
359 | * or the Jaccard similarity coefficient)
360 | *
361 | * The Jaccard index is undefined if both bitmaps are empty.
362 | *
363 | */
364 | double jaccard_index(const Roaring &r) const {
365 | return roaring_bitmap_jaccard_index(&roaring, &r.roaring);
366 | }
367 |
368 | /**
369 | * Computes the size of the union between two bitmaps.
370 | *
371 | */
372 | uint64_t or_cardinality(const Roaring &r) const {
373 | return roaring_bitmap_or_cardinality(&roaring, &r.roaring);
374 | }
375 |
376 | /**
377 | * Computes the size of the difference (andnot) between two bitmaps.
378 | *
379 | */
380 | uint64_t andnot_cardinality(const Roaring &r) const {
381 | return roaring_bitmap_andnot_cardinality(&roaring, &r.roaring);
382 | }
383 |
384 | /**
385 | * Computes the size of the symmetric difference (andnot) between two
386 | * bitmaps.
387 | *
388 | */
389 | uint64_t xor_cardinality(const Roaring &r) const {
390 | return roaring_bitmap_xor_cardinality(&roaring, &r.roaring);
391 | }
392 |
393 | /**
394 | * Returns the number of integers that are smaller or equal to x.
395 | */
396 | uint64_t rank(uint32_t x) const { return roaring_bitmap_rank(&roaring, x); }
397 |
398 | /**
399 | * write a bitmap to a char buffer. This is meant to be compatible with
400 | * the
401 | * Java and Go versions. Returns how many bytes were written which should be
402 | * getSizeInBytes().
403 | *
404 | * Setting the portable flag to false enable a custom format that
405 | * can save space compared to the portable format (e.g., for very
406 | * sparse bitmaps).
407 | *
408 | * Boost users can serialize bitmaps in this manner:
409 | *
410 | * BOOST_SERIALIZATION_SPLIT_FREE(Roaring)
411 | * namespace boost {
412 | * namespace serialization {
413 | *
414 | * template
415 | * void save(Archive& ar, const Roaring& bitmask,
416 | * const unsigned int version) {
417 | * std::size_t expected_size_in_bytes = bitmask.getSizeInBytes();
418 | * std::vector buffer(expected_size_in_bytes);
419 | * std::size_t size_in_bytes = bitmask.write(buffer.data());
420 | *
421 | * ar& size_in_bytes;
422 | * ar& boost::serialization::make_binary_object(buffer.data(),
423 | * size_in_bytes);
424 | * }
425 | * template
426 | * void load(Archive& ar, Roaring& bitmask,
427 | * const unsigned int version) {
428 | * std::size_t size_in_bytes = 0;
429 | * ar& size_in_bytes;
430 | * std::vector buffer(size_in_bytes);
431 | * ar& boost::serialization::make_binary_object(buffer.data(),
432 | * size_in_bytes);
433 | * bitmask = Roaring::readSafe(buffer.data(), size_in_bytes);
434 | *}
435 | *} // namespace serialization
436 | *} // namespace boost
437 | */
438 | size_t write(char *buf, bool portable = true) const {
439 | if (portable)
440 | return roaring_bitmap_portable_serialize(&roaring, buf);
441 | else
442 | return roaring_bitmap_serialize(&roaring, buf);
443 | }
444 |
445 | /**
446 | * read a bitmap from a serialized version. This is meant to be compatible
447 | * with the Java and Go versions.
448 | *
449 | * Setting the portable flag to false enable a custom format that
450 | * can save space compared to the portable format (e.g., for very
451 | * sparse bitmaps).
452 | *
453 | * This function is unsafe in the sense that if you provide bad data,
454 | * many, many bytes could be read. See also readSafe.
455 | */
456 | static Roaring read(const char *buf, bool portable = true) {
457 | roaring_bitmap_t * r = portable ? roaring_bitmap_portable_deserialize(buf) : roaring_bitmap_deserialize(buf);
458 | if (r == NULL) {
459 | throw std::runtime_error("failed alloc while reading");
460 | }
461 | return Roaring(r);
462 | }
463 | /**
464 | * read a bitmap from a serialized version, reading no more than maxbytes bytes.
465 | * This is meant to be compatible with the Java and Go versions.
466 | *
467 | */
468 | static Roaring readSafe(const char *buf, size_t maxbytes) {
469 | roaring_bitmap_t * r = roaring_bitmap_portable_deserialize_safe(buf,maxbytes);
470 | if (r == NULL) {
471 | throw std::runtime_error("failed alloc while reading");
472 | }
473 | return Roaring(r);
474 | }
475 | /**
476 | * How many bytes are required to serialize this bitmap (meant to be
477 | * compatible
478 | * with Java and Go versions)
479 | *
480 | * Setting the portable flag to false enable a custom format that
481 | * can save space compared to the portable format (e.g., for very
482 | * sparse bitmaps).
483 | */
484 | size_t getSizeInBytes(bool portable = true) const {
485 | if (portable)
486 | return roaring_bitmap_portable_size_in_bytes(&roaring);
487 | else
488 | return roaring_bitmap_size_in_bytes(&roaring);
489 | }
490 |
491 | /**
492 | * Computes the intersection between two bitmaps and returns new bitmap.
493 | * The current bitmap and the provided bitmap are unchanged.
494 | */
495 | Roaring operator&(const Roaring &o) const {
496 | roaring_bitmap_t *r = roaring_bitmap_and(&roaring, &o.roaring);
497 | if (r == NULL) {
498 | throw std::runtime_error("failed materalization in and");
499 | }
500 | return Roaring(r);
501 | }
502 |
503 | /**
504 | * Computes the difference between two bitmaps and returns new bitmap.
505 | * The current bitmap and the provided bitmap are unchanged.
506 | */
507 | Roaring operator-(const Roaring &o) const {
508 | roaring_bitmap_t *r = roaring_bitmap_andnot(&roaring, &o.roaring);
509 | if (r == NULL) {
510 | throw std::runtime_error("failed materalization in andnot");
511 | }
512 | return Roaring(r);
513 | }
514 |
515 | /**
516 | * Computes the union between two bitmaps and returns new bitmap.
517 | * The current bitmap and the provided bitmap are unchanged.
518 | */
519 | Roaring operator|(const Roaring &o) const {
520 | roaring_bitmap_t *r = roaring_bitmap_or(&roaring, &o.roaring);
521 | if (r == NULL) {
522 | throw std::runtime_error("failed materalization in or");
523 | }
524 | return Roaring(r);
525 | }
526 |
527 | /**
528 | * Computes the symmetric union between two bitmaps and returns new bitmap.
529 | * The current bitmap and the provided bitmap are unchanged.
530 | */
531 | Roaring operator^(const Roaring &o) const {
532 | roaring_bitmap_t *r = roaring_bitmap_xor(&roaring, &o.roaring);
533 | if (r == NULL) {
534 | throw std::runtime_error("failed materalization in xor");
535 | }
536 | return Roaring(r);
537 | }
538 |
539 | /**
540 | * Whether or not we apply copy and write.
541 | */
542 | void setCopyOnWrite(bool val) { roaring.copy_on_write = val; }
543 |
544 | /**
545 | * Print the content of the bitmap
546 | */
547 | void printf() const { roaring_bitmap_printf(&roaring); }
548 |
549 | /**
550 | * Print the content of the bitmap into a string
551 | */
552 | std::string toString() const {
553 | struct iter_data {
554 | std::string str;
555 | char first_char = '{';
556 | } outer_iter_data;
557 | if (!isEmpty()) {
558 | iterate(
559 | [](uint32_t value, void *inner_iter_data) -> bool {
560 | ((iter_data *)inner_iter_data)->str +=
561 | ((iter_data *)inner_iter_data)->first_char;
562 | ((iter_data *)inner_iter_data)->str +=
563 | std::to_string(value);
564 | ((iter_data *)inner_iter_data)->first_char = ',';
565 | return true;
566 | },
567 | (void *)&outer_iter_data);
568 | } else
569 | outer_iter_data.str = '{';
570 | outer_iter_data.str += '}';
571 | return outer_iter_data.str;
572 | }
573 |
574 | /**
575 | * Whether or not copy and write is active.
576 | */
577 | bool getCopyOnWrite() const { return roaring.copy_on_write; }
578 |
579 | /**
580 | * computes the logical or (union) between "n" bitmaps (referenced by a
581 | * pointer).
582 | */
583 | static Roaring fastunion(size_t n, const Roaring **inputs) {
584 | const roaring_bitmap_t **x =
585 | (const roaring_bitmap_t **)malloc(n * sizeof(roaring_bitmap_t *));
586 | if (x == NULL) {
587 | throw std::runtime_error("failed memory alloc in fastunion");
588 | }
589 | for (size_t k = 0; k < n; ++k) x[k] = &inputs[k]->roaring;
590 |
591 | roaring_bitmap_t *c_ans = roaring_bitmap_or_many(n, x);
592 | if (c_ans == NULL) {
593 | free(x);
594 | throw std::runtime_error("failed memory alloc in fastunion");
595 | }
596 | Roaring ans(c_ans);
597 | free(x);
598 | return ans;
599 | }
600 |
601 | typedef RoaringSetBitForwardIterator const_iterator;
602 |
603 | /**
604 | * Returns an iterator that can be used to access the position of the
605 | * set bits. The running time complexity of a full scan is proportional to
606 | * the
607 | * number
608 | * of set bits: be aware that if you have long strings of 1s, this can be
609 | * very inefficient.
610 | *
611 | * It can be much faster to use the toArray method if you want to
612 | * retrieve the set bits.
613 | */
614 | const_iterator begin() const;
615 |
616 | /**
617 | * A bogus iterator that can be used together with begin()
618 | * for constructions such as for(auto i = b.begin();
619 | * i!=b.end(); ++i) {}
620 | */
621 | const_iterator &end() const;
622 |
623 | roaring_bitmap_t roaring;
624 | };
625 |
626 | /**
627 | * Used to go through the set bits. Not optimally fast, but convenient.
628 | */
629 | class RoaringSetBitForwardIterator final {
630 | public:
631 | typedef std::forward_iterator_tag iterator_category;
632 | typedef uint32_t *pointer;
633 | typedef uint32_t &reference_type;
634 | typedef uint32_t value_type;
635 | typedef int32_t difference_type;
636 | typedef RoaringSetBitForwardIterator type_of_iterator;
637 |
638 | /**
639 | * Provides the location of the set bit.
640 | */
641 | value_type operator*() const { return i.current_value; }
642 |
643 | bool operator<(const type_of_iterator &o) {
644 | if (!i.has_value) return false;
645 | if (!o.i.has_value) return true;
646 | return i.current_value < *o;
647 | }
648 |
649 | bool operator<=(const type_of_iterator &o) {
650 | if (!o.i.has_value) return true;
651 | if (!i.has_value) return false;
652 | return i.current_value <= *o;
653 | }
654 |
655 | bool operator>(const type_of_iterator &o) {
656 | if (!o.i.has_value) return false;
657 | if (!i.has_value) return true;
658 | return i.current_value > *o;
659 | }
660 |
661 | bool operator>=(const type_of_iterator &o) {
662 | if (!i.has_value) return true;
663 | if (!o.i.has_value) return false;
664 | return i.current_value >= *o;
665 | }
666 |
667 | /**
668 | * Move the iterator to the first value >= val.
669 | */
670 | void equalorlarger(uint32_t val) {
671 | roaring_move_uint32_iterator_equalorlarger(&i,val);
672 | }
673 |
674 | type_of_iterator &operator++() { // ++i, must returned inc. value
675 | roaring_advance_uint32_iterator(&i);
676 | return *this;
677 | }
678 |
679 | type_of_iterator operator++(int) { // i++, must return orig. value
680 | RoaringSetBitForwardIterator orig(*this);
681 | roaring_advance_uint32_iterator(&i);
682 | return orig;
683 | }
684 |
685 | type_of_iterator& operator--() { // prefix --
686 | roaring_previous_uint32_iterator(&i);
687 | return *this;
688 | }
689 |
690 | type_of_iterator operator--(int) { // postfix --
691 | RoaringSetBitForwardIterator orig(*this);
692 | roaring_previous_uint32_iterator(&i);
693 | return orig;
694 | }
695 |
696 | bool operator==(const RoaringSetBitForwardIterator &o) const {
697 | return i.current_value == *o && i.has_value == o.i.has_value;
698 | }
699 |
700 | bool operator!=(const RoaringSetBitForwardIterator &o) const {
701 | return i.current_value != *o || i.has_value != o.i.has_value;
702 | }
703 |
704 | RoaringSetBitForwardIterator(const Roaring &parent,
705 | bool exhausted = false) {
706 | if (exhausted) {
707 | i.parent = &parent.roaring;
708 | i.container_index = INT32_MAX;
709 | i.has_value = false;
710 | i.current_value = UINT32_MAX;
711 | } else {
712 | roaring_init_iterator(&parent.roaring, &i);
713 | }
714 | }
715 |
716 | roaring_uint32_iterator_t i;
717 | };
718 |
719 | inline RoaringSetBitForwardIterator Roaring::begin() const {
720 | return RoaringSetBitForwardIterator(*this);
721 | }
722 |
723 | inline RoaringSetBitForwardIterator &Roaring::end() const {
724 | static RoaringSetBitForwardIterator e(*this, true);
725 | return e;
726 | }
727 |
728 | #endif /* INCLUDE_ROARING_HH_ */
729 | /* end file /Users/dlemire/CVS/github/CRoaring/cpp/roaring.hh */
730 | /* begin file /Users/dlemire/CVS/github/CRoaring/cpp/roaring64map.hh */
731 | /*
732 | A C++ header for 64-bit Roaring Bitmaps, implemented by way of a map of many
733 | 32-bit Roaring Bitmaps.
734 | */
735 | #ifndef INCLUDE_ROARING_64_MAP_HH_
736 | #define INCLUDE_ROARING_64_MAP_HH_
737 |
738 | #include
739 | #include
740 | #include
741 | #include
742 | #include