├── tests-require.txt
├── docs
    └── img
    │   ├── simple_graph.png
    │   ├── bipartite_graph.png
    │   ├── directed_selfloop_graph.png
    │   ├── directed_antiparallel_graph.png
    │   └── xswap.svg
├── .gitignore
├── ci
    ├── build-wheels.sh
    └── deploy.sh
├── xswap
    ├── __init__.py
    ├── src
    │   ├── xswap.h
    │   ├── xswap.cpp
    │   ├── xswap_wrapper.cpp
    │   └── bitset.cpp
    ├── network_formats.py
    ├── permute.py
    ├── preprocessing.py
    ├── prior.py
    └── lib
    │   └── roaring.hh
├── LICENSE
├── tests
    ├── test_time.py
    ├── test_permute.py
    ├── test_formats.py
    ├── test_roaring.cpp
    ├── test_prior.py
    └── test_bitset.cpp
├── setup.py
├── .travis.yml
└── README.md


/tests-require.txt:
--------------------------------------------------------------------------------
1 | numpy
2 | pandas
3 | pytest
4 | requests
5 | scipy
6 | setuptools
7 | 


--------------------------------------------------------------------------------
/docs/img/simple_graph.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hetio/xswap/HEAD/docs/img/simple_graph.png


--------------------------------------------------------------------------------
/docs/img/bipartite_graph.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hetio/xswap/HEAD/docs/img/bipartite_graph.png


--------------------------------------------------------------------------------
/docs/img/directed_selfloop_graph.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hetio/xswap/HEAD/docs/img/directed_selfloop_graph.png


--------------------------------------------------------------------------------
/docs/img/directed_antiparallel_graph.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hetio/xswap/HEAD/docs/img/directed_antiparallel_graph.png


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | docs/output/
 2 | tests/permutation_stats.txt
 3 | tests/*.o
 4 | build/
 5 | dist/
 6 | .vscode/
 7 | __pycache__/
 8 | .pytest_cache/
 9 | **.so
10 | xswap.egg-info/
11 | 


--------------------------------------------------------------------------------
/ci/build-wheels.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Compile wheels
 4 | /opt/python/cp35-cp35m/bin/pip wheel /io/ -w wheelhouse/
 5 | /opt/python/cp36-cp36m/bin/pip wheel /io/ -w wheelhouse/
 6 | /opt/python/cp37-cp37m/bin/pip wheel /io/ -w wheelhouse/
 7 | 
 8 | # Bundle external shared libraries into the wheels
 9 | for whl in wheelhouse/**.whl; do
10 |     auditwheel repair "$whl" --plat $PLAT -w /io/wheelhouse/
11 | done
12 | 


--------------------------------------------------------------------------------
/xswap/__init__.py:
--------------------------------------------------------------------------------
 1 | from xswap import network_formats
 2 | from xswap import preprocessing
 3 | from xswap import prior
 4 | from xswap.permute import permute_edge_list
 5 | 
 6 | __version__ = '0.0.2'
 7 | 
 8 | __all__ = [
 9 |     'network_formats.edges_to_matrix',
10 |     'network_formats.matrix_to_edges',
11 |     'permute_edge_list',
12 |     'preprocessing.load_str_edges',
13 |     'preprocessing.load_processed_edges',
14 |     'preprocessing.map_str_edges',
15 |     'prior.compute_xswap_occurrence_matrix',
16 |     'prior.compute_xswap_priors',
17 |     'prior.approximate_xswap_prior',
18 | ]
19 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | BSD 2-Clause License
 2 | 
 3 | Copyright (c) 2018, Greene Laboratory
 4 | All rights reserved.
 5 | 
 6 | Redistribution and use in source and binary forms, with or without
 7 | modification, are permitted provided that the following conditions are met:
 8 | 
 9 | * Redistributions of source code must retain the above copyright notice, this
10 |   list of conditions and the following disclaimer.
11 | 
12 | * Redistributions in binary form must reproduce the above copyright notice,
13 |   this list of conditions and the following disclaimer in the documentation
14 |   and/or other materials provided with the distribution.
15 | 
16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
20 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | 


--------------------------------------------------------------------------------
/tests/test_time.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import time
 3 | 
 4 | import requests
 5 | 
 6 | import xswap
 7 | 
 8 | test_directory = os.path.dirname(os.path.realpath(__file__)) + '/'
 9 | 
10 | 
def load_edges():
    """
    Download the pinned `GiG_edges_reduced.txt` edge list and parse it.

    Returns
    -------
    edges : List[Tuple[int, int]]
        One `(source, target)` tuple per non-empty `source,target` line.

    Raises
    ------
    requests.HTTPError
        If the download does not return a successful status code.
    """
    edges_url = "https://github.com/greenelab/xswap/raw/{}/{}".format(
        "8c31b4cbdbbf2cfa5018b1277bbd0e9f6263e573", "graphs/GiG_edges_reduced.txt")
    response = requests.get(edges_url)
    # Fail loudly here rather than trying to parse an HTML error page as edges
    response.raise_for_status()
    edges = list()
    for line in response.iter_lines():
        # Tolerate blank lines (e.g. a trailing newline) instead of crashing
        # on the two-value unpacking below.
        if not line.strip():
            continue
        source, target = line.decode('utf-8').split(',')
        edges.append((int(source), int(target)))
    return edges
21 | 
22 | 
def test_time():
    """
    Permute the downloaded edge list, check that the result differs from the
    input and that the permutation finished in under five seconds, then write
    a one-line summary to `permutation_stats.txt` next to this test file.
    """
    edges = load_edges()
    # perf_counter is monotonic, so the measurement cannot be distorted by
    # system clock adjustments the way time.time() can.
    start = time.perf_counter()
    new_edges, stats = xswap.permute_edge_list(edges)
    time_diff = time.perf_counter() - start
    print("{:.4f}  seconds elapsed.".format(time_diff))
    assert edges != new_edges
    assert time_diff < 5

    # Distinct edges present both before and after the permutation
    num_repeats = len(set(edges) & set(new_edges))
    # The message below labels this value "percent", so scale the fraction
    percent_unchanged = 100 * num_repeats / len(edges)
    with open(test_directory + 'permutation_stats.txt', 'w') as f:
        # NOTE(review): 10*len(edges) presumably mirrors the default swap
        # multiplier of permute_edge_list -- confirm against xswap/permute.py.
        f.write('Runtime: {:.3f} sec. {:.3f} percent unchanged of {} total edges after '
                '{} swap attempts\n'.format(time_diff, percent_unchanged, len(edges),
                                            10*len(edges)))
43 | 


--------------------------------------------------------------------------------
/ci/deploy.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | ## deploy.sh: run during a Travis CI build to deploy output directory to the gh-pages branch on GitHub.
 4 | ## References
 5 | ## - https://github.com/manubot/rootstock/blob/ddb0288895cd5bc5dab117fb366c52216a717d0e/ci/deploy.sh
 6 | ## - https://github.com/wp-cli/wp-cli/issues/3798
 7 | ## - https://github.com/manubot/catalog/blob/fd0ef6a999cca38890023eb65f19d1b87e96e83c/deploy.sh#L1-L45
 8 | 
 9 | # Set options for extra caution & debugging
10 | set -o errexit \
11 |     -o nounset \
12 |     -o pipefail
13 | 
14 | eval "$(ssh-agent -s)"
15 | # Ensure command traces are disabled while dealing with the private key
16 | [[ "$SHELLOPTS" =~ xtrace ]] && XTRACE_ON=1
17 | [[ "${XTRACE_ON:-}" ]] && set +o xtrace && echo "xtrace disabled"
18 | base64 --decode <<< "$GITHUB_DEPLOY_PRIVATE_KEY" | ssh-add -
19 | [[ "${XTRACE_ON:-}" ]] && set -o xtrace && echo "xtrace reenabled"
20 | 
21 | # Configure git
22 | git config --global push.default simple
23 | git config --global user.name "Travis CI"
24 | git config --global user.email "deploy@travis-ci.com"
25 | git checkout "$TRAVIS_BRANCH"
26 | git remote set-url origin "git@github.com:$TRAVIS_REPO_SLUG.git"
27 | 
28 | # Fetch and create gh-pages branch
29 | # Travis does a shallow and single branch git clone
30 | git remote set-branches --add origin gh-pages
31 | git fetch origin gh-pages:gh-pages
32 | 
33 | commit_message="\
34 | Generate catalog output on $(date --iso --utc)
35 | 
36 | built by $TRAVIS_JOB_WEB_URL
37 | based on https://github.com/$TRAVIS_REPO_SLUG/commit/$TRAVIS_COMMIT
38 | [skip ci]
39 | "
40 | # echo >&2 "$commit_message"
41 | 
42 | ghp-import \
43 |   --push --no-jekyll \
44 |   --message="$commit_message" \
45 |   docs/output/xswap
46 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import pathlib
 3 | import re
 4 | 
 5 | import setuptools
 6 | 
 7 | os.environ["CC"] = "g++"
 8 | 
 9 | directory = pathlib.Path(__file__).parent.resolve()
10 | 
11 | # version
12 | init_path = directory.joinpath('xswap', '__init__.py')
13 | text = init_path.read_text()
14 | pattern = re.compile(r"^__version__ = ['\"]([^'\"]*)['\"]", re.MULTILINE)
15 | version = pattern.search(text).group(1)
16 | 
17 | # long_description
18 | readme_path = directory.joinpath('README.md')
19 | long_description = readme_path.read_text()
20 | 
21 | xswap_cpp_extension = setuptools.Extension(
22 |     'xswap._xswap_backend',
23 |     sources=['xswap/src/xswap_wrapper.cpp', 'xswap/src/bitset.cpp', 'xswap/src/xswap.cpp', 'xswap/lib/roaring.c'],
24 |     extra_compile_args=["-std=c++11"],
25 | )
26 | 
27 | setuptools.setup(
28 |     # Package details
29 |     name='xswap',
30 |     version=version,
31 |     url='https://github.com/greenelab/xswap',
32 |     project_urls={
33 |         'Documentation': 'https://hetio.github.io/xswap/',
34 |         'Source': 'https://github.com/hetio/xswap',
35 |         'Tracker': 'https://github.com/hetio/xswap/issues',
36 |         'Publication': 'https://greenelab.github.io/xswap-manuscript/',
37 |     },
38 |     description='Python-wrapped C/C++ library for degree-preserving network randomization',
39 |     long_description_content_type='text/markdown',
40 |     long_description=long_description,
41 |     license='BSD 2-Clause',
42 | 
43 |     # Author details
44 |     author='Michael Zietz',
45 |     author_email='michael.zietz@gmail.com',
46 | 
47 |     # Specify python version
48 |     python_requires='>=3.5',
49 | 
50 |     ext_modules=[xswap_cpp_extension],
51 |     packages=setuptools.find_packages(),
52 | )
53 | 


--------------------------------------------------------------------------------
/docs/img/xswap.svg:
--------------------------------------------------------------------------------
 1 | <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100">
 2 |   <style>
 3 |     g > line {
 4 |       stroke: #424242;
 5 |       stroke-width: 3px;
 6 |     }
 7 |   </style>
 8 |   <line
 9 |     x1="0"
10 |     y1="100"
11 |     x2="45"
12 |     y2="55"
13 |     stroke="lightgrey"
14 |     stroke-width="1"
15 |   ></line>
16 |   <line
17 |     x1="100"
18 |     y1="0"
19 |     x2="55"
20 |     y2="45"
21 |     stroke="lightgrey"
22 |     stroke-width="1"
23 |   ></line>
24 |   <line
25 |     x1="46"
26 |     y1="46"
27 |     x2="51"
28 |     y2="51"
29 |     stroke="#424242"
30 |     stroke-width="3"
31 |   ></line>
32 |   <path
33 |     fill="#424242"
34 |     d="
35 |       M 54 54
36 |       L 47 53
37 |       L 53 47
38 |     "
39 |   ></path>
40 |   <g
41 |     transform="translate(-25,-25) translate(50,50) rotate(45) scale(0.5) translate(-50,-50)"
42 |   >
43 |     <line x1="25" y1="25" x2="75" y2="25"></line>
44 |     <line x1="25" y1="75" x2="75" y2="75"></line>
45 |     <circle cx="25" cy="25" r="10" fill="#02b3e4"></circle>
46 |     <circle cx="75" cy="25" r="10" fill="#e91e63"></circle>
47 |     <circle cx="75" cy="75" r="10" fill="#fa750f"></circle>
48 |     <circle cx="25" cy="75" r="10" fill="#c341d8"></circle>
49 |   </g>
50 |   <g
51 |     transform="translate(25,25) translate(50,50) rotate(45) scale(0.5) translate(-50,-50)"
52 |   >
53 |     <line x1="25" y1="25" x2="75" y2="75"></line>
54 |     <line x1="25" y1="75" x2="75" y2="25"></line>
55 |     <circle cx="25" cy="25" r="10" fill="#02b3e4"></circle>
56 |     <circle cx="75" cy="25" r="10" fill="#e91e63"></circle>
57 |     <circle cx="75" cy="75" r="10" fill="#fa750f"></circle>
58 |     <circle cx="25" cy="75" r="10" fill="#c341d8"></circle>
59 |   </g>
60 | </svg>
61 | 


--------------------------------------------------------------------------------
/tests/test_permute.py:
--------------------------------------------------------------------------------
 1 | import tempfile
 2 | 
 3 | import pytest
 4 | import requests
 5 | 
 6 | import xswap
 7 | 
 8 | 
 9 | @pytest.mark.parametrize('edges,permutable', [
10 |     ([(0, 0), (1, 1), (1, 2), (2, 3)], True),
11 |     ([(0, 0)], False),
12 | ])
13 | def test_xswap_changes_edges(edges, permutable):
14 |     """
15 |     Check that XSwap returns a different set of edges than the ones given if the edges
16 |     are permutable. Check that XSwap does not modify edges in place.
17 |     """
18 |     edges_copy = edges.copy()
19 |     new_edges, stats = xswap.permute_edge_list(
20 |         edges, allow_self_loops=True, allow_antiparallel=True)
21 |     assert edges == edges_copy
22 |     if permutable:
23 |         assert new_edges != edges
24 |     else:
25 |         assert new_edges == edges
26 | 
27 | 
def test_roaring_warning():
    """
    Check that a warning is given when using the much slower but far more general
    Roaring bitset rather than the faster fully uncompressed bitset, and that no
    such warning is given when `max_malloc` is large.
    """
    # Local import keeps this test self-contained within the test module
    import warnings

    roaring_message = "Using Roaring bitset because of the large number of edges."

    edges_url = "https://github.com/greenelab/xswap/raw/{}/{}".format(
        "8c31b4cbdbbf2cfa5018b1277bbd0e9f6263e573", "graphs/GiG_edges_reduced.txt")
    response = requests.get(edges_url)
    response.raise_for_status()
    with tempfile.NamedTemporaryFile() as tf:
        tf.write(response.content)
        # The file is re-opened by name below; without a flush part of the
        # content may still sit in this handle's buffer and never be seen.
        tf.flush()
        edges = xswap.preprocessing.load_processed_edges(tf.name)

    # `pytest.warns(None)` is deprecated and never asserted anything. Record
    # the warnings explicitly and check that the Roaring warning is absent.
    with warnings.catch_warnings(record=True) as recorded:
        warnings.simplefilter("always")
        permuted_edges, stats = xswap.permute_edge_list(edges, allow_self_loops=True,
            allow_antiparallel=False, multiplier=0.1, seed=0, max_malloc=4000000000)
    assert not any(
        issubclass(w.category, RuntimeWarning) and "Using Roaring bitset" in str(w.message)
        for w in recorded)

    with pytest.warns(RuntimeWarning, match=roaring_message):
        permuted_edges, stats = xswap.permute_edge_list(edges, allow_self_loops=True,
            allow_antiparallel=False, multiplier=0.1, seed=0, max_malloc=10)
47 | 


--------------------------------------------------------------------------------
/tests/test_formats.py:
--------------------------------------------------------------------------------
 1 | import numpy
 2 | import pytest
 3 | import scipy.sparse
 4 | 
 5 | import xswap.network_formats
 6 | 
 7 | 
 8 | @pytest.mark.parametrize('matrix,correct_edges,include_reverse_edges', [
 9 |     (numpy.array([[1,0,0,0],[0,0,1,0],[0,0,0,1]]), [(0, 0), (1, 2), (2, 3)], False),
10 |     (numpy.array([[1,0,0],[0,0,1],[0,1,1]]), [(0, 0), (1, 2), (2, 2)], False),
11 |     (numpy.array([[1,0,0],[0,0,1],[0,1,1]]), [(0, 0), (1, 2), (2, 1), (2, 2)], True),
12 | ])
13 | def test_matrix_to_edges(matrix, correct_edges, include_reverse_edges):
14 |     edges = xswap.network_formats.matrix_to_edges(matrix, include_reverse_edges)
15 |     assert sorted(edges) == sorted(correct_edges)
16 | 
17 | 
@pytest.mark.parametrize('edges,correct_matrix,add_reverse_edges,shape,dtype,sparse', [
    # Square matrix, reverse edges added
    ([(0, 1), (0, 3), (2, 2)],
     numpy.array([[0, 1, 0, 1], [1, 0, 0, 0], [0, 0, 1, 0], [1, 0, 0, 0]], dtype=int),
     True, (4, 4), int, False),
    # Square matrix, edges taken as given
    ([(0, 1), (0, 3), (2, 2)],
     numpy.array([[0, 1, 0, 1], [0, 0, 0, 0], [0, 0, 1, 0], [0, 0, 0, 0]], dtype=int),
     False, (4, 4), int, False),
    # Non-square shape with several dtypes
    ([(0, 1), (0, 3), (2, 2)],
     numpy.array([[0, 1, 0, 1], [0, 0, 0, 0], [0, 0, 1, 0]], dtype=int),
     False, (3, 4), int, False),
    ([(0, 1), (0, 3), (2, 2)],
     numpy.array([[0, 1, 0, 1], [0, 0, 0, 0], [0, 0, 1, 0]], dtype=float),
     False, (3, 4), float, False),
    ([(0, 1), (0, 3), (2, 2)],
     numpy.array([[0, 1, 0, 1], [0, 0, 0, 0], [0, 0, 1, 0]], dtype=numpy.float32),
     False, (3, 4), numpy.float32, False),
    # Sparse output
    ([(0, 1), (0, 3), (2, 2)],
     scipy.sparse.csc_matrix([[0, 1, 0, 1], [0, 0, 0, 0], [0, 0, 1, 0]], dtype=numpy.float32),
     False, (3, 4), numpy.float32, True),
])
def test_edges_to_matrix(edges, correct_matrix, add_reverse_edges, shape, dtype, sparse):
    """The built matrix must have the requested dtype, sparsity and entries."""
    result = xswap.network_formats.edges_to_matrix(
        edge_list=edges,
        add_reverse_edges=add_reverse_edges,
        shape=shape,
        dtype=dtype,
        sparse=sparse,
    )

    assert result.dtype == dtype
    assert scipy.sparse.issparse(result) == sparse

    if not sparse:
        assert numpy.array_equal(result, correct_matrix)
        return
    # Sparse matrices are compared by counting the positions that differ
    assert (result != correct_matrix).nnz == 0
55 | 


--------------------------------------------------------------------------------
/tests/test_roaring.cpp:
--------------------------------------------------------------------------------
 1 | #include <iostream>
 2 | #include "../xswap/src/xswap.h"
 3 | 
 4 | 
 5 | main(int argc, char const *argv[])
 6 | {
 7 |     int counter, incorrect_contains, incorrect_doesnt_contain;
 8 | 
 9 |     // Create real edges to be added to the Roaring set
10 |     int** real_edges = (int**)malloc(sizeof(int*) * 16);
11 |     counter = 0;
12 |     for (int i = 4; i < 8; i++) {
13 |         for (int j = 4; j < 8; j++) {
14 |             real_edges[counter] = (int*)malloc(sizeof(int) * 2);
15 |             real_edges[counter][0] = i;
16 |             real_edges[counter][1] = j;
17 |             counter += 1;
18 |         }
19 |     }
20 | 
21 |     Edges edges;
22 |     edges.edge_array = real_edges;
23 |     edges.num_edges = 16;
24 |     RoaringBitSet edges_set = RoaringBitSet(edges);
25 | 
26 |     // Check that edges added at the creation of the set are contained
27 |     incorrect_doesnt_contain = 0;
28 |     for (int i = 4; i < 8; i++) {
29 |         for (int j = 4; j < 8; j++) {
30 |             int edge[2] = {i, j};
31 |             if (!edges_set.contains(edge)) {
32 |                 incorrect_doesnt_contain += 1;
33 |             }
34 |         }
35 |     }
36 | 
37 |     // Create fake edges and check that they are not in the set
38 |     counter = 0;
39 |     incorrect_contains = 0;
40 |     for (int i = 0; i < 4; i++) {
41 |         for (int j = 0; j < 4; j++) {
42 |             int fake_edge[2] = {i, j};
43 |             // Check that this edge is not in the set
44 |             if (edges_set.contains(fake_edge)) {
45 |                 incorrect_contains += 1;
46 |             }
47 |             // Add the edge and check that it was added
48 |             edges_set.add(fake_edge);
49 |             if (!edges_set.contains(fake_edge)) {
50 |                 incorrect_doesnt_contain += 1;
51 |             }
52 |             // Remove the edge and check that it is removed
53 |             edges_set.remove(fake_edge);
54 |             if (edges_set.contains(fake_edge)) {
55 |                 incorrect_contains += 1;
56 |             }
57 |             counter += 1;
58 |         }
59 |     }
60 | 
61 |     free(real_edges);
62 |     if (incorrect_contains == 0 && incorrect_doesnt_contain == 0) {
63 |         std::cout << "All tests passed" << "\n";
64 |         return 0;
65 |     } else {
66 |         std::cout << "Tests failed " << incorrect_contains << " " << incorrect_doesnt_contain << "\n";
67 |         return 1;
68 |     }
69 | }
70 | 


--------------------------------------------------------------------------------
/xswap/src/xswap.h:
--------------------------------------------------------------------------------
 1 | #include <Python.h>
 2 | #include "../lib/roaring.hh"
 3 | 
 4 | extern int CHAR_BITS;
 5 | 
 6 | struct Edges {
 7 |     int** edge_array;
 8 |     int num_edges;
 9 |     int max_id;
10 | };
11 | 
12 | // Slower bitset
13 | class RoaringBitSet
14 | {
15 |     public:
16 |         RoaringBitSet() = default;
17 |         RoaringBitSet(Edges edges);
18 |         bool contains(int *edge);
19 |         void add(int *edge);
20 |         void remove(int *edge);
21 | 
22 |     private:
23 |         Roaring bitmap;
24 | };
25 | 
26 | // Faster edge bitset for smaller numbers of edges
27 | class UncompressedBitSet
28 | {
29 |     public:
30 |         UncompressedBitSet() = default;
31 |         UncompressedBitSet(int max_id, unsigned long long int max_malloc);
32 |         UncompressedBitSet(Edges edges, unsigned long long int max_malloc);
33 |         bool contains(int *edge);
34 |         void add(int *edge);
35 |         void remove(int *edge);
36 |         void free_array();
37 | 
38 |     private:
39 |         char* bitset;
40 |         size_t max_cantor;
41 |         void create_bitset(size_t num_elements, unsigned long long int max_malloc);
42 |         char get_bit(char word, char bit_position);
43 |         void set_bit_true(char* word, char bit_position);
44 |         void set_bit_false(char* word, char bit_position);
45 | };
46 | 
47 | // Wrapper class for the two bitset implementations
48 | class BitSet
49 | {
50 |     public:
51 |         BitSet(Edges edges, unsigned long long int max_malloc);
52 |         bool contains(int *edge);
53 |         void add(int *edge);
54 |         void remove(int *edge);
55 |         void free_array();
56 |         PyObject* runtime_warning_roaring(void);
57 |         UncompressedBitSet uncompressed_set;
58 | 
59 |     private:
60 |         bool use_compressed;
61 |         RoaringBitSet compressed_set;
62 | };
63 | 
64 | struct statsCounter {
65 |     int num_swaps;
66 |     int same_edge = 0;
67 |     int self_loop = 0;
68 |     int duplicate = 0;
69 |     int undir_duplicate = 0;
70 |     int excluded = 0;
71 | };
72 | 
73 | struct Conditions {
74 |     int seed;
75 |     bool allow_antiparallel;
76 |     bool allow_self_loop;
77 |     Edges excluded_edges;
78 | };
79 | 
80 | size_t cantor_pair(int* edge);
81 | 
82 | void swap_edges(Edges edges, int num_swaps, Conditions cond, statsCounter *stats,
83 |                 unsigned long long int max_malloc);
84 | 
85 | bool is_valid_edge(int *edge, BitSet edges_set, Conditions cond,
86 |                    statsCounter *stats);
87 | 
88 | bool is_valid_swap(int **new_edges, BitSet edges_set, Conditions cond,
89 |                    statsCounter *stats);
90 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | setup_and_test: &setup_and_test
 2 |   stage: test
 3 |   language: python
 4 |   addons:
 5 |     apt:
 6 |       packages:
 7 |         - pkg-config
 8 |         - python3-dev
 9 |   before_install:
10 |     - pip install -r tests-require.txt
11 |   install:
12 |     - pkg-config --cflags --libs python3
13 |     - python setup.py build
14 |     - pip install .
15 |   script:
16 |     - pytest tests/
17 |     - >
18 |         g++ tests/test_bitset.cpp xswap/src/xswap.h xswap/src/bitset.cpp
19 |         xswap/lib/roaring.c -o tests/test_bitset.o -std=c++11
20 |         `pkg-config --cflags --libs python3`
21 |     - ./tests/test_bitset.o
22 |     - >
23 |         g++ tests/test_roaring.cpp xswap/src/xswap.h xswap/src/bitset.cpp
24 |         xswap/lib/roaring.c -o tests/test_roaring.o -std=c++11
25 |         `pkg-config --cflags --libs python3`
26 |     - ./tests/test_roaring.o
27 | 
28 | build_and_upload: &build_and_upload
29 |   stage: deploy
30 |   sudo: required
31 |   if: tag IS present
32 |   services:
33 |     - docker
34 |   install:
35 |     - docker pull $DOCKER_IMAGE
36 |   script:
37 |     - docker run --rm -e PLAT=$PLAT -v `pwd`:/io $DOCKER_IMAGE /io/ci/build-wheels.sh
38 |     - /opt/python/3.6/bin/pip install twine
39 |     - /opt/python/3.6/bin/python -m twine upload -u zietzm -p $PYPI_PASSWORD --repository-url https://upload.pypi.org/legacy/ --skip-existing wheelhouse/*
40 | 
41 | compiler:
42 |   - g++
43 | matrix:
44 |   include:
45 |     - <<: *setup_and_test
46 |       name: "Test 3.5 on Ubuntu"
47 |       dist: xenial
48 |       python: 3.5
49 |     - <<: *setup_and_test
50 |       name: "Test 3.6 on Ubuntu"
51 |       dist: xenial
52 |       python: 3.6
53 |     - <<: *setup_and_test
54 |       name: "Test 3.7 on Ubuntu"
55 |       dist: xenial
56 |       python: 3.7
57 |     - <<: *build_and_upload
58 |       name: "Build manylinux1_x86_64"
59 |       env:
60 |         - DOCKER_IMAGE=quay.io/pypa/manylinux1_x86_64
61 |         - PLAT=manylinux1_x86_64
62 |     - <<: *build_and_upload
63 |       name: "Build manylinux1_i686"
64 |       env:
65 |         - DOCKER_IMAGE=quay.io/pypa/manylinux1_i686
66 |         - PLAT=manylinux1_i686
67 |     - <<: *build_and_upload
68 |       name: "Build manylinux2010_x86_64"
69 |       env:
70 |         - DOCKER_IMAGE=quay.io/pypa/manylinux2010_x86_64
71 |         - PLAT=manylinux2010_x86_64
72 |     - name: "Build documentation"
73 |       dist: xenial
74 |       language: python
75 |       python: 3.7
76 |       install:
77 |         - pip install --requirement tests-require.txt
78 |         - pip install pdoc3~=0.7.0 ghp-import~=0.5.5
79 |       script:
80 |         - pdoc --force --html
81 |           --config="git_link_template=\"https://github.com/$TRAVIS_REPO_SLUG/blob/{commit}/{path}#L{start_line}-L{end_line}\""
82 |           --output-dir=docs/output
83 |           xswap
84 |       deploy:
85 |         provider: script
86 |         script: bash ci/deploy.sh
87 |         skip_cleanup: true
88 |         on:
89 |           branch: master
90 |           condition: $TRAVIS_EVENT_TYPE = "push"
91 | 


--------------------------------------------------------------------------------
/xswap/src/xswap.cpp:
--------------------------------------------------------------------------------
 1 | #include <random>
 2 | #include "xswap.h"
 3 | 
 4 | void swap_edges(Edges edges, int num_swaps, Conditions cond, statsCounter *stats,
 5 |                 unsigned long long int max_malloc) {
 6 |     // Initialize bitset for possible edges
 7 |     BitSet edges_set = BitSet(edges, max_malloc);
 8 | 
 9 |     // Initialize unbiased random number generator
10 |     std::mt19937 rng(cond.seed);
11 |     std::uniform_int_distribution<int> uni(0, edges.num_edges - 1);
12 | 
13 |     // Do XSwap
14 |     for (int i = 0; i < num_swaps; i++) {
15 |         // Draw edges randomly
16 |         int edge_index_a = uni(rng);
17 |         int edge_index_b = uni(rng);
18 | 
19 |         if (edge_index_a == edge_index_b) {
20 |             stats->same_edge += 1;
21 |             continue;
22 |         }
23 | 
24 |         // Old edges
25 |         int* edge_a = edges.edge_array[edge_index_a];
26 |         int* edge_b = edges.edge_array[edge_index_b];
27 | 
28 |         // Form potential new edges
29 |         int new_edge_a[2] = { edge_a[0], edge_b[1] };
30 |         int new_edge_b[2] = { edge_b[0], edge_a[1] };
31 |         int* new_edges[2] = { new_edge_a, new_edge_b };
32 | 
33 |         bool valid = is_valid_swap(new_edges, edges_set, cond, stats);
34 |         if (valid) {
35 |             edges_set.remove(edge_a);
36 |             edges_set.remove(edge_b);
37 | 
38 |             int temp_target = edge_a[1];
39 |             edge_a[1] = edge_b[1];
40 |             edge_b[1] = temp_target;
41 | 
42 |             edges_set.add(new_edge_a);
43 |             edges_set.add(new_edge_b);
44 |         }
45 |     }
46 |     edges_set.free_array();
47 | }
48 | 
49 | bool is_valid_edge(int *new_edge, BitSet edges_set, Conditions valid_conditions,
50 |                    statsCounter *stats) {
51 |     // New edge would be a self-loop
52 |     if (!valid_conditions.allow_self_loop && new_edge[0] == new_edge[1]) {
53 |         stats->self_loop += 1;
54 |         return false;
55 |     }
56 |     // New edge already exists
57 |     if (edges_set.contains(new_edge)) {
58 |         stats->duplicate += 1;
59 |         return false;
60 |     }
61 |     // Undirected and reverse of new edge already exists
62 |     int reversed[2] = { new_edge[1], new_edge[0] };
63 |     if (!valid_conditions.allow_antiparallel && edges_set.contains(reversed)) {
64 |         stats->undir_duplicate += 1;
65 |         return false;
66 |     }
67 |     for (int i = 0; i < valid_conditions.excluded_edges.num_edges; i++) {
68 |         if (valid_conditions.excluded_edges.edge_array[i][0] == new_edge[0] &&
69 |             valid_conditions.excluded_edges.edge_array[i][1] == new_edge[1]) {
70 |             stats->excluded += 1;
71 |             return false;
72 |         }
73 |     }
74 |     return true;
75 | }
76 | 
77 | bool is_valid_swap(int **new_edges, BitSet edges_set, Conditions valid_conditions,
78 |                    statsCounter *stats) {
79 |     for (int i = 0; i < 2; i++) {
80 |         bool is_valid = is_valid_edge(new_edges[i], edges_set, valid_conditions, stats);
81 |         if (!is_valid) {
82 |             return false;
83 |         }
84 |     }
85 |     return true;
86 | }
87 | 


--------------------------------------------------------------------------------
/xswap/network_formats.py:
--------------------------------------------------------------------------------
 1 | from typing import List, Tuple, TypeVar
 2 | 
 3 | import numpy
 4 | import scipy.sparse
 5 | 
 6 | 
 7 | def matrix_to_edges(matrix: numpy.ndarray, include_reverse_edges: bool=True):
 8 |     """
 9 |     Convert (bi)adjacency matrix to an edge list. Inverse of `edges_to_matrix`.
10 | 
11 |     Parameters
12 |     ----------
13 |     matrix : numpy.ndarray
14 |         Adjacency matrix or biadjacency matrix of a network
15 |     include_reverse_edges : bool
16 |         Whether to return edges that are the inverse of existing edges. For
17 |         example, if returning [(0, 1), (1, 0)] is desired or not. If False,
18 |         then only edges where source <= target are returned. This parameter
19 |         should be `True` when passing a biadjacency matrix, as matrix positions
20 |         indicate separate nodes.
21 | 
22 |     Returns
23 |     -------
24 |     edge_list : List[Tuple[int, int]]
25 |         Edge list with node ids as the corresponding matrix indices. For example,
26 |         if `matrix` has `matrix[0, 2] == 1`, then `(0, 2)` will be among the
27 |         returned edges.
28 |     """
29 |     sparse = scipy.sparse.coo_matrix(matrix)
30 |     edges = zip(sparse.row, sparse.col)
31 | 
32 |     if not include_reverse_edges:
33 |         edges = filter(lambda edge: edge[0] <= edge[1], edges)
34 |     return list(edges)
35 | 
36 | 
def edges_to_matrix(edge_list: List[Tuple[int, int]], add_reverse_edges: bool,
                    shape: Tuple[int, int], dtype: TypeVar=bool, sparse: bool=True):
    """
    Convert edge list to (bi)adjacency matrix. Inverse of `matrix_to_edges`.

    Parameters
    ----------
    edge_list : List[Tuple[int, int]]
        An edge list mapped such that node ids correspond to desired matrix
        positions. For example, (0, 0) will mean that the resulting matrix has
        a positive value of type `dtype` in that position. An empty list gives
        an all-zero matrix of the requested shape.
    add_reverse_edges : bool
        Whether to include the reverse of edges in the matrix. For example,
        if `edge_list = [(1, 0)]` and `add_reverse_edge = True`, then the
        returned matrix has `matrix[1, 0]` = `matrix[0, 1]` = 1. Else, the matrix
        only has `matrix[1, 0]` = 1. If a biadjacency matrix is desired, then
        set `add_reverse_edges = False`.
    shape : Tuple[int, int]
        Shape of the matrix to be returned. Allows edges to be converted to
        a matrix even when there are nodes without edges.
    dtype : data-type
        Dtype of the returned matrix. For example, `int`, `bool`, `float`, etc.
    sparse : bool
        Whether a sparse matrix should be returned. If `False`, returns a dense
        numpy.ndarray

    Returns
    -------
    matrix : scipy.sparse.csc_matrix or numpy.ndarray
    """
    if len(edge_list) == 0:
        # `zip(*[])` yields nothing, so the (data, (row, col)) form below
        # cannot be built; construct the empty matrix from its shape instead.
        matrix = scipy.sparse.csc_matrix(shape, dtype=dtype)
    else:
        # Transpose [(s0, t0), (s1, t1), ...] into ((s0, s1, ...), (t0, t1, ...))
        rows_and_cols = tuple(zip(*edge_list))
        matrix = scipy.sparse.csc_matrix(
            (numpy.ones(len(edge_list)), rows_and_cols), dtype=dtype, shape=shape,
        )

    if add_reverse_edges:
        # Symmetrize, then binarize so that edges present in both directions
        # are not counted twice
        matrix = (matrix + matrix.T) > 0
        matrix = matrix.astype(dtype)

    if not sparse:
        matrix = matrix.toarray()

    return matrix
79 | 


--------------------------------------------------------------------------------
/tests/test_prior.py:
--------------------------------------------------------------------------------
 1 | import numpy
 2 | import pandas
 3 | import pytest
 4 | 
 5 | import xswap
 6 | 
 7 | 
 8 | @pytest.mark.parametrize('edges,true_prior,num_swaps,shape', [
 9 |     ([(0, 0), (1, 1)], 0.5 * numpy.ones((2, 2)), 10000, (2, 2)),
10 |     ([(0, 1), (1, 0)], 0.5 * numpy.ones((2, 2)), 10000, (2, 2)),
11 |     ([(0, 0)], numpy.ones((1, 1)), 10, (1, 1)),
12 |     ([(0, 1), (1, 2), (3, 4), (1, 0)], numpy.zeros((5, 5)), 0, (5, 5)),
13 |     ([(0, 1), (1, 2), (3, 4), (1, 0)], numpy.zeros((4, 5)), 0, (4, 5)),
14 | ])
15 | def test_prior_matrix(edges, true_prior, num_swaps, shape):
16 |     """
17 |     Check that `xswap.prior.compute_xswap_occurrence_matrix` is returning
18 |     reasonable results for very small networks where the correct prior is obvious.
19 |     """
20 |     occurrence_matrix = xswap.prior.compute_xswap_occurrence_matrix(
21 |         edges, n_permutations=num_swaps, shape=shape, allow_self_loops=True,
22 |         allow_antiparallel=True)
23 |     if num_swaps:
24 |         edge_prior = (occurrence_matrix / num_swaps).toarray()
25 |     else:
26 |         edge_prior = occurrence_matrix.toarray()
27 |     assert numpy.abs(edge_prior - true_prior).max() == pytest.approx(0, abs=0.01)
28 | 
29 | 
# NOTE: the builtin `float` is used where `numpy.float` used to be. That alias
# was simply the builtin and has been removed from recent NumPy releases.
@pytest.mark.parametrize('edges,dtypes,source_degrees,target_degrees,shape,allow_antiparallel', [
    (
        [(0, 2), (0, 3), (1, 2), (2, 3), (3, 4)],
        {'id': numpy.uint16, 'edge': bool, 'degree': numpy.uint32, 'xswap_prior': float},
        {0: 2, 1: 1, 2: 3, 3: 3, 4: 1}, {0: 2, 1: 1, 2: 3, 3: 3, 4: 1}, (5, 5), False
    ),
    (
        [(0, 2), (0, 3), (1, 2), (2, 3), (3, 4)],
        {'id': numpy.int8, 'edge': int, 'degree': float, 'xswap_prior': numpy.float64},
        {0: 2, 1: 1, 2: 3, 3: 3, 4: 1}, {0: 2, 1: 1, 2: 3, 3: 3, 4: 1}, (5, 5), False
    ),
    (
        [(0, 2), (0, 3), (1, 2), (1, 3)],
        {'id': numpy.float16, 'edge': float, 'degree': float, 'xswap_prior': numpy.float32},
        {0: 2, 1: 2, 2: 0, 3: 0}, {0: 0, 1: 0, 2: 2, 3: 2}, (4, 4), True
    ),
])
def test_prior_dataframe(edges, dtypes, source_degrees, target_degrees, shape, allow_antiparallel):
    """
    Check that `xswap.prior.compute_xswap_priors` returns a DataFrame with the
    expected columns, dtypes, degrees and edge indicators.
    """
    prior_df = xswap.prior.compute_xswap_priors(edges, n_permutations=1000,
        shape=shape, allow_self_loops=False, allow_antiparallel=allow_antiparallel, dtypes=dtypes)

    assert isinstance(prior_df, pandas.DataFrame)
    assert list(prior_df.columns) == ['source_id', 'target_id', 'edge', 'source_degree',
                                      'target_degree', 'xswap_prior']
    assert dict(prior_df.dtypes) == {
        'source_id': dtypes['id'], 'target_id': dtypes['id'], 'edge': dtypes['edge'],
        'source_degree': dtypes['degree'], 'target_degree': dtypes['degree'],
        'xswap_prior': dtypes['xswap_prior']
    }

    assert prior_df.set_index('source_id')['source_degree'].to_dict() == source_degrees
    assert prior_df.set_index('target_id')['target_degree'].to_dict() == target_degrees

    # Ensure that all the edges are accounted for in the dataframe
    for edge in edges:
        assert prior_df.query('source_id == {} & target_id == {}'.format(*edge))['edge'].values[0]

    # Whether directed-ness is correctly propagated through the pipeline
    if allow_antiparallel:
        assert prior_df['edge'].sum() == len(edges)
    else:
        # Each undirected edge appears in both orientations
        assert prior_df['edge'].sum() == len(edges) * 2
75 | 


--------------------------------------------------------------------------------
/xswap/permute.py:
--------------------------------------------------------------------------------
 1 | from typing import List, Set, Tuple
 2 | 
 3 | 
 4 | def permute_edge_list(edge_list: List[Tuple[int, int]], allow_self_loops: bool = False,
 5 |                       allow_antiparallel: bool = False, multiplier: float = 10,
 6 |                       excluded_edges: Set[Tuple[int, int]] = set(), seed: int = 0,
 7 |                       max_malloc: int = 4000000000):
 8 |     """
 9 |     Permute the edges of a graph using the XSwap method given by Hanhijärvi,
10 |     et al. (doi.org/f3mn58). XSwap is a degree-preserving network randomization
11 |     technique that selects edges, checks the validity of the swap, and exchanges
12 |     the target nodes between the edges. For information on what values to select
13 |     for directed, please see README.md.
14 | 
15 |     Parameters
16 |     ----------
17 |     edge_list : List[Tuple[int, int]]
18 |         Edge list representing the graph to be randomized. Tuples can contain
19 |         integer values representing nodes. No value should be greater than C++'s
20 |         `INT_MAX`, in this case 2_147_483_647.
21 |     allow_self_loops : bool
22 |         Whether to allow edges like (0, 0). In the case of bipartite graphs,
23 |         such an edge represents a connection between two distinct nodes, while
24 |         in other graphs it may represent an edge from a node to itself, in which
25 |         case an edge may or may not be meaningful depending on context.
26 |     allow_antiparallel : bool
27 |         Whether to allow simultaneous edges like (0, 1) and (1, 0). In the case
28 |         of bipartite graphs, these edges represent two connections between four
29 |         distinct nodes, while for other graphs, these may be connections between
30 |         the same two nodes.
31 |     multiplier : float
32 |         The number of edge swap attempts is determined by the product of the
33 |         number of existing edges and multiplier. For example, if five edges are
34 |         passed and multiplier is set to 10, 50 swaps will be attempted. Non-integer
35 |         products will be rounded down to the nearest integer.
36 |     excluded_edges : Set[Tuple[int, int]]
37 |         Specific edges which should never be created by the network randomization
38 |     seed : int
39 |         Random seed that will be passed to the C++ Mersenne Twister 19937 random
40 |         number generator.
41 |     max_malloc : int (`unsigned long long int` in C)
42 |         The maximum amount of memory to be allocated using `malloc` when making
43 |         a bitset to hold edges. An uncompressed bitset is implemented for
44 |         holding edges that is significantly faster than alternatives. However,
45 |         it is memory-inefficient and will not be used if more memory is required
46 |         than `max_malloc`. Above the threshold, a Roaring bitset will be used.
47 | 
48 |     Returns
49 |     -------
50 |     new_edges : List[Tuple[int, int]]
51 |         Edge list of a permutation of the network given as `edge_list`
52 |     stats : Dict[str, int]
53 |         Information about the permutation performed. Gives the following information:
54 |         `swap_attempts` - number of attempted swaps
55 |         `same_edge` - number of swaps rejected because one edge was chosen twice
56 |         `self_loop` - number of swaps rejected because new edge is a self-loop
57 |         'duplicate` - number of swaps rejected because new edge already exists
58 |         `undir_duplicate` - number of swaps rejected because the network is
59 |             undirected and the reverse of the new edge already exists
60 |         `excluded` - number of swaps rejected because new edge was among excluded
61 |     """
62 |     import xswap._xswap_backend
63 |     if len(edge_list) != len(set(edge_list)):
64 |         raise ValueError("Edge list contained duplicate edges.")
65 | 
66 |     # Number of attempted XSwap swaps
67 |     num_swaps = int(multiplier * len(edge_list))
68 | 
69 |     # Compute the maximum node ID (for creating the bitset)
70 |     max_id = max(map(max, edge_list))
71 | 
72 |     new_edges, stats = xswap._xswap_backend._xswap(
73 |         edge_list, list(excluded_edges), max_id, allow_self_loops,
74 |         allow_antiparallel, num_swaps, seed, max_malloc)
75 | 
76 |     return new_edges, stats
77 | 


--------------------------------------------------------------------------------
/xswap/preprocessing.py:
--------------------------------------------------------------------------------
  1 | import csv
  2 | 
  3 | 
  4 | def load_str_edges(filename, node_delim=',', edge_delim='\n'):
  5 |     """
  6 |     Load edges from file into memory. Store edges as a list and store each edge
  7 |     as Tuple[str, str]. Used to load edges for preprocessing.
  8 |     """
  9 |     with open(filename, 'r', newline='') as f:
 10 |         reader = csv.reader(f, delimiter=node_delim, lineterminator=edge_delim)
 11 |         str_edges = [tuple(row) for row in reader if len(row) > 1]
 12 |     return str_edges
 13 | 
 14 | 
 15 | def load_processed_edges(filename):
 16 |     """
 17 |     Load processed edges from a file. Processed means that edges are guaranteed
 18 |     to be integers ranging from zero to the number of unique nodes.
 19 |     """
 20 |     str_edges = load_str_edges(filename)
 21 |     edges = [
 22 |         (int(edge[0]), int(edge[1])) for edge in str_edges
 23 |     ]
 24 |     return edges
 25 | 
 26 | 
 27 | def write_edges(filename, edges, node_delim=',', edge_delim='\n'):
 28 |     with open(filename, 'w', newline='') as f:
 29 |         writer = csv.writer(f, delimiter=node_delim, lineterminator=edge_delim)
 30 |         writer.writerows(edges)
 31 | 
 32 | 
 33 | def write_mapping(filename, mapping, delimiter=','):
 34 |     with open(filename, 'w', newline='') as f:
 35 |         writer = csv.writer(f, delimiter=delimiter)
 36 |         writer.writerow(['original', 'mapped'])
 37 |         for original, mapped in mapping.items():
 38 |             writer.writerow([original, mapped])
 39 | 
 40 | 
 41 | def _map_nodes_to_int(nodes):
 42 |     """
 43 |     Return a dict mapping a list of nodes to their sorted indices. Nodes should
 44 |     be a list of strings.
 45 | 
 46 |     Returns:
 47 |     --------
 48 |     Dict[str, int]
 49 |     """
 50 |     sorted_node_set = sorted(set(nodes))
 51 |     name_to_id = {name: i for i, name in enumerate(sorted_node_set)}
 52 |     return name_to_id
 53 | 
 54 | 
 55 | def _apply_map(edges, source_mapping, target_mapping):
 56 |     """
 57 |     Maps edges according to new node names specified by source and target maps.
 58 | 
 59 |     edges : List[Tuple[str, str]]
 60 |     source_mapping : Dict[str, int]
 61 |     target_mapping : Dict[str, int]
 62 |     """
 63 |     source_nodes = [edge[0] for edge in edges]
 64 |     target_nodes = [edge[1] for edge in edges]
 65 |     mapped_nodes = [
 66 |         map(source_mapping.get, source_nodes),
 67 |         map(target_mapping.get, target_nodes),
 68 |     ]
 69 |     return list(zip(*mapped_nodes))
 70 | 
 71 | 
 72 | def map_str_edges(edges, bipartite):
 73 |     """
 74 |     Maps a list of edge tuples containing strings to a minimal set of
 75 |     integer edges.
 76 | 
 77 |     edges : List[Tuple[str, str]]
 78 |     bipartite : bool
 79 |         Whether to map source and target nodes using the same mapping.
 80 |         For example, an edge like ('1', '1') may refer to a connection between
 81 |         separate nodes, or it may be a self-loop. If `bipartite=True`, the
 82 |         edge would be mapped like (0, 1), where the new node ids reflect the fact
 83 |         that the same names do not indicate the same nodes. To ensure that names
 84 |         are consistently mapped between source and target, put `bipartite=False`.
 85 | 
 86 |     Returns:
 87 |     --------
 88 |     Tuple[List[Tuple[int, int]], Dict[int, str]]
 89 | 
 90 |     Example:
 91 |     --------
 92 |     >>> map_str_edges([('a', 'b'), ('b', 'c')], bipartite=False)
 93 | 
 94 |     ([(0, 1), (1, 2)], {0: 'a', 1: 'b', 2: 'c'})
 95 |     """
 96 |     source_nodes = [edge[0] for edge in edges]
 97 |     target_nodes = [edge[1] for edge in edges]
 98 | 
 99 |     # Two separate mappings to be used for source and target nodes
100 |     if bipartite:
101 |         source_map = _map_nodes_to_int(source_nodes)
102 |         target_map = _map_nodes_to_int(target_nodes)
103 | 
104 |     # One single mapping to be used for both source and target nodes
105 |     if not bipartite:
106 |         combined_nodes = list(set(source_nodes + target_nodes))
107 |         source_map = target_map = _map_nodes_to_int(combined_nodes)
108 | 
109 |     mapped_edges = _apply_map(edges, source_map, target_map)
110 |     return (mapped_edges, source_map, target_map)
111 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # XSwap: Fast degree-preserving network permutation
 2 | 
 3 | [![Linux Build Status](https://img.shields.io/travis/com/hetio/xswap/master.svg?logo=travis)](https://travis-ci.com/hetio/xswap)
 4 | [![PyPI](https://img.shields.io/pypi/v/xswap.svg?logo=pypi&logoColor=white)](https://pypi.org/project/xswap/)
 5 | [![GitHub issues](https://img.shields.io/github/issues/hetio/xswap.svg?logo=github)](https://github.com/hetio/xswap/issues)
 6 | 
 7 | **Full documentation:** <https://hetio.github.io/xswap/>
 8 | 
 9 | <img src="https://raw.githubusercontent.com/hetio/xswap/master/docs/img/xswap.svg?sanitize=true" width="250px">
10 | 
11 | XSwap is an algorithm for degree-preserving network randomization (permutation) [1].
12 | Permuted networks can be used for a number of purposes in network analysis, including for generating counterfactual distributions of features when only the network's degree sequence is maintained or for computing a prior probability of an edge given only the network's degree sequence.
13 | Overall, permuted networks allow one to quantify the effects of degree on analysis and prediction methods.
14 | Understanding this effect is useful when a network's degree sequence is subject to biases.
15 | This implementation is a modified version of the algorithm due to Hanhijärvi et al. with two additional parameters (`allow_self_loops` and `allow_antiparallel`), which enable greater generalizability to bipartite, directed, and undirected networks.
16 | 
17 | 1. **Randomization Techniques for Graphs**  
18 | Sami Hanhijärvi, Gemma C. Garriga, Kai Puolamäki  
19 | *Proceedings of the 2009 SIAM International Conference on Data Mining* (2009-04-30) <https://doi.org/f3mn58>  
20 | DOI: [10.1137/1.9781611972795.67](https://doi.org/10.1137/1.9781611972795.67)
21 | 
22 | ## Usage examples
23 | 
24 | #### Permuting an edge list
25 | 
26 | ```python
27 | >>> edges = [(0, 1), (1, 0)]
28 | >>> permuted_edges, permutation_statistics = xswap.permute_edge_list(
29 |         edges, allow_self_loops=True, allow_antiparallel=True,
30 |         multiplier=10)
31 | >>> permuted_edges
32 | [(0, 0), (1, 1)]
33 | >>> permutation_statistics
34 | {'swap_attempts': 20, 'same_edge': 10, 'self_loop': 0, 'duplicate': 1,
35 |  'undir_duplicate': 0, 'excluded': 0}
36 | ```
37 | 
38 | #### Computing degree-sequence based prior probabilities of edges existing
39 | 
40 | ```python
41 | >>> edges = [(0, 1), (1, 0)]
42 | >>> prior_prob_df = xswap.prior.compute_xswap_priors(
43 |         edges, n_permutations=10000, shape=(2, 2), allow_self_loops=True,
44 |         allow_antiparallel=True)
45 | >>> prior_prob_df
46 |    source_id  target_id   edge  source_degree  target_degree  xswap_prior
47 | 0          0          0  False              1              1          0.5
48 | 1          0          1   True              1              1          0.5
49 | 2          1          0   True              1              1          0.5
50 | 3          1          1  False              1              1          0.5
51 | ```
52 | 
53 | ## Choice of parameters
54 | 
55 | #### Bipartite networks
56 | 
57 | Bipartite networks should be indexed using the bi-adjacency matrix, meaning that the edge `(0, 0)` is from source node 0 to target node 0, and is not a self-loop.
58 | Moreover, because `(0, 0)` is an ordinary edge in a bipartite network, bipartite networks should be permuted using `allow_self_loops=True` and `allow_antiparallel=True`.
59 | 
60 | #### Directed and undirected networks
61 | 
62 | For non-bipartite networks, the decisions of `allow_self_loops` and `allow_antiparallel` are not always the same.
63 | For undirected networks, set `allow_antiparallel=False`, as otherwise the edges (1, 0) and (0, 1), which represent the same edge, will be treated as separate.
64 | Antiparallel edges may or may not be allowed for directed networks, depending on context.
65 | Similarly, self-loops may or may not be allowed for directed or undirected networks, depending on the specific network being permuted.
66 | 
67 | ## Libraries
68 | 
69 | The XSwap library includes [Roaring Bitmaps](https://github.com/RoaringBitmap/CRoaring), available under the [Apache 2.0 license](https://github.com/RoaringBitmap/CRoaring/blob/master/LICENSE).
70 | 
71 | ## Acknowledgments
72 | 
73 | Development of this project has largely taken place in the [Greene Lab](http://www.greenelab.com/) at the University of Pennsylvania. As an open source project under the `hetio` organization, this repository is grateful for its community of maintainers, contributors, and users.
74 | 
75 | This work is funded in part by the Gordon and Betty Moore Foundation’s Data-Driven Discovery Initiative through Grants [GBMF4552](https://www.moore.org/grant-detail?grantId=GBMF4552) to Casey Greene, [GBMF4560](https://www.moore.org/grant-detail?grantId=GBMF4560) to Blair Sullivan, and the National Institutes of Health’s National Human Genome Research Institute [R01 HG010067](http://grantome.com/grant/NIH/R01-HG010067-02).
76 | 


--------------------------------------------------------------------------------
/xswap/src/xswap_wrapper.cpp:
--------------------------------------------------------------------------------
  1 | #include "xswap.h"
  2 | 
  3 | #define XSWAP_MODULE
  4 | 
  5 | static Edges py_list_to_edges(PyObject *py_list) {
  6 |     int num_edges = (int)PyList_Size(py_list);
  7 |     int** edges_array = (int**)malloc(sizeof(int*) * num_edges);
  8 | 
  9 |     for (int i = 0; i < num_edges; i++) {
 10 |         edges_array[i] = (int*)malloc(sizeof(int) * 2);
 11 |         PyObject* py_tuple = PyList_GetItem(py_list, i);
 12 |         for (int j = 0; j < 2; j++) {
 13 |             PyObject* temp = PyTuple_GetItem(py_tuple, j);
 14 |             int value = (int)PyLong_AsLong(temp);
 15 |             edges_array[i][j] = value;
 16 |         }
 17 |     }
 18 |     Edges return_object;
 19 |     return_object.edge_array = edges_array;
 20 |     return_object.num_edges = num_edges;
 21 |     return return_object;
 22 | }
 23 | 
 24 | static PyObject* edge_to_py_tuple(int *edge) {
 25 |     PyObject* edge_tuple = PyTuple_New(2);
 26 |     for (int j = 0; j < 2; j++) {
 27 |         PyObject* node_id = PyLong_FromLong(edge[j]);
 28 |         PyTuple_SET_ITEM(edge_tuple, j, node_id);
 29 |     }
 30 |     return edge_tuple;
 31 | }
 32 | 
 33 | static PyObject* edges_to_py_list(Edges edges) {
 34 |     int num_edges = edges.num_edges;
 35 |     PyObject* py_list = PyList_New(num_edges);
 36 | 
 37 |     for (int i = 0; i < num_edges; i++) {
 38 |         PyObject* edge_tuple = edge_to_py_tuple(edges.edge_array[i]);
 39 |         PyList_SET_ITEM(py_list, i, edge_tuple);
 40 |     }
 41 |     return py_list;
 42 | }
 43 | 
 44 | static PyObject* stats_to_py_dict(statsCounter& stats) {
 45 |     PyObject* py_num_swaps = PyLong_FromLong(stats.num_swaps);
 46 |     PyObject* py_same_edge = PyLong_FromLong(stats.same_edge);
 47 |     PyObject* py_self_loop = PyLong_FromLong(stats.self_loop);
 48 |     PyObject* py_duplicate = PyLong_FromLong(stats.duplicate);
 49 |     PyObject* py_undir_duplicate = PyLong_FromLong(stats.undir_duplicate);
 50 |     PyObject* py_excluded = PyLong_FromLong(stats.excluded);
 51 | 
 52 |     PyObject* dict = PyDict_New();
 53 |     PyDict_SetItemString(dict, "swap_attempts", py_num_swaps);
 54 |     PyDict_SetItemString(dict, "same_edge", py_same_edge);
 55 |     PyDict_SetItemString(dict, "self_loop", py_self_loop);
 56 |     PyDict_SetItemString(dict, "duplicate", py_duplicate);
 57 |     PyDict_SetItemString(dict, "undir_duplicate", py_undir_duplicate);
 58 |     PyDict_SetItemString(dict, "excluded", py_excluded);
 59 |     return dict;
 60 | }
 61 | 
 62 | static PyObject* wrap_xswap(PyObject *self, PyObject *args) {
 63 |     // Get arguments from python and compute quantities where needed
 64 |     PyObject *py_edges, *py_excluded_edges;
 65 |     int max_id, num_swaps, seed, allow_self_loop, allow_antiparallel;
 66 |     unsigned long long int max_malloc;
 67 |     int parsed_successfully = PyArg_ParseTuple(args, "OOippiiK", &py_edges,
 68 |         &py_excluded_edges, &max_id, &allow_self_loop,
 69 |         &allow_antiparallel, &num_swaps, &seed, &max_malloc);
 70 |     if (!parsed_successfully)
 71 |         return NULL;
 72 | 
 73 |     // Load edges from python list
 74 |     Edges edges = py_list_to_edges(py_edges);
 75 |     edges.max_id = max_id;
 76 |     Edges excluded_edges = py_list_to_edges(py_excluded_edges);
 77 | 
 78 |     // Set the conditions under which new edges are accepted
 79 |     Conditions valid_cond;
 80 |     valid_cond.seed = seed;
 81 |     valid_cond.allow_self_loop = allow_self_loop;
 82 |     valid_cond.allow_antiparallel = allow_antiparallel;
 83 |     valid_cond.excluded_edges = excluded_edges;
 84 | 
 85 |     // Initialize stats counters for failure reasons
 86 |     statsCounter stats;
 87 |     stats.num_swaps = num_swaps;
 88 | 
 89 |     // Perform XSwap
 90 |     swap_edges(edges, num_swaps, valid_cond, &stats, max_malloc);
 91 | 
 92 |     // Get new edges as python list
 93 |     PyObject* py_list = edges_to_py_list(edges);
 94 | 
 95 |     // Get stats as python dict
 96 |     PyObject* stats_py_dict = stats_to_py_dict(stats);
 97 | 
 98 |     // Create and return a python tuple of new_edges, stats
 99 |     PyObject* return_tuple = PyTuple_New(2);
100 |     PyTuple_SET_ITEM(return_tuple, 0, py_list);
101 |     PyTuple_SET_ITEM(return_tuple, 1, stats_py_dict);
102 |     for (int i = 0; i < edges.num_edges; i++) {
103 |         free(edges.edge_array[i]);
104 |     }
105 |     free(edges.edge_array);
106 |     for (int i = 0; i < valid_cond.excluded_edges.num_edges; i++) {
107 |         free(valid_cond.excluded_edges.edge_array[i]);
108 |     }
109 |     free(valid_cond.excluded_edges.edge_array);
110 |     return return_tuple;
111 | }
112 | 
// Method table of the extension module: exposes `wrap_xswap` to Python under
// the name `_xswap`, called with positional arguments (METH_VARARGS). The
// all-NULL entry is the sentinel that terminates the table.
static PyMethodDef XSwapMethods[] = {
    {"_xswap", wrap_xswap, METH_VARARGS, "Backend for edge permutation"},
    {NULL, NULL, 0, NULL}
};
117 | 
// Module definition for `_xswap_backend`; the functions it exposes are listed
// in XSwapMethods.
static struct PyModuleDef xswapmodule = {
    PyModuleDef_HEAD_INIT,
    "_xswap_backend",  /* name of module */
    NULL,  /* module documentation, NULL */
    -1,  /* -1 since the module keeps state in global variables. */
    XSwapMethods
};
125 | 
// Module initialization function: creates the `_xswap_backend` module object
// from the `xswapmodule` definition and returns it.
PyMODINIT_FUNC PyInit__xswap_backend(void) {
    return PyModule_Create(&xswapmodule);
}
129 | 


--------------------------------------------------------------------------------
/tests/test_bitset.cpp:
--------------------------------------------------------------------------------
  1 | #include <cstdlib>
  2 | #include <cstdio>
  3 | #include <iostream>
  4 | #include <stdexcept>
  5 | #include "../xswap/src/xswap.h"
  6 | 
  7 | void handle_eptr(std::exception_ptr eptr) {
  8 |     try {
  9 |         if (eptr) {
 10 |             std::rethrow_exception(eptr);
 11 |         }
 12 |     } catch(const std::exception& e) {
 13 |         std::cout << "Unexpected exception while attempting bad element access " << e.what() << "\n";
 14 |     }
 15 | }
 16 | 
 17 | bool test_add(UncompressedBitSet edges_set) {
 18 |     int edge_to_add[2] = {1, 1};
 19 |     edges_set.add(edge_to_add);
 20 |     int** fake_edges = (int**)malloc(sizeof(int*) * 16);
 21 |     int counter = 0;
 22 |     for (int i = 0; i < 4; i++) {
 23 |         for (int j = 0; j < 4; j++) {
 24 |             fake_edges[counter] = (int*)malloc(sizeof(int) * 2);
 25 |             fake_edges[counter][0] = i;
 26 |             fake_edges[counter][1] = j;
 27 |             counter += 1;
 28 |         }
 29 |     }
 30 |     bool correctly_contains = edges_set.contains(edge_to_add);
 31 |     int num_incorrect = 0;
 32 |     for (int i = 0; i < 16; i++) {
 33 |         bool incorrectly_contains = edges_set.contains(fake_edges[i]);
 34 |         bool was_added = (fake_edges[i][0] == edge_to_add[0] && fake_edges[i][1] == edge_to_add[1]);
 35 |         if (incorrectly_contains and !was_added) {
 36 |             num_incorrect += 1;
 37 |             std::printf("Incorrectly contained: (%d, %d)\n", fake_edges[i][0], fake_edges[i][1]);
 38 |         }
 39 |     }
 40 |     free(fake_edges);
 41 |     if (num_incorrect == 0 && correctly_contains == true) {
 42 |         return true;
 43 |     } else {
 44 |         return false;
 45 |     }
 46 | }
 47 | 
 48 | bool test_remove(UncompressedBitSet edges_set) {
 49 |     int edge_to_add[2] = {1, 1};
 50 |     edges_set.add(edge_to_add);
 51 |     bool was_added = edges_set.contains(edge_to_add);
 52 |     edges_set.remove(edge_to_add);
 53 |     bool was_removed = !edges_set.contains(edge_to_add);
 54 |     bool passed = was_added && was_removed;
 55 |     if (!was_added)
 56 |         std::printf("Did not add edge properly");
 57 |     if (!was_removed)
 58 |         std::printf("Did not remove edge properly");
 59 |     return passed;
 60 | }
 61 | 
 62 | bool test_oob_insert(UncompressedBitSet edges_set) {
 63 | 
 64 |     int edge_to_add[2] = {4, 4};
 65 |     std::exception_ptr eptr;
 66 |     try {
 67 |         edges_set.add(edge_to_add);
 68 |     } catch(std::out_of_range) {
 69 |         return true;
 70 |     } catch(...) {
 71 |         eptr = std::current_exception();
 72 |         handle_eptr(eptr);
 73 |         return true;
 74 |     }
 75 |     std::printf("No exception on OOB insert\n");
 76 |     return false;
 77 | }
 78 | 
 79 | bool test_oob_access(UncompressedBitSet edges_set) {
 80 |     int edge_to_access[2] = {4, 4};
 81 |     std::exception_ptr eptr;
 82 |     try {
 83 |         edges_set.add(edge_to_access);
 84 |     } catch(std::out_of_range) {
 85 |         return true;
 86 |     } catch(...) {
 87 |         eptr = std::current_exception();
 88 |         handle_eptr(eptr);
 89 |         return true;
 90 |     }
 91 |     std::printf("No exception on OOB access\n");
 92 |     return false;
 93 | }
 94 | 
 95 | bool test_oob_remove(UncompressedBitSet edges_set) {
 96 |     int edge_to_access[2] = {4, 4};
 97 |     std::exception_ptr eptr;
 98 |     try {
 99 |         edges_set.add(edge_to_access);
100 |     } catch(std::out_of_range) {
101 |         return true;
102 |     } catch(...) {
103 |         eptr = std::current_exception();
104 |         handle_eptr(eptr);
105 |         return true;
106 |     }
107 |     std::printf("No exception on OOB removal\n");
108 |     return false;
109 | }
110 | 
111 | bool test_remove_nonexistent(UncompressedBitSet edges_set) {
112 |     int edge_to_access[2] = {2, 2};
113 |     std::exception_ptr eptr;
114 |     try {
115 |         edges_set.remove(edge_to_access);
116 |     } catch(std::logic_error) {
117 |         return true;
118 |     } catch(...) {
119 |         eptr = std::current_exception();
120 |         handle_eptr(eptr);
121 |         return true;
122 |     }
123 |     std::printf("No exception on removal of nonexisting element\n");
124 |     return false;
125 | }
126 | 
127 | bool test_insert_existing(UncompressedBitSet edges_set) {
128 |     int edge_to_access[2] = {2, 2};
129 |     edges_set.add(edge_to_access);
130 |     std::exception_ptr eptr;
131 |     try {
132 |         edges_set.add(edge_to_access);
133 |     } catch(std::logic_error) {
134 |         return true;
135 |     } catch(...) {
136 |         eptr = std::current_exception();
137 |         handle_eptr(eptr);
138 |         return true;
139 |     }
140 |     std::printf("No exception on addition of existing element\n");
141 |     return false;
142 | }
143 | 
144 | main(int argc, char const *argv[]) {
145 |     unsigned long long int max_malloc = 4000000;
146 |     int num_tests = 7;
147 |     bool test_passed[num_tests];
148 | 
149 |     UncompressedBitSet edges_set = UncompressedBitSet(3, max_malloc);
150 |     test_passed[0] = test_add(edges_set);
151 |     edges_set = UncompressedBitSet(3, max_malloc);  // Reset so functions don't interfere
152 |     test_passed[1] = test_remove(edges_set);
153 |     test_passed[2] = test_oob_insert(edges_set);
154 |     test_passed[3] = test_oob_access(edges_set);
155 |     test_passed[4] = test_oob_remove(edges_set);
156 |     edges_set = UncompressedBitSet(3, max_malloc);
157 |     test_passed[5] = test_remove_nonexistent(edges_set);
158 |     edges_set = UncompressedBitSet(3, max_malloc);
159 |     test_passed[6] = test_insert_existing(edges_set);
160 | 
161 |     bool all_tests_passed = true;
162 |     for (int i = 0; i < num_tests; i++) {
163 |         all_tests_passed &= test_passed[i];
164 |     }
165 | 
166 |     if (all_tests_passed) {
167 |         std::printf("All tests passed\n");
168 |         return 0;
169 |     } else {
170 |         std::printf("Test failure\n");
171 |         return 1;
172 |     }
173 |     edges_set.free_array();
174 | }
175 | 


--------------------------------------------------------------------------------
/xswap/src/bitset.cpp:
--------------------------------------------------------------------------------
  1 | #include <iostream>
  2 | #include <stdexcept>
  3 | #include "xswap.h"
  4 | 
// Number of bits in one `char`, assuming 8 bits per byte. Used below to turn
// a bit position into a byte index and a bit offset within that byte.
int CHAR_BITS = 8*sizeof(char);
  6 | 
  7 | size_t cantor_pair(int* edge) {
  8 |     size_t source = edge[0];
  9 |     size_t target = edge[1];
 10 |     return ((source + target) * (source + target + 1) / 2) + target;
 11 | }
 12 | 
 13 | UncompressedBitSet::UncompressedBitSet(int max_id, unsigned long long int max_malloc) {
 14 |     int max_pair[2] = {max_id, max_id};
 15 |     max_cantor = cantor_pair(max_pair);
 16 |     create_bitset(max_cantor, max_malloc);
 17 | }
 18 | 
 19 | UncompressedBitSet::UncompressedBitSet(Edges edges, unsigned long long int max_malloc) {
 20 |     int max_pair[2] = {edges.max_id, edges.max_id};
 21 |     max_cantor = cantor_pair(max_pair);
 22 |     create_bitset(max_cantor, max_malloc);
 23 |     for (int i = 0; i < edges.num_edges; i++) {
 24 |         add(edges.edge_array[i]);
 25 |     }
 26 | }
 27 | 
 28 | bool UncompressedBitSet::contains(int *edge) {
 29 |     size_t edge_cantor = cantor_pair(edge);
 30 |     if (edge_cantor > max_cantor)
 31 |         throw std::out_of_range("Attempting to check membership for out-of-bounds element.");
 32 |     return (bool)get_bit(bitset[edge_cantor / CHAR_BITS], edge_cantor % CHAR_BITS);
 33 | }
 34 | 
 35 | void UncompressedBitSet::add(int *edge) {
 36 |     size_t edge_cantor = cantor_pair(edge);
 37 |     if (edge_cantor > max_cantor) {
 38 |         throw std::out_of_range("Attempting to add an out-of-bounds element to the bitset.");
 39 |     }
 40 |     if (get_bit(bitset[edge_cantor / CHAR_BITS], edge_cantor % CHAR_BITS)) {
 41 |         throw std::logic_error("Attempting to add an existing element.");
 42 |     }
 43 |     set_bit_true(&bitset[edge_cantor / CHAR_BITS], edge_cantor % CHAR_BITS);
 44 | }
 45 | 
 46 | void UncompressedBitSet::remove(int *edge) {
 47 |     size_t edge_cantor = cantor_pair(edge);
 48 |     if (edge_cantor > max_cantor)
 49 |         throw std::out_of_range("Attempting to remove an out-of-bounds element.");
 50 |     if (!get_bit(bitset[edge_cantor / CHAR_BITS], edge_cantor % CHAR_BITS))
 51 |         throw std::logic_error("Attempting to remove a nonexisting element.");
 52 |     set_bit_false(&bitset[edge_cantor / CHAR_BITS], edge_cantor % CHAR_BITS);
 53 | }
 54 | 
 55 | void UncompressedBitSet::free_array() {
 56 |     free(bitset);
 57 | }
 58 | 
 59 | // num_elements corresponds to the minimum number of bits that are needed
 60 | void UncompressedBitSet::create_bitset(size_t num_elements,
 61 |                                        unsigned long long int max_malloc) {
 62 |     // Minimum sufficient number of bytes for the array "ceil(num_elements / CHAR_BITS)"
 63 |     size_t bytes_needed = (num_elements + CHAR_BITS - (num_elements % CHAR_BITS)) / CHAR_BITS;
 64 |     if (bytes_needed > max_malloc) {
 65 |         throw std::runtime_error("Bitset requires too much memory.");
 66 |     }
 67 |     bitset = (char*)calloc(bytes_needed, 1);
 68 | }
 69 | 
 70 | /* Gets the bit from byte `word` at position `bit_position`. In the array, bits
 71 |  correspond to cantor pair values 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, etc. To access
 72 |  the bit corresponding to cantor pair value 9, call `get_bit` with `word` equal
 73 |  to the second bit and `bit_position` equal to 1 (ie. the second bit).
 74 |  `word >> (7 - bit_position)` puts the selected bit in the least significant position */
 75 | char UncompressedBitSet::get_bit(char word, char bit_position) {
 76 |     return (word >> (7 - bit_position)) & 0x1;
 77 | }
 78 | 
 79 | void UncompressedBitSet::set_bit_true(char* word, char bit_position) {
 80 |     *word |= (0x1 << (7 - bit_position));
 81 | }
 82 | 
 83 | void UncompressedBitSet::set_bit_false(char* word, char bit_position) {
 84 |     *word &= ~(0x1 << (7 - bit_position));
 85 | }
 86 | 
 87 | RoaringBitSet::RoaringBitSet(Edges edges) {
 88 |     for (int i = 0; i < edges.num_edges; i++) {
 89 |         add(edges.edge_array[i]);
 90 |     }
 91 | }
 92 | 
 93 | bool RoaringBitSet::contains(int *edge) {
 94 |     int edge_cantor = cantor_pair(edge);
 95 |     return bitmap.contains(edge_cantor);
 96 | }
 97 | 
 98 | void RoaringBitSet::add(int *edge) {
 99 |     int edge_cantor = cantor_pair(edge);
100 |     bool success = bitmap.addChecked(edge_cantor);
101 |     if (!success) {
102 |         throw std::logic_error("Attempting to add an existing element.");
103 |     }
104 | }
105 | 
106 | void RoaringBitSet::remove(int *edge) {
107 |     int edge_cantor = cantor_pair(edge);
108 |     bool success = bitmap.removeChecked(edge_cantor);
109 |     if (!success) {
110 |         throw std::logic_error("Attempting to remove a nonexisting element.");
111 |     }
112 | }
113 | 
114 | BitSet::BitSet(Edges edges, unsigned long long int max_malloc) {
115 |     int max_pair[2] = {edges.max_id, edges.max_id};
116 |     size_t max_cantor = cantor_pair(max_pair);
117 | 
118 |     if (max_cantor < max_malloc) {
119 |         use_compressed = false;
120 |         uncompressed_set = UncompressedBitSet(edges, max_malloc);
121 |     } else {
122 |         runtime_warning_roaring();
123 |         use_compressed = true;
124 |         compressed_set = RoaringBitSet(edges);
125 |     }
126 | }
127 | 
128 | PyObject *BitSet::runtime_warning_roaring(void) {
129 |     // Roaring bitset is significantly slower, but used because of large network sizes
130 |     PyErr_WarnEx(PyExc_RuntimeWarning, "Using Roaring bitset because of the large number of edges.", 2);
131 |     return NULL;
132 | }
133 | 
134 | bool BitSet::contains(int *edge) {
135 |     if (use_compressed) {
136 |         return compressed_set.contains(edge);
137 |     } else {
138 |         return uncompressed_set.contains(edge);
139 |     }
140 | }
141 | 
142 | void BitSet::add(int *edge) {
143 |     if (use_compressed) {
144 |         return compressed_set.add(edge);
145 |     } else {
146 |         return uncompressed_set.add(edge);
147 |     }
148 | }
149 | 
150 | void BitSet::remove(int *edge) {
151 |     if (use_compressed) {
152 |         return compressed_set.remove(edge);
153 |     } else {
154 |         return uncompressed_set.remove(edge);
155 |     }
156 | }
157 | 
158 | void BitSet::free_array() {
159 |     if (use_compressed) {
160 |         return;
161 |     } else {
162 |         uncompressed_set.free_array();
163 |     }
164 | }
165 | 


--------------------------------------------------------------------------------
/xswap/prior.py:
--------------------------------------------------------------------------------
  1 | from typing import List, Tuple
  2 | 
  3 | import numpy
  4 | import pandas
  5 | import scipy.sparse
  6 | 
  7 | import xswap.network_formats
  8 | 
  9 | 
 10 | def compute_xswap_occurrence_matrix(edge_list: List[Tuple[int, int]],
 11 |                                     n_permutations: int,
 12 |                                     shape: Tuple[int, int],
 13 |                                     allow_self_loops: bool = False,
 14 |                                     allow_antiparallel: bool = False,
 15 |                                     sparse: bool = True,
 16 |                                     swap_multiplier: float = 10,
 17 |                                     initial_seed: int = 0,
 18 |                                     max_malloc: int = 4000000000):
 19 |     """
 20 |     Compute the XSwap prior probability for every node pair in a network. The
 21 |     XSwap prior is the probability of a node pair having an edge between them in
 22 |     degree-preserving permutations of a network. The prior value for a node
 23 |     pair can be considered as the probability of an edge existing between two
 24 |     nodes given only the network's degree sequence.
 25 | 
 26 |     Parameters
 27 |     ----------
 28 |     edge_list : List[Tuple[int, int]]
 29 |         Edge list representing the graph whose XSwap edge priors are to be
 30 |         computed. Tuples contain integer values representing nodes. No value
 31 |         should be greater than C++'s `INT_MAX`, in this case 2_147_483_647.
 32 |         An adjacency matrix will be created assuming that a node's value is its
 33 |         index in the matrix. If not, map edges (identifiers can be string or
 34 |         otherwise) using `xswap.preprocessing.map_str_edges`.
 35 |     n_permutations : int
 36 |         The number of permuted networks used to compute the empirical XSwap prior
 37 |     shape : Tuple[int, int]
 38 |         The shape of the matrix to be returned. In other words, a tuple of the
 39 |         number of source and target nodes.
 40 |     allow_self_loops : bool
 41 |         Whether to allow edges like (0, 0). In the case of bipartite graphs,
 42 |         such an edge represents a connection between two distinct nodes, while
 43 |         in other graphs it may represent an edge from a node to itself, in which
 44 |         case an edge may or may not be meaningful depending on context.
 45 |     allow_antiparallel : bool
 46 |         Whether to allow simultaneous edges like (0, 1) and (1, 0). In the case
 47 |         of bipartite graphs, these edges represent two connections between four
 48 |         distinct nodes, while for other graphs, these may be connections between
 49 |         the same two nodes.
 50 |     sparse : bool
 51 |         Whether to use a sparse matrix when adding up edge occurrences across
 52 |         permutations. If large changes in sparsity are expected, a dense
 53 |         array may be preferable.
 54 |     swap_multiplier : float
 55 |         The number of edge swap attempts is determined by the product of the
 56 |         number of existing edges and multiplier. For example, if five edges are
 57 |         passed and multiplier is set to 10, 50 swaps will be attempted. Non-integer
 58 |         products will be rounded down to the nearest integer.
 59 |     initial_seed : int
 60 |         Random seed that will be passed to the C++ Mersenne Twister 19937 random
 61 |         number generator. `initial_seed` will be used for the first permutation,
 62 |         and the seed used for each subsequent permutation will be incremented by
 63 |         one. For example, if `initial_seed` is 0 and `n_permutations` is 2, then
 64 |         the two permutations will pass seeds 0 and 1, respectively.
 65 |     max_malloc : int (`unsigned long long int` in C)
 66 |         The maximum amount of memory to be allocated using `malloc` when making
 67 |         a bitset to hold edges. An uncompressed bitset is implemented for
 68 |         holding edges that is significantly faster than alternatives. However,
 69 |         it is memory-inefficient and will not be used if more memory is required
 70 |         than `max_malloc`. Above the threshold, a Roaring bitset will be used.
 71 | 
 72 |     Returns
 73 |     -------
 74 |     edge_counter : scipy.sparse.csc_matrix
 75 |         Adjacency matrix with entries equal to the number of permutations in
 76 |         which a given edge appeared
 77 |     """
 78 |     import xswap._xswap_backend
 79 |     if len(edge_list) != len(set(edge_list)):
 80 |         raise ValueError("Edge list contained duplicate edges. "
 81 |                          "XSwap does not support multigraphs.")
 82 | 
 83 |     num_swaps = int(swap_multiplier * len(edge_list))
 84 | 
 85 |     max_id = max(map(max, edge_list))
 86 | 
 87 |     if sparse:
 88 |         edge_counter = scipy.sparse.csc_matrix(shape, dtype=int)
 89 |     else:
 90 |         edge_counter = numpy.zeros(shape, dtype=int)
 91 | 
 92 |     for i in range(n_permutations):
 93 |         permuted_edges, stats = xswap._xswap_backend._xswap(
 94 |             edge_list, [], max_id, allow_self_loops, allow_antiparallel,
 95 |             num_swaps, initial_seed + i, max_malloc)
 96 |         permuted_matrix = xswap.network_formats.edges_to_matrix(
 97 |             permuted_edges, add_reverse_edges=(not allow_antiparallel),
 98 |             shape=shape, dtype=int, sparse=sparse)
 99 |         edge_counter += permuted_matrix
100 | 
101 |     return edge_counter
102 | 
103 | 
def compute_xswap_priors(edge_list: List[Tuple[int, int]], n_permutations: int,
                         shape: Tuple[int, int], allow_self_loops: bool = False,
                         allow_antiparallel: bool = False, sparse: bool = True,
                         swap_multiplier: float = 10, initial_seed: int = 0,
                         max_malloc: int = 4000000000,
                         dtypes = {'id': numpy.uint16, 'degree': numpy.uint16,
                                   'edge': bool, 'xswap_prior': float},
                        ):
    """
    Compute the XSwap prior for every potential edge in the network. Uses
    degree-grouping to maximize the effective number of permutations for each
    node pair. That is, node pairs with the same source and target degrees can
    be grouped when computing the XSwap prior, allowing there to be more
    permutations for some node pairs than `n_permutations`.

    Note that the mechanics of this function are separated to minimize memory use.

    Parameters
    ----------
    edge_list : List[Tuple[int, int]]
        Edge list representing the graph whose XSwap edge priors are to be
        computed. Tuples contain integer values representing nodes. No value
        should be greater than C++'s `INT_MAX`, in this case 2_147_483_647.
        An adjacency matrix will be created assuming that a node's value is its
        index in the matrix. If not, map edges (identifiers can be string or
        otherwise) using `xswap.preprocessing.map_str_edges`.
    n_permutations : int
        The number of permuted networks used to compute the empirical XSwap prior
    shape : Tuple[int, int]
        The shape of the matrix to be returned. In other words, a tuple of the
        number of source and target nodes.
    allow_self_loops : bool
        Whether to allow edges like (0, 0). In the case of bipartite graphs,
        such an edge represents a connection between two distinct nodes, while
        in other graphs it may represent an edge from a node to itself, in which
        case an edge may or may not be meaningful depending on context.
    allow_antiparallel : bool
        Whether to allow simultaneous edges like (0, 1) and (1, 0). In the case
        of bipartite graphs, these edges represent two connections between four
        distinct nodes, while for other graphs, these may be connections between
        the same two nodes.
    sparse : bool
        Whether to use a sparse matrix when adding up edge occurrences across
        permutations. If large changes in sparsity are expected, a dense
        array may be preferable.
    swap_multiplier : float
        The number of edge swap attempts is determined by the product of the
        number of existing edges and multiplier. For example, if five edges are
        passed and multiplier is set to 10, 50 swaps will be attempted. Non-integer
        products will be rounded down to the nearest integer.
    initial_seed : int
        Random seed that will be passed to the C++ Mersenne Twister 19937 random
        number generator. `initial_seed` will be used for the first permutation,
        and the seed used for each subsequent permutation will be incremented by
        one. For example, if `initial_seed` is 0 and `n_permutations` is 2, then
        the two permutations will pass seeds 0 and 1, respectively.
    max_malloc : int (`unsigned long long int` in C)
        The maximum amount of memory to be allocated using `malloc` when making
        a bitset to hold edges. An uncompressed bitset is implemented for
        holding edges that is significantly faster than alternatives. However,
        it is memory-inefficient and will not be used if more memory is required
        than `max_malloc`. Above the threshold, a Roaring bitset will be used.
    dtypes : dict
        Dictionary mapping returned column types to dtypes. Keys should be
        `'id'`, `'degree'`, `'edge'`, and `'xswap_prior'`. `dtype` need only
        be changed from its defaults if the values of `id` or `degree` are
        greater than the maxima in the default dtypes, or in cases where greater
        precision is desired. (`numpy.uint16` has a maximum value of 65535.)
        The dictionary is only read, never modified.

    Returns
    -------
    prior_df : pandas.DataFrame
        Columns are the following:
        [source_id, target_id, edge, source_degree, target_degree, xswap_prior]
    """
    # Compute the adjacency matrix of the original (unpermuted) network
    original_edges = xswap.network_formats.edges_to_matrix(
        edge_list, add_reverse_edges=(not allow_antiparallel), shape=shape,
        dtype=dtypes['edge'], sparse=True)

    # Setup DataFrame for recording prior data. Rows are node pairs in
    # row-major order of the adjacency matrix.
    prior_df = pandas.DataFrame({
        'source_id': numpy.repeat(numpy.arange(shape[0], dtype=dtypes['id']), shape[1]),
        'target_id': numpy.tile(numpy.arange(shape[1], dtype=dtypes['id']), shape[0]),
        'edge': original_edges.toarray().flatten(),
    })
    del original_edges

    # The string 'sum' selects pandas' own group sum, which is what the builtin
    # `sum` has been dispatched to; newer pandas versions warn about the builtin.
    prior_df['source_degree'] = (prior_df
                                 .groupby('source_id')
                                 .transform('sum')['edge']
                                 .astype(dtypes['degree']))
    del prior_df['source_id']

    prior_df['target_degree'] = (prior_df
                                 .groupby('target_id')
                                 .transform('sum')['edge']
                                 .astype(dtypes['degree']))
    del prior_df['target_id']

    # Compute the number of occurrences of each edge across permutations
    edge_counter = compute_xswap_occurrence_matrix(
        edge_list=edge_list, n_permutations=n_permutations, shape=shape,
        allow_self_loops=allow_self_loops, allow_antiparallel=allow_antiparallel,
        sparse=sparse, swap_multiplier=swap_multiplier, initial_seed=initial_seed,
        max_malloc=max_malloc)

    # With `sparse=False` the counter is a dense array, which has no `toarray`
    # method, so only densify when it really is a sparse matrix.
    if scipy.sparse.issparse(edge_counter):
        edge_counter = edge_counter.toarray()
    prior_df['num_permuted_edges'] = numpy.asarray(edge_counter).flatten()
    del edge_counter

    # The number of edges that occurred across all node pairs with the same
    # `source_degree` and `target_degree`. This is a count accumulated over
    # permutations and node pairs, not a degree, so it must NOT be cast to
    # `dtypes['degree']`: the default uint16 silently wraps above 65535.
    dgp_edge_count = (
        prior_df
        .groupby(['source_degree', 'target_degree'])
        .transform('sum')['num_permuted_edges']
        .values
    )
    del prior_df['num_permuted_edges']

    # The effective number of permutations for every node pair, incorporating
    # degree-grouping
    num_dgp = (
        n_permutations * prior_df.groupby(['source_degree', 'target_degree'])
                                 .transform(len)['edge']
                                 .values
    )
    xswap_prior = (dgp_edge_count / num_dgp).astype(dtypes['xswap_prior'])
    del dgp_edge_count, num_dgp

    prior_df['xswap_prior'] = xswap_prior
    del xswap_prior

    # Re-create the id columns that were dropped above to save memory, and put
    # the columns in their documented order.
    prior_df = (
        prior_df
        .assign(
            source_id=numpy.repeat(numpy.arange(shape[0], dtype=dtypes['id']), shape[1]),
            target_id=numpy.tile(numpy.arange(shape[1], dtype=dtypes['id']), shape[0]),
        )
        .filter(items=['source_id', 'target_id', 'edge', 'source_degree',
                       'target_degree', 'xswap_prior'])
    )
    return prior_df
248 | 
249 | 
def approximate_xswap_prior(source_degree, target_degree, num_edges):
    """
    Closed-form estimate of the XSwap prior, obtained by treating the XSwap
    Markov Chain as stationary. That assumption does not hold in reality, but
    for some networks the resulting estimate is very close to the true prior.

    The value returned is `d / sqrt(d ** 2 + r ** 2)` with
    `d = source_degree * target_degree` and
    `r = num_edges - source_degree - target_degree + 1`.

    Parameters
    ----------
    source_degree : int, float, numpy.array, or pandas.Series
        The source degree for a single node pair or a number of source degrees.
        The type of object passed should match `target_degree`.
    target_degree : int, float, numpy.array, or pandas.Series
        The target degree for a single node pair or a number of target degrees.
        The type of object passed should match `source_degree`.
    num_edges : int or float
        The total number of edges in the network

    Returns
    -------
    approximate_prior : float, numpy.array, or pandas.Series
        Output type matches the types of `source_degree` and `target_degree`.
    """
    degree_product = source_degree * target_degree
    remainder = num_edges - source_degree - target_degree + 1
    norm = (degree_product ** 2 + remainder ** 2) ** 0.5
    return degree_product / norm
276 | 


--------------------------------------------------------------------------------
/xswap/lib/roaring.hh:
--------------------------------------------------------------------------------
   1 | /* auto-generated on Lun 14 jan 2019 11:35:33 EST. Do not edit! */
   2 | #include "roaring.h"
   3 | /* begin file /Users/dlemire/CVS/github/CRoaring/cpp/roaring.hh */
   4 | /*
   5 | A C++ header for Roaring Bitmaps.
   6 | */
   7 | #ifndef INCLUDE_ROARING_HH_
   8 | #define INCLUDE_ROARING_HH_
   9 | 
  10 | #include <stdarg.h>
  11 | 
  12 | #include <algorithm>
  13 | #include <new>
  14 | #include <stdexcept>
  15 | #include <string>
  16 | 
  17 | class RoaringSetBitForwardIterator;
  18 | 
  19 | class Roaring {
  20 |    public:
  21 |     /**
  22 |      * Create an empty bitmap
  23 |      */
  24 |     Roaring() {
  25 |         ra_init(&roaring.high_low_container);
  26 |         roaring.copy_on_write = false;
  27 |     }
  28 | 
  29 |     /**
  30 |      * Construct a bitmap from a list of integer values.
  31 |      */
  32 |     Roaring(size_t n, const uint32_t *data) : Roaring() {
  33 |         roaring_bitmap_add_many(&roaring, n, data);
  34 |     }
  35 | 
  36 |     /**
  37 |      * Copy constructor
  38 |      */
  39 |     Roaring(const Roaring &r) {
  40 |         bool is_ok =
  41 |             ra_copy(&r.roaring.high_low_container, &roaring.high_low_container,
  42 |                     r.roaring.copy_on_write);
  43 |         if (!is_ok) {
  44 |             throw std::runtime_error("failed memory alloc in constructor");
  45 |         }
  46 |         roaring.copy_on_write = r.roaring.copy_on_write;
  47 |     }
  48 | 
  49 |     /**
  50 |      * Move constructor. The moved object remains valid, i.e.
  51 |      * all methods can still be called on it.
  52 |      */
  53 |     Roaring(Roaring &&r) noexcept {
  54 |         roaring = std::move(r.roaring);
  55 |         r.roaring.copy_on_write = false;
  56 |         ra_init(&r.roaring.high_low_container);
  57 |     }
  58 | 
  59 |     /**
  60 |      * Construct a roaring object from the C struct.
  61 |      *
  62 |      * Passing a NULL point is unsafe.
  63 |      * the pointer to the C struct will be invalid after the call.
  64 |      */
  65 |     Roaring(roaring_bitmap_t *s) noexcept {
  66 |         // steal the interior struct
  67 |         roaring.high_low_container = s->high_low_container;
  68 |         roaring.copy_on_write = s->copy_on_write;
  69 |         // deallocate the old container
  70 |         free(s);
  71 |     }
  72 | 
  73 |     /**
  74 |      * Construct a bitmap from a list of integer values.
  75 |      */
  76 |     static Roaring bitmapOf(size_t n, ...) {
  77 |         Roaring ans;
  78 |         va_list vl;
  79 |         va_start(vl, n);
  80 |         for (size_t i = 0; i < n; i++) {
  81 |             ans.add(va_arg(vl, uint32_t));
  82 |         }
  83 |         va_end(vl);
  84 |         return ans;
  85 |     }
  86 | 
  87 |     /**
  88 |      * Add value x
  89 |      *
  90 |      */
  91 |     void add(uint32_t x) { roaring_bitmap_add(&roaring, x); }
  92 | 
  93 |     /**
  94 |      * Add value x
  95 |      * Returns true if a new value was added, false if the value was already existing.
  96 |      */
  97 |     bool addChecked(uint32_t x) {
  98 |         return roaring_bitmap_add_checked(&roaring, x);
  99 |     }
 100 | 
 101 |     /**
 102 |     * add if all values from x (included) to y (excluded)
 103 |     */
 104 |     void addRange(const uint64_t x, const uint64_t y)  {
 105 |         return roaring_bitmap_add_range(&roaring, x, y);
 106 |     }
 107 | 
 108 |     /**
 109 |      * Add value n_args from pointer vals
 110 |      *
 111 |      */
 112 |     void addMany(size_t n_args, const uint32_t *vals) {
 113 |         roaring_bitmap_add_many(&roaring, n_args, vals);
 114 |     }
 115 | 
 116 |     /**
 117 |      * Remove value x
 118 |      *
 119 |      */
 120 |     void remove(uint32_t x) { roaring_bitmap_remove(&roaring, x); }
 121 | 
 122 |     /**
 123 |      * Remove value x
 124 |      * Returns true if a new value was removed, false if the value was not existing.
 125 |      */
 126 |     bool removeChecked(uint32_t x) {
 127 |         return roaring_bitmap_remove_checked(&roaring, x);
 128 |     }
 129 | 
 130 |     /**
 131 |      * Return the largest value (if not empty)
 132 |      *
 133 |      */
 134 |     uint32_t maximum() const { return roaring_bitmap_maximum(&roaring); }
 135 | 
 136 |     /**
 137 |     * Return the smallest value (if not empty)
 138 |     *
 139 |     */
 140 |     uint32_t minimum() const { return roaring_bitmap_minimum(&roaring); }
 141 | 
 142 |     /**
 143 |      * Check if value x is present
 144 |      */
 145 |     bool contains(uint32_t x) const {
 146 |         return roaring_bitmap_contains(&roaring, x);
 147 |     }
 148 | 
 149 |     /**
 150 |     * Check if all values from x (included) to y (excluded) are present
 151 |     */
 152 |     bool containsRange(const uint64_t x, const uint64_t y) const {
 153 |         return roaring_bitmap_contains_range(&roaring, x, y);
 154 |     }
 155 | 
 156 |     /**
 157 |      * Destructor
 158 |      */
 159 |     ~Roaring() { ra_clear(&roaring.high_low_container); }
 160 | 
 161 |     /**
 162 |      * Copies the content of the provided bitmap, and
 163 |      * discard the current content.
 164 |      */
 165 |     Roaring &operator=(const Roaring &r) {
 166 |         ra_clear(&roaring.high_low_container);
 167 |         bool is_ok =
 168 |             ra_copy(&r.roaring.high_low_container, &roaring.high_low_container,
 169 |                     r.roaring.copy_on_write);
 170 |         if (!is_ok) {
 171 |             throw std::runtime_error("failed memory alloc in assignment");
 172 |         }
 173 |         roaring.copy_on_write = r.roaring.copy_on_write;
 174 |         return *this;
 175 |     }
 176 | 
 177 |     /**
 178 |      * Moves the content of the provided bitmap, and
 179 |      * discard the current content.
 180 |      */
 181 |     Roaring &operator=(Roaring &&r) noexcept {
 182 |         ra_clear(&roaring.high_low_container);
 183 |         roaring = std::move(r.roaring);
 184 |         r.roaring.copy_on_write = false;
 185 |         ra_init(&r.roaring.high_low_container);
 186 |         return *this;
 187 |     }
 188 | 
 189 |     /**
 190 |      * Compute the intersection between the current bitmap and the provided
 191 |      * bitmap,
 192 |      * writing the result in the current bitmap. The provided bitmap is not
 193 |      * modified.
 194 |      */
 195 |     Roaring &operator&=(const Roaring &r) {
 196 |         roaring_bitmap_and_inplace(&roaring, &r.roaring);
 197 |         return *this;
 198 |     }
 199 | 
 200 |     /**
 201 |      * Compute the difference between the current bitmap and the provided
 202 |      * bitmap,
 203 |      * writing the result in the current bitmap. The provided bitmap is not
 204 |      * modified.
 205 |      */
 206 |     Roaring &operator-=(const Roaring &r) {
 207 |         roaring_bitmap_andnot_inplace(&roaring, &r.roaring);
 208 |         return *this;
 209 |     }
 210 | 
 211 |     /**
 212 |      * Compute the union between the current bitmap and the provided bitmap,
 213 |      * writing the result in the current bitmap. The provided bitmap is not
 214 |      * modified.
 215 |      *
 216 |      * See also the fastunion function to aggregate many bitmaps more quickly.
 217 |      */
 218 |     Roaring &operator|=(const Roaring &r) {
 219 |         roaring_bitmap_or_inplace(&roaring, &r.roaring);
 220 |         return *this;
 221 |     }
 222 | 
 223 |     /**
 224 |      * Compute the symmetric union between the current bitmap and the provided
 225 |      * bitmap,
 226 |      * writing the result in the current bitmap. The provided bitmap is not
 227 |      * modified.
 228 |      */
 229 |     Roaring &operator^=(const Roaring &r) {
 230 |         roaring_bitmap_xor_inplace(&roaring, &r.roaring);
 231 |         return *this;
 232 |     }
 233 | 
 234 |     /**
 235 |      * Exchange the content of this bitmap with another.
 236 |      */
 237 |     void swap(Roaring &r) { std::swap(r.roaring, roaring); }
 238 | 
 239 |     /**
 240 |      * Get the cardinality of the bitmap (number of elements).
 241 |      */
 242 |     uint64_t cardinality() const {
 243 |         return roaring_bitmap_get_cardinality(&roaring);
 244 |     }
 245 | 
 246 |     /**
 247 |     * Returns true if the bitmap is empty (cardinality is zero).
 248 |     */
 249 |     bool isEmpty() const { return roaring_bitmap_is_empty(&roaring); }
 250 | 
 251 |     /**
 252 |     * Returns true if the bitmap is subset of the other.
 253 |     */
 254 |     bool isSubset(const Roaring &r) const {
 255 |         return roaring_bitmap_is_subset(&roaring, &r.roaring);
 256 |     }
 257 | 
 258 |     /**
 259 |     * Returns true if the bitmap is strict subset of the other.
 260 |     */
 261 |     bool isStrictSubset(const Roaring &r) const {
 262 |         return roaring_bitmap_is_strict_subset(&roaring, &r.roaring);
 263 |     }
 264 | 
 265 |     /**
 266 |      * Convert the bitmap to an array. Write the output to "ans",
 267 |      * caller is responsible to ensure that there is enough memory
 268 |      * allocated
 269 |      * (e.g., ans = new uint32[mybitmap.cardinality()];)
 270 |      */
 271 |     void toUint32Array(uint32_t *ans) const {
 272 |         roaring_bitmap_to_uint32_array(&roaring, ans);
 273 |     }
 274 |     /**
 275 |      * to int array with pagination
 276 |      *
 277 |      */
 278 |     void rangeUint32Array(uint32_t *ans, size_t offset, size_t limit) const {
 279 |         roaring_bitmap_range_uint32_array(&roaring, offset, limit, ans);
 280 |     }
 281 | 
 282 |     /**
 283 |      * Return true if the two bitmaps contain the same elements.
 284 |      */
 285 |     bool operator==(const Roaring &r) const {
 286 |         return roaring_bitmap_equals(&roaring, &r.roaring);
 287 |     }
 288 | 
 289 |     /**
 290 |      * compute the negation of the roaring bitmap within a specified interval.
 291 |      * areas outside the range are passed through unchanged.
 292 |      */
 293 |     void flip(uint64_t range_start, uint64_t range_end) {
 294 |         roaring_bitmap_flip_inplace(&roaring, range_start, range_end);
 295 |     }
 296 | 
 297 |     /**
 298 |      *  Remove run-length encoding even when it is more space efficient
 299 |      *  return whether a change was applied
 300 |      */
 301 |     bool removeRunCompression() {
 302 |         return roaring_bitmap_remove_run_compression(&roaring);
 303 |     }
 304 | 
 305 |     /** convert array and bitmap containers to run containers when it is more
 306 |      * efficient;
 307 |      * also convert from run containers when more space efficient.  Returns
 308 |      * true if the result has at least one run container.
 309 |      * Additional savings might be possible by calling shrinkToFit().
 310 |      */
 311 |     bool runOptimize() { return roaring_bitmap_run_optimize(&roaring); }
 312 | 
 313 |     /**
 314 |      * If needed, reallocate memory to shrink the memory usage. Returns
 315 |      * the number of bytes saved.
 316 |     */
 317 |     size_t shrinkToFit() { return roaring_bitmap_shrink_to_fit(&roaring); }
 318 | 
 319 |     /**
 320 |      * Iterate over the bitmap elements. The function iterator is called once for
 321 |      * all the values with ptr (can be NULL) as the second parameter of each call.
 322 |      *
 323 |      * roaring_iterator is simply a pointer to a function that returns bool
 324 |      * (true means that the iteration should continue while false means that it
 325 |      * should stop), and takes (uint32_t,void*) as inputs.
 326 |      */
 327 |     void iterate(roaring_iterator iterator, void *ptr) const {
 328 |         roaring_iterate(&roaring, iterator, ptr);
 329 |     }
 330 | 
 331 |     /**
 332 |      * If the size of the roaring bitmap is strictly greater than rank, then
 333 |      * this function returns true and set element to the element of given rank.
 334 |      *   Otherwise, it returns false.
 335 |      */
 336 |     bool select(uint32_t rnk, uint32_t *element) const {
 337 |         return roaring_bitmap_select(&roaring, rnk, element);
 338 |     }
 339 | 
 340 |     /**
 341 |      * Computes the size of the intersection between two bitmaps.
 342 |      *
 343 |      */
 344 |     uint64_t and_cardinality(const Roaring &r) const {
 345 |         return roaring_bitmap_and_cardinality(&roaring, &r.roaring);
 346 |     }
 347 | 
 348 |     /**
 349 |      * Check whether the two bitmaps intersect.
 350 |      *
 351 |      */
 352 |     bool intersect(const Roaring &r) const {
 353 |     	 return roaring_bitmap_intersect(&roaring, &r.roaring);
 354 |     }
 355 | 
 356 |     /**
 357 |      * Computes the Jaccard index between two bitmaps. (Also known as the
 358 |      * Tanimoto distance,
 359 |      * or the Jaccard similarity coefficient)
 360 |      *
 361 |      * The Jaccard index is undefined if both bitmaps are empty.
 362 |      *
 363 |      */
 364 |     double jaccard_index(const Roaring &r) const {
 365 |         return roaring_bitmap_jaccard_index(&roaring, &r.roaring);
 366 |     }
 367 | 
 368 |     /**
 369 |      * Computes the size of the union between two bitmaps.
 370 |      *
 371 |      */
 372 |     uint64_t or_cardinality(const Roaring &r) const {
 373 |         return roaring_bitmap_or_cardinality(&roaring, &r.roaring);
 374 |     }
 375 | 
 376 |     /**
 377 |      * Computes the size of the difference (andnot) between two bitmaps.
 378 |      *
 379 |      */
 380 |     uint64_t andnot_cardinality(const Roaring &r) const {
 381 |         return roaring_bitmap_andnot_cardinality(&roaring, &r.roaring);
 382 |     }
 383 | 
 384 |     /**
 385 |      * Computes the size of the symmetric difference (xor) between two
 386 |      * bitmaps.
 387 |      *
 388 |      */
 389 |     uint64_t xor_cardinality(const Roaring &r) const {
 390 |         return roaring_bitmap_xor_cardinality(&roaring, &r.roaring);
 391 |     }
 392 | 
 393 |     /**
 394 |     * Returns the number of integers that are smaller or equal to x.
 395 |     */
 396 |     uint64_t rank(uint32_t x) const { return roaring_bitmap_rank(&roaring, x); }
 397 | 
 398 |     /**
 399 |     * write a bitmap to a char buffer. This is meant to be compatible with
 400 |     * the
 401 |     * Java and Go versions. Returns how many bytes were written which should be
 402 |     * getSizeInBytes().
 403 |     *
 404 |     * Setting the portable flag to false enable a custom format that
 405 |     * can save space compared to the portable format (e.g., for very
 406 |     * sparse bitmaps).
 407 |     *
 408 |     * Boost users can serialize bitmaps in this manner:
 409 |     *
 410 |     *       BOOST_SERIALIZATION_SPLIT_FREE(Roaring)
 411 |     *       namespace boost {
 412 |     *       namespace serialization {
 413 |     *
 414 |     *       template <class Archive>
 415 |     *       void save(Archive& ar, const Roaring& bitmask,
 416 |     *          const unsigned int version) {
 417 |     *         std::size_t expected_size_in_bytes = bitmask.getSizeInBytes();
 418 |     *         std::vector<char> buffer(expected_size_in_bytes);
 419 |     *         std::size_t       size_in_bytes = bitmask.write(buffer.data());
 420 |     *
 421 |     *         ar& size_in_bytes;
 422 |     *         ar& boost::serialization::make_binary_object(buffer.data(),
 423 |     *             size_in_bytes);
 424 |     *      }
 425 |     *      template <class Archive>
 426 |     *      void load(Archive& ar, Roaring& bitmask,
 427 |     *          const unsigned int version) {
 428 |     *         std::size_t size_in_bytes = 0;
 429 |     *         ar& size_in_bytes;
 430 |     *         std::vector<char> buffer(size_in_bytes);
 431 |     *         ar&  boost::serialization::make_binary_object(buffer.data(),
 432 |     *            size_in_bytes);
 433 |     *         bitmask = Roaring::readSafe(buffer.data(), size_in_bytes);
 434 |     *}
 435 |     *}  // namespace serialization
 436 |     *}  // namespace boost
 437 |     */
 438 |     size_t write(char *buf, bool portable = true) const {
 439 |         if (portable)
 440 |             return roaring_bitmap_portable_serialize(&roaring, buf);
 441 |         else
 442 |             return roaring_bitmap_serialize(&roaring, buf);
 443 |     }
 444 | 
 445 |     /**
 446 |      * read a bitmap from a serialized version. This is meant to be compatible
 447 |      * with the Java and Go versions.
 448 |      *
 449 |      * Setting the portable flag to false enable a custom format that
 450 |      * can save space compared to the portable format (e.g., for very
 451 |      * sparse bitmaps).
 452 |      *
 453 |      * This function is unsafe in the sense that if you provide bad data,
 454 |      * many, many bytes could be read. See also readSafe.
 455 |      */
 456 |     static Roaring read(const char *buf, bool portable = true) {
 457 |         roaring_bitmap_t * r = portable ? roaring_bitmap_portable_deserialize(buf) : roaring_bitmap_deserialize(buf);
 458 |         if (r == NULL) {
 459 |             throw std::runtime_error("failed alloc while reading");
 460 |         }
 461 |         return Roaring(r);
 462 |     }
 463 |     /**
 464 |      * read a bitmap from a serialized version, reading no more than maxbytes bytes.
 465 |      * This is meant to be compatible with the Java and Go versions.
 466 |      *
 467 |      */
 468 |     static Roaring readSafe(const char *buf, size_t maxbytes) {
 469 |         roaring_bitmap_t * r = roaring_bitmap_portable_deserialize_safe(buf,maxbytes);
 470 |         if (r == NULL) {
 471 |             throw std::runtime_error("failed alloc while reading");
 472 |         }
 473 |         return Roaring(r);
 474 |     }
 475 |     /**
 476 |      * How many bytes are required to serialize this bitmap (meant to be
 477 |      * compatible
 478 |      * with Java and Go versions)
 479 |      *
 480 |      * Setting the portable flag to false enable a custom format that
 481 |      * can save space compared to the portable format (e.g., for very
 482 |      * sparse bitmaps).
 483 |      */
 484 |     size_t getSizeInBytes(bool portable = true) const {
 485 |         if (portable)
 486 |             return roaring_bitmap_portable_size_in_bytes(&roaring);
 487 |         else
 488 |             return roaring_bitmap_size_in_bytes(&roaring);
 489 |     }
 490 | 
 491 |     /**
 492 |      * Computes the intersection between two bitmaps and returns new bitmap.
 493 |      * The current bitmap and the provided bitmap are unchanged.
 494 |      */
 495 |     Roaring operator&(const Roaring &o) const {
 496 |         roaring_bitmap_t *r = roaring_bitmap_and(&roaring, &o.roaring);
 497 |         if (r == NULL) {
 498 |             throw std::runtime_error("failed materalization in and");
 499 |         }
 500 |         return Roaring(r);
 501 |     }
 502 | 
 503 |     /**
 504 |      * Computes the difference between two bitmaps and returns new bitmap.
 505 |      * The current bitmap and the provided bitmap are unchanged.
 506 |      */
 507 |     Roaring operator-(const Roaring &o) const {
 508 |         roaring_bitmap_t *r = roaring_bitmap_andnot(&roaring, &o.roaring);
 509 |         if (r == NULL) {
 510 |             throw std::runtime_error("failed materalization in andnot");
 511 |         }
 512 |         return Roaring(r);
 513 |     }
 514 | 
 515 |     /**
 516 |      * Computes the union between two bitmaps and returns new bitmap.
 517 |      * The current bitmap and the provided bitmap are unchanged.
 518 |      */
 519 |     Roaring operator|(const Roaring &o) const {
 520 |         roaring_bitmap_t *r = roaring_bitmap_or(&roaring, &o.roaring);
 521 |         if (r == NULL) {
 522 |             throw std::runtime_error("failed materalization in or");
 523 |         }
 524 |         return Roaring(r);
 525 |     }
 526 | 
 527 |     /**
 528 |      * Computes the symmetric difference between two bitmaps and returns new bitmap.
 529 |      * The current bitmap and the provided bitmap are unchanged.
 530 |      */
 531 |     Roaring operator^(const Roaring &o) const {
 532 |         roaring_bitmap_t *r = roaring_bitmap_xor(&roaring, &o.roaring);
 533 |         if (r == NULL) {
 534 |             throw std::runtime_error("failed materalization in xor");
 535 |         }
 536 |         return Roaring(r);
 537 |     }
 538 | 
 539 |     /**
 540 |      * Whether or not we apply copy-on-write.
 541 |      */
 542 |     void setCopyOnWrite(bool val) { roaring.copy_on_write = val; }
 543 | 
 544 |     /**
 545 |      * Print the content of the bitmap
 546 |      */
 547 |     void printf() const { roaring_bitmap_printf(&roaring); }
 548 | 
 549 |     /**
 550 |      * Print the content of the bitmap into a string
 551 |      */
 552 |     std::string toString() const {
 553 |         struct iter_data {
 554 |             std::string str;
 555 |             char first_char = '{';
 556 |         } outer_iter_data;
 557 |         if (!isEmpty()) {
 558 |             iterate(
 559 |                 [](uint32_t value, void *inner_iter_data) -> bool {
 560 |                     ((iter_data *)inner_iter_data)->str +=
 561 |                         ((iter_data *)inner_iter_data)->first_char;
 562 |                     ((iter_data *)inner_iter_data)->str +=
 563 |                         std::to_string(value);
 564 |                     ((iter_data *)inner_iter_data)->first_char = ',';
 565 |                     return true;
 566 |                 },
 567 |                 (void *)&outer_iter_data);
 568 |         } else
 569 |             outer_iter_data.str = '{';
 570 |         outer_iter_data.str += '}';
 571 |         return outer_iter_data.str;
 572 |     }
 573 | 
 574 |     /**
 575 |      * Whether or not copy-on-write is active.
 576 |      */
 577 |     bool getCopyOnWrite() const { return roaring.copy_on_write; }
 578 | 
 579 |     /**
 580 |      * computes the logical or (union) between "n" bitmaps (referenced by a
 581 |      * pointer).
 582 |      */
 583 |     static Roaring fastunion(size_t n, const Roaring **inputs) {
 584 |         const roaring_bitmap_t **x =
 585 |             (const roaring_bitmap_t **)malloc(n * sizeof(roaring_bitmap_t *));
 586 |         if (x == NULL) {
 587 |             throw std::runtime_error("failed memory alloc in fastunion");
 588 |         }
 589 |         for (size_t k = 0; k < n; ++k) x[k] = &inputs[k]->roaring;
 590 | 
 591 |         roaring_bitmap_t *c_ans = roaring_bitmap_or_many(n, x);
 592 |         if (c_ans == NULL) {
 593 |             free(x);
 594 |             throw std::runtime_error("failed memory alloc in fastunion");
 595 |         }
 596 |         Roaring ans(c_ans);
 597 |         free(x);
 598 |         return ans;
 599 |     }
 600 | 
 601 |     typedef RoaringSetBitForwardIterator const_iterator;
 602 | 
 603 |     /**
 604 |     * Returns an iterator that can be used to access the position of the
 605 |     * set bits. The running time complexity of a full scan is proportional to
 606 |     * the
 607 |     * number
 608 |     * of set bits: be aware that if you have long strings of 1s, this can be
 609 |     * very inefficient.
 610 |     *
 611 |     * It can be much faster to use the toUint32Array method if you want to
 612 |     * retrieve the set bits.
 613 |     */
 614 |     const_iterator begin() const;
 615 | 
 616 |     /**
 617 |     * A bogus iterator that can be used together with begin()
 618 |     * for constructions such as for(auto i = b.begin();
 619 |     * i!=b.end(); ++i) {}
 620 |     */
 621 |     const_iterator &end() const;
 622 | 
 623 |     roaring_bitmap_t roaring;
 624 | };
 625 | 
 626 | /**
 627 |  * Used to go through the set bits. Not optimally fast, but convenient.
 628 |  */
 629 | class RoaringSetBitForwardIterator final {
 630 |    public:
 631 |     typedef std::forward_iterator_tag iterator_category;
 632 |     typedef uint32_t *pointer;
 633 |     typedef uint32_t &reference_type;
 634 |     typedef uint32_t value_type;
 635 |     typedef int32_t difference_type;
 636 |     typedef RoaringSetBitForwardIterator type_of_iterator;
 637 | 
 638 |     /**
 639 |      * Provides the location of the set bit.
 640 |      */
 641 |     value_type operator*() const { return i.current_value; }
 642 | 
 643 |     bool operator<(const type_of_iterator &o) {
 644 |         if (!i.has_value) return false;
 645 |         if (!o.i.has_value) return true;
 646 |         return i.current_value < *o;
 647 |     }
 648 | 
 649 |     bool operator<=(const type_of_iterator &o) {
 650 |         if (!o.i.has_value) return true;
 651 |         if (!i.has_value) return false;
 652 |         return i.current_value <= *o;
 653 |     }
 654 | 
 655 |     bool operator>(const type_of_iterator &o) {
 656 |         if (!o.i.has_value) return false;
 657 |         if (!i.has_value) return true;
 658 |         return i.current_value > *o;
 659 |     }
 660 | 
 661 |     bool operator>=(const type_of_iterator &o) {
 662 |         if (!i.has_value) return true;
 663 |         if (!o.i.has_value) return false;
 664 |         return i.current_value >= *o;
 665 |     }
 666 | 
 667 |     /**
 668 |     * Move the iterator to the first value >= val.
 669 |     */
 670 |     void equalorlarger(uint32_t val) {
 671 |       roaring_move_uint32_iterator_equalorlarger(&i,val);
 672 |     }
 673 | 
 674 |     type_of_iterator &operator++() {  // ++i, must returned inc. value
 675 |         roaring_advance_uint32_iterator(&i);
 676 |         return *this;
 677 |     }
 678 | 
 679 |     type_of_iterator operator++(int) {  // i++, must return orig. value
 680 |         RoaringSetBitForwardIterator orig(*this);
 681 |         roaring_advance_uint32_iterator(&i);
 682 |         return orig;
 683 |     }
 684 | 
 685 |     type_of_iterator& operator--() { // prefix --
 686 |         roaring_previous_uint32_iterator(&i);
 687 |         return *this;
 688 |     }
 689 | 
 690 |     type_of_iterator operator--(int) { // postfix --
 691 |         RoaringSetBitForwardIterator orig(*this);
 692 |         roaring_previous_uint32_iterator(&i);
 693 |         return orig;
 694 |     }
 695 | 
 696 |     bool operator==(const RoaringSetBitForwardIterator &o) const {
 697 |         return i.current_value == *o && i.has_value == o.i.has_value;
 698 |     }
 699 | 
 700 |     bool operator!=(const RoaringSetBitForwardIterator &o) const {
 701 |         return i.current_value != *o || i.has_value != o.i.has_value;
 702 |     }
 703 | 
 704 |     RoaringSetBitForwardIterator(const Roaring &parent,
 705 |                                  bool exhausted = false) {
 706 |         if (exhausted) {
 707 |             i.parent = &parent.roaring;
 708 |             i.container_index = INT32_MAX;
 709 |             i.has_value = false;
 710 |             i.current_value = UINT32_MAX;
 711 |         } else {
 712 |             roaring_init_iterator(&parent.roaring, &i);
 713 |         }
 714 |     }
 715 | 
 716 |     roaring_uint32_iterator_t i;
 717 | };
 718 | 
 719 | inline RoaringSetBitForwardIterator Roaring::begin() const {
 720 |     return RoaringSetBitForwardIterator(*this);
 721 | }
 722 | 
 723 | inline RoaringSetBitForwardIterator &Roaring::end() const {
 724 |     static RoaringSetBitForwardIterator e(*this, true);
 725 |     return e;
 726 | }
 727 | 
 728 | #endif /* INCLUDE_ROARING_HH_ */
 729 | /* end file /Users/dlemire/CVS/github/CRoaring/cpp/roaring.hh */
 730 | /* begin file /Users/dlemire/CVS/github/CRoaring/cpp/roaring64map.hh */
 731 | /*
 732 | A C++ header for 64-bit Roaring Bitmaps, implemented by way of a map of many
 733 | 32-bit Roaring Bitmaps.
 734 | */
 735 | #ifndef INCLUDE_ROARING_64_MAP_HH_
 736 | #define INCLUDE_ROARING_64_MAP_HH_
 737 | 
 738 | #include <algorithm>
 739 | #include <cstdarg>
 740 | #include <cstdio>
 741 | #include <limits>
 742 | #include <map>
 743 | #include <new>
 744 | #include <numeric>
 745 | #include <stdexcept>
 746 | #include <string>
 747 | #include <utility>
 748 | 
 749 | 
 750 | class Roaring64MapSetBitForwardIterator;
 751 | 
 752 | class Roaring64Map {
 753 |    public:
 754 |     /**
 755 |      * Create an empty bitmap
 756 |      */
 757 |     Roaring64Map() = default;
 758 | 
 759 |     /**
 760 |      * Construct a bitmap from a list of 32-bit integer values.
 761 |      */
 762 |     Roaring64Map(size_t n, const uint32_t *data) { addMany(n, data); }
 763 | 
 764 |     /**
 765 |      * Construct a bitmap from a list of 64-bit integer values.
 766 |      */
 767 |     Roaring64Map(size_t n, const uint64_t *data) { addMany(n, data); }
 768 | 
 769 |     /**
 770 |      * Construct a 64-bit map from a 32-bit one
 771 |      */
 772 |     Roaring64Map(const Roaring &r) { emplaceOrInsert(0, r); }
 773 | 
 774 |     /**
 775 |      * Construct a roaring object from the C struct.
 776 |      *
 777 |      * Passing a NULL pointer is unsafe.
 778 |      */
 779 |     Roaring64Map(roaring_bitmap_t *s) { emplaceOrInsert(0, s); }
 780 | 
 781 |     /**
 782 |      * Construct a bitmap from a list of integer values.
 783 |      */
 784 |     static Roaring64Map bitmapOf(size_t n...) {
 785 |         Roaring64Map ans;
 786 |         va_list vl;
 787 |         va_start(vl, n);
 788 |         for (size_t i = 0; i < n; i++) {
 789 |             ans.add(va_arg(vl, uint64_t));
 790 |         }
 791 |         va_end(vl);
 792 |         return ans;
 793 |     }
 794 | 
 795 |     /**
 796 |      * Add value x
 797 |      *
 798 |      */
 799 |     void add(uint32_t x) {
 800 |         roarings[0].add(x);
 801 |         roarings[0].setCopyOnWrite(copyOnWrite);
 802 |     }
 803 |     void add(uint64_t x) {
 804 |         roarings[highBytes(x)].add(lowBytes(x));
 805 |         roarings[highBytes(x)].setCopyOnWrite(copyOnWrite);
 806 |     }
 807 | 
 808 |     /**
 809 |      * Add value x
 810 |      * Returns true if a new value was added, false if the value was already existing.
 811 |      */
 812 |     bool addChecked(uint32_t x) {
 813 |         bool result = roarings[0].addChecked(x);
 814 |         roarings[0].setCopyOnWrite(copyOnWrite);
 815 |         return result;
 816 |     }
 817 |     bool addChecked(uint64_t x) {
 818 |         bool result = roarings[highBytes(x)].addChecked(lowBytes(x));
 819 |         roarings[highBytes(x)].setCopyOnWrite(copyOnWrite);
 820 |         return result;
 821 |     }
 822 | 
 823 |     /**
 824 |      * Add n_args values from pointer vals
 825 |      *
 826 |      */
 827 |     void addMany(size_t n_args, const uint32_t *vals) {
 828 |         for (size_t lcv = 0; lcv < n_args; lcv++) {
 829 |             roarings[0].add(vals[lcv]);
 830 |             roarings[0].setCopyOnWrite(copyOnWrite);
 831 |         }
 832 |     }
 833 |     void addMany(size_t n_args, const uint64_t *vals) {
 834 |         for (size_t lcv = 0; lcv < n_args; lcv++) {
 835 |             roarings[highBytes(vals[lcv])].add(lowBytes(vals[lcv]));
 836 |             roarings[highBytes(vals[lcv])].setCopyOnWrite(copyOnWrite);
 837 |         }
 838 |     }
 839 | 
 840 |     /**
 841 |      * Remove value x
 842 |      *
 843 |      */
 844 |     void remove(uint32_t x) { roarings[0].remove(x); }
 845 |     void remove(uint64_t x) {
 846 |         auto roaring_iter = roarings.find(highBytes(x));
 847 |         if (roaring_iter != roarings.cend())
 848 |             roaring_iter->second.remove(lowBytes(x));
 849 |     }
 850 | 
 851 |     /**
 852 |      * Remove value x
 853 |      * Returns true if the value was removed, false if it was not present.
 854 |      */
 855 |     bool removeChecked(uint32_t x) {
 856 |         return roarings[0].removeChecked(x);
 857 |     }
 858 |     bool removeChecked(uint64_t x) {
 859 |         auto roaring_iter = roarings.find(highBytes(x));
 860 |         if (roaring_iter != roarings.cend())
 861 |             return roaring_iter->second.removeChecked(lowBytes(x));
 862 |         return false;
 863 |     }
 864 | 
 865 |     /**
 866 |      * Return the largest value (if not empty)
 867 |      *
 868 |      */
 869 |     uint64_t maximum() const {
 870 |         for (auto roaring_iter = roarings.crbegin();
 871 |              roaring_iter != roarings.crend(); ++roaring_iter) {
 872 |             if (!roaring_iter->second.isEmpty()) {
 873 |                 return uniteBytes(roaring_iter->first,
 874 |                                   roaring_iter->second.maximum());
 875 |             }
 876 |         }
 877 |         // we put std::numeric_limits<>::max/min in parenthesis
 878 |         // to avoid a clash with the Windows.h header under Windows
 879 |         return (std::numeric_limits<uint64_t>::min)();
 880 |     }
 881 | 
 882 |     /**
 883 |      * Return the smallest value (if not empty)
 884 |      *
 885 |      */
 886 |     uint64_t minimum() const {
 887 |         for (auto roaring_iter = roarings.cbegin();
 888 |              roaring_iter != roarings.cend(); ++roaring_iter) {
 889 |             if (!roaring_iter->second.isEmpty()) {
 890 |                 return uniteBytes(roaring_iter->first,
 891 |                                   roaring_iter->second.minimum());
 892 |             }
 893 |         }
 894 |         // we put std::numeric_limits<>::max/min in parenthesis
 895 |         // to avoid a clash with the Windows.h header under Windows
 896 |         return (std::numeric_limits<uint64_t>::max)();
 897 |     }
 898 | 
 899 |     /**
 900 |      * Check if value x is present
 901 |      */
 902 |     bool contains(uint32_t x) const {
 903 |         return roarings.count(0) == 0 ? false : roarings.at(0).contains(x);
 904 |     }
 905 |     bool contains(uint64_t x) const {
 906 |         return roarings.count(highBytes(x)) == 0
 907 |                    ? false
 908 |                    : roarings.at(highBytes(x)).contains(lowBytes(x));
 909 |     }
 910 | 
 911 |     /**
 912 |      * Compute the intersection between the current bitmap and the provided
 913 |      * bitmap,
 914 |      * writing the result in the current bitmap. The provided bitmap is not
 915 |      * modified.
 916 |      */
 917 |     Roaring64Map &operator&=(const Roaring64Map &r) {
 918 |         for (auto &map_entry : roarings) {
 919 |             if (r.roarings.count(map_entry.first) == 1)
 920 |                 map_entry.second &= r.roarings.at(map_entry.first);
 921 |             else
 922 |                 map_entry.second = Roaring();
 923 |         }
 924 |         return *this;
 925 |     }
 926 | 
 927 |     /**
 928 |      * Compute the difference between the current bitmap and the provided
 929 |      * bitmap,
 930 |      * writing the result in the current bitmap. The provided bitmap is not
 931 |      * modified.
 932 |      */
 933 |     Roaring64Map &operator-=(const Roaring64Map &r) {
 934 |         for (auto &map_entry : roarings) {
 935 |             if (r.roarings.count(map_entry.first) == 1)
 936 |                 map_entry.second -= r.roarings.at(map_entry.first);
 937 |         }
 938 |         return *this;
 939 |     }
 940 | 
 941 |     /**
 942 |      * Compute the union between the current bitmap and the provided bitmap,
 943 |      * writing the result in the current bitmap. The provided bitmap is not
 944 |      * modified.
 945 |      *
 946 |      * See also the fastunion function to aggregate many bitmaps more quickly.
 947 |      */
 948 |     Roaring64Map &operator|=(const Roaring64Map &r) {
 949 |         for (const auto &map_entry : r.roarings) {
 950 |             if (roarings.count(map_entry.first) == 0) {
 951 |                 roarings[map_entry.first] = map_entry.second;
 952 |                 roarings[map_entry.first].setCopyOnWrite(copyOnWrite);
 953 |             } else
 954 |                 roarings[map_entry.first] |= map_entry.second;
 955 |         }
 956 |         return *this;
 957 |     }
 958 | 
 959 |     /**
 960 |      * Compute the symmetric difference between the current bitmap and the provided
 961 |      * bitmap,
 962 |      * writing the result in the current bitmap. The provided bitmap is not
 963 |      * modified.
 964 |      */
 965 |     Roaring64Map &operator^=(const Roaring64Map &r) {
 966 |         for (const auto &map_entry : r.roarings) {
 967 |             if (roarings.count(map_entry.first) == 0) {
 968 |                 roarings[map_entry.first] = map_entry.second;
 969 |                 roarings[map_entry.first].setCopyOnWrite(copyOnWrite);
 970 |             } else
 971 |                 roarings[map_entry.first] ^= map_entry.second;
 972 |         }
 973 |         return *this;
 974 |     }
 975 | 
 976 |     /**
 977 |      * Exchange the content of this bitmap with another.
 978 |      */
 979 |     void swap(Roaring64Map &r) { roarings.swap(r.roarings); }
 980 | 
 981 |     /**
 982 |      * Get the cardinality of the bitmap (number of elements).
 983 |      * Throws std::length_error in the special case where the bitmap is full
 984 |      * (cardinality() == 2^64). Check isFull() before calling to avoid
 985 |      * exception.
 986 |      */
 987 |     uint64_t cardinality() const {
 988 |         if (isFull()) {
 989 |             throw std::length_error(
 990 |                 "bitmap is full, cardinality is 2^64, "
 991 |                 "unable to represent in a 64-bit integer");
 992 |         }
 993 |         return std::accumulate(
 994 |             roarings.cbegin(), roarings.cend(), (uint64_t)0,
 995 |             [](uint64_t previous,
 996 |                const std::pair<uint32_t, Roaring> &map_entry) {
 997 |                 return previous + map_entry.second.cardinality();
 998 |             });
 999 |     }
1000 | 
1001 |     /**
1002 |     * Returns true if the bitmap is empty (cardinality is zero).
1003 |     */
1004 |     bool isEmpty() const {
1005 |         return std::all_of(roarings.cbegin(), roarings.cend(),
1006 |                            [](const std::pair<uint32_t, Roaring> &map_entry) {
1007 |                                return map_entry.second.isEmpty();
1008 |                            });
1009 |     }
1010 | 
1011 |     /**
1012 |     * Returns true if the bitmap is full (cardinality is max uint64_t + 1).
1013 |     */
1014 |     bool isFull() const {
1015 |         // only bother to check if map is fully saturated
1016 |         //
1017 |         // we put std::numeric_limits<>::max/min in parenthesis
1018 |         // to avoid a clash with the Windows.h header under Windows
1019 |         return roarings.size() ==
1020 |                        ((size_t)(std::numeric_limits<uint32_t>::max)()) + 1
1021 |                    ? std::all_of(
1022 |                          roarings.cbegin(), roarings.cend(),
1023 |                          [](const std::pair<uint32_t, Roaring> &roaring_map_entry) {
1024 |                              // roarings within map are saturated if cardinality
1025 |                              // is uint32_t max + 1
1026 |                              return roaring_map_entry.second.cardinality() ==
1027 |                                     ((uint64_t)
1028 |                                          (std::numeric_limits<uint32_t>::max)()) +
1029 |                                         1;
1030 |                          })
1031 |                    : false;
1032 |     }
1033 | 
1034 |     /**
1035 |     * Returns true if the bitmap is subset of the other.
1036 |     */
1037 |     bool isSubset(const Roaring64Map &r) const {
1038 |         for (const auto &map_entry : roarings) {
1039 |             auto roaring_iter = r.roarings.find(map_entry.first);
1040 |             if (roaring_iter == roarings.cend())
1041 |                 return false;
1042 |             else if (!map_entry.second.isSubset(roaring_iter->second))
1043 |                 return false;
1044 |         }
1045 |         return true;
1046 |     }
1047 | 
1048 |     /**
1049 |     * Returns true if the bitmap is strict subset of the other.
1050 |     * Throws std::length_error in the special case where the bitmap is full
1051 |     * (cardinality() == 2^64). Check isFull() before calling to avoid exception.
1052 |     */
1053 |     bool isStrictSubset(const Roaring64Map &r) const {
1054 |         return isSubset(r) && cardinality() != r.cardinality();
1055 |     }
1056 | 
1057 |     /**
1058 |      * Convert the bitmap to an array. Write the output to "ans",
1059 |      * caller is responsible to ensure that there is enough memory
1060 |      * allocated
1061 |      * (e.g., ans = new uint64_t[mybitmap.cardinality()];)
1062 |      */
1063 |     void toUint64Array(uint64_t *ans) const {
1064 |         // Annoyingly, VS 2017 marks std::accumulate() as [[nodiscard]]
1065 |         (void)std::accumulate(roarings.cbegin(), roarings.cend(), ans,
1066 |                               [](uint64_t *previous,
1067 |                                  const std::pair<uint32_t, Roaring> &map_entry) {
1068 |                                   for (uint32_t low_bits : map_entry.second)
1069 |                                       *previous++ =
1070 |                                           uniteBytes(map_entry.first, low_bits);
1071 |                                   return previous;
1072 |                               });
1073 |     }
1074 | 
1075 |     /**
1076 |      * Return true if the two bitmaps contain the same elements.
1077 |      */
1078 |     bool operator==(const Roaring64Map &r) const {
1079 |         // we cannot use operator == on the map because either side may contain
1080 |         // empty Roaring Bitmaps
1081 |         auto lhs_iter = roarings.cbegin();
1082 |         auto rhs_iter = r.roarings.cbegin();
1083 |         do {
1084 |             // if the left map has reached its end, ensure that the right map
1085 |             // contains only empty Bitmaps
1086 |             if (lhs_iter == roarings.cend()) {
1087 |                 while (rhs_iter != r.roarings.cend()) {
1088 |                     if (rhs_iter->second.isEmpty()) {
1089 |                         ++rhs_iter;
1090 |                         continue;
1091 |                     }
1092 |                     return false;
1093 |                 }
1094 |                 return true;
1095 |             }
1096 |             // if the left map has an empty bitmap, skip it
1097 |             if (lhs_iter->second.isEmpty()) {
1098 |                 ++lhs_iter;
1099 |                 continue;
1100 |             }
1101 | 
1102 |             do {
1103 |                 // if the right map has reached its end, ensure that the right
1104 |                 // map contains only empty Bitmaps
1105 |                 if (rhs_iter == r.roarings.cend()) {
1106 |                     while (lhs_iter != roarings.cend()) {
1107 |                         if (lhs_iter->second.isEmpty()) {
1108 |                             ++lhs_iter;
1109 |                             continue;
1110 |                         }
1111 |                         return false;
1112 |                     }
1113 |                     return true;
1114 |                 }
1115 |                 // if the right map has an empty bitmap, skip it
1116 |                 if (rhs_iter->second.isEmpty()) {
1117 |                     ++rhs_iter;
1118 |                     continue;
1119 |                 }
1120 |             } while (false);
1121 |             // if neither map has reached its end ensure elements are equal and
1122 |             // move to the next element in both
1123 |         } while (lhs_iter++->second == rhs_iter++->second);
1124 |         return false;
1125 |     }
1126 | 
1127 |     /**
1128 |      * compute the negation of the roaring bitmap within a specified interval.
1129 |      * areas outside the range are passed through unchanged.
1130 |      */
1131 |     void flip(uint64_t range_start, uint64_t range_end) {
1132 |         uint32_t start_high = highBytes(range_start);
1133 |         uint32_t start_low = lowBytes(range_start);
1134 |         uint32_t end_high = highBytes(range_end);
1135 |         uint32_t end_low = lowBytes(range_end);
1136 | 
1137 |         if (start_high == end_high) {
1138 |             roarings[start_high].flip(start_low, end_low);
1139 |             return;
1140 |         }
1141 |         // we put std::numeric_limits<>::max/min in parenthesis
1142 |         // to avoid a clash with the Windows.h header under Windows
1143 |         roarings[start_high].flip(start_low,
1144 |                                   (std::numeric_limits<uint32_t>::max)());
1145 |         roarings[start_high++].setCopyOnWrite(copyOnWrite);
1146 | 
1147 |         for (; start_high <= highBytes(range_end) - 1; ++start_high) {
1148 |             roarings[start_high].flip((std::numeric_limits<uint32_t>::min)(),
1149 |                                       (std::numeric_limits<uint32_t>::max)());
1150 |             roarings[start_high].setCopyOnWrite(copyOnWrite);
1151 |         }
1152 | 
1153 |         roarings[start_high].flip((std::numeric_limits<uint32_t>::min)(),
1154 |                                   end_low);
1155 |         roarings[start_high].setCopyOnWrite(copyOnWrite);
1156 |     }
1157 | 
1158 |     /**
1159 |      *  Remove run-length encoding even when it is more space efficient
1160 |      *  return whether a change was applied
1161 |      */
1162 |     bool removeRunCompression() {
1163 |         return std::accumulate(
1164 |             roarings.begin(), roarings.end(), false,
1165 |             [](bool previous, std::pair<const uint32_t, Roaring> &map_entry) {
1166 |                 return map_entry.second.removeRunCompression() && previous;
1167 |             });
1168 |     }
1169 | 
1170 |     /** convert array and bitmap containers to run containers when it is more
1171 |      * efficient;
1172 |      * also convert from run containers when more space efficient.  Returns
1173 |      * true if the result has at least one run container.
1174 |      * Additional savings might be possible by calling shrinkToFit().
1175 |      */
1176 |     bool runOptimize() {
1177 |         return std::accumulate(
1178 |             roarings.begin(), roarings.end(), false,
1179 |             [](bool previous, std::pair<const uint32_t, Roaring> &map_entry) {
1180 |                 return map_entry.second.runOptimize() && previous;
1181 |             });
1182 |     }
1183 | 
1184 |     /**
1185 |      * If needed, reallocate memory to shrink the memory usage. Returns
1186 |      * the number of bytes saved.
1187 |     */
1188 |     size_t shrinkToFit() {
1189 |         size_t savedBytes = 0;
1190 |         auto iter = roarings.begin();
1191 |         while (iter != roarings.cend()) {
1192 |             if (iter->second.isEmpty()) {
1193 |                 // empty Roarings are 84 bytes
1194 |                 savedBytes += 88;
1195 |                 roarings.erase(iter++);
1196 |             } else {
1197 |                 savedBytes += iter->second.shrinkToFit();
1198 |                 iter++;
1199 |             }
1200 |         }
1201 |         return savedBytes;
1202 |     }
1203 | 
1204 |     /**
1205 |      * Iterate over the bitmap elements. The function iterator is called once
1206 |      * for all the values with ptr (can be NULL) as the second parameter of each
1207 |      * call.
1208 |      *
1209 |      * roaring_iterator is simply a pointer to a function that returns bool
1210 |      * (true means that the iteration should continue while false means that it
1211 |      * should stop), and takes (uint32_t,void*) as inputs.
1212 |      */
1213 |     void iterate(roaring_iterator64 iterator, void *ptr) const {
1214 |         std::for_each(roarings.begin(), roarings.cend(),
1215 |                       [=](const std::pair<uint32_t, Roaring> &map_entry) {
1216 |                           roaring_iterate64(&map_entry.second.roaring, iterator,
1217 |                                             uint64_t(map_entry.first) << 32,
1218 |                                             ptr);
1219 |                       });
1220 |     }
1221 | 
1222 |     /**
1223 |      * If the size of the roaring bitmap is strictly greater than rank, then
1224 |      this
1225 |        function returns true and set element to the element of given rank.
1226 |        Otherwise, it returns false.
1227 |      */
1228 |     bool select(uint64_t rnk, uint64_t *element) const {
1229 |         for (const auto &map_entry : roarings) {
1230 |             uint64_t sub_cardinality = (uint64_t)map_entry.second.cardinality();
1231 |             if (rnk < sub_cardinality) {
1232 |                 *element = ((uint64_t)map_entry.first) << 32;
1233 |                 // assuming little endian
1234 |                 return map_entry.second.select((uint32_t)rnk,
1235 |                                                ((uint32_t *)element));
1236 |             }
1237 |             rnk -= sub_cardinality;
1238 |         }
1239 |         return false;
1240 |     }
1241 | 
1242 |     /**
1243 |     * Returns the number of integers that are smaller or equal to x.
1244 |     */
1245 |     uint64_t rank(uint64_t x) const {
1246 |         uint64_t result = 0;
1247 |         auto roaring_destination = roarings.find(highBytes(x));
1248 |         if (roaring_destination != roarings.cend()) {
1249 |             for (auto roaring_iter = roarings.cbegin();
1250 |                  roaring_iter != roaring_destination; ++roaring_iter) {
1251 |                 result += roaring_iter->second.cardinality();
1252 |             }
1253 |             result += roaring_destination->second.rank(lowBytes(x));
1254 |             return result;
1255 |         }
1256 |         roaring_destination = roarings.lower_bound(highBytes(x));
1257 |         for (auto roaring_iter = roarings.cbegin();
1258 |              roaring_iter != roaring_destination; ++roaring_iter) {
1259 |             result += roaring_iter->second.cardinality();
1260 |         }
1261 |         return result;
1262 |     }
1263 | 
1264 |     /**
1265 |      * write a bitmap to a char buffer. This is meant to be compatible with
1266 |      * the
1267 |      * Java and Go versions. Returns how many bytes were written which should be
1268 |      * getSizeInBytes().
1269 |      *
1270 |      * Setting the portable flag to false enable a custom format that
1271 |      * can save space compared to the portable format (e.g., for very
1272 |      * sparse bitmaps).
1273 |      */
1274 |     size_t write(char *buf, bool portable = true) const {
1275 |         const char *orig = buf;
1276 |         // push map size
1277 |         *((uint64_t *)buf) = roarings.size();
1278 |         buf += sizeof(uint64_t);
1279 |         std::for_each(
1280 |             roarings.cbegin(), roarings.cend(),
1281 |             [&buf, portable](const std::pair<uint32_t, Roaring> &map_entry) {
1282 |                 // push map key
1283 |                 memcpy(buf, &map_entry.first,
1284 |                        sizeof(uint32_t));  // this is undefined:
1285 |                                            // *((uint32_t*)buf) =
1286 |                                            // map_entry.first;
1287 |                 buf += sizeof(uint32_t);
1288 |                 // push map value Roaring
1289 |                 buf += map_entry.second.write(buf, portable);
1290 |             });
1291 |         return buf - orig;
1292 |     }
1293 | 
1294 |     /**
1295 |      * read a bitmap from a serialized version. This is meant to be compatible
1296 |      * with
1297 |      * the
1298 |      * Java and Go versions.
1299 |      *
1300 |      * Setting the portable flag to false enable a custom format that
1301 |      * can save space compared to the portable format (e.g., for very
1302 |      * sparse bitmaps).
1303 |      *
1304 |      * This function is unsafe in the sense that if you provide bad data,
1305 |      * many bytes could be read, possibly causing a buffer overflow. See also readSafe.
1306 |      */
1307 |     static Roaring64Map read(const char *buf, bool portable = true) {
1308 |         Roaring64Map result;
1309 |         // get map size
1310 |         uint64_t map_size = *((uint64_t *)buf);
1311 |         buf += sizeof(uint64_t);
1312 |         for (uint64_t lcv = 0; lcv < map_size; lcv++) {
1313 |             // get map key
1314 |             uint32_t key;
1315 |             memcpy(&key, buf, sizeof(uint32_t));  // this is undefined: uint32_t
1316 |                                                   // key = *((uint32_t*)buf);
1317 |             buf += sizeof(uint32_t);
1318 |             // read map value Roaring
1319 |             Roaring read = Roaring::read(buf, portable);
1320 |             result.emplaceOrInsert(key, read);
1321 |             // forward buffer past the last Roaring Bitmap
1322 |             buf += read.getSizeInBytes(portable);
1323 |         }
1324 |         return result;
1325 |     }
1326 | 
1327 |     /**
1328 |      * read a bitmap from a serialized version, reading no more than maxbytes bytes.
1329 |      * This is meant to be compatible with the Java and Go versions.
1330 |      *
1331 |      * Setting the portable flag to false enable a custom format that
1332 |      * can save space compared to the portable format (e.g., for very
1333 |      * sparse bitmaps).
1334 |      */
1335 |     static Roaring64Map readSafe(const char *buf, size_t maxbytes) {
1336 |         Roaring64Map result;
1337 |         // get map size
1338 |         uint64_t map_size = *((uint64_t *)buf);
1339 |         buf += sizeof(uint64_t);
1340 |         for (uint64_t lcv = 0; lcv < map_size; lcv++) {
1341 |             // get map key
1342 |             if(maxbytes < sizeof(uint32_t)) {
1343 |                 throw std::runtime_error("ran out of bytes");
1344 |             }
1345 |             uint32_t key;
1346 |             memcpy(&key, buf, sizeof(uint32_t));  // this is undefined: uint32_t
1347 |                                                   // key = *((uint32_t*)buf);
1348 |             buf += sizeof(uint32_t);
1349 |             maxbytes -= sizeof(uint32_t);
1350 |             // read map value Roaring
1351 |             Roaring read = Roaring::readSafe(buf, maxbytes);
1352 |             result.emplaceOrInsert(key, read);
1353 |             // forward buffer past the last Roaring Bitmap
1354 |             size_t tz = read.getSizeInBytes(true);
1355 |             buf += tz;
1356 |             maxbytes -= tz;
1357 |         }
1358 |         return result;
1359 |     }
1360 | 
1361 |     /**
1362 |      * How many bytes are required to serialize this bitmap (meant to be
1363 |      * compatible
1364 |      * with Java and Go versions)
1365 |      *
1366 |      * Setting the portable flag to false enable a custom format that
1367 |      * can save space compared to the portable format (e.g., for very
1368 |      * sparse bitmaps).
1369 |      */
1370 |     size_t getSizeInBytes(bool portable = true) const {
1371 |         // start with, respectively, map size and size of keys for each map
1372 |         // entry
1373 |         return std::accumulate(
1374 |             roarings.cbegin(), roarings.cend(),
1375 |             sizeof(uint64_t) + roarings.size() * sizeof(uint32_t),
1376 |             [=](size_t previous,
1377 |                 const std::pair<uint32_t, Roaring> &map_entry) {
1378 |                 // add in bytes used by each Roaring
1379 |                 return previous + map_entry.second.getSizeInBytes(portable);
1380 |             });
1381 |     }
1382 | 
1383 |     /**
1384 |      * Computes the intersection between two bitmaps and returns new bitmap.
1385 |      * The current bitmap and the provided bitmap are unchanged.
1386 |      */
1387 |     Roaring64Map operator&(const Roaring64Map &o) const {
1388 |         return Roaring64Map(*this) &= o;
1389 |     }
1390 | 
1391 |     /**
1392 |      * Computes the difference between two bitmaps and returns new bitmap.
1393 |      * The current bitmap and the provided bitmap are unchanged.
1394 |      */
1395 |     Roaring64Map operator-(const Roaring64Map &o) const {
1396 |         return Roaring64Map(*this) -= o;
1397 |     }
1398 | 
1399 |     /**
1400 |      * Computes the union between two bitmaps and returns new bitmap.
1401 |      * The current bitmap and the provided bitmap are unchanged.
1402 |      */
1403 |     Roaring64Map operator|(const Roaring64Map &o) const {
1404 |         return Roaring64Map(*this) |= o;
1405 |     }
1406 | 
1407 |     /**
1408 |      * Computes the symmetric union between two bitmaps and returns new bitmap.
1409 |      * The current bitmap and the provided bitmap are unchanged.
1410 |      */
1411 |     Roaring64Map operator^(const Roaring64Map &o) const {
1412 |         return Roaring64Map(*this) ^= o;
1413 |     }
1414 | 
1415 |     /**
1416 |      * Whether or not we apply copy and write.
1417 |      */
1418 |     void setCopyOnWrite(bool val) {
1419 |         if (copyOnWrite == val) return;
1420 |         copyOnWrite = val;
1421 |         std::for_each(roarings.begin(), roarings.end(),
1422 |                       [=](std::pair<const uint32_t, Roaring> &map_entry) {
1423 |                           map_entry.second.setCopyOnWrite(val);
1424 |                       });
1425 |     }
1426 | 
1427 |     /**
1428 |      * Print the content of the bitmap
1429 |      */
1430 |     void printf() const {
1431 |         if (!isEmpty()) {
1432 |             auto map_iter = roarings.cbegin();
1433 |             while (map_iter->second.isEmpty()) ++map_iter;
1434 |             struct iter_data {
1435 |                 uint32_t high_bits;
1436 |                 char first_char = '{';
1437 |             } outer_iter_data;
1438 |             outer_iter_data.high_bits = roarings.begin()->first;
1439 |             map_iter->second.iterate(
1440 |                 [](uint32_t low_bits, void *inner_iter_data) -> bool {
1441 |                     std::printf("%c%llu",
1442 |                                 ((iter_data *)inner_iter_data)->first_char,
1443 |                                 (long long unsigned)uniteBytes(
1444 |                                     ((iter_data *)inner_iter_data)->high_bits,
1445 |                                     low_bits));
1446 |                     ((iter_data *)inner_iter_data)->first_char = ',';
1447 |                     return true;
1448 |                 },
1449 |                 (void *)&outer_iter_data);
1450 |             std::for_each(
1451 |                 ++map_iter, roarings.cend(),
1452 |                 [](const std::pair<uint32_t, Roaring> &map_entry) {
1453 |                     map_entry.second.iterate(
1454 |                         [](uint32_t low_bits, void *high_bits) -> bool {
1455 |                             std::printf(",%llu",
1456 |                                         (long long unsigned)uniteBytes(
1457 |                                             *(uint32_t *)high_bits, low_bits));
1458 |                             return true;
1459 |                         },
1460 |                         (void *)&map_entry.first);
1461 |                 });
1462 |         } else
1463 |             std::printf("{");
1464 |         std::printf("}\n");
1465 |     }
1466 | 
1467 |     /**
1468 |      * Print the content of the bitmap into a string
1469 |      */
1470 |     std::string toString() const {
1471 |         struct iter_data {
1472 |             std::string str;
1473 |             uint32_t high_bits;
1474 |             char first_char = '{';
1475 |         } outer_iter_data;
1476 |         if (!isEmpty()) {
1477 |             auto map_iter = roarings.cbegin();
1478 |             while (map_iter->second.isEmpty()) ++map_iter;
1479 |             outer_iter_data.high_bits = roarings.begin()->first;
1480 |             map_iter->second.iterate(
1481 |                 [](uint32_t low_bits, void *inner_iter_data) -> bool {
1482 |                     ((iter_data *)inner_iter_data)->str +=
1483 |                         ((iter_data *)inner_iter_data)->first_char;
1484 |                     ((iter_data *)inner_iter_data)->str += std::to_string(
1485 |                         uniteBytes(((iter_data *)inner_iter_data)->high_bits,
1486 |                                    low_bits));
1487 |                     ((iter_data *)inner_iter_data)->first_char = ',';
1488 |                     return true;
1489 |                 },
1490 |                 (void *)&outer_iter_data);
1491 |             std::for_each(
1492 |                 ++map_iter, roarings.cend(),
1493 |                 [&outer_iter_data](
1494 |                     const std::pair<uint32_t, Roaring> &map_entry) {
1495 |                     outer_iter_data.high_bits = map_entry.first;
1496 |                     map_entry.second.iterate(
1497 |                         [](uint32_t low_bits, void *inner_iter_data) -> bool {
1498 |                             ((iter_data *)inner_iter_data)->str +=
1499 |                                 ((iter_data *)inner_iter_data)->first_char;
1500 |                             ((iter_data *)inner_iter_data)->str +=
1501 |                                 std::to_string(uniteBytes(
1502 |                                     ((iter_data *)inner_iter_data)->high_bits,
1503 |                                     low_bits));
1504 |                             return true;
1505 |                         },
1506 |                         (void *)&outer_iter_data);
1507 |                 });
1508 |         } else
1509 |             outer_iter_data.str = '{';
1510 |         outer_iter_data.str += '}';
1511 |         return outer_iter_data.str;
1512 |     }
1513 | 
1514 |     /**
1515 |      * Whether or not copy and write is active.
1516 |      */
1517 |     bool getCopyOnWrite() const { return copyOnWrite; }
1518 | 
1519 |     /**
1520 |      * computes the logical or (union) between "n" bitmaps (referenced by a
1521 |      * pointer).
1522 |      */
1523 |     static Roaring64Map fastunion(size_t n, const Roaring64Map **inputs) {
1524 |         Roaring64Map ans;
1525 |         // not particularly fast
1526 |         for (size_t lcv = 0; lcv < n; ++lcv) {
1527 |             ans |= *(inputs[lcv]);
1528 |         }
1529 |         return ans;
1530 |     }
1531 | 
    // The iterator reads the private bucket map and uniteBytes() directly.
    friend class Roaring64MapSetBitForwardIterator;
    typedef Roaring64MapSetBitForwardIterator const_iterator;

    /**
    * Returns an iterator that can be used to access the position of the
    * set bits. The running time complexity of a full scan is proportional to
    * the number of set bits: be aware that if you have long strings of 1s,
    * this can be very inefficient.
    *
    * It can be much faster to use the toArray method if you want to
    * retrieve the set bits.
    */
    const_iterator begin() const;

    /**
    * A bogus iterator that can be used together with begin()
    * for constructions such as for(auto i = b.begin();
    * i!=b.end(); ++i) {}
    * It is built in the exhausted state and must not be dereferenced.
    */
    const_iterator end() const;
1554 | 
1555 |    private:
    // One 32-bit Roaring per distinct high 32 bits of the stored values; the
    // key is the high word and the Roaring holds the matching low words
    // (see highBytes / lowBytes / uniteBytes). Entries may be empty.
    std::map<uint32_t, Roaring> roarings;
    // Copy-on-write setting; pushed to the buckets by setCopyOnWrite() and
    // applied to the buckets touched by flip().
    bool copyOnWrite = false;
1558 |     static uint32_t highBytes(const uint64_t in) { return uint32_t(in >> 32); }
1559 |     static uint32_t lowBytes(const uint64_t in) { return uint32_t(in); }
1560 |     static uint64_t uniteBytes(const uint32_t highBytes,
1561 |                                const uint32_t lowBytes) {
1562 |         return (uint64_t(highBytes) << 32) | uint64_t(lowBytes);
1563 |     }
1564 |     // this is needed to tolerate gcc's C++11 libstdc++ lacking emplace
1565 |     // prior to version 4.8
1566 |     void emplaceOrInsert(const uint32_t key, const Roaring &value) {
1567 | #if defined(__GLIBCXX__) && __GLIBCXX__ < 20130322
1568 |         roarings.insert(std::make_pair(key, value));
1569 | #else
1570 |         roarings.emplace(std::make_pair(key, value));
1571 | #endif
1572 |     }
1573 | };
1574 | 
1575 | /**
1576 |  * Used to go through the set bits. Not optimally fast, but convenient.
1577 |  */
1578 | class Roaring64MapSetBitForwardIterator final {
1579 |    public:
1580 |     typedef std::forward_iterator_tag iterator_category;
1581 |     typedef uint64_t *pointer;
1582 |     typedef uint64_t &reference_type;
1583 |     typedef uint64_t value_type;
1584 |     typedef int64_t difference_type;
1585 |     typedef Roaring64MapSetBitForwardIterator type_of_iterator;
1586 | 
1587 |     /**
1588 |      * Provides the location of the set bit.
1589 |      */
1590 |     value_type operator*() const {
1591 |         return Roaring64Map::uniteBytes(map_iter->first, i.current_value);
1592 |     }
1593 | 
1594 |     bool operator<(const type_of_iterator &o) {
1595 |         if (map_iter == map_end) return false;
1596 |         if (o.map_iter == o.map_end) return true;
1597 |         return **this < *o;
1598 |     }
1599 | 
1600 |     bool operator<=(const type_of_iterator &o) {
1601 |         if (o.map_iter == o.map_end) return true;
1602 |         if (map_iter == map_end) return false;
1603 |         return **this <= *o;
1604 |     }
1605 | 
1606 |     bool operator>(const type_of_iterator &o) {
1607 |         if (o.map_iter == o.map_end) return false;
1608 |         if (map_iter == map_end) return true;
1609 |         return **this > *o;
1610 |     }
1611 | 
1612 |     bool operator>=(const type_of_iterator &o) {
1613 |         if (map_iter == map_end) return true;
1614 |         if (o.map_iter == o.map_end) return false;
1615 |         return **this >= *o;
1616 |     }
1617 | 
1618 |     type_of_iterator &operator++() {  // ++i, must returned inc. value
1619 |         if (i.has_value == true) roaring_advance_uint32_iterator(&i);
1620 |         while (!i.has_value) {
1621 |             map_iter++;
1622 |             if (map_iter == map_end) return *this;
1623 |             roaring_init_iterator(&map_iter->second.roaring, &i);
1624 |         }
1625 |         return *this;
1626 |     }
1627 | 
1628 |     type_of_iterator operator++(int) {  // i++, must return orig. value
1629 |         Roaring64MapSetBitForwardIterator orig(*this);
1630 |         roaring_advance_uint32_iterator(&i);
1631 |         while (!i.has_value) {
1632 |             map_iter++;
1633 |             if (map_iter == map_end) return orig;
1634 |             roaring_init_iterator(&map_iter->second.roaring, &i);
1635 |         }
1636 |         return orig;
1637 |     }
1638 | 
1639 |     bool operator==(const Roaring64MapSetBitForwardIterator &o) {
1640 |         if (map_iter == map_end && o.map_iter == o.map_end) return true;
1641 |         if (o.map_iter == o.map_end) return false;
1642 |         return **this == *o;
1643 |     }
1644 | 
1645 |     bool operator!=(const Roaring64MapSetBitForwardIterator &o) {
1646 |         if (map_iter == map_end && o.map_iter == o.map_end) return false;
1647 |         if (o.map_iter == o.map_end) return true;
1648 |         return **this != *o;
1649 |     }
1650 | 
1651 |     Roaring64MapSetBitForwardIterator(const Roaring64Map &parent,
1652 |                                       bool exhausted = false)
1653 |         : map_end(parent.roarings.cend()) {
1654 |         if (exhausted || parent.roarings.empty()) {
1655 |             map_iter = parent.roarings.cend();
1656 |         } else {
1657 |             map_iter = parent.roarings.cbegin();
1658 |             roaring_init_iterator(&map_iter->second.roaring, &i);
1659 |             while (!i.has_value) {
1660 |                 map_iter++;
1661 |                 if (map_iter == map_end) return;
1662 |                 roaring_init_iterator(&map_iter->second.roaring, &i);
1663 |             }
1664 |         }
1665 |     }
1666 | 
1667 |    private:
1668 |     std::map<uint32_t, Roaring>::const_iterator map_iter;
1669 |     std::map<uint32_t, Roaring>::const_iterator map_end;
1670 |     roaring_uint32_iterator_t i;
1671 | };
1672 | 
1673 | inline Roaring64MapSetBitForwardIterator Roaring64Map::begin() const {
1674 |     return Roaring64MapSetBitForwardIterator(*this);
1675 | }
1676 | 
1677 | inline Roaring64MapSetBitForwardIterator Roaring64Map::end() const {
1678 |     return Roaring64MapSetBitForwardIterator(*this, true);
1679 | }
1680 | 
1681 | #endif /* INCLUDE_ROARING_64_MAP_HH_ */
1682 | /* end file /Users/dlemire/CVS/github/CRoaring/cpp/roaring64map.hh */


--------------------------------------------------------------------------------