├── .droneci ├── NiceLinuxDockerfile └── config.yml ├── .gitignore ├── .gitmodules ├── README.md ├── pysdsl ├── __init__.cpp ├── __init__.py ├── _memory_monitor.cpp ├── bits.cpp ├── calc.hpp ├── docstrings.hpp ├── io.hpp ├── memory_monitor.py ├── operations │ ├── creation.hpp │ ├── iteration.hpp │ └── sizes.hpp ├── supports.hpp ├── types │ ├── bitvector.hpp │ ├── encodedvector.hpp │ ├── intvector.hpp │ ├── pysequence.hpp │ ├── sorted_int_stack.hpp │ ├── suffixarray.hpp │ └── wavelet.hpp └── util │ ├── indexiterator.hpp │ └── tupletricks.hpp ├── setup.py └── tests ├── test_encvector.py ├── test_intvector.py ├── test_sorted_int_stack.py ├── test_suffixarray.py └── test_wavelet.py /.droneci/NiceLinuxDockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:latest 2 | RUN mkdir /work 3 | WORKDIR /work 4 | RUN \ 5 | apt update && \ 6 | apt install -y --no-install-recommends clang ssh git tar zip ca-certificates software-properties-common && \ 7 | apt install -y cmake python-pip python3-pip && \ 8 | add-apt-repository -y ppa:deadsnakes/ppa && \ 9 | apt install -y python3.4 python3.4-dev python3.5 python3.5-dev python3.6 python3.6-dev python3.7 python3.7-dev && \ 10 | rm -rf /var/lib/apt/lists/* 11 | -------------------------------------------------------------------------------- /.droneci/config.yml: -------------------------------------------------------------------------------- 1 | kind: pipeline 2 | name: build-2.7 3 | 4 | steps: 5 | - name: build 6 | image: ivigns/nice-linux:ubuntu-0.0.4 7 | volumes: 8 | - name: cache 9 | path: /tmp/cache 10 | commands: 11 | - git submodule sync 12 | - git submodule update --init --recursive 13 | - cd /drone/src/libdivsufsort 14 | - git pull origin master 15 | - cd /drone/src/sdsl-lite 16 | - git pull origin master 17 | - cd /drone/src/sdsl-lite/external/libdivsufsort 18 | - git pull origin master 19 | - cd /drone/src/pybind11 20 | - git pull origin master 21 | - cd /tmp 22 | - mkdir 
sdsl-lite-build 23 | - mkdir libdivsufsort-build 24 | - export CC=clang 25 | - export CXX=clang++ 26 | - export CFLAGS="-O3 -static -lc -fPIC -static-libgcc -pipe -fcolor-diagnostics -lpthread -Wl,-Bstatic" 27 | - cd /tmp/libdivsufsort-build 28 | - cmake -D BUILD_SHARED_LIBS=OFF /drone/src/libdivsufsort 29 | - make install 30 | - export CXXFLAGS="-O3 -pipe -fcolor-diagnostics -static-libgcc -static-libstdc++ -fPIC -lpthread -Wl,-Bstatic -Wl,-Bdynamic" 31 | - cd /tmp/sdsl-lite-build 32 | - cmake /drone/src/sdsl-lite 33 | - make install 34 | - export CXXFLAGS="-O3 -pipe -stdlib=libc++ -fcolor-diagnostics -fPIC" 35 | - export CFLAGS="-O3 -pipe -fcolor-diagnostics -fPIC" 36 | - cd /drone/src/pybind11 37 | - pip install --no-binary ':all:' --no-cache-dir . --verbose 38 | - cd /drone/src 39 | - pip wheel --no-binary ':all:' --no-cache-dir -w dist . --verbose 40 | - cp -r . /tmp/cache 41 | 42 | - name: show 43 | image: debian:testing 44 | volumes: 45 | - name: cache 46 | path: /tmp/cache 47 | commands: 48 | - cd /tmp/cache && ls -aR 49 | 50 | - name: test-wheel 51 | image: debian:testing 52 | volumes: 53 | - name: cache 54 | path: /tmp/cache 55 | commands: 56 | - apt-get -qq update 57 | - apt-get -qqy install python python-pip 58 | - pip install $(ls -d /tmp/cache/dist/pysdsl*.whl) 59 | - pip install -U pytest 60 | - pytest --verbose --showlocals --color=yes tests 61 | 62 | - name: publish 63 | image: plugins/github-release 64 | volumes: 65 | - name: cache 66 | path: /tmp/cache 67 | settings: 68 | api_key: 69 | from_secret: repo_token 70 | files: 71 | - /tmp/cache/dist/pysdsl*.whl 72 | title: ${DRONE_TAG} 73 | when: 74 | event: tag 75 | 76 | volumes: 77 | - name: cache 78 | temp: {} 79 | 80 | --- 81 | kind: pipeline 82 | name: build-3.4 83 | 84 | steps: 85 | - name: build 86 | image: ivigns/nice-linux:ubuntu-0.0.4 87 | volumes: 88 | - name: cache 89 | path: /tmp/cache 90 | commands: 91 | - git submodule sync 92 | - git submodule update --init --recursive 93 | - cd 
/drone/src/libdivsufsort 94 | - git pull origin master 95 | - cd /drone/src/sdsl-lite 96 | - git pull origin master 97 | - cd /drone/src/sdsl-lite/external/libdivsufsort 98 | - git pull origin master 99 | - cd /drone/src/pybind11 100 | - git pull origin master 101 | - cd /tmp 102 | - mkdir sdsl-lite-build 103 | - mkdir libdivsufsort-build 104 | - export CC=clang 105 | - export CXX=clang++ 106 | - export CFLAGS="-O3 -static -lc -fPIC -static-libgcc -pipe -fcolor-diagnostics -lpthread -Wl,-Bstatic" 107 | - cd /tmp/libdivsufsort-build 108 | - cmake -D BUILD_SHARED_LIBS=OFF /drone/src/libdivsufsort 109 | - make install 110 | - export CXXFLAGS="-O3 -pipe -fcolor-diagnostics -static-libgcc -static-libstdc++ -fPIC -lpthread -Wl,-Bstatic -Wl,-Bdynamic" 111 | - cd /tmp/sdsl-lite-build 112 | - cmake /drone/src/sdsl-lite 113 | - make install 114 | - export CXXFLAGS="-O3 -pipe -stdlib=libc++ -fcolor-diagnostics -fPIC" 115 | - export CFLAGS="-O3 -pipe -fcolor-diagnostics -fPIC" 116 | - cd /drone/src/pybind11 117 | - python3.4 -m pip install --no-binary ':all:' --no-cache-dir . --verbose 118 | - cd /drone/src 119 | - python3.4 -m pip wheel --no-binary ':all:' --no-cache-dir -w dist . --verbose 120 | - cp -r . 
/tmp/cache 121 | 122 | - name: show 123 | image: debian:testing 124 | volumes: 125 | - name: cache 126 | path: /tmp/cache 127 | commands: 128 | - cd /tmp/cache && ls -aR 129 | 130 | - name: test-wheel 131 | image: ivigns/nice-linux:ubuntu-0.0.4 132 | volumes: 133 | - name: cache 134 | path: /tmp/cache 135 | commands: 136 | - python3.4 -m pip install $(ls -d /tmp/cache/dist/pysdsl*.whl) 137 | - python3.4 -m pip install -U pytest 138 | - pytest --verbose --showlocals --color=yes tests 139 | 140 | - name: publish 141 | image: plugins/github-release 142 | volumes: 143 | - name: cache 144 | path: /tmp/cache 145 | settings: 146 | api_key: 147 | from_secret: repo_token 148 | files: 149 | - /tmp/cache/dist/pysdsl*.whl 150 | title: ${DRONE_TAG} 151 | when: 152 | event: tag 153 | 154 | volumes: 155 | - name: cache 156 | temp: {} 157 | 158 | --- 159 | kind: pipeline 160 | name: build-3.5 161 | 162 | steps: 163 | - name: build 164 | image: ivigns/nice-linux:ubuntu-0.0.4 165 | volumes: 166 | - name: cache 167 | path: /tmp/cache 168 | commands: 169 | - git submodule sync 170 | - git submodule update --init --recursive 171 | - cd /drone/src/libdivsufsort 172 | - git pull origin master 173 | - cd /drone/src/sdsl-lite 174 | - git pull origin master 175 | - cd /drone/src/sdsl-lite/external/libdivsufsort 176 | - git pull origin master 177 | - cd /drone/src/pybind11 178 | - git pull origin master 179 | - cd /tmp 180 | - mkdir sdsl-lite-build 181 | - mkdir libdivsufsort-build 182 | - export CC=clang 183 | - export CXX=clang++ 184 | - export CFLAGS="-O3 -static -lc -fPIC -static-libgcc -pipe -fcolor-diagnostics -lpthread -Wl,-Bstatic" 185 | - cd /tmp/libdivsufsort-build 186 | - cmake -D BUILD_SHARED_LIBS=OFF /drone/src/libdivsufsort 187 | - make install 188 | - export CXXFLAGS="-O3 -pipe -fcolor-diagnostics -static-libgcc -static-libstdc++ -fPIC -lpthread -Wl,-Bstatic -Wl,-Bdynamic" 189 | - cd /tmp/sdsl-lite-build 190 | - cmake /drone/src/sdsl-lite 191 | - make install 192 | - export 
CXXFLAGS="-O3 -pipe -stdlib=libc++ -fcolor-diagnostics -fPIC" 193 | - export CFLAGS="-O3 -pipe -fcolor-diagnostics -fPIC" 194 | - cd /drone/src/pybind11 195 | - python3.5 -m pip install --no-binary ':all:' --no-cache-dir . --verbose 196 | - cd /drone/src 197 | - python3.5 -m pip wheel --no-binary ':all:' --no-cache-dir -w dist . --verbose 198 | - cp -r . /tmp/cache 199 | 200 | - name: show 201 | image: debian:testing 202 | volumes: 203 | - name: cache 204 | path: /tmp/cache 205 | commands: 206 | - cd /tmp/cache && ls -aR 207 | 208 | - name: test-wheel 209 | image: ivigns/nice-linux:ubuntu-0.0.4 210 | volumes: 211 | - name: cache 212 | path: /tmp/cache 213 | commands: 214 | - python3.5 -m pip install $(ls -d /tmp/cache/dist/pysdsl*.whl) 215 | - python3.5 -m pip install -U pytest 216 | - pytest --verbose --showlocals --color=yes tests 217 | 218 | - name: publish 219 | image: plugins/github-release 220 | volumes: 221 | - name: cache 222 | path: /tmp/cache 223 | settings: 224 | api_key: 225 | from_secret: repo_token 226 | files: 227 | - /tmp/cache/dist/pysdsl*.whl 228 | title: ${DRONE_TAG} 229 | when: 230 | event: tag 231 | 232 | volumes: 233 | - name: cache 234 | temp: {} 235 | 236 | --- 237 | kind: pipeline 238 | name: build-3.6 239 | 240 | steps: 241 | - name: build 242 | image: ivigns/nice-linux:ubuntu-0.0.4 243 | volumes: 244 | - name: cache 245 | path: /tmp/cache 246 | commands: 247 | - git submodule sync 248 | - git submodule update --init --recursive 249 | - cd /drone/src/libdivsufsort 250 | - git pull origin master 251 | - cd /drone/src/sdsl-lite 252 | - git pull origin master 253 | - cd /drone/src/sdsl-lite/external/libdivsufsort 254 | - git pull origin master 255 | - cd /drone/src/pybind11 256 | - git pull origin master 257 | - cd /tmp 258 | - mkdir sdsl-lite-build 259 | - mkdir libdivsufsort-build 260 | - export CC=clang 261 | - export CXX=clang++ 262 | - export CFLAGS="-O3 -static -lc -fPIC -static-libgcc -pipe -fcolor-diagnostics -lpthread -Wl,-Bstatic" 
263 | - cd /tmp/libdivsufsort-build 264 | - cmake -D BUILD_SHARED_LIBS=OFF /drone/src/libdivsufsort 265 | - make install 266 | - export CXXFLAGS="-O3 -pipe -fcolor-diagnostics -static-libgcc -static-libstdc++ -fPIC -lpthread -Wl,-Bstatic -Wl,-Bdynamic" 267 | - cd /tmp/sdsl-lite-build 268 | - cmake /drone/src/sdsl-lite 269 | - make install 270 | - export CXXFLAGS="-O3 -pipe -stdlib=libc++ -fcolor-diagnostics -fPIC" 271 | - export CFLAGS="-O3 -pipe -fcolor-diagnostics -fPIC" 272 | - cd /drone/src/pybind11 273 | - python3.6 -m pip install --no-binary ':all:' --no-cache-dir . --verbose 274 | - cd /drone/src 275 | - python3.6 -m pip wheel --no-binary ':all:' --no-cache-dir -w dist . --verbose 276 | - cp -r . /tmp/cache 277 | 278 | - name: show 279 | image: debian:testing 280 | volumes: 281 | - name: cache 282 | path: /tmp/cache 283 | commands: 284 | - cd /tmp/cache && ls -aR 285 | 286 | - name: test-wheel 287 | image: ivigns/nice-linux:ubuntu-0.0.4 288 | volumes: 289 | - name: cache 290 | path: /tmp/cache 291 | commands: 292 | - python3.6 -m pip install $(ls -d /tmp/cache/dist/pysdsl*.whl) 293 | - python3.6 -m pip install -U pytest 294 | - pytest --verbose --showlocals --color=yes tests 295 | 296 | - name: publish 297 | image: plugins/github-release 298 | volumes: 299 | - name: cache 300 | path: /tmp/cache 301 | settings: 302 | api_key: 303 | from_secret: repo_token 304 | files: 305 | - /tmp/cache/dist/pysdsl*.whl 306 | title: ${DRONE_TAG} 307 | when: 308 | event: tag 309 | 310 | volumes: 311 | - name: cache 312 | temp: {} 313 | 314 | --- 315 | kind: pipeline 316 | name: build-3.7 317 | 318 | steps: 319 | - name: build 320 | image: ivigns/nice-linux:ubuntu-0.0.4 321 | volumes: 322 | - name: cache 323 | path: /tmp/cache 324 | commands: 325 | - git submodule sync 326 | - git submodule update --init --recursive 327 | - cd /drone/src/libdivsufsort 328 | - git pull origin master 329 | - cd /drone/src/sdsl-lite 330 | - git pull origin master 331 | - cd 
/drone/src/sdsl-lite/external/libdivsufsort 332 | - git pull origin master 333 | - cd /drone/src/pybind11 334 | - git pull origin master 335 | - cd /tmp 336 | - mkdir sdsl-lite-build 337 | - mkdir libdivsufsort-build 338 | - export CC=clang 339 | - export CXX=clang++ 340 | - export CFLAGS="-O3 -static -lc -fPIC -static-libgcc -pipe -fcolor-diagnostics -lpthread -Wl,-Bstatic" 341 | - cd /tmp/libdivsufsort-build 342 | - cmake -D BUILD_SHARED_LIBS=OFF /drone/src/libdivsufsort 343 | - make install 344 | - export CXXFLAGS="-O3 -pipe -fcolor-diagnostics -static-libgcc -static-libstdc++ -fPIC -lpthread -Wl,-Bstatic -Wl,-Bdynamic" 345 | - cd /tmp/sdsl-lite-build 346 | - cmake /drone/src/sdsl-lite 347 | - make install 348 | - export CXXFLAGS="-O3 -pipe -stdlib=libc++ -fcolor-diagnostics -fPIC" 349 | - export CFLAGS="-O3 -pipe -fcolor-diagnostics -fPIC" 350 | - cd /drone/src/pybind11 351 | - python3.7 -m pip install --no-binary ':all:' --no-cache-dir . --verbose 352 | - cd /drone/src 353 | - python3.7 -m pip wheel --no-binary ':all:' --no-cache-dir -w dist . --verbose 354 | - cp -r . 
/tmp/cache 355 | 356 | - name: show 357 | image: debian:testing 358 | volumes: 359 | - name: cache 360 | path: /tmp/cache 361 | commands: 362 | - cd /tmp/cache && ls -aR 363 | 364 | - name: test-wheel 365 | image: ivigns/nice-linux:ubuntu-0.0.4 366 | volumes: 367 | - name: cache 368 | path: /tmp/cache 369 | commands: 370 | - python3.7 -m pip install $(ls -d /tmp/cache/dist/pysdsl*.whl) 371 | - python3.7 -m pip install -U pytest 372 | - pytest --verbose --showlocals --color=yes tests 373 | 374 | - name: publish 375 | image: plugins/github-release 376 | volumes: 377 | - name: cache 378 | path: /tmp/cache 379 | settings: 380 | api_key: 381 | from_secret: repo_token 382 | files: 383 | - /tmp/cache/dist/pysdsl*.whl 384 | title: ${DRONE_TAG} 385 | when: 386 | event: tag 387 | 388 | volumes: 389 | - name: cache 390 | temp: {} 391 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .rendered.* 2 | .*.cppimporthash 3 | *.so 4 | .vscode/* 5 | python?.*/ 6 | tmp/ 7 | *.whl 8 | *.pyc 9 | __pycache__/ 10 | .pytest_cache/ 11 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "sdsl-lite"] 2 | path = sdsl-lite 3 | url = https://github.com/simongog/sdsl-lite 4 | [submodule "pybind11"] 5 | path = pybind11 6 | url = https://github.com/pybind/pybind11 7 | [submodule "libdivsufsort"] 8 | path = libdivsufsort 9 | url = https://github.com/y-256/libdivsufsort 10 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Python bindings to Succinct Data Structure Library 2.0 2 | 3 | The Succinct Data Structure Library ([SDSL][SDSL]) is a powerful and flexible C++11 library implementing succinct data 
structures. In total, the library contains the highlights of 40 [research publications][SDSLLIT]. Succinct data structures can represent an object (such as a bitvector or a tree) in space close to the information-theoretic lower bound of the object while supporting operations of the original object efficiently. The theoretical time complexity of an operation performed on the classical data structure and the equivalent succinct data structure are (most of the time) identical. 4 | 5 | Most of examples from [SDSL cheat sheet][SDSL-CHEAT-SHEET] and [SDSL tutorial][SDSL-TUTORIAL] are implemented. 6 | 7 | ## Mutable bit-compressed vectors 8 | 9 | Core classes (see `pysdsl.int_vector` for dict of all of them): 10 | 11 | * `pysdsl.IntVector(size, default_value, bit_width=64)` — dynamic bit width 12 | * `pysdsl.BitVector(size, default_value)` — static (fixed) bit width (1) 13 | * `pysdsl.Int4Vector(size, default_value)` — static bit width (4) 14 | * `pysdsl.Int8Vector(size, default_value)` — static bit width (8) 15 | * `pysdsl.Int16Vector(size, default_value)` — static bit width (16) 16 | * `pysdsl.Int24Vector(size, default_value)` — static bit width (24) 17 | * `pysdsl.Int32Vector(size, default_value)` — static bit width (32) 18 | * `pysdsl.Int64Vector(size, default_value)` — static bit width (64) 19 | 20 | Construction from python sequences is also supported. 
21 | 22 | ```python 23 | 24 | In [1]: import pysdsl 25 | 26 | In [2]: %time v = pysdsl.IntVector(1024 * 1024 * 256) 27 | CPU times: user 914 ms, sys: 509 ms, total: 1.42 s 28 | Wall time: 1.42 s 29 | 30 | In [3]: v.size_in_mega_bytes 31 | Out[3]: 2048.000008583069 32 | 33 | In [4]: %time v.set_to_id() # like *v = range(len(v)) 34 | CPU times: user 8.19 s, sys: 1.3 ms, total: 8.19 s 35 | Wall time: 8.19 s 36 | 37 | In [5]: v.width 38 | Out[5]: 64 39 | 40 | In [6]: %time v.bit_compress() 41 | CPU times: user 23.3 s, sys: 155 ms, total: 23.5 s 42 | Wall time: 23.5 s 43 | 44 | In [7]: v.width 45 | Out[7]: 28 46 | 47 | In [8]: v.size_in_mega_bytes 48 | Out[8]: 896.0000085830688 49 | 50 | ``` 51 | 52 | Buffer interface: 53 | 54 | ```python 55 | In [9]: import array 56 | 57 | In [10]: v = pysdsl.Int64Vector([1, 2, 3]) 58 | 59 | In [11]: array.array('Q', v) 60 | Out[11]: array('Q', [1, 2, 3]) 61 | ``` 62 | 63 | ## Immutable compressed integer vectors 64 | 65 | (See `pysdsl.enc_vector`): 66 | 67 | * `EncVectorEliasDelta(IntVector)` 68 | * `EncVectorEliasGamma(IntVector)` 69 | * `EncVectorFibonacci(IntVector)` 70 | * `EncVectorComma2(IntVector)` 71 | * `EncVectorComma4(IntVector)` 72 | 73 | ```python 74 | In [9]: %time ev = pysdsl.EncVectorEliasDelta(v) 75 | CPU times: user 26.5 s, sys: 31.8 ms, total: 26.5 s 76 | Wall time: 26.5 s 77 | 78 | In [10]: ev.size_in_mega_bytes 79 | Out[10]: 45.75003242492676 80 | ``` 81 | 82 | Encoding values with variable length codes (see `pysdsl.variable_length_codes_vector`): 83 | 84 | * `VariableLengthCodesVectorEliasDelta(IntVector)` 85 | * `VariableLengthCodesVectorEliasGamma(IntVector)` 86 | * `VariableLengthCodesVectorFibonacci(IntVector)` 87 | * `VariableLengthCodesVectorComma2(IntVector)` 88 | * `VariableLengthCodesVectorComma4(IntVector)` 89 | 90 | Encoding values with "escaping" technique (see `pysdsl.direct_accessible_codes_vector`): 91 | 92 | * `DirectAccessibleCodesVector(IntVector)` 93 | * 
`DirectAccessibleCodesVector8(IntVector)`, 94 | * `DirectAccessibleCodesVector16(IntVector)`, 95 | * `DirectAccessibleCodesVector63(IntVector)`, 96 | * `DirectAccessibleCodesVectorDP(IntVector)` — number of layers is chosen 97 | with dynamic programming 98 | * `DirectAccessibleCodesVectorDPRRR(IntVector)` — same but built on top of 99 | RamanRamanRaoVector (see later) 100 | 101 | Construction from python sequences is also supported. 102 | 103 | ## Immutable compressed bit (boolean) vectors 104 | 105 | (See pysdsl.`all_immutable_bitvectors`) 106 | 107 | * `BitVectorInterLeaved64(BitVector)` 108 | * `BitVectorInterLeaved128(BitVector)` 109 | * `BitVectorInterLeaved256(BitVector)` 110 | * `BitVectorInterLeaved512(BitVector)` — A bit vector which interleaves the 111 | original `BitVector` with rank information 112 | (see later) 113 | * `SDVector(BitVector)` — A bit vector which compresses very sparse populated 114 | bit vectors by representing the positions of 1 by the 115 | Elias-Fano representation for 116 | non-decreasing sequences 117 | * `RamanRamanRaoVector15(BitVector)` 118 | * `RamanRamanRaoVector63(BitVector)` 119 | * `RamanRamanRaoVector256(BitVector)` — An H₀-compressed bitvector representation. 120 | * `HybVector8(BitVector)` 121 | * `HybVector16(BitVector)` — A hybrid-encoded compressed bitvector 122 | representation 123 | 124 | See also: `pysdsl.raman_raman_rao_vectors`, `pysdsl.sparse_bit_vectors`, 125 | `pysdsl.hybrid_bit_vectors` and `pysdsl.bit_vector_interleaved`. 126 | 127 | ## Rank and select operations on bitvectors 128 | 129 | For bitvector `v` `rank(i)` for pattern `P` (by default `P` is a bitstring of 130 | len 1: `1`) is the number of patterns `P` in the prefix `[0..i)` in vector `v`. 131 | 132 | For bitvector `v` `select(i)` for pattern `P` (by default `P`=`1`) is the 133 | position of the `i`-th occurrence of pattern `P` in vector `v`. 
134 | 135 | Create support instances for rank and/or select for different patterns via: 136 | 137 | * `v.init_rank()` or `v.init_rank_1()` for ranks of pattern `1` 138 | (e.g. the number of set bits in `v`) 139 | * `v.init_rank_0()` for ranks of pattern `0` 140 | * `v.init_rank_00()` (if supported by vector class) for ranks of pattern `00` 141 | * `v.init_rank_01()` (if supported by vector class) for ranks of pattern `01` 142 | * `v.init_rank_10()` (if supported by vector class) for ranks of pattern `10` 143 | * `v.init_rank_11()` (if supported by vector class) for ranks of pattern `11` 144 | * `v.init_support()` or `v.init_support_1()` for support of pattern `1` 145 | (e.g. the positions of set bits) 146 | * `v.init_support_0()` for ranks of pattern `0` 147 | * `v.init_support_00()` (if supported by vector class) for ranks of pattern `00` 148 | * `v.init_support_01()` (if supported by vector class) for ranks of pattern `01` 149 | * `v.init_support_10()` (if supported by vector class) for ranks of pattern `10` 150 | * `v.init_support_11()` (if supported by vector class) for ranks of pattern `11` 151 | 152 | Once support instance `s` is created call it (`s(idx)` or `s.__call__(idx)`) 153 | or use corresponding methods `s.rank(idx)` or `s.select(idx)` to get 154 | the results. 155 | 156 | `s.rank(idx)` and `s.select(idx)` are undefined if original bitvector is 157 | mutable and was modified. 158 | 159 | 160 | ## Wavelet trees 161 | 162 | The wavelet tree is a data structure that provides three efficient methods: 163 | 164 | * The `[]`-operator: `wt[i]` returns the `i`-th symbol of the vector for which the wavelet tree was built. 165 | * The rank method: `wt.rank(i, c)` returns the number of occurrences of symbol `c` in the prefix `[0..i-1]` in the vector for which the wavelet tree was built. 166 | * The select method: `wt.select(j, c)` returns the index `i` from `[0..size()-1]` of the `j`-th occurrence of symbol `c`. 
167 | 168 | ## Compressed suffix arrays 169 | 170 | Suffix array is a sorted array of all suffixes of a string. 171 | 172 | SDSL supports bitcompressed and compressed suffix arrays. 173 | 174 | Byte representation of original IntVector should have no zero symbols in order to construct SuffixArray. 175 | 176 | ## Objects memory structure 177 | 178 | Any object has a `.structure` property with technical information about an 179 | object. `.structure_json` is also provided for web-view implementations. 180 | `.write_structure_json()` method puts that information into a file. 181 | 182 | `.size_in_bytes` and `.size_in_mega_bytes` properties show how much memory the 183 | object is occupying. 184 | 185 | ## Saving/Loading objects 186 | 187 | All objects provide `.store_to_checked_file()` method allowing one to save 188 | object into a file. 189 | 190 | All classes provide `.load_from_checkded_file()` static method allowing one to 191 | load object stored with `.store_to_checked_file()` 192 | 193 | 194 | ## Building 195 | 196 | Requirements: static libraries for sdsl and divsufsort. 
197 | 198 | Call `pip` with binaries disabled to fetch sources and build the package: 199 | 200 | ```bash 201 | pip install --no-binary :all: pysdsl 202 | ``` 203 | 204 | 205 | [SDSL]: https://github.com/simongog/sdsl-lite 206 | [SDSLLIT]: https://github.com/simongog/sdsl-lite/wiki/Literature 207 | "Succinct Data Structure Literature" 208 | [SDSL-CHEAT-SHEET]: https://simongog.github.io/assets/data/sdsl-cheatsheet.pdf 209 | [SDSL-TUTORIAL]: https://simongog.github.io/assets/data/sdsl-slides/tutorial 210 | -------------------------------------------------------------------------------- /pysdsl/__init__.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | /* assert() replacement that throws std::runtime_error when the condition is false; the argument is parenthesised so that a compound expression is negated as a whole. */ 6 | #define assert(x) if(!(x)) {throw std::runtime_error("assertion failed");} 7 | 8 | #include 9 | 10 | #include 11 | 12 | #include "docstrings.hpp" 13 | #include "operations/creation.hpp" 14 | #include "types/bitvector.hpp" 15 | #include "types/encodedvector.hpp" 16 | #include "types/intvector.hpp" 17 | #include "types/suffixarray.hpp" 18 | #include "types/wavelet.hpp" 19 | #include "types/sorted_int_stack.hpp" 20 | 21 | namespace py = pybind11; 22 | 23 | /* Module entry point: collects the class tuples returned by the add_* helpers, then runs the make_inits_many_functor and make_pysequence_init_functor passes over them (presumably registering cross-type and sequence constructors; confirm in operations/creation.hpp). */ 24 | PYBIND11_MODULE(pysdsl, m) 25 | { 26 | m.doc() = "sdsl-lite bindings for python"; 27 | 28 | auto iv_classes_tpl = add_int_vectors(m); 29 | 30 | auto iv_classes = std::get<0>(iv_classes_tpl); 31 | auto iv_classes_as_params = std::get<1>(iv_classes_tpl); 32 | 33 | py::class_>& bit_vector_cls = std::get<1>(iv_classes); 34 | 35 | auto bit_vector_classes = std::make_tuple(bit_vector_cls); 36 | 37 | auto tmp = add_bitvectors(m, bit_vector_cls); 38 | auto compressed_bit_vector_classes = std::get<0>(tmp); 39 | auto cbv_propagate = std::get<1>(tmp); 40 | 41 | auto enc_classes = add_encoded_vectors(m); 42 | 43 | auto wavelet_classes = add_wavelet(m, cbv_propagate); 44 | 45 | auto csa_classes = add_csa(m); 46 | 47 | auto sorted_stack = add_sorted_int_stack(m); 48 | 49 | 
for_each_in_tuple(iv_classes, make_inits_many_functor(iv_classes)); 50 | for_each_in_tuple(iv_classes, make_inits_many_functor(enc_classes)); 51 | for_each_in_tuple(iv_classes, 52 | make_inits_many_functor(compressed_bit_vector_classes)); 53 | for_each_in_tuple(iv_classes, make_inits_many_functor(wavelet_classes)); 54 | 55 | for_each_in_tuple(enc_classes, make_inits_many_functor(iv_classes)); 56 | 57 | for_each_in_tuple(sorted_stack, make_inits_many_functor(sorted_stack)); 58 | #ifndef NOCROSSCONSTRUCTORS 59 | for_each_in_tuple(enc_classes, make_inits_many_functor(enc_classes)); 60 | //for_each_in_tuple(enc_classes, make_inits_many_functor(wavelet_classes)); 61 | #endif 62 | 63 | for_each_in_tuple(compressed_bit_vector_classes, 64 | make_inits_many_functor(bit_vector_classes)); 65 | #ifndef NOCROSSCONSTRUCTORS 66 | for_each_in_tuple(compressed_bit_vector_classes, 67 | make_inits_many_functor(compressed_bit_vector_classes)); 68 | #endif 69 | 70 | for_each_in_tuple(wavelet_classes, make_inits_many_functor(iv_classes)); 71 | #ifndef NOCROSSCONSTRUCTORS 72 | for_each_in_tuple(wavelet_classes, make_inits_many_functor(enc_classes)); 73 | for_each_in_tuple(wavelet_classes, 74 | make_inits_many_functor(wavelet_classes)); 75 | #endif 76 | 77 | for_each_in_tuple(iv_classes, make_pysequence_init_functor()); 78 | for_each_in_tuple(enc_classes, make_pysequence_init_functor()); 79 | //for_each_in_tuple(compressed_bit_vector_classes, 80 | // make_pysequence_init_functor()); 81 | 82 | for_each_in_tuple(wavelet_classes, make_pysequence_init_functor()); 83 | for_each_in_tuple(csa_classes, make_pysequence_init_functor()); 84 | } 85 | -------------------------------------------------------------------------------- /pysdsl/__init__.py: -------------------------------------------------------------------------------- 1 | from pysdsl import * 2 | -------------------------------------------------------------------------------- /pysdsl/_memory_monitor.cpp: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | 6 | #include 7 | 8 | namespace py = pybind11; 9 | 10 | /* Python bindings around sdsl::memory_monitor. start/stop forward to the monitor; "report" writes the memory log to a string stream and returns it parsed by Python's json.loads; the zero-argument report_json/report_html return the written log as a string; the file_name overloads write the log to that file and throw std::runtime_error when the stream is not good before or after writing. NOTE(review): template arguments appear to be missing from this listing, so the format selected by each write_memory_log call and the guard type passed to py::call_guard cannot be confirmed here. */ 11 | PYBIND11_MODULE(_memory_monitor, m) 12 | { 13 | m.doc() = "Internals of memory monitor module"; 14 | 15 | m.def("start", [] () { return sdsl::memory_monitor::start(); }); 16 | m.def("stop", [] () { return sdsl::memory_monitor::stop(); }); 17 | m.def( 18 | "report", 19 | [] () 20 | { 21 | std::stringstream fout; 22 | sdsl::memory_monitor::write_memory_log(fout); 23 | auto json = py::module::import("json"); 24 | return json.attr("loads")(fout.str()); 25 | } 26 | ); 27 | m.def( 28 | "report_json", 29 | [] () 30 | { 31 | std::stringstream fout; 32 | sdsl::memory_monitor::write_memory_log(fout); 33 | return fout.str(); 34 | } 35 | ); 36 | m.def( 37 | "report_html", 38 | [] () 39 | { 40 | std::stringstream fout; 41 | sdsl::memory_monitor::write_memory_log(fout); 42 | return fout.str(); 43 | } 44 | ); 45 | m.def( 46 | "report_html", 47 | [](const std::string& file_name) 48 | { 49 | std::ofstream fout; 50 | fout.open(file_name, std::ios::out | std::ios::binary); 51 | if (!fout.good()) throw std::runtime_error("Can't write to file"); 52 | sdsl::memory_monitor::write_memory_log(fout); 53 | if (!fout.good()) throw std::runtime_error("Error during write"); 54 | fout.close(); 55 | }, 56 | py::arg("file_name"), 57 | py::call_guard() 58 | ); 59 | m.def( 60 | "report_json", 61 | [](const std::string& file_name) 62 | { 63 | std::ofstream fout; 64 | fout.open(file_name, std::ios::out | std::ios::binary); 65 | if (!fout.good()) throw std::runtime_error("Can't write to file"); 66 | sdsl::memory_monitor::write_memory_log(fout); 67 | if (!fout.good()) throw std::runtime_error("Error during write"); 68 | fout.close(); 69 | }, 70 | py::arg("file_name"), 71 | py::call_guard() 72 | ); 73 | 74 | } 75 | -------------------------------------------------------------------------------- /pysdsl/bits.cpp: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "sdsl/bits.hpp" 5 | #include 6 | 7 | 8 | namespace py = pybind11; 9 | 10 | 11 | template 12 | decltype(auto) as_tuple(const T (&a) [N]) 13 | { 14 | py::tuple result{N}; 15 | for (std::size_t i; i < N; i++) result[i] = a[i]; 16 | return result; 17 | } 18 | 19 | 20 | auto cnt11 = [] (uint64_t x, uint64_t& c) { 21 | auto result = sdsl::bits::cnt11(x, c); 22 | return std::make_pair(result, c); 23 | }; 24 | auto cnt10 = [] (uint64_t x, uint64_t& c) { 25 | auto result = sdsl::bits::cnt10(x, c); 26 | return std::make_pair(result, c); 27 | }; 28 | auto cnt01 = [] (uint64_t x, uint64_t& c) { 29 | auto result = sdsl::bits::cnt01(x, c); 30 | return std::make_pair(result, c); 31 | }; 32 | auto sel = [] (uint64_t x, uint32_t i) { 33 | if (i >= sizeof(sdsl::bits::ps_overflow) / sizeof(sdsl::bits::ps_overflow[0])) { 34 | throw py::index_error(std::to_string(i)); 35 | } 36 | return sdsl::bits::sel(x, i); 37 | }; 38 | auto sel11 = [] (uint64_t x, uint32_t i, uint32_t c=0) { 39 | if (i >= sizeof(sdsl::bits::ps_overflow) / sizeof(sdsl::bits::ps_overflow[0])) { 40 | throw py::index_error(std::to_string(i)); 41 | } 42 | return sdsl::bits::sel11(x, i, c); 43 | }; 44 | 45 | 46 | PYBIND11_MODULE(bits, m) { 47 | m.doc() = "bitwise tricks on 64 bit words."; 48 | 49 | 50 | auto bits_cls = py::class_(m, "bits") 51 | .def_property_readonly_static("all_set", [](py::object /* self */) { return sdsl::bits::all_set; }) 52 | .def_static("all_set_", []() { return sdsl::bits::all_set; }, 53 | "64bit mask with all bits set to 1.") 54 | 55 | .def_property_readonly_static("deBruijn64", [](py::object) { return sdsl::bits::deBruijn64; }) 56 | .def_static("deBruijn64_", []() { return sdsl::bits::deBruijn64; }, 57 | "This constant represents a de Bruijn sequence B(k,n) for k=2 and n=6. 
" 58 | "Details for de Bruijn sequences see " 59 | "http://en.wikipedia.org/wiki/De_bruijn_sequence " 60 | "deBruijn64 is used in combination with the " 61 | "array lt_deBruijn_to_idx.") 62 | 63 | .def_property_readonly_static("lt_deBruijn_to_idx", [](py::object) { return as_tuple(sdsl::bits::lt_deBruijn_to_idx); }) 64 | .def_static("lt_deBruijn_to_idx_", []() { return as_tuple(sdsl::bits::lt_deBruijn_to_idx); }, 65 | "This table maps a 6-bit subsequence S[idx...idx+5] of constant deBruijn64 to idx.") 66 | 67 | .def_property_readonly_static("lt_fib", [](py::object) { return as_tuple(sdsl::bits::lt_fib); }) 68 | .def_static("lt_fib_", []() { return as_tuple(sdsl::bits::lt_fib); }, 69 | "Array containing Fibonacci numbers less than 2**64") 70 | 71 | .def_property_readonly_static("lt_cnt", [](py::object) { 72 | return py::bytes(reinterpret_cast(sdsl::bits::lt_cnt), sizeof(sdsl::bits::lt_cnt)); 73 | }) 74 | .def_static("lt_cnt_", []() { 75 | return py::bytes(reinterpret_cast(sdsl::bits::lt_cnt), sizeof(sdsl::bits::lt_cnt)); 76 | }, "Lookup table for byte popcounts.") 77 | 78 | .def_property_readonly_static("lt_hi", [](py::object) { return as_tuple(sdsl::bits::lt_hi); }) 79 | .def_static("lt_hi_", []() { return as_tuple(sdsl::bits::lt_hi); }, 80 | "Lookup table for most significant set bit in a byte.") 81 | 82 | .def_property_readonly_static("lo_set", [](py::object) { return as_tuple(sdsl::bits::lo_set); }) 83 | .def_static("lo_set_", []() { return as_tuple(sdsl::bits::lo_set); }, 84 | "lo_set[i] is a 64-bit word with the i least significant bits set and the high bits not set. " 85 | "lo_set[0] = 0ULL, lo_set[1]=1ULL, lo_set[2]=3ULL...") 86 | 87 | .def_property_readonly_static("lo_unset", [](py::object) { return as_tuple(sdsl::bits::lo_unset); }) 88 | .def_static("lo_unset_", []() { return as_tuple(sdsl::bits::lo_unset); }, 89 | "lo_unset[i] is a 64-bit word with the i least significant bits not set and the high bits set. 
" 90 | "lo_unset[0] = FFFFFFFFFFFFFFFFULL, lo_unset_set[1]=FFFFFFFFFFFFFFFEULL, ...") 91 | 92 | .def_property_readonly_static("lt_lo", [](py::object) { 93 | return py::bytes(reinterpret_cast(sdsl::bits::lt_lo), sizeof(sdsl::bits::lt_lo)); }) 94 | .def_static("lt_lo_", []() { 95 | return py::bytes(reinterpret_cast(sdsl::bits::lt_lo), sizeof(sdsl::bits::lt_lo)); 96 | }, "Lookup table for least significant set bit in a byte.") 97 | 98 | .def_property_readonly_static("lt_sel", [](py::object) { 99 | return py::bytes(reinterpret_cast(sdsl::bits::lt_sel), sizeof(sdsl::bits::lt_sel)); 100 | }) 101 | .def_static("lt_sel_", []() { 102 | return py::bytes(reinterpret_cast(sdsl::bits::lt_sel), sizeof(sdsl::bits::lt_sel)); 103 | }, "Lookup table for select on bytes. " 104 | "Entry at idx = 256*j + i equals the position of the " 105 | "(j+1)-th set bit in byte i. Positions lie in the range [0..7].") 106 | 107 | .def_property_readonly_static("ps_overflow", [](py::object) { return as_tuple(sdsl::bits::ps_overflow); }) 108 | .def_static("ps_overflow_", []() { return as_tuple(sdsl::bits::ps_overflow); }, 109 | "Use to help to decide if a prefix sum stored in a byte overflows.") 110 | 111 | .def_static("cnt", &sdsl::bits::cnt, "Counts the number of set bits in x.", py::arg("x")) 112 | .def_static("cnt32", &sdsl::bits::cnt32, "Counts the number of set bits in 32-bit integer x.", py::arg("x")) 113 | .def_static("hi", &sdsl::bits::hi, "The position (in 0..63) of the most significant set bit " 114 | "in `x` or 0 if x equals 0.", py::arg("x")) 115 | .def_static("lo", &sdsl::bits::lo, "The position (in 0..63) of the rightmost 1-bit in the 64bit integer x if " 116 | "x>0 and 0 if x equals 0.", py::arg("x")) 117 | 118 | .def_static("cnt11", (uint32_t (*) (uint64_t)) &sdsl::bits::cnt11, py::arg("x"), 119 | "Count the number of consecutive and distinct 11 in the 64bit integer x.\n" 120 | "x: 64bit integer to count the terminating sequence 11 of a Fibonacci code.") 121 | .def_static("cnt11", 
cnt11, "Count the number of consecutive and distinct 11 in the 64bit integer x.\n" 122 | "x: 64bit integer to count the terminating sequence 11 of a Fibonacci code.\n" 123 | "c: Carry equals msb of the previous 64bit integer.", py::arg("x"), py::arg("c")) 124 | .def_static("cnt10", cnt10, "Count 10 bit pairs in the word x.\n" 125 | "x: 64bit integer to count the 10 bit pairs.\n" 126 | "c: Carry equals msb of the previous 64bit integer.", py::arg("x"), py::arg("c")) 127 | .def_static("cnt01", cnt01, "Count 01 bit pairs in the word x.\n" 128 | "x: 64bit integer to count the 01 bit pairs.\n" 129 | "c: Carry equals msb of the previous 64bit integer.", py::arg("x"), py::arg("c")) 130 | 131 | .def_static("map10", &sdsl::bits::map10, py::arg("x"), py::arg("c") = 0, 132 | "Map all 10 bit pairs to 01 or 1 if c=1 and the lsb=0. All other pairs are mapped to 00.") 133 | .def_static("map01", &sdsl::bits::map01, py::arg("x"), py::arg("c") = 1, 134 | "Map all 01 bit pairs to 01 or 1 if c=1 and the lsb=0. All other pairs are mapped to 00.") 135 | 136 | .def_static("sel", sel, py::arg("x"), py::arg("i"), 137 | "Calculate the position of the i-th rightmost 1 bit in the 64bit integer x\n" 138 | "x: 64bit integer.\ni: Argument i must be in the range [1..cnt(x)].") 139 | .def_static("sel11", sel11, py::arg("x"), py::arg("i"), py::arg("c") = 0, 140 | "The position (in 1..63) of the i-th 11-bit-pattern which terminates a Fibonacci coded integer in x if " 141 | "x contains at least i 11-bit-patterns and a undefined value otherwise.\n" 142 | "x: 64 bit integer.\n" 143 | "i: Index of 11-bit-pattern. 
i in [1..cnt11(x)]\n" 144 | "c: Carry bit from word before") 145 | .def_static("hi11", &sdsl::bits::hi11, py::arg("x"), 146 | "The position (in 1..63) of the leftmost 1 of the leftmost 11-bit-pattern which " 147 | "terminates a Fibonacci coded integer in x if x contains a 11-bit-pattern and 0 otherwise.") 148 | .def_static("rev", &sdsl::bits::rev, "reverses a given 64 bit word") 149 | ; 150 | 151 | m.attr("all_set") = bits_cls.attr("all_set_"); 152 | m.attr("deBruijn64") = bits_cls.attr("deBruijn64_"); 153 | m.attr("lt_deBruijn_to_idx") = bits_cls.attr("lt_deBruijn_to_idx_"); 154 | m.attr("lt_fib") = bits_cls.attr("lt_fib_"); 155 | m.attr("lt_cnt") = bits_cls.attr("lt_cnt_"); 156 | m.attr("lt_hi") = bits_cls.attr("lt_hi_"); 157 | m.attr("lo_set") = bits_cls.attr("lo_set_"); 158 | m.attr("lo_unset") = bits_cls.attr("lo_unset_"); 159 | m.attr("lt_lo") = bits_cls.attr("lt_lo_"); 160 | m.attr("lt_sel") = bits_cls.attr("lt_sel_"); 161 | m.attr("ps_overflow") = bits_cls.attr("ps_overflow_"); 162 | 163 | m.attr("cnt") = bits_cls.attr("cnt"); 164 | m.attr("cnt32") = bits_cls.attr("cnt32"); 165 | m.attr("hi") = bits_cls.attr("hi"); 166 | m.attr("lo") = bits_cls.attr("lo"); 167 | m.attr("cnt11") = bits_cls.attr("cnt11"); 168 | m.attr("cnt10") = bits_cls.attr("cnt10"); 169 | m.attr("cnt01") = bits_cls.attr("cnt01"); 170 | m.attr("map10") = bits_cls.attr("map10"); 171 | m.attr("map01") = bits_cls.attr("map01"); 172 | m.attr("sel") = bits_cls.attr("sel"); 173 | m.attr("sel11") = bits_cls.attr("sel11"); 174 | m.attr("hi11") = bits_cls.attr("hi11"); 175 | m.attr("rev") = bits_cls.attr("rev"); 176 | } 177 | -------------------------------------------------------------------------------- /pysdsl/calc.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | 8 | #include "operations/creation.hpp" 9 | #include "operations/iteration.hpp" 10 | #include "operations/sizes.hpp" 11 | 12 | 13 | 
namespace py = pybind11; 14 | 15 | 16 | namespace { 17 | using detail::cbegin; 18 | using detail::cend; 19 | } // namespace 20 | 21 | 22 | template 23 | inline auto add_read_access(py::class_& cls) 24 | { 25 | typedef typename Sequence::value_type value_type; 26 | 27 | add_iteration(cls); 28 | 29 | cls.def( 30 | "__getitem__", 31 | [](const Sequence &self, size_t position) -> T { 32 | if (position >= detail::size(self)) { 33 | throw std::out_of_range(std::to_string(position)); } 34 | return self[position]; }); 35 | cls.def( 36 | "__getitem__", 37 | [](const Sequence &self, int64_t position) -> T { 38 | auto abs_position = std::abs(position); 39 | if (position >= 0) { 40 | throw std::exception(); } 41 | if (abs_position > detail::size(self)) { 42 | throw std::out_of_range(std::to_string(position)); } 43 | return self[detail::size(self) - abs_position]; }); 44 | cls.def( 45 | "__getitem__", 46 | [](const Sequence& self, py::slice slice) { 47 | size_t start, stop, step, slicelength; 48 | if (!slice.compute(detail::size(self), &start, &stop, &step, 49 | &slicelength)) { 50 | throw py::error_already_set{}; } 51 | 52 | typename 53 | detail::IntermediateVector::type result(slicelength); 54 | 55 | for (size_t i = 0; i < slicelength; i++) { 56 | result[i] = self[start]; 57 | start += step; } 58 | return result; }); 59 | //return construct_from(result); }); 60 | return cls; 61 | } 62 | 63 | 64 | template 65 | inline 66 | auto add_std_algo(py::class_& cls) 67 | { 68 | typedef typename Sequence::value_type value_type; 69 | 70 | cls.def( 71 | "__contains__", 72 | [](const Sequence &self, typename Sequence::value_type element) { 73 | return std::find(cbegin(self), 74 | cend(self), element) != cend(self); }, 75 | py::call_guard()); 76 | cls.def( 77 | "max", 78 | [](const Sequence &self) -> T { 79 | return *std::max_element(cbegin(self), cend(self)); }, 80 | py::call_guard()); 81 | cls.def( 82 | "argmax", 83 | [](const Sequence &self) { 84 | return std::distance(cbegin(self), 85 | 
std::max_element(cbegin(self), 86 | cend(self))); }, 87 | py::call_guard()); 88 | cls.def( 89 | "min", 90 | [](const Sequence &self) -> T { 91 | return *std::min_element(cbegin(self), cend(self)); }, 92 | py::call_guard()); 93 | cls.def( 94 | "argmin", 95 | [](const Sequence &self) { 96 | return std::distance(cbegin(self), 97 | std::min_element(cbegin(self), 98 | cend(self))); }, 99 | py::call_guard()); 100 | cls.def( 101 | "minmax", 102 | [](const Sequence &self) -> std::pair { 103 | auto result = std::minmax_element(cbegin(self), 104 | cend(self)); 105 | return std::make_pair(*std::get<0>(result), 106 | *std::get<1>(result)); }, 107 | py::call_guard()); 108 | cls.def( 109 | "sum", 110 | [](const Sequence &self) { 111 | return std::accumulate(cbegin(self), cend(self), 112 | uint64_t(0)); }, 113 | py::call_guard()); 114 | cls.def( 115 | "all", 116 | [](const Sequence &self) { 117 | return std::all_of( 118 | cbegin(self), cend(self), 119 | [] (const value_type value) -> bool { 120 | return value; }); }, 121 | py::call_guard()); 122 | cls.def( 123 | "any", 124 | [](const Sequence &self) { 125 | return std::any_of( 126 | cbegin(self), cend(self), 127 | [] (const value_type value) -> bool { 128 | return value; }); }, 129 | py::call_guard()); 130 | cls.def( 131 | "none", 132 | [](const Sequence &self) { 133 | return std::none_of( 134 | cbegin(self), cend(self), 135 | [] (const value_type value) -> bool { 136 | return value; }); }, 137 | py::call_guard()); 138 | cls.def( 139 | "is_sorted", 140 | [](const Sequence &self) { 141 | return std::is_sorted(cbegin(self), cend(self)); }, 142 | py::call_guard()); 143 | 144 | return cls; 145 | } 146 | -------------------------------------------------------------------------------- /pysdsl/docstrings.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | 4 | const char* doc_int_vector( 5 | "This generic vector class could be used to generate a vector that " 6 | "contains 
integers of fixed width `w` in [1..64]." 7 | ); 8 | 9 | const char* doc_capacity( 10 | "Returns the size of the occupied bits of the int_vector. The capacity of " 11 | "a int_vector is greater or equal to the bit_size of the " 12 | "vector: capacity ≥ bit_size)." 13 | ); 14 | 15 | const char* doc_bit_compress( 16 | "Bit compress the int_vector. Determine the biggest value X " 17 | "and then set the int_width to the smallest possible so that " 18 | "we still can represent X." 19 | ); 20 | 21 | const char* doc_set_to_value( 22 | "Set all entries of int_vector to value k. This method pre-calculates " 23 | "the content of at most 64 words and then repeatedly inserts these words." 24 | ); 25 | 26 | const char* doc_dac_vector( 27 | "A generic immutable space-saving vector class for unsigned integers.\n" 28 | "The values of a dac_vector are immutable after the constructor call.\n" 29 | "The `escaping` technique is used to encode values.\n" 30 | "This is defined as follows (see [1]):\n" 31 | "A k-bit integer is split into `K=k/(b-1)` bits each and " 32 | "encoded into `K` blocks of `b` bits each. All but the last block " 33 | "are marked with by a 1 in the most significant bit. Escaping with " 34 | "`b=8` is also known as vbyte-coding (see [2]). A experimental study " 35 | "of using escaping for the LCP array is given in [3].\n" 36 | "Time complexity: Order(log n/b) worst case, where b is the number " 37 | "of bits in a block\nReferences:\n" 38 | "[1] F. Transier and P. Sanders: `Engineering Basic Search Algorithms " 39 | "of an In-Memory Text Search Engine`, ACM Transactions on " 40 | "Information Systems, Vol. 29, No.1, Article 2, 2010\n" 41 | "[2] H.E. Williams and J. Zobel: `Compressing integers for fast file " 42 | "access`, Computing Journal Vol 43, No.3, 1999\n" 43 | "[3] N. Brisboa, S. Ladra, G. Navarro: `Directly addressable " 44 | "variable-length codes'', Proceedings of SPIRE 2009." 
45 | ); 46 | 47 | const char* doc_dac_vector_dp( 48 | "A generic immutable space-saving vector class for unsigned integers.\n" 49 | "The values of a dac_vector are immutable after the constructor call.\n" 50 | "The \"escaping\" technique is used to encode values. Bit widths of " 51 | "each encoding level are chosen optimally via dynamic programming.\n" 52 | "References\n [1] N. Brisaboa and S. Ladra and G. Navarro: `DACs: " 53 | "Bringing Direct Access to Variable-Length Codes`, " 54 | "Information Processing and Management (IPM) 2013" 55 | ); 56 | 57 | const char* doc_bit_vector_il( 58 | "A bit vector which interleaves the original bit_vector with rank " 59 | "information. \nThis class is a uncompressed bit vector " 60 | "representation. It copies the original bit_vector and interleaves " 61 | "the data every t_bs bits with a cumulative sum of set bits before " 62 | "the current position. Each cumulative sum is stored in a 64 bit " 63 | "word." 64 | ); 65 | 66 | const char* doc_rrr_vector( 67 | "An H_0-compressed bitvector representation.\n" 68 | "References:\n" 69 | "— Rasmus Pagh, Low redundancy in dictionaries with O(1) worst " 70 | "case lookup time, Technical Report 1998. " 71 | "ftp://ftp.cs.au.dk/BRICS/Reports/RS/98/28/BRICS-RS-98-28.pdf, " 72 | "Section 2.\n" 73 | "— Rajeev Raman, V. Raman and S. Srinivasa Rao, Succinct Indexable " 74 | "Dictionaries with Applications to representations of k-ary trees " 75 | "and multi-sets. SODA 2002.\n" 76 | "— Francisco Claude, Gonzalo Navarro: Practical Rank/Select " 77 | "Queries over Arbitrary Sequences. SPIRE 2008: 176-187\n" 78 | "— On the fly-decoding and encoding was discovered in; Gonzalo " 79 | "Navarro, Eliana Providel: Fast, Small, Simple Rank/Select on " 80 | "Bitmaps. 
SEA 2012" 81 | ); 82 | 83 | const char* doc_sd_vector( 84 | "A bit vector which compresses very sparse populated bit vectors " 85 | "by representing the positions of 1 by the Elias-Fano " 86 | "representation for non-decreasing sequences\n" 87 | "References:\n" 88 | "— P. Elias: ''Efficient storage and retrieval by content and " 89 | "address of static files'', Journal of the ACM, 1974\n" 90 | "— R. Fano: ''On the number of bits required to implement an " 91 | "associative memory'', Memorandum 61. Computer Structures Group, " 92 | "Project MAC, MIT, 1971\n" 93 | "— D. Okanohara, K. Sadakane: ''Practical Entropy-Compressed " 94 | "Rank/Select Dictionary'', Proceedings of ALENEX 2007." 95 | ); 96 | 97 | const char* doc_hyb_vector( 98 | "A hybrid-encoded compressed bitvector representation\n" 99 | "References:\n— Juha Karkkainen, Dominik Kempa and " 100 | "Simon J. Puglisi. ''Hybrid Compression of Bitvectors for the " 101 | "FM-Index.'' DCC 2014." 102 | ); 103 | 104 | const char* doc_rank_v( 105 | "A rank structure proposed by Sebastiano Vigna\nSpace complexity: " 106 | "0.25n for a bit vector of length n bits.\n\nThe superblock size is " 107 | "512. Each superblock is subdivided into 512/64 = 8 blocks. " 108 | "So absolute counts for the superblock add 64/512 bits on top of each " 109 | "supported bit. Since the first of the 8 relative count values is 0, " 110 | "we can fit the remaining 7 (each of width log(512)=9) in a 64bit " 111 | "word. The relative counts add another 64/512 bits on top of each " 112 | "supported bit.\nIn total this results in 128/512=25% overhead.\n" 113 | "Reference\nSebastiano Vigna: Broadword Implementation of Rank/Select " 114 | "Queries. WEA 2008: 154-168" 115 | ); 116 | 117 | const char* doc_rank_v5( 118 | "A class supporting rank queries in constant time.\n" 119 | "Space complexity: 0.0625n bits for a bit vector of length n bits.\n\n" 120 | "The superblock size is 2048. 
Each superblock is subdivided into " 121 | "2048/(6*64) = 5 blocks (with some bit remaining). So absolute counts " 122 | "for the superblock add 64/2048 bits on top of each supported bit. " 123 | "Since the first of the 6 relative count values is 0, we can fit the " 124 | "remaining 5 (each of width log(2048)=11) in a 64 bit word. The " 125 | "relative counts add another 64/2048 bits on top of each " 126 | "supported bit. In total this results in 128/2048=6.25% overhead." 127 | ); 128 | 129 | const char* doc_rank_scan( 130 | "A class supporting rank queries in linear time.\n" 131 | "Space complexity: Constant.\n" 132 | "Time complexity: Linear in the size of the supported vector." 133 | ); 134 | 135 | const char* doc_select_mcl( 136 | "A class supporting constant time select queries.\n" 137 | "Space usage: The space usage of the data structure depends on the " 138 | "number `m` of ones in the original bitvector `b`. We store the " 139 | "position of every 4096th set bit (called L1-sampled bits) of `b`. " 140 | "This takes in the worst case (m/4096) log(n) ≤ (n/64) bits.\n" 141 | "Next,\n(1) if the distance of two adjacent L1-sampled bits " 142 | "b[i] and b[j] is greater or equal than log^4 (n), then we store " 143 | "each of the 4096 positions of the set `b` in [i..j-1] with " 144 | "log(n) bits. This results in at most " 145 | "`4096 log(n) / log^4(n)=4096 / log^3(n)` bits per bit.\nFor a " 146 | "bitvector of 4GB, i.e. log(n) = 35 we get about 0.01 bits per bit.\n" 147 | "If the j-i+1 < log^4(n) then\n(2) we store the relative position of " 148 | "every 64th set bit (called L2-sampled bits) in b[i..j-1] in at most " 149 | "4 log log (n) bits per L2-sampled bits.\nA pessimistic upper bound " 150 | "for the space would be `4 log log (n) / 64 ≤ 24/64 = 0.375` bit per " 151 | "bit (since `log log (n) ≤ 6`). 
It is very pessimistic, since we store " 152 | "the relative position in `log log (j-i+1) ≤ log log (n)` bits.\n\n" 153 | "The implementation is a practical variant of the following reference:" 154 | "\nDavid Clark: PhD Thesis: Compact Pat Trees, University of Waterloo, " 155 | "1996 (Section 2.2.2). " 156 | "http://www.nlc-bnc.ca/obj/s4/f2/dsk3/ftp04/nq21335.pdf" 157 | ); 158 | 159 | const char* doc_select_scan( 160 | "A class supporting linear time select queries.\n" 161 | "Space complexity: Constant\n" 162 | "Time complexity: Linear in the size of the supported vector." 163 | ); 164 | 165 | const char* doc_wtint( 166 | "A wavelet tree class for integer sequences.\nSpace complexity: " 167 | "Order(n log(|Sigma|)) bits, where `n` is the size of the vector the " 168 | "wavelet tree was build for." 169 | ); 170 | 171 | const char* doc_wt_gmr_rs( 172 | "A wavelet tree class for integer sequences.\n" 173 | "This is an implementation of the first proposal in the SODA paper " 174 | "of Golynski et. al. which supports fast rank and select, but not fast " 175 | "access.\nReferences:\n[1] A. Golynski, J. Munro and S. Rao: " 176 | "\"Rank/select operations on large alphabets: a tool for text " 177 | "indexing\", Proceedings of SODA 2006." 178 | ); 179 | 180 | const char* doc_wt_gmr( 181 | "A wavelet tree class for integer sequences.\n" 182 | "This is an implementation of the second proposal in the SODA paper " 183 | "of Golynski et. al. which supports fast access, inverse select, rank, " 184 | "and select.\nReferences:\n[1] A. Golynski, J. Munro and S. Rao: " 185 | "\"Rank/select operations on large alphabets: a tool for text " 186 | "indexing\", Proceedings of SODA 2006." 187 | ); 188 | 189 | const char* doc_wt_ap( 190 | "A wavelet tree class for integer sequences.\nSpace complexity:\n" 191 | "Order(n) (H₀ + 1) bits, where n is the size of the vector the wavelet " 192 | "tree was build for.\nReferences:\n" 193 | "[1] J. Barbay, F. Claude, T. Gagie, G. Navarro and Y. 
Nekrich:" 194 | "''Efficient Fully-Compressed Sequence Representations''" 195 | ); 196 | 197 | const char* doc_wt_huff( 198 | "A Huffman-shaped wavelet tree.\n" 199 | "Space complexity: `n * H₀ + 2 * |Sigma| * log n` bits, where n is the " 200 | "size of the vector the wavelet tree was build for.\n" 201 | "A wavelet tree is build for a vector of characters over the byte alphabet " 202 | "Sigma. If you need a wavelet tree for a integer alphabet you should use " 203 | "`WtInt`.\nThe wavelet tree `wt` consists of a tree of bitvectors and " 204 | "provides three efficient methods:\n" 205 | "— The []-operator: wt[i] returns the i-th symbol of vector for which the " 206 | "wavelet tree was build for.\n" 207 | "— The rank method: wt.rank(i, c) returns the number of occurrences " 208 | "of symbol `c` in the prefix [0..i-1] in the vector for which the " 209 | "wavelet tree was build for.\n" 210 | "— The select method: wt.select(j, c) returns the index i from " 211 | "[0..size()-1] of the j-th occurrence of symbol c.\n\n" 212 | "The idea of using a Huffman shaped wavelet was first mentioned on page 17 " 213 | "of the following technical report:\n\tVeli Mäkinen and Gonzalo Navarro: " 214 | "''Succinct Suffix Arrays based on Run-Length Encoding.''\n" 215 | "Available under: http://swp.dcc.uchile.cl/TR/2005/TR_DCC-2005-004.pdf" 216 | ); 217 | 218 | const char* doc_wm_int( 219 | "A wavelet tree class for integer sequences.\n" 220 | "This wavelet tree variant does not store the two children of a node v " 221 | "aligned with v; it is also known as wavelet matrix.\n" 222 | "References:\n[1] F. Claude, G. Navarro: ''The Wavelet Matrix'', " 223 | "Proceedings of SPIRE 2012." 
224 | ); 225 | 226 | const char* doc_wt_blcd( 227 | "A balanced wavelet tree.\n" 228 | "Space complexity: Order(n * log(|Sigma|) + 2 * |Sigma| * log(n)) bits, " 229 | "where n is the size of the vector the wavelet tree was build for.\n" 230 | "Reference: Roberto Grossi, Ankur Gupta, Jeffrey Scott Vitter: " 231 | "\"High-order entropy-compressed text indexes\". Proceedings of the 14th " 232 | "Annual ACM-SIAM Symposium on Discrete Algorithms (SODA 2003)." 233 | ); 234 | 235 | const char* doc_wt_hutu( 236 | "A Hu-Tucker-shaped wavelet tree.\n" 237 | "Space complexity: Almost n * H₀ + 2 * |Sigma| * log(n) bits, where " 238 | "n is the size of the vector the wavelet tree was build for." 239 | ); 240 | 241 | const char* doc_csa( 242 | "A class for the uncompressed suffix array (SA).\n" 243 | "This class stores the information of the suffix array and the inverse " 244 | "suffix array in uncompressed form.\nIn contrast to this class, classes " 245 | "like csa_sada, and csa_wt store the suffix array and inverse suffix array " 246 | "data in compressed form.\n\nThe interface of this class is exactly the " 247 | "same as for the compressed indexes. This is the reason why it is in the " 248 | "group of compressed suffix arrays.\n\nSpace complexity:" 249 | "2n log n bits, where n equals the size of the suffix array." 250 | ); 251 | 252 | const char* doc_sada( 253 | "A class for the Compressed Suffix Array (CSA) proposed by Sadakane for " 254 | "practical implementation." 255 | ); 256 | 257 | const char* doc_csa_wt( 258 | "A class for the Compressed Suffix Array (CSA) based on a Wavelet Tree " 259 | "(WT) of the Burrow Wheeler Transform of the original text." 260 | ); 261 | 262 | const char* doc_sorted_int_stack( 263 | "A stack class which can contain integers in strictly increasing order." 
264 | ); 265 | -------------------------------------------------------------------------------- /pysdsl/io.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include 6 | 7 | #include 8 | 9 | #include "operations/iteration.hpp" 10 | 11 | 12 | namespace py = pybind11; 13 | 14 | 15 | template class support_helper; 16 | 17 | 18 | template 19 | decltype(auto) to_string(const T &self, const size_t max_elements=100, 20 | const char* sep=", ", const char* start="[", 21 | const char* ends="]") 22 | { 23 | std::ostringstream fout; 24 | fout.exceptions(std::ostringstream::failbit | std::ostringstream::badbit); 25 | 26 | fout << start; 27 | size_t count = 0; 28 | for (auto i = detail::cbegin(self); i != detail::cend(self); i++) { 29 | if (count) fout << sep; 30 | 31 | const value_type value = *i; 32 | 33 | fout << value; 34 | 35 | if (max_elements > 0 && count >= max_elements) { 36 | fout << sep << "...(" << self.size() << " elements)"; 37 | break; } 38 | count++; } 39 | fout << ends; 40 | 41 | return fout.str(); 42 | } 43 | 44 | 45 | template 46 | inline auto add_to_string(py::class_& cls) 47 | { 48 | cls.def("__str__", [](const T& self) { return to_string(self); }); 49 | 50 | cls.def( 51 | "to_string", &to_string, 52 | py::arg("max_elements") = 0, py::arg("sep") = ", ", 53 | py::arg("begin") = "[", py::arg("end") = "]"); 54 | 55 | const auto name = py::cast(cls.attr("__name__")); 56 | 57 | cls.def( 58 | "__repr__", 59 | [name] (const T& self) { 60 | return "<" + name + 61 | to_string(self, 100, ", ", " [", "]>"); }); 62 | 63 | return cls; 64 | } 65 | 66 | 67 | template 68 | inline auto add_serialization(py::class_& cls, TCtorArgs&&... 
args) 69 | { 70 | cls.def(py::pickle( 71 | [&](const T& self){ 72 | std::stringstream fout; 73 | self.serialize(fout); 74 | return py::bytes(fout.str()); }, 75 | [&](const py::bytes& serialized){ 76 | T result(args...); 77 | std::stringstream fin(serialized); 78 | result.load(fin); 79 | return result; })); 80 | cls.def( 81 | "store_to_file", 82 | [](const T &self, const std::string& file_name) { 83 | return sdsl::store_to_file(self, file_name); }, 84 | py::arg("file_name"), 85 | py::call_guard()); 86 | 87 | cls.def_static( 88 | "load_from_file", 89 | [&](const std::string& file_name) { 90 | T self(args...); 91 | if (sdsl::load_from_file(self, file_name)) { 92 | return self; } 93 | throw std::exception(); }, 94 | py::arg("file_name"), 95 | py::call_guard()); 96 | 97 | cls.def( 98 | "store_to_checked_file", 99 | [](const T &self, const std::string& file_name) { 100 | return sdsl::store_to_checked_file(self, file_name); }, 101 | py::arg("file_name"), 102 | py::call_guard() ); 103 | 104 | cls.def_static( 105 | "load_from_checkded_file", 106 | [&](const std::string& file_name) { 107 | T self(args...); 108 | if (sdsl::load_from_checked_file(self, file_name)) { 109 | return self; } 110 | throw std::exception(); }, 111 | py::arg("file_name"), 112 | py::call_guard()); 113 | return cls; 114 | } 115 | 116 | 117 | template 118 | inline auto add_description(X& cls) 119 | { 120 | typedef typename X::type P; 121 | cls.def( 122 | "write_structure_json", 123 | [](const P& self, const std::string& file_name) { 124 | std::ofstream fout; 125 | fout.open(file_name, std::ios::out | std::ios::binary); 126 | if (!fout.good()) throw std::runtime_error("Can't write to file"); 127 | sdsl::write_structure(self, fout); 128 | if (!fout.good()) throw std::runtime_error("Error during write"); 129 | fout.close(); }, 130 | py::arg("file_name"), 131 | py::call_guard()); 132 | cls.def( 133 | "write_structure_html", 134 | [](const P& self, const std::string& file_name) { 135 | std::ofstream fout; 136 
| fout.open(file_name, std::ios::out | std::ios::binary); 137 | if (!fout.good()) throw std::runtime_error("Can't write to file"); 138 | sdsl::write_structure(self, fout); 139 | if (!fout.good()) throw std::runtime_error("Error during write"); 140 | fout.close(); }, 141 | py::arg("file_name"), 142 | py::call_guard()); 143 | 144 | cls.def_property_readonly( 145 | "structure_json", 146 | [](const P& self) { 147 | std::ostringstream fout; 148 | fout.exceptions(std::ostringstream::failbit | 149 | std::ostringstream::badbit); 150 | 151 | sdsl::write_structure(self, fout); 152 | return fout.str(); }, 153 | py::call_guard()); 154 | 155 | cls.def_property_readonly( 156 | "structure_html", 157 | [](const P& self) { 158 | std::ostringstream fout; 159 | fout.exceptions(std::ostringstream::failbit | 160 | std::ostringstream::badbit); 161 | 162 | sdsl::write_structure(self, fout); 163 | return fout.str();}, 164 | py::call_guard()); 165 | 166 | cls.def_property_readonly( 167 | "structure", 168 | [](const P& self) { 169 | std::ostringstream fout; 170 | fout.exceptions(std::ostringstream::failbit | 171 | std::ostringstream::badbit); 172 | 173 | sdsl::write_structure(self, fout); 174 | auto json = py::module::import("json"); 175 | return json.attr("loads")(fout.str()); }); 176 | cls.def_property_readonly( 177 | "size_in_mega_bytes", 178 | [](const P &self) { return sdsl::size_in_mega_bytes(self); }); 179 | cls.def_property_readonly( 180 | "size_in_bytes", 181 | [](const P &self) { return sdsl::size_in_bytes(self); }); 182 | 183 | return cls; 184 | } 185 | 186 | 187 | template 188 | inline auto add_description(py::class_>& cls) 189 | { 190 | return add_description>, T>(cls); 191 | } 192 | -------------------------------------------------------------------------------- /pysdsl/memory_monitor.py: -------------------------------------------------------------------------------- 1 | from pysdsl import _memory_monitor 2 | 3 | 4 | class MemoryMonitor(object): 5 | 6 | def __init__(self, 
out_html=None, out_json=None): 7 | self.out_html = out_html 8 | self.out_json = out_json 9 | 10 | def __enter__(self): 11 | _memory_monitor.start() 12 | 13 | def __exit__(self, exc_type, exc_value, traceback): 14 | _memory_monitor.stop() 15 | 16 | if self.out_html is not None: 17 | _memory_monitor.report_html(self.out_html) 18 | 19 | if self.out_json is not None: 20 | _memory_monitor.report_json(self.out_json) 21 | -------------------------------------------------------------------------------- /pysdsl/operations/creation.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | #include 10 | 11 | #include 12 | 13 | #include "operations/iteration.hpp" 14 | #include "operations/sizes.hpp" 15 | #include "types/pysequence.hpp" 16 | #include "util/tupletricks.hpp" 17 | 18 | 19 | namespace py = pybind11; 20 | 21 | 22 | namespace detail 23 | { 24 | 25 | template , T>::value> 27 | struct IntermediateVector { using type = sdsl::int_vector<>; }; 28 | 29 | 30 | template 31 | struct IntermediateVector { using type = sdsl::int_vector<1>; }; 32 | 33 | 34 | template 35 | struct IntermediateVector, value_type, false> 36 | { 37 | using type = sdsl::int_vector; 38 | }; 39 | 40 | 41 | template < 42 | typename T, 43 | typename = typename std::enable_if< 44 | !std::is_same::value 46 | >::type, 47 | typename = decltype(std::declval().begin())> 48 | std::true_type has_non_const_begin_impl(T *); 49 | 50 | std::false_type has_non_const_begin_impl(...); 51 | 52 | 53 | template 54 | using has_non_const_begin = decltype(has_non_const_begin_impl( 55 | std::declval())); 56 | 57 | 58 | struct construct_explicit { int value; }; 59 | struct construct_iter { int value; }; 60 | struct construct_copy_empty { int value; }; 61 | struct construct_copy_size { int value; }; 62 | 63 | } // namespace detail 64 | 65 | 66 | // The only version of construct_from if T can be constructed 
from From 67 | template ::value 70 | >::type> 71 | constexpr T construct_from(const From& obj, 72 | detail::construct_explicit /* unused */ = {}) { 73 | return T(obj); } 74 | 75 | 76 | // The only version of construct_from if T can be constructed from iterator 77 | template < 78 | class T, class From, 79 | typename /* no direct construction */ = typename std::enable_if< 80 | !std::is_constructible::value 81 | >::type, 82 | typename /* construct from iterator */ = typename std::enable_if< 83 | std::is_constructible< 84 | T, 85 | decltype(detail::cbegin(std::declval())), 86 | decltype(detail::cend(std::declval())) 87 | >::value 88 | >::type> 89 | constexpr T construct_from(const From& obj, 90 | detail::construct_iter /* unused */ = {}) { 91 | return T(detail::cbegin(obj), detail::cend(obj)); } 92 | 93 | 94 | // The only version of construct_from if T can be filled from iterator and 95 | // can only be created empty 96 | template < 97 | class T, class From, 98 | typename /* no direct construction */ = typename std::enable_if< 99 | !std::is_constructible::value 100 | >::type, 101 | typename /* no construction from iterator */ = typename std::enable_if< 102 | !std::is_constructible< 103 | T, 104 | decltype(detail::cbegin(std::declval())), 105 | decltype(detail::cend(std::declval()))>::value 106 | >::type, 107 | typename /* default constructable */ = typename std::enable_if< 108 | std::is_constructible::value 109 | >::type, 110 | typename /* no construction with size */ = typename std::enable_if< 111 | !std::is_constructible::value 112 | >::type, 113 | typename /* can be modified after construction */ = typename std::enable_if< 114 | detail::has_non_const_begin::value 115 | >::type> 116 | inline T construct_from(const From& obj, 117 | detail::construct_copy_empty /* unused */ = {}) 118 | { 119 | T result; 120 | std::copy(detail::cbegin(obj), detail::cend(obj), result.begin()); 121 | return result; 122 | } 123 | 124 | 125 | // The only version of construct_from if T can 
be filled from iterator and 126 | // can allocate memory aforehand 127 | template < 128 | class T, class From, 129 | typename /* no direct construction */ = typename std::enable_if< 130 | !std::is_constructible::value 131 | >::type, 132 | typename /* no construction from iterator */ = typename std::enable_if< 133 | !std::is_constructible< 134 | T, 135 | decltype(detail::cbegin(std::declval())), 136 | decltype(detail::cend(std::declval()))>::value 137 | >::type, 138 | typename /* construction with known size */ = typename std::enable_if< 139 | std::is_constructible< 140 | T, decltype(detail::size(std::declval()))>::value 141 | >::type, 142 | typename /* can be modified after construction */ = typename std::enable_if< 143 | detail::has_non_const_begin::value 144 | >::type> 145 | inline T construct_from(const From& obj, 146 | detail::construct_copy_size /* unused */ = {}) 147 | { 148 | T result(detail::size(obj)); 149 | std::copy(detail::cbegin(obj), detail::cend(obj), result.begin()); 150 | return result; 151 | } 152 | 153 | // The only version of construct_from if T can only be constructed 154 | // via sdsl::construct_im 155 | template < 156 | class T, class From, 157 | class With = typename detail::IntermediateVector::type, 158 | typename /* unused */ = typename std::enable_if< 159 | std::is_constructible::value 160 | >::type, 161 | typename = typename std::enable_if< 162 | !std::is_constructible::value 163 | >::type, 164 | typename = typename std::enable_if< 165 | !std::is_constructible< 166 | T, 167 | decltype(detail::cbegin(std::declval())), 168 | decltype(detail::cend(std::declval()))>::value 169 | >::type, 170 | typename = typename std::enable_if< 171 | !std::is_constructible::value 172 | >::type, 173 | typename = typename std::enable_if< 174 | !detail::has_non_const_begin::value 175 | >::type> 176 | inline T construct_from(const From& obj) 177 | { 178 | T result; 179 | sdsl::construct_im(result, construct_from(obj)); 180 | 181 | return result; 182 | } 183 | 
184 | 185 | namespace detail 186 | { 187 | template > 189 | class add_init_functor 190 | { 191 | public: 192 | add_init_functor(BindCls &cls_to_add_def) : m_cls_to(cls_to_add_def) {} 193 | 194 | template 195 | decltype(auto) operator()(const InputCls &) 196 | { 197 | m_cls_to.def(py::init( 198 | [] (const typename InputCls::type& from) { 199 | return construct_from(from); }), 200 | py::arg("v"), 201 | py::call_guard()); 202 | return m_cls_to; 203 | } 204 | 205 | private: 206 | BindCls& m_cls_to; 207 | }; 208 | 209 | 210 | template 211 | class add_many_inits_to_each 212 | { 213 | public: 214 | add_many_inits_to_each(const std::tuple& from_each): 215 | m_from_each(from_each) {} 216 | 217 | template 218 | decltype(auto) operator()(BindCls& cls) { 219 | return for_each(m_from_each, add_init_functor(cls)); } 220 | 221 | private: 222 | const std::tuple& m_from_each; 223 | }; 224 | 225 | 226 | template 227 | class pysequence_init_functor 228 | { 229 | typedef typename T::value_type value_type; 230 | public: 231 | decltype(auto) operator()(py::class_& cls) { 232 | return cls.def(py::init( 233 | [] (const py::sequence& v) { 234 | return construct_from( 235 | sequence_wrapper(v)); }), 236 | py::arg("v")); } 237 | }; 238 | 239 | 240 | class add_pysequence_init_functor 241 | { 242 | public: 243 | add_pysequence_init_functor() {} 244 | 245 | template > 247 | decltype(auto) operator()(BindCls& cls) { 248 | return pysequence_init_functor()(cls); } 249 | }; 250 | } 251 | 252 | 253 | inline 254 | auto make_pysequence_init_functor() { 255 | return detail::add_pysequence_init_functor(); } 256 | 257 | 258 | template 259 | inline 260 | auto make_inits_many_functor(const std::tuple& from_each) { 261 | return detail::add_many_inits_to_each(from_each); } 262 | -------------------------------------------------------------------------------- /pysdsl/operations/iteration.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 
"operations/sizes.hpp" 4 | #include "util/indexiterator.hpp" 5 | 6 | 7 | namespace detail 8 | { 9 | struct no_iterator {}; 10 | struct has_iterator: no_iterator {}; 11 | 12 | template 13 | constexpr 14 | auto cbegin_impl(const Sequence& sequence, has_iterator /* unused */) 15 | { 16 | return sequence.begin(); 17 | } 18 | 19 | template 20 | constexpr 21 | auto cend_impl(const Sequence& sequence, has_iterator /* unused */) 22 | { 23 | return sequence.end(); 24 | } 25 | 26 | template 27 | constexpr 28 | auto cbegin_impl(const Sequence& sequence, no_iterator /* unused */) 29 | { 30 | return count_index_iterator(&sequence, 0); 31 | } 32 | 33 | template 34 | constexpr 35 | auto cend_impl(const Sequence& sequence, no_iterator /* unused */) 36 | { 37 | return count_index_iterator(&sequence, size(sequence)); 38 | } 39 | 40 | template 41 | constexpr auto cbegin(const Sequence& sequence) 42 | { 43 | return cbegin_impl(sequence, has_iterator()); 44 | } 45 | 46 | template 47 | constexpr auto cend(const Sequence& sequence) 48 | { 49 | return cend_impl(sequence, has_iterator()); 50 | } 51 | 52 | } // namespace detail 53 | 54 | 55 | template 56 | inline auto add_iteration(py::class_& cls) 57 | { 58 | return cls.def( 59 | "__iter__", 60 | [](const Sequence &sequence) { 61 | return py::make_iterator(detail::cbegin(sequence), 62 | detail::cend(sequence)); }, 63 | py::keep_alive<0, 1>() 64 | ); 65 | } 66 | -------------------------------------------------------------------------------- /pysdsl/operations/sizes.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | 7 | namespace py = pybind11; 8 | 9 | 10 | namespace detail 11 | { 12 | struct no_max_size {}; 13 | struct has_max_size: no_max_size {}; 14 | 15 | template 16 | inline 17 | auto add_max_size_impl(py::class_& cls, has_max_size /* unused */) 18 | { 19 | cls.def_property_readonly_static( 20 | "max_size", 21 | [](py::object /* self */) { return 
T::max_size(); }, 22 | "Maximum size of the int_vector."); 23 | return cls; 24 | } 25 | 26 | template 27 | constexpr 28 | auto add_max_size_impl(py::class_& cls, no_max_size /* unused */) 29 | { return cls; } 30 | 31 | template constexpr auto size(const T& seq) { return seq.size(); } 32 | } // namespace detail 33 | 34 | 35 | template 36 | constexpr auto add_max_size(py::class_& cls) 37 | { 38 | return detail::add_max_size_impl(cls, detail::has_max_size()); 39 | } 40 | 41 | 42 | template 43 | inline auto add_max_size(py::class_>& cls) 44 | { 45 | cls.def_property_readonly_static( 46 | "max_size", 47 | [](py::object /* self */) { 48 | return sdsl::bit_vector::max_size();}, 49 | "Maximum size of the bit_vector_il."); 50 | return cls; 51 | } 52 | 53 | 54 | template 55 | inline auto add_sizes(py::class_& cls) 56 | { 57 | add_max_size(cls); 58 | 59 | auto size = [] (const Sequence& self) { 60 | return detail::size(self); }; 61 | 62 | cls.def("__len__", size, 63 | "The number of elements in the container."); 64 | cls.def_property_readonly("size", size, 65 | "The number of elements in the container."); 66 | return cls; 67 | } 68 | -------------------------------------------------------------------------------- /pysdsl/supports.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | 8 | #include 9 | 10 | #include "docstrings.hpp" 11 | #include "io.hpp" 12 | 13 | 14 | namespace py = pybind11; 15 | 16 | 17 | template 18 | class support_helper 19 | { 20 | private: 21 | const sdsl::bit_vector& m_vec; 22 | const T m_support; 23 | public: 24 | typedef T type; 25 | 26 | support_helper(const sdsl::bit_vector& vec, const T&& support): 27 | m_vec(vec), 28 | m_support(std::move(support)) 29 | {} 30 | auto size() const { return m_vec.size(); } 31 | auto operator()(size_t idx) const { return m_support(idx); } 32 | 33 | operator const T&() const { return m_support; } 34 | }; 35 | 36 | 37 | 
template 38 | inline 39 | auto add_support_class(py::module &m, 40 | const std::string&& name, 41 | const std::string&& method_name, 42 | const std::string&& doc_call, 43 | const char* doc = nullptr) 44 | { 45 | auto cls = py::class_(m, name.c_str()); 46 | 47 | cls.def( 48 | method_name.c_str(), 49 | [](const Base& self, size_t idx) { 50 | if (idx >= self.size()) { 51 | throw std::out_of_range(std::to_string(idx)); } 52 | return self(idx); }, 53 | py::call_guard(), 54 | py::arg("idx"), 55 | doc_call.c_str()); 56 | cls.attr("__call__") = cls.attr(method_name.c_str()); 57 | 58 | add_description(cls); 59 | 60 | if (doc) cls.doc() = doc; 61 | 62 | return cls; 63 | } 64 | 65 | 66 | template 67 | inline 68 | decltype(auto) bind_support(const S*, 69 | py::class_& cls, const std::string& call_name, 70 | const char* alt_name=nullptr) 71 | { 72 | cls.def( 73 | call_name.c_str(), 74 | [](T& self) { 75 | S support; 76 | sdsl::util::init_support(support, &self); 77 | 78 | return support; }, 79 | py::keep_alive<0, 1>()); 80 | 81 | if (alt_name) cls.attr(alt_name) = cls.attr(call_name.c_str()); 82 | 83 | return cls; 84 | } 85 | 86 | 87 | template 88 | inline 89 | decltype(auto) bind_support(const support_helper*, 90 | py::class_& cls, const std::string& call_name, 91 | const char* alt_name=nullptr) 92 | { 93 | cls.def( 94 | call_name.c_str(), 95 | [](T& self) { 96 | S support; 97 | sdsl::util::init_support(support, &self); 98 | 99 | return support_helper(self, std::move(support)); }, 100 | py::keep_alive<0, 1>() ); 101 | 102 | if (alt_name) cls.attr(alt_name) = cls.attr(call_name.c_str()); 103 | 104 | return cls; 105 | } 106 | 107 | 108 | template 110 | inline 111 | auto add_rank_support(py::module &m, py::class_& cls, 112 | const std::string& base_name, 113 | const char* suffix = "", 114 | bool defaults = true, 115 | const std::string s0 = "0", const std::string s1 = "1", 116 | const char* doc_rank = nullptr) 117 | { 118 | add_support_class(m, base_name + "Rank" + suffix + "_" + 
s0, "rank", 119 | "number of patterns `" + s0 + "` in the prefix " 120 | "[0..idx) in supported vector", doc_rank); 121 | bind_support((R0 *)nullptr, cls, 122 | std::string("init_rank") + suffix + "_" + s0); 123 | 124 | add_support_class(m, base_name + "Rank" + suffix + "_" + s1, "rank", 125 | "number of patterns `" + s1 + "` in the prefix " 126 | "[0..idx) in supported vector", doc_rank); 127 | bind_support((R1 *)nullptr, cls, 128 | std::string("init_rank") + suffix + "_" + s1, 129 | defaults ? 130 | (std::string("init_rank") + suffix).c_str() : 131 | nullptr); 132 | 133 | return cls; 134 | } 135 | 136 | 137 | template 140 | inline 141 | auto add_select_support(py::module &m, py::class_& cls, 142 | const std::string& base_name, 143 | const char* suffix = "", 144 | bool defaults = true, 145 | const std::string s0 = "0", const std::string s1 = "1", 146 | const char* doc_select = nullptr) 147 | { 148 | add_support_class(m, base_name + "Select" + suffix + "_" + s0, "select", 149 | "position of the idx-th pattern `" + s0 + 150 | "` in supported vector", doc_select); 151 | bind_support((S0 *)nullptr, 152 | cls, std::string("init_select") + suffix + "_" + s0); 153 | 154 | add_support_class(m, base_name + "Select" + suffix + "_" + s1, "select", 155 | "position of the idx-th pattern `" + s1 + 156 | "` in supported vector", doc_select); 157 | bind_support((S1 *)nullptr, cls, 158 | std::string("init_select") + suffix + "_" + s1, 159 | defaults ? 
160 | (std::string("init_select") + suffix).c_str() : 161 | nullptr); 162 | 163 | return cls; 164 | } 165 | 166 | 167 | inline 168 | void add_bitvector_supports(py::module& m, py::class_& cls) 169 | { 170 | add_rank_support, 172 | sdsl::rank_support_v<1, 1>>( 173 | m, cls, "_BitVector", "V", true, "0", "1", doc_rank_v); 174 | add_rank_support, 176 | sdsl::rank_support_v<01, 2>>( 177 | m, cls, "_BitVector", "V", false, "00", "01", doc_rank_v); 178 | add_rank_support, 180 | sdsl::rank_support_v<11, 2>>( 181 | m, cls, "_BitVector", "V", false, "10", "11", doc_rank_v); 182 | add_rank_support, 184 | sdsl::rank_support_v5<1, 1>>( 185 | m, cls, "_BitVector", "V5", false, "0", "1", doc_rank_v5); 186 | add_rank_support, 188 | sdsl::rank_support_v5<01, 2>>( 189 | m, cls, "_BitVector", "V5", false, "00", "01", doc_rank_v5); 190 | add_rank_support, 192 | sdsl::rank_support_v5<11, 2>>( 193 | m, cls, "_BitVector", "V5", false, "10", "11", doc_rank_v5); 194 | cls.attr("init_rank") = cls.attr("init_rankV"); 195 | cls.attr("init_rank_0") = cls.attr("init_rankV_0"); 196 | cls.attr("init_rank_1") = cls.attr("init_rankV_1"); 197 | 198 | add_select_support>, 200 | support_helper>>( 201 | m, cls, "_BitVector", "MCL", true, "0", "1", doc_select_mcl); 202 | add_select_support>, 204 | support_helper>>( 205 | m, cls, "_BitVector", "MCL", false, "10", "11", doc_select_mcl); 206 | cls.attr("init_select") = cls.attr("init_selectMCL"); 207 | cls.attr("init_select_0") = cls.attr("init_selectMCL_0"); 208 | cls.attr("init_select_1") = cls.attr("init_selectMCL_1"); 209 | 210 | add_rank_support, 212 | sdsl::rank_support_scan<1, 1>>( 213 | m, cls, "_BitVector", "Scan", false, "0", "1", doc_rank_scan); 214 | add_rank_support, 216 | sdsl::rank_support_scan<01, 2>>( 217 | m, cls, "_BitVector", "Scan", false, "00", "01", doc_rank_scan); 218 | add_rank_support, 220 | sdsl::rank_support_scan<11, 2>>( 221 | m, cls, "_BitVector", "Scan", false, "10", "11", doc_rank_scan); 222 | 223 | add_select_support>, 225 
| support_helper>>( 226 | m, cls, "_BitVector", "Scan", false, "0", "1", doc_select_scan); 227 | add_select_support>, 229 | support_helper>>( 230 | m, cls, "_BitVector", "Scan", false, "10", "01", doc_select_scan); 231 | } 232 | -------------------------------------------------------------------------------- /pysdsl/types/bitvector.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | 10 | #include 11 | 12 | #include "calc.hpp" 13 | #include "docstrings.hpp" 14 | #include "io.hpp" 15 | #include "supports.hpp" 16 | #include "operations/sizes.hpp" 17 | #include "operations/iteration.hpp" 18 | 19 | 20 | namespace py = pybind11; 21 | 22 | /* Registers an immutable bit-vector class named `name` in module `m`: sizes, description, serialization, to-string, read access, std algorithms, a bounds-checked `get_int`, and rank/select supports; the class is also appended to `m.all_immutable_bitvectors`. */ 23 | template 24 | inline 25 | auto add_bitvector_class(py::module &m, const std::string&& name, 26 | const char* doc = nullptr, 27 | const char* doc_rank = nullptr, 28 | const char* doc_select = nullptr) 29 | { 30 | auto cls = py::class_(m, name.c_str()).def(py::init()); 31 | 32 | add_sizes(cls); 33 | add_description(cls); 34 | add_serialization(cls); 35 | add_to_string(cls); 36 | 37 | add_read_access(cls); 38 | add_std_algo(cls); 39 | 40 | if (doc) cls.doc() = doc; 41 | 42 | cls.def( 43 | "get_int", 44 | [](const T &self, size_t idx, uint8_t len) { 45 | /* a zero-length read used to slip through the wrapped `idx + len - 1` test for idx >= 1 */ if (len == 0) { throw std::invalid_argument("len should be >= 1"); } 46 | /* overflow-safe form of `idx + len - 1 >= self.size()`: the old sum could wrap around for huge idx and pass the check */ if (idx >= self.size() || len > self.size() - idx) { throw std::out_of_range(std::to_string(idx)); } 47 | if (len > 64) { 48 | throw std::invalid_argument("len should be <= 64"); } 49 | return self.get_int(idx, len); }, 50 | py::arg("idx"), 51 | py::arg("len") = 64, 52 | "Get the integer value of the binary string of length `len` " 53 | "starting at position `idx`.", 54 | py::call_guard()); 55 | 56 | add_rank_support(m, cls, "_" + name, "", true, "0", "1", doc_rank); 57 | add_select_support(m, cls, "_" + name, "", true, "0", "1", doc_select); 58 | 59 | m.attr("all_immutable_bitvectors").attr("append")(cls); 60 | 61 | return cls; 62 | } 63 | 64 | 65 | template 66 | inline 
67 | auto add_bitvector_class(py::module &m, const char* name, 68 | const char* doc = nullptr, 69 | const char* doc_rank = nullptr, 70 | const char* doc_select = nullptr) 71 | { 72 | return add_bitvector_class(m, std::string(name), 73 | doc, doc_rank, doc_select); 74 | } 75 | 76 | 77 | template 78 | inline 79 | auto add_bitvector_class(py::module &m, const std::string& name, 80 | const char* doc = nullptr, 81 | const char* doc_rank = nullptr, 82 | const char* doc_select = nullptr) 83 | { 84 | return add_bitvector_class(m, std::string(name), // i.e. copy name 85 | doc, doc_rank, doc_select); 86 | } 87 | 88 | 89 | template 90 | inline auto add_bit_vector_il(py::module& m) 91 | { 92 | auto cls = add_bitvector_class>( 93 | m, 94 | std::string("BitVectorInterLeaved") + std::to_string(t_bs), 95 | doc_bit_vector_il); 96 | 97 | m.attr("bit_vector_interleaved").attr("__setitem__")(t_bs, cls); 98 | 99 | return cls; 100 | } 101 | 102 | 103 | template , uint16_t t_k=32> 104 | inline auto add_rrr_vector(py::module& m, 105 | const char* name = "RamanRamanRaoVector") 106 | { 107 | auto cls = add_bitvector_class>( 108 | m, 109 | std::string(name) + std::to_string(t_bs), 110 | doc_rrr_vector); 111 | 112 | m.attr("raman_raman_rao_vectors").attr("__setitem__")(t_bs, cls); 113 | 114 | return cls; 115 | } 116 | 117 | 118 | template , uint16_t t_k=32> 119 | inline auto add_rrr_vector(py::module& m, const py::class_& py_rac, 120 | const char* name = "RamanRamanRaoVector") 121 | { 122 | auto cls_name = std::string(name) + py::cast(py_rac.attr("__name__")) + std::to_string(t_bs); 123 | 124 | auto cls = add_bitvector_class>( 125 | m, cls_name, doc_rrr_vector); 126 | 127 | m.attr("raman_raman_rao_vectors").attr("__setitem__")(cls_name, cls); 128 | 129 | return cls; 130 | } 131 | 132 | 133 | template 134 | inline auto add_sd_vector(py::module& m, const char* name="SDVector") 135 | { 136 | auto cls = add_bitvector_class>( 137 | m, 138 | std::string(name), 139 | doc_sd_vector); 140 | 141 | 
m.attr("sparse_bit_vectors").attr("__setitem__")(name, cls); 142 | 143 | return cls; 144 | } 145 | 146 | 147 | template 148 | inline auto add_sd_vector(py::module& m, const py::class_& base_cls, 149 | const char* name="SDVector") 150 | { 151 | auto cls_name = std::string(name) + py::cast( 152 | base_cls.attr("__name__")); 153 | 154 | auto cls = add_bitvector_class>( 155 | m, 156 | cls_name, 157 | doc_sd_vector); 158 | 159 | m.attr("sparse_bit_vectors").attr("__setitem__")(cls_name, cls); 160 | 161 | return cls; 162 | } 163 | 164 | 165 | template 166 | inline auto add_hyb_vector(py::module& m) 167 | { 168 | auto cls = add_bitvector_class>( 169 | m, "HybVector" + std::to_string(k_sblock_rate), 170 | doc_hyb_vector); 171 | 172 | m.attr("hybrid_bit_vectors").attr("__setitem__")(k_sblock_rate, cls); 173 | 174 | return cls; 175 | } 176 | 177 | 178 | template 179 | auto add_bitvectors(py::module& m, py::class_& bit_vector_cls) 180 | { 181 | add_bitvector_supports(m, bit_vector_cls); 182 | 183 | m.attr("all_immutable_bitvectors") = py::list(); 184 | m.attr("bit_vector_interleaved") = py::dict(); 185 | m.attr("raman_raman_rao_vectors") = py::dict(); 186 | m.attr("sparse_bit_vectors") = py::dict(); 187 | m.attr("hybrid_bit_vectors") = py::dict(); 188 | 189 | auto bvil_classes = std::make_tuple(add_bit_vector_il<64>(m), 190 | add_bit_vector_il<128>(m), 191 | add_bit_vector_il<256>(m), 192 | add_bit_vector_il<512>(m)); 193 | 194 | auto hyb_classes = std::make_tuple( 195 | add_hyb_vector<4>(m), 196 | add_hyb_vector<8>(m), 197 | add_hyb_vector<16>(m), 198 | add_hyb_vector<256>(m)); 199 | 200 | auto rrr_classes = std::make_tuple( 201 | add_rrr_vector<3>(m), 202 | add_rrr_vector<15>(m), 203 | add_rrr_vector<63>(m), 204 | add_rrr_vector<256>(m)); 205 | //add_rrr_vector<63, sdsl::wt_int<>>(m, "RamanRamanRaoWTVector")); 206 | 207 | auto sd_classes = std::make_tuple( 208 | add_sd_vector<>(m), 209 | add_sd_vector>(m, "SDVectorSD"), 210 | add_sd_vector(m, std::get<1>(rrr_classes))); 211 
| 212 | return std::make_tuple( 213 | std::tuple_cat(bvil_classes, rrr_classes, sd_classes, hyb_classes), 214 | std::make_tuple( // propagate 215 | std::get<2>(rrr_classes), 216 | std::get<0>(sd_classes), 217 | std::get<3>(bvil_classes) 218 | ) 219 | ); 220 | 221 | } 222 | -------------------------------------------------------------------------------- /pysdsl/types/encodedvector.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | #include 10 | #include 11 | 12 | #include "calc.hpp" 13 | #include "docstrings.hpp" 14 | #include "io.hpp" 15 | #include "operations/iteration.hpp" 16 | #include "operations/sizes.hpp" 17 | #include "util/tupletricks.hpp" 18 | 19 | 20 | namespace py = pybind11; 21 | 22 | 23 | namespace { 24 | template using dens = std::integral_constant; 25 | 26 | template using width = std::integral_constant; 27 | } // namespace 28 | 29 | 30 | auto constexpr coders = std::make_tuple( 31 | std::make_tuple("EliasDelta", dens<128>{}, width<0>{}, 32 | (sdsl::coder::elias_delta*) nullptr), 33 | std::make_tuple("EliasGamma", dens<128>{}, width<0>{}, 34 | (sdsl::coder::elias_gamma*) nullptr), 35 | std::make_tuple("Fibonacci", dens<128>{}, width<0>{}, 36 | (sdsl::coder::fibonacci*) nullptr), 37 | std::make_tuple("Comma2", dens<128>{}, width<0>{}, 38 | (sdsl::coder::comma<2>*) nullptr), 39 | std::make_tuple("Comma4", dens<128>{}, width<0>{}, 40 | (sdsl::coder::comma<4>*) nullptr)); 41 | 42 | 43 | class add_enc_coders_functor 44 | { 45 | public: 46 | constexpr add_enc_coders_functor(py::module& m): m(m) {} 47 | 48 | template 49 | inline 50 | decltype(auto) operator()(const std::tuple, 51 | width, Coder*> &t) 52 | { 53 | using enc = sdsl::enc_vector; 54 | 55 | auto cls = py::class_( 56 | m, 57 | (std::string("EncVector") + std::get<0>(t)).c_str() 58 | ).def(py::init()); 59 | 60 | add_sizes(cls); 61 | add_description(cls); 62 | 
add_serialization(cls); 63 | add_to_string(cls); 64 | 65 | add_read_access(cls); 66 | add_std_algo(cls); 67 | 68 | cls.doc() = "A vector `v` is stored more space-efficiently by " 69 | "self-delimiting coding the deltas v[i+1]-v[i] (v[-1]:=0)."; 70 | 71 | cls.def_property_readonly("sample_dens", &enc::get_sample_dens) 72 | .def( 73 | "sample", 74 | [] (const enc& self, typename enc::size_type i) { 75 | if (i >= self.size() / self.get_sample_dens()) { 76 | throw std::out_of_range(std::to_string(i)); } 77 | return self.sample(i); }, 78 | /* adjacent literals are concatenated, so the first one must end with a separator */ "Returns the i-th sample of the compressed vector.\n" 79 | "i: The index of the sample. 0 <= i < size()/get_sample_dens()", 80 | py::call_guard()) 81 | .def( 82 | "samples", 83 | [] (const enc& self) { 84 | const typename enc::size_type size = self.size() / 85 | self.get_sample_dens(); 86 | 87 | sdsl::int_vector samples(size); 88 | /* `samples` holds exactly `size` entries, so the last valid index is size - 1; this is the same range the "sample" binding accepts */ for (std::size_t i = 0; i < size; i++) { 89 | samples[i] = self.sample(i); } 90 | return samples; }, 91 | py::call_guard()); 92 | 93 | m.attr("enc_vector").attr("__setitem__")(std::get<0>(t), cls); 94 | m.attr("all_compressed_integer_vectors").attr("append")(cls); 95 | 96 | return cls; 97 | } 98 | 99 | private: 100 | py::module& m; 101 | }; 102 | 103 | 104 | class add_vlc_coders_functor 105 | { 106 | public: 107 | constexpr add_vlc_coders_functor(py::module& m): m(m) {} 108 | 109 | template 110 | inline 111 | decltype(auto) operator()(const std::tuple, 112 | width, Coder*> &t) 113 | { 114 | using vlc = sdsl::vlc_vector; 115 | 116 | auto cls = py::class_(m, ( 117 | std::string("VariableLengthCodesVector") + std::get<0>(t) 118 | ).c_str()).def(py::init()); 119 | 120 | add_sizes(cls); 121 | add_description(cls); 122 | add_serialization(cls); 123 | add_to_string(cls); 124 | 125 | add_read_access(cls); 126 | add_std_algo(cls); 127 | 128 | cls.doc() = "A vector which stores the values with " 129 | "variable length codes."; 130 | 131 | cls.def_property_readonly("sample_dens", &vlc::get_sample_dens); 132 | 133 | 
m.attr("variable_length_codes_vector").attr( 134 | "__setitem__")(std::get<0>(t), cls); 135 | m.attr("all_compressed_integer_vectors").attr("append")(cls); 136 | 137 | return cls; 138 | } 139 | 140 | private: 141 | py::module& m; 142 | }; 143 | 144 | 145 | inline std::string key_to_string(const char* key) { return std::string(key); } 146 | 147 | template 148 | inline std::string key_to_string(KEY_T key) { return std::to_string(key); } 149 | 150 | 151 | namespace { 152 | 153 | const char dprrr[] = "DPRRR"; 154 | const char dp[] = "DP"; 155 | 156 | template ::value> 157 | struct get_vector_type {}; 158 | 159 | template 160 | struct get_vector_type { 161 | using type = sdsl::dac_vector; 162 | }; 163 | 164 | template <> 165 | struct get_vector_type { 166 | using type = sdsl::dac_vector_dp<>; 167 | }; 168 | 169 | template <> 170 | struct get_vector_type { 171 | using type = sdsl::dac_vector_dp>; 172 | }; 173 | 174 | template 175 | using get_vector_type_t = typename get_vector_type::type; 176 | 177 | } // namespace 178 | 179 | 180 | class add_dac_vector_functor 181 | { 182 | public: 183 | constexpr add_dac_vector_functor(py::module& m, const char* doc = nullptr, 184 | const char* doc_dp = nullptr): 185 | m(m), doc(doc), doc_dp(doc_dp) {} 186 | 187 | 188 | template 189 | inline 190 | decltype(auto) get_vector(std::integral_constant) { 191 | using type = get_vector_type_t; 192 | auto name = "DirectAccessibleCodesVector" + key_to_string(key); 193 | 194 | auto cls = py::class_(m, name.c_str()).def(py::init()); 195 | 196 | add_sizes(cls); 197 | add_description(cls); 198 | add_serialization(cls); 199 | add_to_string(cls); 200 | 201 | add_read_access(cls); 202 | add_std_algo(cls); 203 | 204 | if (doc && std::is_integral::value) 205 | cls.doc() = doc; 206 | else if (doc_dp && !std::is_integral::value) 207 | cls.doc() = doc_dp; 208 | 209 | 210 | cls.def_property_readonly("levels", &type::levels); 211 | 212 | m.attr("direct_accessible_codes_vector").attr("__setitem__")(key, cls); 
213 | m.attr("all_compressed_integer_vectors").attr("append")(cls); 214 | 215 | return cls; 216 | } 217 | 218 | template ::value>::type* dummy = nullptr> 221 | inline 222 | decltype(auto) operator()(std::integral_constant t) { 223 | return get_vector(t); 224 | } 225 | template ::value>::type* dummy = nullptr> 228 | inline 229 | decltype(auto) operator()(std::integral_constant t) { 230 | return get_vector(t).def("cost", &get_vector_type_t::cost, 231 | py::arg("n"), py::arg("m")); 232 | } 233 | 234 | private: 235 | py::module& m; 236 | const char* doc; 237 | const char* doc_dp; 238 | }; 239 | 240 | 241 | auto add_encoded_vectors(py::module& m) 242 | { 243 | m.attr("enc_vector") = py::dict(); 244 | m.attr("variable_length_codes_vector") = py::dict(); 245 | m.attr("direct_accessible_codes_vector") = py::dict(); 246 | m.attr("all_compressed_integer_vectors") = py::list(); 247 | 248 | auto enc_classes = for_each_in_tuple(coders, add_enc_coders_functor(m)); 249 | auto vlc_classes = for_each_in_tuple(coders, add_vlc_coders_functor(m)); 250 | 251 | using dac_params = std::tuple< 252 | std::integral_constant, 253 | std::integral_constant, 254 | std::integral_constant, 255 | std::integral_constant, 256 | std::integral_constant, 257 | std::integral_constant 258 | >; 259 | auto dac_classes = for_each_in_tuple(dac_params(), 260 | add_dac_vector_functor(m, doc_dac_vector, doc_dac_vector_dp)); 261 | 262 | m.attr("DACVector") = m.attr("DirectAccessibleCodesVector4"); 263 | m.attr("DirectAccessibleCodesVector") = m.attr( 264 | "DirectAccessibleCodesVector4"); 265 | 266 | return std::tuple_cat(enc_classes, vlc_classes, dac_classes); 267 | } 268 | -------------------------------------------------------------------------------- /pysdsl/types/intvector.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | 8 | #include 9 | #include 10 | 11 | #include "calc.hpp" 12 | #include "io.hpp" 13 | 
#include "operations/iteration.hpp" 14 | #include "operations/sizes.hpp" 15 | #include "docstrings.hpp" 16 | 17 | 18 | constexpr char sym_for_width(unsigned int width) { 19 | switch (width) { 20 | case 8: 21 | return 'B'; 22 | case 16: 23 | return 'H'; 24 | case 32: 25 | return 'I'; 26 | case 64: 27 | return 'Q'; 28 | default: __builtin_unreachable(); 29 | } 30 | } 31 | 32 | 33 | // checks whether width is a power of 2 (width & (width - 1) == 0) 34 | // and this power is between 8 and 64 35 | // without dummy redefinition error 36 | template (T::fixed_int_width), 38 | typename std::enable_if::type* dummy = nullptr> 40 | inline auto add_int_init(py::module& m, const char* name) 41 | { 42 | return py::class_(m, name, py::buffer_protocol()) 43 | .def_buffer([] (T& self) { 44 | return py::buffer_info( 45 | reinterpret_cast(self.data()), 46 | width / 8, 47 | std::string(1, sym_for_width(width)), 48 | 1, 49 | { detail::size(self) }, 50 | { width / 8 } 51 | ); }); 52 | } 53 | 54 | template (T::fixed_int_width), 56 | typename std::enable_if<(width & (width - 1)) || 57 | !(width & (128u - 8u))>::type* dummy = nullptr> 58 | inline auto add_int_init(py::module& m, const char* name) 59 | { 60 | return py::class_(m, name); 61 | } 62 | 63 | 64 | template 65 | inline auto add_int_class(py::module& m, py::dict& dict, KEY_T key, 66 | const char *name, const char *doc = nullptr) 67 | { 68 | auto cls = add_int_init(m, name) 69 | .def_property_readonly("width", static_cast (&T::width)) 70 | .def_property_readonly("data", 71 | static_cast (&T::data)) 72 | 73 | .def_property_readonly("bit_size", &T::bit_size, 74 | "The number of bits in the int_vector.") 75 | 76 | .def("resize", &T::resize, 77 | "Resize the int_vector in terms of elements.") 78 | .def("bit_resize", &T::bit_resize, 79 | "Resize the int_vector in terms of bits.") 80 | .def_property_readonly("capacity", &T::capacity, doc_capacity) 81 | 82 | .def( 83 | "__setitem__", 84 | [](T &self, size_t position, S value) { 85 | if 
(position >= self.size()) { 86 | throw std::out_of_range(std::to_string(position)); } 87 | self[position] = value; }) 88 | 89 | .def("set_to_id", 90 | [](T &self) { sdsl::util::set_to_id(self); }, 91 | py::call_guard(), 92 | "Sets each entry of the vector at position `i` to value `i`") 93 | .def("set_to_value", 94 | [](T &self, S value) { sdsl::util::set_to_value(self, value); }, 95 | py::arg("k"), 96 | doc_set_to_value, 97 | py::call_guard()) 98 | .def("set_zero_bits", 99 | [](T &self) { sdsl::util::_set_zero_bits(self); }, 100 | "Sets all bits of the int_vector to 0-bits.", 101 | py::call_guard()) 102 | .def("set_one_bits", 103 | [](T &self) { sdsl::util::_set_one_bits(self); }, 104 | "Sets all bits of the int_vector to 1-bits.", 105 | py::call_guard()) 106 | .def( 107 | "set_random_bits", 108 | [](T &self, int seed) { 109 | sdsl::util::set_random_bits(self, seed); }, 110 | py::arg_v( 111 | "seed", 112 | 0, 113 | "If seed = 0, the time is used to initialize the pseudo " 114 | "random number generator, otherwise the seed parameter is used." 
115 | ), 116 | "Sets all bits of the int_vector to pseudo-random bits.", 117 | py::call_guard()) 118 | .def_static( 119 | "rnd_positions", 120 | [](uint8_t log_s, uint64_t mod, uint64_t seed) { 121 | uint64_t mask; 122 | 123 | auto res = sdsl::util::rnd_positions(log_s, mask, mod, seed); 124 | 125 | return std::make_tuple(res, mask); }, 126 | py::arg("log_s"), py::arg("mod") = 0, py::arg("seed") = 0, 127 | "Create `2**{log_s}` random integers mod `mod` with seed `seed`", 128 | py::call_guard()) 129 | .def( 130 | "__imod__", 131 | [](T &self, uint64_t m) { 132 | sdsl::util::mod(self, m); 133 | return self; }, 134 | py::is_operator()) 135 | 136 | .def("cnt_one_bits", 137 | [](const T &self) { return sdsl::util::cnt_one_bits(self); }, 138 | "Number of set bits in vector", 139 | py::call_guard()) 140 | .def("cnt_onezero_bits", 141 | [](const T &self) { return sdsl::util::cnt_onezero_bits(self); }, 142 | "Number of occurrences of bit pattern `10` in vector", 143 | py::call_guard()) 144 | .def("cnt_zeroone_bits", 145 | [](const T &self) { return sdsl::util::cnt_zeroone_bits(self); }, 146 | "Number of occurrences of bit pattern `01` in vector", 147 | py::call_guard()) 148 | 149 | .def( 150 | "next_bit", 151 | [](const T &self, size_t idx) { 152 | if (idx >= self.bit_size()) { 153 | throw std::out_of_range(std::to_string(idx)); } 154 | return sdsl::util::next_bit(self, idx); }, 155 | py::arg("idx"), 156 | "Get the smallest position `i` >= `idx` where a bit is set", 157 | py::call_guard()) 158 | .def( 159 | "prev_bit", 160 | [](const T &self, size_t idx) { 161 | if (idx >= self.bit_size()) { 162 | throw std::out_of_range(std::to_string(idx)); } 163 | return sdsl::util::prev_bit(self, idx); }, 164 | py::arg("idx"), 165 | "Get the largest position `i` <= `idx` where a bit is set", 166 | py::call_guard()); 167 | 168 | add_sizes(cls); 169 | add_description(cls); 170 | add_serialization(cls); 171 | add_to_string(cls); 172 | 173 | add_read_access(cls); 174 | add_std_algo(cls); 
175 | 176 | if (doc) cls.doc() = doc; 177 | 178 | dict.attr("__setitem__")(key, cls); 179 | 180 | return cls; 181 | } 182 | 183 | 184 | struct add_int_vector_functor { 185 | py::module& m; 186 | py::dict& int_vectors_dict; 187 | 188 | constexpr add_int_vector_functor(py::module& m, py::dict& int_vectors_dict) noexcept 189 | : m(m), int_vectors_dict(int_vectors_dict) {} 190 | 191 | template 192 | auto operator()(std::integral_constant t) { 193 | using return_type = sdsl::int_vector; 194 | std::string name = "Int" + std::to_string(N) + "Vector"; 195 | return add_int_class( 196 | m, int_vectors_dict, N, name.c_str()) 197 | .def(py::init( 198 | [](size_t size, typename return_type::value_type default_value) { 199 | return return_type(size, default_value, N); }), 200 | py::arg("size") = 0, py::arg("default_value") = 0); 201 | } 202 | 203 | auto operator()(std::integral_constant t) { 204 | return add_int_class>( 205 | m, int_vectors_dict, "dynamic", "IntVector", doc_int_vector) 206 | .def( 207 | py::init([](size_t size, 208 | uint64_t default_value, 209 | uint8_t bit_width) { 210 | return sdsl::int_vector<0>(size, default_value, bit_width); 211 | }), 212 | py::arg("size") = 0, 213 | py::arg("default_value") = 0, 214 | py::arg("bit_width") = 64, 215 | py::call_guard()) 216 | .def( 217 | "expand_width", 218 | [](sdsl::int_vector<0> &self, size_t width) { 219 | sdsl::util::expand_width(self, width); }, 220 | "Expands the integer width to new_width >= v.width().", 221 | py::call_guard()) 222 | .def("bit_compress", 223 | [](sdsl::int_vector<0> &self) { 224 | sdsl::util::bit_compress(self); }, 225 | doc_bit_compress, 226 | py::call_guard()); 227 | } 228 | 229 | auto operator()(std::integral_constant t) { 230 | return add_int_class, bool>( 231 | m, int_vectors_dict, 1ul , "BitVector") 232 | .def(py::init( 233 | [](size_t size, bool default_value) { 234 | return sdsl::int_vector<1>(size, default_value, 1); }), 235 | py::arg("size") = 0, py::arg("default_value") = false) 236 | 
.def("flip", &sdsl::int_vector<1>::flip, 237 | "Flip all bits of bit_vector", 238 | py::call_guard()); 239 | } 240 | }; 241 | 242 | 243 | template 244 | using general_int_vector = py::class_>; 245 | 246 | 247 | inline auto add_int_vectors(py::module& m) 248 | { 249 | py::dict int_vectors_dict; 250 | 251 | m.attr("int_vector") = int_vectors_dict; 252 | 253 | using params = std::tuple< 254 | std::integral_constant, 255 | std::integral_constant, 256 | std::integral_constant, 257 | std::integral_constant, 258 | std::integral_constant, 259 | std::integral_constant, 260 | std::integral_constant, 261 | std::integral_constant, 262 | std::integral_constant>; 263 | 264 | using as_params = std::tuple< 265 | std::tuple>, 266 | std::tuple>, 267 | std::tuple>, 268 | std::tuple>>; 269 | 270 | auto iv = for_each_in_tuple(params(), add_int_vector_functor(m, int_vectors_dict)); 271 | auto iv_as_params = forward_each_in_tuple(as_params(), 272 | make_general_subset_functor(iv)); 273 | 274 | return std::forward_as_tuple(iv, iv_as_params); 275 | } 276 | -------------------------------------------------------------------------------- /pysdsl/types/pysequence.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include 6 | 7 | namespace py = pybind11; 8 | 9 | 10 | namespace detail 11 | { 12 | template 13 | class sequence_iterator_wrapper 14 | { 15 | public: 16 | using difference_type = std::size_t; 17 | using value_type = T; 18 | using pointer = T*; 19 | using reference = T&; 20 | using iterator_category = std::random_access_iterator_tag; 21 | 22 | sequence_iterator_wrapper(Base it): m_it(it) {} 23 | 24 | bool operator!=(const sequence_iterator_wrapper& other) const { 25 | return m_it != other.m_it; } 26 | 27 | bool operator==(const sequence_iterator_wrapper& other) const { 28 | return m_it == other.m_it; } 29 | 30 | value_type operator*() { return py::cast(*m_it); } 31 | 32 | decltype(auto) operator++() 33 | { 34 
| ++m_it; 35 | return *this; 36 | } 37 | 38 | decltype(auto) operator++(int) { 39 | return *sequence_iterator_wrapper(m_it++); } 40 | 41 | decltype(auto) operator-(difference_type step) const { 42 | return sequence_iterator_wrapper(m_it - step); } 43 | 44 | difference_type operator-(sequence_iterator_wrapper other) const { 45 | return m_it - other.m_it; } 46 | 47 | decltype(auto) operator=(sequence_iterator_wrapper other) 48 | { 49 | if (this != &other) { 50 | m_it = other.m_it; } 51 | return *this; 52 | } 53 | 54 | private: 55 | Base m_it; 56 | }; 57 | } // namespace detail 58 | 59 | 60 | template 61 | class sequence_wrapper 62 | { 63 | private: 64 | //py::detail::sequence_iterator; 65 | using raw_iterator = decltype(std::declval().begin()); 66 | 67 | public: 68 | using const_iterator = detail::sequence_iterator_wrapper; 69 | using value_type = T; 70 | using size_type = std::size_t; 71 | 72 | sequence_wrapper(const py::sequence& seq): m_seq(seq) {}; 73 | 74 | bool empty() const { return m_seq.size() == 0; } 75 | 76 | const_iterator begin() const { return std::cbegin(m_seq); } 77 | const_iterator end() const { return std::cend(m_seq); } 78 | 79 | size_t size() const { return m_seq.size(); } 80 | 81 | decltype(auto) 82 | operator[] (const size_t i) const { return py::cast(m_seq[i]); } 83 | 84 | private: 85 | const py::sequence& m_seq; 86 | }; 87 | -------------------------------------------------------------------------------- /pysdsl/types/sorted_int_stack.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | 8 | #include 9 | #include 10 | 11 | #include "calc.hpp" 12 | #include "io.hpp" 13 | #include "operations/iteration.hpp" 14 | #include "operations/sizes.hpp" 15 | #include "docstrings.hpp" 16 | 17 | 18 | namespace py = pybind11; 19 | 20 | 21 | inline auto add_sorted_int_stack(py::module& m) 22 | { 23 | using Stack = sdsl::sorted_int_stack; 24 | 25 | auto cls = 
py::class_(m, "SortedIntStack") 26 | .def("empty", &Stack::empty, "Checks whether the stack is empty.") 27 | .def("top", [](const Stack& self) { 28 | if (self.size() > 0u) 29 | return self.top(); 30 | throw py::index_error("top from empty stack"); 31 | }, "Returns the topmost element of the stack.") 32 | .def("pop", [](Stack& self) { 33 | if (self.size() == 0u) 34 | throw py::index_error("pop from empty stack"); 35 | auto ans = self.top(); 36 | self.pop(); 37 | return ans; 38 | }, "Removes the topmost element from the stack and returns its copy. Not thread safe.") 39 | .def("push", [](Stack& self, const Stack::size_type& x) { 40 | if (self.empty() || self.top() < x) 41 | self.push(x); 42 | else 43 | throw py::value_error("elements have to be pushed in strictly increasing order"); 44 | }, "Adds new element to the top of the stack." 45 | "(n.b. it has to be not less than the stored ones). Not thread safe.") 46 | .def(py::init([](Stack::size_type x) { 47 | return Stack(x); 48 | }), py::arg("max_value"), 49 | "Creates a stack which can store integers not greater than max_value."); 50 | 51 | 52 | cls.doc() = doc_sorted_int_stack; 53 | 54 | add_sizes(cls); 55 | add_serialization(cls, 0); 56 | add_description(cls); 57 | 58 | return std::make_tuple(cls); 59 | } 60 | -------------------------------------------------------------------------------- /pysdsl/types/suffixarray.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | #include 10 | 11 | #include "operations/sizes.hpp" 12 | #include "operations/iteration.hpp" 13 | #include "docstrings.hpp" 14 | #include "io.hpp" 15 | #include "calc.hpp" 16 | 17 | namespace py = pybind11; 18 | 19 | 20 | template 21 | inline auto add_csa_member_type(py::module& m, 22 | std::string&& name, 23 | const std::string& parent_name) 24 | { 25 | try { 26 | auto cls = py::class_( 27 | m, ("_" + name + "ofSuffixArray" + 
parent_name).c_str()); 28 | add_read_access(cls); 29 | add_to_string(cls); 30 | } catch (std::runtime_error& /* ignore */) {} 31 | } 32 | 33 | 34 | template 35 | inline 36 | auto add_csa_class(py::module& m, std::string&& name, const char* doc = nullptr) 37 | { 38 | auto cls = py::class_(m, ("SuffixArray" + name).c_str()); 39 | 40 | add_csa_member_type(m, "ISA", name); 41 | add_csa_member_type(m, "BWT", name); 42 | add_csa_member_type(m, "LF", name); 43 | add_csa_member_type(m, "PSI", name); 44 | add_csa_member_type(m, "Text", name); 45 | add_csa_member_type(m, "FirstRow", name); 46 | 47 | try { 48 | using char2comp_type = typename T::alphabet_type::char2comp_type; 49 | auto cls_char2comp = py::class_( 50 | m, ("_Char2CompOf" + name).c_str()); 51 | cls_char2comp.def( 52 | "__getitem__", 53 | [] (const char2comp_type& self, uint64_t c) { return self[c]; } 54 | ); 55 | } catch (std::runtime_error& /* ignore */) {} 56 | try { 57 | using comp2char_type = typename T::alphabet_type::comp2char_type; 58 | auto cls_comp2char = py::class_( 59 | m, ("_Comp2CharOf" + name).c_str()); 60 | cls_comp2char.def( 61 | "__getitem__", 62 | [] (const comp2char_type& self, uint64_t c) { return self[c]; } 63 | ); 64 | } catch (std::runtime_error& /* ignore */) {} 65 | 66 | cls.def_property_readonly("isa", [] (const T& self ) { return &self.isa; }); 67 | cls.def_property_readonly("bwt", [] (const T& self ) { return &self.bwt; }); 68 | cls.def_property_readonly("lf", [] (const T& self ) { return &self.lf; }); 69 | cls.def_property_readonly("psi", [] (const T& self ) { return &self.psi; }); 70 | cls.def_property_readonly("text", [] (const T& self ) { 71 | return &self.text; }); 72 | cls.def_property_readonly("L", [] (const T& self ) { return &self.L; }); 73 | cls.def_property_readonly("F", [] (const T& self ) { return &self.F; }); 74 | cls.def_property_readonly("C", [] (const T& self ) { return &self.C; }); 75 | cls.def_property_readonly("char2comp", [] (const T& self ) { 76 | return 
&self.char2comp; }); 77 | cls.def_property_readonly("comp2char", [] (const T& self ) { 78 | return &self.comp2char; }); 79 | cls.def_property_readonly("sigma", [] (const T& self ) { 80 | return self.sigma; }); 81 | 82 | cls.def( 83 | "extract", 84 | [] (const T& self, typename T::size_type begin, 85 | typename T::size_type end) { 86 | if (end >= detail::size(self)) { 87 | throw std::out_of_range(std::to_string(end)); } 88 | if (begin >= end) { 89 | throw std::invalid_argument("begin should be less than end"); } 90 | return sdsl::extract(self, begin, end); }, 91 | py::arg("begin"), 92 | py::arg("end"), 93 | "Reconstructs the subarray T[begin:end] of the original array T\n" 94 | "\n\tbegin: Position of the first character which should be extracted " 95 | "(inclusive)" 96 | "\n\tend: Position of the last character which should be extracted " 97 | "(inclusive)\n\n" 98 | "Time complexity: Order{(end - begin+1) * t_{Psi} + t_{ISA} }", 99 | py::call_guard() 100 | ); 101 | cls.def( 102 | "count", 103 | [] (const T& self, const typename T::string_type& pattern) { 104 | return sdsl::count(self, pattern); }, 105 | py::arg("pattern"), 106 | "Counts the number of occurrences of a pattern in a CSA", 107 | py::call_guard()); 108 | cls.def( 109 | "locate", 110 | [] (const T& self, const typename T::string_type& pattern) { 111 | return sdsl::locate(self, pattern); }, 112 | py::arg("pattern"), 113 | "Calculates all occurrences of a pattern in a CSA\n" 114 | "Time complexity:" 115 | "Order{ t_{backward_search} + z * t_{SA} },\n" 116 | "where `z` is the number of occurrences of pattern in the CSA", 117 | py::call_guard()); 118 | cls.def(py::init( 119 | [] (const typename T::string_type& data) 120 | { 121 | T self; 122 | sdsl::construct_im(self, data, 123 | sizeof(typename T::string_type::value_type)); 124 | return self; 125 | } 126 | )); 127 | 128 | add_sizes(cls); 129 | add_description(cls); 130 | add_serialization(cls); 131 | add_to_string(cls); 132 | 133 | add_read_access(cls); 134 
| add_std_algo(cls); 135 | 136 | if (doc) cls.doc() = doc; 137 | 138 | m.attr("suffix_array").attr("__setitem__")(name, cls); 139 | 140 | return cls; 141 | } 142 | 143 | 144 | inline auto add_csa(py::module& m) 145 | { 146 | m.attr("suffix_array") = py::dict(); 147 | 148 | auto csa_classes = std::make_tuple( 149 | add_csa_class>(m, "Bitcompressed", doc_csa), 150 | add_csa_class>(m, "Sadakane", doc_sada), 151 | add_csa_class>(m, "SadakaneInt", doc_sada), 152 | add_csa_class>(m, "WaveletTree", doc_csa_wt), 153 | add_csa_class>(m, "WaveletTreeInt", doc_csa_wt)); 154 | 155 | return csa_classes; 156 | } 157 | -------------------------------------------------------------------------------- /pysdsl/types/wavelet.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | 12 | #include 13 | #include 14 | 15 | #include "calc.hpp" 16 | #include "docstrings.hpp" 17 | #include "io.hpp" 18 | 19 | 20 | namespace py = pybind11; 21 | 22 | 23 | 24 | template 25 | class add_lex_functor; 26 | 27 | template ::value> 28 | class add_traversable_functor; 29 | 30 | 31 | template 32 | class add_lex_functor 33 | { 34 | public: 35 | py::class_& operator() (py::class_& cls) { return cls; } 36 | }; 37 | 38 | 39 | template 40 | class add_lex_functor 41 | { 42 | public: 43 | py::class_& operator() (py::class_& cls) 44 | { 45 | typedef typename T::size_type size_type; 46 | typedef typename T::value_type value_type; 47 | 48 | cls.def( 49 | "quantile_freq", 50 | [] (const T& self, typename T::size_type lb, 51 | typename T::size_type rb, typename T::size_type q) { 52 | return sdsl::quantile_freq(self, lb, rb, q); }, 53 | py::arg("lb"), py::arg("rb"), py::arg("q"), 54 | "Returns the q-th smallest element and its frequency in wt[lb..rb]." 
55 | "\n\tlb: Left array bound in T" 56 | "\n\trb: Right array bound in T" 57 | "\n\tq: q-th largest element ('quantile'), 0-based indexed.", 58 | py::call_guard()); 59 | cls.def( 60 | "lex_count", 61 | [] (const T& self, size_t i, size_t j, typename T::value_type c) { 62 | if (j >= self.size()) { 63 | throw std::invalid_argument("j should be less than size"); } 64 | if (i >= j) { 65 | throw std::invalid_argument("i should be less than j"); } 66 | return self.lex_count(i, j, c); }, 67 | py::arg("i"), py::arg("j"), py::arg("c"), 68 | "How many values are lexicographic smaller/greater than c in " 69 | "[i..j-1].\n\ti: Start index (inclusive) of the interval." 70 | "\n\tj: End index (exclusive) of the interval." 71 | "\n\tc: Value c.\nreturn A triple containing:\n\trank(i, c)" 72 | "\n\tnumber of values smaller than c in [i..j-1]" 73 | "\n\tnumber of values greater than c in [i..j-1]", 74 | py::call_guard()); 75 | cls.def( 76 | "lex_smaller_count", 77 | [] (const T& self, size_t i, typename T::value_type c) { 78 | if (i >= self.size()) { 79 | throw std::invalid_argument("i should be less than size"); } 80 | return self.lex_smaller_count(i, c); }, 81 | py::arg("i"), py::arg("c"), 82 | "How many values are lexicographic smaller than c in [0..i-1]." 83 | "\n\ti: Exclusive right bound of the range." 
84 | "\nreturn: A tuple containing:\n\trank(i, c)\n\tnumber of values " 85 | "smaller than c in [0..i-1]", 86 | py::call_guard()); 87 | cls.def( 88 | "symbol_lte", 89 | [] (const T& self, typename T::value_type c) { 90 | auto result = sdsl::symbol_lte(self, c); 91 | if (!std::get<0>(result)) { 92 | throw std::runtime_error("Symbol not found"); } 93 | return std::get<1>(result); }, 94 | py::arg("c"), 95 | "Returns for a symbol c the previous smaller or equal symbol in " 96 | "the WT"); 97 | cls.def( 98 | "symbol_gte", 99 | [] (const T& self, typename T::value_type c) { 100 | auto result = sdsl::symbol_gte(self, c); 101 | if (!std::get<0>(result)) { 102 | throw std::runtime_error("Symbol not found"); } 103 | return std::get<1>(result); }, 104 | py::arg("c"), 105 | "Returns for a symbol c the next larger or equal symbol in the WT"); 106 | cls.def( 107 | "restricted_unique_range_values", 108 | [] (const T& self, size_type x_i, size_type x_j, value_type y_i, 109 | value_type y_j 110 | ) { return sdsl::restricted_unique_range_values(self, x_i, x_j, 111 | y_i, y_j); }, 112 | py::arg("x_i"), py::arg("x_j"), py::arg("y_i"), py::arg("y_j"), 113 | "For an x range [x_i, x_j] and a value range [y_i, y_j] " 114 | "return all unique y values occuring in [x_i, x_j] " 115 | "in ascending order.", 116 | py::call_guard()); 117 | return cls; 118 | } 119 | }; 120 | 121 | 122 | template 123 | class add_traversable_functor 124 | { 125 | public: 126 | py::class_& operator() (py::module&, py::class_& cls, std::string&&) { 127 | return cls; } 128 | }; 129 | 130 | 131 | template 132 | class add_traversable_functor 133 | { 134 | public: 135 | py::class_& operator() (py::module& m, py::class_& cls, 136 | std::string&& name) 137 | { 138 | typedef typename T::node_type t_node; 139 | 140 | try 141 | { 142 | py::class_ node_cls(m, name.c_str()) 143 | // .def_property_readonly("sym", &t_node::sym ) 144 | ; 145 | } 146 | catch(std::runtime_error& /* ignore */) {} 147 | 148 | cls.def("root_node", 
&T::root); 149 | cls.def("node_is_leaf", &T::is_leaf); 150 | cls.def( 151 | "node_empty", 152 | [] (const T& self, const t_node& node) 153 | { return self.empty(node); }); 154 | cls.def( 155 | "node_size", 156 | [] (const T& self, const t_node& node) 157 | { return self.size(node); }); 158 | cls.def("node_sym", &T::sym); 159 | cls.def( 160 | "node_expand", 161 | [] (const T& self, const t_node& node) 162 | { return self.expand(node); }); 163 | cls.def( 164 | "node_expand_ranges", 165 | [] (const T& self, const t_node& node, 166 | const sdsl::range_vec_type& ranges) 167 | { 168 | return self.expand(node, ranges); 169 | }, 170 | py::arg("node"), py::arg("ranges")); 171 | cls.def( 172 | "node_bit_vec", 173 | [] (const T& self, const t_node& node) { 174 | auto bit_vec = self.bit_vec(node); 175 | return std::make_pair( 176 | bit_vec.size(), 177 | py::make_iterator( 178 | detail::cbegin(bit_vec), 179 | detail::cend(bit_vec))); }); 180 | cls.def( 181 | "node_seq", 182 | [] (const T& self, const t_node& node) { 183 | auto seq = self.seq(node); 184 | sdsl::int_vector<> s(seq.size()); 185 | std::copy(seq.begin(), seq.end(), s.begin()); 186 | return s; } ); 187 | 188 | cls.def( 189 | "intersect", 190 | [] (const T& self, std::vector ranges, size_t t) { 191 | return sdsl::intersect(self, ranges, t); }, 192 | py::arg("ranges"), py::arg("t") = 0, 193 | "Intersection of elements in " 194 | "WT[s₀, e₀], WT[s₁, e₁], ...,WT[sₖ,eₖ]\n" 195 | "\tranges: The ranges.\n\tt: Threshold in how many distinct ranges " 196 | "the value has to be present. Default: t=ranges.size()\n" 197 | "Return a vector containing (value, frequency) - of value which " 198 | "are contained in t different ranges. Frequency = accumulated " 199 | "frequencies in all ranges. 
The tuples are ordered according " 200 | "to value, if wt is lex_ordered."); 201 | cls.def( 202 | "interval_symbols", 203 | [] (const T& self, size_t i, size_t j) { 204 | if (j > self.size()) { 205 | throw std::invalid_argument("j should be less or equal " 206 | "than size"); } 207 | if (i > j) { 208 | throw std::invalid_argument("i should be less or equal " 209 | "than j"); } 210 | size_t k; 211 | std::vector cs(self.sigma); 212 | std::vector rank_c_i(self.sigma); 213 | std::vector rank_c_j(self.sigma); 214 | 215 | sdsl::interval_symbols(self, i, j, k, cs, rank_c_i, rank_c_j); 216 | 217 | return std::make_tuple(k, cs, rank_c_i, rank_c_j); }, 218 | py::arg("i"), py::arg("j"), 219 | "For each symbol c in wt[i..j - 1] get rank(i, c) and rank(j, c)."); 220 | return cls; 221 | } 222 | }; 223 | 224 | 225 | template 226 | auto add_wavelet_specific(py::class_& cls) { return cls; } 227 | 228 | 229 | template 230 | auto add_wavelet_specific(py::class_>& cls) 231 | { 232 | typedef sdsl::wt_int base_cls; 233 | typedef typename base_cls::size_type size_type; 234 | typedef typename base_cls::value_type value_type; 235 | 236 | cls.def_property_readonly( 237 | "tree", 238 | [] (const base_cls& self) { return self.tree; }, 239 | "A concatenation of all bit vectors of the wavelet tree."); 240 | cls.def( 241 | "get_tree", 242 | [] (const base_cls& self) { return self.tree; }, 243 | "A concatenation of all bit vectors of the wavelet tree."); 244 | cls.def_property_readonly( 245 | "max_level", 246 | [] (const base_cls& self) { return self.max_level; }, 247 | "Maximal level of the wavelet tree."); 248 | cls.def( 249 | "get_max_level", 250 | [] (const base_cls& self) { return self.max_level; }, 251 | "Maximal level of the wavelet tree."); 252 | cls.def( 253 | "range_search_2d", 254 | [] (const base_cls& self, size_type lb, size_type rb, 255 | value_type vlb, value_type vrb, bool report=true) 256 | { 257 | return self.range_search_2d(lb, rb, vlb, vrb, report); 258 | }, 259 | 
py::arg("lb"), py::arg("rb"), py::arg("vlb"), py::arg("vrb"), 260 | py::arg("report"), 261 | "searches points in the index interval [lb..rb] and " 262 | "value interval [vlb..vrb].\n" 263 | "\tlb: Left bound of index interval (inclusive)\n" 264 | "\trb: Right bound of index interval (inclusive)\n" 265 | "\tvlb: Left bound of value interval (inclusive)\n" 266 | "\tvrb: Right bound of value interval (inclusive)\n" 267 | "\treport: Should the matching points be returned?\n" 268 | "returns pair (number of found points, vector of points), " 269 | "the vector is empty when report = false.", 270 | py::call_guard()); 271 | 272 | return cls; 273 | } 274 | 275 | 276 | template 277 | inline auto add_wavelet_class(py::module& m, const std::string&& name, 278 | const char* doc= nullptr) 279 | { 280 | auto cls = py::class_(m, name.c_str()) 281 | .def_property_readonly( 282 | "sigma", 283 | [] (const T& self) { return self.sigma; }, 284 | "Effective alphabet size of the wavelet tree") 285 | .def( 286 | "get_sigma", 287 | [] (const T& self) { return self.sigma; }, 288 | "Effective alphabet size of the wavelet tree") 289 | .def_static( 290 | "from_bytes", 291 | [] (const py::bytes& bytes) 292 | { 293 | T wt; 294 | sdsl::construct_im(wt, std::string(bytes), 295 | sizeof(typename T::value_type)); 296 | return wt; 297 | }, 298 | py::arg("s"), 299 | "Construct from a build sequence", 300 | py::call_guard()) 301 | .def_static( 302 | "from_binary_file", 303 | [] (const std::string& file_name) { 304 | T wt; 305 | sdsl::construct(wt, file_name, sizeof(typename T::value_type)); 306 | return wt; }, 307 | py::arg("file_name"), 308 | py::call_guard()) 309 | .def_static( 310 | "parse_string", 311 | [] (const std::string& s) 312 | { 313 | T wt; 314 | sdsl::construct_im(wt, s, 'd'); 315 | return wt; 316 | }, 317 | py::arg("s"), 318 | "Construct from space-separated human-readable string") 319 | .def( 320 | "rank", 321 | [] (const T& self, typename T::size_type i, 322 | typename T::value_type c) 
323 | { 324 | if (i >= self.size()) { 325 | throw std::out_of_range(std::to_string(i)); } 326 | return self.rank(i, c); 327 | }, 328 | "Calculates how many values c are in the prefix [0..i-1] of the " 329 | "supported vector (i in [0..size]).\nTime complexity: " 330 | "Order(log(|Sigma|))", 331 | py::arg("i"), py::arg("c"), 332 | py::call_guard()) 333 | .def( 334 | "inverse_select", 335 | [] (const T& self, typename T::size_type i) { 336 | if (i >= self.size()) { 337 | throw std::out_of_range(std::to_string(i)); } 338 | return self.inverse_select(i); }, 339 | py::arg("i"), 340 | "Calculates how many occurrences of value wt[i] are in the prefix" 341 | "[0..i-1] of the original sequence, returns pair " 342 | "(rank(wt[i], i), wt[i])", 343 | py::call_guard()) 344 | .def( 345 | "select", 346 | [] (const T& self, typename T::size_type i, 347 | typename T::value_type c) 348 | { 349 | if (i < 1 || i >= self.size()) { 350 | throw std::out_of_range(std::to_string(i)); } 351 | if (i > self.rank(self.size(), c)) { 352 | throw std::invalid_argument( 353 | std::to_string(i) + " is greater than rank(" + 354 | std::to_string(i) + ", " + std::to_string(c) + ")"); } 355 | return self.select(i, c); }, 356 | py::arg("i"), py::arg("c"), 357 | "Calculates the i-th occurrence of the value c in the supported " 358 | "vector.\nTime complexity: Order(log(|Sigma|))", 359 | py::call_guard()); 360 | 361 | add_wavelet_specific(cls); 362 | 363 | add_lex_functor()(cls); 364 | add_traversable_functor()(m, cls, "_" + name + "Node"); 365 | 366 | add_sizes(cls); 367 | add_description(cls); 368 | add_serialization(cls); 369 | add_to_string(cls); 370 | 371 | add_read_access(cls); 372 | add_std_algo(cls); 373 | 374 | if (doc) cls.doc() = doc; 375 | 376 | m.attr("all_wavelet_trees").attr("append")(cls); 377 | 378 | return cls; 379 | } 380 | 381 | 382 | template 383 | inline auto add_wt_int(py::module& m, std::string&& base_name) 384 | { 385 | auto cls = add_wavelet_class>( 386 | m, ("WaveletTreeInt" + 
base_name).c_str(), doc_wtint); 387 | m.attr("wavelet_tree_int").attr("__setitem__")(base_name, cls); 388 | 389 | return cls; 390 | } 391 | 392 | template 393 | inline auto add_wm_int(py::module& m, std::string&& base_name) 394 | { 395 | auto cls = add_wavelet_class>( 396 | m, ("WaveletMatrixInt" + base_name).c_str(), doc_wm_int); 397 | m.attr("wavelet_matrix_int").attr("__setitem__")(base_name, cls); 398 | 399 | return cls; 400 | } 401 | 402 | template 403 | inline auto add_wt_huff(py::module& m, std::string&& base_name) 404 | { 405 | auto cls = add_wavelet_class>( 406 | m, ("WaveletTreeHuffman" + base_name).c_str(), doc_wt_huff); 407 | m.attr("wavelet_tree_huffman").attr("__setitem__")(base_name, cls); 408 | 409 | return cls; 410 | } 411 | 412 | template 413 | inline auto add_wt_huff_int(py::module& m, std::string&& base_name) 414 | { 415 | auto cls = add_wavelet_class>( 416 | m, ("WaveletTreeHuffmanInt" + base_name).c_str(), doc_wt_huff); 417 | m.attr("wavelet_tree_huffman_int").attr("__setitem__")(base_name, cls); 418 | 419 | return cls; 420 | } 421 | 422 | 423 | template 424 | inline auto add_wt_hutu(py::module& m, std::string&& base_name) 425 | { 426 | auto cls = add_wavelet_class>( 427 | m, ("WaveletTreeHuTucker" + base_name).c_str(), doc_wt_hutu); 428 | m.attr("wavelet_tree_hu_tucker").attr("__setitem__")(base_name, cls); 429 | 430 | return cls; 431 | } 432 | 433 | template 434 | inline auto add_wt_hutu_int(py::module& m, std::string&& base_name) 435 | { 436 | auto cls = add_wavelet_class>( 437 | m, ("WaveletTreeHuTuckerInt" + base_name).c_str(), doc_wt_hutu); 438 | m.attr("wavelet_tree_hu_tucker_int").attr("__setitem__")(base_name, cls); 439 | 440 | return cls; 441 | } 442 | 443 | 444 | template 445 | inline auto add_wt_blcd(py::module& m, std::string&& base_name) 446 | { 447 | auto cls = add_wavelet_class>( 448 | m, ("WaveletTreeBalanced" + base_name).c_str(), doc_wt_blcd); 449 | m.attr("wavelet_tree_balanced").attr("__setitem__")(base_name, cls); 450 | 
451 | return cls; 452 | } 453 | 454 | 455 | template 456 | inline auto add_wt_blcd_int(py::module& m, std::string&& base_name) 457 | { 458 | auto cls = add_wavelet_class>( 459 | m, ("WaveletTreeBalancedInt" + base_name).c_str(), doc_wt_blcd); 460 | m.attr("wavelet_tree_balanced_int").attr("__setitem__")(base_name, cls); 461 | 462 | return cls; 463 | } 464 | 465 | 466 | template 467 | inline auto add_wt_int(py::module& m, const std::string& base_name) 468 | { return add_wt_int(m, std::string(base_name)); } 469 | 470 | 471 | template 472 | inline auto add_wm_int(py::module& m, const std::string& base_name) 473 | { return add_wm_int(m, std::string(base_name)); } 474 | 475 | 476 | template 477 | inline auto add_wt_huff(py::module& m, const std::string& base_name) 478 | { return add_wt_huff(m, std::string(base_name)); } 479 | 480 | template 481 | inline auto add_wt_huff_int(py::module& m, const std::string& base_name) 482 | { return add_wt_huff_int(m, std::string(base_name)); } 483 | 484 | template 485 | inline auto add_wt_hutu(py::module& m, const std::string& base_name) 486 | { return add_wt_hutu(m, std::string(base_name)); } 487 | 488 | template 489 | inline auto add_wt_hutu_int(py::module& m, const std::string& base_name) 490 | { return add_wt_hutu_int(m, std::string(base_name)); } 491 | 492 | template 493 | inline auto add_wt_blcd(py::module& m, const std::string& base_name) 494 | { return add_wt_blcd(m, std::string(base_name)); } 495 | 496 | template 497 | inline auto add_wt_blcd_int(py::module& m, const std::string& base_name) 498 | { return add_wt_blcd_int(m, std::string(base_name)); } 499 | 500 | 501 | template 502 | inline auto add_wt_int(py::module& m, const py::class_& base) 503 | { 504 | auto base_name = py::cast(base.attr("__name__")); 505 | auto cls = add_wt_int(m, base_name); 506 | m.attr("wavelet_tree_int_by_base").attr("__setitem__")(base, cls); 507 | return cls; 508 | } 509 | 510 | 511 | template 512 | inline auto add_wm_int(py::module& m, const 
py::class_& base) 513 | { 514 | auto base_name = py::cast(base.attr("__name__")); 515 | auto cls = add_wm_int(m, base_name); 516 | m.attr("wavelet_matrix_int_by_base").attr("__setitem__")(base, cls); 517 | return cls; 518 | } 519 | 520 | template 521 | inline auto add_wt_huff(py::module& m, const py::class_& base) 522 | { 523 | auto base_name = py::cast(base.attr("__name__")); 524 | auto cls = add_wt_huff(m, base_name); 525 | m.attr("wavelet_tree_huffman_by_base").attr("__setitem__")(base, cls); 526 | return cls; 527 | } 528 | 529 | template 530 | inline auto add_wt_huff_int(py::module& m, const py::class_& base) 531 | { 532 | auto base_name = py::cast(base.attr("__name__")); 533 | auto cls = add_wt_huff_int(m, base_name); 534 | m.attr("wavelet_tree_huffman_int_by_base").attr("__setitem__")(base, cls); 535 | return cls; 536 | } 537 | 538 | template 539 | inline auto add_wt_hutu(py::module& m, const py::class_& base) 540 | { 541 | auto base_name = py::cast(base.attr("__name__")); 542 | auto cls = add_wt_hutu(m, base_name); 543 | m.attr("wavelet_tree_hu_tucker_by_base").attr("__setitem__")(base, cls); 544 | return cls; 545 | } 546 | 547 | template 548 | inline auto add_wt_hutu_int(py::module& m, const py::class_& base) 549 | { 550 | auto base_name = py::cast(base.attr("__name__")); 551 | auto cls = add_wt_hutu_int(m, base_name); 552 | m.attr("wavelet_tree_hu_tucker_int_by_base").attr("__setitem__")(base, cls); 553 | return cls; 554 | } 555 | 556 | 557 | template 558 | inline auto add_wt_blcd(py::module& m, const py::class_& base) 559 | { 560 | auto base_name = py::cast(base.attr("__name__")); 561 | auto cls = add_wt_blcd(m, base_name); 562 | m.attr("wavelet_tree_balanced_by_base").attr("__setitem__")(base, cls); 563 | return cls; 564 | } 565 | 566 | template 567 | inline auto add_wt_blcd_int(py::module& m, const py::class_& base) 568 | { 569 | auto base_name = py::cast(base.attr("__name__")); 570 | auto cls = add_wt_blcd_int(m, base_name); 571 | 
m.attr("wavelet_tree_balanced_int_by_base").attr("__setitem__")(base, cls); 572 | return cls; 573 | } 574 | 575 | 576 | template 577 | inline auto add_wavelet(py::module& m, 578 | const std::tuple...> t) 579 | { 580 | m.attr("all_wavelet_trees") = py::list(); 581 | m.attr("wavelet_tree_int") = py::dict(); 582 | m.attr("wavelet_tree_int_by_base") = py::dict(); 583 | m.attr("wavelet_matrix_int") = py::dict(); 584 | m.attr("wavelet_matrix_int_by_base") = py::dict(); 585 | m.attr("wavelet_tree_huffman") = py::dict(); 586 | m.attr("wavelet_tree_huffman_by_base") = py::dict(); 587 | m.attr("wavelet_tree_huffman_int") = py::dict(); 588 | m.attr("wavelet_tree_huffman_int_by_base") = py::dict(); 589 | m.attr("wavelet_tree_hu_tucker") = py::dict(); 590 | m.attr("wavelet_tree_hu_tucker_by_base") = py::dict(); 591 | m.attr("wavelet_tree_hu_tucker_int") = py::dict(); 592 | m.attr("wavelet_tree_hu_tucker_int_by_base") = py::dict(); 593 | m.attr("wavelet_tree_balanced") = py::dict(); 594 | m.attr("wavelet_tree_balanced_by_base") = py::dict(); 595 | m.attr("wavelet_tree_balanced_int") = py::dict(); 596 | m.attr("wavelet_tree_balanced_int_by_base") = py::dict(); 597 | 598 | return std::make_tuple( 599 | add_wt_int<>(m, ""), 600 | add_wt_int(m, std::get<0>(t)), 601 | add_wt_int(m, std::get<1>(t)), 602 | add_wt_int(m, std::get<2>(t)), 603 | 604 | add_wm_int<>(m, ""), 605 | add_wm_int(m, std::get<0>(t)), 606 | add_wm_int(m, std::get<1>(t)), 607 | add_wm_int(m, std::get<2>(t)), 608 | 609 | add_wt_huff<>(m, ""), 610 | add_wt_huff(m, std::get<0>(t)), 611 | add_wt_huff(m, std::get<1>(t)), 612 | add_wt_huff(m, std::get<2>(t)), 613 | 614 | add_wt_huff_int<>(m, ""), 615 | add_wt_huff_int(m, std::get<0>(t)), 616 | add_wt_huff_int(m, std::get<1>(t)), 617 | add_wt_huff_int(m, std::get<2>(t)), 618 | 619 | add_wt_hutu<>(m, ""), 620 | add_wt_hutu(m, std::get<0>(t)), 621 | add_wt_hutu(m, std::get<1>(t)), 622 | add_wt_hutu(m, std::get<2>(t)), 623 | 624 | add_wt_hutu_int<>(m, ""), 625 | 
add_wt_hutu_int(m, std::get<0>(t)), 626 | add_wt_hutu_int(m, std::get<1>(t)), 627 | add_wt_hutu_int(m, std::get<2>(t)), 628 | 629 | add_wt_blcd<>(m, ""), 630 | add_wt_blcd(m, std::get<0>(t)), 631 | add_wt_blcd(m, std::get<1>(t)), 632 | add_wt_blcd(m, std::get<2>(t)), 633 | 634 | add_wt_blcd_int<>(m, ""), 635 | add_wt_blcd_int(m, std::get<0>(t)), 636 | add_wt_blcd_int(m, std::get<1>(t)), 637 | add_wt_blcd_int(m, std::get<2>(t)), 638 | 639 | add_wavelet_class>(m, "WaveletTreeGMRrankselect", 640 | doc_wt_gmr_rs), 641 | add_wavelet_class>>( 642 | m, "WaveletTreeGMRrankselectEnc", doc_wt_gmr_rs), 643 | add_wavelet_class>(m, "WaveletTreeGolynskiMunroRao", 644 | doc_wt_gmr), 645 | add_wavelet_class>>( 646 | m, "WaveletTreeGolynskiMunroRaoEnc", doc_wt_gmr), 647 | 648 | add_wavelet_class>(m, "WaveletTreeAP", doc_wt_ap)); 649 | } 650 | -------------------------------------------------------------------------------- /pysdsl/util/indexiterator.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | namespace { 8 | template struct int_ { using type = int; }; } 9 | 10 | 11 | namespace detail 12 | { 13 | template 14 | struct get_reference { using type = TRef; }; 15 | 16 | template 17 | struct get_reference 18 | { using type = typename Container::reference; }; 19 | 20 | template ::type> 24 | class count_index_iterator 25 | { 26 | public: 27 | using difference_type = S; 28 | using size_type = S; 29 | using value_type = T; 30 | using pointer = T*; 31 | using reference = TRef; 32 | using const_reference = const T&; 33 | using iterator_category = std::random_access_iterator_tag; 34 | 35 | constexpr count_index_iterator() noexcept: 36 | m_parent(nullptr), m_index(0) 37 | {} 38 | 39 | constexpr count_index_iterator(const Container* container, 40 | S index) noexcept: 41 | m_parent(container), 42 | m_index(index) 43 | {} 44 | ~count_index_iterator() = default; 45 | 46 | constexpr 
count_index_iterator( 47 | const count_index_iterator& other 48 | ) noexcept = default; 49 | 50 | constexpr 51 | count_index_iterator(count_index_iterator&& other) noexcept = default; 52 | 53 | constexpr count_index_iterator& 54 | operator=(const count_index_iterator& other) noexcept = default; 55 | 56 | constexpr count_index_iterator& 57 | operator=(count_index_iterator&& other) noexcept = default; 58 | 59 | constexpr 60 | bool operator!=(const count_index_iterator& other) const noexcept { 61 | return (m_parent != other.m_parent) || (m_index != other.m_index); } 62 | 63 | constexpr 64 | bool operator==(const count_index_iterator& other) const noexcept { 65 | return (m_parent == other.m_parent) && (m_index == other.m_index); } 66 | 67 | constexpr 68 | bool operator>(const count_index_iterator& other) const noexcept { 69 | return m_index > other.m_index; } 70 | 71 | constexpr 72 | bool operator>=(const count_index_iterator& other) const noexcept { 73 | return m_index >= other.m_index; } 74 | 75 | constexpr 76 | bool operator<(const count_index_iterator& other) const noexcept { 77 | return m_index < other.m_index; } 78 | 79 | constexpr 80 | bool operator<=(const count_index_iterator& other) const noexcept { 81 | return m_index <= other.m_index; } 82 | 83 | value_type operator*() { return (*m_parent)[m_index]; } 84 | 85 | decltype(auto) operator++() 86 | { 87 | ++m_index; 88 | return *this; 89 | } 90 | 91 | const auto operator++(int) { 92 | return *count_index_iterator(m_parent, 93 | m_index++); } 94 | 95 | decltype(auto) operator--() 96 | { 97 | --m_index; 98 | return *this; 99 | } 100 | 101 | const auto operator--(int) { 102 | return *count_index_iterator(m_parent, 103 | m_index--); } 104 | 105 | constexpr 106 | difference_type 107 | operator-(const count_index_iterator& other) const noexcept { 108 | return m_index - other.m_index; } 109 | 110 | constexpr 111 | decltype(auto) operator-(const difference_type step) const noexcept { 112 | return 
count_index_iterator(m_parent, 113 | m_index - step); } 114 | 115 | constexpr 116 | decltype(auto) operator+(const difference_type step) const noexcept { 117 | return count_index_iterator(m_parent, 118 | m_index + step); } 119 | 120 | friend constexpr decltype(auto) 121 | operator+(const difference_type step, 122 | const count_index_iterator &self) noexcept { 123 | return count_index_iterator(self.m_parent, 124 | self.m_index + step); } 125 | 126 | friend constexpr decltype(auto) 127 | operator-(const difference_type step, 128 | const count_index_iterator &self) noexcept { 129 | return count_index_iterator(self.m_parent, 130 | self.m_index - step); } 131 | 132 | count_index_iterator& operator+=(const difference_type i) 133 | { 134 | m_index += i; 135 | return *this; 136 | } 137 | 138 | count_index_iterator& operator-=(difference_type i) 139 | { 140 | m_index -= i; 141 | return *this; 142 | } 143 | 144 | const_reference operator[](difference_type i) const { 145 | return (*m_parent)[m_index + i]; } 146 | 147 | void swap(count_index_iterator& other) noexcept 148 | { 149 | std::swap(m_parent, other.m_parent); 150 | std::swap(m_index, other.m_index); 151 | } 152 | 153 | private: 154 | const Container* m_parent; 155 | S m_index; 156 | }; 157 | 158 | 159 | template 160 | void swap(count_index_iterator& first, 161 | count_index_iterator& second) 162 | { 163 | first.swap(second); 164 | } 165 | } // namespace detail 166 | -------------------------------------------------------------------------------- /pysdsl/util/tupletricks.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | 7 | namespace detail 8 | { 9 | 10 | template 11 | constexpr 12 | decltype(auto) for_each_impl(P&& t, Function&& f, std::index_sequence) { 13 | return std::make_tuple(f(std::get(t))...); } 14 | 15 | template 16 | constexpr 17 | decltype(auto) for_each_impl(const P& t, Function&& f, std::index_sequence) { 18 | return 
std::make_tuple(f(std::get(t))...); } 19 | 20 | template 21 | constexpr 22 | decltype(auto) for_each(const std::tuple& t, Function&& f) { 23 | return for_each_impl(t, f, std::index_sequence_for{}); } 24 | 25 | template 26 | constexpr 27 | decltype(auto) for_each(std::tuple& t, Function&& f) { 28 | return for_each_impl(t, f, std::index_sequence_for{}); } 29 | 30 | template 31 | constexpr 32 | decltype(auto) forward_each_impl(P&& t, Function&& f, std::index_sequence) { 33 | return std::forward_as_tuple(f(std::get(t))...); } 34 | 35 | template 36 | constexpr 37 | decltype(auto) forward_each_impl(const P& t, Function&& f, std::index_sequence) { 38 | return std::forward_as_tuple(f(std::get(t))...); } 39 | 40 | template 41 | constexpr 42 | decltype(auto) forward_each(const std::tuple& t, Function&& f) { 43 | return forward_each_impl(t, f, std::index_sequence_for{}); } 44 | 45 | template 46 | constexpr 47 | decltype(auto) forward_each(std::tuple& t, Function&& f) { 48 | return forward_each_impl(t, f, std::index_sequence_for{}); } 49 | 50 | } // namespace detail 51 | 52 | 53 | template 54 | constexpr 55 | decltype(auto) for_each_in_tuple(const std::tuple &t, F f) { 56 | return detail::for_each(t, f); } 57 | 58 | 59 | template 60 | constexpr 61 | decltype(auto) for_each_in_tuple(std::tuple &t, F f) { 62 | return detail::for_each(t, f); } 63 | 64 | template 65 | constexpr 66 | decltype(auto) forward_each_in_tuple(const std::tuple &t, F f) { 67 | return detail::forward_each(t, f); } 68 | 69 | 70 | template 71 | constexpr 72 | decltype(auto) forward_each_in_tuple(std::tuple &t, F f) { 73 | return detail::forward_each(t, f); } 74 | 75 | 76 | // subset functor 77 | template