├── .droneci
    ├── NiceLinuxDockerfile
    └── config.yml
├── .gitignore
├── .gitmodules
├── README.md
├── pysdsl
    ├── __init__.cpp
    ├── __init__.py
    ├── _memory_monitor.cpp
    ├── bits.cpp
    ├── calc.hpp
    ├── docstrings.hpp
    ├── io.hpp
    ├── memory_monitor.py
    ├── operations
    │   ├── creation.hpp
    │   ├── iteration.hpp
    │   └── sizes.hpp
    ├── supports.hpp
    ├── types
    │   ├── bitvector.hpp
    │   ├── encodedvector.hpp
    │   ├── intvector.hpp
    │   ├── pysequence.hpp
    │   ├── sorted_int_stack.hpp
    │   ├── suffixarray.hpp
    │   └── wavelet.hpp
    └── util
    │   ├── indexiterator.hpp
    │   └── tupletricks.hpp
├── setup.py
└── tests
    ├── test_encvector.py
    ├── test_intvector.py
    ├── test_sorted_int_stack.py
    ├── test_suffixarray.py
    └── test_wavelet.py


/.droneci/NiceLinuxDockerfile:
--------------------------------------------------------------------------------
 1 | FROM ubuntu:latest
 2 | RUN mkdir /work
 3 | WORKDIR /work
 4 | RUN \
 5 |   apt update && \
 6 |   apt install -y --no-install-recommends clang ssh git tar zip ca-certificates software-properties-common && \
 7 |   apt install -y cmake python-pip python3-pip && \
 8 |   add-apt-repository -y ppa:deadsnakes/ppa && \
 9 |   apt install -y python3.4 python3.4-dev python3.5 python3.5-dev python3.6 python3.6-dev python3.7 python3.7-dev && \
10 |   rm -rf /var/lib/apt/lists/*
11 | 


--------------------------------------------------------------------------------
/.droneci/config.yml:
--------------------------------------------------------------------------------
  1 | kind: pipeline
  2 | name: build-2.7
  3 | 
  4 | steps:
  5 | - name: build
  6 |   image: ivigns/nice-linux:ubuntu-0.0.4
  7 |   volumes:
  8 |   - name: cache
  9 |     path: /tmp/cache
 10 |   commands:
 11 |   - git submodule sync
 12 |   - git submodule update --init --recursive
 13 |   - cd /drone/src/libdivsufsort
 14 |   - git pull origin master
 15 |   - cd /drone/src/sdsl-lite
 16 |   - git pull origin master
 17 |   - cd /drone/src/sdsl-lite/external/libdivsufsort
 18 |   - git pull origin master
 19 |   - cd /drone/src/pybind11
 20 |   - git pull origin master
 21 |   - cd /tmp
 22 |   - mkdir sdsl-lite-build
 23 |   - mkdir libdivsufsort-build
 24 |   - export CC=clang
 25 |   - export CXX=clang++
 26 |   - export CFLAGS="-O3 -static -lc -fPIC -static-libgcc -pipe -fcolor-diagnostics -lpthread -Wl,-Bstatic"
 27 |   - cd /tmp/libdivsufsort-build
 28 |   - cmake -D BUILD_SHARED_LIBS=OFF /drone/src/libdivsufsort
 29 |   - make install
 30 |   - export CXXFLAGS="-O3 -pipe -fcolor-diagnostics -static-libgcc -static-libstdc++ -fPIC -lpthread  -Wl,-Bstatic -Wl,-Bdynamic"
 31 |   - cd /tmp/sdsl-lite-build
 32 |   - cmake /drone/src/sdsl-lite
 33 |   - make install
 34 |   - export CXXFLAGS="-O3 -pipe -stdlib=libc++ -fcolor-diagnostics -fPIC"
 35 |   - export CFLAGS="-O3 -pipe -fcolor-diagnostics -fPIC"
 36 |   - cd /drone/src/pybind11
 37 |   - pip install --no-binary ':all:' --no-cache-dir . --verbose
 38 |   - cd /drone/src
 39 |   - pip wheel --no-binary ':all:' --no-cache-dir -w dist . --verbose
 40 |   - cp -r . /tmp/cache
 41 | 
 42 | - name: show
 43 |   image: debian:testing
 44 |   volumes:
 45 |   - name: cache
 46 |     path: /tmp/cache
 47 |   commands:
 48 |   - cd /tmp/cache && ls -aR
 49 | 
 50 | - name: test-wheel
 51 |   image: debian:testing
 52 |   volumes:
 53 |   - name: cache
 54 |     path: /tmp/cache
 55 |   commands:
 56 |   - apt-get -qq update
 57 |   - apt-get -qqy install python python-pip
 58 |   - pip install $(ls -d /tmp/cache/dist/pysdsl*.whl)
 59 |   - pip install -U pytest
 60 |   - pytest --verbose --showlocals --color=yes tests
 61 | 
 62 | - name: publish
 63 |   image: plugins/github-release
 64 |   volumes:
 65 |   - name: cache
 66 |     path: /tmp/cache
 67 |   settings:
 68 |     api_key:
 69 |       from_secret: repo_token
 70 |     files:
 71 |       - /tmp/cache/dist/pysdsl*.whl
 72 |     title: ${DRONE_TAG}
 73 |   when:
 74 |     event: tag
 75 | 
 76 | volumes:
 77 | - name: cache
 78 |   temp: {}
 79 | 
 80 | ---
 81 | kind: pipeline
 82 | name: build-3.4
 83 | 
 84 | steps:
 85 | - name: build
 86 |   image: ivigns/nice-linux:ubuntu-0.0.4
 87 |   volumes:
 88 |   - name: cache
 89 |     path: /tmp/cache
 90 |   commands:
 91 |   - git submodule sync
 92 |   - git submodule update --init --recursive
 93 |   - cd /drone/src/libdivsufsort
 94 |   - git pull origin master
 95 |   - cd /drone/src/sdsl-lite
 96 |   - git pull origin master
 97 |   - cd /drone/src/sdsl-lite/external/libdivsufsort
 98 |   - git pull origin master
 99 |   - cd /drone/src/pybind11
100 |   - git pull origin master
101 |   - cd /tmp
102 |   - mkdir sdsl-lite-build
103 |   - mkdir libdivsufsort-build
104 |   - export CC=clang
105 |   - export CXX=clang++
106 |   - export CFLAGS="-O3 -static -lc -fPIC -static-libgcc -pipe -fcolor-diagnostics -lpthread -Wl,-Bstatic"
107 |   - cd /tmp/libdivsufsort-build
108 |   - cmake -D BUILD_SHARED_LIBS=OFF /drone/src/libdivsufsort
109 |   - make install
110 |   - export CXXFLAGS="-O3 -pipe -fcolor-diagnostics -static-libgcc -static-libstdc++ -fPIC -lpthread  -Wl,-Bstatic -Wl,-Bdynamic"
111 |   - cd /tmp/sdsl-lite-build
112 |   - cmake /drone/src/sdsl-lite
113 |   - make install
114 |   - export CXXFLAGS="-O3 -pipe -stdlib=libc++ -fcolor-diagnostics -fPIC"
115 |   - export CFLAGS="-O3 -pipe -fcolor-diagnostics -fPIC"
116 |   - cd /drone/src/pybind11
117 |   - python3.4 -m pip install --no-binary ':all:' --no-cache-dir . --verbose
118 |   - cd /drone/src
119 |   - python3.4 -m pip wheel --no-binary ':all:' --no-cache-dir -w dist . --verbose
120 |   - cp -r . /tmp/cache
121 | 
122 | - name: show
123 |   image: debian:testing
124 |   volumes:
125 |   - name: cache
126 |     path: /tmp/cache
127 |   commands:
128 |   - cd /tmp/cache && ls -aR
129 | 
130 | - name: test-wheel
131 |   image: ivigns/nice-linux:ubuntu-0.0.4
132 |   volumes:
133 |   - name: cache
134 |     path: /tmp/cache
135 |   commands:
136 |   - python3.4 -m pip install $(ls -d /tmp/cache/dist/pysdsl*.whl)
137 |   - python3.4 -m pip install -U pytest
138 |   - pytest --verbose --showlocals --color=yes tests
139 | 
140 | - name: publish
141 |   image: plugins/github-release
142 |   volumes:
143 |   - name: cache
144 |     path: /tmp/cache
145 |   settings:
146 |     api_key:
147 |       from_secret: repo_token
148 |     files:
149 |       - /tmp/cache/dist/pysdsl*.whl
150 |     title: ${DRONE_TAG}
151 |   when:
152 |     event: tag
153 | 
154 | volumes:
155 | - name: cache
156 |   temp: {}
157 | 
158 | ---
159 | kind: pipeline
160 | name: build-3.5
161 | 
162 | steps:
163 | - name: build
164 |   image: ivigns/nice-linux:ubuntu-0.0.4
165 |   volumes:
166 |   - name: cache
167 |     path: /tmp/cache
168 |   commands:
169 |   - git submodule sync
170 |   - git submodule update --init --recursive
171 |   - cd /drone/src/libdivsufsort
172 |   - git pull origin master
173 |   - cd /drone/src/sdsl-lite
174 |   - git pull origin master
175 |   - cd /drone/src/sdsl-lite/external/libdivsufsort
176 |   - git pull origin master
177 |   - cd /drone/src/pybind11
178 |   - git pull origin master
179 |   - cd /tmp
180 |   - mkdir sdsl-lite-build
181 |   - mkdir libdivsufsort-build
182 |   - export CC=clang
183 |   - export CXX=clang++
184 |   - export CFLAGS="-O3 -static -lc -fPIC -static-libgcc -pipe -fcolor-diagnostics -lpthread -Wl,-Bstatic"
185 |   - cd /tmp/libdivsufsort-build
186 |   - cmake -D BUILD_SHARED_LIBS=OFF /drone/src/libdivsufsort
187 |   - make install
188 |   - export CXXFLAGS="-O3 -pipe -fcolor-diagnostics -static-libgcc -static-libstdc++ -fPIC -lpthread  -Wl,-Bstatic -Wl,-Bdynamic"
189 |   - cd /tmp/sdsl-lite-build
190 |   - cmake /drone/src/sdsl-lite
191 |   - make install
192 |   - export CXXFLAGS="-O3 -pipe -stdlib=libc++ -fcolor-diagnostics -fPIC"
193 |   - export CFLAGS="-O3 -pipe -fcolor-diagnostics -fPIC"
194 |   - cd /drone/src/pybind11
195 |   - python3.5 -m pip install --no-binary ':all:' --no-cache-dir . --verbose
196 |   - cd /drone/src
197 |   - python3.5 -m pip wheel --no-binary ':all:' --no-cache-dir -w dist . --verbose
198 |   - cp -r . /tmp/cache
199 | 
200 | - name: show
201 |   image: debian:testing
202 |   volumes:
203 |   - name: cache
204 |     path: /tmp/cache
205 |   commands:
206 |   - cd /tmp/cache && ls -aR
207 | 
208 | - name: test-wheel
209 |   image: ivigns/nice-linux:ubuntu-0.0.4
210 |   volumes:
211 |   - name: cache
212 |     path: /tmp/cache
213 |   commands:
214 |   - python3.5 -m pip install $(ls -d /tmp/cache/dist/pysdsl*.whl)
215 |   - python3.5 -m pip install -U pytest
216 |   - pytest --verbose --showlocals --color=yes tests
217 | 
218 | - name: publish
219 |   image: plugins/github-release
220 |   volumes:
221 |   - name: cache
222 |     path: /tmp/cache
223 |   settings:
224 |     api_key:
225 |       from_secret: repo_token
226 |     files:
227 |       - /tmp/cache/dist/pysdsl*.whl
228 |     title: ${DRONE_TAG}
229 |   when:
230 |     event: tag
231 | 
232 | volumes:
233 | - name: cache
234 |   temp: {}
235 | 
236 | ---
237 | kind: pipeline
238 | name: build-3.6
239 | 
240 | steps:
241 | - name: build
242 |   image: ivigns/nice-linux:ubuntu-0.0.4
243 |   volumes:
244 |   - name: cache
245 |     path: /tmp/cache
246 |   commands:
247 |   - git submodule sync
248 |   - git submodule update --init --recursive
249 |   - cd /drone/src/libdivsufsort
250 |   - git pull origin master
251 |   - cd /drone/src/sdsl-lite
252 |   - git pull origin master
253 |   - cd /drone/src/sdsl-lite/external/libdivsufsort
254 |   - git pull origin master
255 |   - cd /drone/src/pybind11
256 |   - git pull origin master
257 |   - cd /tmp
258 |   - mkdir sdsl-lite-build
259 |   - mkdir libdivsufsort-build
260 |   - export CC=clang
261 |   - export CXX=clang++
262 |   - export CFLAGS="-O3 -static -lc -fPIC -static-libgcc -pipe -fcolor-diagnostics -lpthread -Wl,-Bstatic"
263 |   - cd /tmp/libdivsufsort-build
264 |   - cmake -D BUILD_SHARED_LIBS=OFF /drone/src/libdivsufsort
265 |   - make install
266 |   - export CXXFLAGS="-O3 -pipe -fcolor-diagnostics -static-libgcc -static-libstdc++ -fPIC -lpthread  -Wl,-Bstatic -Wl,-Bdynamic"
267 |   - cd /tmp/sdsl-lite-build
268 |   - cmake /drone/src/sdsl-lite
269 |   - make install
270 |   - export CXXFLAGS="-O3 -pipe -stdlib=libc++ -fcolor-diagnostics -fPIC"
271 |   - export CFLAGS="-O3 -pipe -fcolor-diagnostics -fPIC"
272 |   - cd /drone/src/pybind11
273 |   - python3.6 -m pip install --no-binary ':all:' --no-cache-dir . --verbose
274 |   - cd /drone/src
275 |   - python3.6 -m pip wheel --no-binary ':all:' --no-cache-dir -w dist . --verbose
276 |   - cp -r . /tmp/cache
277 | 
278 | - name: show
279 |   image: debian:testing
280 |   volumes:
281 |   - name: cache
282 |     path: /tmp/cache
283 |   commands:
284 |   - cd /tmp/cache && ls -aR
285 | 
286 | - name: test-wheel
287 |   image: ivigns/nice-linux:ubuntu-0.0.4
288 |   volumes:
289 |   - name: cache
290 |     path: /tmp/cache
291 |   commands:
292 |   - python3.6 -m pip install $(ls -d /tmp/cache/dist/pysdsl*.whl)
293 |   - python3.6 -m pip install -U pytest
294 |   - pytest --verbose --showlocals --color=yes tests
295 | 
296 | - name: publish
297 |   image: plugins/github-release
298 |   volumes:
299 |   - name: cache
300 |     path: /tmp/cache
301 |   settings:
302 |     api_key:
303 |       from_secret: repo_token
304 |     files:
305 |       - /tmp/cache/dist/pysdsl*.whl
306 |     title: ${DRONE_TAG}
307 |   when:
308 |     event: tag
309 | 
310 | volumes:
311 | - name: cache
312 |   temp: {}
313 | 
314 | ---
315 | kind: pipeline
316 | name: build-3.7
317 | 
318 | steps:
319 | - name: build
320 |   image: ivigns/nice-linux:ubuntu-0.0.4
321 |   volumes:
322 |   - name: cache
323 |     path: /tmp/cache
324 |   commands:
325 |   - git submodule sync
326 |   - git submodule update --init --recursive
327 |   - cd /drone/src/libdivsufsort
328 |   - git pull origin master
329 |   - cd /drone/src/sdsl-lite
330 |   - git pull origin master
331 |   - cd /drone/src/sdsl-lite/external/libdivsufsort
332 |   - git pull origin master
333 |   - cd /drone/src/pybind11
334 |   - git pull origin master
335 |   - cd /tmp
336 |   - mkdir sdsl-lite-build
337 |   - mkdir libdivsufsort-build
338 |   - export CC=clang
339 |   - export CXX=clang++
340 |   - export CFLAGS="-O3 -static -lc -fPIC -static-libgcc -pipe -fcolor-diagnostics -lpthread -Wl,-Bstatic"
341 |   - cd /tmp/libdivsufsort-build
342 |   - cmake -D BUILD_SHARED_LIBS=OFF /drone/src/libdivsufsort
343 |   - make install
344 |   - export CXXFLAGS="-O3 -pipe -fcolor-diagnostics -static-libgcc -static-libstdc++ -fPIC -lpthread  -Wl,-Bstatic -Wl,-Bdynamic"
345 |   - cd /tmp/sdsl-lite-build
346 |   - cmake /drone/src/sdsl-lite
347 |   - make install
348 |   - export CXXFLAGS="-O3 -pipe -stdlib=libc++ -fcolor-diagnostics -fPIC"
349 |   - export CFLAGS="-O3 -pipe -fcolor-diagnostics -fPIC"
350 |   - cd /drone/src/pybind11
351 |   - python3.7 -m pip install --no-binary ':all:' --no-cache-dir . --verbose
352 |   - cd /drone/src
353 |   - python3.7 -m pip wheel --no-binary ':all:' --no-cache-dir -w dist . --verbose
354 |   - cp -r . /tmp/cache
355 | 
356 | - name: show
357 |   image: debian:testing
358 |   volumes:
359 |   - name: cache
360 |     path: /tmp/cache
361 |   commands:
362 |   - cd /tmp/cache && ls -aR
363 | 
364 | - name: test-wheel
365 |   image: ivigns/nice-linux:ubuntu-0.0.4
366 |   volumes:
367 |   - name: cache
368 |     path: /tmp/cache
369 |   commands:
370 |   - python3.7 -m pip install $(ls -d /tmp/cache/dist/pysdsl*.whl)
371 |   - python3.7 -m pip install -U pytest
372 |   - pytest --verbose --showlocals --color=yes tests
373 | 
374 | - name: publish
375 |   image: plugins/github-release
376 |   volumes:
377 |   - name: cache
378 |     path: /tmp/cache
379 |   settings:
380 |     api_key:
381 |       from_secret: repo_token
382 |     files:
383 |       - /tmp/cache/dist/pysdsl*.whl
384 |     title: ${DRONE_TAG}
385 |   when:
386 |     event: tag
387 | 
388 | volumes:
389 | - name: cache
390 |   temp: {}
391 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .rendered.*
 2 | .*.cppimporthash
 3 | *.so
 4 | .vscode/*
 5 | python?.*/
 6 | tmp/
 7 | *.whl
 8 | *.pyc
 9 | __pycache__/
10 | .pytest_cache/
11 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
 1 | [submodule "sdsl-lite"]
 2 | 	path = sdsl-lite
 3 | 	url = https://github.com/simongog/sdsl-lite
 4 | [submodule "pybind11"]
 5 | 	path = pybind11
 6 | 	url = https://github.com/pybind/pybind11
 7 | [submodule "libdivsufsort"]
 8 | 	path = libdivsufsort
 9 | 	url = https://github.com/y-256/libdivsufsort
10 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Python bindings to Succinct Data Structure Library 2.0
  2 | 
  3 | The Succinct Data Structure Library ([SDSL][SDSL]) is a powerful and flexible C++11 library implementing succinct data structures. In total, the library contains the highlights of 40 [research publications][SDSLLIT]. Succinct data structures can represent an object (such as a bitvector or a tree) in space close to the information-theoretic lower bound of the object while supporting operations of the original object efficiently. The theoretical time complexity of an operation performed on the classical data structure and the equivalent succinct data structure are (most of the time) identical.
  4 | 
  5 | Most of the examples from the [SDSL cheat sheet][SDSL-CHEAT-SHEET] and the [SDSL tutorial][SDSL-TUTORIAL] are implemented.
  6 | 
  7 | ## Mutable bit-compressed vectors
  8 | 
  9 | Core classes (see `pysdsl.int_vector` for dict of all of them):
 10 | 
 11 |  * `pysdsl.IntVector(size, default_value, bit_width=64)` — dynamic bit width
 12 |  * `pysdsl.BitVector(size, default_value)` — static (fixed) bit width (1)
 13 |  * `pysdsl.Int4Vector(size, default_value)` — static bit width (4)
 14 |  * `pysdsl.Int8Vector(size, default_value)` — static bit width (8)
 15 |  * `pysdsl.Int16Vector(size, default_value)` — static bit width (16)
 16 |  * `pysdsl.Int24Vector(size, default_value)` — static bit width (24)
 17 |  * `pysdsl.Int32Vector(size, default_value)` — static bit width (32)
 18 |  * `pysdsl.Int64Vector(size, default_value)` — static bit width (64)
 19 | 
 20 | Construction from python sequences is also supported.
 21 | 
 22 | ```python
 23 | 
 24 | In [1]: import pysdsl
 25 | 
 26 | In [2]: %time v = pysdsl.IntVector(1024 * 1024 * 256)
 27 | CPU times: user 914 ms, sys: 509 ms, total: 1.42 s
 28 | Wall time: 1.42 s
 29 | 
 30 | In [3]: v.size_in_mega_bytes
 31 | Out[3]: 2048.000008583069
 32 | 
 33 | In [4]: %time v.set_to_id()  # like *v = range(len(v))
 34 | CPU times: user 8.19 s, sys: 1.3 ms, total: 8.19 s
 35 | Wall time: 8.19 s
 36 | 
 37 | In [5]: v.width
 38 | Out[5]: 64
 39 | 
 40 | In [6]: %time v.bit_compress()
 41 | CPU times: user 23.3 s, sys: 155 ms, total: 23.5 s
 42 | Wall time: 23.5 s
 43 | 
 44 | In [7]: v.width
 45 | Out[7]: 28
 46 | 
 47 | In [8]: v.size_in_mega_bytes
 48 | Out[8]: 896.0000085830688
 49 | 
 50 | ```
 51 | 
 52 | Buffer interface:
 53 | 
 54 | ```python
 55 | In [9]: import array
 56 | 
 57 | In [10]: v = pysdsl.Int64Vector([1, 2, 3])
 58 | 
 59 | In [11]: array.array('Q', v)
 60 | Out[11]: array('Q', [1, 2, 3])
 61 | ```
 62 | 
 63 | ## Immutable compressed integer vectors
 64 | 
 65 | (See `pysdsl.enc_vector`):
 66 | 
 67 |  * `EncVectorEliasDelta(IntVector)`
 68 |  * `EncVectorEliasGamma(IntVector)`
 69 |  * `EncVectorFibonacci(IntVector)`
 70 |  * `EncVectorComma2(IntVector)`
 71 |  * `EncVectorComma4(IntVector)`
 72 | 
 73 | ```python
 74 | In [9]: %time ev = pysdsl.EncVectorEliasDelta(v)
 75 | CPU times: user 26.5 s, sys: 31.8 ms, total: 26.5 s
 76 | Wall time: 26.5 s
 77 | 
 78 | In [10]: ev.size_in_mega_bytes
 79 | Out[10]: 45.75003242492676
 80 | ```
 81 | 
 82 | Encoding values with variable length codes (see `pysdsl.variable_length_codes_vector`):
 83 | 
 84 |  * `VariableLengthCodesVectorEliasDelta(IntVector)`
 85 |  * `VariableLengthCodesVectorEliasGamma(IntVector)`
 86 |  * `VariableLengthCodesVectorFibonacci(IntVector)`
 87 |  * `VariableLengthCodesVectorComma2(IntVector)`
 88 |  * `VariableLengthCodesVectorComma4(IntVector)`
 89 | 
 90 | Encoding values with "escaping" technique (see `pysdsl.direct_accessible_codes_vector`):
 91 | 
 92 |  * `DirectAccessibleCodesVector(IntVector)`
 93 |  * `DirectAccessibleCodesVector8(IntVector)`
 94 |  * `DirectAccessibleCodesVector16(IntVector)`
 95 |  * `DirectAccessibleCodesVector63(IntVector)`
 96 |  * `DirectAccessibleCodesVectorDP(IntVector)` — number of layers is chosen
 97 |                                                 with dynamic programming
 98 |  * `DirectAccessibleCodesVectorDPRRR(IntVector)` — same but built on top of
 99 |                                                    RamanRamanRaoVector (see later)
100 | 
101 | Construction from python sequences is also supported.
102 | 
103 | ## Immutable compressed bit (boolean) vectors
104 | 
105 | (See `pysdsl.all_immutable_bitvectors`):
106 | 
107 |  * `BitVectorInterLeaved64(BitVector)`
108 |  * `BitVectorInterLeaved128(BitVector)`
109 |  * `BitVectorInterLeaved256(BitVector)`
110 |  * `BitVectorInterLeaved512(BitVector)` — A bit vector which interleaves the
111 |                                           original `BitVector` with rank information
112 |                                           (see later)
113 |  * `SDVector(BitVector)` — A bit vector which compresses very sparse populated
114 |                            bit vectors by representing the positions of 1 by the
115 |                            Elias-Fano representation for
116 |                            non-decreasing sequences
117 |  * `RamanRamanRaoVector15(BitVector)`
118 |  * `RamanRamanRaoVector63(BitVector)`
119 |  * `RamanRamanRaoVector256(BitVector)` — An H₀-compressed bitvector representation.
120 |  * `HybVector8(BitVector)`
121 |  * `HybVector16(BitVector)` — A hybrid-encoded compressed bitvector
122 |                               representation
123 | 
124 | See also: `pysdsl.raman_raman_rao_vectors`, `pysdsl.sparse_bit_vectors`,
125 | `pysdsl.hybrid_bit_vectors` and `pysdsl.bit_vector_interleaved`.
126 | 
127 | ## Rank and select operations on bitvectors
128 | 
129 | For bitvector `v` `rank(i)` for pattern `P` (by default `P` is a bitstring of
130 | len 1: `1`) is the number of patterns `P` in the prefix `[0..i)` in vector `v`.
131 | 
132 | For bitvector `v` `select(i)` for pattern `P` (by default `P`=`1`) is the
133 | position of the `i`-th occurrence of pattern `P` in vector `v`.
134 | 
135 | Create support instances for rank and/or select for different patterns via:
136 | 
137 |  * `v.init_rank()` or `v.init_rank_1()` for ranks of pattern `1`
138 |     (e.g. the number of set bits in `v`)
139 |  * `v.init_rank_0()` for ranks of pattern `0`
140 |  * `v.init_rank_00()` (if supported by vector class) for ranks of pattern `00`
141 |  * `v.init_rank_01()` (if supported by vector class) for ranks of pattern `01`
142 |  * `v.init_rank_10()` (if supported by vector class) for ranks of pattern `10`
143 |  * `v.init_rank_11()` (if supported by vector class) for ranks of pattern `11`
144 |  * `v.init_support()` or `v.init_support_1()` for select of pattern `1`
145 |     (e.g. the positions of set bits)
146 |  * `v.init_support_0()` for select of pattern `0`
147 |  * `v.init_support_00()` (if supported by vector class) for select of pattern `00`
148 |  * `v.init_support_01()` (if supported by vector class) for select of pattern `01`
149 |  * `v.init_support_10()` (if supported by vector class) for select of pattern `10`
150 |  * `v.init_support_11()` (if supported by vector class) for select of pattern `11`
151 | 
152 | Once support instance `s` is created call it (`s(idx)` or `s.__call__(idx)`)
153 | or use corresponding methods `s.rank(idx)` or `s.select(idx)` to get
154 | the results.
155 | 
156 | `s.rank(idx)` and `s.select(idx)` are undefined if original bitvector is
157 | mutable and was modified.
158 | 
159 | 
160 | ## Wavelet trees
161 | 
162 | The wavelet tree is a data structure that provides three efficient methods:
163 | 
164 | * The `[]`-operator: `wt[i]` returns the `i`-th symbol of the vector for which the wavelet tree was built.
165 | * The rank method: `wt.rank(i, c)` returns the number of occurrences of symbol `c` in the prefix `[0..i-1]` of the vector for which the wavelet tree was built.
166 | * The select method: `wt.select(j, c)` returns the index `i` from `[0..size()-1]` of the `j`-th occurrence of symbol `c`.
167 | 
168 | ## Compressed suffix arrays
169 | 
170 | Suffix array is a sorted array of all suffixes of a string.
171 | 
172 | SDSL supports bitcompressed and compressed suffix arrays.
173 | 
174 | The byte representation of the original IntVector must contain no zero symbols in order to construct a SuffixArray.
175 | 
176 | ## Objects memory structure
177 | 
178 | Any object has a `.structure` property with technical information about an
179 | object. `.structure_json` is also provided for web-view implementations.
180 | `.write_structure_json()` method puts that information into a file.
181 | 
182 | `.size_in_bytes` and `.size_in_mega_bytes` properties show how much memory the
183 | object is occupying.
184 | 
185 | ## Saving/Loading objects
186 | 
187 | All objects provide `.store_to_checked_file()` method allowing one to save
188 | object into a file.
189 | 
190 | All classes provide a `.load_from_checkded_file()` static method allowing one to
191 | load an object stored with `.store_to_checked_file()`.
192 | 
193 | 
194 | ## Building
195 | 
196 | Requirements: static libraries for sdsl and divsufsort.
197 | 
198 | Call `pip` with binaries disabled to fetch sources and build the package:
199 | 
200 | ```bash
201 | pip install --no-binary :all: pysdsl
202 | ```
203 | 
204 | 
205 | [SDSL]: https://github.com/simongog/sdsl-lite
206 | [SDSLLIT]: https://github.com/simongog/sdsl-lite/wiki/Literature
207 | "Succinct Data Structure Literature"
208 | [SDSL-CHEAT-SHEET]: https://simongog.github.io/assets/data/sdsl-cheatsheet.pdf
209 | [SDSL-TUTORIAL]: https://simongog.github.io/assets/data/sdsl-slides/tutorial
210 | 


--------------------------------------------------------------------------------
/pysdsl/__init__.cpp:
--------------------------------------------------------------------------------
 1 | #include <cstdint>
 2 | #include <string>
 3 | #include <tuple>
 4 | #include <stdexcept>
 5 | // Throwing replacement for assert(): the argument is parenthesized so that compound expressions are negated as a whole.
 6 | #define assert(x) if(!(x)) {throw std::runtime_error("assertion failed");}
 7 | 
 8 | #include <sdsl/vectors.hpp>
 9 | 
10 | #include <pybind11/pybind11.h>
11 | 
12 | #include "docstrings.hpp"
13 | #include "operations/creation.hpp"
14 | #include "types/bitvector.hpp"
15 | #include "types/encodedvector.hpp"
16 | #include "types/intvector.hpp"
17 | #include "types/suffixarray.hpp"
18 | #include "types/wavelet.hpp"
19 | #include "types/sorted_int_stack.hpp"
20 | 
21 | namespace py = pybind11;
22 | 
23 | // Entry point of the `pysdsl` extension module: calls the add_* helpers with the module, then applies functors across the returned class tuples.
24 | PYBIND11_MODULE(pysdsl, m)
25 | {
26 |     m.doc() = "sdsl-lite bindings for python";
27 |     // add_int_vectors returns a tuple-like value; its elements 0 and 1 are unpacked below.
28 |     auto iv_classes_tpl = add_int_vectors(m);
29 |     // iv_classes is itself a tuple of class bindings (std::get<1> is taken from it further down).
30 |     auto iv_classes = std::get<0>(iv_classes_tpl);
31 |     auto iv_classes_as_params = std::get<1>(iv_classes_tpl);  // NOTE(review): not used again in this function
32 |     // Element 1 of iv_classes is the binding for sdsl::int_vector<1>, i.e. the plain bit vector.
33 |     py::class_<sdsl::int_vector<1>>& bit_vector_cls = std::get<1>(iv_classes);
34 |     // One-element tuple so the bit vector class can be handed to make_inits_many_functor below.
35 |     auto bit_vector_classes = std::make_tuple(bit_vector_cls);
36 |     // add_bitvectors returns a tuple-like value: element 0 is used as the compressed bit vector classes, element 1 is forwarded to add_wavelet.
37 |     auto tmp = add_bitvectors(m, bit_vector_cls);
38 |     auto compressed_bit_vector_classes = std::get<0>(tmp);
39 |     auto cbv_propagate = std::get<1>(tmp);
40 |     // Results of the remaining add_* helpers are kept for the functor passes below.
41 |     auto enc_classes = add_encoded_vectors(m);
42 | 
43 |     auto wavelet_classes = add_wavelet(m, cbv_propagate);
44 | 
45 |     auto csa_classes = add_csa(m);
46 | 
47 |     auto sorted_stack = add_sorted_int_stack(m);
48 |     // Each call applies make_inits_many_functor(<second tuple>) to every class of the first tuple — presumably adding constructors from those types; TODO confirm in the included headers.
49 |     for_each_in_tuple(iv_classes, make_inits_many_functor(iv_classes));
50 |     for_each_in_tuple(iv_classes, make_inits_many_functor(enc_classes));
51 |     for_each_in_tuple(iv_classes,
52 |                       make_inits_many_functor(compressed_bit_vector_classes));
53 |     for_each_in_tuple(iv_classes, make_inits_many_functor(wavelet_classes));
54 |     // Encoded vectors: functor built from the int vector classes.
55 |     for_each_in_tuple(enc_classes, make_inits_many_functor(iv_classes));
56 |     // Sorted int stack: functor built from its own class tuple.
57 |     for_each_in_tuple(sorted_stack, make_inits_many_functor(sorted_stack));
58 | #ifndef NOCROSSCONSTRUCTORS
59 |     for_each_in_tuple(enc_classes, make_inits_many_functor(enc_classes));
60 |     //for_each_in_tuple(enc_classes, make_inits_many_functor(wavelet_classes));
61 | #endif
62 |     // Compressed bit vectors: functor built from the plain bit vector class; the pass among themselves is compiled out when NOCROSSCONSTRUCTORS is defined.
63 |     for_each_in_tuple(compressed_bit_vector_classes,
64 |                       make_inits_many_functor(bit_vector_classes));
65 | #ifndef NOCROSSCONSTRUCTORS
66 |     for_each_in_tuple(compressed_bit_vector_classes, 
67 |                       make_inits_many_functor(compressed_bit_vector_classes));
68 | #endif
69 |     // Wavelet trees: functor built from the int vector classes; the encoded-vector and wavelet passes are optional as above.
70 |     for_each_in_tuple(wavelet_classes, make_inits_many_functor(iv_classes));
71 | #ifndef NOCROSSCONSTRUCTORS
72 |     for_each_in_tuple(wavelet_classes, make_inits_many_functor(enc_classes));
73 |     for_each_in_tuple(wavelet_classes,
74 |                       make_inits_many_functor(wavelet_classes));
75 | #endif
76 |     // make_pysequence_init_functor() is applied to each class family below; the compressed bit vector pass is commented out.
77 |     for_each_in_tuple(iv_classes, make_pysequence_init_functor());
78 |     for_each_in_tuple(enc_classes, make_pysequence_init_functor());
79 |     //for_each_in_tuple(compressed_bit_vector_classes,
80 |     //                  make_pysequence_init_functor());
81 |     // Wavelet tree and suffix array classes receive the same functor.
82 |     for_each_in_tuple(wavelet_classes, make_pysequence_init_functor());
83 |     for_each_in_tuple(csa_classes, make_pysequence_init_functor());
84 | }
85 | 


--------------------------------------------------------------------------------
/pysdsl/__init__.py:
--------------------------------------------------------------------------------
1 | from pysdsl import *  # NOTE(review): inside a package named pysdsl, Python 3 treats this as an absolute import of the package itself — confirm it reaches the compiled extension
2 | 


--------------------------------------------------------------------------------
/pysdsl/_memory_monitor.cpp:
--------------------------------------------------------------------------------
 1 | #include <fstream>
 2 | #include <sstream>
 3 | #include <stdexcept>
 4 | #include <string>
 5 | 
 6 | #include <pybind11/pybind11.h>
 7 | #include <sdsl/memory_management.hpp>
 8 | namespace py = pybind11;
 9 | 
10 | // Python module `_memory_monitor`: wrappers over sdsl::memory_monitor start/stop and export of its log as JSON or HTML.
11 | PYBIND11_MODULE(_memory_monitor, m)
12 | {
13 |     m.doc() = "Internals of memory monitor module";
14 |     // start/stop forward directly to the sdsl memory monitor.
15 |     m.def("start", [] () { return sdsl::memory_monitor::start(); });
16 |     m.def("stop", [] () { return sdsl::memory_monitor::stop(); });
17 |     m.def(
18 |         "report",  // JSON log parsed into Python objects with json.loads
19 |         [] ()
20 |         {
21 |             std::stringstream fout;
22 |             sdsl::memory_monitor::write_memory_log<sdsl::JSON_FORMAT>(fout);
23 |             auto json = py::module::import("json");
24 |             return json.attr("loads")(fout.str());
25 |         }
26 |     );
27 |     m.def(
28 |         "report_json",  // no-argument overload: JSON log returned as a string
29 |         [] ()
30 |         {
31 |             std::stringstream fout;
32 |             sdsl::memory_monitor::write_memory_log<sdsl::JSON_FORMAT>(fout);
33 |             return fout.str();
34 |         }
35 |     );
36 |     m.def(
37 |         "report_html",  // no-argument overload: HTML log returned as a string
38 |         [] ()
39 |         {
40 |             std::stringstream fout;
41 |             sdsl::memory_monitor::write_memory_log<sdsl::HTML_FORMAT>(fout);
42 |             return fout.str();
43 |         }
44 |     );
45 |     m.def(
46 |         "report_html",  // file overload: writes the HTML log to file_name; throws std::runtime_error on failure
47 |         [](const std::string& file_name)
48 |         {
49 |             std::ofstream fout;
50 |             fout.open(file_name, std::ios::out | std::ios::binary);
51 |             if (!fout.good()) throw std::runtime_error("Can't write to file");
52 |             sdsl::memory_monitor::write_memory_log<sdsl::HTML_FORMAT>(fout);
53 |             fout.close();  // close (and flush) before checking: buffered write failures only surface here
54 |             if (!fout.good()) throw std::runtime_error("Error during write");
55 |         },
56 |         py::arg("file_name"),
57 |         py::call_guard<py::gil_scoped_release>()  // the GIL is released while the file is written
58 |     );
59 |     m.def(
60 |         "report_json",  // file overload: writes the JSON log to file_name; throws std::runtime_error on failure
61 |         [](const std::string& file_name)
62 |         {
63 |             std::ofstream fout;
64 |             fout.open(file_name, std::ios::out | std::ios::binary);
65 |             if (!fout.good()) throw std::runtime_error("Can't write to file");
66 |             sdsl::memory_monitor::write_memory_log<sdsl::JSON_FORMAT>(fout);
67 |             fout.close();  // close (and flush) before checking: buffered write failures only surface here
68 |             if (!fout.good()) throw std::runtime_error("Error during write");
69 |         },
70 |         py::arg("file_name"),
71 |         py::call_guard<py::gil_scoped_release>()  // the GIL is released while the file is written
72 |     );
73 | 
74 | }
75 | 


--------------------------------------------------------------------------------
/pysdsl/bits.cpp:
--------------------------------------------------------------------------------
  1 | #include <string>
  2 | #include <tuple>
  3 | 
  4 | #include "sdsl/bits.hpp"
  5 | #include <pybind11/pybind11.h>
  6 | 
  7 | 
  8 | namespace py = pybind11;
  9 | 
 10 | // Copies a fixed-size C array of N elements into a new Python tuple of length N.
 11 | template<typename T, std::size_t N>
 12 | decltype(auto) as_tuple(const T (&a) [N])
 13 | {
 14 |     py::tuple result{N};
 15 |     for (std::size_t i = 0; i < N; i++) result[i] = a[i];  // index starts at 0; it was previously left uninitialized
 16 |     return result;
 17 | }
 18 | 
 19 | // Wrappers around sdsl::bits helpers. The cnt* wrappers return the pair (result, c) so the value of c after the call reaches the caller.
 20 | auto cnt11 = [] (uint64_t x, uint64_t& c) {
 21 |     auto result = sdsl::bits::cnt11(x, c);  // call first: c is passed by reference and read again below (presumably a carry — TODO confirm)
 22 |     return std::make_pair(result, c);
 23 | };
 24 | auto cnt10 = [] (uint64_t x, uint64_t& c) {
 25 |     auto result = sdsl::bits::cnt10(x, c);  // same pattern as cnt11, for sdsl::bits::cnt10
 26 |     return std::make_pair(result, c);
 27 | };
 28 | auto cnt01 = [] (uint64_t x, uint64_t& c) {
 29 |     auto result = sdsl::bits::cnt01(x, c);  // same pattern as cnt11, for sdsl::bits::cnt01
 30 |     return std::make_pair(result, c);
 31 | };
 32 | auto sel = [] (uint64_t x, uint32_t i) {  // forwards to sdsl::bits::sel after a range check on i
 33 |     if (i >= sizeof(sdsl::bits::ps_overflow) / sizeof(sdsl::bits::ps_overflow[0])) {  // i must be below the element count of ps_overflow
 34 |         throw py::index_error(std::to_string(i));
 35 |     }
 36 |     return sdsl::bits::sel(x, i);
 37 | };
 38 | auto sel11 = [] (uint64_t x, uint32_t i, uint32_t c=0) {  // forwards to sdsl::bits::sel11; c defaults to 0
 39 |     if (i >= sizeof(sdsl::bits::ps_overflow) / sizeof(sdsl::bits::ps_overflow[0])) {  // same range check as sel
 40 |         throw py::index_error(std::to_string(i));
 41 |     }
 42 |     return sdsl::bits::sel11(x, i, c);
 43 | };
 44 | 
 45 | 
 46 | PYBIND11_MODULE(bits, m) {
 47 |     m.doc() = "bitwise tricks on 64 bit words.";
 48 | 
 49 | 
 50 |     auto bits_cls = py::class_<sdsl::bits>(m, "bits")
 51 |         .def_property_readonly_static("all_set", [](py::object /* self */) { return sdsl::bits::all_set; })
 52 |         .def_static("all_set_", []() { return sdsl::bits::all_set; },
 53 |                     "64bit mask with all bits set to 1.")
 54 | 
 55 |         .def_property_readonly_static("deBruijn64", [](py::object) { return sdsl::bits::deBruijn64; })
 56 |         .def_static("deBruijn64_", []() { return sdsl::bits::deBruijn64; },
 57 |                     "This constant represents a de Bruijn sequence B(k,n) for k=2 and n=6. "
 58 |                     "Details for de Bruijn sequences see "
 59 |                     "http://en.wikipedia.org/wiki/De_bruijn_sequence "
 60 |                     "deBruijn64 is used in combination with the "
 61 |                     "array lt_deBruijn_to_idx.")
 62 | 
 63 |         .def_property_readonly_static("lt_deBruijn_to_idx", [](py::object) { return as_tuple(sdsl::bits::lt_deBruijn_to_idx); })
 64 |         .def_static("lt_deBruijn_to_idx_", []() { return as_tuple(sdsl::bits::lt_deBruijn_to_idx); },
 65 |                     "This table maps a 6-bit subsequence S[idx...idx+5] of constant deBruijn64 to idx.")
 66 | 
 67 |         .def_property_readonly_static("lt_fib", [](py::object) { return as_tuple(sdsl::bits::lt_fib); })
 68 |         .def_static("lt_fib_", []() { return as_tuple(sdsl::bits::lt_fib); },
 69 |                     "Array containing Fibonacci numbers less than 2**64")
 70 | 
 71 |         .def_property_readonly_static("lt_cnt", [](py::object) {
 72 |             return py::bytes(reinterpret_cast<const char*>(sdsl::bits::lt_cnt), sizeof(sdsl::bits::lt_cnt));
 73 |         })
 74 |         .def_static("lt_cnt_", []() {
 75 |             return py::bytes(reinterpret_cast<const char*>(sdsl::bits::lt_cnt), sizeof(sdsl::bits::lt_cnt));
 76 |         }, "Lookup table for byte popcounts.")
 77 | 
 78 |         .def_property_readonly_static("lt_hi", [](py::object) { return as_tuple(sdsl::bits::lt_hi); })
 79 |         .def_static("lt_hi_", []() { return as_tuple(sdsl::bits::lt_hi); },
 80 |                     "Lookup table for most significant set bit in a byte.")
 81 | 
 82 |         .def_property_readonly_static("lo_set", [](py::object) { return as_tuple(sdsl::bits::lo_set); })
 83 |         .def_static("lo_set_", []() { return as_tuple(sdsl::bits::lo_set); },
 84 |                     "lo_set[i] is a 64-bit word with the i least significant bits set and the high bits not set. "
 85 |                     "lo_set[0] = 0ULL, lo_set[1]=1ULL, lo_set[2]=3ULL...")
 86 | 
 87 |         .def_property_readonly_static("lo_unset", [](py::object) { return as_tuple(sdsl::bits::lo_unset); })
 88 |         .def_static("lo_unset_", []() { return as_tuple(sdsl::bits::lo_unset); },
 89 |                     "lo_unset[i] is a 64-bit word with the i least significant bits not set and the high bits set. "
 90 |                     "lo_unset[0] = FFFFFFFFFFFFFFFFULL, lo_unset_set[1]=FFFFFFFFFFFFFFFEULL, ...")
 91 | 
 92 |         .def_property_readonly_static("lt_lo", [](py::object) {
 93 |             return py::bytes(reinterpret_cast<const char*>(sdsl::bits::lt_lo), sizeof(sdsl::bits::lt_lo)); })
 94 |         .def_static("lt_lo_", []() {
 95 |             return py::bytes(reinterpret_cast<const char*>(sdsl::bits::lt_lo), sizeof(sdsl::bits::lt_lo));
 96 |         }, "Lookup table for least significant set bit in a byte.")
 97 | 
 98 |         .def_property_readonly_static("lt_sel", [](py::object) {
 99 |             return py::bytes(reinterpret_cast<const char*>(sdsl::bits::lt_sel), sizeof(sdsl::bits::lt_sel));
100 |         })
101 |         .def_static("lt_sel_", []() {
102 |             return py::bytes(reinterpret_cast<const char*>(sdsl::bits::lt_sel), sizeof(sdsl::bits::lt_sel));
103 |          }, "Lookup table for select on bytes. "
104 |             "Entry at idx = 256*j + i equals the position of the "
105 |             "(j+1)-th set bit in byte i. Positions lie in the range [0..7].")
106 | 
107 |         .def_property_readonly_static("ps_overflow", [](py::object) { return as_tuple(sdsl::bits::ps_overflow); })
108 |         .def_static("ps_overflow_", []() { return as_tuple(sdsl::bits::ps_overflow); },
109 |                     "Use to help to decide if a prefix sum stored in a byte overflows.")
110 | 
111 |         .def_static("cnt", &sdsl::bits::cnt, "Counts the number of set bits in x.", py::arg("x"))
112 |         .def_static("cnt32", &sdsl::bits::cnt32, "Counts the number of set bits in 32-bit integer x.", py::arg("x"))
113 |         .def_static("hi", &sdsl::bits::hi, "The position (in 0..63) of the most significant set bit "
114 |                                            "in `x` or 0 if x equals 0.", py::arg("x"))
115 |         .def_static("lo", &sdsl::bits::lo, "The position (in 0..63) of the rightmost 1-bit in the 64bit integer x if "
116 |                                            "x>0 and 0 if x equals 0.", py::arg("x"))
117 | 
118 |         .def_static("cnt11", (uint32_t (*) (uint64_t)) &sdsl::bits::cnt11, py::arg("x"),
119 |                     "Count the number of consecutive and distinct 11 in the 64bit integer x.\n"
120 |                     "x: 64bit integer to count the terminating sequence 11 of a Fibonacci code.")
121 |         .def_static("cnt11", cnt11, "Count the number of consecutive and distinct 11 in the 64bit integer x.\n"
122 |                                     "x: 64bit integer to count the terminating sequence 11 of a Fibonacci code.\n"
123 |                                     "c: Carry equals msb of the previous 64bit integer.", py::arg("x"), py::arg("c"))
124 |         .def_static("cnt10", cnt10, "Count 10 bit pairs in the word x.\n"
125 |                                     "x: 64bit integer to count the 10 bit pairs.\n"
126 |                                     "c: Carry equals msb of the previous 64bit integer.", py::arg("x"), py::arg("c"))
127 |         .def_static("cnt01", cnt01, "Count 01 bit pairs in the word x.\n"
128 |                                     "x: 64bit integer to count the 01 bit pairs.\n"
129 |                                     "c: Carry equals msb of the previous 64bit integer.", py::arg("x"), py::arg("c"))
130 | 
131 |         .def_static("map10", &sdsl::bits::map10, py::arg("x"), py::arg("c") = 0,
132 |                     "Map all 10 bit pairs to 01 or 1 if c=1 and the lsb=0. All other pairs are mapped to 00.")
133 |         .def_static("map01", &sdsl::bits::map01, py::arg("x"), py::arg("c") = 1,
134 |                     "Map all 01 bit pairs to 01 or 1 if c=1 and the lsb=0. All other pairs are mapped to 00.")
135 | 
136 |         .def_static("sel",  sel, py::arg("x"), py::arg("i"),
137 |                     "Calculate the position of the i-th rightmost 1 bit in the 64bit integer x\n"
138 |                     "x: 64bit integer.\ni: Argument i must be in the range [1..cnt(x)].")
139 |         .def_static("sel11", sel11, py::arg("x"), py::arg("i"), py::arg("c") = 0,
140 |                      "The position (in 1..63) of the i-th 11-bit-pattern which terminates a Fibonacci coded integer in x if "
141 |                      "x contains at least i 11-bit-patterns and a undefined value otherwise.\n"
142 |                      "x: 64 bit integer.\n"
143 |                      "i: Index of 11-bit-pattern. i in [1..cnt11(x)]\n"
144 |                      "c: Carry bit from word before")
145 |         .def_static("hi11", &sdsl::bits::hi11, py::arg("x"),
146 |                     "The position (in 1..63) of the leftmost 1 of the leftmost 11-bit-pattern which "
147 |                     "terminates a Fibonacci coded integer in x if x contains a 11-bit-pattern and 0 otherwise.")
148 |         .def_static("rev", &sdsl::bits::rev, "reverses a given 64 bit word")
149 |         ;
150 | 
151 |     m.attr("all_set") = bits_cls.attr("all_set_");
152 |     m.attr("deBruijn64") = bits_cls.attr("deBruijn64_");
153 |     m.attr("lt_deBruijn_to_idx") = bits_cls.attr("lt_deBruijn_to_idx_");
154 |     m.attr("lt_fib") = bits_cls.attr("lt_fib_");
155 |     m.attr("lt_cnt") = bits_cls.attr("lt_cnt_");
156 |     m.attr("lt_hi") = bits_cls.attr("lt_hi_");
157 |     m.attr("lo_set") = bits_cls.attr("lo_set_");
158 |     m.attr("lo_unset") = bits_cls.attr("lo_unset_");
159 |     m.attr("lt_lo") = bits_cls.attr("lt_lo_");
160 |     m.attr("lt_sel") = bits_cls.attr("lt_sel_");
161 |     m.attr("ps_overflow") = bits_cls.attr("ps_overflow_");
162 | 
163 |     m.attr("cnt") = bits_cls.attr("cnt");
164 |     m.attr("cnt32") = bits_cls.attr("cnt32");
165 |     m.attr("hi") = bits_cls.attr("hi");
166 |     m.attr("lo") = bits_cls.attr("lo");
167 |     m.attr("cnt11") = bits_cls.attr("cnt11");
168 |     m.attr("cnt10") = bits_cls.attr("cnt10");
169 |     m.attr("cnt01") = bits_cls.attr("cnt01");
170 |     m.attr("map10") = bits_cls.attr("map10");
171 |     m.attr("map01") = bits_cls.attr("map01");
172 |     m.attr("sel") = bits_cls.attr("sel");
173 |     m.attr("sel11") = bits_cls.attr("sel11");
174 |     m.attr("hi11") = bits_cls.attr("hi11");
175 |     m.attr("rev") = bits_cls.attr("rev");
176 | }
177 | 


--------------------------------------------------------------------------------
/pysdsl/calc.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <numeric>
#include <stdexcept>
#include <string>
#include <tuple>
#include <utility>

#include <pybind11/pybind11.h>

#include "operations/creation.hpp"
#include "operations/iteration.hpp"
#include "operations/sizes.hpp"
 11 | 
 12 | 
 13 | namespace py = pybind11;
 14 | 
 15 | 
 16 | namespace {
 17 |     using detail::cbegin;
 18 |     using detail::cend;
 19 | }  // namespace
 20 | 
 21 | 
 22 | template <class Sequence, typename T = typename Sequence::value_type>
 23 | inline auto add_read_access(py::class_<Sequence>& cls)
 24 | {
 25 |     typedef typename Sequence::value_type value_type;
 26 | 
 27 |     add_iteration(cls);
 28 | 
 29 |     cls.def(
 30 |         "__getitem__",
 31 |         [](const Sequence &self, size_t position) -> T {
 32 |             if (position >= detail::size(self)) {
 33 |                 throw std::out_of_range(std::to_string(position)); }
 34 |             return self[position]; });
 35 |     cls.def(
 36 |         "__getitem__",
 37 |         [](const Sequence &self, int64_t position) -> T {
 38 |             auto abs_position = std::abs(position);
 39 |             if (position >= 0) {
 40 |                 throw std::exception(); }
 41 |             if (abs_position > detail::size(self)) {
 42 |                 throw std::out_of_range(std::to_string(position)); }
 43 |             return self[detail::size(self) - abs_position]; });
 44 |     cls.def(
 45 |         "__getitem__",
 46 |         [](const Sequence& self, py::slice slice) {
 47 |             size_t start, stop, step, slicelength;
 48 |             if (!slice.compute(detail::size(self), &start, &stop, &step,
 49 |                                &slicelength)) {
 50 |                 throw py::error_already_set{}; }
 51 | 
 52 |             typename
 53 |             detail::IntermediateVector<Sequence, T>::type result(slicelength);
 54 | 
 55 |             for (size_t i = 0; i < slicelength; i++) {
 56 |                 result[i] = self[start];
 57 |                 start += step; }
 58 |             return result; });
 59 |             //return construct_from<Sequence>(result); });
 60 |     return cls;
 61 | }
 62 | 
 63 | 
 64 | template <class Sequence, typename T = typename Sequence::value_type>
 65 | inline
 66 | auto add_std_algo(py::class_<Sequence>& cls)
 67 | {
 68 |     typedef typename Sequence::value_type value_type;
 69 | 
 70 |     cls.def(
 71 |         "__contains__",
 72 |         [](const Sequence &self, typename Sequence::value_type element) {
 73 |             return std::find(cbegin(self),
 74 |                              cend(self), element) != cend(self); },
 75 |         py::call_guard<py::gil_scoped_release>());
 76 |     cls.def(
 77 |         "max",
 78 |         [](const Sequence &self) -> T {
 79 |             return *std::max_element(cbegin(self), cend(self)); },
 80 |         py::call_guard<py::gil_scoped_release>());
 81 |     cls.def(
 82 |         "argmax",
 83 |         [](const Sequence &self) {
 84 |             return std::distance(cbegin(self),
 85 |                                  std::max_element(cbegin(self),
 86 |                                                   cend(self))); },
 87 |         py::call_guard<py::gil_scoped_release>());
 88 |     cls.def(
 89 |         "min",
 90 |         [](const Sequence &self) -> T {
 91 |             return *std::min_element(cbegin(self), cend(self)); },
 92 |         py::call_guard<py::gil_scoped_release>());
 93 |     cls.def(
 94 |         "argmin",
 95 |         [](const Sequence &self) {
 96 |             return std::distance(cbegin(self),
 97 |                                  std::min_element(cbegin(self),
 98 |                                                   cend(self))); },
 99 |         py::call_guard<py::gil_scoped_release>());
100 |     cls.def(
101 |         "minmax",
102 |         [](const Sequence &self) -> std::pair<T, T> {
103 |             auto result = std::minmax_element(cbegin(self),
104 |                                               cend(self));
105 |             return std::make_pair(*std::get<0>(result),
106 |                                   *std::get<1>(result)); },
107 |         py::call_guard<py::gil_scoped_release>());
108 |     cls.def(
109 |         "sum",
110 |         [](const Sequence &self) {
111 |             return std::accumulate(cbegin(self), cend(self),
112 |                                    uint64_t(0)); },
113 |         py::call_guard<py::gil_scoped_release>());
114 |     cls.def(
115 |         "all",
116 |         [](const Sequence &self) {
117 |             return std::all_of(
118 |                 cbegin(self), cend(self),
119 |                 [] (const value_type value) -> bool {
120 |                     return value; }); },
121 |         py::call_guard<py::gil_scoped_release>());
122 |     cls.def(
123 |         "any",
124 |         [](const Sequence &self) {
125 |             return std::any_of(
126 |                 cbegin(self), cend(self),
127 |                 [] (const value_type value) -> bool {
128 |                     return value; }); },
129 |         py::call_guard<py::gil_scoped_release>());
130 |     cls.def(
131 |         "none",
132 |         [](const Sequence &self) {
133 |             return std::none_of(
134 |                 cbegin(self), cend(self),
135 |                 [] (const value_type value) -> bool {
136 |                     return value; }); },
137 |         py::call_guard<py::gil_scoped_release>());
138 |     cls.def(
139 |         "is_sorted",
140 |         [](const Sequence &self) {
141 |             return std::is_sorted(cbegin(self), cend(self)); },
142 |         py::call_guard<py::gil_scoped_release>());
143 | 
144 |     return cls;
145 | }
146 | 


--------------------------------------------------------------------------------
/pysdsl/docstrings.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | 
  4 | const char* doc_int_vector(
  5 |     "This generic vector class could be used to generate a vector that "
  6 |     "contains integers of fixed width `w` in [1..64]."
  7 | );
  8 | 
  9 | const char* doc_capacity(
 10 |     "Returns the size of the occupied bits of the int_vector. The capacity of "
 11 |     "a int_vector is greater or equal to the bit_size of the "
 12 |     "vector: capacity ≥ bit_size)."
 13 | );
 14 | 
 15 | const char* doc_bit_compress(
 16 |     "Bit compress the int_vector. Determine the biggest value X "
 17 |     "and then set the int_width to the smallest possible so that "
 18 |     "we still can represent X."
 19 | );
 20 | 
 21 | const char* doc_set_to_value(
 22 |     "Set all entries of int_vector to value k. This method pre-calculates "
 23 |     "the content of at most 64 words and then repeatedly inserts these words."
 24 | );
 25 | 
 26 | const char* doc_dac_vector(
 27 |     "A generic immutable space-saving vector class for unsigned integers.\n"
 28 |     "The values of a dac_vector are immutable after the constructor call.\n"
 29 |     "The `escaping` technique is used to encode values.\n"
 30 |     "This is defined as follows (see [1]):\n"
 31 |     "A k-bit integer is split into `K=k/(b-1)` bits each and "
 32 |     "encoded into `K` blocks of `b` bits each. All but the last block "
 33 |     "are marked with by a 1 in the most significant bit. Escaping with "
 34 |     "`b=8` is also known as vbyte-coding (see [2]). A experimental study "
 35 |     "of using escaping for the LCP array is given in [3].\n"
 36 |     "Time complexity: Order(log n/b) worst case, where b is the number "
 37 |     "of bits in a block\nReferences:\n"
 38 |     "[1] F. Transier and P. Sanders: `Engineering Basic Search Algorithms "
 39 |     "of an In-Memory Text Search Engine`, ACM Transactions on "
 40 |     "Information Systems, Vol. 29, No.1, Article 2, 2010\n"
 41 |     "[2] H.E. Williams and J. Zobel: `Compressing integers for fast file "
 42 |     "access`, Computing Journal Vol 43, No.3, 1999\n"
 43 |     "[3] N. Brisboa, S. Ladra, G. Navarro: `Directly addressable "
 44 |     "variable-length codes'', Proceedings of SPIRE 2009."
 45 | );
 46 | 
 47 | const char* doc_dac_vector_dp(
 48 |     "A generic immutable space-saving vector class for unsigned integers.\n"
 49 |     "The values of a dac_vector are immutable after the constructor call.\n"
 50 |     "The \"escaping\" technique is used to encode values. Bit widths of "
 51 |     "each encoding level are chosen optimally via dynamic programming.\n"
 52 |     "References\n [1] N. Brisaboa and S. Ladra and G. Navarro: `DACs: "
 53 |     "Bringing Direct Access to Variable-Length Codes`, "
 54 |     "Information Processing and Management (IPM) 2013"
 55 | );
 56 | 
 57 | const char* doc_bit_vector_il(
 58 |     "A bit vector which interleaves the original bit_vector with rank "
 59 |     "information. \nThis class is a uncompressed bit vector "
 60 |     "representation. It copies the original bit_vector and interleaves "
 61 |     "the data every t_bs bits with a cumulative sum of set bits before "
 62 |     "the current position. Each cumulative sum is stored in a 64 bit "
 63 |     "word."
 64 | );
 65 | 
 66 | const char* doc_rrr_vector(
 67 |     "An H_0-compressed bitvector representation.\n"
 68 |     "References:\n"
 69 |     "— Rasmus Pagh, Low redundancy in dictionaries with O(1) worst "
 70 |     "case lookup time, Technical Report 1998. "
 71 |     "ftp://ftp.cs.au.dk/BRICS/Reports/RS/98/28/BRICS-RS-98-28.pdf, "
 72 |     "Section 2.\n"
 73 |     "— Rajeev Raman, V. Raman and S. Srinivasa Rao, Succinct Indexable "
 74 |     "Dictionaries with Applications to representations of k-ary trees "
 75 |     "and multi-sets. SODA 2002.\n"
 76 |     "— Francisco Claude, Gonzalo Navarro: Practical Rank/Select "
 77 |     "Queries over Arbitrary Sequences. SPIRE 2008: 176-187\n"
 78 |     "— On the fly-decoding and encoding was discovered in; Gonzalo "
 79 |     "Navarro, Eliana Providel: Fast, Small, Simple Rank/Select on "
 80 |     "Bitmaps. SEA 2012"
 81 | );
 82 | 
 83 | const char* doc_sd_vector(
 84 |     "A bit vector which compresses very sparse populated bit vectors "
 85 |     "by representing the positions of 1 by the Elias-Fano "
 86 |     "representation for non-decreasing sequences\n"
 87 |     "References:\n"
 88 |     "— P. Elias: ''Efficient storage and retrieval by content and "
 89 |     "address of static files'', Journal of the ACM, 1974\n"
 90 |     "— R. Fano: ''On the number of bits required to implement an "
 91 |     "associative memory'', Memorandum 61. Computer Structures Group, "
 92 |     "Project MAC, MIT, 1971\n"
 93 |     "— D. Okanohara, K. Sadakane: ''Practical Entropy-Compressed "
 94 |     "Rank/Select Dictionary'', Proceedings of ALENEX 2007."
 95 | );
 96 | 
 97 | const char* doc_hyb_vector(
 98 |     "A hybrid-encoded compressed bitvector representation\n"
 99 |     "References:\n— Juha Karkkainen, Dominik Kempa and "
100 |     "Simon J. Puglisi. ''Hybrid Compression of Bitvectors for the "
101 |     "FM-Index.'' DCC 2014."
102 | );
103 | 
104 | const char* doc_rank_v(
105 |     "A rank structure proposed by Sebastiano Vigna\nSpace complexity: "
106 |     "0.25n for a bit vector of length n bits.\n\nThe superblock size is "
107 |     "512. Each superblock is subdivided into 512/64 = 8 blocks. "
108 |     "So absolute counts for the superblock add 64/512 bits on top of each "
109 |     "supported bit. Since the first of the 8 relative count values is 0, "
110 |     "we can fit the remaining 7 (each of width log(512)=9) in a 64bit "
111 |     "word. The relative counts add another 64/512 bits on top of each "
112 |     "supported bit.\nIn total this results in 128/512=25% overhead.\n"
113 |     "Reference\nSebastiano Vigna: Broadword Implementation of Rank/Select "
114 |     "Queries. WEA 2008: 154-168"
115 | );
116 | 
// Description of the rank_v5 rank structure and its space overhead.
const char* doc_rank_v5(
    "A class supporting rank queries in constant time.\n"
    "Space complexity: 0.0625n bits for a bit vector of length n bits.\n\n"
    "The superblock size is 2048. Each superblock is subdivided into "
    "2048/(6*64) = 5 blocks (with some bit remaining). So absolute counts "
    "for the superblock add 64/2048 bits on top of each supported bit. "
    "Since the first of the 6 relative count values is 0, we can fit the "
    "remaining 5 (each of width log(2048)=11) in a 64 bit word. The "
    "relative counts add another 64/2048 bits on top of each "
    "supported bit. In total this results in 128/2048=6.25% overhead."
);
128 | 
129 | const char* doc_rank_scan(
130 |     "A class supporting rank queries in linear time.\n"
131 |     "Space complexity: Constant.\n"
132 |     "Time complexity: Linear in the size of the supported vector."
133 | );
134 | 
// Description of the select_mcl select structure and its space usage.
const char* doc_select_mcl(
    "A class supporting constant time select queries.\n"
    "Space usage: The space usage of the data structure depends on the "
    "number `m` of ones in the original bitvector `b`. We store the "
    "position of every 4096th set bit (called L1-sampled bits) of `b`. "
    "This takes in the worst case (m/4096) log(n) ≤ (n/64) bits.\n"
    "Next,\n(1) if the distance of two adjacent L1-sampled bits "
    "b[i] and b[j] is greater or equal than log^4 (n), then we store "
    "each of the 4096 positions of the set `b` in [i..j-1] with "
    "log(n) bits. This results in at most "
    "`4096 log(n) / log^4(n)=4096 / log^3(n)` bits per bit.\nFor a "
    "bitvector of 4GB, i.e. log(n) = 35 we get about 0.01 bits per bit.\n"
    "If the j-i+1 < log^4(n) then\n(2) we store the relative position of "
    "every 64th set bit (called L2-sampled bits) in b[i..j-1] in at most "
    "4 log log (n) bits per L2-sampled bits.\nA pessimistic upper bound "
    "for the space would be `4 log log (n) / 64 ≤ 24/64 = 0.375` bit per "
    "bit (since `log log (n) ≤ 6`). It is very pessimistic, since we store "
    "the relative position in `log log (j-i+1) ≤ log log (n)` bits.\n\n"
    "The implementation is a practical variant of the following reference:"
    "\nDavid Clark: PhD Thesis: Compact Pat Trees, University of Waterloo, "
    "1996 (Section 2.2.2). "
    "http://www.nlc-bnc.ca/obj/s4/f2/dsk3/ftp04/nq21335.pdf"
);
158 | 
159 | const char* doc_select_scan(
160 |     "A class supporting linear time select queries.\n"
161 |     "Space complexity: Constant\n"
162 |     "Time complexity: Linear in the size of the supported vector."
163 | );
164 | 
// Description of the integer-alphabet wavelet tree and its space complexity.
const char* doc_wtint(
    "A wavelet tree class for integer sequences.\nSpace complexity: "
    "Order(n log(|Sigma|)) bits, where `n` is the size of the vector the "
    "wavelet tree was build for."
);
170 | 
// Description of the rank/select-oriented Golynski et al. wavelet tree.
const char* doc_wt_gmr_rs(
    "A wavelet tree class for integer sequences.\n"
    "This is an implementation of the first proposal in the SODA paper "
    "of Golynski et al. which support fast rank and select, but not fast "
    "access.\nReferences:\n[1] A. Golynski, J. Munro and S. Rao: "
    "\"Rank/select operations on large alphabets: a tool for text "
    "indexing\", Proceedings of SODA 2006."
);
179 | 
// Description of the access-capable Golynski et al. wavelet tree.
const char* doc_wt_gmr(
    "A wavelet tree class for integer sequences.\n"
    "This is an implementation of the second proposal in the SODA paper "
    "of Golynski et al. which supports fast access, inverse select, rank, "
    "and select.\nReferences:\n[1] A. Golynski, J. Munro and S. Rao: "
    "\"Rank/select operations on large alphabets: a tool for text "
    "indexing\", Proceedings of SODA 2006."
);
188 | 
// Description of the wt_ap wavelet tree, its space complexity and reference.
const char* doc_wt_ap(
    "A wavelet tree class for integer sequences.\nSpace complexity:\n"
    "Order(n) (H₀ + 1) bits, where n is the size of the vector the wavelet "
    "tree was build for.\nReferences:\n"
    "[1] J. Barbay, F. Claude, T. Gagie, G. Navarro and Y. Nekrich: "
    "''Efficient Fully-Compressed Sequence Representations''"
);
196 | 
197 | const char* doc_wt_huff(
198 |     "A Huffman-shaped wavelet tree.\n"
199 |     "Space complexity: `n * H₀ + 2 * |Sigma| * log n` bits, where n is the "
200 |     "size of the vector the wavelet tree was build for.\n"
201 |     "A wavelet tree is build for a vector of characters over the byte alphabet "
202 |     "Sigma. If you need a wavelet tree for a integer alphabet you should use "
203 |     "`WtInt`.\nThe wavelet tree `wt` consists of a tree of bitvectors and "
204 |     "provides three efficient methods:\n"
205 |     "— The []-operator: wt[i] returns the i-th symbol of vector for which the "
206 |     "wavelet tree was build for.\n"
207 |     "— The rank method: wt.rank(i, c) returns the number of occurrences "
208 |     "of symbol `c` in the prefix [0..i-1] in the vector for which the "
209 |     "wavelet tree was build for.\n"
210 |     "— The select method: wt.select(j, c) returns the index i from "
211 |     "[0..size()-1] of the j-th occurrence of symbol c.\n\n"
212 |     "The idea of using a Huffman shaped wavelet was first mentioned on page 17 "
213 |     "of the following technical report:\n\tVeli Mäkinen and Gonzalo Navarro: "
214 |     "''Succinct Suffix Arrays based on Run-Length Encoding.''\n"
215 |     "Available under: http://swp.dcc.uchile.cl/TR/2005/TR_DCC-2005-004.pdf"
216 | );
217 | 
218 | const char* doc_wm_int(
219 |     "A wavelet tree class for integer sequences.\n"
220 |     "This wavelet tree variant does not store the two children of a node v "
221 |     "aligned with v; it is also known as wavelet matrix.\n"
222 |     "References:\n[1] F. Claude, G. Navarro: ''The Wavelet Matrix'', "
223 |     "Proceedings of SPIRE 2012."
224 | );
225 | 
226 | const char* doc_wt_blcd(
227 |     "A balanced wavelet tree.\n"
228 |     "Space complexity: Order(n * log(|Sigma|) + 2 * |Sigma| * log(n)) bits, "
229 |     "where n is the size of the vector the wavelet tree was build for.\n"
230 |     "Reference: Roberto Grossi, Ankur Gupta, Jeffrey Scott Vitter: "
231 |     "\"High-order entropy-compressed text indexes\". Proceedings of the 14th "
232 |     "Annual ACM-SIAM Symposium on Discrete Algorithms (SODA 2003)."
233 | );
234 | 
235 | const char* doc_wt_hutu(
236 |     "A Hu-Tucker-shaped wavelet tree.\n"
237 |     "Space complexity: Almost n * H₀ + 2 * |Sigma| * log(n) bits, where "
238 |     "n is the size of the vector the wavelet tree was build for."
239 | );
240 | 
// Description of the uncompressed suffix array class.
const char* doc_csa(
    "A class for the uncompressed suffix array (SA).\n"
    "This class stores the information of the suffix array and the inverse "
    "suffix array in uncompressed form.\nIn contrast to this class, classes "
    "like csa_sada, and csa_wt store the suffix array and inverse suffix array "
    "data in compressed form.\n\nThe interface of this class is exactly the "
    "same as for the compressed indexes. This is the reason why it is in the "
    "group of compressed suffix arrays.\n\nSpace complexity: "
    "2n log n bits, where n equals the size of the suffix array."
);
251 | 
252 | const char* doc_sada(
253 |     "A class for the Compressed Suffix Array (CSA) proposed by Sadakane for "
254 |     "practical implementation."
255 | );
256 | 
// Description of the wavelet-tree-based compressed suffix array.
const char* doc_csa_wt(
    "A class for the Compressed Suffix Array (CSA) based on a Wavelet Tree "
    "(WT) of the Burrows-Wheeler Transform of the original text."
);
261 | 
262 | const char* doc_sorted_int_stack(
263 |     "A stack class which can contain integers in strictly increasing order."
264 | );
265 | 


--------------------------------------------------------------------------------
/pysdsl/io.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include <fstream>
  4 | 
  5 | #include <sdsl/io.hpp>
  6 | 
  7 | #include <pybind11/pybind11.h>
  8 | 
  9 | #include "operations/iteration.hpp"
 10 | 
 11 | 
 12 | namespace py = pybind11;
 13 | 
 14 | 
 15 | template <class T> class support_helper;
 16 | 
 17 | 
 18 | template <class T, typename value_type = typename T::value_type>
 19 | decltype(auto) to_string(const T &self, const size_t max_elements=100,
 20 |                          const char* sep=", ", const char* start="[",
 21 |                                                const char* ends="]")
 22 | {
 23 |     std::ostringstream fout;
 24 |     fout.exceptions(std::ostringstream::failbit | std::ostringstream::badbit);
 25 | 
 26 |     fout << start;
 27 |     size_t count = 0;
 28 |     for (auto i = detail::cbegin(self); i != detail::cend(self); i++) {
 29 |         if (count) fout << sep;
 30 | 
 31 |         const value_type value = *i;
 32 | 
 33 |         fout << value;
 34 | 
 35 |         if (max_elements > 0 && count >= max_elements) {
 36 |             fout << sep << "...(" << self.size() << " elements)";
 37 |             break; }
 38 |         count++; }
 39 |     fout << ends;
 40 | 
 41 |     return fout.str();
 42 | }
 43 | 
 44 | 
 45 | // Registers the textual representations of T on `cls`:
 46 | //   * __str__    - to_string() with its default arguments;
 47 | //   * to_string  - the free function itself, with keyword arguments
 48 | //                  max_elements / sep / begin / end;
 49 | //   * __repr__   - "<ClassName [..]>" limited to 100 elements.
 50 | // S is the type every element is converted to before printing.
 51 | template <class T, typename S = typename T::value_type>
 52 | inline auto add_to_string(py::class_<T>& cls)
 53 | {
 54 |     // The class name is read once and copied into the __repr__ closure.
 55 |     const std::string name = py::cast<std::string>(cls.attr("__name__"));
 56 | 
 57 |     auto as_str = [](const T& self) { return to_string<T, S>(self); };
 58 | 
 59 |     auto as_repr = [name] (const T& self) {
 60 |         return "<" + name + to_string<T, S>(self, 100, ", ", " [", "]>"); };
 61 | 
 62 |     cls.def("__str__", as_str);
 63 | 
 64 |     cls.def("to_string", &to_string<T, S>,
 65 |             py::arg("max_elements") = 0,
 66 |             py::arg("sep") = ", ",
 67 |             py::arg("begin") = "[",
 68 |             py::arg("end") = "]");
 69 | 
 70 |     cls.def("__repr__", as_repr);
 71 | 
 72 |     return cls;
 73 | }
 65 | 
 66 | 
 67 | // Adds pickling and file (de)serialization to `cls`.
 68 | //
 69 | // Registered members:
 70 | //   * pickle support built on T::serialize() / T::load();
 71 | //   * store_to_file / store_to_checked_file - instance methods returning the
 72 | //     result of the sdsl:: function of the same name;
 73 | //   * load_from_file / load_from_checked_file - static methods returning a
 74 | //     new T, or throwing std::runtime_error when sdsl reports a failure.
 75 | //     The historical misspelling `load_from_checkded_file` is kept as an
 76 | //     alias so existing callers keep working.
 77 | //
 78 | // `args` are the constructor arguments of the T that is loaded into. They
 79 | // are copied into the closures: the bound callables outlive this function
 80 | // call, so capturing the parameter pack by reference would leave them with
 81 | // dangling references.
 82 | template <class T, typename... TCtorArgs>
 83 | inline auto add_serialization(py::class_<T>& cls, TCtorArgs&&... args)
 84 | {
 85 |     cls.def(py::pickle(
 86 |         [](const T& self){
 87 |             std::stringstream fout;
 88 |             self.serialize(fout);
 89 |             return py::bytes(fout.str()); },
 90 |         [args...](const py::bytes& serialized){
 91 |             T result(args...);
 92 |             std::stringstream fin(serialized);
 93 |             result.load(fin);
 94 |             return result; }));
 95 | 
 96 |     cls.def(
 97 |         "store_to_file",
 98 |         [](const T &self, const std::string& file_name) {
 99 |             return sdsl::store_to_file(self, file_name); },
100 |         py::arg("file_name"),
101 |         py::call_guard<py::gil_scoped_release>());
102 | 
103 |     cls.def_static(
104 |         "load_from_file",
105 |         [args...](const std::string& file_name) {
106 |             T self(args...);
107 |             if (sdsl::load_from_file(self, file_name)) {
108 |                 return self; }
109 |             throw std::runtime_error(
110 |                 "Can't load from file: " + file_name); },
111 |         py::arg("file_name"),
112 |         py::call_guard<py::gil_scoped_release>());
113 | 
114 |     cls.def(
115 |         "store_to_checked_file",
116 |         [](const T &self, const std::string& file_name) {
117 |             return sdsl::store_to_checked_file(self, file_name); },
118 |         py::arg("file_name"),
119 |         py::call_guard<py::gil_scoped_release>() );
120 | 
121 |     auto load_checked = [args...](const std::string& file_name) {
122 |         T self(args...);
123 |         if (sdsl::load_from_checked_file(self, file_name)) {
124 |             return self; }
125 |         throw std::runtime_error(
126 |             "Can't load from checked file: " + file_name); };
127 | 
128 |     cls.def_static(
129 |         "load_from_checked_file",
130 |         load_checked,
131 |         py::arg("file_name"),
132 |         py::call_guard<py::gil_scoped_release>());
133 | 
134 |     // Backward-compatible alias for the originally misspelled name.
135 |     cls.def_static(
136 |         "load_from_checkded_file",
137 |         load_checked,
138 |         py::arg("file_name"),
139 |         py::call_guard<py::gil_scoped_release>());
140 | 
141 |     return cls;
142 | }
115 | 
116 | // Binds structure dumps (JSON/HTML, to a file or as a string) and memory-size properties to `cls`.
117 | template <class X, class T = typename X::type>
118 | inline auto add_description(X& cls)
119 | {
120 |     typedef typename X::type P;  // P: type bound by `cls`; T: type handed to the sdsl:: functions
121 |     cls.def(
122 |         "write_structure_json",
123 |         [](const P& self, const std::string& file_name) {
124 |             std::ofstream fout;
125 |             fout.open(file_name, std::ios::out | std::ios::binary);
126 |             if (!fout.good()) throw std::runtime_error("Can't write to file");
127 |             sdsl::write_structure<sdsl::JSON_FORMAT, T>(self, fout);
128 |             if (!fout.good()) throw std::runtime_error("Error during write");
129 |             fout.close(); },
130 |         py::arg("file_name"),
131 |         py::call_guard<py::gil_scoped_release>());
132 |     cls.def(
133 |         "write_structure_html",
134 |         [](const P& self, const std::string& file_name) {
135 |             std::ofstream fout;
136 |             fout.open(file_name, std::ios::out | std::ios::binary);
137 |             if (!fout.good()) throw std::runtime_error("Can't write to file");
138 |             sdsl::write_structure<sdsl::HTML_FORMAT, T>(self, fout);
139 |             if (!fout.good()) throw std::runtime_error("Error during write");
140 |             fout.close(); },
141 |         py::arg("file_name"),
142 |         py::call_guard<py::gil_scoped_release>());
143 |     // The same dumps, returned as strings instead of being written to a file.
144 |     cls.def_property_readonly(
145 |         "structure_json",
146 |         [](const P& self) {
147 |             std::ostringstream fout;
148 |             fout.exceptions(std::ostringstream::failbit |
149 |                             std::ostringstream::badbit);
150 | 
151 |             sdsl::write_structure<sdsl::JSON_FORMAT, T>(self, fout);
152 |             return fout.str(); },
153 |         py::call_guard<py::gil_scoped_release>());
154 | 
155 |     cls.def_property_readonly(
156 |         "structure_html",
157 |         [](const P& self) {
158 |             std::ostringstream fout;
159 |             fout.exceptions(std::ostringstream::failbit |
160 |                             std::ostringstream::badbit);
161 | 
162 |             sdsl::write_structure<sdsl::HTML_FORMAT, T>(self, fout);
163 |             return fout.str();},
164 |         py::call_guard<py::gil_scoped_release>());
165 |     // `structure` feeds the JSON dump to Python's json.loads; no GIL-release guard is attached to it.
166 |     cls.def_property_readonly(
167 |         "structure",
168 |         [](const P& self) {
169 |             std::ostringstream fout;
170 |             fout.exceptions(std::ostringstream::failbit |
171 |                             std::ostringstream::badbit);
172 | 
173 |             sdsl::write_structure<sdsl::JSON_FORMAT, T>(self, fout);
174 |             auto json = py::module::import("json");
175 |             return json.attr("loads")(fout.str()); });
176 |     cls.def_property_readonly(
177 |         "size_in_mega_bytes",
178 |         [](const P &self) { return sdsl::size_in_mega_bytes<T>(self); });
179 |     cls.def_property_readonly(
180 |         "size_in_bytes",
181 |         [](const P &self) { return sdsl::size_in_bytes<T>(self); });
182 | 
183 |     return cls;
184 | }
185 | 
186 | 
187 | // Overload for classes bound to support_helper<T>: forwards to the generic
188 | // add_description() with T (the wrapped support type) as the type handed to
189 | // the sdsl:: functions.
190 | template <class T>
191 | inline auto add_description(py::class_<support_helper<T>>& cls)
192 | {
193 |     using helper_class = py::class_<support_helper<T>>;
194 | 
195 |     return add_description<helper_class, T>(cls);
196 | }
192 | 


--------------------------------------------------------------------------------
/pysdsl/memory_monitor.py:
--------------------------------------------------------------------------------
 1 | from pysdsl import _memory_monitor
 2 | 
 3 | 
 4 | class MemoryMonitor(object):
 5 | 
 6 |     def __init__(self, out_html=None, out_json=None):
 7 |         self.out_html = out_html
 8 |         self.out_json = out_json
 9 | 
10 |     def __enter__(self):
11 |         _memory_monitor.start()
12 | 
13 |     def __exit__(self, exc_type, exc_value, traceback):
14 |         _memory_monitor.stop()
15 | 
16 |         if self.out_html is not None:
17 |             _memory_monitor.report_html(self.out_html)
18 | 
19 |         if self.out_json is not None:
20 |             _memory_monitor.report_json(self.out_json)
21 | 


--------------------------------------------------------------------------------
/pysdsl/operations/creation.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include <algorithm>
  4 | #include <utility>
  5 | #include <tuple>
  6 | #include <type_traits>
  7 | 
  8 | #include <sdsl/vectors.hpp>
  9 | #include <sdsl/construct.hpp>
 10 | 
 11 | #include <pybind11/pybind11.h>
 12 | 
 13 | #include "operations/iteration.hpp"
 14 | #include "operations/sizes.hpp"
 15 | #include "types/pysequence.hpp"
 16 | #include "util/tupletricks.hpp"
 17 | 
 18 | 
 19 | namespace py = pybind11;
 20 | 
 21 | // Compile-time helpers used by the construct_from() overloads in this file.
 22 | namespace detail
 23 | {
 24 | // Maps T to the sdsl vector type used as intermediate storage; the general case is sdsl::int_vector<>.
 25 | template <class T, typename value_type = typename T::value_type,
 26 |           bool is_bitvector1 = std::is_same<sdsl::int_vector<1>, T>::value>
 27 | struct IntermediateVector { using type = sdsl::int_vector<>; };
 28 | 
 29 | // Types whose value_type is bool use sdsl::int_vector<1>.
 30 | template <class T, bool b>
 31 | struct IntermediateVector<T, bool, b> { using type = sdsl::int_vector<1>; };
 32 | 
 33 | // sdsl::int_vector<N> with is_bitvector1 == false is its own intermediate type.
 34 | template <uint8_t N, typename value_type>
 35 | struct IntermediateVector<sdsl::int_vector<N>, value_type, false>
 36 | {
 37 |     using type = sdsl::int_vector<N>;
 38 | };
 39 | 
 40 | // Viable only when T::iterator differs from T::const_iterator and T has a callable begin().
 41 | template <
 42 |     typename T,
 43 |     typename = typename std::enable_if<
 44 |             !std::is_same<typename T::iterator,
 45 |                           typename T::const_iterator>::value
 46 |         >::type,
 47 |     typename = decltype(std::declval<T>().begin())>
 48 | std::true_type has_non_const_begin_impl(T *);
 49 | // Fallback selected when the template above is removed by SFINAE.
 50 | std::false_type has_non_const_begin_impl(...);
 51 | 
 52 | // std::true_type or std::false_type, depending on which overload is selected for a T*.
 53 | template <class T>
 54 | using has_non_const_begin = decltype(has_non_const_begin_impl(
 55 |         std::declval<T*>()));
 56 | 
 57 | // Tag types used as defaulted second parameters of the construct_from() overloads.
 58 | struct construct_explicit { int value; };
 59 | struct construct_iter { int value; };
 60 | struct construct_copy_empty { int value; };
 61 | struct construct_copy_size { int value; };
 62 | 
 63 | }  // namespace detail
 64 | 
 65 | 
 66 | // Direct construction: the overload that takes part in overload resolution
 67 | // when T is constructible from `const From&`. Returns T(obj).
 68 | template <class T, class From,
 69 |           typename /* direct construction */ =
 70 |               std::enable_if_t<std::is_constructible<T, const From&>::value>>
 71 | constexpr T construct_from(const From& obj,
 72 |                            detail::construct_explicit /* unused */ = {})
 73 | {
 74 |     return T(obj);
 75 | }
 74 | 
 75 | 
 76 | // Iterator-pair construction: enabled when T cannot be built from `From`
 77 | // directly but is constructible from the (begin, end) pair produced by
 78 | // detail::cbegin() / detail::cend().
 79 | template <
 80 |     class T, class From,
 81 |     typename /* no direct construction */ =
 82 |         std::enable_if_t<!std::is_constructible<T, const From&>::value>,
 83 |     typename /* construct from iterator */ = std::enable_if_t<
 84 |         std::is_constructible<
 85 |             T,
 86 |             decltype(detail::cbegin(std::declval<From>())),
 87 |             decltype(detail::cend(std::declval<From>()))>::value>>
 88 | constexpr T construct_from(const From& obj,
 89 |                            detail::construct_iter /* unused */ = {})
 90 | {
 91 |     return T(detail::cbegin(obj), detail::cend(obj));
 92 | }
 92 | 
 93 | 
 94 | // Copy into a default-constructed T: enabled when T is default
 95 | // constructible, is not constructible from `From`, from an iterator pair or
 96 | // from a size, and exposes a non-const begin().
 97 | template <
 98 |     class T, class From,
 99 |     typename /* no direct construction */ =
100 |         std::enable_if_t<!std::is_constructible<T, const From&>::value>,
101 |     typename /* no construction from iterator */ = std::enable_if_t<
102 |         !std::is_constructible<
103 |             T,
104 |             decltype(detail::cbegin(std::declval<From>())),
105 |             decltype(detail::cend(std::declval<From>()))>::value>,
106 |     typename /* default constructible */ =
107 |         std::enable_if_t<std::is_constructible<T>::value>,
108 |     typename /* no construction with size */ =
109 |         std::enable_if_t<!std::is_constructible<T, std::size_t>::value>,
110 |     typename /* can be modified after construction */ =
111 |         std::enable_if_t<detail::has_non_const_begin<T>::value>>
112 | inline T construct_from(const From& obj,
113 |                         detail::construct_copy_empty /* unused */ = {})
114 | {
115 |     T filled;
116 |     // NOTE(review): this writes through begin() of a default-constructed T,
117 |     // which assumes T already has room for every element of `obj` - confirm
118 |     // which types actually reach this overload.
119 |     std::copy(detail::cbegin(obj), detail::cend(obj), filled.begin());
120 |     return filled;
121 | }
123 | 
124 | 
125 | // Allocate-then-copy: enabled when T is not constructible from `From` or
126 | // from an iterator pair, but can be constructed with the size of `obj`
127 | // beforehand and exposes a non-const begin() to copy the elements through.
128 | template <
129 |     class T, class From,
130 |     typename /* no direct construction */ =
131 |         std::enable_if_t<!std::is_constructible<T, const From&>::value>,
132 |     typename /* no construction from iterator */ = std::enable_if_t<
133 |         !std::is_constructible<
134 |             T,
135 |             decltype(detail::cbegin(std::declval<From>())),
136 |             decltype(detail::cend(std::declval<From>()))>::value>,
137 |     typename /* construction with known size */ = std::enable_if_t<
138 |         std::is_constructible<
139 |             T, decltype(detail::size(std::declval<From>()))>::value>,
140 |     typename /* can be modified after construction */ =
141 |         std::enable_if_t<detail::has_non_const_begin<T>::value>>
142 | inline T construct_from(const From& obj,
143 |                         detail::construct_copy_size /* unused */ = {})
144 | {
145 |     T sized(detail::size(obj));
146 |     std::copy(detail::cbegin(obj), detail::cend(obj), sized.begin());
147 |     return sized;
148 | }
152 | 
153 | // Construction through sdsl::construct_im: enabled when T is default
154 | // constructible but offers none of the other routes (no construction from
155 | // `From`, from an iterator pair or from a size, and no non-const begin()).
156 | // `obj` is first converted to the intermediate vector type `With`.
157 | template <
158 |     class T, class From,
159 |     class With = typename detail::IntermediateVector<T>::type,
160 |     typename /* default constructible */ =
161 |         std::enable_if_t<std::is_constructible<T>::value>,
162 |     typename /* no direct construction */ =
163 |         std::enable_if_t<!std::is_constructible<T, const From&>::value>,
164 |     typename /* no construction from iterator */ = std::enable_if_t<
165 |         !std::is_constructible<
166 |             T,
167 |             decltype(detail::cbegin(std::declval<From>())),
168 |             decltype(detail::cend(std::declval<From>()))>::value>,
169 |     typename /* no construction with size */ =
170 |         std::enable_if_t<!std::is_constructible<T, std::size_t>::value>,
171 |     typename /* cannot be modified after construction */ =
172 |         std::enable_if_t<!detail::has_non_const_begin<T>::value>>
173 | inline T construct_from(const From& obj)
174 | {
175 |     T built;
176 |     sdsl::construct_im(built, construct_from<With>(obj));
177 | 
178 |     return built;
179 | }
183 | 
184 | // Functors that register construct_from()-based constructors on pybind11 classes.
185 | namespace detail
186 | {
187 |     template <class BindCls,
188 |               typename py_class = py::class_<typename BindCls::type>>
189 |     class add_init_functor
190 |     {
191 |     public:
192 |         add_init_functor(BindCls &cls_to_add_def) : m_cls_to(cls_to_add_def) {}
193 |         // Adds to the wrapped class an init taking `InputCls::type` (argument "v"), run with the GIL released.
194 |         template <typename InputCls>
195 |         decltype(auto) operator()(const InputCls &)
196 |         {
197 |             m_cls_to.def(py::init(
198 |                 [] (const typename InputCls::type& from) {
199 |                     return construct_from<typename BindCls::type>(from); }),
200 |                 py::arg("v"),
201 |                 py::call_guard<py::gil_scoped_release>());
202 |             return m_cls_to;
203 |         }
204 | 
205 |     private:
206 |         BindCls& m_cls_to;
207 |     };
208 | 
209 |     // Applies add_init_functor to a bound class once per element of the `from_each` tuple.
210 |     template <class... From>
211 |     class add_many_inits_to_each
212 |     {
213 |     public:
214 |         add_many_inits_to_each(const std::tuple<From...>& from_each):
215 |                                m_from_each(from_each) {}
216 |         // `for_each` is not defined in this file (presumably util/tupletricks.hpp - confirm).
217 |         template <typename BindCls>
218 |         decltype(auto) operator()(BindCls& cls) {
219 |             return for_each(m_from_each, add_init_functor<BindCls>(cls)); }
220 | 
221 |     private:
222 |         const std::tuple<From...>& m_from_each;  // non-owning reference: the tuple must outlive this functor
223 |     };
224 | 
225 |     // Registers an init (argument "v") that builds T from a py::sequence wrapped in sequence_wrapper<value_type>.
226 |     template <class T>
227 |     class pysequence_init_functor
228 |     {
229 |         typedef typename T::value_type value_type;
230 |     public:
231 |         decltype(auto) operator()(py::class_<T>& cls) {
232 |             return cls.def(py::init(
233 |                 [] (const py::sequence& v) {
234 |                     return construct_from<T>(
235 |                         sequence_wrapper<value_type>(v)); }),
236 |                 py::arg("v")); }
237 |     };
238 | 
239 |     // Generic entry point: forwards to pysequence_init_functor for the type bound by `cls`.
240 |     class add_pysequence_init_functor
241 |     {
242 |     public:
243 |         add_pysequence_init_functor() {}
244 | 
245 |         template <class BindCls,
246 |                   class py_class = py::class_<typename BindCls::type>>
247 |         decltype(auto) operator()(BindCls& cls) {
248 |             return pysequence_init_functor<typename BindCls::type>()(cls); }
249 |     };
250 | }
251 | 
252 | 
253 | // Returns a functor that, applied to a bound class, registers a constructor
254 | // taking a Python sequence.
255 | inline auto make_pysequence_init_functor()
256 | {
257 |     using functor_type = detail::add_pysequence_init_functor;
258 | 
259 |     return functor_type();
260 | }
256 | 
257 | 
258 | // Returns a functor that, applied to a bound class, registers one
259 | // constructor per element of `from_each`. The functor keeps a reference to
260 | // `from_each`, so the tuple has to outlive it.
261 | template <class... From>
262 | inline auto make_inits_many_functor(const std::tuple<From...>& from_each)
263 | {
264 |     using functor_type = detail::add_many_inits_to_each<From...>;
265 | 
266 |     return functor_type(from_each);
267 | }
262 | 


--------------------------------------------------------------------------------
/pysdsl/operations/iteration.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "operations/sizes.hpp"
 4 | #include "util/indexiterator.hpp"
 5 | 
 6 | // Uniform begin/end access: the sequence's own iterators when detected, index-based iterators otherwise.
 7 | namespace detail
 8 | {
 9 |     struct no_iterator {};
10 |     struct has_iterator: no_iterator {};
11 |     // Preferred overloads: viable only when decltype(Sequence::begin) / decltype(Sequence::end) are well-formed.
12 |     template <class Sequence, typename /* unused */ = decltype(Sequence::begin)>
13 |     constexpr
14 |     auto cbegin_impl(const Sequence& sequence, has_iterator /* unused */)
15 |     {
16 |         return sequence.begin();
17 |     }
18 |     // NOTE(review): naming a non-static member function inside decltype is ill-formed, so these two overloads may be SFINAE'd out for ordinary containers - confirm.
19 |     template <class Sequence, typename /* unused */ = decltype(Sequence::end)>
20 |     constexpr
21 |     auto cend_impl(const Sequence& sequence, has_iterator /* unused */)
22 |     {
23 |         return sequence.end();
24 |     }
25 |     // Fallbacks (no_iterator tag): count_index_iterator positioned at index 0 ...
26 |     template <class Sequence>
27 |     constexpr
28 |     auto cbegin_impl(const Sequence& sequence, no_iterator /* unused */)
29 |     {
30 |         return count_index_iterator<Sequence>(&sequence, 0);
31 |     }
32 |     // ... and at index size(sequence).
33 |     template <class Sequence>
34 |     constexpr
35 |     auto cend_impl(const Sequence& sequence, no_iterator /* unused */)
36 |     {
37 |         return count_index_iterator<Sequence>(&sequence, size(sequence));
38 |     }
39 |     // Entry points: the has_iterator tag matches the preferred overloads exactly and reaches the fallbacks through its base class.
40 |     template <class Sequence>
41 |     constexpr auto cbegin(const Sequence& sequence)
42 |     {
43 |         return cbegin_impl(sequence, has_iterator());
44 |     }
45 | 
46 |     template <class Sequence>
47 |     constexpr auto cend(const Sequence& sequence)
48 |     {
49 |         return cend_impl(sequence, has_iterator());
50 |     }
51 | 
52 | }  // namespace detail
53 | 
54 | 
55 | // Adds `__iter__` to `cls`. The Python iterator walks the range
56 | // [detail::cbegin(sequence), detail::cend(sequence)); keep_alive<0, 1> ties
57 | // the lifetime of the iterated object to the returned iterator.
58 | template <class Sequence>
59 | inline auto add_iteration(py::class_<Sequence>& cls)
60 | {
61 |     auto make_python_iterator = [](const Sequence &sequence) {
62 |         auto first = detail::cbegin(sequence);
63 |         auto last = detail::cend(sequence);
64 |         return py::make_iterator(first, last); };
65 | 
66 |     return cls.def("__iter__", make_python_iterator, py::keep_alive<0, 1>());
67 | }
66 | 


--------------------------------------------------------------------------------
/pysdsl/operations/sizes.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <pybind11/pybind11.h>
 4 | #include <sdsl/bit_vectors.hpp>
 5 | 
 6 | 
 7 | namespace py = pybind11;
 8 | 
 9 | // Helpers behind add_max_size() / add_sizes(): optional `max_size` binding and a generic size().
10 | namespace detail
11 | {
12 |     struct no_max_size {};
13 |     struct has_max_size: no_max_size {};
14 |     // Selected when decltype(T::max_size) is well-formed: exposes T::max_size() as a read-only static property.
15 |     template <class T, typename /* unused */ = decltype(T::max_size)>
16 |     inline
17 |     auto add_max_size_impl(py::class_<T>& cls, has_max_size /* unused */)
18 |     {
19 |         cls.def_property_readonly_static(
20 |             "max_size",
21 |             [](py::object /* self */) { return T::max_size(); },
22 |             "Maximum size of the int_vector.");
23 |         return cls;
24 |     }
25 |     // Fallback when the overload above is not viable: returns `cls` without adding anything.
26 |     template <class T>
27 |     constexpr
28 |     auto add_max_size_impl(py::class_<T>& cls, no_max_size /* unused */)
29 |     { return cls; }
30 |     // Generic element count: forwards to seq.size().
31 |     template <class T> constexpr auto size(const T& seq) { return seq.size(); }
32 | }  // namespace detail
33 | 
34 | 
35 | // Adds the static `max_size` property to `cls` when T provides one; the
36 | // has_max_size tag makes overload resolution prefer the detecting overload
37 | // of detail::add_max_size_impl.
38 | template <class T>
39 | constexpr auto add_max_size(py::class_<T>& cls)
40 | {
41 |     return detail::add_max_size_impl(cls, detail::has_max_size{});
42 | }
40 | 
41 | 
42 | // Overload for sdsl::bit_vector_il<B>: its `max_size` property reports
43 | // sdsl::bit_vector::max_size().
44 | template <uint32_t B>
45 | inline auto add_max_size(py::class_<sdsl::bit_vector_il<B>>& cls)
46 | {
47 |     auto plain_max_size = [](py::object /* self */) {
48 |         return sdsl::bit_vector::max_size(); };
49 | 
50 |     cls.def_property_readonly_static(
51 |         "max_size", plain_max_size, "Maximum size of the bit_vector_il.");
52 | 
53 |     return cls;
54 | }
52 | 
53 | 
54 | // Adds the size-related members to `cls`: `max_size` (through
55 | // add_max_size), `__len__` and the read-only `size` property. Both of the
56 | // latter report detail::size(self).
57 | template <class Sequence>
58 | inline auto add_sizes(py::class_<Sequence>& cls)
59 | {
60 |     add_max_size(cls);
61 | 
62 |     const char* const doc = "The number of elements in the container.";
63 |     auto length = [] (const Sequence& self) { return detail::size(self); };
64 | 
65 |     cls.def("__len__", length, doc);
66 |     cls.def_property_readonly("size", length, doc);
67 | 
68 |     return cls;
69 | }
68 | 


--------------------------------------------------------------------------------
/pysdsl/supports.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include <stdexcept>
  4 | #include <string>
  5 | 
  6 | #include <sdsl/bit_vectors.hpp>
  7 | 
  8 | #include <pybind11/pybind11.h>
  9 | 
 10 | #include "docstrings.hpp"
 11 | #include "io.hpp"
 12 | 
 13 | 
 14 | namespace py = pybind11;
 15 | 
 16 | 
 17 | // Pairs a support structure with the bit vector it was built for, so that
 18 | // size() can be answered from the vector while calls are forwarded to the
 19 | // support.
 20 | //
 21 | // Only a reference to the vector is stored: the vector has to outlive the
 22 | // helper.
 23 | template <class T>
 24 | class support_helper
 25 | {
 26 | private:
 27 |     const sdsl::bit_vector& m_vec;
 28 |     const T m_support;
 29 | public:
 30 |     typedef T type;
 31 | 
 32 |     // `support` is taken by value and moved into the member. The previous
 33 |     // `const T&&` parameter made std::move() yield a const rvalue, which
 34 |     // silently selected the copy constructor. Taking it by value accepts
 35 |     // every argument the old signature did.
 36 |     support_helper(const sdsl::bit_vector& vec, T support):
 37 |         m_vec(vec),
 38 |         m_support(std::move(support))
 39 |     {}
 40 | 
 41 |     // Size of the supported vector.
 42 |     auto size() const { return m_vec.size(); }
 43 | 
 44 |     // Forwards the query to the wrapped support.
 45 |     auto operator()(size_t idx) const { return m_support(idx); }
 46 | 
 47 |     // Gives sdsl:: functions access to the wrapped support.
 48 |     operator const T&() const { return m_support; }
 49 | };
 35 | 
 36 | 
 37 | // Registers the Python class `name` for the support type `Base`.
 38 | //
 39 | // The class gets the method `method_name` (documented with `doc_call`,
 40 | // argument "idx", run with the GIL released), `__call__` as an alias of that
 41 | // method, the add_description() members and, when `doc` is given, a class
 42 | // docstring. The method throws std::out_of_range when idx >= self.size().
 43 | template <class Base>
 44 | inline
 45 | auto add_support_class(py::module &m,
 46 |                        const std::string&& name,
 47 |                        const std::string&& method_name,
 48 |                        const std::string&& doc_call,
 49 |                        const char* doc = nullptr)
 50 | {
 51 |     py::class_<Base> cls(m, name.c_str());
 52 | 
 53 |     auto checked_call = [](const Base& self, size_t idx) {
 54 |         if (idx < self.size()) {
 55 |             return self(idx); }
 56 |         throw std::out_of_range(std::to_string(idx)); };
 57 | 
 58 |     const char* const method = method_name.c_str();
 59 | 
 60 |     cls.def(method, checked_call,
 61 |             py::call_guard<py::gil_scoped_release>(),
 62 |             py::arg("idx"),
 63 |             doc_call.c_str());
 64 |     cls.attr("__call__") = cls.attr(method);
 65 | 
 66 |     add_description(cls);
 67 | 
 68 |     if (doc != nullptr) {
 69 |         cls.doc() = doc; }
 70 | 
 71 |     return cls;
 72 | }
 64 | 
 65 | 
 66 | // Adds to `cls` the method `call_name`, which creates a support of type S
 67 | // initialised for the vector it is called on and returns it. The pointer
 68 | // parameter only selects S. keep_alive<0, 1> keeps the vector alive as long
 69 | // as the returned support. `alt_name`, when given, becomes an alias of the
 70 | // method.
 71 | template <class S, class T>
 72 | inline
 73 | decltype(auto) bind_support(const S*,
 74 |                             py::class_<T>& cls, const std::string& call_name,
 75 |                             const char* alt_name=nullptr)
 76 | {
 77 |     const char* const method = call_name.c_str();
 78 | 
 79 |     auto make_support = [](T& self) {
 80 |         S built;
 81 |         sdsl::util::init_support(built, &self);
 82 | 
 83 |         return built; };
 84 | 
 85 |     cls.def(method, make_support, py::keep_alive<0, 1>());
 86 | 
 87 |     if (alt_name != nullptr) {
 88 |         cls.attr(alt_name) = cls.attr(method); }
 89 | 
 90 |     return cls;
 91 | }
 85 | 
 86 | 
 87 | // Overload for supports exposed through support_helper<S>: the method
 88 | // `call_name` creates an S initialised for the vector it is called on and
 89 | // returns it wrapped, together with that vector, in a support_helper<S>.
 90 | // keep_alive<0, 1> keeps the vector alive as long as the returned object.
 91 | // `alt_name`, when given, becomes an alias of the method.
 92 | template <class S, class T>
 93 | inline
 94 | decltype(auto) bind_support(const support_helper<S>*,
 95 |                             py::class_<T>& cls, const std::string& call_name,
 96 |                             const char* alt_name=nullptr)
 97 | {
 98 |     const char* const method = call_name.c_str();
 99 | 
100 |     auto make_wrapped_support = [](T& self) {
101 |         S built;
102 |         sdsl::util::init_support(built, &self);
103 | 
104 |         return support_helper<S>(self, std::move(built)); };
105 | 
106 |     cls.def(method, make_wrapped_support, py::keep_alive<0, 1>());
107 | 
108 |     if (alt_name != nullptr) {
109 |         cls.attr(alt_name) = cls.attr(method); }
110 | 
111 |     return cls;
112 | }
106 | 
107 | 
108 | // Registers the two rank-support classes of T (for the patterns named `s0`
109 | // and `s1`) in `m` and adds the matching `init_rank<suffix>_<pattern>`
110 | // methods to `cls`. With `defaults` set, `init_rank<suffix>` becomes an
111 | // alias of the `s1` variant. `doc_rank` is used as the docstring of both
112 | // support classes.
113 | template <class T,
114 |           class R0=typename T::rank_0_type, class R1=typename T::rank_1_type>
115 | inline
116 | auto add_rank_support(py::module &m, py::class_<T>& cls,
117 |                       const std::string& base_name,
118 |                       const char* suffix = "",
119 |                       bool defaults = true,
120 |                       const std::string s0 = "0", const std::string s1 = "1",
121 |                       const char* doc_rank = nullptr)
122 | {
123 |     const std::string class_stem = base_name + "Rank" + suffix + "_";
124 |     const std::string init_stem = std::string("init_rank") + suffix;
125 |     const char* const doc_tail =
126 |         "` in the prefix [0..idx) in supported vector";
127 | 
128 |     add_support_class<R0>(m, class_stem + s0, "rank",
129 |                           "number of patterns `" + s0 + doc_tail, doc_rank);
130 |     bind_support(static_cast<R0*>(nullptr), cls, init_stem + "_" + s0);
131 | 
132 |     add_support_class<R1>(m, class_stem + s1, "rank",
133 |                           "number of patterns `" + s1 + doc_tail, doc_rank);
134 |     bind_support(static_cast<R1*>(nullptr), cls, init_stem + "_" + s1,
135 |                  defaults ? init_stem.c_str() : nullptr);
136 | 
137 |     return cls;
138 | }
135 | 
136 | 
137 | // Registers the two select-support classes of T (for the patterns named
138 | // `s0` and `s1`) in `m` and adds the matching
139 | // `init_select<suffix>_<pattern>` methods to `cls`. With `defaults` set,
140 | // `init_select<suffix>` becomes an alias of the `s1` variant. `doc_select`
141 | // is used as the docstring of both support classes.
142 | template <class T,
143 |           class S0=typename T::select_0_type,
144 |           class S1=typename T::select_1_type>
145 | inline
146 | auto add_select_support(py::module &m, py::class_<T>& cls,
147 |                         const std::string& base_name,
148 |                         const char* suffix = "",
149 |                         bool defaults = true,
150 |                         const std::string s0 = "0", const std::string s1 = "1",
151 |                         const char* doc_select = nullptr)
152 | {
153 |     const std::string class_stem = base_name + "Select" + suffix + "_";
154 |     const std::string init_stem = std::string("init_select") + suffix;
155 |     const char* const doc_head = "position of the idx-th pattern `";
156 |     const char* const doc_tail = "` in supported vector";
157 | 
158 |     add_support_class<S0>(m, class_stem + s0, "select",
159 |                           doc_head + s0 + doc_tail, doc_select);
160 |     bind_support(static_cast<S0*>(nullptr), cls, init_stem + "_" + s0);
161 | 
162 |     add_support_class<S1>(m, class_stem + s1, "select",
163 |                           doc_head + s1 + doc_tail, doc_select);
164 |     bind_support(static_cast<S1*>(nullptr), cls, init_stem + "_" + s1,
165 |                  defaults ? init_stem.c_str() : nullptr);
166 | 
167 |     return cls;
168 | }
165 | 
166 | // Registers the rank/select support classes for sdsl::bit_vector in `m` and the matching init_* methods on `cls`.
167 | inline
168 | void add_bitvector_supports(py::module& m, py::class_<sdsl::bit_vector>& cls)
169 | {
170 |     add_rank_support<sdsl::bit_vector,
171 |                     sdsl::rank_support_v<0, 1>,
172 |                     sdsl::rank_support_v<1, 1>>(
173 |         m, cls, "_BitVector", "V", true, "0", "1", doc_rank_v);
174 |     add_rank_support<sdsl::bit_vector,
175 |                      sdsl::rank_support_v<00, 2>,
176 |                      sdsl::rank_support_v<01, 2>>(
177 |         m, cls, "_BitVector", "V", false, "00", "01", doc_rank_v);
178 |     add_rank_support<sdsl::bit_vector,
179 |                      sdsl::rank_support_v<10, 2>,
180 |                      sdsl::rank_support_v<11, 2>>(
181 |         m, cls, "_BitVector", "V", false, "10", "11", doc_rank_v);
182 |     add_rank_support<sdsl::bit_vector,
183 |                     sdsl::rank_support_v5<0, 1>,
184 |                     sdsl::rank_support_v5<1, 1>>(
185 |         m, cls, "_BitVector", "V5", false, "0", "1", doc_rank_v5);
186 |     add_rank_support<sdsl::bit_vector,
187 |                      sdsl::rank_support_v5<00, 2>,
188 |                      sdsl::rank_support_v5<01, 2>>(
189 |         m, cls, "_BitVector", "V5", false, "00", "01", doc_rank_v5);
190 |     add_rank_support<sdsl::bit_vector,
191 |                      sdsl::rank_support_v5<10, 2>,
192 |                      sdsl::rank_support_v5<11, 2>>(
193 |         m, cls, "_BitVector", "V5", false, "10", "11", doc_rank_v5);
194 |     cls.attr("init_rank") = cls.attr("init_rankV");  // the unsuffixed names alias the "V" rank supports
195 |     cls.attr("init_rank_0") = cls.attr("init_rankV_0");
196 |     cls.attr("init_rank_1") = cls.attr("init_rankV_1");
197 |     // Select supports are wrapped in support_helper<>; the unsuffixed init_select* names alias the "MCL" variants.
198 |     add_select_support<sdsl::bit_vector,
199 |                        support_helper<sdsl::select_support_mcl<0, 1>>,
200 |                        support_helper<sdsl::select_support_mcl<1, 1>>>(
201 |         m, cls, "_BitVector", "MCL", true, "0", "1", doc_select_mcl);
202 |     add_select_support<sdsl::bit_vector,
203 |                        support_helper<sdsl::select_support_mcl<10, 2>>,
204 |                        support_helper<sdsl::select_support_mcl<11, 2>>>(
205 |         m, cls, "_BitVector", "MCL", false, "10", "11", doc_select_mcl);
206 |     cls.attr("init_select") = cls.attr("init_selectMCL");
207 |     cls.attr("init_select_0") = cls.attr("init_selectMCL_0");
208 |     cls.attr("init_select_1") = cls.attr("init_selectMCL_1");
209 |     // "Scan" rank supports, registered without default aliases.
210 |     add_rank_support<sdsl::bit_vector,
211 |                      sdsl::rank_support_scan<0, 1>,
212 |                      sdsl::rank_support_scan<1, 1>>(
213 |         m, cls, "_BitVector", "Scan", false, "0", "1", doc_rank_scan);
214 |     add_rank_support<sdsl::bit_vector,
215 |                      sdsl::rank_support_scan<00, 2>,
216 |                      sdsl::rank_support_scan<01, 2>>(
217 |         m, cls, "_BitVector", "Scan", false, "00", "01", doc_rank_scan);
218 |     add_rank_support<sdsl::bit_vector,
219 |                      sdsl::rank_support_scan<10, 2>,
220 |                      sdsl::rank_support_scan<11, 2>>(
221 |         m, cls, "_BitVector", "Scan", false, "10", "11", doc_rank_scan);
222 |     // "Scan" select supports, registered without default aliases.
223 |     add_select_support<sdsl::bit_vector,
224 |                        support_helper<sdsl::select_support_scan<0, 1>>,
225 |                        support_helper<sdsl::select_support_scan<1, 1>>>(
226 |         m, cls, "_BitVector", "Scan", false, "0", "1", doc_select_scan);
227 |     add_select_support<sdsl::bit_vector,
228 |                        support_helper<sdsl::select_support_scan<10, 2>>,
229 |                        support_helper<sdsl::select_support_scan<01, 2>>>(
230 |         m, cls, "_BitVector", "Scan", false, "10", "01", doc_select_scan);
231 | }
232 | 


--------------------------------------------------------------------------------
/pysdsl/types/bitvector.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include <cstdint>
  4 | #include <stdexcept>
  5 | #include <string>
  6 | 
  7 | #include <sdsl/bit_vectors.hpp>
  8 | #include <sdsl/vectors.hpp>
  9 | 
 10 | #include <pybind11/pybind11.h>
 11 | 
 12 | #include "calc.hpp"
 13 | #include "docstrings.hpp"
 14 | #include "io.hpp"
 15 | #include "supports.hpp"
 16 | #include "operations/sizes.hpp"
 17 | #include "operations/iteration.hpp"
 18 | 
 19 | 
 20 | namespace py = pybind11;
 21 | 
 22 | 
 23 | // Registers the Python class `name` for the bit vector type T in `m`.
 24 | //
 25 | // The class gets a default constructor, the size / description /
 26 | // serialization / to_string members, read access and algorithms (with bool
 27 | // as element type), `get_int`, and the rank and select supports declared by
 28 | // T. `doc`, `doc_rank` and `doc_select` are optional docstrings for the
 29 | // class and its support classes. The class is also appended to
 30 | // `m.all_immutable_bitvectors`.
 31 | template <class T>
 32 | inline
 33 | auto add_bitvector_class(py::module &m, const std::string&& name,
 34 |                          const char* doc = nullptr,
 35 |                          const char* doc_rank = nullptr,
 36 |                          const char* doc_select = nullptr)
 37 | {
 38 |     auto cls = py::class_<T>(m, name.c_str()).def(py::init());
 39 | 
 40 |     add_sizes(cls);
 41 |     add_description(cls);
 42 |     add_serialization(cls);
 43 |     add_to_string(cls);
 44 | 
 45 |     add_read_access<T, bool>(cls);
 46 |     add_std_algo<T, bool>(cls);
 47 | 
 48 |     if (doc) cls.doc() = doc;
 49 | 
 50 |     cls.def(
 51 |         "get_int",
 52 |         [](const T &self, size_t idx, uint8_t len) {
 53 |             const size_t size = self.size();
 54 |             // Overflow-safe form of `idx + len - 1 >= size`: the sum
 55 |             // wrapped around for very large `idx`, which let an
 56 |             // out-of-range read through.
 57 |             if (idx >= size || len > size - idx) {
 58 |                 throw std::out_of_range(std::to_string(idx)); }
 59 |             if (len > 64) {
 60 |                 throw std::invalid_argument("len should be <= 64"); }
 61 |             return self.get_int(idx, len); },
 62 |         py::arg("idx"),
 63 |         py::arg("len") = 64,
 64 |         "Get the integer value of the binary string of length `len` "
 65 |         "starting at position `idx`.",
 66 |         py::call_guard<py::gil_scoped_release>());
 67 | 
 68 |     add_rank_support(m, cls, "_" + name, "", true, "0", "1", doc_rank);
 69 |     add_select_support(m, cls, "_" + name, "", true, "0", "1", doc_select);
 70 | 
 71 |     m.attr("all_immutable_bitvectors").attr("append")(cls);
 72 | 
 73 |     return cls;
 74 | }
 63 | 
 64 | 
 65 | template <class T>
 66 | inline
 67 | auto add_bitvector_class(py::module &m, const char* name,
 68 |                          const char* doc = nullptr,
 69 |                          const char* doc_rank = nullptr,
 70 |                          const char* doc_select = nullptr)
 71 | {
 72 |     return add_bitvector_class<T>(m, std::string(name),
 73 |                                   doc, doc_rank, doc_select);
 74 | }
 75 | 
 76 | 
 77 | template <class T>
 78 | inline
 79 | auto add_bitvector_class(py::module &m, const std::string& name,
 80 |                          const char* doc = nullptr,
 81 |                          const char* doc_rank = nullptr,
 82 |                          const char* doc_select = nullptr)
 83 | {
 84 |     return add_bitvector_class<T>(m, std::string(name), // i.e. copy name
 85 |                                   doc, doc_rank, doc_select);
 86 | }
 87 | 
 88 | 
 89 | template <uint32_t t_bs>
 90 | inline auto add_bit_vector_il(py::module& m)
 91 | {
 92 |     auto cls = add_bitvector_class<sdsl::bit_vector_il<t_bs>>(
 93 |         m,
 94 |         std::string("BitVectorInterLeaved") + std::to_string(t_bs),
 95 |         doc_bit_vector_il);
 96 | 
 97 |     m.attr("bit_vector_interleaved").attr("__setitem__")(t_bs, cls);
 98 | 
 99 |     return cls;
100 | }
101 | 
102 | 
103 | template <uint16_t t_bs=63, class t_rac=sdsl::int_vector<>, uint16_t t_k=32>
104 | inline auto add_rrr_vector(py::module& m,
105 |                            const char* name = "RamanRamanRaoVector")
106 | {
107 |     auto cls = add_bitvector_class<sdsl::rrr_vector<t_bs, t_rac, t_k>>(
108 |         m,
109 |         std::string(name) + std::to_string(t_bs),
110 |                     doc_rrr_vector);
111 | 
112 |     m.attr("raman_raman_rao_vectors").attr("__setitem__")(t_bs, cls);
113 | 
114 |     return cls;
115 | }
116 | 
117 | 
118 | template <uint16_t t_bs=63, class t_rac=sdsl::int_vector<>, uint16_t t_k=32>
119 | inline auto add_rrr_vector(py::module& m, const py::class_<t_rac>& py_rac,
120 |                            const char* name = "RamanRamanRaoVector")
121 | {
122 |     auto cls_name = std::string(name) + py::cast<std::string>(py_rac.attr("__name__")) + std::to_string(t_bs);
123 | 
124 |     auto cls = add_bitvector_class<sdsl::rrr_vector<t_bs, t_rac, t_k>>(
125 |             m, cls_name, doc_rrr_vector);
126 | 
127 |     m.attr("raman_raman_rao_vectors").attr("__setitem__")(cls_name, cls);
128 | 
129 |     return cls;
130 | }
131 | 
132 | 
133 | template <class Base=sdsl::bit_vector>
134 | inline auto add_sd_vector(py::module& m, const char* name="SDVector")
135 | {
136 |     auto cls = add_bitvector_class<sdsl::sd_vector<Base>>(
137 |         m,
138 |         std::string(name),
139 |         doc_sd_vector);
140 | 
141 |     m.attr("sparse_bit_vectors").attr("__setitem__")(name, cls);
142 | 
143 |     return cls;
144 | }
145 | 
146 | 
147 | template <class Base=sdsl::bit_vector>
148 | inline auto add_sd_vector(py::module& m, const py::class_<Base>& base_cls,
149 |                           const char* name="SDVector")
150 | {
151 |     auto cls_name = std::string(name) + py::cast<std::string>(
152 |         base_cls.attr("__name__"));
153 | 
154 |     auto cls = add_bitvector_class<sdsl::sd_vector<Base>>(
155 |         m,
156 |         cls_name,
157 |         doc_sd_vector);
158 | 
159 |     m.attr("sparse_bit_vectors").attr("__setitem__")(cls_name, cls);
160 | 
161 |     return cls;
162 | }
163 | 
164 | 
165 | template <uint32_t k_sblock_rate>
166 | inline auto add_hyb_vector(py::module& m)
167 | {
168 |     auto cls = add_bitvector_class<sdsl::hyb_vector<k_sblock_rate>>(
169 |         m, "HybVector" + std::to_string(k_sblock_rate),
170 |         doc_hyb_vector);
171 | 
172 |     m.attr("hybrid_bit_vectors").attr("__setitem__")(k_sblock_rate, cls);
173 | 
174 |     return cls;
175 | }
176 | 
177 | 
178 | template <class B>
179 | auto add_bitvectors(py::module& m, py::class_<B>& bit_vector_cls)
180 | {
181 |     add_bitvector_supports(m, bit_vector_cls);
182 | 
183 |     m.attr("all_immutable_bitvectors") = py::list();
184 |     m.attr("bit_vector_interleaved") = py::dict();
185 |     m.attr("raman_raman_rao_vectors") = py::dict();
186 |     m.attr("sparse_bit_vectors") = py::dict();
187 |     m.attr("hybrid_bit_vectors") = py::dict();
188 | 
189 |     auto bvil_classes = std::make_tuple(add_bit_vector_il<64>(m),
190 |                                         add_bit_vector_il<128>(m),
191 |                                         add_bit_vector_il<256>(m),
192 |                                         add_bit_vector_il<512>(m));
193 | 
194 |     auto hyb_classes = std::make_tuple(
195 |         add_hyb_vector<4>(m),
196 |         add_hyb_vector<8>(m),
197 |         add_hyb_vector<16>(m),
198 |         add_hyb_vector<256>(m));
199 | 
200 |     auto rrr_classes = std::make_tuple(
201 |         add_rrr_vector<3>(m),
202 |         add_rrr_vector<15>(m),
203 |         add_rrr_vector<63>(m),
204 |         add_rrr_vector<256>(m));
205 |         //add_rrr_vector<63, sdsl::wt_int<>>(m, "RamanRamanRaoWTVector"));
206 | 
207 |     auto sd_classes = std::make_tuple(
208 |         add_sd_vector<>(m),
209 |         add_sd_vector<sdsl::sd_vector<>>(m, "SDVectorSD"),
210 |         add_sd_vector(m, std::get<1>(rrr_classes)));
211 | 
212 |     return std::make_tuple(
213 |         std::tuple_cat(bvil_classes, rrr_classes, sd_classes, hyb_classes),
214 |         std::make_tuple(  // propagate
215 |             std::get<2>(rrr_classes),
216 |             std::get<0>(sd_classes),
217 |             std::get<3>(bvil_classes)
218 |         )
219 |     );
220 | 
221 | }
222 | 


--------------------------------------------------------------------------------
/pysdsl/types/encodedvector.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include <string>
  4 | #include <tuple>
  5 | #include <type_traits>
  6 | 
  7 | #include <pybind11/pybind11.h>
  8 | 
  9 | #include <sdsl/bit_vectors.hpp>
 10 | #include <sdsl/vectors.hpp>
 11 | 
 12 | #include "calc.hpp"
 13 | #include "docstrings.hpp"
 14 | #include "io.hpp"
 15 | #include "operations/iteration.hpp"
 16 | #include "operations/sizes.hpp"
 17 | #include "util/tupletricks.hpp"
 18 | 
 19 | 
 20 | namespace py = pybind11;
 21 | 
 22 | 
 23 | namespace {
 24 | template <std::size_t I> using dens = std::integral_constant<uint32_t, I>;
 25 | 
 26 | template <std::size_t W> using width = std::integral_constant<uint8_t, W>;
 27 | }  // namespace
 28 | 
 29 | 
 30 | auto constexpr coders = std::make_tuple(
 31 |     std::make_tuple("EliasDelta", dens<128>{}, width<0>{},
 32 |         (sdsl::coder::elias_delta*) nullptr),
 33 |     std::make_tuple("EliasGamma", dens<128>{}, width<0>{},
 34 |         (sdsl::coder::elias_gamma*) nullptr),
 35 |     std::make_tuple("Fibonacci", dens<128>{}, width<0>{},
 36 |         (sdsl::coder::fibonacci*) nullptr),
 37 |     std::make_tuple("Comma2", dens<128>{}, width<0>{},
 38 |         (sdsl::coder::comma<2>*) nullptr),
 39 |     std::make_tuple("Comma4", dens<128>{}, width<0>{},
 40 |         (sdsl::coder::comma<4>*) nullptr));
 41 | 
 42 | 
 43 | class add_enc_coders_functor
 44 | {
 45 | public:
 46 |     constexpr add_enc_coders_functor(py::module& m): m(m) {}
 47 | 
 48 |     template <typename Coder, uint32_t t_dens, uint8_t t_width>
 49 |     inline
 50 |     decltype(auto) operator()(const std::tuple<const char*, dens<t_dens>,
 51 |                                                width<t_width>, Coder*> &t)
 52 |     {
 53 |         using enc = sdsl::enc_vector<Coder, t_dens, t_width>;
 54 | 
 55 |         auto cls = py::class_<enc>(
 56 |                 m,
 57 |                 (std::string("EncVector") + std::get<0>(t)).c_str()
 58 |             ).def(py::init());
 59 | 
 60 |         add_sizes(cls);
 61 |         add_description(cls);
 62 |         add_serialization(cls);
 63 |         add_to_string(cls);
 64 | 
 65 |         add_read_access<enc>(cls);
 66 |         add_std_algo<enc>(cls);
 67 | 
 68 |         cls.doc() = "A vector `v` is stored more space-efficiently by "
 69 |                     "self-delimiting coding the deltas v[i+1]-v[i] (v[-1]:=0).";
 70 | 
 71 |         cls.def_property_readonly("sample_dens", &enc::get_sample_dens)
 72 |         .def(
 73 |             "sample",
 74 |             [] (const enc& self, typename enc::size_type i) {
 75 |                 if (i >= self.size() / self.get_sample_dens()) {
 76 |                     throw std::out_of_range(std::to_string(i)); }
 77 |                 return self.sample(i); },
 78 |             "Returns the i-th sample of the compressed vector"
 79 |             "i: The index of the sample. 0 <= i < size()/get_sample_dens()",
 80 |             py::call_guard<py::gil_scoped_release>())
 81 |         .def(
 82 |             "samples",
 83 |             [] (const enc& self) {
 84 |                 const typename enc::size_type size = self.size() /
 85 |                                                         self.get_sample_dens();
 86 | 
 87 |                 sdsl::int_vector<t_width> samples(size);
 88 |                 for (std::size_t i = 0; i <= size; i++) {
 89 |                     samples[i] = self.sample(i); }
 90 |                 return samples; },
 91 |             py::call_guard<py::gil_scoped_release>());
 92 | 
 93 |         m.attr("enc_vector").attr("__setitem__")(std::get<0>(t), cls);
 94 |         m.attr("all_compressed_integer_vectors").attr("append")(cls);
 95 | 
 96 |         return cls;
 97 |     }
 98 | 
 99 | private:
100 |     py::module& m;
101 | };
102 | 
103 | 
104 | class add_vlc_coders_functor
105 | {
106 | public:
107 |     constexpr add_vlc_coders_functor(py::module& m): m(m) {}
108 | 
109 |     template <typename Coder, uint32_t t_dens, uint8_t t_width>
110 |     inline
111 |     decltype(auto) operator()(const std::tuple<const char*, dens<t_dens>,
112 |                                                width<t_width>, Coder*> &t)
113 |     {
114 |         using vlc = sdsl::vlc_vector<Coder, t_dens, t_width>;
115 | 
116 |         auto cls = py::class_<vlc>(m, (
117 |                 std::string("VariableLengthCodesVector") + std::get<0>(t)
118 |         ).c_str()).def(py::init());
119 | 
120 |         add_sizes(cls);
121 |         add_description(cls);
122 |         add_serialization(cls);
123 |         add_to_string(cls);
124 | 
125 |         add_read_access<vlc>(cls);
126 |         add_std_algo<vlc>(cls);
127 | 
128 |         cls.doc() = "A vector which stores the values with "
129 |                     "variable length codes.";
130 | 
131 |         cls.def_property_readonly("sample_dens", &vlc::get_sample_dens);
132 | 
133 |         m.attr("variable_length_codes_vector").attr(
134 |                 "__setitem__")(std::get<0>(t), cls);
135 |         m.attr("all_compressed_integer_vectors").attr("append")(cls);
136 | 
137 |         return cls;
138 |     }
139 | 
140 | private:
141 |     py::module& m;
142 | };
143 | 
144 | 
// Renders a key as text: C-string keys are taken verbatim.
inline std::string key_to_string(const char* key)
{
    std::string text(key);
    return text;
}

// Any other key type is formatted with std::to_string.
template <class KEY_T>
inline std::string key_to_string(KEY_T key)
{
    return std::to_string(key);
}
149 | 
150 | 
151 | namespace {
152 | 
153 | const char dprrr[] = "DPRRR";
154 | const char dp[] = "DP";
155 | 
156 | template <typename T, T t, bool = std::is_integral<T>::value>
157 | struct get_vector_type {};
158 | 
159 | template <typename T, T t>
160 | struct get_vector_type<T, t, true> {
161 |     using type = sdsl::dac_vector<t>;
162 | };
163 | 
164 | template <>
165 | struct get_vector_type<const char*, dp, false> {
166 |     using type = sdsl::dac_vector_dp<>;
167 | };
168 | 
169 | template <>
170 | struct get_vector_type<const char*, dprrr, false> {
171 |     using type = sdsl::dac_vector_dp<sdsl::rrr_vector<>>;
172 | };
173 | 
174 | template <typename T, T t>
175 | using get_vector_type_t = typename get_vector_type<T, t>::type;
176 | 
177 | } // namespace
178 | 
179 | 
180 | class add_dac_vector_functor
181 | {
182 | public:
183 |     constexpr add_dac_vector_functor(py::module& m, const char* doc = nullptr,
184 |                                                     const char* doc_dp = nullptr): 
185 |         m(m), doc(doc), doc_dp(doc_dp) {}
186 | 
187 | 
188 |     template <typename KEY_T, KEY_T key>
189 |     inline
190 |     decltype(auto) get_vector(std::integral_constant<KEY_T, key>) {
191 |         using type = get_vector_type_t<KEY_T, key>;
192 |         auto name = "DirectAccessibleCodesVector" + key_to_string(key);
193 | 
194 |         auto cls = py::class_<type>(m, name.c_str()).def(py::init());
195 | 
196 |         add_sizes(cls);
197 |         add_description(cls);
198 |         add_serialization(cls);
199 |         add_to_string(cls);
200 | 
201 |         add_read_access<type>(cls);
202 |         add_std_algo<type>(cls);
203 | 
204 |         if (doc && std::is_integral<KEY_T>::value)
205 |             cls.doc() = doc;
206 |         else if (doc_dp && !std::is_integral<KEY_T>::value)
207 |             cls.doc() = doc_dp;
208 | 
209 | 
210 |         cls.def_property_readonly("levels", &type::levels);
211 | 
212 |         m.attr("direct_accessible_codes_vector").attr("__setitem__")(key, cls);
213 |         m.attr("all_compressed_integer_vectors").attr("append")(cls);
214 | 
215 |         return cls;
216 |     }
217 | 
218 |     template <typename KEY_T, KEY_T key, 
219 |               typename std::enable_if<
220 |                             std::is_integral<KEY_T>::value>::type* dummy = nullptr>
221 |     inline
222 |     decltype(auto) operator()(std::integral_constant<KEY_T, key> t) {
223 |         return get_vector(t);
224 |     }
225 |     template <typename KEY_T, KEY_T key,
226 |               typename std::enable_if<
227 |                             std::is_same<const char*, KEY_T>::value>::type* dummy = nullptr>
228 |     inline
229 |     decltype(auto) operator()(std::integral_constant<KEY_T, key> t) {
230 |         return get_vector(t).def("cost", &get_vector_type_t<KEY_T, key>::cost,
231 |                                     py::arg("n"), py::arg("m"));
232 |     }
233 | 
234 | private:
235 |     py::module& m;
236 |     const char* doc;
237 |     const char* doc_dp;
238 | };
239 | 
240 | 
241 | auto add_encoded_vectors(py::module& m)
242 | {
243 |     m.attr("enc_vector") = py::dict();
244 |     m.attr("variable_length_codes_vector") = py::dict();
245 |     m.attr("direct_accessible_codes_vector") = py::dict();
246 |     m.attr("all_compressed_integer_vectors") = py::list();
247 | 
248 |     auto enc_classes = for_each_in_tuple(coders, add_enc_coders_functor(m));
249 |     auto vlc_classes = for_each_in_tuple(coders, add_vlc_coders_functor(m));
250 | 
251 |     using dac_params = std::tuple<
252 |         std::integral_constant<size_t, 4>,
253 |         std::integral_constant<size_t, 8>,
254 |         std::integral_constant<size_t, 16>,
255 |         std::integral_constant<size_t, 63>,
256 |         std::integral_constant<const char*, dp>,
257 |         std::integral_constant<const char*, dprrr>
258 |     >;
259 |     auto dac_classes = for_each_in_tuple(dac_params(), 
260 |                             add_dac_vector_functor(m, doc_dac_vector, doc_dac_vector_dp));
261 | 
262 |     m.attr("DACVector") = m.attr("DirectAccessibleCodesVector4");
263 |     m.attr("DirectAccessibleCodesVector") = m.attr(
264 |         "DirectAccessibleCodesVector4");
265 | 
266 |     return std::tuple_cat(enc_classes, vlc_classes, dac_classes);
267 | }
268 | 


--------------------------------------------------------------------------------
/pysdsl/types/intvector.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include <string>
  4 | #include <tuple>
  5 | 
  6 | #include <pybind11/pybind11.h>
  7 | 
  8 | #include <sdsl/util.hpp>
  9 | #include <sdsl/vectors.hpp>
 10 | 
 11 | #include "calc.hpp"
 12 | #include "io.hpp"
 13 | #include "operations/iteration.hpp"
 14 | #include "operations/sizes.hpp"
 15 | #include "docstrings.hpp"
 16 | 
 17 | 
 18 | constexpr char sym_for_width(unsigned int width) {
 19 |     switch (width) {
 20 |         case 8:
 21 |             return 'B';
 22 |         case 16:
 23 |             return 'H';
 24 |         case 32:
 25 |             return 'I';
 26 |         case 64:
 27 |             return 'Q';
 28 |         default: __builtin_unreachable();
 29 |     }
 30 | }
 31 | 
 32 | 
 33 | // checks whether width is a power of 2 (width & (width - 1) == 0)
 34 | //                              and this power is between 8 and 64
 35 | // without dummy redefinition error
 36 | template <class T,
 37 |           unsigned int width = static_cast<unsigned int>(T::fixed_int_width),
 38 |           typename std::enable_if<!(width & (width - 1)) &&
 39 |                                   (width & (128u - 8u))>::type* dummy = nullptr>
 40 | inline auto add_int_init(py::module& m, const char* name)
 41 | {
 42 |     return py::class_<T>(m, name, py::buffer_protocol())
 43 |         .def_buffer([] (T& self) {
 44 |             return py::buffer_info(
 45 |                 reinterpret_cast<void*>(self.data()),
 46 |                 width / 8,
 47 |                 std::string(1, sym_for_width(width)),
 48 |                 1,
 49 |                 { detail::size(self) },
 50 |                 { width / 8 }
 51 |             ); });
 52 | }
 53 | 
 54 | template <class T,
 55 |           unsigned int width = static_cast<unsigned int>(T::fixed_int_width),
 56 |           typename std::enable_if<(width & (width - 1)) ||
 57 |                                   !(width & (128u - 8u))>::type* dummy = nullptr>
 58 | inline auto add_int_init(py::module& m, const char* name)
 59 | {
 60 |     return py::class_<T>(m, name);
 61 | }
 62 |         
 63 | 
 64 | template <class T, typename S = typename T::value_type, typename KEY_T>
 65 | inline auto add_int_class(py::module& m, py::dict& dict, KEY_T key,
 66 |                           const char *name, const char *doc = nullptr)
 67 | {
 68 |     auto cls = add_int_init<T>(m, name)
 69 |         .def_property_readonly("width", static_cast<uint8_t(T::*)(void) const> (&T::width))
 70 |         .def_property_readonly("data",
 71 |                                static_cast<const uint64_t *(T::*)(void)const> (&T::data))
 72 | 
 73 |         .def_property_readonly("bit_size", &T::bit_size,
 74 |                                "The number of bits in the int_vector.")
 75 | 
 76 |         .def("resize", &T::resize,
 77 |              "Resize the int_vector in terms of elements.")
 78 |         .def("bit_resize", &T::bit_resize,
 79 |              "Resize the int_vector in terms of bits.")
 80 |         .def_property_readonly("capacity", &T::capacity, doc_capacity)
 81 | 
 82 |         .def(
 83 |             "__setitem__",
 84 |             [](T &self, size_t position, S value) {
 85 |                 if (position >= self.size()) {
 86 |                     throw std::out_of_range(std::to_string(position)); }
 87 |                 self[position] = value; })
 88 | 
 89 |         .def("set_to_id",
 90 |              [](T &self) { sdsl::util::set_to_id(self); },
 91 |              py::call_guard<py::gil_scoped_release>(),
 92 |              "Sets each entry of the vector at position `i` to value `i`")
 93 |         .def("set_to_value",
 94 |              [](T &self, S value) { sdsl::util::set_to_value(self, value); },
 95 |              py::arg("k"),
 96 |              doc_set_to_value,
 97 |              py::call_guard<py::gil_scoped_release>())
 98 |         .def("set_zero_bits",
 99 |              [](T &self) { sdsl::util::_set_zero_bits(self); },
100 |              "Sets all bits of the int_vector to 0-bits.",
101 |              py::call_guard<py::gil_scoped_release>())
102 |         .def("set_one_bits",
103 |              [](T &self) { sdsl::util::_set_one_bits(self); },
104 |              "Sets all bits of the int_vector to 1-bits.",
105 |              py::call_guard<py::gil_scoped_release>())
106 |         .def(
107 |             "set_random_bits",
108 |             [](T &self, int seed) {
109 |                 sdsl::util::set_random_bits(self, seed); },
110 |             py::arg_v(
111 |                 "seed",
112 |                 0,
113 |                 "If seed = 0, the time is used to initialize the pseudo "
114 |                 "random number generator, otherwise the seed parameter is used."
115 |             ),
116 |             "Sets all bits of the int_vector to pseudo-random bits.",
117 |             py::call_guard<py::gil_scoped_release>())
118 |         .def_static(
119 |             "rnd_positions",
120 |             [](uint8_t log_s, uint64_t mod, uint64_t seed) {
121 |                 uint64_t mask;
122 | 
123 |                 auto res = sdsl::util::rnd_positions<T>(log_s, mask, mod, seed);
124 | 
125 |                 return std::make_tuple(res, mask); },
126 |             py::arg("log_s"), py::arg("mod") = 0, py::arg("seed") = 0,
127 |             "Create `2**{log_s}` random integers mod `mod` with seed `seed`",
128 |             py::call_guard<py::gil_scoped_release>())
129 |         .def(
130 |             "__imod__",
131 |             [](T &self, uint64_t m) {
132 |                 sdsl::util::mod(self, m);
133 |                 return self; },
134 |             py::is_operator())
135 | 
136 |         .def("cnt_one_bits",
137 |             [](const T &self) { return sdsl::util::cnt_one_bits(self); },
138 |             "Number of set bits in vector",
139 |             py::call_guard<py::gil_scoped_release>())
140 |         .def("cnt_onezero_bits",
141 |              [](const T &self) { return sdsl::util::cnt_onezero_bits(self); },
142 |              "Number of occurrences of bit pattern `10` in vector",
143 |              py::call_guard<py::gil_scoped_release>())
144 |         .def("cnt_zeroone_bits",
145 |              [](const T &self) { return sdsl::util::cnt_zeroone_bits(self); },
146 |              "Number of occurrences of bit pattern `01` in vector",
147 |              py::call_guard<py::gil_scoped_release>())
148 | 
149 |         .def(
150 |             "next_bit",
151 |             [](const T &self, size_t idx) {
152 |                 if (idx >= self.bit_size()) {
153 |                     throw std::out_of_range(std::to_string(idx)); }
154 |                 return sdsl::util::next_bit(self, idx); },
155 |             py::arg("idx"),
156 |             "Get the smallest position `i` >= `idx` where a bit is set",
157 |             py::call_guard<py::gil_scoped_release>())
158 |         .def(
159 |             "prev_bit",
160 |             [](const T &self, size_t idx) {
161 |                 if (idx >= self.bit_size()) {
162 |                     throw std::out_of_range(std::to_string(idx)); }
163 |                 return sdsl::util::prev_bit(self, idx); },
164 |             py::arg("idx"),
165 |             "Get the largest position `i` <= `idx` where a bit is set",
166 |             py::call_guard<py::gil_scoped_release>());
167 | 
168 |     add_sizes(cls);
169 |     add_description(cls);
170 |     add_serialization(cls);
171 |     add_to_string<T, S>(cls);
172 | 
173 |     add_read_access<T, S>(cls);
174 |     add_std_algo<T, S>(cls);
175 | 
176 |     if (doc) cls.doc() = doc;
177 | 
178 |     dict.attr("__setitem__")(key, cls);
179 | 
180 |     return cls;
181 | }
182 | 
183 | 
184 | struct add_int_vector_functor {
185 |     py::module& m;
186 |     py::dict& int_vectors_dict;
187 | 
188 |     constexpr add_int_vector_functor(py::module& m, py::dict& int_vectors_dict) noexcept
189 |         : m(m), int_vectors_dict(int_vectors_dict) {}
190 | 
191 |     template <size_t N>
192 |     auto operator()(std::integral_constant<size_t, N> t) {
193 |         using return_type = sdsl::int_vector<N>;
194 |         std::string name = "Int" + std::to_string(N) + "Vector";
195 |         return add_int_class<return_type, typename return_type::value_type>(
196 |                     m, int_vectors_dict, N, name.c_str())
197 |                 .def(py::init(
198 |                     [](size_t size, typename return_type::value_type default_value) {
199 |                         return return_type(size, default_value, N); }),
200 |                     py::arg("size") = 0, py::arg("default_value") = 0);
201 |     }
202 | 
203 |     auto operator()(std::integral_constant<size_t, 0> t) {
204 |         return add_int_class<sdsl::int_vector<0>>(
205 |                     m, int_vectors_dict, "dynamic", "IntVector", doc_int_vector)
206 |                 .def(
207 |                     py::init([](size_t size,
208 |                                 uint64_t default_value,
209 |                                 uint8_t bit_width) {
210 |                         return sdsl::int_vector<0>(size, default_value, bit_width);
211 |                     }),
212 |                     py::arg("size") = 0,
213 |                     py::arg("default_value") = 0,
214 |                     py::arg("bit_width") = 64,
215 |                     py::call_guard<py::gil_scoped_release>())
216 |                 .def(
217 |                     "expand_width",
218 |                     [](sdsl::int_vector<0> &self, size_t width) {
219 |                         sdsl::util::expand_width(self, width); },
220 |                     "Expands the integer width to new_width >= v.width().",
221 |                     py::call_guard<py::gil_scoped_release>())
222 |                 .def("bit_compress",
223 |                     [](sdsl::int_vector<0> &self) {
224 |                         sdsl::util::bit_compress(self); },
225 |                     doc_bit_compress,
226 |                     py::call_guard<py::gil_scoped_release>());
227 |     }
228 | 
229 |     auto operator()(std::integral_constant<size_t, 1> t) {
230 |         return add_int_class<sdsl::int_vector<1>, bool>(
231 |                 m, int_vectors_dict, 1ul , "BitVector")
232 |             .def(py::init(
233 |                 [](size_t size, bool default_value) {
234 |                     return sdsl::int_vector<1>(size, default_value, 1); }),
235 |                 py::arg("size") = 0, py::arg("default_value") = false)
236 |             .def("flip", &sdsl::int_vector<1>::flip,
237 |                  "Flip all bits of bit_vector",
238 |                  py::call_guard<py::gil_scoped_release>());
239 |     }
240 | };
241 | 
242 | 
243 | template <typename N_integral_constant>
244 | using general_int_vector = py::class_<typename sdsl::int_vector<N_integral_constant::value>>;
245 | 
246 | 
247 | inline auto add_int_vectors(py::module& m)
248 | {
249 |     py::dict int_vectors_dict;
250 | 
251 |     m.attr("int_vector") = int_vectors_dict;
252 | 
253 |     using params = std::tuple<
254 |         std::integral_constant<size_t, 0>,
255 |         std::integral_constant<size_t, 1>,
256 |         std::integral_constant<size_t, 4>,
257 |         std::integral_constant<size_t, 8>,
258 |         std::integral_constant<size_t, 16>,
259 |         std::integral_constant<size_t, 24>,
260 |         std::integral_constant<size_t, 32>,
261 |         std::integral_constant<size_t, 48>,
262 |         std::integral_constant<size_t, 64>>;
263 | 
264 |     using as_params = std::tuple<
265 |         std::tuple<std::integral_constant<size_t, 1>>,
266 |         std::tuple<std::integral_constant<size_t, 4>>,
267 |         std::tuple<std::integral_constant<size_t, 8>>,
268 |         std::tuple<std::integral_constant<size_t, 64>>>;
269 | 
270 |     auto iv = for_each_in_tuple(params(), add_int_vector_functor(m, int_vectors_dict));
271 |     auto iv_as_params = forward_each_in_tuple(as_params(),
272 |         make_general_subset_functor<general_int_vector>(iv));
273 | 
274 |     return std::forward_as_tuple(iv, iv_as_params);
275 | }
276 | 


--------------------------------------------------------------------------------
/pysdsl/types/pysequence.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <utility>
 4 | 
 5 | #include <pybind11/pybind11.h>
 6 | 
 7 | namespace py = pybind11;
 8 | 
 9 | 
10 | namespace detail
11 | {
12 |     template <class T, class Base>
13 |     class sequence_iterator_wrapper
14 |     {
15 |     public:
16 |         using difference_type = std::size_t;
17 |         using value_type = T;
18 |         using pointer = T*;
19 |         using reference = T&;
20 |         using iterator_category = std::random_access_iterator_tag;
21 | 
22 |         sequence_iterator_wrapper(Base it): m_it(it) {}
23 | 
24 |         bool operator!=(const sequence_iterator_wrapper& other) const {
25 |             return m_it != other.m_it; }
26 | 
27 |         bool operator==(const sequence_iterator_wrapper& other) const {
28 |             return m_it == other.m_it; }
29 | 
30 |         value_type operator*() { return py::cast<T>(*m_it); }
31 | 
32 |         decltype(auto) operator++()
33 |         {
34 |             ++m_it;
35 |             return *this;
36 |         }
37 | 
38 |         decltype(auto) operator++(int) {
39 |             return *sequence_iterator_wrapper<T, Base>(m_it++); }
40 | 
41 |         decltype(auto) operator-(difference_type step) const {
42 |             return sequence_iterator_wrapper<T, Base>(m_it - step); }
43 | 
44 |         difference_type operator-(sequence_iterator_wrapper other) const {
45 |             return m_it - other.m_it; }
46 | 
47 |         decltype(auto) operator=(sequence_iterator_wrapper other)
48 |         {
49 |             if (this != &other) {
50 |                 m_it = other.m_it; }
51 |             return *this;
52 |         }
53 | 
54 |     private:
55 |         Base m_it;
56 |     };
57 | }  // namespace detail
58 | 
59 | 
60 | template <class T>
61 | class sequence_wrapper
62 | {
63 | private:
64 |     //py::detail::sequence_iterator;
65 |     using raw_iterator = decltype(std::declval<py::sequence>().begin());
66 | 
67 | public:
68 |     using const_iterator = detail::sequence_iterator_wrapper<T, raw_iterator>;
69 |     using value_type = T;
70 |     using size_type = std::size_t;
71 | 
72 |     sequence_wrapper(const py::sequence& seq): m_seq(seq) {};
73 | 
74 |     bool empty() const { return m_seq.size() == 0; }
75 | 
76 |     const_iterator begin() const { return std::cbegin(m_seq); }
77 |     const_iterator end() const { return std::cend(m_seq); }
78 | 
79 |     size_t size() const { return m_seq.size(); }
80 | 
81 |     decltype(auto)
82 |     operator[] (const size_t i) const { return py::cast<T>(m_seq[i]); }
83 | 
84 | private:
85 |     const py::sequence& m_seq;
86 | };
87 | 


--------------------------------------------------------------------------------
/pysdsl/types/sorted_int_stack.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <string>
 4 | #include <tuple>
 5 | 
 6 | #include <pybind11/pybind11.h>
 7 | 
 8 | #include <sdsl/util.hpp>
 9 | #include <sdsl/sorted_int_stack.hpp>
10 | 
11 | #include "calc.hpp"
12 | #include "io.hpp"
13 | #include "operations/iteration.hpp"
14 | #include "operations/sizes.hpp"
15 | #include "docstrings.hpp"
16 | 
17 | 
18 | namespace py = pybind11;
19 | 
20 | 
21 | // Binds sdsl::sorted_int_stack as Python class `SortedIntStack` in module m.
22 | inline auto add_sorted_int_stack(py::module& m)
23 | {
24 |     using Stack = sdsl::sorted_int_stack;
25 | 
26 |     auto cls = py::class_<Stack>(m, "SortedIntStack")
27 |         .def("empty", &Stack::empty, "Checks whether the stack is empty.")
28 |         .def("top", [](const Stack& self) {
29 |             if (self.size() > 0u)
30 |                 return self.top();
31 |             throw py::index_error("top from empty stack");
32 |         }, "Returns the topmost element of the stack.")
33 |         .def("pop", [](Stack& self) {
34 |             if (self.size() == 0u)
35 |                 throw py::index_error("pop from empty stack");
36 |             auto ans = self.top();
37 |             self.pop();
38 |             return ans;
39 |         }, "Removes the topmost element from the stack and returns its copy. Not thread safe.")
40 |         .def("push", [](Stack& self, const Stack::size_type& x) {
41 |             if (self.empty() || self.top() < x)  // equal values are rejected too
42 |                 self.push(x);
43 |             else
44 |                 throw py::value_error("elements have to be pushed in strictly increasing order");
45 |         }, "Adds new element to the top of the stack "
46 |            "(n.b. it has to be strictly greater than the stored ones). Not thread safe.")
47 |         .def(py::init([](Stack::size_type x) {
48 |             return Stack(x);
49 |         }), py::arg("max_value"),
50 |         "Creates a stack which can store integers not greater than max_value.");
51 | 
52 |     cls.doc() = doc_sorted_int_stack;
53 | 
54 |     add_sizes(cls);
55 |     add_serialization(cls, 0);
56 |     add_description(cls);
57 | 
58 |     return std::make_tuple(cls);
59 | }
60 | 


--------------------------------------------------------------------------------
/pysdsl/types/suffixarray.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include <string>
  4 | #include <tuple>
  5 | #include <utility>
  6 | 
  7 | #include <pybind11/pybind11.h>
  8 | 
  9 | #include <sdsl/suffix_arrays.hpp>
 10 | 
 11 | #include "operations/sizes.hpp"
 12 | #include "operations/iteration.hpp"
 13 | #include "docstrings.hpp"
 14 | #include "io.hpp"
 15 | #include "calc.hpp"
 16 | 
 17 | namespace py = pybind11;
 18 | 
 19 | 
 20 | // Best-effort binding of a CSA member type under a private class name.
 21 | template <class T>
 22 | inline auto add_csa_member_type(py::module& m, std::string&& name,
 23 |                                 const std::string& parent_name)
 24 | {
 25 |     const std::string py_name = "_" + name + "ofSuffixArray" + parent_name;
 26 |     try {
 27 |         py::class_<T> member_cls(m, py_name.c_str());
 28 |         add_read_access(member_cls);
 29 |         add_to_string(member_cls);
 30 |     } catch (std::runtime_error& /* ignored; presumably already bound */) {}
 31 | }
 32 | 
 33 | 
 34 | // Registers suffix array type T as Python class "SuffixArray" + name, binds
 35 | // helper classes for its member types, and stores the new class in the
 36 | // module attribute `suffix_array` under key `name`.
 37 | template <class T>
 38 | inline
 39 | auto add_csa_class(py::module& m, std::string&& name, const char* doc = nullptr)
 40 | {
 41 |     auto cls = py::class_<T>(m, ("SuffixArray" + name).c_str());
 42 | 
 43 |     add_csa_member_type<typename T::isa_type>(m, "ISA", name);
 44 |     add_csa_member_type<typename T::bwt_type>(m, "BWT", name);
 45 |     add_csa_member_type<typename T::lf_type>(m, "LF", name);
 46 |     add_csa_member_type<typename T::psi_type>(m, "PSI", name);
 47 |     add_csa_member_type<typename T::text_type>(m, "Text", name);
 48 |     add_csa_member_type<typename T::first_row_type>(m, "FirstRow", name);
 49 | 
 50 |     try {
 51 |         using char2comp_type = typename T::alphabet_type::char2comp_type;
 52 |         auto cls_char2comp = py::class_<char2comp_type>(
 53 |             m, ("_Char2CompOf" + name).c_str());
 54 |         cls_char2comp.def(
 55 |             "__getitem__",
 56 |             [] (const char2comp_type& self, uint64_t c) { return self[c]; }
 57 |         );
 58 |     } catch (std::runtime_error& /* ignored; presumably already bound */) {}
 59 |     try {
 60 |         using comp2char_type = typename T::alphabet_type::comp2char_type;
 61 |         auto cls_comp2char = py::class_<comp2char_type>(
 62 |             m, ("_Comp2CharOf" + name).c_str());
 63 |         cls_comp2char.def(
 64 |             "__getitem__",
 65 |             [] (const comp2char_type& self, uint64_t c) { return self[c]; }
 66 |         );
 67 |     } catch (std::runtime_error& /* ignored; presumably already bound */) {}
 68 | 
 69 |     cls.def_property_readonly("isa", [] (const T& self ) { return &self.isa; });
 70 |     cls.def_property_readonly("bwt", [] (const T& self ) { return &self.bwt; });
 71 |     cls.def_property_readonly("lf", [] (const T& self ) { return &self.lf; });
 72 |     cls.def_property_readonly("psi", [] (const T& self ) { return &self.psi; });
 73 |     cls.def_property_readonly("text", [] (const T& self ) {
 74 |         return &self.text; });
 75 |     cls.def_property_readonly("L", [] (const T& self ) { return &self.L; });
 76 |     cls.def_property_readonly("F", [] (const T& self ) { return &self.F; });
 77 |     cls.def_property_readonly("C", [] (const T& self ) { return &self.C; });
 78 |     cls.def_property_readonly("char2comp", [] (const T& self ) {
 79 |         return &self.char2comp; });
 80 |     cls.def_property_readonly("comp2char", [] (const T& self ) {
 81 |         return &self.comp2char; });
 82 |     cls.def_property_readonly("sigma", [] (const T& self ) {
 83 |         return self.sigma; });
 84 | 
 85 |     cls.def(
 86 |         "extract",
 87 |         [] (const T& self, typename T::size_type begin,
 88 |             typename T::size_type end) {
 89 |             if (end >= detail::size(self)) {
 90 |                 throw std::out_of_range(std::to_string(end)); }
 91 |             // Both bounds are inclusive, so begin == end is a valid
 92 |             // one-character request; only begin > end is rejected.
 93 |             if (begin > end) {
 94 |                 throw std::invalid_argument("begin should not exceed end"); }
 95 |             return sdsl::extract(self, begin, end); },
 96 |         py::arg("begin"), py::arg("end"),
 97 |         "Reconstructs the subarray T[begin:end] of the original array T\n"
 98 |         "\n\tbegin: Position of the first character which should be extracted "
 99 |         "(inclusive)"
100 |         "\n\tend: Position of the last character which should be extracted "
101 |         "(inclusive)\n\n"
102 |         "Time complexity: Order{(end - begin+1) * t_{Psi} + t_{ISA} }",
103 |         py::call_guard<py::gil_scoped_release>());
104 |     cls.def(
105 |         "count",
106 |         [] (const T& self, const typename T::string_type& pattern) {
107 |             return sdsl::count(self, pattern); },
108 |         py::arg("pattern"),
109 |         "Counts the number of occurrences of a pattern in a CSA",
110 |         py::call_guard<py::gil_scoped_release>());
111 |     cls.def(
112 |         "locate",
113 |         [] (const T& self, const typename T::string_type& pattern) {
114 |             return sdsl::locate(self, pattern); },
115 |         py::arg("pattern"),
116 |         "Calculates all occurrences of a pattern in a CSA\n"
117 |         "Time complexity: Order{ t_{backward_search} + z * t_{SA} },\n"
118 |         "where `z` is the number of occurrences of pattern in the CSA",
119 |         py::call_guard<py::gil_scoped_release>());
120 |     cls.def(py::init(
121 |         [] (const typename T::string_type& data) {
122 |             T self;
123 |             sdsl::construct_im(self, data,
124 |                                sizeof(typename T::string_type::value_type));
125 |             return self;
126 |         }
127 |     ));
128 | 
129 |     add_sizes(cls);
130 |     add_description(cls);
131 |     add_serialization(cls);
132 |     add_to_string(cls);
133 |     add_read_access<T>(cls);
134 |     add_std_algo<T>(cls);
135 | 
136 |     if (doc) cls.doc() = doc;
137 | 
138 |     m.attr("suffix_array").attr("__setitem__")(name, cls);
139 | 
140 |     return cls;
141 | }
142 | 
143 | 
144 | // Creates the module attribute `suffix_array` (a dict) and binds all CSAs.
145 | inline auto add_csa(py::module& m)
146 | {
147 |     m.attr("suffix_array") = py::dict();
148 |     // NOTE(review): the evaluation order of these call arguments is
149 |     // unspecified in C++, so the registration order may differ by compiler.
150 |     return std::make_tuple(
151 |         add_csa_class<sdsl::csa_bitcompressed<>>(m, "Bitcompressed", doc_csa),
152 |         add_csa_class<sdsl::csa_sada<>>(m, "Sadakane", doc_sada),
153 |         add_csa_class<sdsl::csa_sada_int<>>(m, "SadakaneInt", doc_sada),
154 |         add_csa_class<sdsl::csa_wt<>>(m, "WaveletTree", doc_csa_wt),
155 |         add_csa_class<sdsl::csa_wt_int<>>(m, "WaveletTreeInt", doc_csa_wt));
156 | }
157 | 


--------------------------------------------------------------------------------
/pysdsl/types/wavelet.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include <stdexcept>
  4 | #include <string>
  5 | #include <tuple>
  6 | #include <utility>
  7 | #include <vector>
  8 | 
  9 | #include <sdsl/vectors.hpp>
 10 | #include <sdsl/wavelet_trees.hpp>
 11 | 
 12 | #include <pybind11/pybind11.h>
 13 | #include <pybind11/stl.h>
 14 | 
 15 | #include "calc.hpp"
 16 | #include "docstrings.hpp"
 17 | #include "io.hpp"
 18 | 
 19 | 
 20 | namespace py = pybind11;
 21 | 
 22 | 
 23 | 
 24 | template <class T, bool /* enable */ = T::lex_ordered>
 25 | class add_lex_functor;
 26 | 
 27 | template <class T, bool /* enable */ = sdsl::has_node_type<T>::value>
 28 | class add_traversable_functor;
 29 | 
 30 | 
 31 | template <class T>
 32 | class add_lex_functor<T, false>  // no-op fallback: T is not lex_ordered
 33 | {
 34 | public:
 35 |     py::class_<T>& operator() (py::class_<T>& target) { return target; }
 36 | };
 37 | 
 38 | 
 39 | // Specialization for lex_ordered wavelet trees: adds the order-dependent
 40 | // queries (quantile_freq, lex_count, symbol_lte/gte, ...) to the binding.
 41 | template <class T>
 42 | class add_lex_functor<T, true>
 43 | {
 44 | public:
 45 |     py::class_<T>& operator() (py::class_<T>& cls)
 46 |     {
 47 |         typedef typename T::size_type size_type;
 48 |         typedef typename T::value_type value_type;
 49 |         cls.def(
 50 |             "quantile_freq",
 51 |             [] (const T& self, size_type lb, size_type rb, size_type q) {
 52 |                 return sdsl::quantile_freq(self, lb, rb, q); },
 53 |             py::arg("lb"), py::arg("rb"), py::arg("q"),
 54 |             "Returns the q-th smallest element and its frequency in wt[lb..rb]."
 55 |             "\n\tlb: Left array bound in T"
 56 |             "\n\trb: Right array bound in T"
 57 |             "\n\tq: q-th smallest element ('quantile'), 0-based indexed.",
 58 |             py::call_guard<py::gil_scoped_release>());
 59 |         cls.def(
 60 |             "lex_count",
 61 |             [] (const T& self, size_t i, size_t j, typename T::value_type c) {
 62 |                 if (j > self.size()) {  // j is exclusive: j == size is valid
 63 |                     throw std::invalid_argument("j should not exceed size"); }
 64 |                 if (i > j) {  // i == j denotes the empty interval
 65 |                     throw std::invalid_argument("i should not exceed j"); }
 66 |                 return self.lex_count(i, j, c); },
 67 |             py::arg("i"), py::arg("j"), py::arg("c"),
 68 |             "How many values are lexicographic smaller/greater than c in "
 69 |             "[i..j-1].\n\ti: Start index (inclusive) of the interval."
 70 |             "\n\tj: End index (exclusive) of the interval."
 71 |             "\n\tc: Value c.\nreturn A triple containing:\n\trank(i, c)"
 72 |             "\n\tnumber of values smaller than c in [i..j-1]"
 73 |             "\n\tnumber of values greater than c in [i..j-1]",
 74 |             py::call_guard<py::gil_scoped_release>());
 75 |         cls.def(
 76 |             "lex_smaller_count",
 77 |             [] (const T& self, size_t i, typename T::value_type c) {
 78 |                 if (i > self.size()) {  // i is exclusive: i == size is valid
 79 |                     throw std::invalid_argument("i should not exceed size"); }
 80 |                 return self.lex_smaller_count(i, c); },
 81 |             py::arg("i"), py::arg("c"),
 82 |             "How many values are lexicographic smaller than c in [0..i-1]."
 83 |             "\n\ti: Exclusive right bound of the range."
 84 |             "\nreturn: A tuple containing:\n\trank(i, c)\n\tnumber of values "
 85 |             "smaller than c in [0..i-1]",
 86 |             py::call_guard<py::gil_scoped_release>());
 87 |         cls.def(
 88 |             "symbol_lte",
 89 |             [] (const T& self, typename T::value_type c) {
 90 |                 auto result = sdsl::symbol_lte(self, c);
 91 |                 if (!std::get<0>(result)) {
 92 |                     throw std::runtime_error("Symbol not found"); }
 93 |                 return std::get<1>(result); },
 94 |             py::arg("c"),
 95 |             "Returns for a symbol c the previous smaller or equal symbol in "
 96 |             "the WT");
 97 |         cls.def(
 98 |             "symbol_gte",
 99 |             [] (const T& self, typename T::value_type c) {
100 |                 auto result = sdsl::symbol_gte(self, c);
101 |                 if (!std::get<0>(result)) {
102 |                     throw std::runtime_error("Symbol not found"); }
103 |                 return std::get<1>(result); },
104 |             py::arg("c"),
105 |             "Returns for a symbol c the next larger or equal symbol in the WT");
106 |         cls.def(
107 |             "restricted_unique_range_values",
108 |             [] (const T& self, size_type x_i, size_type x_j, value_type y_i,
109 |                 value_type y_j
110 |             ) { return sdsl::restricted_unique_range_values(self, x_i, x_j,
111 |                                                             y_i, y_j); },
112 |             py::arg("x_i"), py::arg("x_j"), py::arg("y_i"), py::arg("y_j"),
113 |             "For an x range [x_i, x_j] and a value range [y_i, y_j] "
114 |             "return all unique y values occuring in [x_i, x_j] "
115 |             "in ascending order.",
116 |             py::call_guard<py::gil_scoped_release>());
117 |         return cls;
118 |     }
119 | };
120 | 
121 | 
122 | template <class T>
123 | class add_traversable_functor<T, false>  // no-op: T has no node_type
124 | {
125 | public:
126 |     py::class_<T>& operator() (py::module& /* m */, py::class_<T>& target,
127 |                                std::string&& /* name */) { return target; }
128 | };
129 | 
130 | 
131 | // Specialization for wavelet trees exposing a node_type: binds the node
132 | // class under `name` and adds the node-based traversal methods to cls.
133 | template <class T>
134 | class add_traversable_functor<T, true>
135 | {
136 | public:
137 |     py::class_<T>& operator() (py::module& m, py::class_<T>& cls,
138 |                                std::string&& name)
139 |     {
140 |         using node_t = typename T::node_type;
141 | 
142 |         // A std::runtime_error raised while binding the node type is ignored
143 |         // on purpose (presumably it is already bound - TODO confirm).
144 |         try { py::class_<node_t> node_cls(m, name.c_str()); }
145 |         catch (std::runtime_error& /* ignore */) {}
146 | 
147 |         cls.def("root_node", &T::root);
148 |         cls.def("node_is_leaf", &T::is_leaf);
149 |         cls.def("node_empty", [] (const T& self, const node_t& v) {
150 |             return self.empty(v); });
151 |         cls.def("node_size", [] (const T& self, const node_t& v) {
152 |             return self.size(v); });
153 |         cls.def("node_sym", &T::sym);
154 |         cls.def("node_expand", [] (const T& self, const node_t& v) {
155 |             return self.expand(v); });
156 | 
157 |         cls.def(
158 |             "node_expand_ranges",
159 |             [] (const T& self, const node_t& v,
160 |                 const sdsl::range_vec_type& ranges) {
161 |                 return self.expand(v, ranges); },
162 |             py::arg("node"), py::arg("ranges"));
163 | 
164 |         // NOTE(review): no keep_alive policy ties the returned iterator to
165 |         // `self` here - confirm it cannot outlive the tree.
166 |         cls.def(
167 |             "node_bit_vec",
168 |             [] (const T& self, const node_t& v) {
169 |                 auto bits = self.bit_vec(v);
170 |                 return std::make_pair(
171 |                     bits.size(),
172 |                     py::make_iterator(detail::cbegin(bits),
173 |                                       detail::cend(bits))); });
174 | 
175 |         // Copies the node's sequence into a freshly sized int_vector.
176 |         cls.def(
177 |             "node_seq",
178 |             [] (const T& self, const node_t& v) {
179 |                 auto node_items = self.seq(v);
180 |                 sdsl::int_vector<> out(node_items.size());
181 |                 std::copy(node_items.begin(), node_items.end(), out.begin());
182 |                 return out; });
183 | 
184 |         cls.def(
185 |             "intersect",
186 |             [] (const T& self, std::vector<sdsl::range_type> ranges, size_t t) {
187 |                 return sdsl::intersect(self, ranges, t); },
188 |             py::arg("ranges"), py::arg("t") = 0,
189 |             "Intersection of elements in "
190 |             "WT[s₀, e₀], WT[s₁, e₁], ...,WT[sₖ,eₖ]\n"
191 |             "\tranges: The ranges.\n\tt: Threshold in how many distinct ranges "
192 |             "the value has to be present. Default: t=ranges.size()\n"
193 |             "Return a vector containing (value, frequency) - of value which "
194 |             "are contained in t different ranges. Frequency = accumulated "
195 |             "frequencies in all ranges. The tuples are ordered according "
196 |             "to value, if wt is lex_ordered.");
197 | 
198 |         // `found` and the three sigma-sized vectors are handed to
199 |         // sdsl::interval_symbols and then returned together as one tuple.
200 |         cls.def(
201 |             "interval_symbols",
202 |             [] (const T& self, size_t i, size_t j) {
203 |                 if (j > self.size()) {
204 |                     throw std::invalid_argument(
205 |                         "j should be less or equal than size"); }
206 |                 if (i > j) {
207 |                     throw std::invalid_argument(
208 |                         "i should be less or equal than j"); }
209 |                 size_t found;
210 |                 std::vector<typename T::value_type> symbols(self.sigma);
211 |                 std::vector<size_t> rank_i(self.sigma);
212 |                 std::vector<size_t> rank_j(self.sigma);
213 | 
214 |                 sdsl::interval_symbols(self, i, j, found, symbols,
215 |                                        rank_i, rank_j);
216 |                 return std::make_tuple(found, symbols, rank_i, rank_j); },
217 |             py::arg("i"), py::arg("j"),
218 |             "For each symbol c in wt[i..j - 1] get rank(i, c) and rank(j, c).");
219 | 
220 |         return cls;
221 |     }
222 | };
223 | 
224 | 
225 | template <class T>  // generic fallback: no type-specific additions
226 | auto add_wavelet_specific(py::class_<T>& binding) { return binding; }
227 | 
228 | 
229 | // Extra bindings available only for sdsl::wt_int: the `tree` bit vector,
230 | // `max_level`, and the 2-D range search.
231 | template <class... T>
232 | auto add_wavelet_specific(py::class_<sdsl::wt_int<T...>>& cls)
233 | {
234 |     typedef sdsl::wt_int<T...> base_cls;
235 |     typedef typename base_cls::size_type size_type;
236 |     typedef typename base_cls::value_type value_type;
237 | 
238 |     cls.def_property_readonly(
239 |         "tree",
240 |         [] (const base_cls& self) { return self.tree; },
241 |         "A concatenation of all bit vectors of the wavelet tree.");
242 |     cls.def(
243 |         "get_tree",
244 |         [] (const base_cls& self) { return self.tree; },
245 |         "A concatenation of all bit vectors of the wavelet tree.");
246 |     cls.def_property_readonly(
247 |         "max_level",
248 |         [] (const base_cls& self) { return self.max_level; },
249 |         "Maximal level of the wavelet tree.");
250 |     cls.def(
251 |         "get_max_level",
252 |         [] (const base_cls& self) { return self.max_level; },
253 |         "Maximal level of the wavelet tree.");
254 |     cls.def(
255 |         "range_search_2d",
256 |         [] (const base_cls& self, size_type lb, size_type rb,
257 |             value_type vlb, value_type vrb, bool report) {
258 |             return self.range_search_2d(lb, rb, vlb, vrb, report); },
259 |         py::arg("lb"), py::arg("rb"), py::arg("vlb"), py::arg("vrb"),
260 |         // pybind11 cannot see a lambda's C++ default, so declare it here.
261 |         py::arg("report") = true,
262 |         "searches points in the index interval [lb..rb] and "
263 |         "value interval [vlb..vrb].\n"
264 |         "\tlb: Left bound of index interval (inclusive)\n"
265 |         "\trb: Right bound of index interval (inclusive)\n"
266 |         "\tvlb: Left bound of value interval (inclusive)\n"
267 |         "\tvrb: Right bound of value interval (inclusive)\n"
268 |         "\treport: Should the matching points be returned?\n"
269 |         "returns pair (number of found points, vector of points), "
270 |         "the vector is empty when report = false.",
271 |         py::call_guard<py::gil_scoped_release>());
272 |     return cls;
273 | }
274 | 
275 | 
276 | // Registers wavelet tree type T as Python class `name`: constructors,
277 | // rank/select queries, plus every optional feature group T supports.
278 | // The class is also appended to the module-level list `all_wavelet_trees`.
279 | template <class T>
280 | inline auto add_wavelet_class(py::module& m, const std::string&& name,
281 |                               const char* doc= nullptr)
282 | {
283 |     auto cls = py::class_<T>(m, name.c_str())
284 |         .def_property_readonly(
285 |             "sigma",
286 |             [] (const T& self) { return self.sigma; },
287 |             "Effective alphabet size of the wavelet tree")
288 |         .def(
289 |             "get_sigma",
290 |             [] (const T& self) { return self.sigma; },
291 |             "Effective alphabet size of the wavelet tree")
292 |         .def_static(
293 |             "from_bytes",
294 |             [] (const py::bytes& bytes) {
295 |                 // Copy the payload while the GIL is still held; only the
296 |                 // pure C++ construction below runs without it.
297 |                 auto data = std::string(bytes);
298 |                 py::gil_scoped_release release;
299 |                 T wt;
300 |                 sdsl::construct_im(wt, data, sizeof(typename T::value_type));
301 |                 return wt; },
302 |             py::arg("s"),
303 |             "Construct from a build sequence")
304 |         .def_static(
305 |             "from_binary_file",
306 |             [] (const std::string& file_name) {
307 |                 T wt;
308 |                 sdsl::construct(wt, file_name, sizeof(typename T::value_type));
309 |                 return wt; },
310 |             py::arg("file_name"),
311 |             py::call_guard<py::gil_scoped_release>())
312 |         .def_static(
313 |             "parse_string",
314 |             [] (const std::string& s) {
315 |                 T wt;
316 |                 sdsl::construct_im(wt, s, 'd');
317 |                 return wt; },
318 |             py::arg("s"),
319 |             "Construct from space-separated human-readable string")
320 |         .def(
321 |             "rank",
322 |             [] (const T& self, typename T::size_type i,
323 |                 typename T::value_type c) {
324 |                 // The prefix [0..i-1] may span the whole vector (i == size).
325 |                 if (i > self.size()) {
326 |                     throw std::out_of_range(std::to_string(i)); }
327 |                 return self.rank(i, c); },
328 |             "Calculates how many values c are in the prefix [0..i-1] of the "
329 |             "supported vector (i in [0..size]).\nTime complexity: "
330 |             "Order(log(|Sigma|))",
331 |             py::arg("i"), py::arg("c"),
332 |             py::call_guard<py::gil_scoped_release>())
333 |         .def(
334 |             "inverse_select",
335 |             [] (const T& self, typename T::size_type i) {
336 |                 if (i >= self.size()) {
337 |                     throw std::out_of_range(std::to_string(i)); }
338 |                 return self.inverse_select(i); },
339 |             py::arg("i"),
340 |             "Calculates how many occurrences of value wt[i] are in the prefix"
341 |             "[0..i-1] of the original sequence, returns pair "
342 |             "(rank(wt[i], i), wt[i])",
343 |             py::call_guard<py::gil_scoped_release>())
344 |         .def(
345 |             "select",
346 |             [] (const T& self, typename T::size_type i,
347 |                 typename T::value_type c) {
348 |                 // Occurrences are counted from 1; at most size of them exist.
349 |                 if (i < 1 || i > self.size()) {
350 |                     throw std::out_of_range(std::to_string(i)); }
351 |                 if (i > self.rank(self.size(), c)) {
352 |                     throw std::invalid_argument(
353 |                         std::to_string(i) + " is greater than rank(" +
354 |                         std::to_string(self.size()) + ", " +
355 |                         std::to_string(c) + ")"); }
356 |                 return self.select(i, c); },
357 |             py::arg("i"), py::arg("c"),
358 |             "Calculates the i-th occurrence of the value c in the supported "
359 |             "vector.\nTime complexity: Order(log(|Sigma|))",
360 |             py::call_guard<py::gil_scoped_release>());
361 | 
362 |     add_wavelet_specific(cls);
363 |     add_lex_functor<T>()(cls);
364 |     add_traversable_functor<T>()(m, cls, "_" + name + "Node");
365 | 
366 |     add_sizes(cls);
367 |     add_description(cls);
368 |     add_serialization(cls);
369 |     add_to_string(cls);
370 | 
371 |     add_read_access(cls);
372 |     add_std_algo(cls);
373 | 
374 |     if (doc) cls.doc() = doc;
375 | 
376 |     m.attr("all_wavelet_trees").attr("append")(cls);
377 | 
378 |     return cls;
379 | }
380 | 
381 | 
382 | // Binds "WaveletTreeInt" + base_name; stored in m.wavelet_tree_int by name.
383 | template <class bit_vector=sdsl::bit_vector>
384 | inline auto add_wt_int(py::module& m, std::string&& base_name)
385 | {
386 |     auto wt = add_wavelet_class<sdsl::wt_int<bit_vector>>(
387 |         m, "WaveletTreeInt" + base_name, doc_wtint);
388 |     m.attr("wavelet_tree_int").attr("__setitem__")(base_name, wt);
389 |     return wt;
390 | }
391 | 
392 | // Binds "WaveletMatrixInt" + base_name; stored in m.wavelet_matrix_int.
393 | template <class bit_vector=sdsl::bit_vector>
394 | inline auto add_wm_int(py::module& m, std::string&& base_name)
395 | {
396 |     auto wm = add_wavelet_class<sdsl::wm_int<bit_vector>>(
397 |         m, "WaveletMatrixInt" + base_name, doc_wm_int);
398 |     m.attr("wavelet_matrix_int").attr("__setitem__")(base_name, wm);
399 |     return wm;
400 | }
401 | 
402 | // Binds "WaveletTreeHuffman" + base_name; stored in m.wavelet_tree_huffman.
403 | template <class bit_vector=sdsl::bit_vector>
404 | inline auto add_wt_huff(py::module& m, std::string&& base_name)
405 | {
406 |     auto wt = add_wavelet_class<sdsl::wt_huff<bit_vector>>(
407 |         m, "WaveletTreeHuffman" + base_name, doc_wt_huff);
408 |     m.attr("wavelet_tree_huffman").attr("__setitem__")(base_name, wt);
409 |     return wt;
410 | }
411 | 
412 | // Binds "WaveletTreeHuffmanInt" + base_name (m.wavelet_tree_huffman_int).
413 | template <class bit_vector=sdsl::bit_vector>
414 | inline auto add_wt_huff_int(py::module& m, std::string&& base_name)
415 | {
416 |     auto wt = add_wavelet_class<sdsl::wt_huff_int<bit_vector>>(
417 |         m, "WaveletTreeHuffmanInt" + base_name, doc_wt_huff);
418 |     m.attr("wavelet_tree_huffman_int").attr("__setitem__")(base_name, wt);
419 |     return wt;
420 | }
421 | 
422 | 
423 | // Binds "WaveletTreeHuTucker" + base_name (m.wavelet_tree_hu_tucker).
424 | template <class bit_vector=sdsl::bit_vector>
425 | inline auto add_wt_hutu(py::module& m, std::string&& base_name)
426 | {
427 |     auto wt = add_wavelet_class<sdsl::wt_hutu<bit_vector>>(
428 |         m, "WaveletTreeHuTucker" + base_name, doc_wt_hutu);
429 |     m.attr("wavelet_tree_hu_tucker").attr("__setitem__")(base_name, wt);
430 |     return wt;
431 | }
432 | 
433 | // Binds "WaveletTreeHuTuckerInt" + base_name (m.wavelet_tree_hu_tucker_int).
434 | template <class bit_vector=sdsl::bit_vector>
435 | inline auto add_wt_hutu_int(py::module& m, std::string&& base_name)
436 | {
437 |     auto wt = add_wavelet_class<sdsl::wt_hutu_int<bit_vector>>(
438 |         m, "WaveletTreeHuTuckerInt" + base_name, doc_wt_hutu);
439 |     m.attr("wavelet_tree_hu_tucker_int").attr("__setitem__")(base_name, wt);
440 |     return wt;
441 | }
442 | 
443 | 
444 | // Binds "WaveletTreeBalanced" + base_name (m.wavelet_tree_balanced).
445 | template <class bit_vector=sdsl::bit_vector>
446 | inline auto add_wt_blcd(py::module& m, std::string&& base_name)
447 | {
448 |     auto wt = add_wavelet_class<sdsl::wt_blcd<bit_vector>>(
449 |         m, "WaveletTreeBalanced" + base_name, doc_wt_blcd);
450 |     m.attr("wavelet_tree_balanced").attr("__setitem__")(base_name, wt);
451 |     return wt;
452 | }
453 | 
454 | 
455 | // Binds "WaveletTreeBalancedInt" + base_name (m.wavelet_tree_balanced_int).
456 | template <class bit_vector=sdsl::bit_vector>
457 | inline auto add_wt_blcd_int(py::module& m, std::string&& base_name)
458 | {
459 |     auto wt = add_wavelet_class<sdsl::wt_blcd_int<bit_vector>>(
460 |         m, "WaveletTreeBalancedInt" + base_name, doc_wt_blcd);
461 |     m.attr("wavelet_tree_balanced_int").attr("__setitem__")(base_name, wt);
462 |     return wt;
463 | }
464 | 
465 | 
466 | template <class bit_vector=sdsl::bit_vector>  // forwards a copy of the name
467 | inline auto add_wt_int(py::module& m, const std::string& base_name) {
468 |     return add_wt_int<bit_vector>(m, std::string(base_name)); }
469 | 
470 | 
471 | template <class bit_vector=sdsl::bit_vector>  // forwards a copy of the name
472 | inline auto add_wm_int(py::module& m, const std::string& base_name) {
473 |     return add_wm_int<bit_vector>(m, std::string(base_name)); }
474 | 
475 | 
476 | template <class bit_vector=sdsl::bit_vector>  // forwards a copy of the name
477 | inline auto add_wt_huff(py::module& m, const std::string& base_name) {
478 |     return add_wt_huff<bit_vector>(m, std::string(base_name)); }
479 | 
480 | template <class bit_vector=sdsl::bit_vector>  // forwards a copy of the name
481 | inline auto add_wt_huff_int(py::module& m, const std::string& base_name) {
482 |     return add_wt_huff_int<bit_vector>(m, std::string(base_name)); }
483 | 
484 | template <class bit_vector=sdsl::bit_vector>  // forwards a copy of the name
485 | inline auto add_wt_hutu(py::module& m, const std::string& base_name) {
486 |     return add_wt_hutu<bit_vector>(m, std::string(base_name)); }
487 | 
488 | template <class bit_vector=sdsl::bit_vector>  // forwards a copy of the name
489 | inline auto add_wt_hutu_int(py::module& m, const std::string& base_name) {
490 |     return add_wt_hutu_int<bit_vector>(m, std::string(base_name)); }
491 | 
492 | template <class bit_vector=sdsl::bit_vector>  // forwards a copy of the name
493 | inline auto add_wt_blcd(py::module& m, const std::string& base_name) {
494 |     return add_wt_blcd<bit_vector>(m, std::string(base_name)); }
495 | 
496 | template <class bit_vector=sdsl::bit_vector>  // forwards a copy of the name
497 | inline auto add_wt_blcd_int(py::module& m, const std::string& base_name) {
498 |     return add_wt_blcd_int<bit_vector>(m, std::string(base_name)); }
499 | 
500 | 
501 | // Reads base.__name__, binds the tree under it, and maps base -> class.
502 | template <class bit_vector=sdsl::bit_vector>
503 | inline auto add_wt_int(py::module& m, const py::class_<bit_vector>& base) {
504 |     const std::string name = py::cast<std::string>(base.attr("__name__"));
505 |     auto wt = add_wt_int<bit_vector>(m, name);
506 |     m.attr("wavelet_tree_int_by_base").attr("__setitem__")(base, wt);
507 |     return wt;
508 | }
509 | 
510 | 
511 | // Reads base.__name__, binds the matrix under it, and maps base -> class.
512 | template <class bit_vector=sdsl::bit_vector>
513 | inline auto add_wm_int(py::module& m, const py::class_<bit_vector>& base) {
514 |     const std::string name = py::cast<std::string>(base.attr("__name__"));
515 |     auto wm = add_wm_int<bit_vector>(m, name);
516 |     m.attr("wavelet_matrix_int_by_base").attr("__setitem__")(base, wm);
517 |     return wm;
518 | }
519 | 
520 | // Reads base.__name__, binds the tree under it, and maps base -> class.
521 | template <class bit_vector=sdsl::bit_vector>
522 | inline auto add_wt_huff(py::module& m, const py::class_<bit_vector>& base) {
523 |     const std::string name = py::cast<std::string>(base.attr("__name__"));
524 |     auto wt = add_wt_huff<bit_vector>(m, name);
525 |     m.attr("wavelet_tree_huffman_by_base").attr("__setitem__")(base, wt);
526 |     return wt;
527 | }
528 | 
529 | // Reads base.__name__, binds the tree under it, and maps base -> class.
530 | template <class bit_vector=sdsl::bit_vector>
531 | inline auto add_wt_huff_int(py::module& m, const py::class_<bit_vector>& base) {
532 |     const std::string name = py::cast<std::string>(base.attr("__name__"));
533 |     auto wt = add_wt_huff_int<bit_vector>(m, name);
534 |     m.attr("wavelet_tree_huffman_int_by_base").attr("__setitem__")(base, wt);
535 |     return wt;
536 | }
537 | 
538 | // Reads base.__name__, binds the tree under it, and maps base -> class.
539 | template <class bit_vector=sdsl::bit_vector>
540 | inline auto add_wt_hutu(py::module& m, const py::class_<bit_vector>& base) {
541 |     const std::string name = py::cast<std::string>(base.attr("__name__"));
542 |     auto wt = add_wt_hutu<bit_vector>(m, name);
543 |     m.attr("wavelet_tree_hu_tucker_by_base").attr("__setitem__")(base, wt);
544 |     return wt;
545 | }
546 | 
547 | // Reads base.__name__, binds the tree under it, and maps base -> class.
548 | template <class bit_vector=sdsl::bit_vector>
549 | inline auto add_wt_hutu_int(py::module& m, const py::class_<bit_vector>& base) {
550 |     const std::string name = py::cast<std::string>(base.attr("__name__"));
551 |     auto wt = add_wt_hutu_int<bit_vector>(m, name);
552 |     m.attr("wavelet_tree_hu_tucker_int_by_base").attr("__setitem__")(base, wt);
553 |     return wt;
554 | }
555 | 
556 | 
557 | template <class bit_vector=sdsl::bit_vector>
558 | inline auto add_wt_blcd(py::module& m, const py::class_<bit_vector>& base)
559 | {
560 |     auto base_name = py::cast<std::string>(base.attr("__name__"));
561 |     auto cls = add_wt_blcd<bit_vector>(m, base_name);
562 |     m.attr("wavelet_tree_balanced_by_base").attr("__setitem__")(base, cls);
563 |     return cls;
564 | }
565 | 
566 | template <class bit_vector=sdsl::bit_vector>
567 | inline auto add_wt_blcd_int(py::module& m, const py::class_<bit_vector>& base)
568 | {
569 |     auto base_name = py::cast<std::string>(base.attr("__name__"));
570 |     auto cls = add_wt_blcd_int<bit_vector>(m, base_name);
571 |     m.attr("wavelet_tree_balanced_int_by_base").attr("__setitem__")(base, cls);
572 |     return cls;
573 | }
574 | 
575 | 
576 | template <class... T>
577 | inline auto add_wavelet(py::module& m,
578 |                         const std::tuple<py::class_<T>...> t)
579 | {
580 |     m.attr("all_wavelet_trees") = py::list();
581 |     m.attr("wavelet_tree_int") = py::dict();
582 |     m.attr("wavelet_tree_int_by_base") = py::dict();
583 |     m.attr("wavelet_matrix_int") = py::dict();
584 |     m.attr("wavelet_matrix_int_by_base") = py::dict();
585 |     m.attr("wavelet_tree_huffman") = py::dict();
586 |     m.attr("wavelet_tree_huffman_by_base") = py::dict();
587 |     m.attr("wavelet_tree_huffman_int") = py::dict();
588 |     m.attr("wavelet_tree_huffman_int_by_base") = py::dict();
589 |     m.attr("wavelet_tree_hu_tucker") = py::dict();
590 |     m.attr("wavelet_tree_hu_tucker_by_base") = py::dict();
591 |     m.attr("wavelet_tree_hu_tucker_int") = py::dict();
592 |     m.attr("wavelet_tree_hu_tucker_int_by_base") = py::dict();
593 |     m.attr("wavelet_tree_balanced") = py::dict();
594 |     m.attr("wavelet_tree_balanced_by_base") = py::dict();
595 |     m.attr("wavelet_tree_balanced_int") = py::dict();
596 |     m.attr("wavelet_tree_balanced_int_by_base") = py::dict();
597 | 
598 |     return std::make_tuple(
599 |         add_wt_int<>(m, ""),
600 |         add_wt_int(m, std::get<0>(t)),
601 |         add_wt_int(m, std::get<1>(t)),
602 |         add_wt_int(m, std::get<2>(t)),
603 | 
604 |         add_wm_int<>(m, ""),
605 |         add_wm_int(m, std::get<0>(t)),
606 |         add_wm_int(m, std::get<1>(t)),
607 |         add_wm_int(m, std::get<2>(t)),
608 | 
609 |         add_wt_huff<>(m, ""),
610 |         add_wt_huff(m, std::get<0>(t)),
611 |         add_wt_huff(m, std::get<1>(t)),
612 |         add_wt_huff(m, std::get<2>(t)),
613 | 
614 |         add_wt_huff_int<>(m, ""),
615 |         add_wt_huff_int(m, std::get<0>(t)),
616 |         add_wt_huff_int(m, std::get<1>(t)),
617 |         add_wt_huff_int(m, std::get<2>(t)),
618 | 
619 |         add_wt_hutu<>(m, ""),
620 |         add_wt_hutu(m, std::get<0>(t)),
621 |         add_wt_hutu(m, std::get<1>(t)),
622 |         add_wt_hutu(m, std::get<2>(t)),
623 | 
624 |         add_wt_hutu_int<>(m, ""),
625 |         add_wt_hutu_int(m, std::get<0>(t)),
626 |         add_wt_hutu_int(m, std::get<1>(t)),
627 |         add_wt_hutu_int(m, std::get<2>(t)),
628 | 
629 |         add_wt_blcd<>(m, ""),
630 |         add_wt_blcd(m, std::get<0>(t)),
631 |         add_wt_blcd(m, std::get<1>(t)),
632 |         add_wt_blcd(m, std::get<2>(t)),
633 | 
634 |         add_wt_blcd_int<>(m, ""),
635 |         add_wt_blcd_int(m, std::get<0>(t)),
636 |         add_wt_blcd_int(m, std::get<1>(t)),
637 |         add_wt_blcd_int(m, std::get<2>(t)),
638 | 
639 |         add_wavelet_class<sdsl::wt_gmr_rs<>>(m, "WaveletTreeGMRrankselect",
640 |                                              doc_wt_gmr_rs),
641 |         add_wavelet_class<sdsl::wt_gmr_rs<sdsl::enc_vector<>>>(
642 |             m, "WaveletTreeGMRrankselectEnc", doc_wt_gmr_rs),
643 |         add_wavelet_class<sdsl::wt_gmr<>>(m, "WaveletTreeGolynskiMunroRao",
644 |                                           doc_wt_gmr),
645 |         add_wavelet_class<sdsl::wt_gmr<sdsl::enc_vector<>>>(
646 |             m, "WaveletTreeGolynskiMunroRaoEnc", doc_wt_gmr),
647 | 
648 |         add_wavelet_class<sdsl::wt_ap<>>(m, "WaveletTreeAP", doc_wt_ap));
649 | }
650 | 


--------------------------------------------------------------------------------
/pysdsl/util/indexiterator.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include <cstddef>
  4 | #include <iterator>
  5 | #include <utility>
  6 | 
  7 | namespace {
  8 |     template<typename> struct int_ { using type = int; }; }
  9 | 
 10 | 
 11 | namespace detail
 12 | {
 13 |     template <class Container, typename T, typename TRef = T&>
 14 |     struct get_reference { using type = TRef; };
 15 | 
 16 |     template <class Container, typename T>
 17 |     struct get_reference<Container, T, typename Container::reference>
 18 |     { using type = typename Container::reference; };
 19 | 
 20 |     template <class Container,
 21 |               typename T = typename Container::value_type,
 22 |               typename S = typename Container::size_type,
 23 |               typename TRef = typename get_reference<Container, T>::type>
 24 |     class count_index_iterator
 25 |     {
 26 |     public:
 27 |         using difference_type = S;
 28 |         using size_type = S;
 29 |         using value_type = T;
 30 |         using pointer = T*;
 31 |         using reference = TRef;
 32 |         using const_reference = const T&;
 33 |         using iterator_category = std::random_access_iterator_tag;
 34 | 
 35 |         constexpr count_index_iterator() noexcept:
 36 |             m_parent(nullptr), m_index(0)
 37 |         {}
 38 | 
 39 |         constexpr count_index_iterator(const Container* container,
 40 |                                        S index) noexcept:
 41 |             m_parent(container),
 42 |             m_index(index)
 43 |         {}
 44 |         ~count_index_iterator() = default;
 45 | 
 46 |         constexpr count_index_iterator(
 47 |             const count_index_iterator& other
 48 |         ) noexcept = default;
 49 | 
 50 |         constexpr
 51 |         count_index_iterator(count_index_iterator&& other) noexcept = default;
 52 | 
 53 |         constexpr count_index_iterator&
 54 |         operator=(const count_index_iterator& other) noexcept = default;
 55 | 
 56 |         constexpr count_index_iterator&
 57 |         operator=(count_index_iterator&& other) noexcept = default;
 58 | 
 59 |         constexpr
 60 |         bool operator!=(const count_index_iterator& other) const noexcept {
 61 |             return (m_parent != other.m_parent) || (m_index != other.m_index); }
 62 | 
 63 |         constexpr
 64 |         bool operator==(const count_index_iterator& other) const noexcept {
 65 |             return (m_parent == other.m_parent) && (m_index == other.m_index); }
 66 | 
 67 |         constexpr
 68 |         bool operator>(const count_index_iterator& other) const noexcept {
 69 |             return m_index > other.m_index; }
 70 | 
 71 |         constexpr
 72 |         bool operator>=(const count_index_iterator& other) const noexcept {
 73 |             return m_index >= other.m_index; }
 74 | 
 75 |         constexpr
 76 |         bool operator<(const count_index_iterator& other) const noexcept {
 77 |             return m_index < other.m_index; }
 78 | 
 79 |         constexpr
 80 |         bool operator<=(const count_index_iterator& other) const noexcept {
 81 |             return m_index <= other.m_index; }
 82 | 
 83 |         value_type operator*() { return (*m_parent)[m_index]; }
 84 | 
 85 |         decltype(auto) operator++()
 86 |         {
 87 |             ++m_index;
 88 |             return *this;
 89 |         }
 90 | 
 91 |         const auto operator++(int) {
 92 |             return *count_index_iterator<Container, T, S>(m_parent,
 93 |                                                           m_index++); }
 94 | 
 95 |         decltype(auto) operator--()
 96 |         {
 97 |             --m_index;
 98 |             return *this;
 99 |         }
100 | 
101 |         const auto operator--(int) {
102 |             return *count_index_iterator<Container, T, S>(m_parent,
103 |                                                           m_index--); }
104 | 
105 |         constexpr
106 |         difference_type
107 |         operator-(const count_index_iterator& other) const noexcept {
108 |             return m_index - other.m_index; }
109 | 
110 |         constexpr
111 |         decltype(auto) operator-(const difference_type step) const noexcept {
112 |             return count_index_iterator<Container, T, S>(m_parent,
113 |                                                          m_index - step); }
114 | 
115 |         constexpr
116 |         decltype(auto) operator+(const difference_type step) const noexcept {
117 |             return count_index_iterator<Container, T, S>(m_parent,
118 |                                                          m_index + step); }
119 | 
120 |         friend constexpr decltype(auto)
121 |         operator+(const difference_type step,
122 |                   const count_index_iterator &self) noexcept {
123 |             return count_index_iterator<Container, T, S>(self.m_parent,
124 |                                                          self.m_index + step); }
125 | 
126 |         friend constexpr decltype(auto)
127 |         operator-(const difference_type step,
128 |                   const count_index_iterator &self) noexcept {
129 |             return count_index_iterator<Container, T, S>(self.m_parent,
130 |                                                          self.m_index - step); }
131 | 
132 |         count_index_iterator& operator+=(const difference_type i)
133 |         {
134 |             m_index += i;
135 |             return *this;
136 |         }
137 | 
138 |         count_index_iterator& operator-=(difference_type i)
139 |         {
140 |             m_index -= i;
141 |             return *this;
142 |         }
143 | 
144 |         const_reference operator[](difference_type i) const {
145 |             return (*m_parent)[m_index + i]; }
146 | 
147 |         void swap(count_index_iterator& other) noexcept
148 |         {
149 |             std::swap(m_parent, other.m_parent);
150 |             std::swap(m_index, other.m_index);
151 |         }
152 | 
153 |     private:
154 |         const Container* m_parent;
155 |         S m_index;
156 |     };
157 | 
158 | 
159 |     template <typename... P>
160 |     void swap(count_index_iterator<P...>& first,
161 |               count_index_iterator<P...>& second)
162 |     {
163 |         first.swap(second);
164 |     }
165 | }  // namespace detail
166 | 


--------------------------------------------------------------------------------
/pysdsl/util/tupletricks.hpp:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <tuple>
 4 | #include <utility>
 5 | 
 6 | 
 7 | namespace detail
 8 | {
 9 | 
10 | template<typename P, typename Function, std::size_t... Is>
11 | constexpr
12 | decltype(auto) for_each_impl(P&& t, Function&& f, std::index_sequence<Is...>) {
13 |     return std::make_tuple(f(std::get<Is>(t))...); }
14 | 
15 | template<typename P, typename Function, std::size_t... Is>
16 | constexpr
17 | decltype(auto) for_each_impl(const P& t, Function&& f, std::index_sequence<Is...>) {
18 |     return std::make_tuple(f(std::get<Is>(t))...); }
19 | 
20 | template<typename... T, typename Function>
21 | constexpr
22 | decltype(auto) for_each(const std::tuple<T...>& t, Function&& f) {
23 |     return for_each_impl(t, f, std::index_sequence_for<T...>{}); }
24 | 
25 | template<typename... T, typename Function>
26 | constexpr
27 | decltype(auto) for_each(std::tuple<T...>& t, Function&& f) {
28 |     return for_each_impl(t, f, std::index_sequence_for<T...>{}); }
29 | 
30 | template<typename P, typename Function, std::size_t... Is>
31 | constexpr
32 | decltype(auto) forward_each_impl(P&& t, Function&& f, std::index_sequence<Is...>) {
33 |     return std::forward_as_tuple(f(std::get<Is>(t))...); }
34 | 
35 | template<typename P, typename Function, std::size_t... Is>
36 | constexpr
37 | decltype(auto) forward_each_impl(const P& t, Function&& f, std::index_sequence<Is...>) {
38 |     return std::forward_as_tuple(f(std::get<Is>(t))...); }
39 | 
40 | template<typename... T, typename Function>
41 | constexpr
42 | decltype(auto) forward_each(const std::tuple<T...>& t, Function&& f) {
43 |     return forward_each_impl(t, f, std::index_sequence_for<T...>{}); }
44 | 
45 | template<typename... T, typename Function>
46 | constexpr
47 | decltype(auto) forward_each(std::tuple<T...>& t, Function&& f) {
48 |     return forward_each_impl(t, f, std::index_sequence_for<T...>{}); }
49 | 
50 | }  // namespace detail
51 | 
52 | 
53 | template <typename... Ts, typename F>
54 | constexpr
55 | decltype(auto) for_each_in_tuple(const std::tuple<Ts...> &t, F f) {
56 |     return detail::for_each(t, f); }
57 | 
58 | 
59 | template <typename... Ts, typename F>
60 | constexpr
61 | decltype(auto) for_each_in_tuple(std::tuple<Ts...> &t, F f) {
62 |     return detail::for_each(t, f); }
63 | 
64 | template <typename... Ts, typename F>
65 | constexpr
66 | decltype(auto) forward_each_in_tuple(const std::tuple<Ts...> &t, F f) {
67 |     return detail::forward_each(t, f); }
68 | 
69 | 
70 | template <typename... Ts, typename F>
71 | constexpr
72 | decltype(auto) forward_each_in_tuple(std::tuple<Ts...> &t, F f) {
73 |     return detail::forward_each(t, f); }
74 | 
75 | 
// Subset functor.
//
// Holds a reference to a tuple and, when called with `std::tuple<Args...>`,
// returns a reference to the element of that tuple whose type is
// `general_template<Args...>`.  The argument is used only for its type.
//
// The template template parameter is spelled with `class`: the `typename`
// spelling is only standard from C++17 on, while this project is compiled
// with -std=c++14.
template <template <typename...> class general_template, typename... Ts>
struct GeneralSubsetFunctor {
    // Tuple being searched; not owned, must outlive the functor.
    std::tuple<Ts...>& tpl;

    constexpr GeneralSubsetFunctor(std::tuple<Ts...>& tpl) noexcept
        : tpl(tpl) {}

    // Looks the element up by type, so `general_template<Args...>` has to
    // occur exactly once among Ts... (requirement of std::get<T>).
    template <typename... Args>
    auto& operator()(std::tuple<Args...>) const {
        return std::get<general_template<Args...>>(tpl);
    }
};
89 | 
90 | template <template <typename...> typename general_template, typename... Ts>
91 | auto make_general_subset_functor(std::tuple<Ts...>& tpl) {
92 |     return GeneralSubsetFunctor<general_template, Ts...>(tpl);
93 | }
94 | 
95 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
  1 | # based on https://github.com/pybind/python_example/blob/master/setup.py
  2 | 
  3 | import sys
  4 | 
  5 | from setuptools import setup, Extension
  6 | from setuptools.command.build_ext import build_ext
  7 | import setuptools
  8 | 
  9 | __version__ = '1.0.0a'
 10 | 
 11 | 
 12 | class get_pybind_include(object):
 13 |     """Helper class to determine the pybind11 include path
 14 |     The purpose of this class is to postpone importing pybind11
 15 |     until it is actually installed, so that the ``get_include()``
 16 |     method can be invoked. """
 17 | 
 18 |     def __init__(self, user=False):
 19 |         self.user = user
 20 | 
 21 |     def __str__(self):
 22 |         import pybind11
 23 |         return pybind11.get_include(self.user)
 24 | 
 25 | 
 26 | # As of Python 3.6, CCompiler has a `has_flag` method.
 27 | # cf http://bugs.python.org/issue26689
 28 | def has_flag(compiler, flagname):
 29 |     """Return a boolean indicating whether a flag name is supported on
 30 |     the specified compiler.
 31 |     """
 32 |     import tempfile
 33 |     with tempfile.NamedTemporaryFile('w', suffix='.cpp') as f:
 34 |         f.write('int main (int argc, char **argv) { return 0; }')
 35 |         try:
 36 |             compiler.compile([f.name], extra_postargs=[flagname])
 37 |         except setuptools.distutils.errors.CompileError:
 38 |             return False
 39 |     return True
 40 | 
 41 | 
 42 | def cpp_flag(compiler):
 43 |     """Return the -std=c++14 compiler flag"""
 44 |     if has_flag(compiler, '-std=c++14'):
 45 |         return '-std=c++14'
 46 |     else:
 47 |         raise RuntimeError('Unsupported compiler -- at least C++14 support '
 48 |                            'is needed!')
 49 | 
 50 | 
 51 | class BuildExt(build_ext):
 52 |     """A custom build extension for adding compiler-specific options."""
 53 |     c_opts = {
 54 |         'msvc': ['/EHsc'],
 55 |         'unix': ['-O3'],
 56 |     }
 57 | 
 58 |     if sys.platform == 'darwin':
 59 |         c_opts['unix'] += ['-stdlib=libc++', '-mmacosx-version-min=10.7']
 60 | 
 61 |     def build_extensions(self):
 62 |         compiler_type = self.compiler.compiler_type
 63 |         opts = self.c_opts.get(compiler_type, [])
 64 |         if compiler_type == 'unix':
 65 |             opts.append(
 66 |                 '-DVERSION_INFO="%s"' % self.distribution.get_version()
 67 |             )
 68 |             opts.append('-DNOCROSSCONSTRUCTORS=1')
 69 |             opts.append(cpp_flag(self.compiler))
 70 |             if has_flag(self.compiler, '-fvisibility=hidden'):
 71 |                 opts.append('-fvisibility=hidden')
 72 |         elif compiler_type == 'msvc':
 73 |             opts.append(
 74 |                 '/DVERSION_INFO=\\"%s\\"' % self.distribution.get_version()
 75 |             )
 76 |         for ext in self.extensions:
 77 |             ext.extra_compile_args = opts
 78 |         build_ext.build_extensions(self)
 79 | 
 80 | 
# Native extension modules handed to ``setup()`` below.  Each one is compiled
# as C++ from a single source file under ``pysdsl/``.
EXT_MODULES = [
    # Built from bits.cpp; links against libsdsl.
    Extension(
        'pysdsl/bits',
        ['pysdsl/bits.cpp'],
        include_dirs=[
            # Path to pybind11 headers
            get_pybind_include(),
            get_pybind_include(user=True)
        ],
        language='c++',
        libraries=['sdsl'],
    ),
    # Built from __init__.cpp; the output name depends on the major Python
    # version.  Also adds ``pysdsl/`` to the include path and links against
    # the divsufsort libraries.
    Extension(
        (
            'pysdsl/pysdsl'
            if sys.version_info.major == 2
            else 'pysdsl/__init__'
        ),
        ['pysdsl/__init__.cpp'],
        include_dirs=[
            # Path to pybind11 headers
            get_pybind_include(),
            get_pybind_include(user=True),
            "pysdsl/",
        ],
        language='c++',
        libraries=['sdsl', 'divsufsort', 'divsufsort64'],
    ),
    # Built from _memory_monitor.cpp; links against libsdsl.
    Extension(
        'pysdsl/_memory_monitor',
        ['pysdsl/_memory_monitor.cpp'],
        include_dirs=[
            # Path to pybind11 headers
            get_pybind_include(),
            get_pybind_include(user=True)
        ],
        language='c++',
        libraries=['sdsl'],
    ),
]
121 | 
122 | 
# Package metadata and build configuration.  ``classifiers`` is given as a
# list: distutils expects a list for this field and warns when it is handed
# a tuple.
setup(
    name='pysdsl',
    version=__version__,
    author='Konstantin Ignatov',
    author_email='kv@qrator.net',
    url='https://git.qrator.net/podshumok/pysdsl',
    description='Python bindings to Succinct Data Structure Library 2.0',
    ext_modules=EXT_MODULES,
    packages=['pysdsl'],
    install_requires=['pybind11>=2.2'],
    cmdclass={'build_ext': BuildExt},
    zip_safe=False,
    classifiers=[
        "Development Status :: 3 - Alpha",
        "Intended Audience :: Customer Service",
        "Intended Audience :: Education",
        "Intended Audience :: Science/Research",
        ("License :: OSI Approved :: "
         "GNU Lesser General Public License v3 or later (LGPLv3+)"),
        "Operating System :: OS Independent",
        "Programming Language :: C++",
        "Programming Language :: Python",
    ],
)
147 | 


--------------------------------------------------------------------------------
/tests/test_encvector.py:
--------------------------------------------------------------------------------
 1 | import pysdsl
 2 | import pytest
 3 | 
 4 | 
 5 | @pytest.mark.parametrize("Type", list(pysdsl.enc_vector.values())
 6 |                          + list(pysdsl.variable_length_codes_vector.values())
 7 |                          + [pysdsl.DirectAccessibleCodesVector4,
 8 |                             pysdsl.DirectAccessibleCodesVector8,
 9 |                             pysdsl.DirectAccessibleCodesVector16,
10 |                             pysdsl.DirectAccessibleCodesVector63])
11 | def test_encvector(Type):
12 |     v = Type([3, 2, 1, 0, 2, 1, 3, 4, 1, 1, 1, 3, 2, 3])
13 |     assert not v.is_sorted()
14 |     assert v.sum() == 27
15 |     assert v.minmax() == (0, 4)
16 |     assert v.size_in_bytes < 200
17 | 
18 | 
@pytest.mark.skip(reason="Issue #16")
@pytest.mark.parametrize(
    "Type",
    [
        pysdsl.DirectAccessibleCodesVectorDP,
        pysdsl.DirectAccessibleCodesVectorDPRRR,
    ]
)
def test_encvectordp(Type):
    """Same checks as ``test_encvector`` for the DP vector types
    (currently skipped, see the skip reason)."""
    values = [3, 2, 1, 0, 2, 1, 3, 4, 1, 1, 1, 3, 2, 3]
    v = Type(values)

    assert not v.is_sorted()
    assert v.sum() == 27
    assert v.minmax() == (0, 4)
    assert v.size_in_bytes < 200
28 | 


--------------------------------------------------------------------------------
/tests/test_intvector.py:
--------------------------------------------------------------------------------
 1 | import pysdsl
 2 | import pytest
 3 | 
 4 | 
 5 | def test_intvector():
 6 |     v = pysdsl.IntVector(1024 * 1024 * 256)
 7 |     assert int(v.size_in_mega_bytes) == 2048
 8 |     v.set_to_id()
 9 |     assert v.width == 64
10 |     v.bit_compress()
11 |     assert v.width == 28
12 |     assert v.size_in_mega_bytes < 900
13 | 
14 | 
@pytest.mark.parametrize(
    "Type",
    [
        pysdsl.Int4Vector, pysdsl.Int8Vector,
        pysdsl.Int16Vector, pysdsl.Int24Vector,
        pysdsl.Int32Vector, pysdsl.Int64Vector,
    ]
)
def test_intNvector(Type):
    """Fixed-width vectors are iterable and report 16 bytes after being
    resized to 5 bits."""
    v = Type([3, 2, 1, 0, 2, 1, 3, 4, 1, 1, 1, 3, 2, 3])

    ordered = sorted(v)
    assert ordered[2] == 1

    v.bit_resize(5)
    assert v.size_in_bytes == 16
23 | 
24 | 
def test_bitvector():
    """Fill a BitVector with alternating bits, flip it and check width and
    extrema; then check the bit size of one built from a list."""
    v_len = 10
    v = pysdsl.BitVector(v_len)
    for position in range(v_len):
        v[position] = position % 2
    v.flip()

    assert v.width == 1
    assert v.max() == 1
    assert v.min() == 0

    from_list = pysdsl.BitVector([1, 0, 1])
    assert from_list.bit_size == 3
37 | 
@pytest.mark.parametrize("Type", pysdsl.all_immutable_bitvectors)
def test_immutable_bitvector(Type):
    """Wrap a 10-bit alternating BitVector in each immutable type and check
    size, integer read-out and extrema."""
    source = pysdsl.BitVector([0, 1, 0, 1, 0, 1, 0, 1, 0, 1])
    v = Type(source)

    assert v.size == 10
    assert v.get_int(0, v.size) == 682
    assert v.max() == 1
    assert v.min() == 0
45 | 


--------------------------------------------------------------------------------
/tests/test_sorted_int_stack.py:
--------------------------------------------------------------------------------
 1 | import random
 2 | import pytest
 3 | 
 4 | from pysdsl import SortedIntStack
 5 | 
 6 | 
 7 | @pytest.mark.parametrize("max_value", [10, 1000, 100000])
 8 | def test_push_pop_top_empty(max_value):
 9 |     s = SortedIntStack(max_value=max_value)
10 |     a = []
11 | 
12 |     assert s.empty()
13 |     assert s.size == 0
14 |     assert len(s) == 0
15 | 
16 |     # generate a sorted list of different integers less than max_value 
17 |     a = list(range(max_value))
18 |     random.shuffle(a)
19 |     a = sorted(a[:max_value//2])
20 | 
21 |     for i, elem in enumerate(a):
22 |         s.push(elem)
23 |         assert s.top() == elem
24 |         assert not s.empty()
25 |         assert len(s) == i + 1
26 |         assert s.size == i + 1
27 | 
28 |     for _ in range(max_value // 2):
29 |         assert len(a) == len(s)
30 |         assert len(a) == s.size
31 |         assert not s.empty()
32 |         assert a[-1] == s.top()
33 |         assert a[-1] == s.pop()
34 |         a.pop()
35 | 
36 |     assert s.empty()
37 |     assert s.size == 0
38 |     assert len(s) == 0
39 | 
40 | 
def test_copy_assign():
    """A stack constructed from another one has the same length and pops the
    same values."""
    original = SortedIntStack(max_value=1000)
    for val in (10, 100, 1000):
        original.push(val)

    duplicate = SortedIntStack(original)
    assert len(original) == len(duplicate)

    while len(original) > 0:
        assert original.pop() == duplicate.pop()
53 | 
54 | 
def test_errors():
    """Error reporting: ``top``/``pop`` on an empty stack raise IndexError,
    pushing a smaller value after a larger one raises ValueError."""
    s = SortedIntStack(max_value=1000)

    with pytest.raises(IndexError, match="top from empty stack"):
        s.top()

    with pytest.raises(IndexError, match="pop from empty stack"):
        s.pop()

    # The valid push stays outside the ``raises`` block so that only the
    # out-of-order push can satisfy the expectation.
    s.push(1)
    with pytest.raises(
        ValueError,
        match="elements have to be pushed in strictly increasing order",
    ):
        s.push(0)
67 | 


--------------------------------------------------------------------------------
/tests/test_suffixarray.py:
--------------------------------------------------------------------------------
 1 | import pysdsl
 2 | import pytest
 3 | 
 4 | 
 5 | @pytest.mark.parametrize("Type", [pysdsl.SuffixArraySadakaneInt,
 6 |                                   pysdsl.SuffixArrayWaveletTreeInt])
 7 | def test_int_suffixarray(Type):
 8 |     a = Type([3, 2, 1, 5, 2, 1, 3, 4, 1, 1, 1, 3, 2, 1])
 9 |     assert a.count([3, 2, 1]) == 2
10 |     assert a.count([2, 1]) == 3
11 |     assert a.count([1, 2]) == 0
12 |     assert a.count([1, 1]) == 2
13 |     assert a.count([1]) == 6
14 |     assert a.count([]) == 16
15 |     assert a.text[5] == 2
16 |     assert a.sigma == 7
17 | 
18 | 
@pytest.mark.parametrize(
    "Type",
    [
        pysdsl.SuffixArrayWaveletTree,
        pysdsl.SuffixArraySadakane,
        pysdsl.SuffixArrayBitcompressed,
    ]
)
def test_char_suffixarray(Type):
    """Pattern counts, text access and alphabet size of character suffix
    arrays built from ``"abracadabra"``."""
    a = Type("abracadabra")

    expected_counts = [
        ("abr", 2),
        ("a", 5),
        ("dab", 1),
        ("brac", 1),
        ("bra", 2),
        ("", 12),
        ("aba", 0),
    ]
    for pattern, occurrences in expected_counts:
        assert a.count(pattern) == occurrences

    assert chr(a.text[5]) == "a"
    assert a.sigma == 6
33 | 


--------------------------------------------------------------------------------
/tests/test_wavelet.py:
--------------------------------------------------------------------------------
 1 | import pysdsl
 2 | import pytest
 3 | 
 4 | 
 5 | @pytest.mark.parametrize("Type", list(pysdsl.wavelet_tree_int.values())
 6 |                          + list(pysdsl.wavelet_matrix_int.values())
 7 |                          + list(pysdsl.wavelet_tree_huffman_int.values())
 8 |                          + list(pysdsl.wavelet_tree_balanced_int.values())
 9 |                          + list(pysdsl.wavelet_tree_hu_tucker_int.values())
10 |                          + [pysdsl.WaveletTreeGMRrankselect,
11 |                             pysdsl.WaveletTreeGMRrankselectEnc,
12 |                             pysdsl.WaveletTreeGolynskiMunroRao,
13 |                             pysdsl.WaveletTreeGolynskiMunroRaoEnc,
14 |                             pysdsl.WaveletTreeAP])
15 | def test_wavelet(Type):
16 |     a = Type([3, 2, 1, 0, 2, 1, 3, 4, 1, 1, 1, 3, 2, 3])
17 |     assert a.select(2, 3) == 6
18 | 
19 | 
@pytest.mark.parametrize(
    "Type",
    list(pysdsl.wavelet_tree_huffman.values())
    + list(pysdsl.wavelet_tree_hu_tucker.values())
    + list(pysdsl.wavelet_tree_balanced.values())
)
def test_huffman_wavelet(Type):
    """``select(1, 0)`` on an alternating BitVector is 2 for every
    byte-alphabet wavelet tree."""
    bits = pysdsl.BitVector([1, 0, 1, 0, 1, 0, 1, 0, 1, 0])
    a = Type(bits)
    assert a.select(1, 0) == 2
26 | 


--------------------------------------------------------------------------------