Fast.select()
.
36 |
37 | * A [paper](http://vigna.di.unimi.it/papers.php#BBPMMPH) on the theory of
38 | monotone minimal perfect hashing.
39 |
40 | * An [experimental paper](http://vigna.di.unimi.it/papers.php#BBPTPMMPH2)
41 | on monotone minimal perfect hashing.
42 |
43 | * A [paper](http://vigna.di.unimi.it/papers.php#GOVFSCF) on the current
44 | implementation of static and minimal perfect hash functions.
45 |
46 | * A [paper](http://vigna.di.unimi.it/papers.php#GeVECSF) on the current
47 | implementation of compressed static functions.
48 |
49 | * A [paper](http://vigna.di.unimi.it/papers.php#MaVCFTDRS) on the C++
50 | implementation of dynamic ranking and selection using compact Fenwick trees.
51 |
52 | * A [paper](http://vigna.di.unimi.it/papers.php#EGVRS) on the C++
53 | implementation of RecSplit.
54 |
55 | * A [paper](http://vigna.di.unimi.it/papers.php#VigECS) on the Rust
56 | implementation of functions and filters based on ε-cost sharding.
57 |
--------------------------------------------------------------------------------
/bash/testmain.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash -ex
2 |
3 | # Tests main methods
4 |
5 | KEYS=$(mktemp)
6 | FUNCTION=$(mktemp)
7 | VALUES=$(mktemp)
8 |
9 | LANG="en_US.UTF-8" cat >$KEYS <The help page provides an introduction to the scope and syntax of JavaDoc search.
57 |You can use the <ctrl> or <cmd> keys in combination with the left and right arrow keys to switch between result tabs in this page.
58 |The URL template below may be used to configure this page as a search engine in browsers that support this feature. It has been tested to work in Google Chrome and Mozilla Firefox. Note that other browsers may not support this feature or require a different URL format.
59 | link 60 |61 | 62 |
63 |Loading search index...
65 | 69 |A bit array of viewed by implementations of this class as a string of open (=one) and closed 30 | * (=zero) parentheses, which must be nested correctly. All operations are optional, but by contract 31 | * at least one of {@link #findOpen(long)} and {@link #findClose(long)} must be 32 | * provided. 33 | */ 34 | public interface BalancedParentheses extends Serializable { 35 | 36 | /** Returns the position of the matching open parenthesis (optional operation). 37 | * 38 | *
Note that if you do not implement this method you must 39 | * implement {@link #findClose(long)}. 40 | * 41 | * @param pos a position in the bit vector containing a closed parenthesis (a zero). 42 | * @return the position of the matching open parenthesis. 43 | */ 44 | public long findOpen(long pos); 45 | 46 | /** Returns the position of the matching closed parenthesis (optional operation). 47 | * 48 | *
Note that if you do not implement this method you must 49 | * implement {@link #findOpen(long)}. 50 | * 51 | * @param pos a position in the bit vector containing an open parenthesis (a one). 52 | * @return the position of the matching closed parenthesis. 53 | */ 54 | public long findClose(long pos); 55 | 56 | /** Returns the position of the open parenthesis of the pair that most 57 | * tightly encloses the given position (optional operation). 58 | * 59 | * @param pos a position in the bit vector. 60 | * @return the position of the open parenthesis of the pair that most 61 | * tightly encloses the given position. 62 | */ 63 | public long enclose(long pos); 64 | 65 | /** Returns the bit vector indexed by this structure. 66 | * 67 | *
Note that you are not supposed to modify the returned vector. 68 | * 69 | * @return the bit vector indexed by this structure. 70 | */ 71 | public BitVector bitVector(); 72 | 73 | /** Returns the overall number of bits allocated by this structure. 74 | * 75 | * @return the overall number of bits allocated by this structure (not including the bits 76 | * of the {@linkplain #bitVector() indexed vector}). 77 | */ 78 | 79 | public long numBits(); 80 | } 81 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/sux4j/bits/RankSelect.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sux4J: Succinct data structures for Java 3 | * 4 | * Copyright (C) 2008-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.sux4j.bits; 21 | 22 | import java.io.Serializable; 23 | 24 | import it.unimi.dsi.bits.BitVector; 25 | 26 | /** A serialisation-oriented container for associated rank/select(zero) structures. 27 | * 28 | *
Since structures in Sux4J serialise all contained data, including, if necessary, the underlying bit vector, 29 | * serialising separately a rank and a select structure might result in storing the underlying bit 30 | * vector twice. This class provide a simple solution by allowing one-shot serialisation of 31 | * all structures related to a bit vector. For convenience, it provides also delegate methods, albeit 32 | * the suggested usage is deserialisation and extraction of non-{@code null} structures. 33 | * 34 | */ 35 | public class RankSelect implements Rank, Select, SelectZero, Serializable { 36 | 37 | private static final long serialVersionUID = 1L; 38 | /** A rank structure, or {@code null}. */ 39 | public final Rank rank; 40 | /** A select structure, or {@code null}. */ 41 | public final Select select; 42 | /** A zero-select structure, or {@code null}. */ 43 | public final SelectZero selectZero; 44 | 45 | /** Creates a new rank/select container using the given structures. 46 | * 47 | * @param rank a rank structure, or {@code null}. 48 | * @param select a select structure, or {@code null}. 49 | * @param selectZero a zero-select structure, or {@code null}. 50 | */ 51 | public RankSelect(final Rank rank, final Select select, final SelectZero selectZero) { 52 | this.rank = rank; 53 | this.select = select; 54 | this.selectZero = selectZero; 55 | } 56 | 57 | /** Creates a new rank/select container without zero selection using the given structures. 58 | * 59 | * @param rank a rank structure, or {@code null}. 60 | * @param select a select structure, or {@code null}. 61 | */ 62 | public RankSelect(final Rank rank, final Select select) { 63 | this(rank, select, null); 64 | } 65 | 66 | @Override 67 | public long count() { 68 | return rank.count(); 69 | } 70 | 71 | @Override 72 | public long numBits() { 73 | return (rank != null ? rank.numBits() : 0) + (select != null ? select.numBits() : 0)+ (selectZero != null ? 
selectZero.numBits() : 0); 74 | } 75 | 76 | @Override 77 | public long rank(final long from, final long to) { 78 | return rank.rank(from, to); 79 | } 80 | 81 | @Override 82 | public long rank(final long pos) { 83 | return rank.rank(pos); 84 | } 85 | 86 | @Override 87 | public long rankZero(final long from, final long to) { 88 | return rank.rankZero(from, to); 89 | } 90 | 91 | @Override 92 | public long rankZero(final long pos) { 93 | return rank.rankZero(pos); 94 | } 95 | 96 | @Override 97 | public long select(final long rank) { 98 | return select.select(rank); 99 | } 100 | 101 | @Override 102 | public long selectZero(final long rank) { 103 | return selectZero.selectZero(rank); 104 | } 105 | 106 | @Override 107 | public BitVector bitVector() { 108 | if (rank != null) return rank.bitVector(); 109 | if (select != null) return select.bitVector(); 110 | if (selectZero != null) return selectZero.bitVector(); 111 | throw new UnsupportedOperationException("All fields are nulls"); 112 | } 113 | 114 | } 115 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/sux4j/bits/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Ranking and selection structures. 3 | * 4 | *
5 | * This package provides a number of implementations of rank/select queries for bit 6 | * vectors. Ranking is counting the number of ones in an initial segment of a bit vector. 7 | * Selection is finding the position of the r-th one. Both operations can be 8 | * performed in constant time on an array of n bits using o(n) 9 | * additional bits, but in practice linear data structures with small constants and theoretically 10 | * non-constant time work much better. Sux4J proposes a number of new, very efficient implementations 11 | * of rank and select oriented to 64-bit processors (in other words: they will be fairly slow on 12 | * 32-bit processors). The implementations are based on broadword programming and described 13 | * in Sebastiano Vigna, “Broadword 14 | * Implementation of Rank/Select Queries”, in Proc. of the 7th International Workshop 15 | * on Experimental Algorithms, WEA 2008, volume 5038 of Lecture Notes in Computer Science, pages 16 | * 154−168. Springer, 2008. 17 | * 18 | *
19 | * For dense arrays, {@link it.unimi.dsi.sux4j.bits.Rank9} is the basic rank implementation; 20 | * {@link it.unimi.dsi.sux4j.bits.Rank16} is slightly slower but occupies much less space. Selection 21 | * can be performed using {@link it.unimi.dsi.sux4j.bits.SimpleSelect} for reasonably uniform bit 22 | * arrays, or using {@link it.unimi.dsi.sux4j.bits.Select9}, which occupies more space but 23 | * guarantees practical constant-time evaluation. 24 | * 25 | *
26 | * For sparse arrays (e.g., representation of pointers in a bitstream) we provide 27 | * {@link it.unimi.dsi.sux4j.bits.SparseRank} and {@link it.unimi.dsi.sux4j.bits.SparseSelect}. 28 | * Their main feature is that they do not require the original bit array, as they use an 29 | * {@link it.unimi.dsi.sux4j.util.EliasFanoMonotoneLongBigList} to implement a succinct dictionary 30 | * containing the positions of bits set. If the bit array is sufficiently sparse, such a 31 | * representation provides significant gains in space occupancy. 32 | * 33 | *
34 | * All structures can be serialized. Since in some cases the original bit vector is stored inside 35 | * the structure, to avoid saving and loading twice the same vector we suggest to pack all 36 | * structures into a {@link it.unimi.dsi.sux4j.bits.RankSelect} instance. 37 | * 38 | *
39 | * Note that all methods in this package are considered low-level and do not perform bound checks on
40 | * their arguments. Bound checks can be enabled, however, by enabling assertions.
41 | */
42 | package it.unimi.dsi.sux4j.bits;
43 |
--------------------------------------------------------------------------------
/src/it/unimi/dsi/sux4j/io/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 | * I/O classes exploiting succinct data structures.
3 | */
4 | package it.unimi.dsi.sux4j.io;
5 |
--------------------------------------------------------------------------------
/src/it/unimi/dsi/sux4j/mph/AbstractHashFunction.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Sux4J: Succinct data structures for Java
3 | *
4 | * Copyright (C) 2008-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.sux4j.mph;
21 |
22 | import it.unimi.dsi.fastutil.Size64;
23 | import it.unimi.dsi.fastutil.objects.AbstractObject2LongFunction;
24 |
25 | /** A very minimal abstract hash implementation. It extends {@link AbstractObject2LongFunction},
26 | * by {@link Size64}. Moreover, it provides a deprecated
5 | * This package provides implementations of some succinct techniques for the storage of static
6 | * lists. The main ingredient is the Elias–Fano representation of monotone sequences. For
7 | * monotone sequences, such as file pointers, an
8 | * {@link it.unimi.dsi.sux4j.util.EliasFanoMonotoneLongBigList} is the obvious choice. For general
9 | * sequences, you can either use an {@link it.unimi.dsi.sux4j.util.EliasFanoPrefixSumLongBigList},
10 | * which stores the sequence using its prefix sums, or an
11 | * {@link it.unimi.dsi.sux4j.util.EliasFanoLongBigList}. The former is faster and provides also
12 | * prefix sums, but the latter provides a better compression ratio if the values stored are skewed
13 | * towards small values. {@link it.unimi.dsi.sux4j.util.EliasFanoIndexedMonotoneLongBigList}
14 | * provides {@linkplain it.unimi.dsi.sux4j.util.EliasFanoIndexedMonotoneLongBigList#successor(long)
15 | * content-based addressing methods}.
16 | *
17 | *
18 | * {@link it.unimi.dsi.sux4j.util.MappedEliasFanoMonotoneLongBigList} is a memory-mapped version of
19 | * {@link it.unimi.dsi.sux4j.util.EliasFanoMonotoneLongBigList}.
20 | */
21 | package it.unimi.dsi.sux4j.util;
--------------------------------------------------------------------------------
/src/overview.html:
--------------------------------------------------------------------------------
1 |
2 |
3 | Sux4J is an effort to bring succinct data structures to Java. Presently it provides a number
9 | of related implementations covering ranking/selection over bit arrays, compressed lists
10 | and [[monotone] minimal perfect hash] functions.
11 |
12 | Sux4J is free software
13 | distributed under either the GNU Lesser General Public License 2.1+ or the Apache Software License 2.0.
14 |
15 |
16 |
--------------------------------------------------------------------------------
/sux4j.bnd:
--------------------------------------------------------------------------------
1 | Automatic-Module-Name: it.unimi.dsi.sux4j
2 | Bundle-Name: it.unimi.dsi.sux4j
3 | Bundle-SymbolicName: it.unimi.dsi.sux4j
4 | Export-Package: it.unimi.dsi.sux4j.*
5 | Bundle-Version: ${version}
6 |
--------------------------------------------------------------------------------
/test/it/unimi/dsi/sux4j/bits/BalancedParenthesesTestCase.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Sux4J: Succinct data structures for Java
3 | *
4 | * Copyright (C) 2010-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.sux4j.bits;
21 |
22 | import static org.junit.Assert.assertEquals;
23 |
24 | import it.unimi.dsi.bits.BitVector;
25 | import it.unimi.dsi.bits.LongArrayBitVector;
26 | import it.unimi.dsi.fastutil.ints.IntArrayList;
27 | import it.unimi.dsi.lang.MutableString;
28 |
29 | public abstract class BalancedParenthesesTestCase { // Shared helpers for tests of BalancedParentheses implementations.
30 | /** Formats {@code l} (bit-reversed first if {@code reverse} is set) as three lines: 64 binary digits, 32 two-bit digits and 16 hexadecimal digits. */
31 | public static String binary(long l, final boolean reverse) {
32 | if (reverse) l = Long.reverse(l);
33 | final MutableString s = new MutableString().append("0000000000000000000000000000000000000000000000000000000000000000000000000").append(Long.toBinaryString(l));
34 | s.delete(0, s.length() - 64); // keep exactly the last 64 characters, i.e., the zero-padded binary form
35 | s.insert(0, '\n');
36 | s.append('\n');
37 | for(int i = 0; i < 32; i++) s.append(" ").append(Long.toHexString((l >>> (31 - i) * 2) & 0x3)); // two bits per digit, most significant first
38 | s.append('\n');
39 | for(int i = 0; i < 16; i++) s.append(" ").append(Long.toHexString((l >>> (15 - i) * 4) & 0xF)); // four bits per digit, most significant first
40 | s.append('\n');
41 | return s.toString();
42 | }
43 |
44 | /** Turns a string into a bit vector: '(' becomes a one, any other character a zero. If {@code check} is set, throws {@link IllegalArgumentException} when the string is not balanced. */
45 | public static LongArrayBitVector parse(final String s, final boolean check) {
46 | int e = 0; // current excess of open over closed parentheses
47 | final LongArrayBitVector bv = LongArrayBitVector.getInstance();
48 | for(int i = 0; i < s.length(); i++) {
49 | if (s.charAt(i) == '(') {
50 | bv.add(1);
51 | e++;
52 | }
53 | else {
54 | if (check && e == 0) throw new IllegalArgumentException(); // a closed parenthesis with nothing to match
55 | bv.add(0);
56 | e--;
57 | }
58 | }
59 |
60 | if (check && e != 0) throw new IllegalArgumentException(); // some parenthesis was left unmatched
61 |
62 | return bv;
63 | }
64 |
65 | /** Parses a string of at most {@link Long#SIZE} characters with {@link #parse(String, boolean)} and returns the resulting bits as a {@code long}; longer strings cause an {@link IllegalArgumentException}. */
66 | public static long parseSmall(final String s, final boolean check) {
67 | if (s.length() > Long.SIZE) throw new IllegalArgumentException();
68 | final LongArrayBitVector bv = parse(s, check);
69 | return bv.getLong(0, s.length());
70 | }
71 | /** Same as {@link #parseSmall(String, boolean)} with checking enabled. */
72 | public static long parseSmall(final String s) {
73 | return parseSmall(s, true);
74 | }
75 | /** Compares {@code findClose()} of the given structure, at every open parenthesis, with a stack-based matching computed from its bit vector. */
76 | public void assertBalancedParentheses(final BalancedParentheses balancedParentheses) {
77 | final long length = balancedParentheses.bitVector().length();
78 | final BitVector bits = balancedParentheses.bitVector();
79 |
80 | // Build matching
81 |
82 | final IntArrayList stack = new IntArrayList(); // positions of the currently unmatched open parentheses
83 | final IntArrayList matches = new IntArrayList(); // matches[i] is the position of the parenthesis matching position i
84 | matches.size((int)length);
85 |
86 | for(int i = 0; i < length; i++) {
87 | if (bits.getBoolean(i)) stack.push(i);
88 | else {
89 | if (stack.isEmpty()) throw new AssertionError("The bit vector does not represent a correctly parenthesised string");
90 | final int pos = stack.popInt();
91 | matches.set(pos, i);
92 | matches.set(i, pos);
93 | }
94 | }
95 |
96 | if (! stack.isEmpty()) throw new AssertionError("The bit vector does not represent a correctly parenthesised string");
97 |
98 | for(int i = 0; i < length; i++) {
99 | if (bits.getBoolean(i)) assertEquals("Finding closing for position " + i, matches.getInt(i), balancedParentheses.findClose(i));
100 | // else assertEquals("Finding opening for position " + i, matches.getInt(i),
101 | // balancedParentheses.findOpen(i));
102 | }
103 | }
104 |
105 | }
106 |
106 |
--------------------------------------------------------------------------------
/test/it/unimi/dsi/sux4j/bits/Rank12Test.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Sux4J: Succinct data structures for Java
3 | *
4 | * Copyright (C) 2010-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.sux4j.bits;
21 |
22 | import java.util.Random;
23 |
24 | import org.junit.Test;
25 |
26 | import it.unimi.dsi.bits.LongArrayBitVector;
27 | import it.unimi.dsi.util.XoRoShiRo128PlusRandom;
28 |
29 | public class Rank12Test extends RankSelectTestCase {
30 | /** All-zero backing arrays with word-aligned and non-aligned lengths. */
31 | @Test
32 | public void testEmpty() {
33 | Rank12 rank12;
34 | rank12 = new Rank12(new long[1], 64);
35 | assertRank(rank12);
36 | rank12 = new Rank12(new long[2], 128);
37 | assertRank(rank12);
38 | rank12 = new Rank12(new long[1], 63);
39 | assertRank(rank12);
40 | rank12 = new Rank12(new long[2], 65);
41 | assertRank(rank12);
42 | rank12 = new Rank12(new long[3], 129);
43 | assertRank(rank12);
44 | }
45 | /** Backing arrays containing a single set bit, with several declared lengths. */
46 | @Test
47 | public void testSingleton() {
48 | Rank12 rank12;
49 |
50 | rank12 = new Rank12(new long[] { 1L << 63, 0 }, 64);
51 | assertRank(rank12);
52 |
53 | rank12 = new Rank12(new long[] { 1 }, 64);
54 | assertRank(rank12);
55 |
56 | rank12 = new Rank12(new long[] { 1L << 63, 0 }, 128);
57 | assertRank(rank12);
58 |
59 | rank12 = new Rank12(new long[] { 1L << 63, 0 }, 65);
60 | assertRank(rank12);
61 |
62 | rank12 = new Rank12(new long[] { 1L << 63, 0, 0 }, 129);
63 | assertRank(rank12);
64 | }
65 | /** Backing arrays containing two set bits, in the same word or in different words. */
66 | @Test
67 | public void testDoubleton() {
68 | Rank12 rank12;
69 |
70 | rank12 = new Rank12(new long[] { 1 | 1L << 32 }, 64);
71 | assertRank(rank12);
72 |
73 | rank12 = new Rank12(new long[] { 1, 1 }, 128);
74 | assertRank(rank12);
75 |
76 | rank12 = new Rank12(new long[] { 1 | 1L << 32, 0 }, 63);
77 | assertRank(rank12);
78 |
79 | rank12 = new Rank12(new long[] { 1, 1, 0 }, 129);
80 | assertRank(rank12);
81 | }
82 | /** Alternating bit patterns (0xAAAA...) over one or more words and several declared lengths. */
83 | @Test
84 | public void testAlternating() {
85 | Rank12 rank12;
86 |
87 | rank12 = new Rank12(new long[] { 0xAAAAAAAAAAAAAAAAL }, 64);
88 | assertRank(rank12);
89 |
90 | rank12 = new Rank12(new long[] { 0xAAAAAAAAAAAAAAAAL, 0xAAAAAAAAAAAAAAAAL }, 128);
91 | assertRank(rank12);
92 |
93 | rank12 = new Rank12(new long[] { 0xAAAAAAAAAAAAAAAAL, 0xAAAAAAAAAAAAAAAAL, 0xAAAAAAAAAAAAAAAAL, 0xAAAAAAAAAAAAAAAAL, 0xAAAAAAAAAAAAAAAAL }, 64 * 5);
94 | assertRank(rank12);
95 |
96 | rank12 = new Rank12(new long[] { 0xAAAAAAAAL }, 33);
97 | assertRank(rank12);
98 |
99 | rank12 = new Rank12(new long[] { 0xAAAAAAAAAAAAAAAAL, 0xAAAAAAAAAAAAL }, 128);
100 | assertRank(rank12);
101 | }
102 | /** A small seven-bit vector; despite the method name, only ranking is checked here. */
103 | @Test
104 | public void testSelect() {
105 | Rank12 rank12;
106 | rank12 = new Rank12(LongArrayBitVector.of(1, 0, 1, 1, 0, 0, 0).bits(), 7);
107 | assertRank(rank12);
108 | }
109 | /** Pseudorandom vectors (generator seeded with 1) of sizes 10, 100, ..., 100000000. */
110 | @Test
111 | public void testRandom() {
112 | for (int size = 10; size <= 100000000; size *= 10) {
113 | final Random r = new XoRoShiRo128PlusRandom(1); // re-seeded at every size, so each run is reproducible
114 | final LongArrayBitVector bitVector = LongArrayBitVector.getInstance(size);
115 | for (int i = 0; i < size; i++)
116 | bitVector.add(r.nextBoolean());
117 | final Rank12 rank12 = new Rank12(bitVector);
118 | assertRank(rank12);
119 | }
120 | }
121 | /** Every length from 0 to 4096, with all even positions set. */
122 | @Test
123 | public void testAllSizes() {
124 | LongArrayBitVector v;
125 | Rank12 rank12;
126 | for (int size = 0; size <= 4096; size++) {
127 | v = LongArrayBitVector.getInstance().length(size);
128 | for (int i = (size + 1) / 2; i-- != 0;) // i runs from (size + 1) / 2 - 1 down to 0
129 | v.set(i * 2);
130 | rank12 = new Rank12(v);
131 | assertRank(rank12);
132 | }
133 | }
134 | }
135 |
135 |
--------------------------------------------------------------------------------
/test/it/unimi/dsi/sux4j/bits/RankSelectTestCase.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Sux4J: Succinct data structures for Java
3 | *
4 | * Copyright (C) 2010-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.sux4j.bits;
21 |
22 | import static org.junit.Assert.assertEquals;
23 |
24 | import it.unimi.dsi.bits.BitVector;
25 |
26 | public abstract class RankSelectTestCase { // Shared assertions comparing rank/select structures with a linear scan of their bit vector.
27 | public void assertRankAndSelect(final Rank rank, final Select select) { // Checks rank(i) at every position and select(j) at every one.
28 | final long length = rank.bitVector().length();
29 | final BitVector bits = rank.bitVector();
30 |
31 | for(long j = 0, i = 0; i < length; i++) { // long counters, as in assertRank(): an int index would wrap around before reaching a length above Integer.MAX_VALUE
32 | assertEquals("Ranking " + i, j, rank.rank(i)); // j is the number of ones strictly before position i
33 | if (bits.getBoolean(i)) {
34 | assertEquals("Selecting " + j, i, select.select(j)); // the one of rank j is at position i
35 | j++;
36 | }
37 |
38 | }
39 | }
40 | /** Checks that {@code s.select(j)} returns the position of the one of rank {@code j}, for every one in the bit vector of {@code s}. */
41 | public void assertSelect(final Select s) {
42 | final BitVector bits = s.bitVector();
43 | final long length = bits.length();
44 |
45 | for(long j = 0, i = 0; i < length; i++) { // long counters: the length is a long
46 | if (bits.getBoolean(i)) {
47 | assertEquals("Selecting " + j, i, s.select(j));
48 | j++;
49 | }
50 |
51 | }
52 | }
53 | /** Checks that {@code s.selectZero(j)} returns the position of the zero of rank {@code j}, for every zero in the bit vector of {@code s}. */
54 | public void assertSelectZero(final SelectZero s) {
55 | final BitVector bits = s.bitVector();
56 | final long length = bits.length();
57 |
58 | for(long j = 0, i = 0; i < length; i++) { // long counters: the length is a long
59 | if (! bits.getBoolean(i)) {
60 | assertEquals("Selecting " + j, i, s.selectZero(j));
61 | j++;
62 | }
63 |
64 | }
65 | }
66 | /** Checks that {@code rank.rank(i)} equals the number of ones before position {@code i}, for every position of the bit vector of {@code rank}. */
67 | public void assertRank(final Rank rank) {
68 | final long length = rank.bitVector().length();
69 | final BitVector bits = rank.bitVector();
70 |
71 | for(long j = 0, i = 0; i < length; i++) {
72 | assertEquals("Ranking " + i, j, rank.rank(i));
73 | if (bits.getBoolean(i)) j++;
74 | }
75 | }
76 |
77 | }
78 |
78 |
--------------------------------------------------------------------------------
/test/it/unimi/dsi/sux4j/bits/TrivialBalancedParentheses.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Sux4J: Succinct data structures for Java
3 | *
4 | * Copyright (C) 2010-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.sux4j.bits;
21 |
22 | import it.unimi.dsi.bits.BitVector;
23 |
24 | public class TrivialBalancedParentheses implements BalancedParentheses { // Linear-scan implementation working directly on the bit vector (one = open, zero = closed).
25 | private static final long serialVersionUID = 1L;
26 | private final BitVector v;
27 | /** Wraps the given bit vector; no additional structure is built. */
28 | public TrivialBalancedParentheses(final BitVector v) {
29 | this.v = v;
30 | }
31 |
32 | @Override
33 | public BitVector bitVector() {
34 | return v;
35 | }
36 | /** Not supported: always throws {@link UnsupportedOperationException}. */
37 | @Override
38 | public long enclose(final long pos) {
39 | throw new UnsupportedOperationException();
40 | }
41 | /** Scans forward from {@code pos} keeping a nesting counter; throws {@link IllegalArgumentException} if {@code pos} is not an open parenthesis or no match is found. */
42 | @Override
43 | public long findClose(long pos) {
44 | if (! v.getBoolean(pos)) throw new IllegalArgumentException(); // pos must contain an open parenthesis (a one)
45 | int c = 1; // number of open parentheses still to be closed
46 | while(++pos < v.length()) {
47 | if (! v.getBoolean(pos)) c--;
48 | else c++;
49 | if (c == 0) return pos;
50 | }
51 |
52 | throw new IllegalArgumentException(); // reached the end of the vector without finding a match
53 | }
54 | /** Scans backward from {@code pos} keeping a nesting counter; throws {@link IllegalArgumentException} if {@code pos} is not a closed parenthesis or no match is found. */
55 | @Override
56 | public long findOpen(long pos) {
57 | if (v.getBoolean(pos)) throw new IllegalArgumentException(); // pos must contain a closed parenthesis (a zero)
58 |
59 | int c = 1; // number of closed parentheses still to be matched
60 | while(--pos >= 0) {
61 | if (! v.getBoolean(pos)) c++;
62 | else c--;
63 | if (c == 0) return pos;
64 | }
65 |
66 | throw new IllegalArgumentException(); // reached the start of the vector without finding a match
67 | }
68 | /** Returns zero: this implementation allocates nothing besides the indexed bit vector. */
69 | @Override
70 | public long numBits() {
71 | return 0;
72 | }
73 | }
74 |
74 |
--------------------------------------------------------------------------------
/test/it/unimi/dsi/sux4j/bits/TrivialBalancedParenthesesTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Sux4J: Succinct data structures for Java
3 | *
4 | * Copyright (C) 2010-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.sux4j.bits;
21 |
22 | import static org.junit.Assert.assertEquals;
23 |
24 | import org.junit.Test;
25 |
26 | import it.unimi.dsi.bits.LongArrayBitVector;
27 |
28 | public class TrivialBalancedParenthesesTest extends BalancedParenthesesTestCase {
29 | /** Checks findClose()/findOpen() on the strings "()", "(())" and "(()())" (one = open, zero = closed); the enclose() checks are commented out. */
30 | @Test
31 | public void testSimple() {
32 | LongArrayBitVector bv = LongArrayBitVector.of(1, 0); // "()"
33 | TrivialBalancedParentheses bp = new TrivialBalancedParentheses(bv);
34 | assertBalancedParentheses(bp);
35 | assertEquals(1, bp.findClose(0));
36 | assertEquals(0, bp.findOpen(1));
37 | // assertEquals(0, bp.enclose(1));
38 |
39 | bv = LongArrayBitVector.of(1, 1, 0, 0); // "(())"
40 | bp = new TrivialBalancedParentheses(bv);
41 | assertBalancedParentheses(bp);
42 | assertEquals(3, bp.findClose(0));
43 | // assertEquals(0, bp.enclose(1));
44 | assertEquals(2, bp.findClose(1));
45 | assertEquals(1, bp.findOpen(2));
46 | // assertEquals(1, bp.enclose(2));
47 | assertEquals(0, bp.findOpen(3));
48 | // assertEquals(1, bp.enclose(3));
49 |
50 | bv = LongArrayBitVector.of(1, 1, 0, 1, 0, 0); // "(()())"
51 | bp = new TrivialBalancedParentheses(bv);
52 | assertBalancedParentheses(bp);
53 | assertEquals(5, bp.findClose(0));
54 | assertEquals(2, bp.findClose(1));
55 | // assertEquals(0, bp.enclose(1));
56 | assertEquals(1, bp.findOpen(2));
57 | // assertEquals(1, bp.enclose(2));
58 | assertEquals(4, bp.findClose(3));
59 | // assertEquals(1, bp.enclose(3));
60 | assertEquals(3, bp.findOpen(4));
61 | // assertEquals(3, bp.enclose(4));
62 | assertEquals(0, bp.findOpen(5));
63 | // assertEquals(3, bp.enclose(5));
64 |
65 | }
66 |
67 | }
68 |
--------------------------------------------------------------------------------
/test/it/unimi/dsi/sux4j/io/BucketedHashStoreTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Sux4J: Succinct data structures for Java
3 | *
4 | * Copyright (C) 2019-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.sux4j.io;
21 |
22 | import static org.junit.Assert.assertEquals;
23 |
24 | import java.io.IOException;
25 |
26 | import org.junit.Test;
27 |
28 | import it.unimi.dsi.bits.TransformationStrategies;
29 |
30 | public class BucketedHashStoreTest {
31 |
32 | @Test
33 | public void test() throws IOException {
34 | for(final int s: new int[] { 0, 1, 10, 100, 1000, 1000000 }) {
35 | final BucketedHashStoresize()
method that returns
27 | * -1 if {@link #size64()} is -1 or greater than {@link Integer#MAX_VALUE}, a {@link #size64()} returning -1 (that
28 | * you are invited to override), and a {@link #containsKey(Object)} implementation that returns true.
29 | */
30 |
31 | public abstract class AbstractHashFunctionrank9
implementation.
35 | *
36 | * {@code rank9} is a ranking structure using 25% additional space and providing exceptionally fast ranking.
37 | */
38 |
39 | public class Rank9GogPetri extends AbstractRank implements Rank {
40 | private static final boolean ASSERTS = false;
41 | private static final long serialVersionUID = 1L;
42 |
43 | protected transient long[] bits;
44 | protected final BitVector bitVector;
45 | protected final long[] count;
46 | protected final int numWords;
47 | protected final long numOnes;
48 | protected final long lastOne;
49 |
50 | public Rank9GogPetri(final long[] bits, final long length) {
51 | this(LongArrayBitVector.wrap(bits, length));
52 | }
53 |
54 | public Rank9GogPetri(final BitVector bitVector) {
55 | this.bitVector = bitVector;
56 | this.bits = bitVector.bits();
57 | final long length = bitVector.length();
58 |
59 | numWords = words(length);
60 |
61 | final int numCounts = (int)((length + 8 * Long.SIZE - 1) / (8 * Long.SIZE)) * 2;
62 | // Init rank/select structure
63 | count = new long[numCounts + 1];
64 |
65 | long c = 0, l = -1;
66 | int pos = 0;
67 | for(int i = 0; i < numWords; i += 8, pos += 2) {
68 | count[pos] = c;
69 | c += Long.bitCount(bits[i]);
70 | if (bits[i] != 0) l = i * 64L + Fast.mostSignificantBit(bits[i]);
71 | for(int j = 1; j < 8; j++) {
72 | count[pos + 1] |= (i + j <= numWords ? c - count[pos] : 0x1FFL) << 63 - 9 * j;
73 | if (i + j < numWords) {
74 | c += Long.bitCount(bits[i + j]);
75 | if (bits[i + j] != 0) l = (i + j) * 64L + Fast.mostSignificantBit(bits[i + j]);
76 | }
77 | }
78 | }
79 |
80 | numOnes = c;
81 | lastOne = l;
82 | count[numCounts] = c;
83 | }
84 |
85 |
86 | @Override
87 | public long rank(final long pos) {
88 | if (ASSERTS) assert pos >= 0;
89 | if (ASSERTS) assert pos <= bitVector.length();
90 | // This test can be eliminated if there is always an additional word at the end of the bit array.
91 | if (pos > lastOne) return numOnes;
92 |
93 | final int word = word(pos);
94 | final int block = (word >>> 2) & ~1;
95 | final int offset = word & 7;
96 |
97 | return count[block] + (count[block + 1] >>> (63 - offset * 9) & 0x1FF) + Long.bitCount(bits[word] & (1L << pos) - 1);
98 | }
99 |
100 | @Override
101 | public long numBits() {
102 | return count.length * (long)Long.SIZE;
103 | }
104 |
105 | @Override
106 | public long count() {
107 | return numOnes;
108 | }
109 |
110 | @Override
111 | public long rank(final long from, final long to) {
112 | return rank(to) - rank(from);
113 | }
114 |
115 | public long lastOne() {
116 | return lastOne;
117 | }
118 |
119 | private void readObject(final ObjectInputStream s) throws IOException, ClassNotFoundException {
120 | s.defaultReadObject();
121 | bits = bitVector.bits();
122 | }
123 |
124 | @Override
125 | public BitVector bitVector() {
126 | return bitVector;
127 | }
128 | }
129 |
--------------------------------------------------------------------------------
/src/it/unimi/dsi/sux4j/test/EliasFanoLongBigListSpeedTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Sux4J: Succinct data structures for Java
3 | *
4 | * Copyright (C) 2016-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.sux4j.test;
21 |
22 | import org.apache.commons.math3.random.RandomGenerator;
23 |
24 | import com.martiansoftware.jsap.FlaggedOption;
25 | import com.martiansoftware.jsap.JSAP;
26 | import com.martiansoftware.jsap.JSAPException;
27 | import com.martiansoftware.jsap.JSAPResult;
28 | import com.martiansoftware.jsap.Parameter;
29 | import com.martiansoftware.jsap.SimpleJSAP;
30 | import com.martiansoftware.jsap.UnflaggedOption;
31 |
32 | import it.unimi.dsi.fastutil.ints.IntArrayList;
33 | import it.unimi.dsi.fastutil.longs.LongArrayList;
34 | import it.unimi.dsi.sux4j.util.EliasFanoLongBigList;
35 | import it.unimi.dsi.util.XoRoShiRo128PlusRandomGenerator;
36 |
37 | /** A command-line benchmark timing random access to an {@link EliasFanoLongBigList}. */
38 | public class EliasFanoLongBigListSpeedTest {
39 |
40 |     /**
41 |      * Builds an Elias-Fano list from the prefix sums of {@code numElements} random gaps (each 0 or
42 |      * 100), then times ten rounds of {@code getLong()} and ten rounds of bulk {@code get()} on
43 |      * {@code numPos} pseudorandom positions, printing the timings on standard error.
44 |      *
45 |      * @param arg the command-line arguments.
46 |      * @throws JSAPException propagated from the JSAP command-line parser.
47 |      * @throws IllegalArgumentException if {@code bulk} is negative or not smaller than {@code numElements}.
48 |      */
49 |     public static void main(final String[] arg) throws JSAPException {
50 |
51 |         final SimpleJSAP jsap = new SimpleJSAP(EliasFanoLongBigListSpeedTest.class.getName(), "Tests the speed of Elias-Fano compressed lists.",
52 |             new Parameter[] {
53 |                 new UnflaggedOption("numElements", JSAP.INTSIZE_PARSER, "1Mi", JSAP.NOT_REQUIRED, JSAP.NOT_GREEDY, "The number of elements."),
54 |                 new UnflaggedOption("density", JSAP.DOUBLE_PARSER, ".5", JSAP.NOT_REQUIRED, JSAP.NOT_GREEDY, "The density."),
55 |                 new FlaggedOption("numPos", JSAP.INTSIZE_PARSER, "1Mi", JSAP.NOT_REQUIRED, 'p', "positions", "The number of positions to test"),
56 |                 new FlaggedOption("bulk", JSAP.INTSIZE_PARSER, "10", JSAP.NOT_REQUIRED, 'b', "bulk", "The number of positions to read with the bulk method"),
57 |             });
58 |
59 |         final JSAPResult jsapResult = jsap.parse(arg);
60 |         if (jsap.messagePrinted()) return;
61 |
62 |         final int numElements = jsapResult.getInt("numElements");
63 |         final double density = jsapResult.getDouble("density");
64 |         final int numPos = jsapResult.getInt("numPos");
65 |         final int bulk = jsapResult.getInt("bulk");
66 |         // Positions are drawn in [0..numElements - bulk), which must be a nonempty range.
67 |         if (bulk < 0 || bulk >= numElements) throw new IllegalArgumentException("The bulk size (" + bulk + ") must be nonnegative and smaller than the number of elements (" + numElements + ")");
68 |
69 |         final RandomGenerator random = new XoRoShiRo128PlusRandomGenerator(42);
70 |         final IntArrayList list = new IntArrayList(numElements);
71 |         for(long i = numElements; i-- != 0;) list.add(random.nextDouble() < density ? 0 : 100);
72 |
73 |         final int[] position = new int[numPos];
74 |
75 |         for(int i = numPos; i-- != 0;) position[i] = (random.nextInt() & 0x7FFFFFFF) % (numElements - bulk);
76 |         final long[] elements = new long[list.size()];
77 |         elements[0] = list.getInt(0);
78 |         for(int i = 1; i < list.size(); i++) elements[i] = list.getInt(i) + elements[i - 1];
79 |         final EliasFanoLongBigList eliasFanoLongBigList = new EliasFanoLongBigList(LongArrayList.wrap(elements));
80 |         long time;
81 |         System.err.println("getLong():");
82 |         for(int k = 10; k-- != 0;) {
83 |             time = - System.nanoTime();
84 |             for(int i = 0; i < numPos; i++) eliasFanoLongBigList.getLong(position[i]);
85 |             time += System.nanoTime();
86 |             System.err.println(time / 1E9 + "s, " + time / (double)numPos + " ns/element");
87 |         }
88 |
89 |         final long[] dest = new long[bulk];
90 |         System.err.println("get():");
91 |         for(int k = 10; k-- != 0;) {
92 |             time = - System.nanoTime();
93 |             for(int i = 0; i < numPos; i++) eliasFanoLongBigList.get(position[i], dest);
94 |             time += System.nanoTime();
95 |             // Multiply in double: numPos * bulk can overflow an int.
96 |             System.err.println(time / 1E9 + "s, " + time / ((double)numPos * bulk) + " ns/element");
97 |         }
98 |     }
99 | }
100 |
--------------------------------------------------------------------------------
/src/it/unimi/dsi/sux4j/test/GenerateGeometricValues.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Sux4J: Succinct data structures for Java
3 | *
4 | * Copyright (C) 2016-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.sux4j.test;
21 |
22 | import java.io.DataOutputStream;
23 | import java.io.FileOutputStream;
24 | import java.io.IOException;
25 |
26 | import org.apache.commons.math3.random.RandomGenerator;
27 | import org.slf4j.Logger;
28 | import org.slf4j.LoggerFactory;
29 |
30 | import com.martiansoftware.jsap.JSAP;
31 | import com.martiansoftware.jsap.JSAPException;
32 | import com.martiansoftware.jsap.JSAPResult;
33 | import com.martiansoftware.jsap.Parameter;
34 | import com.martiansoftware.jsap.SimpleJSAP;
35 | import com.martiansoftware.jsap.UnflaggedOption;
36 |
37 | import it.unimi.dsi.fastutil.io.FastBufferedOutputStream;
38 | import it.unimi.dsi.logging.ProgressLogger;
39 | import it.unimi.dsi.util.XoRoShiRo128PlusRandomGenerator;
40 |
41 | /** A command-line tool writing a binary list of geometrically distributed longs. */
42 | public class GenerateGeometricValues {
43 |     public static final Logger LOGGER = LoggerFactory.getLogger(GenerateGeometricValues.class);
44 |
45 |     /**
46 |      * Writes {@code n} longs in {@link DataOutputStream} format to the output file; each value is the
47 |      * number of trailing zeros of a pseudorandom long.
48 |      *
49 |      * @param arg the command-line arguments: the number of longs and the output file.
50 |      * @throws JSAPException propagated from the JSAP command-line parser.
51 |      * @throws IOException if the output file cannot be opened or written.
52 |      */
53 |     public static void main(final String[] arg) throws JSAPException, IOException {
54 |
55 |         final SimpleJSAP jsap = new SimpleJSAP(GenerateGeometricValues.class.getName(), "Generates a binary list of longs geometrically distributed.",
56 |             new Parameter[] {
57 |                 new UnflaggedOption("n", JSAP.LONG_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The number of longs."),
58 |                 new UnflaggedOption("output", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The output file.")
59 |             });
60 |
61 |         final JSAPResult jsapResult = jsap.parse(arg);
62 |         if (jsap.messagePrinted()) return;
63 |
64 |         final long n = jsapResult.getLong("n");
65 |         final String output = jsapResult.getString("output");
66 |
67 |         final RandomGenerator r = new XoRoShiRo128PlusRandomGenerator();
68 |
69 |         final ProgressLogger pl = new ProgressLogger(LOGGER);
70 |         pl.expectedUpdates = n;
71 |         pl.start("Generating... ");
72 |
73 |         // try-with-resources: the stream is flushed and closed even if a write fails.
74 |         try (DataOutputStream dos = new DataOutputStream(new FastBufferedOutputStream(new FileOutputStream(output)))) {
75 |             for(long i = 0; i < n; i++) {
76 |                 dos.writeLong(Long.numberOfTrailingZeros(r.nextLong()));
77 |                 // Without updates the logger would report zero items on completion.
78 |                 pl.lightUpdate();
79 |             }
80 |             pl.done();
81 |         }
82 |     }
83 | }
84 |
--------------------------------------------------------------------------------
/src/it/unimi/dsi/sux4j/test/GeneratePowerLawValues.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Sux4J: Succinct data structures for Java
3 | *
4 | * Copyright (C) 2016-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.sux4j.test;
21 |
22 | import java.io.DataOutputStream;
23 | import java.io.FileOutputStream;
24 | import java.io.IOException;
25 |
26 | import org.apache.commons.math3.distribution.ZipfDistribution;
27 | import org.apache.commons.math3.random.RandomGenerator;
28 | import org.slf4j.Logger;
29 | import org.slf4j.LoggerFactory;
30 |
31 | import com.martiansoftware.jsap.JSAP;
32 | import com.martiansoftware.jsap.JSAPException;
33 | import com.martiansoftware.jsap.JSAPResult;
34 | import com.martiansoftware.jsap.Parameter;
35 | import com.martiansoftware.jsap.SimpleJSAP;
36 | import com.martiansoftware.jsap.UnflaggedOption;
37 |
38 | import it.unimi.dsi.fastutil.io.FastBufferedOutputStream;
39 | import it.unimi.dsi.logging.ProgressLogger;
40 | import it.unimi.dsi.util.XoRoShiRo128PlusRandomGenerator;
41 |
42 | /** A command-line tool writing a binary list of power-law distributed longs. */
43 | public class GeneratePowerLawValues {
44 |     public static final Logger LOGGER = LoggerFactory.getLogger(GeneratePowerLawValues.class);
45 |
46 |     /**
47 |      * Writes {@code n} longs in {@link DataOutputStream} format to the output file; each value is a
48 |      * sample of a {@link ZipfDistribution} built with parameters {@code max} and {@code gamma}, minus one.
49 |      *
50 |      * @param arg the command-line arguments: exponent, upper bound, number of longs and output file.
51 |      * @throws JSAPException propagated from the JSAP command-line parser.
52 |      * @throws IOException if the output file cannot be opened or written.
53 |      */
54 |     public static void main(final String[] arg) throws JSAPException, IOException {
55 |
56 |         final SimpleJSAP jsap = new SimpleJSAP(GeneratePowerLawValues.class.getName(), "Generates a binary list of power-law distributed longs starting from zero.",
57 |             new Parameter[] {
58 |                 new UnflaggedOption("gamma", JSAP.DOUBLE_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The power law exponent."),
59 |                 new UnflaggedOption("max", JSAP.INTEGER_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The strict upper bound for the support of the distribution."),
60 |                 new UnflaggedOption("n", JSAP.LONG_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The number of longs."),
61 |                 new UnflaggedOption("output", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The output file.")
62 |             });
63 |
64 |         final JSAPResult jsapResult = jsap.parse(arg);
65 |         if (jsap.messagePrinted()) return;
66 |
67 |         final double gamma = jsapResult.getDouble("gamma");
68 |         final int max = jsapResult.getInt("max");
69 |         final long n = jsapResult.getLong("n");
70 |         final String output = jsapResult.getString("output");
71 |
72 |         final RandomGenerator r = new XoRoShiRo128PlusRandomGenerator();
73 |
74 |         final ProgressLogger pl = new ProgressLogger(LOGGER);
75 |         pl.expectedUpdates = n;
76 |         pl.start("Generating... ");
77 |
78 |         final ZipfDistribution zipf = new ZipfDistribution(r, max, gamma);
79 |         // try-with-resources: the stream is flushed and closed even if a write fails.
80 |         try (DataOutputStream dos = new DataOutputStream(new FastBufferedOutputStream(new FileOutputStream(output)))) {
81 |             for(long i = 0; i < n; i++) {
82 |                 dos.writeLong(zipf.sample() - 1);
83 |                 // Without updates the logger would report zero items on completion.
84 |                 pl.lightUpdate();
85 |             }
86 |             pl.done();
87 |         }
88 |     }
89 | }
90 |
--------------------------------------------------------------------------------
/src/it/unimi/dsi/sux4j/test/GenerateRandom32BitStrings.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Sux4J: Succinct data structures for Java
3 | *
4 | * Copyright (C) 2016-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.sux4j.test;
21 |
22 | import java.io.FileOutputStream;
23 | import java.io.IOException;
24 |
25 | import org.apache.commons.math3.random.RandomGenerator;
26 | import org.slf4j.Logger;
27 | import org.slf4j.LoggerFactory;
28 |
29 | import com.martiansoftware.jsap.FlaggedOption;
30 | import com.martiansoftware.jsap.JSAP;
31 | import com.martiansoftware.jsap.JSAPException;
32 | import com.martiansoftware.jsap.JSAPResult;
33 | import com.martiansoftware.jsap.Parameter;
34 | import com.martiansoftware.jsap.SimpleJSAP;
35 | import com.martiansoftware.jsap.UnflaggedOption;
36 |
37 | import it.unimi.dsi.fastutil.io.FastBufferedOutputStream;
38 | import it.unimi.dsi.logging.ProgressLogger;
39 | import it.unimi.dsi.util.XoRoShiRo128PlusRandomGenerator;
40 |
41 | /** A command-line tool writing sorted random four-byte strings, one per line. */
42 | public class GenerateRandom32BitStrings {
43 |     public static final Logger LOGGER = LoggerFactory.getLogger(GenerateRandom32BitStrings.class);
44 |
45 |     /**
46 |      * Writes {@code n} lines to the output file. A running value is increased at each step by a random
47 |      * amount plus {@code gap}, and is emitted as four base-224 digits, most significant first, each
48 |      * digit being written as a byte in [32..256) and each string being followed by a newline.
49 |      *
50 |      * @param arg the command-line arguments: the optional gap, the number of strings and the output file.
51 |      * @throws JSAPException propagated from the JSAP command-line parser.
52 |      * @throws IOException if the output file cannot be opened or written.
53 |      * @throws AssertionError if the running value reaches 224<sup>4</sup>, carrying the index of the offending string.
54 |      */
55 |     public static void main(final String[] arg) throws JSAPException, IOException {
56 |
57 |         final SimpleJSAP jsap = new SimpleJSAP(GenerateRandom32BitStrings.class.getName(), "Generates a list of sorted 32-bit random strings using only characters in the ISO-8859-1 printable range [32..256).",
58 |             new Parameter[] {
59 |                 new FlaggedOption("gap", JSAP.INTSIZE_PARSER, "1", JSAP.NOT_REQUIRED, 'g', "gap", "Impose a minimum gap."),
60 |                 new UnflaggedOption("n", JSAP.INTEGER_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The number of strings (too small values might cause overflow)."),
61 |                 new UnflaggedOption("output", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The output file.")
62 |             });
63 |
64 |         final JSAPResult jsapResult = jsap.parse(arg);
65 |         if (jsap.messagePrinted()) return;
66 |
67 |         final int n = jsapResult.getInt("n");
68 |         final String output = jsapResult.getString("output");
69 |         final int gap = jsapResult.getInt("gap");
70 |
71 |         final RandomGenerator r = new XoRoShiRo128PlusRandomGenerator();
72 |
73 |         final ProgressLogger pl = new ProgressLogger(LOGGER);
74 |         pl.expectedUpdates = n;
75 |         pl.start("Generating... ");
76 |
77 |         double l = 0, t;
78 |         final double limit = Math.pow(224, 4);
79 |         final int incr = (int)Math.floor(1.99 * (limit / n)) - 1;
80 |
81 |         LOGGER.info("Increment: " + incr);
82 |
83 |         final int[] b = new int[4];
84 |         // try-with-resources: buffered output is flushed and the file closed even when the overflow check fails.
85 |         try (FastBufferedOutputStream fbs = new FastBufferedOutputStream(new FileOutputStream(output))) {
86 |             for(int i = 0; i < n; i++) {
87 |                 t = (l += (r.nextInt(incr) + gap));
88 |                 if (l >= limit) throw new AssertionError(Integer.toString(i));
89 |                 // Extract the four base-224 digits, least significant first, shifting them into [32..256).
90 |                 for(int j = 4; j-- != 0;) {
91 |                     b[j] = (int)(t % 224 + 32);
92 |                     t = Math.floor(t / 224);
93 |                 }
94 |
95 |                 for(int j = 0; j < 4; j++) fbs.write(b[j]);
96 |                 fbs.write(10);
97 |
98 |                 pl.lightUpdate();
99 |             }
100 |             pl.done();
101 |         }
102 |
103 |         LOGGER.info("Last/limit: " + (l / limit));
104 |     }
105 | }
106 |
--------------------------------------------------------------------------------
/src/it/unimi/dsi/sux4j/test/GenerateRandom64BitIntegers.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Sux4J: Succinct data structures for Java
3 | *
4 | * Copyright (C) 2016-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.sux4j.test;
21 |
22 | import java.io.DataOutputStream;
23 | import java.io.FileOutputStream;
24 | import java.io.IOException;
25 | import java.math.BigInteger;
26 |
27 | import org.apache.commons.math3.random.RandomGenerator;
28 | import org.slf4j.Logger;
29 | import org.slf4j.LoggerFactory;
30 |
31 | import com.martiansoftware.jsap.FlaggedOption;
32 | import com.martiansoftware.jsap.JSAP;
33 | import com.martiansoftware.jsap.JSAPException;
34 | import com.martiansoftware.jsap.JSAPResult;
35 | import com.martiansoftware.jsap.Parameter;
36 | import com.martiansoftware.jsap.SimpleJSAP;
37 | import com.martiansoftware.jsap.UnflaggedOption;
38 |
39 | import it.unimi.dsi.fastutil.io.FastBufferedOutputStream;
40 | import it.unimi.dsi.logging.ProgressLogger;
41 | import it.unimi.dsi.util.XoRoShiRo128PlusRandomGenerator;
42 |
43 | /** A command-line tool writing sorted random 64-bit integers in {@link DataOutputStream} format. */
44 | public class GenerateRandom64BitIntegers {
45 |     public static final Logger LOGGER = LoggerFactory.getLogger(GenerateRandom64BitIntegers.class);
46 |
47 |     /**
48 |      * Writes {@code n} longs to the output file. A running {@link BigInteger} is increased at each step
49 |      * by a random amount plus {@code gap}, and its low 64 bits are written.
50 |      *
51 |      * @param arg the command-line arguments: the optional gap, the number of integers and the output file.
52 |      * @throws JSAPException propagated from the JSAP command-line parser.
53 |      * @throws IOException if the output file cannot be opened or written.
54 |      * @throws AssertionError if the running value reaches 2<sup>64</sup>, carrying the index of the offending integer.
55 |      */
56 |     public static void main(final String[] arg) throws JSAPException, IOException {
57 |
58 |         final SimpleJSAP jsap = new SimpleJSAP(GenerateRandom64BitIntegers.class.getName(), "Generates a list of sorted 64-bit random integers in DataOutput format.",
59 |             new Parameter[] {
60 |                 new FlaggedOption("gap", JSAP.INTSIZE_PARSER, "1", JSAP.NOT_REQUIRED, 'g', "gap", "Impose a minimum gap."),
61 |                 new UnflaggedOption("n", JSAP.LONG_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The number of integers (too small values might cause overflow)."),
62 |                 new UnflaggedOption("output", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The output file.")
63 |             });
64 |
65 |         final JSAPResult jsapResult = jsap.parse(arg);
66 |         if (jsap.messagePrinted()) return;
67 |
68 |         final long n = jsapResult.getLong("n");
69 |         final int gap = jsapResult.getInt("gap");
70 |         final String output = jsapResult.getString("output");
71 |
72 |         final RandomGenerator r = new XoRoShiRo128PlusRandomGenerator();
73 |
74 |         final ProgressLogger pl = new ProgressLogger(LOGGER);
75 |         pl.expectedUpdates = n;
76 |         pl.start("Generating... ");
77 |
78 |         BigInteger l = BigInteger.ZERO;
79 |         final BigInteger limit = BigInteger.valueOf(256).pow(8);
80 |         // doubleValue() rather than longValue(): for n = 1 or 2 the quotient does not fit a long and
81 |         // longValue() would silently keep only its low 64 bits (0 or Long.MIN_VALUE).
82 |         final long incr = (long)Math.floor(1.99 * limit.divide(BigInteger.valueOf(n)).doubleValue()) - 1;
83 |
84 |         // try-with-resources: buffered output is flushed and the file closed even when the overflow check fails.
85 |         try (DataOutputStream dos = new DataOutputStream(new FastBufferedOutputStream(new FileOutputStream(output)))) {
86 |
87 |             LOGGER.info("Increment: " + incr);
88 |
89 |             for(long i = 0; i < n; i++) {
90 |                 l = l.add(BigInteger.valueOf((r.nextLong() & 0x7FFFFFFFFFFFFFFFL) % incr + gap));
91 |                 // A value equal to the limit (2^64) would be written as 0, so it must be rejected too.
92 |                 if (l.compareTo(limit) >= 0) throw new AssertionError(Long.toString(i));
93 |                 dos.writeLong(l.longValue());
94 |                 pl.lightUpdate();
95 |             }
96 |
97 |             pl.done();
98 |         }
99 |
100 |         LOGGER.info("Last/limit: " + (l.doubleValue() / limit.doubleValue()));
101 |     }
102 | }
103 |
--------------------------------------------------------------------------------
/src/it/unimi/dsi/sux4j/test/GenerateRandomStrings.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Sux4J: Succinct data structures for Java
3 | *
4 | * Copyright (C) 2016-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.sux4j.test;
21 |
22 | import java.io.FileNotFoundException;
23 | import java.io.FileOutputStream;
24 | import java.io.OutputStreamWriter;
25 | import java.io.PrintWriter;
26 | import java.io.UnsupportedEncodingException;
27 |
28 | import org.apache.commons.math3.random.RandomGenerator;
29 |
30 | import com.martiansoftware.jsap.JSAP;
31 | import com.martiansoftware.jsap.JSAPException;
32 | import com.martiansoftware.jsap.JSAPResult;
33 | import com.martiansoftware.jsap.Parameter;
34 | import com.martiansoftware.jsap.SimpleJSAP;
35 | import com.martiansoftware.jsap.UnflaggedOption;
36 |
37 | import it.unimi.dsi.lang.MutableString;
38 | import it.unimi.dsi.util.XoRoShiRo128PlusRandomGenerator;
39 |
40 | /** A command-line tool writing random strings of fixed length, one per line. */
41 | public class GenerateRandomStrings {
42 |
43 |     /**
44 |      * Writes {@code n} lines of {@code l} characters each to the output file, encoded in ISO-8859-1.
45 |      * Each character is obtained as {@code 32 + r.nextInt(94) + 1}.
46 |      *
47 |      * @param arg the command-line arguments: number of strings, characters per string and output file.
48 |      * @throws JSAPException propagated from the JSAP command-line parser.
49 |      * @throws UnsupportedEncodingException if the ISO-8859-1 encoding is not available.
50 |      * @throws FileNotFoundException if the output file cannot be opened for writing.
51 |      */
52 |     public static void main(final String[] arg) throws JSAPException, UnsupportedEncodingException, FileNotFoundException {
53 |
54 |         final SimpleJSAP jsap = new SimpleJSAP(GenerateRandomStrings.class.getName(), "Generates (not necessarily unique) random strings", new Parameter[] {
55 |             new UnflaggedOption("n", JSAP.INTEGER_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The number of strings."),
56 |             new UnflaggedOption("l", JSAP.INTEGER_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The number of characters per string."),
57 |             new UnflaggedOption("output", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The output file.") });
58 |
59 |         final JSAPResult jsapResult = jsap.parse(arg);
60 |         if (jsap.messagePrinted()) return;
61 |
62 |         final int n = jsapResult.getInt("n");
63 |         final int l = jsapResult.getInt("l");
64 |         final String output = jsapResult.getString("output");
65 |
66 |         final RandomGenerator r = new XoRoShiRo128PlusRandomGenerator();
67 |         // try-with-resources: the writer is flushed and closed even if generation fails midway.
68 |         try (PrintWriter pw = new PrintWriter(new OutputStreamWriter(new FileOutputStream(output), "ISO-8859-1"))) {
69 |             for (int i = 0; i < n; i++) {
70 |                 final MutableString t = new MutableString(l);
71 |                 for (int j = 0; j < l; j++) t.append((char)(32 + r.nextInt(94) + 1));
72 |                 t.println(pw);
73 |             }
74 |         }
75 |     }
76 | }
77 |
--------------------------------------------------------------------------------
/src/it/unimi/dsi/sux4j/test/GenerateUniformValues.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Sux4J: Succinct data structures for Java
3 | *
4 | * Copyright (C) 2016-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.sux4j.test;
21 |
22 | import java.io.DataOutputStream;
23 | import java.io.FileOutputStream;
24 | import java.io.IOException;
25 |
26 | import org.apache.commons.math3.random.RandomGenerator;
27 | import org.slf4j.Logger;
28 | import org.slf4j.LoggerFactory;
29 |
30 | import com.martiansoftware.jsap.JSAP;
31 | import com.martiansoftware.jsap.JSAPException;
32 | import com.martiansoftware.jsap.JSAPResult;
33 | import com.martiansoftware.jsap.Parameter;
34 | import com.martiansoftware.jsap.SimpleJSAP;
35 | import com.martiansoftware.jsap.UnflaggedOption;
36 |
37 | import it.unimi.dsi.fastutil.io.FastBufferedOutputStream;
38 | import it.unimi.dsi.logging.ProgressLogger;
39 | import it.unimi.dsi.util.XoRoShiRo128PlusRandomGenerator;
40 |
41 | public class GenerateUniformValues {
42 | public static final Logger LOGGER = LoggerFactory.getLogger(GenerateUniformValues.class);
43 |
44 | public static void main(final String[] arg) throws JSAPException, IOException {
45 |
46 | final SimpleJSAP jsap = new SimpleJSAP(GenerateUniformValues.class.getName(), "Generates a binary list of uniformly distributed longs using a given number of bits.",
47 | new Parameter[] {
48 | new UnflaggedOption("b", JSAP.INTEGER_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The number of bits."),
49 | new UnflaggedOption("n", JSAP.LONG_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The number of longs."),
50 | new UnflaggedOption("output", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The output file.")
51 | });
52 |
53 | final JSAPResult jsapResult = jsap.parse(arg);
54 | if (jsap.messagePrinted()) return;
55 |
56 | final int b = jsapResult.getInt("b");
57 | final long n = jsapResult.getLong("n");
58 | final String output = jsapResult.getString("output");
59 |
60 | final RandomGenerator r = new XoRoShiRo128PlusRandomGenerator();
61 | final long mask = b == 64 ? -1L: (1L << b) - 1;
62 |
63 | final ProgressLogger pl = new ProgressLogger(LOGGER);
64 | pl.expectedUpdates = n;
65 | pl.start("Generating... ");
66 |
67 | final DataOutputStream dos = new DataOutputStream(new FastBufferedOutputStream(new FileOutputStream(output)));
68 |
69 | for(long i = 0; i < n; i++) dos.writeLong(r.nextLong() & mask);
70 | pl.done();
71 | dos.close();
72 | }
73 | }
74 |
--------------------------------------------------------------------------------
/src/it/unimi/dsi/sux4j/test/HollowTrieSpeedTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Sux4J: Succinct data structures for Java
3 | *
4 | * Copyright (C) 2016-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.sux4j.test;
21 |
22 | import java.io.FileInputStream;
23 | import java.io.IOException;
24 | import java.io.InputStreamReader;
25 | import java.nio.charset.Charset;
26 | import java.util.Iterator;
27 | import java.util.zip.GZIPInputStream;
28 |
29 | import com.martiansoftware.jsap.FlaggedOption;
30 | import com.martiansoftware.jsap.JSAP;
31 | import com.martiansoftware.jsap.JSAPException;
32 | import com.martiansoftware.jsap.JSAPResult;
33 | import com.martiansoftware.jsap.Parameter;
34 | import com.martiansoftware.jsap.SimpleJSAP;
35 | import com.martiansoftware.jsap.Switch;
36 | import com.martiansoftware.jsap.UnflaggedOption;
37 | import com.martiansoftware.jsap.stringparsers.ForNameStringParser;
38 |
39 | import it.unimi.dsi.fastutil.io.BinIO;
40 | import it.unimi.dsi.fastutil.objects.Object2LongFunction;
41 | import it.unimi.dsi.io.FastBufferedReader;
42 | import it.unimi.dsi.io.LineIterator;
43 |
44 | /** A command-line benchmark timing lookups of a list of terms in a serialised hollow trie. */
45 | public class HollowTrieSpeedTest {
46 |
47 |     /**
48 |      * Loads the serialised function and, ten times, reads all terms (from the term file, or from
49 |      * standard input if none is given) querying the function on each of them; the elapsed time of
50 |      * each round is printed on standard error.
51 |      *
52 |      * @param arg the command-line arguments.
53 |      * @throws NoSuchMethodException propagated from the set-up of the command-line parser.
54 |      * @throws IOException if the function or the terms cannot be read.
55 |      * @throws JSAPException propagated from the JSAP command-line parser.
56 |      * @throws ClassNotFoundException propagated from the deserialisation of the function.
57 |      */
58 |     public static void main(final String[] arg) throws NoSuchMethodException, IOException, JSAPException, ClassNotFoundException {
59 |
60 |         final SimpleJSAP jsap = new SimpleJSAP(HollowTrieSpeedTest.class.getName(), "Tests the speed of a hollow trie.",
61 |             new Parameter[] {
62 |                 new FlaggedOption("bufferSize", JSAP.INTSIZE_PARSER, "64Ki", JSAP.NOT_REQUIRED, 'b', "buffer-size", "The size of the I/O buffer used to read terms."),
63 |                 new FlaggedOption("encoding", ForNameStringParser.getParser(Charset.class), "UTF-8", JSAP.NOT_REQUIRED, 'e', "encoding", "The term file encoding."),
64 |                 new Switch("zipped", 'z', "zipped", "The term list is compressed in gzip format."),
65 |                 new FlaggedOption("termFile", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'o', "offline", "Read terms from this file (without loading them into core memory) instead of standard input."),
66 |                 new UnflaggedOption("trie", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The filename for the serialised hollow trie.")
67 |             });
68 |
69 |         final JSAPResult jsapResult = jsap.parse(arg);
70 |         if (jsap.messagePrinted()) return;
71 |
72 |         final int bufferSize = jsapResult.getInt("bufferSize");
73 |         final String trieName = jsapResult.getString("trie");
74 |         final String termFile = jsapResult.getString("termFile");
75 |         final Charset encoding = (Charset)jsapResult.getObject("encoding");
76 |         final boolean zipped = jsapResult.getBoolean("zipped");
77 |
78 |         @SuppressWarnings("unchecked")
79 |         final Object2LongFunction<? extends CharSequence> hollowTrie = (Object2LongFunction<? extends CharSequence>)BinIO.loadObject(trieName);
80 |
81 |         for(int k = 10; k-- != 0;) {
82 |             final FastBufferedReader reader;
83 |             if (termFile == null) reader = new FastBufferedReader(new InputStreamReader(System.in, encoding), bufferSize);
84 |             else reader = new FastBufferedReader(new InputStreamReader(zipped ? new GZIPInputStream(new FileInputStream(termFile)) : new FileInputStream(termFile), encoding), bufferSize);
85 |             try {
86 |                 final Iterator<? extends CharSequence> i = new LineIterator(reader);
87 |                 long time = -System.currentTimeMillis();
88 |                 int j = 0;
89 |                 while(i.hasNext()) {
90 |                     hollowTrie.getLong(i.next());
91 |                     if (j++ % 10000 == 0) System.err.print('.');
92 |                 }
93 |                 System.err.println();
94 |                 time += System.currentTimeMillis();
95 |                 System.err.println(time / 1E3 + "s, " + (time * 1E6) / j + " ns/vector");
96 |             } finally {
97 |                 // Release the file opened for this round; standard input is left untouched.
98 |                 if (termFile != null) reader.close();
99 |             }
100 |         }
101 |     }
102 | }
103 |
--------------------------------------------------------------------------------
/src/it/unimi/dsi/sux4j/test/ListSpeedTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Sux4J: Succinct data structures for Java
3 | *
4 | * Copyright (C) 2016-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.sux4j.test;
21 |
22 | import java.io.IOException;
23 |
24 | import com.martiansoftware.jsap.JSAP;
25 | import com.martiansoftware.jsap.JSAPException;
26 | import com.martiansoftware.jsap.JSAPResult;
27 | import com.martiansoftware.jsap.Parameter;
28 | import com.martiansoftware.jsap.SimpleJSAP;
29 | import com.martiansoftware.jsap.Switch;
30 | import com.martiansoftware.jsap.UnflaggedOption;
31 |
32 | import it.unimi.dsi.Util;
33 | import it.unimi.dsi.fastutil.io.BinIO;
34 | import it.unimi.dsi.fastutil.longs.LongList;
35 |
36 | public class ListSpeedTest {
37 |
38 | public static void main(final String[] arg) throws IOException, JSAPException, ClassNotFoundException {
39 |
40 | final SimpleJSAP jsap = new SimpleJSAP(ListSpeedTest.class.getName(), "Test the speed of a list",
41 | new Parameter[] {
42 | new Switch("random", 'r', "random", "Do a random test on at most 1 million strings."),
43 | new UnflaggedOption("list", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The filename for the serialised list.")
44 | });
45 |
46 | final JSAPResult jsapResult = jsap.parse(arg);
47 | if (jsap.messagePrinted()) return;
48 |
49 | final String listName = jsapResult.getString("list");
50 |
51 | final LongList list = (LongList)BinIO.loadObject(listName);
52 | long total = 0;
53 | final int n = list.size();
54 | for(int k = 13; k-- != 0;) {
55 | long time = -System.currentTimeMillis();
56 | for(int i = 0; i < n; i++) {
57 | list.getLong(i);
58 | if (i++ % 100000 == 0) System.out.print('.');
59 | }
60 | System.out.println();
61 | time += System.currentTimeMillis();
62 | if (k < 10) total += time;
63 | System.out.println(time / 1E3 + "s, " + (time * 1E3) / n + " \u00b5s/item");
64 | }
65 | System.out.println("Average: " + Util.format(total / 10E3) + "s, " + Util.format((total * 1E3) / (10 * n)) + " \u00b5s/item");
66 | }
67 | }
68 |
--------------------------------------------------------------------------------
/src/it/unimi/dsi/sux4j/test/SuccinctTreeDecoder.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Sux4J: Succinct data structures for Java
3 | *
4 | * Copyright (C) 2016-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.sux4j.test;
21 |
22 | import java.io.IOException;
23 | import java.io.Serializable;
24 |
25 | import it.unimi.dsi.bits.LongArrayBitVector;
26 | import it.unimi.dsi.compression.Decoder;
27 | import it.unimi.dsi.compression.TreeDecoder;
28 | import it.unimi.dsi.fastutil.booleans.BooleanIterator;
29 | import it.unimi.dsi.io.InputBitStream;
30 | import it.unimi.dsi.sux4j.bits.JacobsonBalancedParentheses;
31 |
32 | public class SuccinctTreeDecoder implements Decoder, Serializable {
33 | private static final long serialVersionUID = 1L;
34 |
35 | private final JacobsonBalancedParentheses balParen;
36 | private final LongArrayBitVector bitVector;
37 | private final boolean returnZero;
38 |
39 | public SuccinctTreeDecoder(final TreeDecoder treeDecoder) {
40 | bitVector = treeDecoder.succinctRepresentation();
41 | //System.err.println(bitVector);
42 | //System.err.println(Arrays.toString(treeDecoder.buildCodes()));
43 | returnZero = bitVector.length() <= 2;
44 | balParen = new JacobsonBalancedParentheses(bitVector, false, true, false);
45 | }
46 |
47 | @Override
48 | public int decode(final BooleanIterator iterator) {
49 | if (returnZero) return 0;
50 | int p = 1, index = 0;
51 |
52 | for(;;) {
53 | if (iterator.nextBoolean()) {
54 | final int q = (int)(balParen.findClose(p) + 1);
55 | index += (q - p) / 2;
56 | if (! bitVector.getBoolean(q)) return index;
57 | p = q;
58 | }
59 | else if (! bitVector.getBoolean(++p)) return index;
60 | }
61 | }
62 |
63 | @Override
64 | public int decode(final InputBitStream ibs) throws IOException {
65 | if (returnZero) return 0;
66 | int p = 1, index = 0;
67 |
68 | for(;;) {
69 | if (ibs.readBit() != 0) {
70 | final int q = (int)(balParen.findClose(p) + 1);
71 | index += (q - p) / 2;
72 | if (! bitVector.getBoolean(q)) return index;
73 | p = q;
74 | }
75 | else if (! bitVector.getBoolean(++p)) return index;
76 | }
77 | }
78 | }
79 |
--------------------------------------------------------------------------------
/src/it/unimi/dsi/sux4j/test/ValueStats.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Sux4J: Succinct data structures for Java
3 | *
4 | * Copyright (C) 2016-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.sux4j.test;
21 |
22 | import java.io.IOException;
23 |
24 | import org.slf4j.Logger;
25 | import org.slf4j.LoggerFactory;
26 |
27 | import com.martiansoftware.jsap.JSAP;
28 | import com.martiansoftware.jsap.JSAPException;
29 | import com.martiansoftware.jsap.JSAPResult;
30 | import com.martiansoftware.jsap.Parameter;
31 | import com.martiansoftware.jsap.SimpleJSAP;
32 | import com.martiansoftware.jsap.UnflaggedOption;
33 |
34 | import it.unimi.dsi.bits.Fast;
35 | import it.unimi.dsi.fastutil.io.BinIO;
36 | import it.unimi.dsi.fastutil.longs.Long2LongOpenHashMap;
37 | import it.unimi.dsi.fastutil.longs.LongIterator;
38 |
39 | public class ValueStats {
40 | public static final Logger LOGGER = LoggerFactory.getLogger(ValueStats.class);
41 |
42 | public static void main(final String[] arg) throws JSAPException, IOException {
43 |
44 | final SimpleJSAP jsap = new SimpleJSAP(ValueStats.class.getName(), "Prints statistical data about a binary list of longs.",
45 | new Parameter[] {
46 | new UnflaggedOption("input", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The input file.")
47 | });
48 |
49 | final JSAPResult jsapResult = jsap.parse(arg);
50 | if (jsap.messagePrinted()) return;
51 |
52 | final String input = jsapResult.getString("input");
53 | long max = Long.MIN_VALUE;
54 | long min = Long.MAX_VALUE;
55 | long tot = 0;
56 | final Long2LongOpenHashMap freqs = new Long2LongOpenHashMap();
57 |
58 | for(final LongIterator i = BinIO.asLongIterator(input); i.hasNext(); ) {
59 | final long x = i.nextLong();
60 | max = Math.max(max, x);
61 | min = Math.min(min, x);
62 | freqs.addTo(x, 1);
63 | tot++;
64 | }
65 |
66 | System.out.println("Min: " + min);
67 | System.out.println("Max: " + max);
68 | double entropy = 0;
69 | for(final LongIterator iterator = freqs.values().iterator(); iterator.hasNext();) {
70 | final double p = (double)iterator.nextLong() / tot;
71 | entropy += -p * Fast.log2(p);
72 | }
73 | System.out.println("Entropy: " + entropy);
74 | }
75 | }
76 |
--------------------------------------------------------------------------------
/src/it/unimi/dsi/sux4j/util/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Succinct data structures for collections.
3 | *
4 | *