├── .gitignore ├── CHANGES ├── COPYING.LESSER ├── LICENSE-2.0.txt ├── README.md ├── bash └── testmain.sh ├── bnd └── biz.aQute.bnd-5.2.0.jar ├── build.properties ├── build.xml ├── c ├── README.md ├── comp.sh ├── csf.c ├── csf.h ├── csf3.c ├── csf3.h ├── csf4.c ├── csf4.h ├── mph.c ├── mph.h ├── sf.c ├── sf.h ├── sf3.c ├── sf3.h ├── sf4.c ├── sf4.h ├── spooky.c ├── spooky.h ├── test_byte_array.c ├── test_csf3_byte_array.c ├── test_csf4_byte_array.c ├── test_mph_byte_array.c ├── test_mph_uint128_t.c ├── test_mph_uint64_t.c ├── test_sf3_byte_array.c ├── test_sf3_signature.c ├── test_sf4_byte_array.c ├── test_sf4_signature.c └── test_signature.c ├── docs ├── allclasses-index.html ├── allpackages-index.html ├── constant-values.html ├── copy.svg ├── deprecated-list.html ├── element-list ├── help-doc.html ├── index-all.html ├── index.html ├── it │ └── unimi │ │ └── dsi │ │ └── sux4j │ │ ├── bits │ │ ├── AbstractRank.html │ │ ├── BalancedParentheses.html │ │ ├── HintedBsearchSelect.html │ │ ├── JacobsonBalancedParentheses.html │ │ ├── Rank.html │ │ ├── Rank11.html │ │ ├── Rank16.html │ │ ├── Rank9.html │ │ ├── RankSelect.html │ │ ├── Select.html │ │ ├── Select9.html │ │ ├── SelectZero.html │ │ ├── SimpleBigSelect.html │ │ ├── SimpleBigSelectZero.html │ │ ├── SimpleSelect.html │ │ ├── SimpleSelectZero.html │ │ ├── SparseRank.html │ │ ├── SparseSelect.html │ │ ├── package-summary.html │ │ └── package-tree.html │ │ ├── io │ │ ├── BucketedHashStore.Bucket.html │ │ ├── BucketedHashStore.DuplicateException.html │ │ ├── BucketedHashStore.html │ │ ├── ChunkedHashStore.Chunk.html │ │ ├── ChunkedHashStore.DuplicateException.html │ │ ├── ChunkedHashStore.html │ │ ├── FileLinesBigList.FileLinesIterator.html │ │ ├── FileLinesBigList.html │ │ ├── FileLinesList.FileLinesIterator.html │ │ ├── FileLinesList.html │ │ ├── package-summary.html │ │ └── package-tree.html │ │ ├── mph │ │ ├── AbstractHashFunction.html │ │ ├── CHDMinimalPerfectHashFunction.Builder.html │ │ ├── 
CHDMinimalPerfectHashFunction.html │ │ ├── GOV3Function.Builder.html │ │ ├── GOV3Function.html │ │ ├── GOV4Function.Builder.html │ │ ├── GOV4Function.html │ │ ├── GOVMinimalPerfectHashFunction.Builder.html │ │ ├── GOVMinimalPerfectHashFunction.html │ │ ├── GV3CompressedFunction.Builder.html │ │ ├── GV3CompressedFunction.html │ │ ├── GV4CompressedFunction.Builder.html │ │ ├── GV4CompressedFunction.html │ │ ├── Hashes.html │ │ ├── HollowTrieDistributor.html │ │ ├── HollowTrieDistributorMonotoneMinimalPerfectHashFunction.html │ │ ├── HollowTrieMonotoneMinimalPerfectHashFunction.html │ │ ├── HypergraphSorter.html │ │ ├── LcpMonotoneMinimalPerfectHashFunction.Builder.html │ │ ├── LcpMonotoneMinimalPerfectHashFunction.html │ │ ├── MWHCFunction.Builder.html │ │ ├── MWHCFunction.html │ │ ├── MinimalPerfectHashFunction.Builder.html │ │ ├── MinimalPerfectHashFunction.html │ │ ├── PaCoTrieDistributor.html │ │ ├── PaCoTrieDistributorMonotoneMinimalPerfectHashFunction.html │ │ ├── TwoStepsGOV3Function.Builder.html │ │ ├── TwoStepsGOV3Function.html │ │ ├── TwoStepsLcpMonotoneMinimalPerfectHashFunction.Builder.html │ │ ├── TwoStepsLcpMonotoneMinimalPerfectHashFunction.html │ │ ├── TwoStepsMWHCFunction.Builder.html │ │ ├── TwoStepsMWHCFunction.html │ │ ├── VLLcpMonotoneMinimalPerfectHashFunction.html │ │ ├── VLPaCoTrieDistributor.html │ │ ├── VLPaCoTrieDistributorMonotoneMinimalPerfectHashFunction.html │ │ ├── ZFastTrieDistributor.html │ │ ├── ZFastTrieDistributorMonotoneMinimalPerfectHashFunction.Builder.html │ │ ├── ZFastTrieDistributorMonotoneMinimalPerfectHashFunction.html │ │ ├── codec │ │ │ ├── Codec.Binary.Coder.Decoder.html │ │ │ ├── Codec.Binary.Coder.html │ │ │ ├── Codec.Binary.html │ │ │ ├── Codec.Coder.html │ │ │ ├── Codec.Decoder.html │ │ │ ├── Codec.Gamma.Coder.Decoder.html │ │ │ ├── Codec.Gamma.Coder.html │ │ │ ├── Codec.Gamma.html │ │ │ ├── Codec.Huffman.Coder.Decoder.html │ │ │ ├── Codec.Huffman.Coder.html │ │ │ ├── Codec.Huffman.html │ │ │ ├── 
Codec.Unary.Coder.Decoder.html │ │ │ ├── Codec.Unary.Coder.html │ │ │ ├── Codec.Unary.html │ │ │ ├── Codec.ZeroCodec.Coder.Decoder.html │ │ │ ├── Codec.ZeroCodec.Coder.html │ │ │ ├── Codec.ZeroCodec.html │ │ │ ├── Codec.html │ │ │ ├── package-summary.html │ │ │ └── package-tree.html │ │ ├── package-summary.html │ │ ├── package-tree.html │ │ └── solve │ │ │ ├── Linear3SystemSolver.html │ │ │ ├── Linear4SystemSolver.html │ │ │ ├── Modulo2System.Modulo2Equation.html │ │ │ ├── Modulo2System.html │ │ │ ├── Modulo3System.Modulo3Equation.html │ │ │ ├── Modulo3System.html │ │ │ ├── Orient3Hypergraph.html │ │ │ ├── package-summary.html │ │ │ └── package-tree.html │ │ └── util │ │ ├── EliasFanoIndexedMonotoneLongBigList.EliasFanoIndexedMonotoneLongBigListIterator.html │ │ ├── EliasFanoIndexedMonotoneLongBigList.html │ │ ├── EliasFanoLongBigList.html │ │ ├── EliasFanoMonotoneBigLongBigList.EliasFanoMonotoneLongBigListIterator.html │ │ ├── EliasFanoMonotoneBigLongBigList.html │ │ ├── EliasFanoMonotoneLongBigList.EliasFanoMonotoneLongBigListIterator.html │ │ ├── EliasFanoMonotoneLongBigList.html │ │ ├── EliasFanoMonotoneLongBigList16.html │ │ ├── EliasFanoPrefixSumLongBigList.EliasFanoPrefixSumLongBigListIterator.html │ │ ├── EliasFanoPrefixSumLongBigList.html │ │ ├── MappedEliasFanoMonotoneLongBigList.MappedEliasFanoMonotoneLongBigListIterator.html │ │ ├── MappedEliasFanoMonotoneLongBigList.html │ │ ├── SignedFunctionStringMap.html │ │ ├── TwoSizesLongBigList.html │ │ ├── ZFastTrie.ExitData.html │ │ ├── ZFastTrie.Handle2NodeMap.html │ │ ├── ZFastTrie.InternalNode.html │ │ ├── ZFastTrie.Leaf.html │ │ ├── ZFastTrie.Node.html │ │ ├── ZFastTrie.ParexData.html │ │ ├── ZFastTrie.html │ │ ├── package-summary.html │ │ └── package-tree.html ├── legal │ ├── COPYRIGHT │ ├── LICENSE │ ├── jquery.md │ └── jqueryUI.md ├── link.svg ├── member-search-index.js ├── module-search-index.js ├── overview-summary.html ├── overview-tree.html ├── package-search-index.js ├── resources │ ├── glass.png │ 
└── x.png ├── script-dir │ ├── jquery-3.6.1.min.js │ ├── jquery-ui.min.css │ └── jquery-ui.min.js ├── script.js ├── search-page.js ├── search.html ├── search.js ├── serialized-form.html ├── stylesheet.css ├── tag-search-index.js └── type-search-index.js ├── genz.sh ├── ivy.xml ├── makefile ├── pom-model.xml ├── setcp.sh ├── slow └── it │ └── unimi │ └── dsi │ └── sux4j │ ├── bits │ ├── Rank9SelectSlowTest.java │ └── SimpleSelectSlowTest.java │ ├── mph │ ├── GOV3FunctionSlowTest.java │ ├── GOV4FunctionSlowTest.java │ ├── GOVMinimalPerfectHashFunctionSlowTest.java │ ├── HollowTrieDistributorMonotoneMinimalPerfectHashFunctionSlowTest.java │ ├── HollowTrieMonotoneMinimalPerfectHashFunctionSlowTest.java │ ├── LargeLongCollection.java │ ├── LcpMonotoneMinimalPerfectHashFunctionSlowTest.java │ ├── MinimalPerfectHashFunctionSlowTest.java │ ├── PaCoTrieDistributorMonotoneMinimalPerfectHashFunctionSlowTest.java │ ├── TwoStepsLcpMonotoneMinimalPerfectHashFunctionSlowTest.java │ ├── VLLcpMonotoneMinimalPerfectHashFunctionSlowTest.java │ ├── VLPaCoTrieDistributorMonotoneMinimalPerfectHashFunctionSlowTest.java │ └── ZFastTrieDistributorMonotoneMinimalPerfectHashFunctionSlowTest.java │ └── util │ ├── EliasFanoMonotoneBigLongBigListSlowTest.java │ └── EliasFanoMonotoneLongBigListSlowTest.java ├── src ├── it │ └── unimi │ │ └── dsi │ │ └── sux4j │ │ ├── bits │ │ ├── AbstractRank.java │ │ ├── BalancedParentheses.java │ │ ├── HintedBsearchSelect.java │ │ ├── JacobsonBalancedParentheses.java │ │ ├── Rank.java │ │ ├── Rank11.java │ │ ├── Rank12.java │ │ ├── Rank16.java │ │ ├── Rank9.java │ │ ├── RankSelect.java │ │ ├── Select.java │ │ ├── Select9.java │ │ ├── SelectZero.java │ │ ├── SimpleBigSelect.java │ │ ├── SimpleBigSelectZero.java │ │ ├── SimpleSelect.java │ │ ├── SimpleSelectZero.java │ │ ├── SparseRank.java │ │ ├── SparseSelect.java │ │ └── package-info.java │ │ ├── io │ │ ├── BucketedHashStore.java │ │ ├── ChunkedHashStore.java │ │ ├── FileLinesBigList.java │ │ ├── 
FileLinesList.java │ │ └── package-info.java │ │ ├── mph │ │ ├── AbstractHashFunction.java │ │ ├── CHDMinimalPerfectHashFunction.java │ │ ├── GOV3Function.java │ │ ├── GOV4Function.java │ │ ├── GOVMinimalPerfectHashFunction.java │ │ ├── GV3CompressedFunction.java │ │ ├── GV4CompressedFunction.java │ │ ├── Hashes.java │ │ ├── HollowTrieDistributor.java │ │ ├── HollowTrieDistributorMonotoneMinimalPerfectHashFunction.java │ │ ├── HollowTrieMonotoneMinimalPerfectHashFunction.java │ │ ├── HypergraphSorter.java │ │ ├── LcpMonotoneMinimalPerfectHashFunction.java │ │ ├── MWHCFunction.java │ │ ├── MinimalPerfectHashFunction.java │ │ ├── PaCoTrieDistributor.java │ │ ├── PaCoTrieDistributorMonotoneMinimalPerfectHashFunction.java │ │ ├── TwoStepsGOV3Function.java │ │ ├── TwoStepsLcpMonotoneMinimalPerfectHashFunction.java │ │ ├── TwoStepsMWHCFunction.java │ │ ├── VLLcpMonotoneMinimalPerfectHashFunction.java │ │ ├── VLPaCoTrieDistributor.java │ │ ├── VLPaCoTrieDistributorMonotoneMinimalPerfectHashFunction.java │ │ ├── ZFastTrieDistributor.java │ │ ├── ZFastTrieDistributorMonotoneMinimalPerfectHashFunction.java │ │ ├── codec │ │ │ └── Codec.java │ │ ├── package-info.java │ │ └── solve │ │ │ ├── Linear3SystemSolver.java │ │ │ ├── Linear4SystemSolver.java │ │ │ ├── Modulo2SparseSystem.java │ │ │ ├── Modulo2System.java │ │ │ ├── Modulo3System.java │ │ │ ├── Orient3Hypergraph.java │ │ │ └── package-info.java │ │ ├── scratch │ │ ├── EliasFanoMonotoneLongBigListTables.java │ │ ├── MergedBitVectorIterator.java │ │ ├── NumberToBitVector.java │ │ ├── Rank11Original.java │ │ └── Rank9GogPetri.java │ │ ├── test │ │ ├── ByteArrayFunctionSpeedTest.java │ │ ├── EliasFanoIndexedMonotoneLongBigListSpeedTest.java │ │ ├── EliasFanoLongBigListSpeedTest.java │ │ ├── EliasFanoMonotoneBigLongBigListSpeedTest.java │ │ ├── EliasFanoMonotoneLongBigListSpeedTest.java │ │ ├── FunctionSpeedTest.java │ │ ├── GOVMinimalPerfectHashFunction128.java │ │ ├── GenerateGeometricValues.java │ │ ├── 
GeneratePowerLawValues.java │ │ ├── GenerateRandom32BitStrings.java │ │ ├── GenerateRandom64BitIntegers.java │ │ ├── GenerateRandom64BitStrings.java │ │ ├── GenerateRandomStrings.java │ │ ├── GenerateUniformValues.java │ │ ├── HollowTrieSpeedTest.java │ │ ├── ListSpeedTest.java │ │ ├── LongFunctionSpeedTest.java │ │ ├── RankSelectSpeedTest.java │ │ ├── RankSpeedTest.java │ │ ├── SelectSpeedTest.java │ │ ├── SuccinctTreeDecoder.java │ │ ├── TwoSizesLongBigListSpeedTest.java │ │ ├── ValueStats.java │ │ └── ZFastTrieSpeedTest.java │ │ └── util │ │ ├── EliasFanoIndexedMonotoneLongBigList.java │ │ ├── EliasFanoLongBigList.java │ │ ├── EliasFanoMonotoneBigLongBigList.java │ │ ├── EliasFanoMonotoneLongBigList.java │ │ ├── EliasFanoMonotoneLongBigList16.java │ │ ├── EliasFanoPrefixSumLongBigList.java │ │ ├── MappedEliasFanoMonotoneLongBigList.java │ │ ├── SignedFunctionStringMap.java │ │ ├── TwoSizesLongBigList.java │ │ ├── ZFastTrie.java │ │ └── package-info.java └── overview.html ├── sux4j.bnd └── test └── it └── unimi └── dsi └── sux4j ├── bits ├── BalancedParenthesesTestCase.java ├── JacobsonBalancedParenthesesTest.java ├── Rank11Test.java ├── Rank12Test.java ├── Rank16Test.java ├── Rank9BinaryTest.java ├── Rank9SelectTest.java ├── RankSelectTestCase.java ├── SimpleBigSelectTest.java ├── SimpleSelectTest.java ├── SimpleSelectZeroTest.java ├── SparseRankTest.java ├── SparseSelectTest.java ├── SparseTest.java ├── TrivialBalancedParentheses.java └── TrivialBalancedParenthesesTest.java ├── io ├── BucketedHashStoreTest.java └── FileLinesListTest.java ├── mph ├── CHDMinimalPerfectHashFunctionTest.java ├── GOV3FunctionTest.java ├── GOV4FunctionTest.java ├── GOVMinimalPerfectHashFunctionTest.java ├── GV3CompressedFunctionTest.java ├── GV4CompressedFunctionTest.java ├── HashesTest.java ├── HollowTrieDistributorMinimalPerfectMonotoneHashFunctionTest.java ├── HollowTrieMonotoneMinimalPerfectHashFunctionTest.java ├── HypergraphFunctionTest.java ├── HypergraphSolverTest.java ├── 
LcpMonotoneMinimalPerfectHashFunctionTest.java ├── MWHCFunctionTest.java ├── MinimalPerfectHashFunctionTest.java ├── PaCoTrieDistributorMonotoneMinimalPerfectHashFunctionTest.java ├── TwoStepsGOV3FunctionTest.java ├── TwoStepsLcpMonotoneMinimalPerfectHashFunctionTest.java ├── VLLcpMonotoneMinimalPerfectHashFunctionTest.java ├── VLPaCoTrieDistributorMonotoneMinimalPerfectHashFunctionTest.java ├── ZFastTrieDistributorMonotoneMinimalPerfectHashFunctionTest.java ├── codec │ └── CodecTest.java └── solve │ ├── Modulo2SparseSystemTest.java │ ├── Modulo2SystemTest.java │ └── Modulo3SystemTest.java ├── scratch ├── EliasFanoMonotoneLongBigListTablesTest.java ├── Rank11OriginalTest.java └── Rank9GogPetriTest.java └── util ├── EliasFanoIndexedMonotoneLongBigListTest.java ├── EliasFanoLongBigListTest.java ├── EliasFanoMonotoneBigLongBigListTest.java ├── EliasFanoMonotoneLongBigList16Test.java ├── EliasFanoMonotoneLongBigListTest.java ├── EliasFanoPrefixSumLongBigListTest.java ├── MappedEliasFanoMonotoneLongBigListTest.java ├── TwoSizesBigListTest.java └── ZFastTrieTest.java /.gitignore: -------------------------------------------------------------------------------- 1 | *array 2 | *int64_t 3 | *int128_t 4 | *signature 5 | *.dump 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Welcome to the Sux Project and Sux4J! 2 | 3 | ## Introduction 4 | 5 | Sux is an umbrella nickname for the results of my fiddling with the 6 | implementation of basic succinct data structures in 7 | [C++](https://github.com/vigna/sux/), 8 | [Java](https://github.com/vigna/Sux4J/), and 9 | [Rust](https://github.com/vigna/sux-rs/). 10 | 11 | This repository contains the Java code and references to some papers. 12 | Please have a look at the other repositories for the main highlights in 13 | each language. 14 | 15 | This is free software. 
The Rust and Java code is distributed under either 16 | the [GNU Lesser General Public License 17 | 2.1+](https://www.gnu.org/licenses/old-licenses/lgpl-2.1.html) or the 18 | [Apache Software License 19 | 2.0](https://www.apache.org/licenses/LICENSE-2.0). The C++ code is 20 | distributed under the [GNU General Public License 21 | 3.0+](https://www.gnu.org/licenses/gpl-3.0.html) with a [Runtime Library 22 | Exception](https://www.gnu.org/licenses/gcc-exception-3.1.html) (as the C 23 | standard library). 24 | 25 | ## Building 26 | 27 | You need [Ant](https://ant.apache.org/) and [Ivy](https://ant.apache.org/ivy/). 28 | Then, run `ant ivy-setupjars jar`. 29 | 30 | ## Papers 31 | 32 | * A [paper](http://vigna.di.unimi.it/papers.php#VigBIRSQ) on the broadword 33 | techniques used in the rank/select code, and in particular about the 34 | broadword implementation of select queries implemented in Fast.select(). 36 | 37 | * A [paper](http://vigna.di.unimi.it/papers.php#BBPMMPH) on the theory of 38 | monotone minimal perfect hashing. 39 | 40 | * An [experimental paper](http://vigna.di.unimi.it/papers.php#BBPTPMMPH2) 41 | on monotone minimal perfect hashing. 42 | 43 | * A [paper](http://vigna.di.unimi.it/papers.php#GOVFSCF) on the current 44 | implementation of static and minimal perfect hash functions. 45 | 46 | * A [paper](http://vigna.di.unimi.it/papers.php#GeVECSF) on the current 47 | implementation of compressed static functions. 48 | 49 | * A [paper](http://vigna.di.unimi.it/papers.php#MaVCFTDRS) on the C++ 50 | implementation of dynamic ranking and selection using compact Fenwick trees. 51 | 52 | * A [paper](http://vigna.di.unimi.it/papers.php#EGVRS) on the C++ 53 | implementation of RecSplit. 54 | 55 | * A [paper](http://vigna.di.unimi.it/papers.php#VigECS) on the Rust 56 | implementation of functions and filters based on ε-cost sharding. 
57 | -------------------------------------------------------------------------------- /bash/testmain.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | 3 | # Tests main methods 4 | 5 | KEYS=$(mktemp) 6 | FUNCTION=$(mktemp) 7 | VALUES=$(mktemp) 8 | 9 | LANG="en_US.UTF-8" cat >$KEYS <$KEYS.gz 18 | zstd $KEYS 19 | 20 | java bsh.Interpreter <. 18 | * 19 | */ 20 | 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include "csf.h" 27 | 28 | csf *load_csf(int h) { 29 | csf *csf = calloc(1, sizeof *csf); 30 | read(h, &csf->size, sizeof csf->size); 31 | uint64_t t; 32 | 33 | read(h, &t, sizeof t); 34 | csf->multiplier = t; 35 | 36 | read(h, &t, sizeof t); 37 | csf->global_max_codeword_length = t; 38 | 39 | read(h, &csf->global_seed, sizeof csf->global_seed); 40 | read(h, &csf->offset_and_seed_length, sizeof csf->offset_and_seed_length); 41 | csf->offset_and_seed = malloc(csf->offset_and_seed_length * sizeof *csf->offset_and_seed); 42 | read(h, csf->offset_and_seed, csf->offset_and_seed_length * sizeof *csf->offset_and_seed); 43 | 44 | read(h, &csf->array_length, sizeof csf->array_length); 45 | 46 | csf->array = malloc(csf->array_length * sizeof *csf->array); 47 | read(h, csf->array, csf->array_length * sizeof *csf->array); 48 | 49 | // Decoder 50 | read(h, &csf->escaped_symbol_length, sizeof csf->escaped_symbol_length); 51 | read(h, &csf->escape_length, sizeof csf->escape_length); 52 | 53 | uint64_t decoding_table_length; 54 | read(h, &decoding_table_length, sizeof decoding_table_length); 55 | 56 | uint64_t num_symbols; 57 | read(h, &num_symbols, sizeof num_symbols); 58 | 59 | // Compact 60 | char *p = malloc(sizeof *csf + decoding_table_length * sizeof *csf->last_codeword_plus_one + decoding_table_length * sizeof *csf->how_many_up_to_block + (decoding_table_length + 7 & ~7ULL) * sizeof *csf->shift + num_symbols * sizeof *csf->symbol); 61 | csf = memcpy(p, csf, sizeof *csf); 62 | p += sizeof 
*csf; 63 | 64 | csf->last_codeword_plus_one = (uint64_t *)p; 65 | p += read(h, csf->last_codeword_plus_one, decoding_table_length * sizeof *csf->last_codeword_plus_one); 66 | 67 | csf->how_many_up_to_block = (uint32_t *)p; 68 | p += read(h, csf->how_many_up_to_block, decoding_table_length * sizeof *csf->how_many_up_to_block); 69 | 70 | csf->shift = (uint8_t *)p; 71 | read(h, csf->shift, decoding_table_length * sizeof *csf->shift); 72 | p += decoding_table_length + 7 & ~7ULL; // Realign 73 | 74 | csf->symbol = (uint64_t *)p; 75 | read(h, csf->symbol, num_symbols * sizeof *csf->symbol); 76 | 77 | return csf; 78 | } 79 | -------------------------------------------------------------------------------- /c/csf.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Sux: Succinct data structures 3 | * 4 | * Copyright (C) 2018-2020 Sebastiano Vigna 5 | * 6 | * This library is free software; you can redistribute it and/or modify it 7 | * under the terms of the GNU Lesser General Public License as published by the Free 8 | * Software Foundation; either version 3 of the License, or (at your option) 9 | * any later version. 10 | * 11 | * This library is distributed in the hope that it will be useful, but 12 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 13 | * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License 14 | * for more details. 15 | * 16 | * You should have received a copy of the GNU Lesser General Public License 17 | * along with this program; if not, see . 
#ifndef CSF_H_INCLUDED
#define CSF_H_INCLUDED

/* Include guard: csf3.h and csf4.h both include this header, and defining the
   anonymous-struct typedef below twice in one translation unit is an error. */

#include <inttypes.h>

#ifdef USE_MMAP
/* NOTE(review): the header names were lost in the extracted source; these are
   the headers that declare mmap() and MAP_HUGE_SHIFT on Linux. Confirm. */
#include <sys/mman.h>
#include <linux/mman.h>
/* Replaces calloc() with an anonymous private mapping that requests huge
   pages (30 << MAP_HUGE_SHIFT encodes a 2^30-byte page size). Unlike calloc(),
   mmap() reports failure with MAP_FAILED rather than NULL. */
#define calloc(n, size) mmap((void *)(0x0UL), (n) * (size), PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | (30 << MAP_HUGE_SHIFT), 0, 0)
#endif

/* In-memory form of a compressed static function, as filled in by load_csf(). */
typedef struct {
	uint64_t size;
	uint64_t multiplier;
	uint64_t global_max_codeword_length;
	uint64_t escaped_symbol_length;
	uint64_t escape_length;
	uint64_t global_seed;
	/* Number of 64-bit words in offset_and_seed. */
	uint64_t offset_and_seed_length;
	uint64_t *offset_and_seed;
	/* Number of 64-bit words in array. */
	uint64_t array_length;
	uint64_t *array;
	/* Decoder tables; load_csf() places them in the same allocation as the
	   structure itself. */
	uint64_t *symbol;
	uint64_t *last_codeword_plus_one;
	uint32_t *how_many_up_to_block;
	uint8_t *shift;
} csf;

/* Reads a compressed static function from the open file descriptor h. */
csf *load_csf(int h);

#endif /* CSF_H_INCLUDED */
18 | * 19 | */ 20 | 21 | #include "csf.h" 22 | 23 | int64_t csf3_get_byte_array(const csf *csf, char *key, uint64_t len); 24 | int64_t csf3_get_uint64_t(const csf *mph, uint64_t key); 25 | -------------------------------------------------------------------------------- /c/csf4.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Sux: Succinct data structures 3 | * 4 | * Copyright (C) 2018-2020 Sebastiano Vigna 5 | * 6 | * This library is free software; you can redistribute it and/or modify it 7 | * under the terms of the GNU Lesser General Public License as published by the Free 8 | * Software Foundation; either version 3 of the License, or (at your option) 9 | * any later version. 10 | * 11 | * This library is distributed in the hope that it will be useful, but 12 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 13 | * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License 14 | * for more details. 15 | * 16 | * You should have received a copy of the GNU Lesser General Public License 17 | * along with this program; if not, see . 18 | * 19 | */ 20 | 21 | #include "csf.h" 22 | 23 | int64_t csf4_get_byte_array(const csf *csf, char *key, uint64_t len); 24 | int64_t csf4_get_uint64_t(const csf *mph, uint64_t key); 25 | -------------------------------------------------------------------------------- /c/mph.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Sux: Succinct data structures 3 | * 4 | * Copyright (C) 2018-2020 Sebastiano Vigna 5 | * 6 | * This library is free software; you can redistribute it and/or modify it 7 | * under the terms of the GNU Lesser General Public License as published by the Free 8 | * Software Foundation; either version 3 of the License, or (at your option) 9 | * any later version. 
#ifndef MPH_H_INCLUDED
#define MPH_H_INCLUDED

/* Include guard: defining the anonymous-struct typedef below twice in one
   translation unit is an error. */

#include <inttypes.h>

#ifdef USE_MMAP
/* NOTE(review): the header names were lost in the extracted source; these are
   the headers that declare mmap() and MAP_HUGE_SHIFT on Linux. Confirm. */
#include <sys/mman.h>
#include <linux/mman.h>
/* Replaces calloc() with an anonymous private mapping that requests huge
   pages (30 << MAP_HUGE_SHIFT encodes a 2^30-byte page size). Unlike calloc(),
   mmap() reports failure with MAP_FAILED rather than NULL. */
#define calloc(n, size) mmap((void *)(0x0UL), (n) * (size), PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | (30 << MAP_HUGE_SHIFT), 0, 0)
#endif

/* In-memory form of a minimal perfect hash function, as produced by load_mph(). */
typedef struct {
	uint64_t size;
	uint64_t multiplier;
	uint64_t global_seed;
	/* Presumably the number of entries in edge_offset_and_seed; verify
	   against load_mph(). */
	uint64_t edge_offset_and_seed_length;
	uint64_t *edge_offset_and_seed;
	/* Presumably the number of entries in array; verify against load_mph(). */
	uint64_t array_length;
	uint64_t *array;
} mph;

/* Reads a minimal perfect hash function from the open file descriptor h. */
mph *load_mph(int h);
/* Lookups for a key given as a byte sequence of length len, as a 64-bit
   integer, or as a 128-bit integer. */
int64_t mph_get_byte_array(const mph *mph, char *key, uint64_t len);
int64_t mph_get_uint64_t(const mph *mph, uint64_t key);
int64_t mph_get_uint128_t(const mph *mph, __uint128_t key);

#endif /* MPH_H_INCLUDED */
15 | * 16 | * You should have received a copy of the GNU Lesser General Public License 17 | * along with this program; if not, see . 18 | * 19 | */ 20 | 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include "sf.h" 26 | 27 | sf *load_sf(int h) { 28 | sf *sf = calloc(1, sizeof *sf); 29 | read(h, &sf->size, sizeof sf->size); 30 | uint64_t t; 31 | read(h, &t, sizeof t); 32 | sf->width = t; 33 | read(h, &t, sizeof t); 34 | sf->multiplier = t; 35 | read(h, &sf->global_seed, sizeof sf->global_seed); 36 | read(h, &sf->offset_and_seed_length, sizeof sf->offset_and_seed_length); 37 | sf->offset_and_seed = calloc(sf->offset_and_seed_length, sizeof *sf->offset_and_seed); 38 | read(h, sf->offset_and_seed, sf->offset_and_seed_length * sizeof *sf->offset_and_seed); 39 | 40 | read(h, &sf->array_length, sizeof sf->array_length); 41 | sf->array = calloc(sf->array_length, sizeof *sf->array); 42 | read(h, sf->array, sf->array_length * sizeof *sf->array); 43 | return sf; 44 | } 45 | -------------------------------------------------------------------------------- /c/sf.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Sux: Succinct data structures 3 | * 4 | * Copyright (C) 2018-2020 Sebastiano Vigna 5 | * 6 | * This library is free software; you can redistribute it and/or modify it 7 | * under the terms of the GNU Lesser General Public License as published by the Free 8 | * Software Foundation; either version 3 of the License, or (at your option) 9 | * any later version. 10 | * 11 | * This library is distributed in the hope that it will be useful, but 12 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 13 | * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License 14 | * for more details. 15 | * 16 | * You should have received a copy of the GNU Lesser General Public License 17 | * along with this program; if not, see . 
#ifndef SF_H_INCLUDED
#define SF_H_INCLUDED

/* Include guard: sf3.h and sf4.h both include this header, and defining the
   anonymous-struct typedef below twice in one translation unit is an error. */

#include <inttypes.h>

#ifdef USE_MMAP
/* NOTE(review): the header names were lost in the extracted source; these are
   the headers that declare mmap() and MAP_HUGE_SHIFT on Linux. Confirm. */
#include <sys/mman.h>
#include <linux/mman.h>
/* Replaces calloc() with an anonymous private mapping that requests huge
   pages (30 << MAP_HUGE_SHIFT encodes a 2^30-byte page size). Unlike calloc(),
   mmap() reports failure with MAP_FAILED rather than NULL. */
#define calloc(n, size) mmap((void *)(0x0UL), (n) * (size), PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | (30 << MAP_HUGE_SHIFT), 0, 0)
#endif

/* In-memory form of a static function, as filled in by load_sf(). */
typedef struct {
	uint64_t size;
	uint64_t width;
	uint64_t multiplier;
	uint64_t global_seed;
	/* Number of 64-bit words in offset_and_seed. */
	uint64_t offset_and_seed_length;
	uint64_t *offset_and_seed;
	/* Number of 64-bit words in array. */
	uint64_t array_length;
	uint64_t *array;
} sf;

/* Reads a static function from the open file descriptor h. */
sf *load_sf(int h);

#endif /* SF_H_INCLUDED */
18 | * 19 | */ 20 | 21 | #include "sf.h" 22 | 23 | int64_t sf3_get_byte_array(const sf *sf, char *key, uint64_t len); 24 | int64_t sf3_get_uint64_t(const sf *sf, uint64_t key); 25 | int64_t sf3_get_signature(const sf *sf, const uint64_t signature[4]); 26 | -------------------------------------------------------------------------------- /c/sf4.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Sux: Succinct data structures 3 | * 4 | * Copyright (C) 2018-2020 Sebastiano Vigna 5 | * 6 | * This library is free software; you can redistribute it and/or modify it 7 | * under the terms of the GNU Lesser General Public License as published by the Free 8 | * Software Foundation; either version 3 of the License, or (at your option) 9 | * any later version. 10 | * 11 | * This library is distributed in the hope that it will be useful, but 12 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 13 | * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License 14 | * for more details. 15 | * 16 | * You should have received a copy of the GNU Lesser General Public License 17 | * along with this program; if not, see . 18 | * 19 | */ 20 | 21 | #include "sf.h" 22 | 23 | int64_t sf4_get_byte_array(const sf *sf, char *key, uint64_t len); 24 | int64_t sf4_get_uint64_t(const sf *sf, uint64_t key); 25 | int64_t sf4_get_signature(const sf *sf, const uint64_t signature[4]); 26 | -------------------------------------------------------------------------------- /c/spooky.h: -------------------------------------------------------------------------------- 1 | /* 2 | * SpookyHash - 128-bit noncryptographic hash function 3 | * 4 | * Written in 2012 by Bob Jenkins 5 | * 6 | * Converted to C in 2015 by Joergen Ibsen 7 | * 8 | * To the extent possible under law, the author(s) have dedicated all 9 | * copyright and related and neighboring rights to this software to the 10 | * public domain worldwide. 
This software is distributed without any 11 | * warranty. 12 | * 13 | * Original comment from SpookyV2.h by Bob Jenkins: 14 | * 15 | * SpookyHash: a 128-bit noncryptographic hash function 16 | * By Bob Jenkins, public domain 17 | * Oct 31 2010: alpha, framework + SpookyHash::Mix appears right 18 | * Oct 31 2011: alpha again, Mix only good to 2^^69 but rest appears right 19 | * Dec 31 2011: beta, improved Mix, tested it for 2-bit deltas 20 | * Feb 2 2012: production, same bits as beta 21 | * Feb 5 2012: adjusted definitions of uint* to be more portable 22 | * Mar 30 2012: 3 bytes/cycle, not 4. Alpha was 4 but wasn't thorough enough. 23 | * August 5 2012: SpookyV2 (different results) 24 | * 25 | * Up to 3 bytes/cycle for long messages. Reasonably fast for short messages. 26 | * All 1 or 2 bit deltas achieve avalanche within 1% bias per output bit. 27 | * 28 | * This was developed for and tested on 64-bit x86-compatible processors. 29 | * It assumes the processor is little-endian. There is a macro 30 | * controlling whether unaligned reads are allowed (by default they are). 31 | * This should be an equally good hash on big-endian machines, but it will 32 | * compute different results on them than on little-endian machines. 33 | * 34 | * Google's CityHash has similar specs to SpookyHash, and CityHash is faster 35 | * on new Intel boxes. MD4 and MD5 also have similar specs, but they are orders 36 | * of magnitude slower. CRCs are two or more times slower, but unlike 37 | * SpookyHash, they have nice math for combining the CRCs of pieces to form 38 | * the CRCs of wholes. There are also cryptographic hashes, but those are even 39 | * slower than MD5. 
40 | */ 41 | 42 | #ifndef SPOOKY_H_INCLUDED 43 | #define SPOOKY_H_INCLUDED 44 | 45 | #include 46 | #include 47 | 48 | // size of the internal state 49 | #define SC_BLOCKSIZE (SC_NUMVARS * 8U) 50 | 51 | // size of buffer of unhashed data, in bytes 52 | #define SC_BUFSIZE (2U * SC_BLOCKSIZE) 53 | 54 | void spooky_short(const void *restrict message, size_t length, uint64_t seed, uint64_t *tuple); 55 | void spooky_short_rehash(const uint64_t *signature, const uint64_t seed, uint64_t * const tuple); 56 | 57 | #endif /* SPOOKY_H_INCLUDED */ 58 | -------------------------------------------------------------------------------- /c/test_byte_array.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Sux: Succinct data structures 3 | * 4 | * Copyright (C) 2018-2020 Sebastiano Vigna 5 | * 6 | * This library is free software; you can redistribute it and/or modify it 7 | * under the terms of the GNU Lesser General Public License as published by the Free 8 | * Software Foundation; either version 3 of the License, or (at your option) 9 | * any later version. 10 | * 11 | * This library is distributed in the hope that it will be useful, but 12 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 13 | * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License 14 | * for more details. 15 | * 16 | * You should have received a copy of the GNU Lesser General Public License 17 | * along with this program; if not, see . 18 | * 19 | */ 20 | 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #define SAMPLES 11 31 | 32 | static uint64_t get_system_time(void) { 33 | struct timeval tv; 34 | gettimeofday(&tv, NULL); 35 | return tv.tv_sec * 1000000 + tv.tv_usec; 36 | } 37 | 38 | static int cmp_uint64_t(const void *a, const void *b) { 39 | return *(uint64_t *)a < *(uint64_t *)b ? -1 : *(uint64_t *)a > *(uint64_t *)b ? 
1 : 0; 40 | } 41 | 42 | int main(int argc, char* argv[]) { 43 | int h = open(argv[1], O_RDONLY); 44 | assert(h >= 0); 45 | SUX4J_MAP *SUX4J_MAP = SUX4J_LOAD_MAP(h); 46 | close(h); 47 | 48 | #define NKEYS 10000000 49 | h = open(argv[2], O_RDONLY); 50 | off_t len = lseek(h, 0, SEEK_END); 51 | lseek(h, 0, SEEK_SET); 52 | char *data = malloc(len); 53 | read(h, data, len); 54 | close(h); 55 | 56 | static char *test_buf[NKEYS]; 57 | static int test_len[NKEYS]; 58 | 59 | char *p = data; 60 | for(int i = 0; i < NKEYS; i++) { 61 | while(*p == 0xA || *p == 0xD) p++; 62 | test_buf[i] = p; 63 | while(*p != 0xA && *p != 0xD) p++; 64 | test_len[i] = p - test_buf[i]; 65 | } 66 | 67 | uint64_t u = 0; 68 | 69 | uint64_t sample[SAMPLES]; 70 | 71 | for(int k = SAMPLES; k-- != 0; ) { 72 | int64_t elapsed = - get_system_time(); 73 | for (int i = 0; i < NKEYS; ++i) u += SUX4J_GET_BYTE_ARRAY(SUX4J_MAP, test_buf[i], test_len[i]); 74 | 75 | elapsed += get_system_time(); 76 | sample[k] = elapsed; 77 | printf("Elapsed: %.3fs; %.3f ns/key\n", elapsed * 1E-6, elapsed * 1000. / NKEYS); 78 | } 79 | const volatile int unused = u; 80 | 81 | qsort(sample, SAMPLES, sizeof *sample, cmp_uint64_t); 82 | printf("\nMedian: %.3fs; %.3f ns/key\n", sample[SAMPLES / 2] * 1E-6, sample[SAMPLES / 2] * 1000. / NKEYS); 83 | } 84 | -------------------------------------------------------------------------------- /c/test_csf3_byte_array.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Sux: Succinct data structures 3 | * 4 | * Copyright (C) 2018-2020 Sebastiano Vigna 5 | * 6 | * This library is free software; you can redistribute it and/or modify it 7 | * under the terms of the GNU Lesser General Public License as published by the Free 8 | * Software Foundation; either version 3 of the License, or (at your option) 9 | * any later version. 
10 | * 11 | * This library is distributed in the hope that it will be useful, but 12 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 13 | * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License 14 | * for more details. 15 | * 16 | * You should have received a copy of the GNU Lesser General Public License 17 | * along with this program; if not, see . 18 | * 19 | */ 20 | 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include "csf3.h" 31 | 32 | #define SUX4J_MAP csf 33 | #define SUX4J_LOAD_MAP load_csf 34 | #define SUX4J_GET_BYTE_ARRAY csf3_get_byte_array 35 | 36 | #include "test_byte_array.c" 37 | -------------------------------------------------------------------------------- /c/test_csf4_byte_array.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Sux: Succinct data structures 3 | * 4 | * Copyright (C) 2018-2020 Sebastiano Vigna 5 | * 6 | * This library is free software; you can redistribute it and/or modify it 7 | * under the terms of the GNU Lesser General Public License as published by the Free 8 | * Software Foundation; either version 3 of the License, or (at your option) 9 | * any later version. 10 | * 11 | * This library is distributed in the hope that it will be useful, but 12 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 13 | * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License 14 | * for more details. 15 | * 16 | * You should have received a copy of the GNU Lesser General Public License 17 | * along with this program; if not, see . 
18 | * 19 | */ 20 | 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include "csf4.h" 31 | 32 | #define SUX4J_MAP csf 33 | #define SUX4J_LOAD_MAP load_csf 34 | #define SUX4J_GET_BYTE_ARRAY csf4_get_byte_array 35 | 36 | #include "test_byte_array.c" 37 | -------------------------------------------------------------------------------- /c/test_mph_byte_array.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Sux: Succinct data structures 3 | * 4 | * Copyright (C) 2018-2020 Sebastiano Vigna 5 | * 6 | * This library is free software; you can redistribute it and/or modify it 7 | * under the terms of the GNU Lesser General Public License as published by the Free 8 | * Software Foundation; either version 3 of the License, or (at your option) 9 | * any later version. 10 | * 11 | * This library is distributed in the hope that it will be useful, but 12 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 13 | * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License 14 | * for more details. 15 | * 16 | * You should have received a copy of the GNU Lesser General Public License 17 | * along with this program; if not, see . 
18 | * 19 | */ 20 | 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include "mph.h" 31 | 32 | #define SUX4J_MAP mph 33 | #define SUX4J_LOAD_MAP load_mph 34 | #define SUX4J_GET_BYTE_ARRAY mph_get_byte_array 35 | 36 | #include "test_byte_array.c" 37 | -------------------------------------------------------------------------------- /c/test_mph_uint128_t.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Sux: Succinct data structures 3 | * 4 | * Copyright (C) 2018-2020 Sebastiano Vigna 5 | * 6 | * This library is free software; you can redistribute it and/or modify it 7 | * under the terms of the GNU Lesser General Public License as published by the Free 8 | * Software Foundation; either version 3 of the License, or (at your option) 9 | * any later version. 10 | * 11 | * This library is distributed in the hope that it will be useful, but 12 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 13 | * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License 14 | * for more details. 15 | * 16 | * You should have received a copy of the GNU Lesser General Public License 17 | * along with this program; if not, see . 
18 | * 19 | */ 20 | 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include "mph.h" 31 | #define SAMPLES 11 32 | 33 | static uint64_t get_system_time(void) { 34 | struct timeval tv; 35 | gettimeofday(&tv, NULL); 36 | return tv.tv_sec * 1000000 + tv.tv_usec; 37 | } 38 | 39 | static inline uint64_t rotl(const uint64_t x, int k) { 40 | return (x << k) | (x >> (64 - k)); 41 | } 42 | 43 | static uint64_t s[2]; 44 | 45 | uint64_t next(void) { 46 | const uint64_t s0 = s[0]; 47 | uint64_t s1 = s[1]; 48 | const uint64_t result = s0 + s1; 49 | 50 | s1 ^= s0; 51 | s[0] = rotl(s0, 24) ^ s1 ^ (s1 << 16); // a, b 52 | s[1] = rotl(s1, 37); // c 53 | 54 | return result; 55 | } 56 | 57 | static int cmp_uint64_t(const void *a, const void *b) { 58 | return *(uint64_t *)a < *(uint64_t *)b ? -1 : *(uint64_t *)a > *(uint64_t *)b ? 1 : 0; 59 | } 60 | 61 | int main(int argc, char* argv[]) { 62 | int h = open(argv[1], O_RDONLY); 63 | assert(h >= 0); 64 | mph *mph = load_mph(h); 65 | close(h); 66 | 67 | #define NKEYS 10000000 68 | 69 | uint64_t total = 0; 70 | uint64_t u = 0; 71 | 72 | uint64_t sample[SAMPLES]; 73 | 74 | for(int k = SAMPLES; k-- != 0; ) { 75 | s[0] = 0x5603141978c51071; 76 | s[1] = 0x3bbddc01ebdf4b72; 77 | 78 | int64_t elapsed = - get_system_time(); 79 | for (int i = 0; i < NKEYS; ++i) u ^= mph_get_uint128_t(mph, (__uint128_t)next() << 64 | (next() ^ u)); 80 | 81 | elapsed += get_system_time(); 82 | total += elapsed; 83 | sample[k] = elapsed; 84 | printf("Elapsed: %.3fs; %.3f ns/key\n", elapsed * 1E-6, elapsed * 1000. / NKEYS); 85 | } 86 | 87 | const volatile int unused = u; 88 | 89 | qsort(sample, SAMPLES, sizeof *sample, cmp_uint64_t); 90 | printf("\nMedian: %.3fs; %.3f ns/key\n", sample[SAMPLES / 2] * 1E-6, sample[SAMPLES / 2] * 1000. 
/ NKEYS); 91 | } 92 | -------------------------------------------------------------------------------- /c/test_mph_uint64_t.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Sux: Succinct data structures 3 | * 4 | * Copyright (C) 2018-2020 Sebastiano Vigna 5 | * 6 | * This library is free software; you can redistribute it and/or modify it 7 | * under the terms of the GNU Lesser General Public License as published by the Free 8 | * Software Foundation; either version 3 of the License, or (at your option) 9 | * any later version. 10 | * 11 | * This library is distributed in the hope that it will be useful, but 12 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 13 | * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License 14 | * for more details. 15 | * 16 | * You should have received a copy of the GNU Lesser General Public License 17 | * along with this program; if not, see . 18 | * 19 | */ 20 | 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include "mph.h" 31 | 32 | static uint64_t get_system_time(void) { 33 | struct timeval tv; 34 | gettimeofday(&tv, NULL); 35 | return tv.tv_sec * 1000000 + tv.tv_usec; 36 | } 37 | 38 | int main(int argc, char* argv[]) { 39 | int h = open(argv[1], O_RDONLY); 40 | assert(h >= 0); 41 | mph *mph = load_mph(h); 42 | close(h); 43 | 44 | #define NKEYS 10000000 45 | h = open(argv[2], O_RDONLY); 46 | uint64_t *data = calloc(NKEYS, sizeof *data); 47 | read(h, data, NKEYS * sizeof *data); 48 | close(h); 49 | 50 | uint64_t total = 0; 51 | uint64_t u = 0; 52 | 53 | for(int k = 10; k-- != 0; ) { 54 | int64_t elapsed = - get_system_time(); 55 | for (int i = 0; i < NKEYS; ++i) u ^= mph_get_uint64_t(mph, data[i]); 56 | 57 | elapsed += get_system_time(); 58 | total += elapsed; 59 | printf("Elapsed: %.3fs; %.3f ns/key\n", elapsed * 1E-6, elapsed * 1000. 
/ NKEYS); 60 | } 61 | const volatile int unused = u; 62 | printf("\nAverage: %.3fs; %.3f ns/key\n", (total * .1) * 1E-6, (total * .1) * 1000. / NKEYS); 63 | } 64 | -------------------------------------------------------------------------------- /c/test_sf3_byte_array.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Sux: Succinct data structures 3 | * 4 | * Copyright (C) 2018-2020 Sebastiano Vigna 5 | * 6 | * This library is free software; you can redistribute it and/or modify it 7 | * under the terms of the GNU Lesser General Public License as published by the Free 8 | * Software Foundation; either version 3 of the License, or (at your option) 9 | * any later version. 10 | * 11 | * This library is distributed in the hope that it will be useful, but 12 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 13 | * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License 14 | * for more details. 15 | * 16 | * You should have received a copy of the GNU Lesser General Public License 17 | * along with this program; if not, see . 
18 | * 19 | */ 20 | 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include "sf3.h" 31 | 32 | #define SUX4J_MAP sf 33 | #define SUX4J_LOAD_MAP load_sf 34 | #define SUX4J_GET_BYTE_ARRAY sf3_get_byte_array 35 | 36 | #include "test_byte_array.c" 37 | -------------------------------------------------------------------------------- /c/test_sf3_signature.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Sux: Succinct data structures 3 | * 4 | * Copyright (C) 2018-2021 Sebastiano Vigna 5 | * 6 | * This library is free software; you can redistribute it and/or modify it 7 | * under the terms of the GNU Lesser General Public License as published by the Free 8 | * Software Foundation; either version 3 of the License, or (at your option) 9 | * any later version. 10 | * 11 | * This library is distributed in the hope that it will be useful, but 12 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 13 | * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License 14 | * for more details. 15 | * 16 | * You should have received a copy of the GNU Lesser General Public License 17 | * along with this program; if not, see . 
18 | * 19 | */ 20 | 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include "sf3.h" 31 | 32 | #define SUX4J_MAP sf 33 | #define SUX4J_LOAD_MAP load_sf 34 | #define SUX4J_GET_SIGNATURE sf3_get_signature 35 | 36 | #include "test_signature.c" 37 | -------------------------------------------------------------------------------- /c/test_sf4_byte_array.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Sux: Succinct data structures 3 | * 4 | * Copyright (C) 2018-2020 Sebastiano Vigna 5 | * 6 | * This library is free software; you can redistribute it and/or modify it 7 | * under the terms of the GNU Lesser General Public License as published by the Free 8 | * Software Foundation; either version 3 of the License, or (at your option) 9 | * any later version. 10 | * 11 | * This library is distributed in the hope that it will be useful, but 12 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 13 | * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License 14 | * for more details. 15 | * 16 | * You should have received a copy of the GNU Lesser General Public License 17 | * along with this program; if not, see . 
18 | * 19 | */ 20 | 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include "sf4.h" 31 | 32 | #define SUX4J_MAP sf 33 | #define SUX4J_LOAD_MAP load_sf 34 | #define SUX4J_GET_BYTE_ARRAY sf4_get_byte_array 35 | 36 | #include "test_byte_array.c" 37 | -------------------------------------------------------------------------------- /c/test_sf4_signature.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Sux: Succinct data structures 3 | * 4 | * Copyright (C) 2018-2021 Sebastiano Vigna 5 | * 6 | * This library is free software; you can redistribute it and/or modify it 7 | * under the terms of the GNU Lesser General Public License as published by the Free 8 | * Software Foundation; either version 3 of the License, or (at your option) 9 | * any later version. 10 | * 11 | * This library is distributed in the hope that it will be useful, but 12 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 13 | * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License 14 | * for more details. 15 | * 16 | * You should have received a copy of the GNU Lesser General Public License 17 | * along with this program; if not, see . 
18 | * 19 | */ 20 | 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include "sf4.h" 31 | 32 | #define SUX4J_MAP sf 33 | #define SUX4J_LOAD_MAP load_sf 34 | #define SUX4J_GET_SIGNATURE sf4_get_signature 35 | 36 | #include "test_signature.c" 37 | -------------------------------------------------------------------------------- /c/test_signature.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Sux: Succinct data structures 3 | * 4 | * Copyright (C) 2018-2021 Sebastiano Vigna 5 | * 6 | * This library is free software; you can redistribute it and/or modify it 7 | * under the terms of the GNU Lesser General Public License as published by the Free 8 | * Software Foundation; either version 3 of the License, or (at your option) 9 | * any later version. 10 | * 11 | * This library is distributed in the hope that it will be useful, but 12 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 13 | * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License 14 | * for more details. 15 | * 16 | * You should have received a copy of the GNU Lesser General Public License 17 | * along with this program; if not, see . 18 | * 19 | */ 20 | 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #define SAMPLES 11 31 | 32 | static uint64_t get_system_time(void) { 33 | struct timeval tv; 34 | gettimeofday(&tv, NULL); 35 | return tv.tv_sec * 1000000 + tv.tv_usec; 36 | } 37 | 38 | static int cmp_uint64_t(const void *a, const void *b) { 39 | return *(uint64_t *)a < *(uint64_t *)b ? -1 : *(uint64_t *)a > *(uint64_t *)b ? 
1 : 0; 40 | } 41 | 42 | static inline uint64_t rotl(const uint64_t x, int k) { 43 | return (x << k) | (x >> (64 - k)); 44 | } 45 | 46 | static uint64_t s[2]; 47 | 48 | uint64_t next(void) { 49 | const uint64_t s0 = s[0]; 50 | uint64_t s1 = s[1]; 51 | const uint64_t result = s0 + s1; 52 | 53 | s1 ^= s0; 54 | s[0] = rotl(s0, 24) ^ s1 ^ (s1 << 16); // a, b 55 | s[1] = rotl(s1, 37); // c 56 | 57 | return result; 58 | } 59 | 60 | int main(int argc, char* argv[]) { 61 | int h = open(argv[1], O_RDONLY); 62 | assert(h >= 0); 63 | SUX4J_MAP *SUX4J_MAP = SUX4J_LOAD_MAP(h); 64 | close(h); 65 | 66 | #define NKEYS 10000000 67 | uint64_t u = 0; 68 | 69 | uint64_t sample[SAMPLES], signature[4] = { next(), next(), next(), next() }; 70 | 71 | for(int k = SAMPLES; k-- != 0; ) { 72 | s[0] = 0x5603141978c51071; 73 | s[1] = 0x3bbddc01ebdf4b72; 74 | 75 | int64_t elapsed = - get_system_time(); 76 | for (int i = 0; i < NKEYS; ++i) { 77 | const uint64_t t = next(); 78 | signature[0] ^= t; 79 | signature[1] ^= t; 80 | signature[2] ^= t; 81 | signature[3] ^= t; 82 | u += SUX4J_GET_SIGNATURE(SUX4J_MAP, signature); 83 | } 84 | 85 | elapsed += get_system_time(); 86 | sample[k] = elapsed; 87 | printf("Elapsed: %.3fs; %.3f ns/key\n", elapsed * 1E-6, elapsed * 1000. / NKEYS); 88 | } 89 | const volatile int unused = u; 90 | 91 | qsort(sample, SAMPLES, sizeof *sample, cmp_uint64_t); 92 | printf("\nMedian: %.3fs; %.3f ns/key\n", sample[SAMPLES / 2] * 1E-6, sample[SAMPLES / 2] * 1000. 
/ NKEYS); 93 | } 94 | -------------------------------------------------------------------------------- /docs/copy.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 27 | 28 | 29 | 31 | 33 | 34 | -------------------------------------------------------------------------------- /docs/element-list: -------------------------------------------------------------------------------- 1 | it.unimi.dsi.sux4j.bits 2 | it.unimi.dsi.sux4j.io 3 | it.unimi.dsi.sux4j.mph 4 | it.unimi.dsi.sux4j.mph.codec 5 | it.unimi.dsi.sux4j.mph.solve 6 | it.unimi.dsi.sux4j.util 7 | -------------------------------------------------------------------------------- /docs/legal/COPYRIGHT: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vigna/Sux4J/273253bc589829f049c725e08452e8a44aebf22d/docs/legal/COPYRIGHT -------------------------------------------------------------------------------- /docs/legal/jquery.md: -------------------------------------------------------------------------------- 1 | ## jQuery v3.6.1 2 | 3 | ### jQuery License 4 | ``` 5 | jQuery v 3.6.1 6 | Copyright OpenJS Foundation and other contributors, https://openjsf.org/ 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining 9 | a copy of this software and associated documentation files (the 10 | "Software"), to deal in the Software without restriction, including 11 | without limitation the rights to use, copy, modify, merge, publish, 12 | distribute, sublicense, and/or sell copies of the Software, and to 13 | permit persons to whom the Software is furnished to do so, subject to 14 | the following conditions: 15 | 16 | The above copyright notice and this permission notice shall be 17 | included in all copies or substantial portions of the Software. 
18 | 19 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 20 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 22 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 23 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 24 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 25 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 26 | 27 | ****************************************** 28 | 29 | The jQuery JavaScript Library v3.6.1 also includes Sizzle.js 30 | 31 | Sizzle.js includes the following license: 32 | 33 | Copyright JS Foundation and other contributors, https://js.foundation/ 34 | 35 | This software consists of voluntary contributions made by many 36 | individuals. For exact contribution history, see the revision history 37 | available at https://github.com/jquery/sizzle 38 | 39 | The following license applies to all parts of this software except as 40 | documented below: 41 | 42 | ==== 43 | 44 | Permission is hereby granted, free of charge, to any person obtaining 45 | a copy of this software and associated documentation files (the 46 | "Software"), to deal in the Software without restriction, including 47 | without limitation the rights to use, copy, modify, merge, publish, 48 | distribute, sublicense, and/or sell copies of the Software, and to 49 | permit persons to whom the Software is furnished to do so, subject to 50 | the following conditions: 51 | 52 | The above copyright notice and this permission notice shall be 53 | included in all copies or substantial portions of the Software. 54 | 55 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 56 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 57 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 58 | NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 59 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 60 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 61 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 62 | 63 | ==== 64 | 65 | All files located in the node_modules and external directories are 66 | externally maintained libraries used by this software which have their 67 | own licenses; we recommend you read them, as their terms may differ from 68 | the terms above. 69 | 70 | ********************* 71 | 72 | ``` 73 | -------------------------------------------------------------------------------- /docs/legal/jqueryUI.md: -------------------------------------------------------------------------------- 1 | ## jQuery UI v1.13.2 2 | 3 | ### jQuery UI License 4 | ``` 5 | Copyright jQuery Foundation and other contributors, https://jquery.org/ 6 | 7 | This software consists of voluntary contributions made by many 8 | individuals. For exact contribution history, see the revision history 9 | available at https://github.com/jquery/jquery-ui 10 | 11 | The following license applies to all parts of this software except as 12 | documented below: 13 | 14 | ==== 15 | 16 | Permission is hereby granted, free of charge, to any person obtaining 17 | a copy of this software and associated documentation files (the 18 | "Software"), to deal in the Software without restriction, including 19 | without limitation the rights to use, copy, modify, merge, publish, 20 | distribute, sublicense, and/or sell copies of the Software, and to 21 | permit persons to whom the Software is furnished to do so, subject to 22 | the following conditions: 23 | 24 | The above copyright notice and this permission notice shall be 25 | included in all copies or substantial portions of the Software. 
26 | 27 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 28 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 29 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 30 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 31 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 32 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 33 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 34 | 35 | ==== 36 | 37 | Copyright and related rights for sample code are waived via CC0. Sample 38 | code is defined as all source code contained within the demos directory. 39 | 40 | CC0: http://creativecommons.org/publicdomain/zero/1.0/ 41 | 42 | ==== 43 | 44 | All files located in the node_modules and external directories are 45 | externally maintained libraries used by this software which have their 46 | own licenses; we recommend you read them, as their terms may differ from 47 | the terms above. 48 | 49 | ``` 50 | -------------------------------------------------------------------------------- /docs/link.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 27 | 28 | 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /docs/module-search-index.js: -------------------------------------------------------------------------------- 1 | moduleSearchIndex = [];updateSearchResults(); -------------------------------------------------------------------------------- /docs/overview-summary.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Sux4J 5.4.1 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 17 | 18 | 19 |
20 | 23 |

index.html

24 |
25 | 26 | 27 | -------------------------------------------------------------------------------- /docs/package-search-index.js: -------------------------------------------------------------------------------- 1 | packageSearchIndex = [{"l":"All Packages","u":"allpackages-index.html"},{"l":"it.unimi.dsi.sux4j.bits"},{"l":"it.unimi.dsi.sux4j.io"},{"l":"it.unimi.dsi.sux4j.mph"},{"l":"it.unimi.dsi.sux4j.mph.codec"},{"l":"it.unimi.dsi.sux4j.mph.solve"},{"l":"it.unimi.dsi.sux4j.util"}];updateSearchResults(); -------------------------------------------------------------------------------- /docs/resources/glass.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vigna/Sux4J/273253bc589829f049c725e08452e8a44aebf22d/docs/resources/glass.png -------------------------------------------------------------------------------- /docs/resources/x.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vigna/Sux4J/273253bc589829f049c725e08452e8a44aebf22d/docs/resources/x.png -------------------------------------------------------------------------------- /docs/script-dir/jquery-ui.min.css: -------------------------------------------------------------------------------- 1 | /*! 
jQuery UI - v1.13.2 - 2023-02-27 2 | * http://jqueryui.com 3 | * Includes: core.css, autocomplete.css, menu.css 4 | * Copyright jQuery Foundation and other contributors; Licensed MIT */ 5 | 6 | .ui-helper-hidden{display:none}.ui-helper-hidden-accessible{border:0;clip:rect(0 0 0 0);height:1px;margin:-1px;overflow:hidden;padding:0;position:absolute;width:1px}.ui-helper-reset{margin:0;padding:0;border:0;outline:0;line-height:1.3;text-decoration:none;font-size:100%;list-style:none}.ui-helper-clearfix:before,.ui-helper-clearfix:after{content:"";display:table;border-collapse:collapse}.ui-helper-clearfix:after{clear:both}.ui-helper-zfix{width:100%;height:100%;top:0;left:0;position:absolute;opacity:0;-ms-filter:"alpha(opacity=0)"}.ui-front{z-index:100}.ui-state-disabled{cursor:default!important;pointer-events:none}.ui-icon{display:inline-block;vertical-align:middle;margin-top:-.25em;position:relative;text-indent:-99999px;overflow:hidden;background-repeat:no-repeat}.ui-widget-icon-block{left:50%;margin-left:-8px;display:block}.ui-widget-overlay{position:fixed;top:0;left:0;width:100%;height:100%}.ui-autocomplete{position:absolute;top:0;left:0;cursor:default}.ui-menu{list-style:none;padding:0;margin:0;display:block;outline:0}.ui-menu .ui-menu{position:absolute}.ui-menu .ui-menu-item{margin:0;cursor:pointer;list-style-image:url("data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7")}.ui-menu .ui-menu-item-wrapper{position:relative;padding:3px 1em 3px .4em}.ui-menu .ui-menu-divider{margin:5px 0;height:0;font-size:0;line-height:0;border-width:1px 0 0 0}.ui-menu .ui-state-focus,.ui-menu .ui-state-active{margin:-1px}.ui-menu-icons{position:relative}.ui-menu-icons .ui-menu-item-wrapper{padding-left:2em}.ui-menu .ui-icon{position:absolute;top:0;bottom:0;left:.2em;margin:auto 0}.ui-menu .ui-menu-icon{left:auto;right:0} -------------------------------------------------------------------------------- /docs/search.html: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Search (Sux4J 5.4.1) 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 20 | 23 |
24 | 45 |
46 |
47 |

Search

48 |
49 | 50 | 51 |
52 | Additional resources 53 |
54 |
55 |
56 |

The help page provides an introduction to the scope and syntax of JavaDoc search.

57 |

You can use the <ctrl> or <cmd> keys in combination with the left and right arrow keys to switch between result tabs in this page.

58 |

The URL template below may be used to configure this page as a search engine in browsers that support this feature. It has been tested to work in Google Chrome and Mozilla Firefox. Note that other browsers may not support this feature or require a different URL format.

59 | link 60 |

61 | 62 |

63 |
64 |

Loading search index...

65 | 69 |
70 |
71 |
72 | 73 | 74 | -------------------------------------------------------------------------------- /genz.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cp src/it/unimi/dsi/sux4j/bits/SimpleSelect.java src/it/unimi/dsi/sux4j/bits/SimpleSelectZero.java 4 | 5 | sed -i -e 's/bits\[\([^]]\)/~bits\[\1/g' src/it/unimi/dsi/sux4j/bits/SimpleSelectZero.java 6 | sed -i -e 's/Select/SelectZero/g' src/it/unimi/dsi/sux4j/bits/SimpleSelectZero.java 7 | sed -i -e 's/select(/selectZero(/g' src/it/unimi/dsi/sux4j/bits/SimpleSelectZero.java 8 | sed -i -e 's/Fast\.selectZero(/Fast\.select(/g' src/it/unimi/dsi/sux4j/bits/SimpleSelectZero.java 9 | sed -i -e '/numOnes = c/i\ 10 | if (length % 64 != 0) c -= 64 - length % 64;' src/it/unimi/dsi/sux4j/bits/SimpleSelectZero.java 11 | sed -i -e 's/select implementation/zero-select implementation/g' src/it/unimi/dsi/sux4j/bits/SimpleSelectZero.java 12 | 13 | cp src/it/unimi/dsi/sux4j/bits/SimpleBigSelect.java src/it/unimi/dsi/sux4j/bits/SimpleBigSelectZero.java 14 | 15 | sed -i -e 's/bits\[\([^]]\)/~bits\[\1/g' src/it/unimi/dsi/sux4j/bits/SimpleBigSelectZero.java 16 | sed -i -e 's/Select/SelectZero/g' src/it/unimi/dsi/sux4j/bits/SimpleBigSelectZero.java 17 | sed -i -e 's/select(/selectZero(/g' src/it/unimi/dsi/sux4j/bits/SimpleBigSelectZero.java 18 | sed -i -e 's/Fast\.selectZero(/Fast\.select(/g' src/it/unimi/dsi/sux4j/bits/SimpleBigSelectZero.java 19 | sed -i -e '/numOnes = c/i\ 20 | if (length % 64 != 0) c -= 64 - length % 64;' src/it/unimi/dsi/sux4j/bits/SimpleBigSelectZero.java 21 | sed -i -e 's/select implementation/zero-select implementation/g' src/it/unimi/dsi/sux4j/bits/SimpleBigSelectZero.java 22 | -------------------------------------------------------------------------------- /ivy.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 

--------------------------------------------------------------------------------
/makefile:
--------------------------------------------------------------------------------
include build.properties

TAR=tar

# None of the targets below produces a file with its own name.
.PHONY: source binary stage

# Builds sux4j-$(version)-src.tar.gz from the working copy, after regenerating
# the zero-select classes with genz.sh; a few files are then removed from the
# archive before it is compressed.
source:
	rm -fr sux4j-$(version)
	ant clean
	ln -s . sux4j-$(version)
	./genz.sh
	$(TAR) chvf sux4j-$(version)-src.tar --owner=0 --group=0 \
		sux4j-$(version)/README.md \
		sux4j-$(version)/CHANGES \
		sux4j-$(version)/COPYING.LESSER \
		sux4j-$(version)/LICENSE-2.0.txt \
		sux4j-$(version)/build.xml \
		sux4j-$(version)/ivy.xml \
		sux4j-$(version)/sux4j.bnd \
		sux4j-$(version)/pom-model.xml \
		sux4j-$(version)/build.properties \
		$$(find sux4j-$(version)/src/it/unimi/dsi/sux4j -iname \*.java -or -iname \*.html) \
		$$(find sux4j-$(version)/test/it/unimi/dsi/sux4j -iname \*.java) \
		$$(find sux4j-$(version)/slow/it/unimi/dsi/sux4j -iname \*.java) \
		sux4j-$(version)/src/overview.html
	$(TAR) --delete --wildcards -v -f sux4j-$(version)-src.tar \
		sux4j-$(version)/src/it/unimi/dsi/sux4j/mph/solve/Modulo2SparseSystem.java \
		sux4j-$(version)/test/it/unimi/dsi/sux4j/mph/solve/Modulo2SparseSystemTest.java \
		sux4j-$(version)/src/it/unimi/dsi/sux4j/bits/Rank12.java \
		sux4j-$(version)/test/it/unimi/dsi/sux4j/bits/Rank12Test.java \
		sux4j-$(version)/src/it/unimi/dsi/sux4j/scratch/*.java \
		sux4j-$(version)/test/it/unimi/dsi/sux4j/scratch/*.java \
		sux4j-$(version)/src/it/unimi/dsi/sux4j/test/*.java
	gzip -f sux4j-$(version)-src.tar
	rm sux4j-$(version)

# Unpacks the source archive, runs the tests, builds jar and javadoc, and
# packages the binary and dependency archives.
binary:
	rm -fr sux4j-$(version)
	$(TAR) zxvf sux4j-$(version)-src.tar.gz
	(cd sux4j-$(version) && unset CLASSPATH && unset LOCAL_IVY_SETTINGS && ant ivy-clean ivy-setupjars && ant junit && ant clean && ant jar javadoc)
	$(TAR) zcvf sux4j-$(version)-bin.tar.gz --owner=0 --group=0 \
		sux4j-$(version)/README.md \
		sux4j-$(version)/CHANGES \
		sux4j-$(version)/COPYING.LESSER \
		sux4j-$(version)/LICENSE-2.0.txt \
		sux4j-$(version)/sux4j-$(version).jar \
		sux4j-$(version)/docs
	$(TAR) zcvf sux4j-$(version)-deps.tar.gz --owner=0 --group=0 --transform='s|.*/||' $$(find sux4j-$(version)/jars/runtime -iname \*.jar -exec readlink {} \;)

# Unpacks the source archive, adds the bnd directory and runs the "stage" ant target.
stage:
	rm -fr sux4j-$(version)
	$(TAR) zxvf sux4j-$(version)-src.tar.gz
	cp -fr bnd sux4j-$(version)
	(cd sux4j-$(version) && unset CLASSPATH && unset LOCAL_IVY_SETTINGS && ant ivy-clean ivy-setupjars && ant stage)

--------------------------------------------------------------------------------
/pom-model.xml:
--------------------------------------------------------------------------------

4.0.0
it.unimi.dsi
${ivy.pom.artifactId}
jar
Sux4J
${ivy.pom.version}
Sux4j is an implementation of succinct data structure in Java. It provides a number of related implementations covering ranking/selection over bit arrays, compressed lists and minimal perfect hashing.
http://sux4j.di.unimi.it/


GNU Lesser General Public License Version 2.1+
https://www.gnu.org/licenses/old-licenses/lgpl-2.1.html
repo


Apache License v2.0
https://www.apache.org/licenses/LICENSE-2.0
source



scm:git://github.com/vigna/Sux4J.git
https://github.com/vigna/Sux4J




vigna
Sebastiano Vigna
sebastiano.vigna@unimi.it




1.9
1.9


--------------------------------------------------------------------------------
/setcp.sh:
--------------------------------------------------------------------------------
# Prepends the sux4j jar and its test dependencies to CLASSPATH.
# NOTE(review): there is no shebang and the script exports a variable, so it is
# presumably meant to be sourced from bash -- confirm.

JAR=sux4j

# Directory of the script named by BASH_ARGV[0].
sourcedir=$(cd "$(dirname "${BASH_ARGV[0]}")" && pwd)
count=$(\ls -1 "$sourcedir"/$JAR-*.jar 2>/dev/null | wc -l)

if (( count == 0 )); then
	echo "WARNING: no $JAR jar file."
elif (( count > 1 )); then
	# List the jars that were counted, i.e., those in $sourcedir rather than
	# whatever happens to match in the current directory.
	echo "WARNING: several $JAR jar files ($(\ls -m "$sourcedir"/$JAR-*.jar))"
else
	# If the class path already mentions slf4j or logback, leave the slf4j jars out.
	if echo "$CLASSPATH" | grep -E -q slf4j\|logback; then
		deps=$(\ls -1 "$sourcedir"/jars/test/*.jar | grep -v slf4j | paste -d: -s)
	else
		deps=$(\ls -1 "$sourcedir"/jars/test/*.jar | paste -d: -s)
	fi

	export CLASSPATH="$(\ls -1 "$sourcedir"/$JAR-*.jar | tail -n 1):$deps:$CLASSPATH"
fi

--------------------------------------------------------------------------------
/slow/it/unimi/dsi/sux4j/bits/Rank9SelectSlowTest.java:
--------------------------------------------------------------------------------
/*
 * Sux4J: Succinct data structures for Java
 *
 * Copyright (C) 2008-2023 Sebastiano Vigna
 *
 * This program and the accompanying materials are made available under the
 * terms of the GNU Lesser General Public License v2.1 or later,
 * which is available at
 * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
 * or the Apache Software License 2.0, which is available at
 * https://www.apache.org/licenses/LICENSE-2.0.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE.
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.sux4j.bits; 21 | 22 | 23 | import static org.junit.Assert.assertEquals; 24 | 25 | import org.junit.Test; 26 | 27 | import it.unimi.dsi.bits.LongArrayBitVector; 28 | 29 | public class Rank9SelectSlowTest { 30 | 31 | @Test 32 | public void testVeryLarge() { 33 | final LongArrayBitVector v = LongArrayBitVector.getInstance(2200000000L); 34 | for (int i = 0; i < 2200000000L / 64; i++) 35 | v.append(0x5555555555555555L, 64); 36 | Rank9 rank9; 37 | final Select9 select9 = new Select9(rank9 = new Rank9(v)); 38 | for (int i = 0; i < 1100000000; i++) 39 | assertEquals(i * 2L, select9.select(i)); 40 | for (int i = 0; i < 1100000000; i++) 41 | assertEquals(i, rank9.rank(i * 2L)); 42 | } 43 | 44 | } 45 | -------------------------------------------------------------------------------- /slow/it/unimi/dsi/sux4j/mph/GOV3FunctionSlowTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sux4J: Succinct data structures for Java 3 | * 4 | * Copyright (C) 2008-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.sux4j.mph; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | 24 | import java.io.IOException; 25 | 26 | import org.junit.Test; 27 | 28 | import it.unimi.dsi.bits.TransformationStrategies; 29 | import it.unimi.dsi.fastutil.longs.AbstractLongBigList; 30 | 31 | public class GOV3FunctionSlowTest { 32 | 33 | @Test 34 | public void testBig() throws IOException { 35 | final Iterable p = LargeLongCollection.getInstance(); 36 | final GOV3Function f = new GOV3Function.Builder().keys(p).transform(TransformationStrategies.fixedLong()).values(new AbstractLongBigList() { 37 | 38 | @Override 39 | public long getLong(final long index) { 40 | return index % 7; 41 | } 42 | 43 | @Override 44 | public long size64() { 45 | return LargeLongCollection.SIZE; 46 | } 47 | }, 3).build(); 48 | 49 | long j = 0; 50 | for (final Long s : p) { 51 | assertEquals(j++ % 7, f.getLong(s)); 52 | } 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /slow/it/unimi/dsi/sux4j/mph/GOV4FunctionSlowTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sux4J: Succinct data structures for Java 3 | * 4 | * Copyright (C) 2008-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.sux4j.mph; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | 24 | import java.io.IOException; 25 | 26 | import org.junit.Test; 27 | 28 | import it.unimi.dsi.bits.TransformationStrategies; 29 | import it.unimi.dsi.fastutil.longs.AbstractLongBigList; 30 | 31 | public class GOV4FunctionSlowTest { 32 | 33 | @Test 34 | public void testBig() throws IOException { 35 | final Iterable p = LargeLongCollection.getInstance(); 36 | final GOV4Function f = new GOV4Function.Builder().keys(p).transform(TransformationStrategies.fixedLong()).values(new AbstractLongBigList() { 37 | 38 | @Override 39 | public long getLong(final long index) { 40 | return index % 7; 41 | } 42 | 43 | @Override 44 | public long size64() { 45 | return LargeLongCollection.SIZE; 46 | } 47 | }, 3).build(); 48 | 49 | long j = 0; 50 | for (final Long s : p) { 51 | assertEquals(j++ % 7, f.getLong(s)); 52 | } 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /slow/it/unimi/dsi/sux4j/mph/GOVMinimalPerfectHashFunctionSlowTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sux4J: Succinct data structures for Java 3 | * 4 | * Copyright (C) 2008-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.sux4j.mph; 21 | 22 | import static org.junit.Assert.assertTrue; 23 | 24 | import java.io.IOException; 25 | 26 | import org.junit.Test; 27 | 28 | import it.unimi.dsi.bits.TransformationStrategies; 29 | import it.unimi.dsi.fastutil.longs.LongOpenHashBigSet; 30 | 31 | public class GOVMinimalPerfectHashFunctionSlowTest { 32 | 33 | @Test 34 | public void testBig() throws IOException { 35 | final LargeLongCollection p = LargeLongCollection.getInstance(); 36 | final GOVMinimalPerfectHashFunction f = new GOVMinimalPerfectHashFunction.Builder().keys(p).transform(TransformationStrategies.fixedLong()).build(); 37 | final LongOpenHashBigSet s = new LongOpenHashBigSet(); 38 | for (final Long l : p) assertTrue(s.add(f.getLong(l))); 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /slow/it/unimi/dsi/sux4j/mph/HollowTrieDistributorMonotoneMinimalPerfectHashFunctionSlowTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sux4J: Succinct data structures for Java 3 | * 4 | * Copyright (C) 2008-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.sux4j.mph; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | 24 | import java.io.IOException; 25 | 26 | import org.junit.Test; 27 | 28 | import it.unimi.dsi.bits.TransformationStrategies; 29 | 30 | public class HollowTrieDistributorMonotoneMinimalPerfectHashFunctionSlowTest { 31 | 32 | @Test 33 | public void testBig() throws IOException { 34 | final Iterable p = LargeLongCollection.getInstance(); 35 | final HollowTrieDistributorMonotoneMinimalPerfectHashFunction f = new HollowTrieDistributorMonotoneMinimalPerfectHashFunction<>(p, TransformationStrategies.fixedLong()); 36 | 37 | long j = 0; 38 | for (final Long s : p) { 39 | assertEquals(j++, f.getLong(s)); 40 | } 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /slow/it/unimi/dsi/sux4j/mph/HollowTrieMonotoneMinimalPerfectHashFunctionSlowTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sux4J: Succinct data structures for Java 3 | * 4 | * Copyright (C) 2008-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.sux4j.mph; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | 24 | import org.junit.Test; 25 | 26 | import it.unimi.dsi.bits.TransformationStrategies; 27 | 28 | public class HollowTrieMonotoneMinimalPerfectHashFunctionSlowTest { 29 | 30 | @Test 31 | public void testBig() { 32 | final Iterable p = LargeLongCollection.getInstance(); 33 | final HollowTrieMonotoneMinimalPerfectHashFunction f = new HollowTrieMonotoneMinimalPerfectHashFunction<>(p, TransformationStrategies.fixedLong()); 34 | 35 | long j = 0; 36 | for (Long s : p) { 37 | assertEquals(j++, f.getLong(s)); 38 | } 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /slow/it/unimi/dsi/sux4j/mph/LargeLongCollection.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sux4J: Succinct data structures for Java 3 | * 4 | * Copyright (C) 2008-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.sux4j.mph; 21 | 22 | import java.util.Iterator; 23 | import java.util.NoSuchElementException; 24 | 25 | import it.unimi.dsi.fastutil.Size64; 26 | import it.unimi.dsi.fastutil.objects.ObjectIterator; 27 | 28 | final class LargeLongCollection implements Iterable, Size64 { 29 | 30 | public final static long SIZE = 3000000005L; // An odd number is essential to catch problems in the computation of the last bucket. 31 | private final static long INCREMENT = ((1L << 62) / SIZE); 32 | 33 | private LargeLongCollection() {} 34 | private final static LargeLongCollection INSTANCE = new LargeLongCollection(); 35 | 36 | public static LargeLongCollection getInstance() { 37 | return INSTANCE; 38 | } 39 | 40 | @Override 41 | public Iterator iterator() { 42 | return new ObjectIterator<>() { 43 | long curr = 0; 44 | @Override 45 | public boolean hasNext() { 46 | return curr < SIZE; 47 | } 48 | 49 | @Override 50 | public Long next() { 51 | if (! 
hasNext()) throw new NoSuchElementException(); 52 | return Long.valueOf(curr++ * INCREMENT); 53 | } 54 | }; 55 | } 56 | 57 | @Override 58 | @Deprecated 59 | public int size() { 60 | throw new UnsupportedOperationException("You should invoke size64(), only."); 61 | } 62 | 63 | @Override 64 | public long size64() { 65 | return SIZE; 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /slow/it/unimi/dsi/sux4j/mph/LcpMonotoneMinimalPerfectHashFunctionSlowTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sux4J: Succinct data structures for Java 3 | * 4 | * Copyright (C) 2008-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.sux4j.mph; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | 24 | import java.io.IOException; 25 | 26 | import org.junit.Test; 27 | 28 | import it.unimi.dsi.bits.TransformationStrategies; 29 | 30 | public class LcpMonotoneMinimalPerfectHashFunctionSlowTest { 31 | 32 | @Test 33 | public void testBig() throws IOException { 34 | final Iterable p = LargeLongCollection.getInstance(); 35 | final LcpMonotoneMinimalPerfectHashFunction f = new LcpMonotoneMinimalPerfectHashFunction.Builder().keys(p).transform(TransformationStrategies.fixedLong()).build(); 36 | 37 | long j = 0; 38 | for (final Long s : p) { 39 | assertEquals(j++, f.getLong(s)); 40 | } 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /slow/it/unimi/dsi/sux4j/mph/MinimalPerfectHashFunctionSlowTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sux4J: Succinct data structures for Java 3 | * 4 | * Copyright (C) 2008-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.sux4j.mph; 21 | 22 | import static org.junit.Assert.assertFalse; 23 | 24 | import java.io.IOException; 25 | 26 | import org.junit.Test; 27 | 28 | import it.unimi.dsi.bits.LongArrayBitVector; 29 | import it.unimi.dsi.bits.TransformationStrategies; 30 | 31 | public class MinimalPerfectHashFunctionSlowTest { 32 | 33 | @Test 34 | public void testBig() throws IOException { 35 | final Iterable p = LargeLongCollection.getInstance(); 36 | 37 | final LongArrayBitVector b = LongArrayBitVector.ofLength(LargeLongCollection.SIZE); 38 | final GOVMinimalPerfectHashFunction mph = new GOVMinimalPerfectHashFunction.Builder().keys(p).transform(TransformationStrategies.fixedLong()).build(); 39 | 40 | for (final Long long1 : p) { 41 | final long pos = mph.getLong(long1); 42 | assertFalse(b.getBoolean(pos)); 43 | b.set(pos); 44 | } 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /slow/it/unimi/dsi/sux4j/mph/PaCoTrieDistributorMonotoneMinimalPerfectHashFunctionSlowTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sux4J: Succinct data structures for Java 3 | * 4 | * Copyright (C) 2008-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.sux4j.mph; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | 24 | import java.io.IOException; 25 | 26 | import org.junit.Test; 27 | 28 | import it.unimi.dsi.bits.TransformationStrategies; 29 | 30 | public class PaCoTrieDistributorMonotoneMinimalPerfectHashFunctionSlowTest { 31 | 32 | @Test 33 | public void testBig() throws IOException { 34 | final Iterable p = LargeLongCollection.getInstance(); 35 | final PaCoTrieDistributorMonotoneMinimalPerfectHashFunction f = new PaCoTrieDistributorMonotoneMinimalPerfectHashFunction<>(p, TransformationStrategies.fixedLong()); 36 | 37 | long j = 0; 38 | for (final Long s : p) { 39 | assertEquals(j++, f.getLong(s)); 40 | } 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /slow/it/unimi/dsi/sux4j/mph/TwoStepsLcpMonotoneMinimalPerfectHashFunctionSlowTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sux4J: Succinct data structures for Java 3 | * 4 | * Copyright (C) 2008-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.sux4j.mph; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | 24 | import java.io.IOException; 25 | 26 | import org.junit.Test; 27 | 28 | import it.unimi.dsi.bits.TransformationStrategies; 29 | 30 | public class TwoStepsLcpMonotoneMinimalPerfectHashFunctionSlowTest { 31 | 32 | @Test 33 | public void testBig() throws IOException { 34 | final Iterable p = LargeLongCollection.getInstance(); 35 | final TwoStepsLcpMonotoneMinimalPerfectHashFunction f = new TwoStepsLcpMonotoneMinimalPerfectHashFunction.Builder().keys(p).transform(TransformationStrategies.fixedLong()).build(); 36 | 37 | long j = 0; 38 | for (final Long s : p) { 39 | assertEquals(j++, f.getLong(s)); 40 | } 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /slow/it/unimi/dsi/sux4j/mph/VLLcpMonotoneMinimalPerfectHashFunctionSlowTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sux4J: Succinct data structures for Java 3 | * 4 | * Copyright (C) 2008-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.sux4j.mph; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | 24 | import java.io.IOException; 25 | 26 | import org.junit.Test; 27 | 28 | import it.unimi.dsi.bits.TransformationStrategies; 29 | 30 | public class VLLcpMonotoneMinimalPerfectHashFunctionSlowTest { 31 | 32 | @Test 33 | public void testBig() throws IOException { 34 | final Iterable p = LargeLongCollection.getInstance(); 35 | final VLLcpMonotoneMinimalPerfectHashFunction f = new VLLcpMonotoneMinimalPerfectHashFunction<>(p, TransformationStrategies.fixedLong()); 36 | 37 | long j = 0; 38 | for (final Long s : p) { 39 | assertEquals(j++, f.getLong(s)); 40 | } 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /slow/it/unimi/dsi/sux4j/mph/VLPaCoTrieDistributorMonotoneMinimalPerfectHashFunctionSlowTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sux4J: Succinct data structures for Java 3 | * 4 | * Copyright (C) 2008-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.sux4j.mph; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | 24 | import java.io.IOException; 25 | 26 | import org.junit.Test; 27 | 28 | import it.unimi.dsi.bits.TransformationStrategies; 29 | 30 | public class VLPaCoTrieDistributorMonotoneMinimalPerfectHashFunctionSlowTest { 31 | 32 | @Test 33 | public void testBig() throws IOException { 34 | final Iterable p = LargeLongCollection.getInstance(); 35 | final VLPaCoTrieDistributorMonotoneMinimalPerfectHashFunction f = new VLPaCoTrieDistributorMonotoneMinimalPerfectHashFunction<>(p, TransformationStrategies.fixedLong()); 36 | 37 | long j = 0; 38 | for (Long s : p) { 39 | assertEquals(j++, f.getLong(s)); 40 | } 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /slow/it/unimi/dsi/sux4j/mph/ZFastTrieDistributorMonotoneMinimalPerfectHashFunctionSlowTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sux4J: Succinct data structures for Java 3 | * 4 | * Copyright (C) 2008-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.sux4j.mph; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | 24 | import java.io.IOException; 25 | 26 | import org.junit.Test; 27 | 28 | import it.unimi.dsi.bits.TransformationStrategies; 29 | 30 | public class ZFastTrieDistributorMonotoneMinimalPerfectHashFunctionSlowTest { 31 | 32 | @Test 33 | public void testBig() throws IOException { 34 | final Iterable p = LargeLongCollection.getInstance(); 35 | final ZFastTrieDistributorMonotoneMinimalPerfectHashFunction f = new ZFastTrieDistributorMonotoneMinimalPerfectHashFunction.Builder().keys(p).transform(TransformationStrategies.fixedLong()).build(); 36 | 37 | long j = 0; 38 | for (final Long s : p) { 39 | assertEquals(j++, f.getLong(s)); 40 | } 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /slow/it/unimi/dsi/sux4j/util/EliasFanoMonotoneBigLongBigListSlowTest.java: -------------------------------------------------------------------------------- 1 | package it.unimi.dsi.sux4j.util; 2 | /* 3 | * Sux4J: Succinct data structures for Java 4 | * 5 | * Copyright (C) 2010-2023 Sebastiano Vigna 6 | * 7 | * This program and the accompanying materials are made available under the 8 | * terms of the GNU Lesser General Public License v2.1 or later, 9 | * which is available at 10 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 11 | * or the Apache Software License 2.0, which is available at 12 | * https://www.apache.org/licenses/LICENSE-2.0. 13 | * 14 | * This program is distributed in the hope that it will be useful, but 15 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 16 | * or FITNESS FOR A PARTICULAR PURPOSE. 
17 | * 18 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 19 | */ 20 | 21 | 22 | 23 | import static org.junit.Assert.assertEquals; 24 | import static org.junit.Assert.assertTrue; 25 | 26 | import java.util.NoSuchElementException; 27 | 28 | import org.junit.Test; 29 | 30 | import com.google.common.collect.Iterators; 31 | 32 | import it.unimi.dsi.fastutil.longs.LongIterator; 33 | import it.unimi.dsi.util.XoRoShiRo128PlusRandomGenerator; 34 | /** Slow tests that build {@link EliasFanoMonotoneBigLongBigList} instances from generated sequences of increasing length and compare them with the generator. */ 35 | public class EliasFanoMonotoneBigLongBigListSlowTest { 36 | /** Iterator returning {@code n} longs: each call adds to a running sum {@code k} times the number of trailing zeros of the next output of a generator seeded with 0. */ 37 | private final class Elements implements LongIterator { 38 | private final XoRoShiRo128PlusRandomGenerator random = new XoRoShiRo128PlusRandomGenerator(0); 39 | private final int k; 40 | private final long n; 41 | long i, d; 42 | 43 | private Elements(final long n, final int k) { 44 | this.n = n; 45 | this.k = k; 46 | } 47 | 48 | @Override 49 | public boolean hasNext() { 50 | return i < n; 51 | } 52 | 53 | @Override 54 | public long nextLong() { 55 | if (!hasNext()) throw new NoSuchElementException(); 56 | i++; 57 | return d += k * Long.numberOfTrailingZeros(random.nextLong()); 58 | } 59 | } 60 | /** Scans the sequence once to obtain its last element, builds a list passing {@code n}, the last element plus one and a fresh generator, then checks both iteration and {@code getLong()} against fresh generators. */ 61 | public void testRandom(final long n, final int k) { 62 | long m = 0; 63 | LongIterator iterator = new Elements(n, k); 64 | for (long i = 0; i < n; i++) m = iterator.nextLong(); 65 | m++; 66 | 67 | final EliasFanoMonotoneBigLongBigList ef = new EliasFanoMonotoneBigLongBigList(n, m, new Elements(n, k)); 68 | assertTrue(Iterators.elementsEqual(new Elements(n, k), ef.iterator())); 69 | iterator = new Elements(n, k); 70 | for (long i = 0; i < n; i++) assertEquals(iterator.nextLong(), ef.getLong(i)); 71 | } 72 | 73 | @Test 74 | public void test1Mi() { 75 | testRandom(1 << 20, 9); 76 | } 77 | 78 | @Test 79 | public void test100Mi() { 80 | testRandom(100 * (1 << 20), 9); 81 | } 82 | 83 | @Test 84 | public void test2Gi() { 85 | testRandom(2 * (1L << 30), 9); 86 | } 87 | 88 | @Test 89 | public void test128Gi() { 90 | testRandom(128 * (1L << 30), 9); 91 | } 92 | 
93 | @Test 94 | public void test1MiNoLower() { 95 | testRandom(1 << 20, 1); 96 | } 97 | 98 | @Test 99 | public void test100MiNoLower() { 100 | testRandom(100 * (1 << 20), 1); 101 | } 102 | 103 | @Test 104 | public void test2GiNoLower() { 105 | testRandom(2 * (1L << 30), 1); 106 | } 107 | 108 | @Test 109 | public void test128GiNoLower() { 110 | testRandom(128 * (1L << 30), 1); 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /slow/it/unimi/dsi/sux4j/util/EliasFanoMonotoneLongBigListSlowTest.java: -------------------------------------------------------------------------------- 1 | package it.unimi.dsi.sux4j.util; 2 | /* 3 | * Sux4J: Succinct data structures for Java 4 | * 5 | * Copyright (C) 2010-2023 Sebastiano Vigna 6 | * 7 | * This program and the accompanying materials are made available under the 8 | * terms of the GNU Lesser General Public License v2.1 or later, 9 | * which is available at 10 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 11 | * or the Apache Software License 2.0, which is available at 12 | * https://www.apache.org/licenses/LICENSE-2.0. 13 | * 14 | * This program is distributed in the hope that it will be useful, but 15 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 16 | * or FITNESS FOR A PARTICULAR PURPOSE. 
17 | * 18 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 19 | */ 20 | 21 | 22 | 23 | import static org.junit.Assert.assertEquals; 24 | import static org.junit.Assert.assertTrue; 25 | 26 | import java.util.NoSuchElementException; 27 | 28 | import org.junit.Test; 29 | 30 | import com.google.common.collect.Iterators; 31 | 32 | import it.unimi.dsi.fastutil.longs.LongIterator; 33 | import it.unimi.dsi.util.XoRoShiRo128PlusRandomGenerator; 34 | /** Slow tests that build {@link EliasFanoMonotoneLongBigList} instances from generated sequences of increasing length and compare them with the generator. */ 35 | public class EliasFanoMonotoneLongBigListSlowTest { 36 | /** Iterator returning {@code n} longs: each call adds to a running sum {@code k} times the number of trailing zeros of the next output of a generator seeded with 0. */ 37 | private final class Elements implements LongIterator { 38 | private final XoRoShiRo128PlusRandomGenerator random = new XoRoShiRo128PlusRandomGenerator(0); 39 | private final int k; 40 | private final long n; 41 | long i, d; 42 | 43 | private Elements(final long n, final int k) { 44 | this.n = n; 45 | this.k = k; 46 | } 47 | 48 | @Override 49 | public boolean hasNext() { 50 | return i < n; 51 | } 52 | 53 | @Override 54 | public long nextLong() { 55 | if (!hasNext()) throw new NoSuchElementException(); 56 | i++; 57 | return d += k * Long.numberOfTrailingZeros(random.nextLong()); 58 | } 59 | } 60 | /** Scans the sequence once to obtain its last element, builds a list passing {@code n}, the last element plus one and a fresh generator, then checks both iteration and {@code getLong()} against fresh generators. */ 61 | public void testRandom(final long n, final int k) { 62 | long m = 0; 63 | LongIterator iterator = new Elements(n, k); 64 | for (long i = 0; i < n; i++) m = iterator.nextLong(); 65 | m++; 66 | 67 | final EliasFanoMonotoneLongBigList ef = new EliasFanoMonotoneLongBigList(n, m, new Elements(n, k)); 68 | assertTrue(Iterators.elementsEqual(new Elements(n, k), ef.iterator())); 69 | iterator = new Elements(n, k); 70 | for (long i = 0; i < n; i++) assertEquals(iterator.nextLong(), ef.getLong(i)); 71 | } 72 | 73 | @Test 74 | public void test1Mi() { 75 | testRandom(1 << 20, 9); 76 | } 77 | 78 | @Test 79 | public void test100Mi() { 80 | testRandom(100 * (1 << 20), 9); 81 | } 82 | 83 | @Test 84 | public void test2Gi() { 85 | testRandom(2 * (1L << 30), 9); 86 | } 87 | 88 | @Test 89 | public void test35Gi() { 90 | testRandom(35 * (1L << 30), 9); 91 | } 92 | 93 | @Test 
94 | public void test1MiNoLower() { 95 | testRandom(1 << 20, 1); 96 | } 97 | 98 | @Test 99 | public void test100MiNoLower() { 100 | testRandom(100 * (1 << 20), 1); 101 | } 102 | 103 | @Test 104 | public void test2GiNoLower() { 105 | testRandom(2 * (1L << 30), 1); 106 | } 107 | 108 | @Test 109 | public void test48GiNoLower() { 110 | testRandom(48 * (1L << 30), 1); 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/sux4j/bits/AbstractRank.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sux4J: Succinct data structures for Java 3 | * 4 | * Copyright (C) 2007-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.sux4j.bits; 21 | 22 | /** An abstract implementation of {@link Rank} providing a few obvious derived methods. 
*/ 23 | 24 | public abstract class AbstractRank implements Rank { 25 | private static final long serialVersionUID = 1L; 26 | /** Returns the rank at the length of the underlying bit vector. */ 27 | @Override 28 | public long count() { 29 | return rank(bitVector().length()); 30 | } 31 | /** Returns the difference between the rank at {@code to} and the rank at {@code from}. */ 32 | @Override 33 | public long rank(final long from, final long to) { 34 | return rank(to) - rank(from); 35 | } 36 | /** Returns {@code pos} minus the rank at {@code pos}. */ 37 | @Override 38 | public long rankZero(final long pos) { 39 | return pos - rank(pos); 40 | } 41 | /** Returns the range length {@code to - from} minus {@link #rank(long, long)} on the same range. */ 42 | @Override 43 | public long rankZero(final long from, final long to) { 44 | return to - from - rank(from, to); 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/sux4j/bits/BalancedParentheses.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sux4J: Succinct data structures for Java 3 | * 4 | * Copyright (C) 2009-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.sux4j.bits; 21 | 22 | import java.io.Serializable; 23 | 24 | import it.unimi.dsi.bits.BitVector; 25 | 26 | /** A data structure providing primitives for balanced parentheses 27 | * represented in a bit array. 28 | * 29 | *

A bit array is viewed by implementations of this class as a string of open (=one) and closed 30 | * (=zero) parentheses, which must be nested correctly. All operations are optional, but by contract 31 | * at least one of {@link #findOpen(long)} and {@link #findClose(long)} must be 32 | * provided. 33 | */ 34 | public interface BalancedParentheses extends Serializable { 35 | 36 | /** Returns the position of the matching open parenthesis (optional operation). 37 | * 38 | *

Note that if you do not implement this method you must 39 | * implement {@link #findClose(long)}. 40 | * 41 | * @param pos a position in the bit vector containing a closed parenthesis (a zero). 42 | * @return the position of the matching open parenthesis. 43 | */ 44 | public long findOpen(long pos); 45 | 46 | /** Returns the position of the matching closed parenthesis (optional operation). 47 | * 48 | *

Note that if you do not implement this method you must 49 | * implement {@link #findOpen(long)}. 50 | * 51 | * @param pos a position in the bit vector containing an open parenthesis (a one). 52 | * @return the position of the matching closed parenthesis. 53 | */ 54 | public long findClose(long pos); 55 | 56 | /** Returns the position of the open parenthesis of the pair that most 57 | * tightly encloses the given position (optional operation). 58 | * 59 | * @param pos a position in the bit vector. 60 | * @return the position of the open parenthesis of the pair that most 61 | * tightly encloses the given position. 62 | */ 63 | public long enclose(long pos); 64 | 65 | /** Returns the bit vector indexed by this structure. 66 | * 67 | *

Note that you are not supposed to modify the returned vector. 68 | * 69 | * @return the bit vector indexed by this structure. 70 | */ 71 | public BitVector bitVector(); 72 | 73 | /** Returns the overall number of bits allocated by this structure. 74 | * 75 | * @return the overall number of bits allocated by this structure (not including the bits 76 | * of the {@linkplain #bitVector() indexed vector}). 77 | */ 78 | 79 | public long numBits(); 80 | } 81 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/sux4j/bits/RankSelect.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sux4J: Succinct data structures for Java 3 | * 4 | * Copyright (C) 2008-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.sux4j.bits; 21 | 22 | import java.io.Serializable; 23 | 24 | import it.unimi.dsi.bits.BitVector; 25 | 26 | /** A serialisation-oriented container for associated rank/select(zero) structures. 27 | * 28 | *

Since structures in Sux4J serialise all contained data, including, if necessary, the underlying bit vector, 29 | * serialising separately a rank and a select structure might result in storing the underlying bit 30 | * vector twice. This class provides a simple solution by allowing one-shot serialisation of 31 | * all structures related to a bit vector. For convenience, it provides also delegate methods, albeit 32 | * the suggested usage is deserialisation and extraction of non-{@code null} structures. 33 | * 34 | */ 35 | public class RankSelect implements Rank, Select, SelectZero, Serializable { 36 | 37 | private static final long serialVersionUID = 1L; 38 | /** A rank structure, or {@code null}. */ 39 | public final Rank rank; 40 | /** A select structure, or {@code null}. */ 41 | public final Select select; 42 | /** A zero-select structure, or {@code null}. */ 43 | public final SelectZero selectZero; 44 | 45 | /** Creates a new rank/select container using the given structures. 46 | * 47 | * @param rank a rank structure, or {@code null}. 48 | * @param select a select structure, or {@code null}. 49 | * @param selectZero a zero-select structure, or {@code null}. 50 | */ 51 | public RankSelect(final Rank rank, final Select select, final SelectZero selectZero) { 52 | this.rank = rank; 53 | this.select = select; 54 | this.selectZero = selectZero; 55 | } 56 | 57 | /** Creates a new rank/select container without zero selection using the given structures. 58 | * 59 | * @param rank a rank structure, or {@code null}. 60 | * @param select a select structure, or {@code null}. 61 | */ 62 | public RankSelect(final Rank rank, final Select select) { 63 | this(rank, select, null); 64 | } 65 | /** Delegates to {@link #rank}; fails with a {@link NullPointerException} if that field is {@code null}. */ 66 | @Override 67 | public long count() { 68 | return rank.count(); 69 | } 70 | /** Returns the sum of {@code numBits()} over the structures that are not {@code null}. */ 71 | @Override 72 | public long numBits() { 73 | return (rank != null ? rank.numBits() : 0) + (select != null ? select.numBits() : 0)+ (selectZero != null ? 
selectZero.numBits() : 0); 74 | } 75 | /** Delegates to {@link #rank}; fails with a {@link NullPointerException} if that field is {@code null}. */ 76 | @Override 77 | public long rank(final long from, final long to) { 78 | return rank.rank(from, to); 79 | } 80 | /** Delegates to {@link #rank}; fails with a {@link NullPointerException} if that field is {@code null}. */ 81 | @Override 82 | public long rank(final long pos) { 83 | return rank.rank(pos); 84 | } 85 | /** Delegates to {@link #rank}; fails with a {@link NullPointerException} if that field is {@code null}. */ 86 | @Override 87 | public long rankZero(final long from, final long to) { 88 | return rank.rankZero(from, to); 89 | } 90 | /** Delegates to {@link #rank}; fails with a {@link NullPointerException} if that field is {@code null}. */ 91 | @Override 92 | public long rankZero(final long pos) { 93 | return rank.rankZero(pos); 94 | } 95 | /** Delegates to {@link #select}; fails with a {@link NullPointerException} if that field is {@code null}. */ 96 | @Override 97 | public long select(final long rank) { 98 | return select.select(rank); 99 | } 100 | /** Delegates to {@link #selectZero}; fails with a {@link NullPointerException} if that field is {@code null}. */ 101 | @Override 102 | public long selectZero(final long rank) { 103 | return selectZero.selectZero(rank); 104 | } 105 | /** Returns the bit vector of the first non-{@code null} field among {@link #rank}, {@link #select} and {@link #selectZero}, in this order; throws an {@link UnsupportedOperationException} if all of them are {@code null}. */ 106 | @Override 107 | public BitVector bitVector() { 108 | if (rank != null) return rank.bitVector(); 109 | if (select != null) return select.bitVector(); 110 | if (selectZero != null) return selectZero.bitVector(); 111 | throw new UnsupportedOperationException("All fields are nulls"); 112 | } 113 | 114 | } 115 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/sux4j/bits/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Ranking and selection structures. 3 | * 4 | *

5 | * This package provides a number of implementations of rank/select queries for bit 6 | * vectors. Ranking is counting the number of ones in an initial segment of a bit vector. 7 | * Selection is finding the position of the r-th one. Both operations can be 8 | * performed in constant time on an array of n bits using o(n) 9 | * additional bits, but in practice linear data structures with small constants and theoretically 10 | * non-constant time work much better. Sux4J proposes a number of new, very efficient implementations 11 | * of rank and select oriented to 64-bit processors (in other words: they will be fairly slow on 12 | * 32-bit processors). The implementations are based on broadword programming and described 13 | * in Sebastiano Vigna, “Broadword 14 | * Implementation of Rank/Select Queries”, in Proc. of the 7th International Workshop 15 | * on Experimental Algorithms, WEA 2008, volume 5038 of Lecture Notes in Computer Science, pages 16 | * 154−168. Springer, 2008. 17 | * 18 | *

19 | * For dense arrays, {@link it.unimi.dsi.sux4j.bits.Rank9} is the basic rank implementation; 20 | * {@link it.unimi.dsi.sux4j.bits.Rank16} is slightly slower but occupies much less space. Selection 21 | * can be performed using {@link it.unimi.dsi.sux4j.bits.SimpleSelect} for reasonably uniform bit 22 | * arrays, or using {@link it.unimi.dsi.sux4j.bits.Select9}, which occupies more space but 23 | * guarantees practical constant-time evaluation. 24 | * 25 | *

26 | * For sparse arrays (e.g., representation of pointers in a bitstream) we provide 27 | * {@link it.unimi.dsi.sux4j.bits.SparseRank} and {@link it.unimi.dsi.sux4j.bits.SparseSelect}. 28 | * Their main feature is that they do not require the original bit array, as they use an 29 | * {@link it.unimi.dsi.sux4j.util.EliasFanoMonotoneLongBigList} to implement a succinct dictionary 30 | * containing the positions of bits set. If the bit array is sufficiently sparse, such a 31 | * representation provides significant gains in space occupancy. 32 | * 33 | *

34 | * All structures can be serialized. Since in some cases the original bit vector is stored inside 35 | * the structure, to avoid saving and loading twice the same vector we suggest to pack all 36 | * structures into a {@link it.unimi.dsi.sux4j.bits.RankSelect} instance. 37 | * 38 | *

39 | * Note that all methods in this package are considered low-level and do not perform bound checks on 40 | * their arguments. Bound checks can be enabled, however, by enabling assertions. 41 | */ 42 | package it.unimi.dsi.sux4j.bits; 43 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/sux4j/io/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * I/O classes exploiting succinct data structures. 3 | */ 4 | package it.unimi.dsi.sux4j.io; 5 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/sux4j/mph/AbstractHashFunction.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sux4J: Succinct data structures for Java 3 | * 4 | * Copyright (C) 2008-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.sux4j.mph; 21 | 22 | import it.unimi.dsi.fastutil.Size64; 23 | import it.unimi.dsi.fastutil.objects.AbstractObject2LongFunction; 24 | 25 | /** A very minimal abstract hash implementation. It extends {@link AbstractObject2LongFunction}, 26 | * by {@link Size64}. 
Moreover, it provides a deprecated size() method that returns 27 | * -1 if {@link #size64()} is -1 or greater than {@link Integer#MAX_VALUE}, a {@link #size64()} returning -1 (that 28 | * you are invited to override), and a {@link #containsKey(Object)} implementation that returns true. 29 | */ 30 | /* NOTE(review): the class header below carries no type parameters; they may have been lost together with other angle-bracket markup in this listing. Confirm against the original source. */ 31 | public abstract class AbstractHashFunction extends AbstractObject2LongFunction implements Size64 { 32 | private static final long serialVersionUID = 2L; 33 | /** Returns true for every argument. */ 34 | @Override 35 | public boolean containsKey(final Object key) { 36 | return true; 37 | } 38 | /** Returns -1 if {@link #size64()} exceeds {@link Integer#MAX_VALUE}, and {@link #size64()} cast to an integer otherwise (hence -1 when {@link #size64()} is -1). */ 39 | @Override 40 | @Deprecated 41 | public int size() { 42 | final long size64 = size64(); 43 | return size64 > Integer.MAX_VALUE ? -1 : (int)size64; 44 | } 45 | /** Returns -1; subclasses are expected to override this method. */ 46 | @Override 47 | public long size64() { 48 | return -1; 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/sux4j/mph/solve/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Solvers for linear systems. 3 | */ 4 | package it.unimi.dsi.sux4j.mph.solve; 5 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/sux4j/scratch/MergedBitVectorIterator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sux4J: Succinct data structures for Java 3 | * 4 | * Copyright (C) 2008-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 
12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | 21 | 22 | 23 | 24 | 25 | package it.unimi.dsi.sux4j.scratch; 26 | 27 | import java.util.Iterator; 28 | import java.util.NoSuchElementException; 29 | 30 | import it.unimi.dsi.bits.BitVector; 31 | import it.unimi.dsi.bits.LongArrayBitVector; 32 | import it.unimi.dsi.fastutil.objects.ObjectIterator; 33 | /** An iterator returning the union of the bit vectors returned by two iterators. The two iterators must return bit vectors in an increasing fashion; the resulting {@link MergedBitVectorIterator} will do the same. Duplicates will be eliminated. Note that {@link #next()} always returns the same instance, whose content is replaced at each call: copy it if you need to keep it. */ 34 | public class MergedBitVectorIterator implements ObjectIterator<BitVector> { 35 | /** The first component iterator. */ 36 | private final Iterator<? extends BitVector> it0; 37 | /** The second component iterator. */ 38 | private final Iterator<? extends BitVector> it1; 39 | /** The last bit vector returned by {@link #it0}. */ 40 | private BitVector curr0; 41 | /** The last bit vector returned by {@link #it1}. */ 42 | private BitVector curr1; 43 | /** The result. */ 44 | private final LongArrayBitVector result; 45 | 46 | /** Creates a new merged iterator by merging two given iterators. 47 | * 48 | * @param it0 the first (monotonically nondecreasing) component iterator. 49 | * @param it1 the second (monotonically nondecreasing) component iterator. 50 | */ 51 | public MergedBitVectorIterator(final Iterator<? extends BitVector> it0, final Iterator<? extends BitVector> it1) { 52 | this.it0 = it0; 53 | this.it1 = it1; 54 | result = LongArrayBitVector.getInstance(); 55 | if (it0.hasNext()) curr0 = it0.next(); 56 | if (it1.hasNext()) curr1 = it1.next(); 57 | } 58 | /** Returns true if at least one component iterator still has a pending bit vector. */ 59 | @Override 60 | public boolean hasNext() { 61 | return curr0 != null || curr1 != null; 62 | } 63 | /** Copies into the shared result the smaller pending bit vector (or the only one left) and advances its iterator; when the two pending vectors are equal both iterators are advanced, so the vector is returned once. */ 64 | @Override 65 | public BitVector next() { 66 | if (! 
hasNext()) throw new NoSuchElementException(); 67 | 68 | final int cmp; 69 | 70 | if (curr0 == null) { 71 | result.replace(curr1); 72 | curr1 = it1.hasNext() ? it1.next() : null; 73 | } 74 | else if (curr1 == null) { 75 | result.replace(curr0); 76 | curr0 = it0.hasNext() ? it0.next() : null; 77 | } 78 | else if ((cmp = curr0.compareTo(curr1)) < 0) { 79 | result.replace(curr0); 80 | curr0 = it0.hasNext() ? it0.next() : null; 81 | } 82 | else if (cmp > 0) { 83 | result.replace(curr1); 84 | curr1 = it1.hasNext() ? it1.next() : null; 85 | } 86 | else { 87 | result.replace(curr1); 88 | curr0 = it0.hasNext() ? it0.next() : null; 89 | curr1 = it1.hasNext() ? it1.next() : null; 90 | } 91 | 92 | return result; 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/sux4j/scratch/NumberToBitVector.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sux4J: Succinct data structures for Java 3 | * 4 | * Copyright (C) 2008-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.sux4j.scratch; 21 | 22 | import java.math.BigInteger; 23 | 24 | import it.unimi.dsi.bits.BitVector; 25 | import it.unimi.dsi.bits.LongArrayBitVector; 26 | import it.unimi.dsi.bits.TransformationStrategy; 27 | 28 | /** A transformation strategy that converts {@link BigInteger} values between 0 (inclusive) 29 | * and 2<sup>k</sup> (exclusive), where k is the given width, into fixed-length binary vectors (most-significant 30 | * bit is the 0-th). 31 | */ 32 | public class NumberToBitVector implements TransformationStrategy<BigInteger> { 33 | private static final long serialVersionUID = 1L; 34 | /** Number of binary digits to be used. */ 35 | private final int width; 36 | 37 | /** Creates a transformation strategy with given number of binary digits. 38 | * 39 | * @param width number of binary digits. 40 | */ 41 | public NumberToBitVector(final int width) { 42 | this.width = width; 43 | } 44 | /** Returns a new strategy with the same width. */ 45 | @Override 46 | public TransformationStrategy<BigInteger> copy() { 47 | return new NumberToBitVector(width); 48 | } 49 | /** Returns zero. */ 50 | @Override 51 | public long numBits() { 52 | return 0; 53 | } 54 | /** Returns the width, independently of the argument. */ 55 | @Override 56 | public long length(final BigInteger x) { 57 | return width; 58 | } 59 | /** Returns a new vector of {@code width} bits whose i-th bit is bit {@code width - i - 1} of {@code x}; higher bits of {@code x} are ignored. */ 60 | @Override 61 | public BitVector toBitVector(final BigInteger x) { 62 | final LongArrayBitVector res = LongArrayBitVector.getInstance(width); 63 | for (int i = 0; i < width; i++) 64 | res.add(x.testBit(width - i - 1)); 65 | return res; 66 | } 67 | /** Prints the 15-bit vector associated with 567. */ 68 | public static void main(final String arg[]) { 69 | final NumberToBitVector ntbv = new NumberToBitVector(15); 70 | System.out.println(ntbv.toBitVector(new BigInteger("567"))); 71 | } 72 | 73 | } 74 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/sux4j/scratch/Rank9GogPetri.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sux4J: Succinct data structures for Java 3 | * 4 | * Copyright (C) 
2008-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.sux4j.scratch; 21 | 22 | import static it.unimi.dsi.bits.LongArrayBitVector.word; 23 | import static it.unimi.dsi.bits.LongArrayBitVector.words; 24 | 25 | import java.io.IOException; 26 | import java.io.ObjectInputStream; 27 | 28 | import it.unimi.dsi.bits.BitVector; 29 | import it.unimi.dsi.bits.Fast; 30 | import it.unimi.dsi.bits.LongArrayBitVector; 31 | import it.unimi.dsi.sux4j.bits.AbstractRank; 32 | import it.unimi.dsi.sux4j.bits.Rank; 33 | 34 | /** A rank9 implementation. 35 | * 36 | *

rank9 is a ranking structure using 25% additional space and providing exceptionally fast ranking. 37 | */ 38 | 39 | public class Rank9GogPetri extends AbstractRank implements Rank { 40 | private static final boolean ASSERTS = false; 41 | private static final long serialVersionUID = 1L; 42 | 43 | protected transient long[] bits; 44 | protected final BitVector bitVector; 45 | protected final long[] count; 46 | protected final int numWords; 47 | protected final long numOnes; 48 | protected final long lastOne; 49 | /** Creates a new structure for the bit vector obtained by wrapping the given array with the given length. */ 50 | public Rank9GogPetri(final long[] bits, final long length) { 51 | this(LongArrayBitVector.wrap(bits, length)); 52 | } 53 | /** Creates a new structure for the given bit vector: for each block of eight words it stores in {@code count} the number of ones preceding the block followed by a word packing seven 9-bit in-block cumulative counts, and it records the total number of ones and the position of the last one (-1 if there is none). */ 54 | public Rank9GogPetri(final BitVector bitVector) { 55 | this.bitVector = bitVector; 56 | this.bits = bitVector.bits(); 57 | final long length = bitVector.length(); 58 | 59 | numWords = words(length); 60 | 61 | final int numCounts = (int)((length + 8 * Long.SIZE - 1) / (8 * Long.SIZE)) * 2; 62 | // Init rank/select structure 63 | count = new long[numCounts + 1]; 64 | 65 | long c = 0, l = -1; 66 | int pos = 0; 67 | for(int i = 0; i < numWords; i += 8, pos += 2) { 68 | count[pos] = c; 69 | c += Long.bitCount(bits[i]); 70 | if (bits[i] != 0) l = i * 64L + Fast.mostSignificantBit(bits[i]); 71 | for(int j = 1; j < 8; j++) { 72 | count[pos + 1] |= (i + j <= numWords ? c - count[pos] : 0x1FFL) << 63 - 9 * j; 73 | if (i + j < numWords) { 74 | c += Long.bitCount(bits[i + j]); 75 | if (bits[i + j] != 0) l = (i + j) * 64L + Fast.mostSignificantBit(bits[i + j]); 76 | } 77 | } 78 | } 79 | 80 | numOnes = c; 81 | lastOne = l; 82 | count[numCounts] = c; 83 | } 84 | 85 | /** Returns the number of ones before the given position: positions after the last one are answered with the total count, the others by adding the block count, the packed in-block count and the count of the bits below {@code pos} in its word. */ 86 | @Override 87 | public long rank(final long pos) { 88 | if (ASSERTS) assert pos >= 0; 89 | if (ASSERTS) assert pos <= bitVector.length(); 90 | // This test can be eliminated if there is always an additional word at the end of the bit array. 
91 | if (pos > lastOne) return numOnes; 92 | 93 | final int word = word(pos); 94 | final int block = (word >>> 2) & ~1; 95 | final int offset = word & 7; 96 | 97 | return count[block] + (count[block + 1] >>> (63 - offset * 9) & 0x1FF) + Long.bitCount(bits[word] & (1L << pos) - 1); 98 | } 99 | /** Returns the size in bits of the {@code count} array. */ 100 | @Override 101 | public long numBits() { 102 | return count.length * (long)Long.SIZE; 103 | } 104 | /** Returns the number of ones computed at construction time. */ 105 | @Override 106 | public long count() { 107 | return numOnes; 108 | } 109 | /** Returns the difference between the rank at {@code to} and the rank at {@code from}. */ 110 | @Override 111 | public long rank(final long from, final long to) { 112 | return rank(to) - rank(from); 113 | } 114 | /** Returns the position of the last one computed at construction time, or -1 if no nonzero word was found. */ 115 | public long lastOne() { 116 | return lastOne; 117 | } 118 | /** Restores the transient {@code bits} reference from the deserialized bit vector. */ 119 | private void readObject(final ObjectInputStream s) throws IOException, ClassNotFoundException { 120 | s.defaultReadObject(); 121 | bits = bitVector.bits(); 122 | } 123 | 124 | @Override 125 | public BitVector bitVector() { 126 | return bitVector; 127 | } 128 | } 129 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/sux4j/test/EliasFanoLongBigListSpeedTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sux4J: Succinct data structures for Java 3 | * 4 | * Copyright (C) 2016-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.sux4j.test; 21 | 22 | import org.apache.commons.math3.random.RandomGenerator; 23 | 24 | import com.martiansoftware.jsap.FlaggedOption; 25 | import com.martiansoftware.jsap.JSAP; 26 | import com.martiansoftware.jsap.JSAPException; 27 | import com.martiansoftware.jsap.JSAPResult; 28 | import com.martiansoftware.jsap.Parameter; 29 | import com.martiansoftware.jsap.SimpleJSAP; 30 | import com.martiansoftware.jsap.UnflaggedOption; 31 | 32 | import it.unimi.dsi.fastutil.ints.IntArrayList; 33 | import it.unimi.dsi.fastutil.longs.LongArrayList; 34 | import it.unimi.dsi.sux4j.util.EliasFanoLongBigList; 35 | import it.unimi.dsi.util.XoRoShiRo128PlusRandomGenerator; 36 | 37 | public class EliasFanoLongBigListSpeedTest { 38 | 39 | public static void main(final String[] arg) throws JSAPException { 40 | 41 | final SimpleJSAP jsap = new SimpleJSAP(EliasFanoLongBigListSpeedTest.class.getName(), "Tests the speed of Elias-Fano compressed lists.", 42 | new Parameter[] { 43 | new UnflaggedOption("numElements", JSAP.INTSIZE_PARSER, "1Mi", JSAP.NOT_REQUIRED, JSAP.NOT_GREEDY, "The number of elements."), 44 | new UnflaggedOption("density", JSAP.DOUBLE_PARSER, ".5", JSAP.NOT_REQUIRED, JSAP.NOT_GREEDY, "The density."), 45 | new FlaggedOption("numPos", JSAP.INTSIZE_PARSER, "1Mi", JSAP.NOT_REQUIRED, 'p', "positions", "The number of positions to test"), 46 | new FlaggedOption("bulk", JSAP.INTSIZE_PARSER, "10", JSAP.NOT_REQUIRED, 'b', "bulk", "The number of positions to read with the bulk method"), 47 | }); 48 | 49 | final JSAPResult jsapResult = jsap.parse(arg); 50 | if (jsap.messagePrinted()) return; 51 | 52 | final int numElements = jsapResult.getInt("numElements"); 53 | final double density = jsapResult.getDouble("density"); 54 | final int numPos = jsapResult.getInt("numPos"); 55 | final int bulk = jsapResult.getInt("bulk"); 56 | 57 | final RandomGenerator random = new 
XoRoShiRo128PlusRandomGenerator(42); 58 | final IntArrayList list = new IntArrayList(numElements); 59 | for(long i = numElements; i-- != 0;) list.add(random.nextDouble() < density ? 0 : 100); 60 | 61 | final int[] position = new int[numPos]; 62 | 63 | for(int i = numPos; i-- != 0;) position[i] = (random.nextInt() & 0x7FFFFFFF) % (numElements - bulk); 64 | final long[] elements = new long[list.size()]; 65 | elements[0] = list.getInt(0); 66 | for(int i = 1; i < list.size(); i++) elements[i] = list.getInt(i) + elements[i - 1]; 67 | final EliasFanoLongBigList eliasFanoLongBigList = new EliasFanoLongBigList(LongArrayList.wrap(elements)); 68 | long time; 69 | System.err.println("getLong():"); 70 | for(int k = 10; k-- != 0;) { 71 | time = - System.nanoTime(); 72 | for(int i = 0; i < numPos; i++) eliasFanoLongBigList.getLong(position[i]); 73 | time += System.nanoTime(); 74 | System.err.println(time / 1E9 + "s, " + time / (double)numPos + " ns/element"); 75 | } 76 | 77 | final long[] dest = new long[bulk]; 78 | System.err.println("get():"); 79 | for(int k = 10; k-- != 0;) { 80 | time = - System.nanoTime(); 81 | for(int i = 0; i < numPos; i++) eliasFanoLongBigList.get(position[i], dest); 82 | time += System.nanoTime(); 83 | System.err.println(time / 1E9 + "s, " + time / (double)(numPos * bulk) + " ns/element"); 84 | } 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/sux4j/test/GenerateGeometricValues.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sux4J: Succinct data structures for Java 3 | * 4 | * Copyright (C) 2016-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 
| * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.sux4j.test; 21 | 22 | import java.io.DataOutputStream; 23 | import java.io.FileOutputStream; 24 | import java.io.IOException; 25 | 26 | import org.apache.commons.math3.random.RandomGenerator; 27 | import org.slf4j.Logger; 28 | import org.slf4j.LoggerFactory; 29 | 30 | import com.martiansoftware.jsap.JSAP; 31 | import com.martiansoftware.jsap.JSAPException; 32 | import com.martiansoftware.jsap.JSAPResult; 33 | import com.martiansoftware.jsap.Parameter; 34 | import com.martiansoftware.jsap.SimpleJSAP; 35 | import com.martiansoftware.jsap.UnflaggedOption; 36 | 37 | import it.unimi.dsi.fastutil.io.FastBufferedOutputStream; 38 | import it.unimi.dsi.logging.ProgressLogger; 39 | import it.unimi.dsi.util.XoRoShiRo128PlusRandomGenerator; 40 | /** Command-line tool writing a binary file of geometrically distributed longs: each value is the number of trailing zeros of a random 64-bit integer. */ 41 | public class GenerateGeometricValues { 42 | public static final Logger LOGGER = LoggerFactory.getLogger(GenerateGeometricValues.class); 43 | /** Parses {@code n} and {@code output} from the command line and writes {@code n} longs to {@code output} using {@link DataOutputStream#writeLong(long)}. */ 44 | public static void main(final String[] arg) throws JSAPException, IOException { 45 | 46 | final SimpleJSAP jsap = new SimpleJSAP(GenerateGeometricValues.class.getName(), "Generates a binary list of longs geometrically distributed.", 47 | new Parameter[] { 48 | new UnflaggedOption("n", JSAP.LONG_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The number of longs."), 49 | new UnflaggedOption("output", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The output file.") 50 | }); 51 | 52 | final JSAPResult jsapResult = jsap.parse(arg); 53 | if (jsap.messagePrinted()) return; 54 | 55 | final long n = jsapResult.getLong("n"); 56 | final String output = jsapResult.getString("output"); 57 | 58 | final
RandomGenerator r = new XoRoShiRo128PlusRandomGenerator(); 59 | 60 | final ProgressLogger pl = new ProgressLogger(LOGGER); 61 | pl.expectedUpdates = n; 62 | pl.start("Generating... "); 63 | // try-with-resources flushes and closes the file even if a write fails. 64 | try (final DataOutputStream dos = new DataOutputStream(new FastBufferedOutputStream(new FileOutputStream(output)))) { 65 | for(long i = 0; i < n; i++) { 66 | dos.writeLong(Long.numberOfTrailingZeros(r.nextLong())); 67 | // Without this the logger, which was told to expect n updates, would report none. 68 | pl.lightUpdate(); 69 | } 70 | pl.done(); 71 | } 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/sux4j/test/GeneratePowerLawValues.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sux4J: Succinct data structures for Java 3 | * 4 | * Copyright (C) 2016-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.sux4j.test; 21 | 22 | import java.io.DataOutputStream; 23 | import java.io.FileOutputStream; 24 | import java.io.IOException; 25 | 26 | import org.apache.commons.math3.distribution.ZipfDistribution; 27 | import org.apache.commons.math3.random.RandomGenerator; 28 | import org.slf4j.Logger; 29 | import org.slf4j.LoggerFactory; 30 | 31 | import com.martiansoftware.jsap.JSAP; 32 | import com.martiansoftware.jsap.JSAPException; 33 | import com.martiansoftware.jsap.JSAPResult; 34 | import com.martiansoftware.jsap.Parameter; 35 | import com.martiansoftware.jsap.SimpleJSAP; 36 | import com.martiansoftware.jsap.UnflaggedOption; 37 | 38 | import it.unimi.dsi.fastutil.io.FastBufferedOutputStream; 39 | import it.unimi.dsi.logging.ProgressLogger; 40 | import it.unimi.dsi.util.XoRoShiRo128PlusRandomGenerator; 41 | /** Command-line tool writing a binary file of longs sampled from a {@link ZipfDistribution} and shifted down by one. */ 42 | public class GeneratePowerLawValues { 43 | public static final Logger LOGGER = LoggerFactory.getLogger(GeneratePowerLawValues.class); 44 | /** Parses {@code gamma}, {@code max}, {@code n} and {@code output} from the command line and writes {@code n} samples to {@code output} using {@link DataOutputStream#writeLong(long)}. */ 45 | public static void main(final String[] arg) throws JSAPException, IOException { 46 | 47 | final SimpleJSAP jsap = new SimpleJSAP(GeneratePowerLawValues.class.getName(), "Generates a binary list of power-law distributed longs starting from zero.", 48 | new Parameter[] { 49 | new UnflaggedOption("gamma", JSAP.DOUBLE_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The power law exponent."), 50 | new UnflaggedOption("max", JSAP.INTEGER_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The strict upper bound for the support of the distribution."), 51 | new UnflaggedOption("n", JSAP.LONG_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The number of longs."), 52 | new UnflaggedOption("output", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The output file.") 53 | }); 54 | 55 | final JSAPResult jsapResult = jsap.parse(arg); 56 | if (jsap.messagePrinted()) return; 57 | 58 | 
final double gamma = jsapResult.getDouble("gamma"); 59 | final int max = jsapResult.getInt("max"); 60 | final long n = jsapResult.getLong("n"); 61 | final String output = jsapResult.getString("output"); 62 | 63 | final RandomGenerator r = new XoRoShiRo128PlusRandomGenerator(); 64 | 65 | final ProgressLogger pl = new ProgressLogger(LOGGER); 66 | pl.expectedUpdates = n; 67 | pl.start("Generating... "); 68 | 69 | final ZipfDistribution zipf = new ZipfDistribution(r, max, gamma); 70 | // try-with-resources flushes and closes the file even if a write fails. 71 | try (final DataOutputStream dos = new DataOutputStream(new FastBufferedOutputStream(new FileOutputStream(output)))) { 72 | for(long i = 0; i < n; i++) { 73 | // Samples are shifted down by one so that the written values start from zero. 74 | dos.writeLong(zipf.sample() - 1); 75 | // Without this the logger, which was told to expect n updates, would report none. 76 | pl.lightUpdate(); 77 | } 78 | pl.done(); 79 | } 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/sux4j/test/GenerateRandom32BitStrings.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sux4J: Succinct data structures for Java 3 | * 4 | * Copyright (C) 2016-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.sux4j.test; 21 | 22 | import java.io.FileOutputStream; 23 | import java.io.IOException; 24 | 25 | import org.apache.commons.math3.random.RandomGenerator; 26 | import org.slf4j.Logger; 27 | import org.slf4j.LoggerFactory; 28 | 29 | import com.martiansoftware.jsap.FlaggedOption; 30 | import com.martiansoftware.jsap.JSAP; 31 | import com.martiansoftware.jsap.JSAPException; 32 | import com.martiansoftware.jsap.JSAPResult; 33 | import com.martiansoftware.jsap.Parameter; 34 | import com.martiansoftware.jsap.SimpleJSAP; 35 | import com.martiansoftware.jsap.UnflaggedOption; 36 | 37 | import it.unimi.dsi.fastutil.io.FastBufferedOutputStream; 38 | import it.unimi.dsi.logging.ProgressLogger; 39 | import it.unimi.dsi.util.XoRoShiRo128PlusRandomGenerator; 40 | /** Command-line tool writing an increasing sequence of four-byte strings, one per line, obtained by accumulating random increments and encoding the running total in base 224. */ 41 | public class GenerateRandom32BitStrings { 42 | public static final Logger LOGGER = LoggerFactory.getLogger(GenerateRandom32BitStrings.class); 43 | /** Parses {@code gap}, {@code n} and {@code output} from the command line and writes {@code n} strings; throws an {@link AssertionError} carrying the current index if the running total reaches 224^4. */ 44 | public static void main(final String[] arg) throws JSAPException, IOException { 45 | 46 | final SimpleJSAP jsap = new SimpleJSAP(GenerateRandom32BitStrings.class.getName(), "Generates a list of sorted 32-bit random strings using only characters in the ISO-8859-1 printable range [32..256).", 47 | new Parameter[] { 48 | new FlaggedOption("gap", JSAP.INTSIZE_PARSER, "1", JSAP.NOT_REQUIRED, 'g', "gap", "Impose a minimum gap."), 49 | new UnflaggedOption("n", JSAP.INTEGER_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The number of strings (too small values might cause overflow)."), 50 | new UnflaggedOption("output", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The output file.") 51 | }); 52 | 53 | final JSAPResult jsapResult = jsap.parse(arg); 54 | if (jsap.messagePrinted()) return; 55 | 56 | final int n = jsapResult.getInt("n"); 57 | final String output = jsapResult.getString("output"); 58 | final int gap = 
jsapResult.getInt("gap"); 59 | 60 | final RandomGenerator r = new XoRoShiRo128PlusRandomGenerator(); 61 | 62 | final ProgressLogger pl = new ProgressLogger(LOGGER); 63 | pl.expectedUpdates = n; 64 | pl.start("Generating... "); 65 | 66 | double l = 0, t; 67 | final double limit = Math.pow(224, 4); 68 | final int incr = (int)Math.floor(1.99 * (limit / n)) - 1; 69 | 70 | LOGGER.info("Increment: " + incr); 71 | // try-with-resources: the file is flushed and closed even when the overflow assertion below fires. 72 | try (final FastBufferedOutputStream fbs = new FastBufferedOutputStream(new FileOutputStream(output))) { 73 | final int[] b = new int[4]; 74 | 75 | for(int i = 0; i < n; i++) { 76 | t = (l += (r.nextInt(incr) + gap)); 77 | if (l >= limit) throw new AssertionError(Integer.toString(i)); 78 | // Split t into four base-224 digits, most significant first, each shifted into [32..256). 79 | for(int j = 4; j-- != 0;) { 80 | b[j] = (int)(t % 224 + 32); 81 | t = Math.floor(t / 224); 82 | } 83 | 84 | for(int j = 0; j < 4; j++) fbs.write(b[j]); 85 | fbs.write(10); 86 | 87 | pl.lightUpdate(); 88 | } 89 | 90 | pl.done(); 91 | } 92 | 93 | LOGGER.info("Last/limit: " + (l / limit)); 94 | } 95 | } 96 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/sux4j/test/GenerateRandom64BitIntegers.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sux4J: Succinct data structures for Java 3 | * 4 | * Copyright (C) 2016-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.sux4j.test; 21 | 22 | import java.io.DataOutputStream; 23 | import java.io.FileOutputStream; 24 | import java.io.IOException; 25 | import java.math.BigInteger; 26 | 27 | import org.apache.commons.math3.random.RandomGenerator; 28 | import org.slf4j.Logger; 29 | import org.slf4j.LoggerFactory; 30 | 31 | import com.martiansoftware.jsap.FlaggedOption; 32 | import com.martiansoftware.jsap.JSAP; 33 | import com.martiansoftware.jsap.JSAPException; 34 | import com.martiansoftware.jsap.JSAPResult; 35 | import com.martiansoftware.jsap.Parameter; 36 | import com.martiansoftware.jsap.SimpleJSAP; 37 | import com.martiansoftware.jsap.UnflaggedOption; 38 | 39 | import it.unimi.dsi.fastutil.io.FastBufferedOutputStream; 40 | import it.unimi.dsi.logging.ProgressLogger; 41 | import it.unimi.dsi.util.XoRoShiRo128PlusRandomGenerator; 42 | /** Command-line tool writing an increasing sequence of 64-bit integers obtained by accumulating random increments; totals at or above 2^63 are written as negative longs, so the order holds for the unsigned interpretation. */ 43 | public class GenerateRandom64BitIntegers { 44 | public static final Logger LOGGER = LoggerFactory.getLogger(GenerateRandom64BitIntegers.class); 45 | /** Parses {@code gap}, {@code n} and {@code output} from the command line and writes {@code n} longs using {@link DataOutputStream#writeLong(long)}; throws an {@link AssertionError} carrying the current index if the running total reaches 2^64. */ 46 | public static void main(final String[] arg) throws JSAPException, IOException { 47 | 48 | final SimpleJSAP jsap = new SimpleJSAP(GenerateRandom64BitIntegers.class.getName(), "Generates a list of sorted 64-bit random integers in DataOutput format.", 49 | new Parameter[] { 50 | new FlaggedOption("gap", JSAP.INTSIZE_PARSER, "1", JSAP.NOT_REQUIRED, 'g', "gap", "Impose a minimum gap."), 51 | new UnflaggedOption("n", JSAP.LONG_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The number of integers (too small values might cause overflow)."), 52 | new UnflaggedOption("output", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The output file.") 53 | }); 54 | 55 | final JSAPResult jsapResult = jsap.parse(arg); 56 | if (jsap.messagePrinted()) return; 57 | 58 | final long n = jsapResult.getLong("n"); 59 | final int gap = jsapResult.getInt("gap"); 60 | final String 
output = jsapResult.getString("output"); 61 | 62 | final RandomGenerator r = new XoRoShiRo128PlusRandomGenerator(); 63 | 64 | final ProgressLogger pl = new ProgressLogger(LOGGER); 65 | pl.expectedUpdates = n; 66 | pl.start("Generating... "); 67 | 68 | BigInteger l = BigInteger.ZERO; 69 | final BigInteger limit = BigInteger.valueOf(256).pow(8); 70 | // doubleValue() rather than longValue(): for n < 3 the quotient does not fit a long and would wrap around, whereas the (long) cast of a too-large double saturates at Long.MAX_VALUE. 71 | final long incr = (long)Math.floor(1.99 * (limit.divide(BigInteger.valueOf(n)).doubleValue())) - 1; 72 | 73 | // try-with-resources: the file is flushed and closed even when the overflow assertion below fires. 74 | try (final DataOutputStream dos = new DataOutputStream(new FastBufferedOutputStream(new FileOutputStream(output)))) { 75 | LOGGER.info("Increment: " + incr); 76 | 77 | for(long i = 0; i < n; i++) { 78 | l = l.add(BigInteger.valueOf((r.nextLong() & 0x7FFFFFFFFFFFFFFFL) % incr + gap)); 79 | // The total must stay strictly below 2^64: a total equal to the limit would be written as 0 by longValue(). 80 | if (l.compareTo(limit) >= 0) throw new AssertionError(Long.toString(i)); 81 | dos.writeLong(l.longValue()); 82 | pl.lightUpdate(); 83 | } 84 | 85 | pl.done(); 86 | } 87 | 88 | LOGGER.info("Last/limit: " + (l.doubleValue() / limit.doubleValue())); 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/sux4j/test/GenerateRandomStrings.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sux4J: Succinct data structures for Java 3 | * 4 | * Copyright (C) 2016-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.sux4j.test; 21 | 22 | import java.io.FileNotFoundException; 23 | import java.io.FileOutputStream; 24 | import java.io.OutputStreamWriter; 25 | import java.io.PrintWriter; 26 | import java.io.UnsupportedEncodingException; 27 | 28 | import org.apache.commons.math3.random.RandomGenerator; 29 | 30 | import com.martiansoftware.jsap.JSAP; 31 | import com.martiansoftware.jsap.JSAPException; 32 | import com.martiansoftware.jsap.JSAPResult; 33 | import com.martiansoftware.jsap.Parameter; 34 | import com.martiansoftware.jsap.SimpleJSAP; 35 | import com.martiansoftware.jsap.UnflaggedOption; 36 | 37 | import it.unimi.dsi.lang.MutableString; 38 | import it.unimi.dsi.util.XoRoShiRo128PlusRandomGenerator; 39 | /** Command-line tool writing fixed-length random strings to an ISO-8859-1 encoded file. */ 40 | public class GenerateRandomStrings { 41 | /** Parses {@code n}, {@code l} and {@code output} from the command line and prints {@code n} strings of {@code l} characters, each character having a code in [33..126]. */ 42 | public static void main(final String[] arg) throws JSAPException, UnsupportedEncodingException, FileNotFoundException { 43 | final Parameter[] parameters = { 44 | new UnflaggedOption("n", JSAP.INTEGER_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The number of strings."), 45 | new UnflaggedOption("l", JSAP.INTEGER_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The number of characters per string."), 46 | new UnflaggedOption("output", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The output file.") 47 | }; 48 | final SimpleJSAP jsap = new SimpleJSAP(GenerateRandomStrings.class.getName(), "Generates (not necessarily unique) random strings", parameters); 49 | 50 | final JSAPResult parsed = jsap.parse(arg); 51 | if (jsap.messagePrinted()) return; 52 | 53 | final int count = parsed.getInt("n"); 54 | final int length = parsed.getInt("l"); 55 | final String path = parsed.getString("output"); 56 | 57 | final RandomGenerator random = new XoRoShiRo128PlusRandomGenerator(); 58 | final PrintWriter out = new PrintWriter(new OutputStreamWriter(new FileOutputStream(path), "ISO-8859-1")); 59 | 60 | int written = 
0; 61 | while (written < count) { 62 | final MutableString line = new MutableString(length); 63 | for (int c = 0; c < length; c++) line.append((char)(33 + random.nextInt(94))); 64 | line.println(out); 65 | written++; 66 | } 67 | 68 | out.close(); 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/sux4j/test/GenerateUniformValues.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sux4J: Succinct data structures for Java 3 | * 4 | * Copyright (C) 2016-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.sux4j.test; 21 | 22 | import java.io.DataOutputStream; 23 | import java.io.FileOutputStream; 24 | import java.io.IOException; 25 | 26 | import org.apache.commons.math3.random.RandomGenerator; 27 | import org.slf4j.Logger; 28 | import org.slf4j.LoggerFactory; 29 | 30 | import com.martiansoftware.jsap.JSAP; 31 | import com.martiansoftware.jsap.JSAPException; 32 | import com.martiansoftware.jsap.JSAPResult; 33 | import com.martiansoftware.jsap.Parameter; 34 | import com.martiansoftware.jsap.SimpleJSAP; 35 | import com.martiansoftware.jsap.UnflaggedOption; 36 | 37 | import it.unimi.dsi.fastutil.io.FastBufferedOutputStream; 38 | import it.unimi.dsi.logging.ProgressLogger; 39 | import it.unimi.dsi.util.XoRoShiRo128PlusRandomGenerator; 40 | /** Command-line tool writing a binary file of random longs, each masked to its lowest {@code b} bits. */ 41 | public class GenerateUniformValues { 42 | public static final Logger LOGGER = LoggerFactory.getLogger(GenerateUniformValues.class); 43 | /** Parses {@code b}, {@code n} and {@code output} from the command line and writes {@code n} longs using {@link DataOutputStream#writeLong(long)}; throws an {@link IllegalArgumentException} if {@code b} is not in [0..64]. */ 44 | public static void main(final String[] arg) throws JSAPException, IOException { 45 | 46 | final SimpleJSAP jsap = new SimpleJSAP(GenerateUniformValues.class.getName(), "Generates a binary list of uniformly distributed longs using a given number of bits.", 47 | new Parameter[] { 48 | new UnflaggedOption("b", JSAP.INTEGER_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The number of bits."), 49 | new UnflaggedOption("n", JSAP.LONG_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The number of longs."), 50 | new UnflaggedOption("output", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The output file.") 51 | }); 52 | 53 | final JSAPResult jsapResult = jsap.parse(arg); 54 | if (jsap.messagePrinted()) return; 55 | 56 | final int b = jsapResult.getInt("b"); 57 | final long n = jsapResult.getLong("n"); 58 | final String output = jsapResult.getString("output"); 59 | if (b < 0 || b > 64) throw new IllegalArgumentException("The number of bits must be in [0..64]: " + b); // Java uses only the low six bits of a long shift count, so other values would silently yield a wrong mask. 60 | final RandomGenerator r = new XoRoShiRo128PlusRandomGenerator(); 61 | final long 
mask = b == 64 ? -1L: (1L << b) - 1; 62 | 63 | final ProgressLogger pl = new ProgressLogger(LOGGER); 64 | pl.expectedUpdates = n; 65 | pl.start("Generating... "); 66 | // try-with-resources flushes and closes the file even if a write fails. 67 | try (final DataOutputStream dos = new DataOutputStream(new FastBufferedOutputStream(new FileOutputStream(output)))) { 68 | for(long i = 0; i < n; i++) { 69 | dos.writeLong(r.nextLong() & mask); 70 | // Without this the logger, which was told to expect n updates, would report none. 71 | pl.lightUpdate(); 72 | } 73 | pl.done(); 74 | } 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/sux4j/test/HollowTrieSpeedTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sux4J: Succinct data structures for Java 3 | * 4 | * Copyright (C) 2016-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.sux4j.test; 21 | 22 | import java.io.FileInputStream; 23 | import java.io.IOException; 24 | import java.io.InputStreamReader; 25 | import java.nio.charset.Charset; 26 | import java.util.Iterator; 27 | import java.util.zip.GZIPInputStream; 28 | 29 | import com.martiansoftware.jsap.FlaggedOption; 30 | import com.martiansoftware.jsap.JSAP; 31 | import com.martiansoftware.jsap.JSAPException; 32 | import com.martiansoftware.jsap.JSAPResult; 33 | import com.martiansoftware.jsap.Parameter; 34 | import com.martiansoftware.jsap.SimpleJSAP; 35 | import com.martiansoftware.jsap.Switch; 36 | import com.martiansoftware.jsap.UnflaggedOption; 37 | import com.martiansoftware.jsap.stringparsers.ForNameStringParser; 38 | 39 | import it.unimi.dsi.fastutil.io.BinIO; 40 | import it.unimi.dsi.fastutil.objects.Object2LongFunction; 41 | import it.unimi.dsi.io.FastBufferedReader; 42 | import it.unimi.dsi.io.LineIterator; 43 | /** Command-line benchmark that loads a serialised function and times {@code getLong()} over a list of terms, repeating the measurement ten times. */ 44 | public class HollowTrieSpeedTest { 45 | /** Parses the command line, loads the serialised structure named by {@code trie} and prints the timing of each run to standard error. */ 46 | public static void main(final String[] arg) throws NoSuchMethodException, IOException, JSAPException, ClassNotFoundException { 47 | 48 | final SimpleJSAP jsap = new SimpleJSAP(HollowTrieSpeedTest.class.getName(), "Tests the speed of a hollow trie.", 49 | new Parameter[] { 50 | new FlaggedOption("bufferSize", JSAP.INTSIZE_PARSER, "64Ki", JSAP.NOT_REQUIRED, 'b', "buffer-size", "The size of the I/O buffer used to read terms."), 51 | new FlaggedOption("encoding", ForNameStringParser.getParser(Charset.class), "UTF-8", JSAP.NOT_REQUIRED, 'e', "encoding", "The term file encoding."), 52 | new Switch("zipped", 'z', "zipped", "The term list is compressed in gzip format."), 53 | new FlaggedOption("termFile", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'o', "offline", "Read terms from this file (without loading them into core memory) instead of standard input."), 54 | new UnflaggedOption("trie", 
JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The filename for the serialised hollow trie.") 55 | }); 56 | 57 | final JSAPResult jsapResult = jsap.parse(arg); 58 | if (jsap.messagePrinted()) return; 59 | 60 | final int bufferSize = jsapResult.getInt("bufferSize"); 61 | final String trieName = jsapResult.getString("trie"); 62 | final String termFile = jsapResult.getString("termFile"); 63 | //final Class tableClass = jsapResult.getClass("class"); 64 | final Charset encoding = (Charset)jsapResult.getObject("encoding"); 65 | final boolean zipped = jsapResult.getBoolean("zipped"); 66 | 67 | @SuppressWarnings("unchecked") 68 | final Object2LongFunction hollowTrie = (Object2LongFunction)BinIO.loadObject(trieName); 69 | 70 | Iterator i; 71 | // The term source is reopened at every repetition. NOTE(review): when reading from standard input, runs after the first presumably find the stream exhausted (j stays 0) - confirm. 72 | for(int k = 10; k-- != 0;) { 73 | if (termFile == null) i = new LineIterator(new FastBufferedReader(new InputStreamReader(System.in, encoding), bufferSize)); 74 | else i = new LineIterator(new FastBufferedReader(new InputStreamReader(zipped ? 
new GZIPInputStream(new FileInputStream(termFile)) : new FileInputStream(termFile), encoding), bufferSize)); 75 | long time = -System.currentTimeMillis(); 76 | int j = 0; 77 | while(i.hasNext()) { 78 | hollowTrie.getLong(i.next()); 79 | if (j++ % 10000 == 0) System.err.print('.'); 80 | } 81 | System.err.println(); 82 | time += System.currentTimeMillis(); 83 | System.err.println(time / 1E3 + "s, " + (time * 1E6) / j + " ns/vector"); // time is in milliseconds, so time * 1E6 is in nanoseconds 84 | } 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/sux4j/test/ListSpeedTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sux4J: Succinct data structures for Java 3 | * 4 | * Copyright (C) 2016-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.sux4j.test; 21 | 22 | import java.io.IOException; 23 | 24 | import com.martiansoftware.jsap.JSAP; 25 | import com.martiansoftware.jsap.JSAPException; 26 | import com.martiansoftware.jsap.JSAPResult; 27 | import com.martiansoftware.jsap.Parameter; 28 | import com.martiansoftware.jsap.SimpleJSAP; 29 | import com.martiansoftware.jsap.Switch; 30 | import com.martiansoftware.jsap.UnflaggedOption; 31 | 32 | import it.unimi.dsi.Util; 33 | import it.unimi.dsi.fastutil.io.BinIO; 34 | import it.unimi.dsi.fastutil.longs.LongList; 35 | /** Command-line benchmark that loads a serialised {@link LongList} and times a sequential {@code getLong()} scan of all its elements. */ 36 | public class ListSpeedTest { 37 | /** Parses the command line, loads the list and prints per-run and average timings to standard output. The {@code random} switch is accepted but never read by this method. */ 38 | public static void main(final String[] arg) throws IOException, JSAPException, ClassNotFoundException { 39 | 40 | final SimpleJSAP jsap = new SimpleJSAP(ListSpeedTest.class.getName(), "Test the speed of a list", 41 | new Parameter[] { 42 | new Switch("random", 'r', "random", "Do a random test on at most 1 million strings."), 43 | new UnflaggedOption("list", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The filename for the serialised list.") 44 | }); 45 | 46 | final JSAPResult jsapResult = jsap.parse(arg); 47 | if (jsap.messagePrinted()) return; 48 | 49 | final String listName = jsapResult.getString("list"); 50 | 51 | final LongList list = (LongList)BinIO.loadObject(listName); 52 | long total = 0; 53 | final int n = list.size(); 54 | for(int k = 13; k-- != 0;) { // 13 runs: the first three (k >= 10) are warm-up and excluded from the average 55 | long time = -System.currentTimeMillis(); 56 | for(int i = 0; i < n; i++) { 57 | list.getLong(i); 58 | if (i % 100000 == 0) System.out.print('.'); // must not increment i here: that would skip every other element while the timing is still divided by n 59 | } 60 | System.out.println(); 61 | time += System.currentTimeMillis(); 62 | if (k < 10) total += time; 63 | System.out.println(time / 1E3 + "s, " + (time * 1E3) / n + " \u00b5s/item"); 64 | } 65 | System.out.println("Average: " + Util.format(total / 10E3) + "s, " + Util.format((total * 1E3) / (10.0 * n)) + " \u00b5s/item"); // 10.0 * n: the int product 10 * n overflows for lists of more than about 214 million elements 66 | } 67 | } 68 | 
-------------------------------------------------------------------------------- /src/it/unimi/dsi/sux4j/test/SuccinctTreeDecoder.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sux4J: Succinct data structures for Java 3 | * 4 | * Copyright (C) 2016-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.sux4j.test; 21 | 22 | import java.io.IOException; 23 | import java.io.Serializable; 24 | 25 | import it.unimi.dsi.bits.LongArrayBitVector; 26 | import it.unimi.dsi.compression.Decoder; 27 | import it.unimi.dsi.compression.TreeDecoder; 28 | import it.unimi.dsi.fastutil.booleans.BooleanIterator; 29 | import it.unimi.dsi.io.InputBitStream; 30 | import it.unimi.dsi.sux4j.bits.JacobsonBalancedParentheses; 31 | /** {@link Decoder} that decodes by walking the succinct bit-vector representation of a {@link TreeDecoder} with the help of a {@link JacobsonBalancedParentheses} structure. */ 32 | public class SuccinctTreeDecoder implements Decoder, Serializable { 33 | private static final long serialVersionUID = 1L; 34 | 35 | private final JacobsonBalancedParentheses balParen; 36 | private final LongArrayBitVector bitVector; 37 | private final boolean returnZero; 38 | 39 | /** Stores the succinct representation of the given tree decoder and wraps it in a {@link JacobsonBalancedParentheses} structure. */ 40 | public SuccinctTreeDecoder(final TreeDecoder treeDecoder) { 41 | this.bitVector = treeDecoder.succinctRepresentation(); 42 | //System.err.println(bitVector); 43 | //System.err.println(Arrays.toString(treeDecoder.buildCodes())); 44 | // A representation of at most two bits is treated as degenerate: decoding then always yields 0. 45 | this.returnZero = this.bitVector.length() <= 2; 46 | this.balParen = new 
JacobsonBalancedParentheses(this.bitVector, false, true, false); 47 | } 48 | 49 | /** Decodes one value, consuming bits from the iterator until the walk reaches a position whose bit is clear. */ 50 | @Override 51 | public int decode(final BooleanIterator iterator) { 52 | if (returnZero) return 0; 53 | int pos = 1; 54 | int result = 0; 55 | 56 | while (true) { 57 | if (! iterator.nextBoolean()) { 58 | // Input bit 0: step to the next position. 59 | pos++; 60 | if (! bitVector.getBoolean(pos)) return result; 61 | } 62 | else { 63 | // Input bit 1: move to the position after findClose(pos), adding half the skipped span to the result. 64 | final int next = (int)(balParen.findClose(pos) + 1); 65 | result += (next - pos) / 2; 66 | if (! bitVector.getBoolean(next)) return result; 67 | pos = next; 68 | } 69 | } 70 | } 71 | 72 | /** Same walk as {@link #decode(BooleanIterator)}, but the bits are read from an {@link InputBitStream}. */ 73 | @Override 74 | public int decode(final InputBitStream ibs) throws IOException { 75 | if (returnZero) return 0; 76 | int pos = 1; 77 | int result = 0; 78 | 79 | while (true) { 80 | if (ibs.readBit() == 0) { 81 | pos++; 82 | if (! bitVector.getBoolean(pos)) return result; 83 | } 84 | else { 85 | final int next = (int)(balParen.findClose(pos) + 1); 86 | result += (next - pos) / 2; 87 | if (! bitVector.getBoolean(next)) return result; 88 | pos = next; 89 | } 90 | } 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/sux4j/test/ValueStats.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sux4J: Succinct data structures for Java 3 | * 4 | * Copyright (C) 2016-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.sux4j.test; 21 | 22 | import java.io.IOException; 23 | 24 | import org.slf4j.Logger; 25 | import org.slf4j.LoggerFactory; 26 | 27 | import com.martiansoftware.jsap.JSAP; 28 | import com.martiansoftware.jsap.JSAPException; 29 | import com.martiansoftware.jsap.JSAPResult; 30 | import com.martiansoftware.jsap.Parameter; 31 | import com.martiansoftware.jsap.SimpleJSAP; 32 | import com.martiansoftware.jsap.UnflaggedOption; 33 | 34 | import it.unimi.dsi.bits.Fast; 35 | import it.unimi.dsi.fastutil.io.BinIO; 36 | import it.unimi.dsi.fastutil.longs.Long2LongOpenHashMap; 37 | import it.unimi.dsi.fastutil.longs.LongIterator; 38 | /** Command-line tool printing the minimum, the maximum and the empirical entropy of a binary file of longs. */ 39 | public class ValueStats { 40 | public static final Logger LOGGER = LoggerFactory.getLogger(ValueStats.class); 41 | /** Reads the longs from the file named on the command line and prints the statistics to standard output. */ 42 | public static void main(final String[] arg) throws JSAPException, IOException { 43 | final Parameter[] parameters = { 44 | new UnflaggedOption("input", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The input file.") 45 | }; 46 | final SimpleJSAP jsap = new SimpleJSAP(ValueStats.class.getName(), "Prints statistical data about a binary list of longs.", parameters); 47 | 48 | final JSAPResult parsed = jsap.parse(arg); 49 | if (jsap.messagePrinted()) return; 50 | 51 | final Long2LongOpenHashMap histogram = new Long2LongOpenHashMap(); 52 | long smallest = Long.MAX_VALUE; 53 | long largest = Long.MIN_VALUE; 54 | long count = 0; 55 | 56 | // Single pass: track the extremes and count the occurrences of each value. 57 | final LongIterator values = BinIO.asLongIterator(parsed.getString("input")); 58 | while (values.hasNext()) { 59 | final long value = values.nextLong(); 60 | if (value > largest) largest = value; 61 | if (value < smallest) smallest = value; 62 | histogram.addTo(value, 1); 63 | count++; 64 | } 65 | 66 | System.out.println("Min: " + smallest); 67 | System.out.println("Max: " + largest); 68 | 69 | // Empirical entropy: the sum of -p * log2(p) over the relative frequencies p. 70 | double entropy = 0; 71 | final LongIterator occurrences = histogram.values().iterator(); 72 | while (occurrences.hasNext()) { 73 | final double p = 
occurrences.nextLong() / (double)count; 74 | entropy -= p * Fast.log2(p); 75 | } 76 | System.out.println("Entropy: " + entropy); 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/sux4j/util/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Succinct data structures for collections. 3 | * 4 | *

5 | * This package provides implementations of some succinct techniques for the storage of static 6 | * lists. The main ingredient is the Elias–Fano representation of monotone sequences. For 7 | * monotone sequences, such as file pointers, an 8 | * {@link it.unimi.dsi.sux4j.util.EliasFanoMonotoneLongBigList} is the obvious choice. For general 9 | * sequences, you can either use an {@link it.unimi.dsi.sux4j.util.EliasFanoPrefixSumLongBigList}, 10 | * which stores the sequence using its prefix sums, or an 11 | * {@link it.unimi.dsi.sux4j.util.EliasFanoLongBigList}. The former is faster and also provides 12 | * prefix sums, but the latter provides a better compression ratio if the stored values are skewed 13 | * towards small values. {@link it.unimi.dsi.sux4j.util.EliasFanoIndexedMonotoneLongBigList} 14 | * provides {@linkplain it.unimi.dsi.sux4j.util.EliasFanoIndexedMonotoneLongBigList#successor(long) 15 | * content-based addressing methods}. 16 | * 17 | *

18 | * {@link it.unimi.dsi.sux4j.util.MappedEliasFanoMonotoneLongBigList} is a memory-mapped version of 19 | * {@link it.unimi.dsi.sux4j.util.EliasFanoMonotoneLongBigList}. 20 | */ 21 | package it.unimi.dsi.sux4j.util; -------------------------------------------------------------------------------- /src/overview.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Sux4J: Succinct data structures for Java 5 | 6 | 7 | 8 |

Sux4J is an effort to bring succinct data structures to Java. Presently it provides a number 9 | of related implementations covering ranking/selection over bit arrays, compressed lists 10 | and [[monotone] minimal perfect hash] functions. 11 | 12 |

Sux4J is free software 13 | distributed under either the GNU Lesser General Public License 2.1+ or the Apache Software License 2.0. 14 | 15 | 16 | -------------------------------------------------------------------------------- /sux4j.bnd: -------------------------------------------------------------------------------- 1 | Automatic-Module-Name: it.unimi.dsi.sux4j 2 | Bundle-Name: it.unimi.dsi.sux4j 3 | Bundle-SymbolicName: it.unimi.dsi.sux4j 4 | Export-Package: it.unimi.dsi.sux4j.* 5 | Bundle-Version: ${version} 6 | -------------------------------------------------------------------------------- /test/it/unimi/dsi/sux4j/bits/BalancedParenthesesTestCase.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sux4J: Succinct data structures for Java 3 | * 4 | * Copyright (C) 2010-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.sux4j.bits; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | 24 | import it.unimi.dsi.bits.BitVector; 25 | import it.unimi.dsi.bits.LongArrayBitVector; 26 | import it.unimi.dsi.fastutil.ints.IntArrayList; 27 | import it.unimi.dsi.lang.MutableString; 28 | 29 | public abstract class BalancedParenthesesTestCase { 30 | 31 | public static String binary(long l, final boolean reverse) { 32 | if (reverse) l = Long.reverse(l); 33 | final MutableString s = new MutableString().append("0000000000000000000000000000000000000000000000000000000000000000000000000").append(Long.toBinaryString(l)); 34 | s.delete(0, s.length() - 64); 35 | s.insert(0, '\n'); 36 | s.append('\n'); 37 | for(int i = 0; i < 32; i++) s.append(" ").append(Long.toHexString((l >>> (31 - i) * 2) & 0x3)); 38 | s.append('\n'); 39 | for(int i = 0; i < 16; i++) s.append(" ").append(Long.toHexString((l >>> (15 - i) * 4) & 0xF)); 40 | s.append('\n'); 41 | return s.toString(); 42 | } 43 | 44 | 45 | public static LongArrayBitVector parse(final String s, final boolean check) { 46 | int e = 0; 47 | final LongArrayBitVector bv = LongArrayBitVector.getInstance(); 48 | for(int i = 0; i < s.length(); i++) { 49 | if (s.charAt(i) == '(') { 50 | bv.add(1); 51 | e++; 52 | } 53 | else { 54 | if (check && e == 0) throw new IllegalArgumentException(); 55 | bv.add(0); 56 | e--; 57 | } 58 | } 59 | 60 | if (check && e != 0) throw new IllegalArgumentException(); 61 | 62 | return bv; 63 | } 64 | 65 | 66 | public static long parseSmall(final String s, final boolean check) { 67 | if (s.length() > Long.SIZE) throw new IllegalArgumentException(); 68 | final LongArrayBitVector bv = parse(s, check); 69 | return bv.getLong(0, s.length()); 70 | } 71 | 72 | public static long parseSmall(final String s) { 73 | return parseSmall(s, true); 74 | } 75 | 76 | public void assertBalancedParentheses(final BalancedParentheses 
balancedParentheses) { 77 | final long length = balancedParentheses.bitVector().length(); 78 | final BitVector bits = balancedParentheses.bitVector(); 79 | 80 | // Build matching 81 | 82 | final IntArrayList stack = new IntArrayList(); 83 | final IntArrayList matches = new IntArrayList(); 84 | matches.size((int)length); 85 | 86 | for(int i = 0; i < length; i++) { 87 | if (bits.getBoolean(i)) stack.push(i); 88 | else { 89 | if (stack.isEmpty()) throw new AssertionError("The bit vector does not represent a correctly parenthesised string"); 90 | final int pos = stack.popInt(); 91 | matches.set(pos, i); 92 | matches.set(i, pos); 93 | } 94 | } 95 | 96 | if (! stack.isEmpty()) throw new AssertionError("The bit vector does not represent a correctly parenthesised string"); 97 | 98 | for(int i = 0; i < length; i++) { 99 | if (bits.getBoolean(i)) assertEquals("Finding closing for position " + i, matches.getInt(i), balancedParentheses.findClose(i)); 100 | // else assertEquals("Finding opening for position " + i, matches.getInt(i), 101 | // balancedParentheses.findOpen(i)); 102 | } 103 | } 104 | 105 | } 106 | -------------------------------------------------------------------------------- /test/it/unimi/dsi/sux4j/bits/Rank12Test.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sux4J: Succinct data structures for Java 3 | * 4 | * Copyright (C) 2010-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 
12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.sux4j.bits; 21 | 22 | import java.util.Random; 23 | 24 | import org.junit.Test; 25 | 26 | import it.unimi.dsi.bits.LongArrayBitVector; 27 | import it.unimi.dsi.util.XoRoShiRo128PlusRandom; 28 | 29 | public class Rank12Test extends RankSelectTestCase { 30 | 31 | @Test 32 | public void testEmpty() { 33 | Rank12 rank12; 34 | rank12 = new Rank12(new long[1], 64); 35 | assertRank(rank12); 36 | rank12 = new Rank12(new long[2], 128); 37 | assertRank(rank12); 38 | rank12 = new Rank12(new long[1], 63); 39 | assertRank(rank12); 40 | rank12 = new Rank12(new long[2], 65); 41 | assertRank(rank12); 42 | rank12 = new Rank12(new long[3], 129); 43 | assertRank(rank12); 44 | } 45 | 46 | @Test 47 | public void testSingleton() { 48 | Rank12 rank12; 49 | 50 | rank12 = new Rank12(new long[] { 1L << 63, 0 }, 64); 51 | assertRank(rank12); 52 | 53 | rank12 = new Rank12(new long[] { 1 }, 64); 54 | assertRank(rank12); 55 | 56 | rank12 = new Rank12(new long[] { 1L << 63, 0 }, 128); 57 | assertRank(rank12); 58 | 59 | rank12 = new Rank12(new long[] { 1L << 63, 0 }, 65); 60 | assertRank(rank12); 61 | 62 | rank12 = new Rank12(new long[] { 1L << 63, 0, 0 }, 129); 63 | assertRank(rank12); 64 | } 65 | 66 | @Test 67 | public void testDoubleton() { 68 | Rank12 rank12; 69 | 70 | rank12 = new Rank12(new long[] { 1 | 1L << 32 }, 64); 71 | assertRank(rank12); 72 | 73 | rank12 = new Rank12(new long[] { 1, 1 }, 128); 74 | assertRank(rank12); 75 | 76 | rank12 = new Rank12(new long[] { 1 | 1L << 32, 0 }, 63); 77 | assertRank(rank12); 78 | 79 | rank12 = new Rank12(new long[] { 1, 1, 0 }, 129); 80 | assertRank(rank12); 81 | } 82 | 83 | @Test 84 | public void testAlternating() { 85 | Rank12 rank12; 
86 | 87 | rank12 = new Rank12(new long[] { 0xAAAAAAAAAAAAAAAAL }, 64); 88 | assertRank(rank12); 89 | 90 | rank12 = new Rank12(new long[] { 0xAAAAAAAAAAAAAAAAL, 0xAAAAAAAAAAAAAAAAL }, 128); 91 | assertRank(rank12); 92 | 93 | rank12 = new Rank12(new long[] { 0xAAAAAAAAAAAAAAAAL, 0xAAAAAAAAAAAAAAAAL, 0xAAAAAAAAAAAAAAAAL, 0xAAAAAAAAAAAAAAAAL, 0xAAAAAAAAAAAAAAAAL }, 64 * 5); 94 | assertRank(rank12); 95 | 96 | rank12 = new Rank12(new long[] { 0xAAAAAAAAL }, 33); 97 | assertRank(rank12); 98 | 99 | rank12 = new Rank12(new long[] { 0xAAAAAAAAAAAAAAAAL, 0xAAAAAAAAAAAAL }, 128); 100 | assertRank(rank12); 101 | } 102 | 103 | @Test 104 | public void testSelect() { 105 | Rank12 rank12; 106 | rank12 = new Rank12(LongArrayBitVector.of(1, 0, 1, 1, 0, 0, 0).bits(), 7); 107 | assertRank(rank12); 108 | } 109 | 110 | @Test 111 | public void testRandom() { 112 | for (int size = 10; size <= 100000000; size *= 10) { 113 | final Random r = new XoRoShiRo128PlusRandom(1); 114 | final LongArrayBitVector bitVector = LongArrayBitVector.getInstance(size); 115 | for (int i = 0; i < size; i++) 116 | bitVector.add(r.nextBoolean()); 117 | final Rank12 rank12 = new Rank12(bitVector); 118 | assertRank(rank12); 119 | } 120 | } 121 | 122 | @Test 123 | public void testAllSizes() { 124 | LongArrayBitVector v; 125 | Rank12 rank12; 126 | for (int size = 0; size <= 4096; size++) { 127 | v = LongArrayBitVector.getInstance().length(size); 128 | for (int i = (size + 1) / 2; i-- != 0;) 129 | v.set(i * 2); 130 | rank12 = new Rank12(v); 131 | assertRank(rank12); 132 | } 133 | } 134 | } 135 | -------------------------------------------------------------------------------- /test/it/unimi/dsi/sux4j/bits/RankSelectTestCase.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sux4J: Succinct data structures for Java 3 | * 4 | * Copyright (C) 2010-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the 
GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.sux4j.bits; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | 24 | import it.unimi.dsi.bits.BitVector; 25 | 26 | public abstract class RankSelectTestCase { 27 | public void assertRankAndSelect(final Rank rank, final Select select) { 28 | final long length = rank.bitVector().length(); 29 | final BitVector bits = rank.bitVector(); 30 | 31 | for(int j = 0, i = 0; i < length; i++) { 32 | assertEquals("Ranking " + i, j, rank.rank(i)); 33 | if (bits.getBoolean(i)) { 34 | assertEquals("Selecting " + j, i, select.select(j)); 35 | j++; 36 | } 37 | 38 | } 39 | } 40 | 41 | public void assertSelect(final Select s) { 42 | final BitVector bits = s.bitVector(); 43 | final long length = bits.length(); 44 | 45 | for(int j = 0, i = 0; i < length; i++) { 46 | if (bits.getBoolean(i)) { 47 | assertEquals("Selecting " + j, i, s.select(j)); 48 | j++; 49 | } 50 | 51 | } 52 | } 53 | 54 | public void assertSelectZero(final SelectZero s) { 55 | final BitVector bits = s.bitVector(); 56 | final long length = bits.length(); 57 | 58 | for(int j = 0, i = 0; i < length; i++) { 59 | if (! 
bits.getBoolean(i)) { 60 | assertEquals("Selecting " + j, i, s.selectZero(j)); 61 | j++; 62 | } 63 | 64 | } 65 | } 66 | 67 | public void assertRank(final Rank rank) { 68 | final long length = rank.bitVector().length(); 69 | final BitVector bits = rank.bitVector(); 70 | 71 | for(long j = 0, i = 0; i < length; i++) { 72 | assertEquals("Ranking " + i, j, rank.rank(i)); 73 | if (bits.getBoolean(i)) j++; 74 | } 75 | } 76 | 77 | } 78 | -------------------------------------------------------------------------------- /test/it/unimi/dsi/sux4j/bits/TrivialBalancedParentheses.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sux4J: Succinct data structures for Java 3 | * 4 | * Copyright (C) 2010-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.sux4j.bits; 21 | 22 | import it.unimi.dsi.bits.BitVector; 23 | 24 | public class TrivialBalancedParentheses implements BalancedParentheses { 25 | private static final long serialVersionUID = 1L; 26 | private final BitVector v; 27 | 28 | public TrivialBalancedParentheses(final BitVector v) { 29 | this.v = v; 30 | } 31 | 32 | @Override 33 | public BitVector bitVector() { 34 | return v; 35 | } 36 | 37 | @Override 38 | public long enclose(final long pos) { 39 | throw new UnsupportedOperationException(); 40 | } 41 | 42 | @Override 43 | public long findClose(long pos) { 44 | if (! v.getBoolean(pos)) throw new IllegalArgumentException(); 45 | int c = 1; 46 | while(++pos < v.length()) { 47 | if (! v.getBoolean(pos)) c--; 48 | else c++; 49 | if (c == 0) return pos; 50 | } 51 | 52 | throw new IllegalArgumentException(); 53 | } 54 | 55 | @Override 56 | public long findOpen(long pos) { 57 | if (v.getBoolean(pos)) throw new IllegalArgumentException(); 58 | 59 | int c = 1; 60 | while(--pos >= 0) { 61 | if (! 
v.getBoolean(pos)) c++; 62 | else c--; 63 | if (c == 0) return pos; 64 | } 65 | 66 | throw new IllegalArgumentException(); 67 | } 68 | 69 | @Override 70 | public long numBits() { 71 | return 0; 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /test/it/unimi/dsi/sux4j/bits/TrivialBalancedParenthesesTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sux4J: Succinct data structures for Java 3 | * 4 | * Copyright (C) 2010-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.sux4j.bits; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | 24 | import org.junit.Test; 25 | 26 | import it.unimi.dsi.bits.LongArrayBitVector; 27 | 28 | public class TrivialBalancedParenthesesTest extends BalancedParenthesesTestCase { 29 | 30 | @Test 31 | public void testSimple() { 32 | LongArrayBitVector bv = LongArrayBitVector.of(1, 0); 33 | TrivialBalancedParentheses bp = new TrivialBalancedParentheses(bv); 34 | assertBalancedParentheses(bp); 35 | assertEquals(1, bp.findClose(0)); 36 | assertEquals(0, bp.findOpen(1)); 37 | // assertEquals(0, bp.enclose(1)); 38 | 39 | bv = LongArrayBitVector.of(1, 1, 0, 0); 40 | bp = new TrivialBalancedParentheses(bv); 41 | assertBalancedParentheses(bp); 42 | assertEquals(3, bp.findClose(0)); 43 | // assertEquals(0, bp.enclose(1)); 44 | assertEquals(2, bp.findClose(1)); 45 | assertEquals(1, bp.findOpen(2)); 46 | // assertEquals(1, bp.enclose(2)); 47 | assertEquals(0, bp.findOpen(3)); 48 | // assertEquals(1, bp.enclose(3)); 49 | 50 | bv = LongArrayBitVector.of(1, 1, 0, 1, 0, 0); 51 | bp = new TrivialBalancedParentheses(bv); 52 | assertBalancedParentheses(bp); 53 | assertEquals(5, bp.findClose(0)); 54 | assertEquals(2, bp.findClose(1)); 55 | // assertEquals(0, bp.enclose(1)); 56 | assertEquals(1, bp.findOpen(2)); 57 | // assertEquals(1, bp.enclose(2)); 58 | assertEquals(4, bp.findClose(3)); 59 | // assertEquals(1, bp.enclose(3)); 60 | assertEquals(3, bp.findOpen(4)); 61 | // assertEquals(3, bp.enclose(4)); 62 | assertEquals(0, bp.findOpen(5)); 63 | // assertEquals(3, bp.enclose(5)); 64 | 65 | } 66 | 67 | } 68 | -------------------------------------------------------------------------------- /test/it/unimi/dsi/sux4j/io/BucketedHashStoreTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sux4J: Succinct data structures for Java 3 | * 4 | * Copyright (C) 
2019-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.sux4j.io; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | 24 | import java.io.IOException; 25 | 26 | import org.junit.Test; 27 | 28 | import it.unimi.dsi.bits.TransformationStrategies; 29 | 30 | public class BucketedHashStoreTest { 31 | 32 | @Test 33 | public void test() throws IOException { 34 | for(final int s: new int[] { 0, 1, 10, 100, 1000, 1000000 }) { 35 | final BucketedHashStore b = new BucketedHashStore<>(TransformationStrategies.fixedLong()); 36 | for(int i = 0; i < s; i++) b.add(Long.valueOf(i)); 37 | b.bucketSize(35); 38 | long t = 0; 39 | for(final BucketedHashStore.Bucket bucket: b) t += bucket.size(); 40 | assertEquals(s, t); 41 | b.close(); 42 | } 43 | } 44 | 45 | } 46 | -------------------------------------------------------------------------------- /test/it/unimi/dsi/sux4j/io/FileLinesListTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sux4J: Succinct data structures for Java 3 | * 4 | * Copyright (C) 2010-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 
10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.sux4j.io; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | 24 | import java.io.File; 25 | import java.io.FileWriter; 26 | import java.io.IOException; 27 | 28 | import org.junit.Test; 29 | 
/** Checks that {@link FileLinesList} returns the expected lines, including empty ones and an unterminated last line. */
public class FileLinesListTest {

	/**
	 * Overwrites a file with the given content, closing the writer even if the write fails.
	 *
	 * @param file the file to overwrite.
	 * @param content the new content of the file.
	 * @throws IOException if the file cannot be opened or written.
	 */
	private static void write(final File file, final String content) throws IOException {
		try(FileWriter fw = new FileWriter(file)) {
			fw.write(content.toCharArray());
		}
	}

	/**
	 * Rewrites the same temporary file with three different contents and checks the lines
	 * returned each time.
	 *
	 * @throws IOException if the temporary file cannot be created, written or read.
	 */
	@Test
	public void test() throws IOException {
		final File t = File.createTempFile(FileLinesListTest.class.getName(), "tmp");
		t.deleteOnExit();

		// Empty and nonempty lines mixed, last line terminated.
		write(t, "\naa\naaaa\n\naa\n");

		FileLinesList fll = new FileLinesList(t.toString(), "ASCII");
		assertEquals("", fll.get(0).toString());
		assertEquals("aa", fll.get(1).toString());
		assertEquals("aaaa", fll.get(2).toString());
		assertEquals("", fll.get(3).toString());
		assertEquals("aa", fll.get(4).toString());

		// Empty lines only.
		write(t, "\n\n\n");

		fll = new FileLinesList(t.toString(), "ASCII");
		assertEquals("", fll.get(0).toString());
		assertEquals("", fll.get(1).toString());
		assertEquals("", fll.get(2).toString());

		// Last line without terminator.
		write(t, "\n\na");

		fll = new FileLinesList(t.toString(), "ASCII");
		assertEquals("", fll.get(0).toString());
		assertEquals("", fll.get(1).toString());
		assertEquals("a", fll.get(2).toString());

		/*
		 * Disabled case (empty file):
		 *
		 * write(t, "");
		 *
		 * fll = new FileLinesList(t.toString(), "ASCII"); assertEquals("", fll.get(0).toString());
		 */
	}
}
74 | -------------------------------------------------------------------------------- /test/it/unimi/dsi/sux4j/mph/CHDMinimalPerfectHashFunctionTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sux4J: Succinct data structures for Java 3 | * 4 | * Copyright (C) 2010-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.sux4j.mph; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | 24 | import java.io.File; 25 | import java.io.IOException; 26 | import java.util.Arrays; 27 | import java.util.Collections; 28 | import java.util.List; 29 | 30 | import org.junit.Test; 31 | 32 | import it.unimi.dsi.bits.TransformationStrategies; 33 | import it.unimi.dsi.fastutil.io.BinIO; 34 | import it.unimi.dsi.sux4j.mph.CHDMinimalPerfectHashFunction.Builder; 35 | 36 | @SuppressWarnings("deprecation") 37 | public class CHDMinimalPerfectHashFunctionTest { 38 | 39 | private void check(final int size, final String[] s, final CHDMinimalPerfectHashFunction mph, final int w) { 40 | final int[] check = new int[s.length]; 41 | Arrays.fill(check, -1); 42 | for (int i = s.length; i-- != 0;) { 43 | assertEquals(Integer.toString(i), -1, check[(int)mph.getLong(s[i])]); 44 | check[(int)mph.getLong(s[i])] = i; 45 | } 46 | 47 | // Exercise code 
for negative results 48 | for (int i = 1000; i-- != 0;) 49 | if (w != 0) assertEquals(-1, mph.getLong(Integer.toString(i + size))); 50 | else mph.getLong(Integer.toString(i + size)); 51 | } 52 | 53 | @SuppressWarnings("unchecked") 54 | @Test 55 | public void testNumbers() throws IOException, ClassNotFoundException { 56 | 57 | for (final int size : new int[] { 0, 1, 4, 8, 20, 64, 100, 1000, 10000, 100000, 1000000 }) { 58 | for(final int signatureWidth: new int[] { 0, 32, 64 }) { 59 | System.err.println("Size: " + size + " w: " + signatureWidth); 60 | final String[] s = new String[size]; 61 | for (int i = s.length; i-- != 0;) 62 | s[i] = Integer.toString(i); 63 | 64 | CHDMinimalPerfectHashFunction mph = new Builder().keys(Arrays.asList(s)).transform(TransformationStrategies.utf16()).signed(signatureWidth).build(); 65 | 66 | check(size, s, mph, signatureWidth); 67 | 68 | final File temp = File.createTempFile(getClass().getSimpleName(), "test"); 69 | temp.deleteOnExit(); 70 | BinIO.storeObject(mph, temp); 71 | mph = (CHDMinimalPerfectHashFunction)BinIO.loadObject(temp); 72 | 73 | check(size, s, mph, signatureWidth); 74 | 75 | // From store 76 | final it.unimi.dsi.sux4j.io.ChunkedHashStore chunkedHashStore = new it.unimi.dsi.sux4j.io.ChunkedHashStore<>(TransformationStrategies.utf16(), null, signatureWidth < 0 ? 
-signatureWidth : 0, null); 77 | chunkedHashStore.addAll(Arrays.asList(s).iterator()); 78 | chunkedHashStore.checkAndRetry(Arrays.asList(s)); 79 | mph = new CHDMinimalPerfectHashFunction.Builder().store(chunkedHashStore).signed(signatureWidth).build(); 80 | chunkedHashStore.close(); 81 | 82 | check(size, s, mph, signatureWidth); 83 | } 84 | } 85 | } 86 | 87 | @Test 88 | public void testEmpty() throws IOException { 89 | final List emptyList = Collections.emptyList(); 90 | final CHDMinimalPerfectHashFunction mph = new CHDMinimalPerfectHashFunction.Builder().keys(emptyList).transform(TransformationStrategies.utf16()).build(); 91 | assertEquals(-1, mph.getLong("a")); 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /test/it/unimi/dsi/sux4j/mph/HypergraphFunctionTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sux4J: Succinct data structures for Java 3 | * 4 | * Copyright (C) 2010-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.sux4j.mph; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | 24 | import java.io.File; 25 | import java.io.IOException; 26 | import java.util.Arrays; 27 | 28 | import org.junit.Test; 29 | 30 | import it.unimi.dsi.bits.TransformationStrategies; 31 | import it.unimi.dsi.fastutil.io.BinIO; 32 | import it.unimi.dsi.fastutil.longs.LongArrayList; 33 | 
/** Checks that a {@link GOV3Function} returns the expected value for each key, also after a serialization round trip. */
public class HypergraphFunctionTest {

	/**
	 * Asserts that {@code function} maps each key to its position in {@code s}.
	 *
	 * @param function the function under test.
	 * @param s the keys.
	 */
	private static void assertMapsToIndex(final GOV3Function function, final String[] s) {
		for (int i = s.length; i-- != 0;)
			assertEquals(i, function.getLong(s[i]));
	}

	/**
	 * For several sizes, maps the decimal representation of <var>i</var> to <var>i</var>
	 * and checks the function after construction, after a serialization round trip and
	 * when it is built without explicit values (the assertions expect each key to be
	 * mapped to its position in the key list in that case).
	 *
	 * @throws IOException if the temporary file cannot be written or read.
	 * @throws ClassNotFoundException if deserialization fails.
	 */
	@SuppressWarnings("unchecked")
	@Test
	public void testNumbers() throws IOException, ClassNotFoundException {
		for (final int size : new int[] { 0, 1, 4, 8, 20, 64, 100, 1000 }) {
			System.err.println("Size: " + size);

			final String[] s = new String[size];
			final long[] v = new long[s.length];
			for (int i = s.length; i-- != 0;) {
				v[i] = i;
				s[i] = Integer.toString(i);
			}

			// NOTE(review): 12 is presumably the bit width of the stored values; confirm against GOV3Function.Builder.values().
			GOV3Function function = new GOV3Function.Builder().keys(Arrays.asList(s)).transform(TransformationStrategies.utf16()).values(LongArrayList.wrap(v), 12).build();
			assertMapsToIndex(function, s);

			final File temp = File.createTempFile(getClass().getSimpleName(), "test");
			temp.deleteOnExit();
			BinIO.storeObject(function, temp);
			function = (GOV3Function)BinIO.loadObject(temp);
			assertMapsToIndex(function, s);

			function = new GOV3Function.Builder().keys(Arrays.asList(s)).transform(TransformationStrategies.utf16()).build();
			assertMapsToIndex(function, s);
		}
	}

	/**
	 * Returns the 32-digit, zero-padded binary representation of an integer.
	 *
	 * @param l the integer to render.
	 * @return a string of exactly 32 binary digits.
	 */
	public static String binary(final int l) {
		final String s = "0000000000000000000000000000000000000000000000000000000000000000000000000" + Integer.toBinaryString(l);
		return s.substring(s.length() - 32);
	}

	/**
	 * Same as {@link #testNumbers()}, but on ten keys given by fixed-width binary strings,
	 * and without the final build without values.
	 *
	 * @throws IOException if the temporary file cannot be written or read.
	 * @throws ClassNotFoundException if deserialization fails.
	 */
	@SuppressWarnings("unchecked")
	@Test
	public void testSortedNumbers() throws IOException, ClassNotFoundException {

		final String[] s = new String[10];
		final long[] v = new long[s.length];
		for (int i = s.length; i-- != 0;) {
			v[i] = i;
			s[i] = binary(i);
		}

		GOV3Function function = new GOV3Function.Builder().keys(Arrays.asList(s)).transform(TransformationStrategies.utf16()).values(LongArrayList.wrap(v), 12).build();
		assertMapsToIndex(function, s);

		final File temp = File.createTempFile(getClass().getSimpleName(), "test");
		temp.deleteOnExit();
		BinIO.storeObject(function, temp);
		function = (GOV3Function)BinIO.loadObject(temp);
		assertMapsToIndex(function, s);
	}

}
95 | -------------------------------------------------------------------------------- /test/it/unimi/dsi/sux4j/mph/HypergraphSolverTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sux4J: Succinct data structures for Java 3 | * 4 | * Copyright (C) 2010-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.sux4j.mph; 21 | 22 | import static org.junit.Assert.assertTrue; 23 | 24 | import java.util.Arrays; 25 | 26 | import org.junit.Test; 27 | 28 | import it.unimi.dsi.fastutil.ints.IntOpenHashSet; 29 | import it.unimi.dsi.sux4j.mph.solve.Orient3Hypergraph; 30 | import it.unimi.dsi.util.XoRoShiRo128PlusRandomGenerator; 31 | 32 | 33 | public class HypergraphSolverTest { 34 | 35 | public static int[][] vertex2Edge(final int numVars, final int[] vertex0, final int[] vertex1, final int[] vertex2) { 36 | final int[][] vertex2Edge = new int[numVars][]; 37 | final int[] d = new int[numVars]; 38 | for(int i = vertex0.length; i-- != 0;) { 39 | d[vertex0[i]]++; 40 | d[vertex1[i]]++; 41 | d[vertex2[i]]++; 42 | } 43 | 44 | for(int v = numVars; v-- != 0;) vertex2Edge[v] = new int[d[v]]; 45 | Arrays.fill(d, 0); 46 | for(int i = vertex0.length; i-- != 0;) { 47 | vertex2Edge[vertex0[i]][d[vertex0[i]]++] = i; 48 | vertex2Edge[vertex1[i]][d[vertex1[i]]++] = i; 49 | vertex2Edge[vertex2[i]][d[vertex2[i]]++] = i; 50 | } 51 | 52 | return vertex2Edge; 53 | } 54 | 55 | @Test 56 | public void smallTest() { 57 | final int[] vertex0 = { 0, 1, 2, 3 }; 58 | final int[] vertex1 = { 1, 2, 0, 1 }; 59 | final int[] vertex2 = { 2, 3, 4, 0 }; 60 | final int[] d = { 3, 3, 3, 2, 1 }; 61 | final int[] hinges = new int[vertex1.length]; 62 | assertTrue(Orient3Hypergraph.orient(vertex2Edge(5, vertex0, vertex1, vertex2), d, vertex0, vertex1, vertex2, hinges)); 63 | } 64 | 65 | @Test 66 | public void randomTest() { 67 | final XoRoShiRo128PlusRandomGenerator random = new XoRoShiRo128PlusRandomGenerator(1); 68 | for(final int n : new int[] { 5, 10, 100, 1000 }) { 69 | for(int count = 0; count < 10; count++) { 70 | final int size = (int)(.9 * n); 71 | final int[] d = new int[n]; 72 | final int[] vertex0 = new int[size]; 73 | final int[] vertex1 = new int[size]; 74 | final int[] vertex2 = new int[size]; 75 | 
final int[] hinges = new int[size]; 76 | final IntOpenHashSet edge[] = new IntOpenHashSet[size]; 77 | 78 | int v, w; 79 | for (int i = 0; i < size; i++) { 80 | boolean alreadySeen; 81 | do { 82 | vertex0[i] = i; 83 | 84 | do v = random.nextInt(n); while(v == i); 85 | vertex1[i] = v; 86 | 87 | do w = random.nextInt(n); while(w == i || w == v); 88 | vertex2[i] = w; 89 | 90 | edge[i] = new IntOpenHashSet(); 91 | edge[i].add(i); 92 | edge[i].add(v); 93 | edge[i].add(w); 94 | 95 | alreadySeen = false; 96 | for(int j = 0; j < i; j++) 97 | if (edge[j].equals(edge[i])) { 98 | alreadySeen = true; 99 | break; 100 | } 101 | } while(alreadySeen); 102 | 103 | d[i]++; 104 | d[v]++; 105 | d[w]++; 106 | } 107 | 108 | assertTrue("size: " + n + ", count: " + count, Orient3Hypergraph.orient(vertex2Edge(d.length, vertex0, vertex1, vertex2), d, vertex0, vertex1, vertex2, hinges)); 109 | } 110 | } 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /test/it/unimi/dsi/sux4j/mph/LcpMonotoneMinimalPerfectHashFunctionTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sux4J: Succinct data structures for Java 3 | * 4 | * Copyright (C) 2010-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.sux4j.mph; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | 24 | import java.io.File; 25 | import java.io.IOException; 26 | import java.util.Arrays; 27 | 28 | import org.junit.Test; 29 | 30 | import it.unimi.dsi.bits.HuTuckerTransformationStrategy; 31 | import it.unimi.dsi.bits.TransformationStrategies; 32 | import it.unimi.dsi.fastutil.io.BinIO; 33 | 34 | public class LcpMonotoneMinimalPerfectHashFunctionTest { 35 | 36 | 37 | public static String binary(final int l) { 38 | final String s = "0000000000000000000000000000000000000000000000000000000000000000000000000" + Integer.toBinaryString(l); 39 | return s.substring(s.length() - 32); 40 | } 41 | 42 | private void check(final String[] s, final int size, final LcpMonotoneMinimalPerfectHashFunction mph, final int signatureWidth) { 43 | for (int i = s.length; i-- != 0;) assertEquals(i, mph.getLong(s[i])); 44 | 45 | // Exercise code for negative results 46 | if (signatureWidth == 0) for (int i = size; i-- != 0;) mph.getLong(binary(i + size)); 47 | else for (int i = size; i-- != 0;) assertEquals(-1, mph.getLong(binary(i + size))); 48 | } 49 | 50 | @SuppressWarnings("unchecked") 51 | @Test 52 | public void testSortedNumbers() throws IOException, ClassNotFoundException { 53 | 54 | for (int size = 1000; size < 10000000; size *= 10) { 55 | for (final int signatureWidth: new int[] { 0, 32, 64 }) { 56 | System.err.println("Size: " + size + " Signature width: " + signatureWidth); 57 | final String[] s = new String[size]; 58 | final int[] v = new int[s.length]; 59 | for (int i = s.length; i-- != 0;) 60 | s[v[i] = i] = binary(i); 61 | 62 | LcpMonotoneMinimalPerfectHashFunction mph = new LcpMonotoneMinimalPerfectHashFunction.Builder().keys(Arrays.asList(s)).transform(TransformationStrategies.prefixFreeUtf16()).signed(signatureWidth).build(); 63 | 64 | check(s, size, mph, signatureWidth); 65 | 66 | File temp = 
File.createTempFile(getClass().getSimpleName(), "test"); 67 | temp.deleteOnExit(); 68 | BinIO.storeObject(mph, temp); 69 | mph = (LcpMonotoneMinimalPerfectHashFunction)BinIO.loadObject(temp); 70 | 71 | check(s, size, mph, signatureWidth); 72 | 73 | mph = new LcpMonotoneMinimalPerfectHashFunction.Builder().keys(Arrays.asList(s)).transform(new HuTuckerTransformationStrategy(Arrays.asList(s), true)).signed(signatureWidth).build(); 74 | 75 | check(s, size, mph, signatureWidth); 76 | 77 | temp = File.createTempFile(getClass().getSimpleName(), "test"); 78 | temp.deleteOnExit(); 79 | BinIO.storeObject(mph, temp); 80 | 81 | check(s, size, mph, signatureWidth); 82 | } 83 | } 84 | } 85 | 86 | @Test 87 | public void testEmpty() throws IOException { 88 | final LcpMonotoneMinimalPerfectHashFunction mph = new LcpMonotoneMinimalPerfectHashFunction.Builder().keys(Arrays.asList(new String[] {})).transform(TransformationStrategies.prefixFreeUtf16()).build(); 89 | assertEquals(-1, mph.getLong("")); 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /test/it/unimi/dsi/sux4j/mph/TwoStepsGOV3FunctionTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sux4J: Succinct data structures for Java 3 | * 4 | * Copyright (C) 2010-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.sux4j.mph; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | 24 | import java.io.IOException; 25 | import java.util.Arrays; 26 | 27 | import org.junit.Test; 28 | 29 | import it.unimi.dsi.bits.TransformationStrategies; 30 | import it.unimi.dsi.fastutil.longs.LongArrayList; 31 | import it.unimi.dsi.fastutil.longs.LongBigList; 32 | import it.unimi.dsi.fastutil.longs.LongBigLists; 33 | 34 | public class TwoStepsGOV3FunctionTest { 35 | 36 | @Test 37 | public void testSimpleList() throws IOException { 38 | final LongBigList l = LongBigLists.asBigList(new LongArrayList(new long[] { 4, 4, 4, 0, 1 })); 39 | final TwoStepsGOV3Function mph = new TwoStepsGOV3Function.Builder().keys(Arrays.asList(new String[] { "a", "b", "c", "d", "e" })).transform(TransformationStrategies.utf16()).values(l).build(); 40 | assertEquals(l.getLong(0), mph.getLong("a")); 41 | assertEquals(l.getLong(1), mph.getLong("b")); 42 | assertEquals(l.getLong(2), mph.getLong("c")); 43 | assertEquals(l.getLong(3), mph.getLong("d")); 44 | assertEquals(l.getLong(4), mph.getLong("e")); 45 | } 46 | 47 | @Test 48 | public void testSimpleCompressedList() throws IOException { 49 | final LongBigList l = LongBigLists.asBigList(new LongArrayList(new long[] { 4, 4, 4, 4, 4, 4, 0, 10000 })); 50 | final TwoStepsGOV3Function mph = new TwoStepsGOV3Function.Builder().keys(Arrays.asList(new String[] { "a", "b", "c", "d", "e", "f", "g", "h" })).transform(TransformationStrategies.utf16()).values(l).build(); 51 | assertEquals(l.getLong(0), mph.getLong("a")); 52 | assertEquals(l.getLong(1), mph.getLong("b")); 53 | assertEquals(l.getLong(2), mph.getLong("c")); 54 | assertEquals(l.getLong(3), mph.getLong("d")); 55 | assertEquals(l.getLong(4), mph.getLong("e")); 56 | assertEquals(l.getLong(5), mph.getLong("f")); 57 | assertEquals(l.getLong(6), mph.getLong("g")); 58 | assertEquals(l.getLong(7), mph.getLong("h")); 59 
| } 60 | 61 | @Test 62 | public void testCompressedList() throws IOException { 63 | final LongBigList l = LongBigLists.asBigList(new LongArrayList(new long[] { 4, 4, 3, 3, 3, 4, 0, 10000 })); 64 | final TwoStepsGOV3Function mph = new TwoStepsGOV3Function.Builder().keys(Arrays.asList(new String[] { "a", "b", "c", "d", "e", "f", "g", "h" })).transform(TransformationStrategies.utf16()).values(l).build(); 65 | assertEquals(l.getLong(0), mph.getLong("a")); 66 | assertEquals(l.getLong(1), mph.getLong("b")); 67 | assertEquals(l.getLong(2), mph.getLong("c")); 68 | assertEquals(l.getLong(3), mph.getLong("d")); 69 | assertEquals(l.getLong(4), mph.getLong("e")); 70 | assertEquals(l.getLong(5), mph.getLong("f")); 71 | assertEquals(l.getLong(6), mph.getLong("g")); 72 | assertEquals(l.getLong(7), mph.getLong("h")); 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /test/it/unimi/dsi/sux4j/mph/TwoStepsLcpMonotoneMinimalPerfectHashFunctionTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sux4J: Succinct data structures for Java 3 | * 4 | * Copyright (C) 2010-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.sux4j.mph; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | 24 | import java.io.File; 25 | import java.io.IOException; 26 | import java.util.Arrays; 27 | 28 | import org.junit.Test; 29 | 30 | import it.unimi.dsi.bits.HuTuckerTransformationStrategy; 31 | import it.unimi.dsi.bits.TransformationStrategies; 32 | import it.unimi.dsi.fastutil.io.BinIO; 33 | 34 | public class TwoStepsLcpMonotoneMinimalPerfectHashFunctionTest { 35 | 36 | public static String binary(final int l) { 37 | final String s = "0000000000000000000000000000000000000000000000000000000000000000000000000" + Integer.toBinaryString(l); 38 | return s.substring(s.length() - 32); 39 | } 40 | 41 | private void check(final String[] s, final int size, final TwoStepsLcpMonotoneMinimalPerfectHashFunction mph, final int signatureWidth) { 42 | for (int i = s.length; i-- != 0;) assertEquals(i, mph.getLong(s[i])); 43 | 44 | // Exercise code for negative results 45 | if (signatureWidth == 0) for (int i = size; i-- != 0;) mph.getLong(binary(i + size)); 46 | else for (int i = size; i-- != 0;) assertEquals(-1, mph.getLong(binary(i + size))); 47 | } 48 | 49 | @SuppressWarnings("unchecked") 50 | @Test 51 | public void testSortedNumbers() throws IOException, ClassNotFoundException { 52 | 53 | for (int size = 1000; size < 10000000; size *= 10) { 54 | for (final int signatureWidth: new int[] { 0, 32, 64 }) { 55 | System.err.println("Size: " + size + " Signature width: " + signatureWidth); 56 | final String[] s = new String[size]; 57 | final int[] v = new int[s.length]; 58 | for (int i = s.length; i-- != 0;) s[v[i] = i] = binary(i); 59 | 60 | TwoStepsLcpMonotoneMinimalPerfectHashFunction mph = new TwoStepsLcpMonotoneMinimalPerfectHashFunction.Builder().keys(Arrays.asList(s)).transform(TransformationStrategies.prefixFreeUtf16()).signed(signatureWidth).build(); 61 | 62 | check(s, size, mph, signatureWidth); 
63 | 64 | File temp = File.createTempFile(getClass().getSimpleName(), "test"); 65 | temp.deleteOnExit(); 66 | BinIO.storeObject(mph, temp); 67 | mph = (TwoStepsLcpMonotoneMinimalPerfectHashFunction)BinIO.loadObject(temp); 68 | 69 | check(s, size, mph, signatureWidth); 70 | 71 | mph = new TwoStepsLcpMonotoneMinimalPerfectHashFunction.Builder().keys(Arrays.asList(s)).transform(new HuTuckerTransformationStrategy(Arrays.asList(s), true)).signed(signatureWidth).build(); 72 | 73 | check(s, size, mph, signatureWidth); 74 | 75 | temp = File.createTempFile(getClass().getSimpleName(), "test"); 76 | temp.deleteOnExit(); 77 | BinIO.storeObject(mph, temp); 78 | 79 | check(s, size, mph, signatureWidth); 80 | } 81 | } 82 | } 83 | 84 | @Test 85 | public void testEmpty() throws IOException { 86 | final TwoStepsLcpMonotoneMinimalPerfectHashFunction mph = new TwoStepsLcpMonotoneMinimalPerfectHashFunction.Builder().keys(Arrays.asList(new String[] {})).transform(TransformationStrategies.prefixFreeUtf16()).build(); 87 | assertEquals(-1, mph.getLong("")); 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /test/it/unimi/dsi/sux4j/mph/VLLcpMonotoneMinimalPerfectHashFunctionTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sux4J: Succinct data structures for Java 3 | * 4 | * Copyright (C) 2010-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.sux4j.mph; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | 24 | import java.io.File; 25 | import java.io.IOException; 26 | import java.util.Arrays; 27 | 28 | import org.junit.Test; 29 | 30 | import it.unimi.dsi.bits.HuTuckerTransformationStrategy; 31 | import it.unimi.dsi.bits.TransformationStrategies; 32 | import it.unimi.dsi.fastutil.io.BinIO; 33 | 34 | public class VLLcpMonotoneMinimalPerfectHashFunctionTest { 35 | 36 | 37 | public static String binary(final int l) { 38 | final String s = "0000000000000000000000000000000000000000000000000000000000000000000000000" + Integer.toBinaryString(l); 39 | return s.substring(s.length() - 32); 40 | } 41 | 42 | @SuppressWarnings("unchecked") 43 | @Test 44 | public void testSortedNumbers() throws IOException, ClassNotFoundException { 45 | 46 | final String[] s = new String[1000]; 47 | final int[] v = new int[s.length]; 48 | for (int i = s.length; i-- != 0;) 49 | s[v[i] = i] = binary(i); 50 | 51 | VLLcpMonotoneMinimalPerfectHashFunction mph = new VLLcpMonotoneMinimalPerfectHashFunction<>(Arrays.asList(s), TransformationStrategies.prefixFreeUtf16()); 52 | 53 | for (int i = s.length; i-- != 0;) 54 | assertEquals(i, mph.getLong(s[i])); 55 | 56 | // Exercise code for negative results 57 | for (int i = 1000; i-- != 0;) 58 | mph.getLong(binary(i * i + 1000)); 59 | 60 | File temp = File.createTempFile(getClass().getSimpleName(), "test"); 61 | temp.deleteOnExit(); 62 | BinIO.storeObject(mph, temp); 63 | mph = (VLLcpMonotoneMinimalPerfectHashFunction)BinIO.loadObject(temp); 64 | for (int i = s.length; i-- != 0;) 65 | assertEquals(i, mph.getLong(s[i])); 66 | 67 | 68 | mph = new VLLcpMonotoneMinimalPerfectHashFunction<>(Arrays.asList(s), new HuTuckerTransformationStrategy(Arrays.asList(s), true)); 69 | 70 | for (int i = s.length; i-- != 0;) 71 | assertEquals(i, mph.getLong(s[i])); 72 | 73 | temp = 
File.createTempFile(getClass().getSimpleName(), "test"); 74 | temp.deleteOnExit(); 75 | BinIO.storeObject(mph, temp); 76 | mph = (VLLcpMonotoneMinimalPerfectHashFunction)BinIO.loadObject(temp); 77 | for (int i = s.length; i-- != 0;) 78 | assertEquals(i, mph.getLong(s[i])); 79 | 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /test/it/unimi/dsi/sux4j/scratch/EliasFanoMonotoneLongBigListTablesTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sux4J: Succinct data structures for Java 3 | * 4 | * Copyright (C) 2010-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.sux4j.scratch; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | 24 | import org.junit.Test; 25 | 26 | import it.unimi.dsi.Util; 27 | import it.unimi.dsi.fastutil.longs.LongBigArrayBigList; 28 | import it.unimi.dsi.util.XoRoShiRo128PlusRandom; 29 | 30 | public class EliasFanoMonotoneLongBigListTablesTest { 31 | 32 | @Test 33 | public void testSmall() { 34 | LongBigArrayBigList l; 35 | 36 | l = new LongBigArrayBigList(new long[][] { { 0, 1, 2 } }); 37 | assertEquals(l, new EliasFanoMonotoneLongBigListTables(l)); 38 | 39 | l = new LongBigArrayBigList(new long[][] { { 0, 10, 20 } }); 40 | assertEquals(l, new EliasFanoMonotoneLongBigListTables(l)); 41 | } 42 | 43 | @Test 44 | public void testMedium() { 45 | // No skip tables involved 46 | LongBigArrayBigList l; 47 | 48 | l = new LongBigArrayBigList(Util.identity(1L << (EliasFanoMonotoneLongBigListTables.LOG_2_QUANTUM))); 49 | assertEquals(l, new EliasFanoMonotoneLongBigListTables(l)); 50 | 51 | for(int i = (int)l.size64(); i-- != 0;) l.set(i, l.getLong(i) * 1000); 52 | assertEquals(l, new EliasFanoMonotoneLongBigListTables(l)); 53 | 54 | l = new LongBigArrayBigList(Util.identity((1L << (EliasFanoMonotoneLongBigListTables.LOG_2_QUANTUM)) + 5)); 55 | assertEquals(l, new EliasFanoMonotoneLongBigListTables(l)); 56 | 57 | for(int i = (int)l.size64(); i-- != 0;) l.set(i, l.getLong(i) * 1000); 58 | assertEquals(l, new EliasFanoMonotoneLongBigListTables(l)); 59 | } 60 | 61 | @Test 62 | public void testLarge() { 63 | LongBigArrayBigList l; 64 | 65 | l = new LongBigArrayBigList(Util.identity(2 * (1L << (EliasFanoMonotoneLongBigListTables.LOG_2_QUANTUM)))); 66 | assertEquals(l, new EliasFanoMonotoneLongBigListTables(l)); 67 | 68 | for(int i = (int)l.size64(); i-- != 0;) l.set(i, l.getLong(i) * 1000); 69 | assertEquals(l, new EliasFanoMonotoneLongBigListTables(l)); 70 | 71 | l = new 
LongBigArrayBigList(Util.identity(2 * (1L << (EliasFanoMonotoneLongBigListTables.LOG_2_QUANTUM)) + 5)); 72 | assertEquals(l, new EliasFanoMonotoneLongBigListTables(l)); 73 | 74 | for(int i = (int)l.size64(); i-- != 0;) l.set(i, l.getLong(i) * 1000); 75 | assertEquals(l, new EliasFanoMonotoneLongBigListTables(l)); 76 | } 77 | 78 | @Test 79 | public void testRandom() { 80 | // Weird skips 81 | final LongBigArrayBigList l = new LongBigArrayBigList(); 82 | final XoRoShiRo128PlusRandom random = new XoRoShiRo128PlusRandom(0); 83 | for(long i = 10000000, c = 0; i-- != 0;) { 84 | c += Long.numberOfTrailingZeros(random.nextLong()); 85 | l.add(c); 86 | } 87 | assertEquals(l, new EliasFanoMonotoneLongBigListTables(l)); 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /test/it/unimi/dsi/sux4j/util/EliasFanoLongBigListTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sux4J: Succinct data structures for Java 3 | * 4 | * Copyright (C) 2010-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.sux4j.util; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | 24 | import org.junit.Test; 25 | 26 | import it.unimi.dsi.fastutil.longs.LongBigArrayBigList; 27 | import it.unimi.dsi.fastutil.longs.LongIterators; 28 | import it.unimi.dsi.util.XoRoShiRo128PlusRandom; 29 | 30 | public class EliasFanoLongBigListTest { 31 | 32 | @Test 33 | public void testSmall() { 34 | for (final boolean offline : new boolean[] { false, true }) { 35 | LongBigArrayBigList l; 36 | l = new LongBigArrayBigList(new long[][] { { 0, 0, 0 } }); 37 | assertEquals(l, new EliasFanoLongBigList(l.iterator(), 0, offline)); 38 | 39 | l = new LongBigArrayBigList(new long[][] { { 0, 1, 0 } }); 40 | assertEquals(l, new EliasFanoLongBigList(l.iterator(), 0, offline)); 41 | 42 | l = new LongBigArrayBigList(new long[][] { { 1, 1, 1 } }); 43 | assertEquals(l, new EliasFanoLongBigList(l.iterator(), 0, offline)); 44 | 45 | l = new LongBigArrayBigList(new long[][] { { 4, 3, 2 } }); 46 | assertEquals(l, new EliasFanoLongBigList(l.iterator(), 0, offline)); 47 | 48 | l = new LongBigArrayBigList(new long[][] { { 128, 2000, 50000000, 200, 10 } }); 49 | assertEquals(l, new EliasFanoLongBigList(l.iterator(), 0, offline)); 50 | } 51 | } 52 | 53 | @Test 54 | public void testBulk() { 55 | final XoRoShiRo128PlusRandom random = new XoRoShiRo128PlusRandom(0); 56 | for(final int base: new int[] { 0, 1, 10 }) { 57 | final long[] s = new long[100000]; 58 | for(int i = s.length; i-- != 0;) s[i] = random.nextInt(100) + base; 59 | final EliasFanoLongBigList ef = new EliasFanoLongBigList(LongIterators.wrap(s)); 60 | for(int i = 0; i < 1000; i++) { 61 | final int from = random.nextInt(s.length - 100); 62 | final int to = from + random.nextInt(100); 63 | final long[] dest = ef.get(from, new long[Math.max(1, to - from)]); 64 | for(int j = from; j < to; j++) assertEquals(s[j], dest[j - from]); 65 | } 66 | 67 | 
for(int i = 0; i < 1000; i++) { 68 | final int from = random.nextInt(s.length - 100); 69 | final int to = from + random.nextInt(100); 70 | final int offset = random.nextInt(10); 71 | final long[] dest = ef.get(from, new long[Math.max(offset + 1, to - from + offset + random.nextInt(10))], offset, to - from); 72 | for(int j = from; j < to; j++) assertEquals(s[j], dest[offset + j - from]); 73 | } 74 | } 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /test/it/unimi/dsi/sux4j/util/EliasFanoMonotoneLongBigList16Test.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sux4J: Succinct data structures for Java 3 | * 4 | * Copyright (C) 2010-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.sux4j.util; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | 24 | import org.junit.Test; 25 | 26 | import it.unimi.dsi.Util; 27 | import it.unimi.dsi.fastutil.longs.LongArrayList; 28 | import it.unimi.dsi.fastutil.longs.LongBigArrayBigList; 29 | import it.unimi.dsi.util.XoRoShiRo128PlusRandom; 30 | 31 | @Deprecated 32 | public class EliasFanoMonotoneLongBigList16Test { 33 | 34 | @Test 35 | public void testSmall() { 36 | LongBigArrayBigList l; 37 | 38 | l = new LongBigArrayBigList(new long[][] { { 0, 1, 2 } }); 39 | assertEquals(l, new EliasFanoMonotoneLongBigList16(l)); 40 | 41 | l = new LongBigArrayBigList(new long[][] { { 0, 10, 20 } }); 42 | assertEquals(l, new EliasFanoMonotoneLongBigList16(l)); 43 | } 44 | 45 | @Test 46 | public void testMedium() { 47 | // No skip tables involved 48 | LongBigArrayBigList l; 49 | 50 | l = new LongBigArrayBigList(Util.identity(100L)); 51 | assertEquals(l, new EliasFanoMonotoneLongBigList16(l)); 52 | 53 | for(int i = (int)l.size64(); i-- != 0;) l.set(i, l.getLong(i) * 1000); 54 | assertEquals(l, new EliasFanoMonotoneLongBigList16(l)); 55 | } 56 | 57 | @Test 58 | public void testLarge() { 59 | // No skip tables involved 60 | LongBigArrayBigList l; 61 | 62 | l = new LongBigArrayBigList(Util.identity(2 * (100L))); 63 | assertEquals(l, new EliasFanoMonotoneLongBigList16(l)); 64 | 65 | for(int i = (int)l.size64(); i-- != 0;) l.set(i, l.getLong(i) * 1000); 66 | assertEquals(l, new EliasFanoMonotoneLongBigList16(l)); 67 | } 68 | 69 | @Test 70 | public void testRandom() { 71 | // Weird skips 72 | final LongBigArrayBigList l = new LongBigArrayBigList(); 73 | final XoRoShiRo128PlusRandom random = new XoRoShiRo128PlusRandom(0); 74 | for(long i = 10000000, c = 0; i-- != 0;) { 75 | c += Long.numberOfTrailingZeros(random.nextLong()); 76 | l.add(c); 77 | } 78 | assertEquals(l, new EliasFanoMonotoneLongBigList16(l)); 79 | } 80 
| 81 | @Test 82 | public void testBulk() { 83 | final XoRoShiRo128PlusRandom random = new XoRoShiRo128PlusRandom(0); 84 | for(final int base: new int[] { 0, 1, 10 }) { 85 | for(final int jump : new int[] { 1, 10, 100 }) { 86 | final long[] s = new long[100000]; 87 | for(int i = 1; i < s.length; i++) s[i] = s[i - 1] + random.nextInt(jump) + base; 88 | final EliasFanoMonotoneLongBigList16 ef = new EliasFanoMonotoneLongBigList16(LongArrayList.wrap(s)); 89 | for(int i = 0; i < 1000; i++) { 90 | final int from = random.nextInt(s.length - 100); 91 | final int to = from + random.nextInt(100); 92 | final long[] dest = ef.get(from, new long[Math.max(1, to - from)]); 93 | for(int j = from; j < to; j++) assertEquals(s[j], dest[j - from]); 94 | } 95 | 96 | for(int i = 0; i < 1000; i++) { 97 | final int from = random.nextInt(s.length - 100); 98 | final int to = from + random.nextInt(100); 99 | final int offset = random.nextInt(10); 100 | final long[] dest = ef.get(from, new long[Math.max(offset + 1, to - from + offset + random.nextInt(10))], offset, to - from); 101 | for(int j = from; j < to; j++) assertEquals("From: " + from + " to: " + to + " j: " + j, s[j], dest[offset + j - from]); 102 | } 103 | } 104 | } 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /test/it/unimi/dsi/sux4j/util/MappedEliasFanoMonotoneLongBigListTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sux4J: Succinct data structures for Java 3 | * 4 | * Copyright (C) 2010-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 
12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.sux4j.util; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | 24 | import java.io.File; 25 | import java.io.IOException; 26 | import java.nio.ByteOrder; 27 | 28 | import org.junit.Test; 29 | 30 | import it.unimi.dsi.fastutil.longs.LongBigArrayBigList; 31 | import it.unimi.dsi.util.XoRoShiRo128PlusRandom; 32 | 33 | public class MappedEliasFanoMonotoneLongBigListTest { 34 | 35 | @Test 36 | public void testRandom() throws ClassNotFoundException, IOException { 37 | final File file = File.createTempFile(this.getClass().getName(), ".ef"); 38 | file.deleteOnExit(); 39 | new File(file + MappedEliasFanoMonotoneLongBigList.OBJECT_EXTENSION).deleteOnExit(); 40 | new File(file + MappedEliasFanoMonotoneLongBigList.LOWER_BITS_EXTENSION).deleteOnExit(); 41 | final LongBigArrayBigList l = new LongBigArrayBigList(); 42 | final XoRoShiRo128PlusRandom random = new XoRoShiRo128PlusRandom(0); 43 | for (long i = 1000, c = 0; i-- != 0;) { 44 | c += Long.numberOfTrailingZeros(random.nextLong()); 45 | l.add(c); 46 | } 47 | final EliasFanoMonotoneLongBigList e = new EliasFanoMonotoneLongBigList(l); 48 | e.dump(file.toString()); 49 | MappedEliasFanoMonotoneLongBigList m = MappedEliasFanoMonotoneLongBigList.load(file.toString()); 50 | assertEquals(m.copy(), e); 51 | assertEquals(l, e); 52 | m.close(); 53 | 54 | e.dump(file.toString(), ByteOrder.BIG_ENDIAN); 55 | m = MappedEliasFanoMonotoneLongBigList.load(file.toString()); 56 | assertEquals(m.copy(), e); 57 | assertEquals(m, e); 58 | m.close(); 59 | 60 | e.dump(file.toString(), ByteOrder.LITTLE_ENDIAN); 61 | m = MappedEliasFanoMonotoneLongBigList.load(file.toString()); 62 | assertEquals(m, e); 63 | assertEquals(m.copy(), 
e); 64 | m.close(); 65 | 66 | 67 | file.delete(); 68 | new File(file + MappedEliasFanoMonotoneLongBigList.OBJECT_EXTENSION).delete(); 69 | new File(file + MappedEliasFanoMonotoneLongBigList.LOWER_BITS_EXTENSION).delete(); 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /test/it/unimi/dsi/sux4j/util/TwoSizesBigListTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Sux4J: Succinct data structures for Java 3 | * 4 | * Copyright (C) 2010-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.sux4j.util; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | 24 | import org.junit.Test; 25 | 26 | import it.unimi.dsi.bits.LongArrayBitVector; 27 | import it.unimi.dsi.fastutil.longs.LongBigList; 28 | 29 | public class TwoSizesBigListTest { 30 | @Test 31 | public void testConstruction() { 32 | final LongBigList l = LongArrayBitVector.getInstance().asLongBigList(10); 33 | for (int i = 0; i < 1024; i++) 34 | l.add(i); 35 | TwoSizesLongBigList ts = new TwoSizesLongBigList(l); 36 | assertEquals(ts, l); 37 | 38 | l.clear(); 39 | for (int i = 0; i < 512; i++) 40 | l.add(2); 41 | for (int i = 0; i < 512; i++) 42 | l.add(i); 43 | ts = new TwoSizesLongBigList(l); 44 | assertEquals(ts, l); 45 | } 46 | } 47 | --------------------------------------------------------------------------------