├── .gitignore ├── CHANGES ├── COPYING.LESSER ├── LICENSE-2.0.txt ├── README.md ├── bash ├── maketablejmh.sh ├── permbfcl.sh └── permfcl.sh ├── bnd └── biz.aQute.bnd-5.2.0.jar ├── build.properties ├── build.xml ├── dsiutils.bnd ├── ivy.xml ├── makefile ├── pom-model.xml ├── prngperf ├── README ├── pom.xml └── src │ └── main │ └── java │ └── test │ ├── BenchmarkRandom.java │ ├── BenchmarkSplitMix64.java │ ├── BenchmarkSplittableRandom.java │ ├── BenchmarkThreadLocalRandom.java │ ├── BenchmarkXoRoShiRo128Plus.java │ ├── BenchmarkXoRoShiRo128PlusPlus.java │ ├── BenchmarkXoRoShiRo128StarStar.java │ ├── BenchmarkXoShiRo256Plus.java │ ├── BenchmarkXoShiRo256PlusPlus.java │ ├── BenchmarkXoShiRo256StarStar.java │ └── BenchmarkXorShift1024StarPhi.java ├── setcp.sh ├── slow └── it │ └── unimi │ └── dsi │ ├── big │ └── util │ │ ├── FrontCodedStringBigListSlowTest.java │ │ ├── ImmutableExternalPrefixMapSlowTest.java │ │ ├── LiterallySignedStringMapSlowTest.java │ │ └── ShiftAddXorSignedStringMapSlowTest.java │ └── util │ └── HyperLogLogCounterArraySlowTest.java ├── src ├── it │ └── unimi │ │ └── dsi │ │ ├── Util.java │ │ ├── big │ │ ├── io │ │ │ ├── FileLinesByteArrayCollection.java │ │ │ ├── FileLinesCollection.java │ │ │ └── package-info.java │ │ └── util │ │ │ ├── AbstractPrefixMap.java │ │ │ ├── FrontCodedStringBigList.java │ │ │ ├── ImmutableBinaryTrie.java │ │ │ ├── ImmutableExternalPrefixMap.java │ │ │ ├── LiterallySignedStringMap.java │ │ │ ├── LongBigListSignedStringMap.java │ │ │ ├── MappedFrontCodedStringBigList.java │ │ │ ├── PermutedFrontCodedStringBigList.java │ │ │ ├── PrefixMap.java │ │ │ ├── SemiExternalGammaBigList.java │ │ │ ├── ShiftAddXorSignedStringMap.java │ │ │ ├── StringMap.java │ │ │ ├── StringMaps.java │ │ │ ├── TernaryIntervalSearchTree.java │ │ │ └── package-info.java │ │ ├── bits │ │ ├── AbstractBitVector.java │ │ ├── BitVector.java │ │ ├── BitVectors.java │ │ ├── BooleanListBitVector.java │ │ ├── Fast.java │ │ ├── 
HuTuckerTransformationStrategy.java │ │ ├── LongArrayBitVector.java │ │ ├── LongBigArrayBitVector.java │ │ ├── PrefixCoderTransformationStrategy.java │ │ ├── TransformationStrategies.java │ │ ├── TransformationStrategy.java │ │ └── package-info.java │ │ ├── compression │ │ ├── CanonicalFast64CodeWordDecoder.java │ │ ├── CodeWordCoder.java │ │ ├── Codec.java │ │ ├── Coder.java │ │ ├── Decoder.java │ │ ├── Fast64CodeWordCoder.java │ │ ├── HuTuckerCodec.java │ │ ├── HuffmanCodec.java │ │ ├── PrefixCodec.java │ │ ├── PrefixCoder.java │ │ ├── TreeDecoder.java │ │ └── package-info.java │ │ ├── io │ │ ├── ByteBufferInputStream.java │ │ ├── ByteDiskQueue.java │ │ ├── DebugInputBitStream.java │ │ ├── DebugOutputBitStream.java │ │ ├── DelimitedWordReader.java │ │ ├── FastBufferedReader.java │ │ ├── FileLinesByteArrayIterable.java │ │ ├── FileLinesCollection.java │ │ ├── FileLinesMutableStringIterable.java │ │ ├── InputBitStream.java │ │ ├── LineIterator.java │ │ ├── LineWordReader.java │ │ ├── MultipleInputStream.java │ │ ├── NullInputStream.java │ │ ├── NullOutputStream.java │ │ ├── NullReader.java │ │ ├── OfflineIterable.java │ │ ├── OutputBitStream.java │ │ ├── SafelyCloseable.java │ │ ├── SegmentedInputStream.java │ │ ├── WordReader.java │ │ ├── delta.in.16 │ │ ├── delta.out.12 │ │ ├── gamma.in.16 │ │ ├── gamma.out.12 │ │ ├── package-info.java │ │ ├── shiftedgamma.in.16 │ │ ├── shiftedgamma.out.12 │ │ ├── zeta3.in.16 │ │ └── zeta3.out.12 │ │ ├── lang │ │ ├── EnumStringParser.java │ │ ├── FlyweightPrototype.java │ │ ├── FlyweightPrototypes.java │ │ ├── MutableString.java │ │ ├── ObjectParser.java │ │ └── package-info.java │ │ ├── logging │ │ ├── ProgressLogger.java │ │ └── package-info.java │ │ ├── package-info.java │ │ ├── parser │ │ ├── Attribute.java │ │ ├── BulletParser.java │ │ ├── Element.java │ │ ├── Entity.java │ │ ├── HTMLFactory.java │ │ ├── ParsingFactory.java │ │ ├── WellFormedXmlFactory.java │ │ ├── callback │ │ │ ├── Callback.java │ │ │ ├── 
ComposedCallbackBuilder.java │ │ │ ├── DebugCallbackDecorator.java │ │ │ ├── DefaultCallback.java │ │ │ ├── LinkExtractor.java │ │ │ ├── TextExtractor.java │ │ │ └── package-info.java │ │ └── package-info.java │ │ ├── stat │ │ ├── Jackknife.java │ │ ├── SummaryStats.java │ │ ├── Ziggurat.java │ │ └── package-info.java │ │ ├── test │ │ ├── GeneratePrecomputedCodes.java │ │ ├── GeneratePrecomputedOutputCodes.java │ │ ├── InputBitStreamSpeedTest.java │ │ ├── LeastSignificantBitSpeedTest.java │ │ ├── MutableStringLengthSpeedTest.java │ │ ├── MutableStringRegressionTest.java │ │ ├── MutableStringReplaceSpeedTest.java │ │ ├── RandomSpeed.java │ │ ├── StringMapSpeedTest.java │ │ ├── TextPatternSpeedTest.java │ │ ├── WTF.java │ │ ├── XorShift.java │ │ ├── XorShiftPoly.java │ │ ├── XorShiftPoly116.java │ │ └── XorShiftPoly928.java │ │ └── util │ │ ├── AbstractPrefixMap.java │ │ ├── BloomFilter.java │ │ ├── ByteBufferLongBigList.java │ │ ├── CircularCharArrayBuffer.java │ │ ├── FrontCodedStringList.java │ │ ├── HyperLogLogCounterArray.java │ │ ├── ImmutableBinaryTrie.java │ │ ├── ImmutableExternalPrefixMap.java │ │ ├── IntParallelCounterArray.java │ │ ├── Interval.java │ │ ├── Intervals.java │ │ ├── KahanSummation.java │ │ ├── LiterallySignedStringMap.java │ │ ├── LongInterval.java │ │ ├── LongIntervals.java │ │ ├── PermutedFrontCodedStringList.java │ │ ├── PrefixMap.java │ │ ├── Properties.java │ │ ├── SemiExternalGammaList.java │ │ ├── ShiftAddXorSignedStringMap.java │ │ ├── SplitMix64Random.java │ │ ├── SplitMix64RandomGenerator.java │ │ ├── StringMap.java │ │ ├── StringMaps.java │ │ ├── TernaryIntervalSearchTree.java │ │ ├── TextPattern.java │ │ ├── XoRoShiRo128PlusPlusRandom.java │ │ ├── XoRoShiRo128PlusPlusRandomGenerator.java │ │ ├── XoRoShiRo128PlusRandom.java │ │ ├── XoRoShiRo128PlusRandomGenerator.java │ │ ├── XoRoShiRo128StarStarRandom.java │ │ ├── XoRoShiRo128StarStarRandomGenerator.java │ │ ├── XoShiRo256PlusPlusRandom.java │ │ ├── 
XoShiRo256PlusPlusRandomGenerator.java │ │ ├── XoShiRo256PlusRandom.java │ │ ├── XoShiRo256PlusRandomGenerator.java │ │ ├── XoShiRo256StarStarRandom.java │ │ ├── XoShiRo256StarStarRandomGenerator.java │ │ ├── XorGensRandomGenerator.java │ │ ├── XorShift1024StarPhiRandom.java │ │ ├── XorShift1024StarPhiRandomGenerator.java │ │ ├── XorShift1024StarRandom.java │ │ ├── XorShift1024StarRandomGenerator.java │ │ ├── XorShift128PlusRandom.java │ │ ├── XorShift128PlusRandomGenerator.java │ │ ├── XorShift64StarRandom.java │ │ ├── XorShift64StarRandomGenerator.java │ │ ├── concurrent │ │ ├── ReorderingBlockingQueue.java │ │ └── package-info.java │ │ └── package-info.java ├── overview.html └── stylesheet.css └── test └── it └── unimi └── dsi ├── UtilTest.java ├── big └── util │ ├── FrontCodedStringBigListTest.java │ ├── ImmutableBinaryTrieTest.java │ ├── ImmutableExternalPrefixMapTest.java │ ├── LiterallySignedStringMapTest.java │ ├── LongBigArraySignedStringMapTest.java │ ├── MappedFrontCodedStringBigListTest.java │ ├── SemiExternalGammaBigListTest.java │ ├── ShiftAddXorSignedStringMapTest.java │ └── TernaryIntervalSearchTreeTest.java ├── bits ├── AbstractBitVectorTest.java ├── BitVectorTestCase.java ├── BitVectorsTest.java ├── BooleanListBitVectorTest.java ├── ByteArrayTransformationStrategyTest.java ├── FastTest.java ├── FixedLongTransformationStrategyTest.java ├── IsoTransformationStrategyTest.java ├── LongArrayBitVectorTest.java ├── LongBigArrayBitVectorTest.java ├── PrefixFreeTransformationStrategyTest.java ├── RawByteArrayTransformationStrategyTest.java ├── RawFixedLongTransformationStrategyTest.java ├── RawISOTransformationStrategyTest.java ├── RawUtf16TransformationStrategyTest.java ├── RawUtf32TransformationStrategyTest.java ├── Utf16TransformationStrategyTest.java └── Utf32TransformationStrategyTest.java ├── compression ├── CodecTestCase.java ├── HuTuckerCodecTest.java └── HuffmanCodecTest.java ├── io ├── ByteBufferInputStreamTest.java ├── ByteDiskQueueTest.java ├── 
DelimitedWordReaderTest.java ├── FastBufferedReaderTest.java ├── FileLinesByteArrayCollectionTest.java ├── FileLinesMutableStringIterableTest.java ├── InputBitStreamTest.java ├── OfflineIterableTest.java ├── OutputBitStreamTest.java └── SegmentedInputStreamTest.java ├── lang ├── EnumParserTest.java ├── MutableStringTest.java ├── ObjectParserTest.java └── TwoStrings.java ├── parser ├── BulletParserTest.java ├── callback │ ├── BulletParserCallbackContentHandler.java │ ├── LinkExtractorTest.data │ ├── LinkExtractorTest.java │ └── TextExtractorTest.java └── test.data ├── stat ├── JackknifeTest.java └── SummaryStatsTest.java ├── test └── XorShiftTest.java └── util ├── BloomFilterTest.java ├── ByteBufferLongBigListTest.java ├── CircularCharArrayBufferTest.java ├── FrontCodedStringListTest.java ├── HyperLogLogCounterArrayTest.java ├── ImmutableBinaryTrieTest.java ├── ImmutableExternalPrefixMapTest.java ├── IntervalTest.java ├── KahanSummationTest.java ├── LineIteratorTest.java ├── LiterallySignedStringMapTest.java ├── LongIntervalTest.java ├── SemiExternalGammaListTest.java ├── ShiftAddXorSignedStringMapTest.java ├── SplitMix64RandomGeneratorTest.java ├── SplitMix64RandomTest.java ├── TernaryIntervalSearchTreeTest.java ├── TextPatternTest.java ├── XoRoShiRo128PlusPlusRandomGeneratorTest.java ├── XoRoShiRo128PlusPlusRandomTest.java ├── XoRoShiRo128PlusRandomGeneratorTest.java ├── XoRoShiRo128PlusRandomTest.java ├── XoRoShiRo128StarStarRandomGeneratorTest.java ├── XoRoShiRo128StarStarRandomTest.java ├── XoShiRo256PlusPlusRandomGeneratorTest.java ├── XoShiRo256PlusPlusRandomTest.java ├── XoShiRo256PlusRandomGeneratorTest.java ├── XoShiRo256PlusRandomTest.java ├── XoShiRo256StarStarRandomGeneratorTest.java ├── XoShiRo256StarStarRandomTest.java ├── XorShift1024StarPhiRandomGeneratorTest.java ├── XorShift1024StarPhiRandomTest.java ├── XorShift1024StarRandomGeneratorTest.java ├── XorShift1024StarRandomTest.java ├── XorShift128PlusRandomGeneratorTest.java ├── 
XorShift128PlusRandomTest.java ├── XorShift64StarRandomGeneratorTest.java ├── XorShift64StarRandomTest.java └── concurrent └── ReorderingBlockingQueueTest.java /.gitignore: -------------------------------------------------------------------------------- 1 | .classpath 2 | .project 3 | coverage/ 4 | dist/ 5 | reports/ 6 | bin/ 7 | build/ 8 | pom.xml 9 | perf/ 10 | docs/* 11 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Welcome to the DSI Utilities! 2 | 3 | ## Introduction 4 | 5 | The DSI utilities are a mishmash of classes accumulated during the last 6 | twenty years in projects developed at the DSI (Dipartimento di Scienze 7 | dell'Informazione, i.e., Information Sciences Department), now DI 8 | (Dipartimento di Informatica, i.e., Informatics Department), of the 9 | Università degli Studi di Milano. 10 | 11 | The DSI utilities are free software distributed under either the [GNU 12 | Lesser General Public License 13 | 2.1+](https://www.gnu.org/licenses/old-licenses/lgpl-2.1.html) or the 14 | [Apache Software License 2.0](https://www.apache.org/licenses/LICENSE-2.0). 15 | 16 | ## Building 17 | 18 | You need [Ant](https://ant.apache.org/) and [Ivy](https://ant.apache.org/ivy/). 19 | Then, run `ant ivy-setupjars jar`. 20 | 21 | ## Papers 22 | 23 | * A [paper](http://vigna.di.unimi.it/papers.php#BoVMSJ) about the 24 | high-performance reimplementation of strings provided by the versatile 25 | class 26 | [`MutableString`](https://github.com/vigna/dsiutils/blob/master/src/it/unimi/dsi/lang/MutableString.java), 27 | and _compact approximators_, the randomized data structure used in 28 | [`TextPattern`](https://github.com/vigna/dsiutils/blob/master/src/it/unimi/dsi/util/TextPattern.java) 29 | to represent bad-character shifts.
30 | 31 | * A [paper](http://vigna.di.unimi.it/papers.php#VigBIRSQ) about the 32 | broadword implementation of select queries implemented in 33 | [`Fast.select()`](https://github.com/vigna/dsiutils/blob/master/src/it/unimi/dsi/bits/Fast.java). 34 | 35 | * Papers about the [pseudorandom number 36 | generators](http://prng.di.unimi.it/) included can be found 37 | [here](http://vigna.di.unimi.it/papers.php). 38 | -------------------------------------------------------------------------------- /bash/maketablejmh.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export LABEL=("nextLong()" "nextDouble()" "nextInt(100000)" "nextInt(230+1)") 4 | export i=0 5 | 6 | for m in nextLong nextDouble nextInt100000 nextInt2301; do 7 | echo " * " 8 | echo " * ${LABEL[$i]}" 9 | let i=i+1 10 | for r in Random ThreadLocalRandom SplittableRandom SplitMix64 XoRoShiRo128PlusPlus XoRoShiRo128StarStar XoRoShiRo128Plus XoShiRo256PlusPlus XoShiRo256StarStar XoShiRo256Plus XorShift1024StarPhi; do 11 | v=$(grep ^Benchmark$r.$m\ $1 | tr -s ' ' | cut -d' ' -f4) 12 | echo " * $v" 13 | done 14 | echo " * " 15 | done 16 | -------------------------------------------------------------------------------- /bash/permbfcl.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | 3 | if [[ "$@" == "" ]]; then 4 | echo "USAGE: $(basename $0) FCL" 1>&2 5 | echo "The list of string will be read from standard input in UTF-8 encoding." 1>&2 6 | exit 1 7 | fi 8 | 9 | PERM=$(mktemp) 10 | LEXFCL=$(mktemp) 11 | 12 | nl -v0 -nln | LC_ALL=C sort -S2G -T. 
-k2 | tee >(cut -f1 | tr -d ' ' >$PERM) | cut -f2 | java -server it.unimi.dsi.big.util.FrontCodedStringBigList -u $LEXFCL 13 | 14 | java -server it.unimi.dsi.big.util.PermutedFrontCodedStringBigList -i -t $LEXFCL $PERM $1 15 | 16 | rm -f $LEXFCL $PERM 17 | -------------------------------------------------------------------------------- /bash/permfcl.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | if [[ "$@" == "" ]]; then 4 | echo "USAGE: $(basename $0) FCL" 1>&2 5 | echo "The list of string will be read from standard input in UTF-8 encoding." 1>&2 6 | exit 1 7 | fi 8 | 9 | PERM=$(mktemp) 10 | LEXFCL=$(mktemp) 11 | 12 | nl -v0 -nln | LC_ALL=C sort -S2G -T. -k2 | tee >(cut -f1 | tr -d ' ' >$PERM) | cut -f2 | java -server it.unimi.dsi.util.FrontCodedStringList -u $LEXFCL 13 | 14 | java -server it.unimi.dsi.util.PermutedFrontCodedStringList -i -t $LEXFCL $PERM $1 15 | 16 | rm -f $LEXFCL $PERM 17 | -------------------------------------------------------------------------------- /bnd/biz.aQute.bnd-5.2.0.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vigna/dsiutils/e5e53d568d406d49c8458bd8b189b72179e401ce/bnd/biz.aQute.bnd-5.2.0.jar -------------------------------------------------------------------------------- /build.properties: -------------------------------------------------------------------------------- 1 | version=2.7.4 2 | 3 | build.sysclasspath=ignore 4 | 5 | jar.base=/usr/share/java 6 | javadoc.base=/usr/share/javadoc 7 | 8 | dist=dist 9 | src=src 10 | test=test 11 | slow=slow 12 | reports=reports 13 | coverage=coverage 14 | checkstyle=checkstyle 15 | docs=docs 16 | build=build 17 | instrumented=instr 18 | 19 | j2se.apiurl=https://docs.oracle.com/javase/8/docs/api/ 20 | fastutil.apiurl=https://fastutil.di.unimi.it/docs/ 21 | jsap.apiurl=http://www.martiansoftware.com/jsap/doc/javadoc/ 22 | 
junit.apiurl=https://junit.org/junit4/javadoc/latest/ 23 | log4j.apiurl=https://logging.apache.org/log4j/1.2/apidocs/ 24 | slf4j.apiurl=https://www.slf4j.org/apidocs/ 25 | commons-configuration2.apiurl=https://commons.apache.org/proper/commons-configuration/apidocs/ 26 | commons-io.apiurl=https://commons.apache.org/proper/commons-io/javadocs/api-release/ 27 | commons-lang3.apiurl=https://commons.apache.org/proper/commons-lang/javadocs/api-release/ 28 | commons-collections4.apiurl=https://commons.apache.org/proper/commons-collections/javadocs/api-4.4/ 29 | commons-math3.apiurl=https://commons.apache.org/proper/commons-math/javadocs/api-3.6.1/ 30 | guava.apiurl=https://javadoc.io/doc/com.google.guava/guava/latest/index.html 31 | -------------------------------------------------------------------------------- /dsiutils.bnd: -------------------------------------------------------------------------------- 1 | Automatic-Module-Name: it.unimi.dsi.dsiutils 2 | Bundle-Name: it.unimi.dsi.dsiutils 3 | Bundle-SymbolicName: it.unimi.dsi.dsiutils 4 | Export-Package: it.unimi.dsi.* 5 | Bundle-Version: ${version} 6 | -------------------------------------------------------------------------------- /ivy.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /makefile: -------------------------------------------------------------------------------- 1 | include build.properties 2 | 3 | TAR=tar 4 | 5 | source: 6 | rm -fr dsiutils-$(version) 7 | ant clean 8 | ln -s . 
dsiutils-$(version) 9 | $(TAR) chvf dsiutils-$(version)-src.tar --owner=0 --group=0 \ 10 | dsiutils-$(version)/README.md \ 11 | dsiutils-$(version)/CHANGES \ 12 | dsiutils-$(version)/COPYING.LESSER \ 13 | dsiutils-$(version)/LICENSE-2.0.txt \ 14 | dsiutils-$(version)/build.xml \ 15 | dsiutils-$(version)/ivy.xml \ 16 | dsiutils-$(version)/dsiutils.bnd \ 17 | dsiutils-$(version)/pom-model.xml \ 18 | dsiutils-$(version)/build.properties \ 19 | $$(find dsiutils-$(version)/src/it/unimi/dsi -iname \*.java -or -iname \*.html -or -iname \*.in.16 -or -iname \*.out.12) \ 20 | $$(find dsiutils-$(version)/test/it/unimi/dsi -iname \*.java -or -iname \*.html -or -iname \*.data) \ 21 | $$(find dsiutils-$(version)/slow/it/unimi/dsi -iname \*.java -or -iname \*.html) \ 22 | dsiutils-$(version)/src/overview.html 23 | $(TAR) --delete --wildcards -v -f dsiutils-$(version)-src.tar \ 24 | dsiutils-$(version)/src/it/unimi/dsi/test/*.java \ 25 | dsiutils-$(version)/test/it/unimi/dsi/test/*.java \ 26 | dsiutils-$(version)/src/it/unimi/dsi/util/IntParallel*.java \ 27 | dsiutils-$(version)/src/it/unimi/dsi/util/XorGens*.java \ 28 | dsiutils-$(version)/src/it/unimi/dsi/stat/Ziggurat.java 29 | gzip -f dsiutils-$(version)-src.tar 30 | rm dsiutils-$(version) 31 | 32 | binary: 33 | rm -fr dsiutils-$(version) 34 | $(TAR) zxvf dsiutils-$(version)-src.tar.gz 35 | (cd dsiutils-$(version) && unset CLASSPATH && unset LOCAL_IVY_SETTINGS && ant ivy-clean ivy-setupjars && ant junit && ant clean && ant jar javadoc) 36 | $(TAR) zcvf dsiutils-$(version)-bin.tar.gz --owner=0 --group=0 \ 37 | dsiutils-$(version)/README.md \ 38 | dsiutils-$(version)/CHANGES \ 39 | dsiutils-$(version)/COPYING.LESSER \ 40 | dsiutils-$(version)/LICENSE-2.0.txt \ 41 | dsiutils-$(version)/dsiutils-$(version).jar \ 42 | dsiutils-$(version)/docs 43 | $(TAR) zcvf dsiutils-$(version)-deps.tar.gz --owner=0 --group=0 --transform='s|.*/||' $$(find dsiutils-$(version)/jars/runtime -iname \*.jar -exec readlink {} \;) 44 | 45 | stage: 46 | rm 
-fr dsiutils-$(version) 47 | $(TAR) zxvf dsiutils-$(version)-src.tar.gz 48 | cp -fr bnd dsiutils-$(version) 49 | (cd dsiutils-$(version) && unset CLASSPATH && unset LOCAL_IVY_SETTINGS && ant ivy-clean ivy-setupjars && ant stage) 50 | -------------------------------------------------------------------------------- /pom-model.xml: -------------------------------------------------------------------------------- 1 | 2 | 4.0.0 3 | ${ivy.pom.groupId} 4 | ${ivy.pom.artifactId} 5 | jar 6 | DSI Utilities 7 | ${ivy.pom.version} 8 | The DSI utilities are a mishmash of classes accumulated during the last twenty years in projects developed at the DSI (Dipartimento di Scienze dell'Informazione, i.e., Information Sciences Department), now DI (Dipartimento di Informatica, i.e., Informatics Department), of the Universita` degli Studi di Milano. 9 | http://dsiutils.di.unimi.it/ 10 | 11 | 12 | GNU Lesser General Public License Version 2.1+ 13 | https://www.gnu.org/licenses/old-licenses/lgpl-2.1.html 14 | repo 15 | 16 | 17 | Apache License v2.0 18 | https://www.apache.org/licenses/LICENSE-2.0 19 | source 20 | 21 | 22 | 23 | scm:git://github.com/vigna/dsiutils.git 24 | https://github.com/vigna/dsiutils 25 | 26 | 27 | 28 | boldi 29 | Paolo Boldi 30 | paolo.boldi@unimi.it 31 | 32 | 33 | vigna 34 | Sebastiano Vigna 35 | sebastiano.vigna@unimi.it 36 | 37 | 38 | 39 | 1.8 40 | 1.8 41 | 42 | 43 | -------------------------------------------------------------------------------- /prngperf/README: -------------------------------------------------------------------------------- 1 | To perform a basic JMH run: 2 | 3 | mvn clean install && java -jar target/benchmarks.jar 4 | -------------------------------------------------------------------------------- /setcp.sh: -------------------------------------------------------------------------------- 1 | JAR=dsiutils 2 | 3 | sourcedir=$(cd $(dirname ${BASH_ARGV[0]}) && pwd) 4 | count=$(\ls -1 $sourcedir/$JAR-*.jar 2>/dev/null | wc -l) 5 | 6 | if (( count 
== 0 )); then 7 | echo "WARNING: no $JAR jar file." 8 | elif (( count > 1 )); then 9 | echo "WARNING: several $JAR jar files ($(\ls -m $JAR-*.jar))" 10 | else 11 | if echo $CLASSPATH | grep -q slf4j; then 12 | deps=$(\ls -1 $sourcedir/jars/test/*.jar | grep -v slf4j | paste -d: -s) 13 | else 14 | deps=$(\ls -1 $sourcedir/jars/test/*.jar | paste -d: -s) 15 | fi 16 | 17 | export CLASSPATH=$(ls -1 $sourcedir/$JAR-*.jar | tail -n 1):$deps:$CLASSPATH 18 | fi 19 | -------------------------------------------------------------------------------- /slow/it/unimi/dsi/big/util/FrontCodedStringBigListSlowTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2010-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.big.util; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | 24 | import java.nio.charset.StandardCharsets; 25 | import java.util.Iterator; 26 | import java.util.SplittableRandom; 27 | 28 | import org.junit.Test; 29 | 30 | import it.unimi.dsi.lang.MutableString; 31 | 32 | public class FrontCodedStringBigListSlowTest { 33 | @Test 34 | public void testLarge() { 35 | final long size = (1L << 31) + 10000; 36 | final FrontCodedStringBigList byteArrayFrontCodedBigList = new FrontCodedStringBigList(new Iterator() { 37 | SplittableRandom r = new SplittableRandom(0); 38 | long i = 0; 39 | 40 | @Override 41 | public boolean hasNext() { 42 | return i < size; 43 | } 44 | 45 | @Override 46 | public String next() { 47 | i++; 48 | return new String(new byte[] { (byte)r.nextLong() }, StandardCharsets.ISO_8859_1); 49 | } 50 | }, 10, true); 51 | SplittableRandom r = new SplittableRandom(0); 52 | for (long i = 0; i < size; i++) { 53 | assertEquals(new String(new byte[] { (byte)r.nextLong() }, StandardCharsets.ISO_8859_1), byteArrayFrontCodedBigList.get(i).toString()); 54 | } 55 | r = new SplittableRandom(0); 56 | final MutableString s = new MutableString(); 57 | for (long i = 0; i < size; i++) { 58 | byteArrayFrontCodedBigList.get(i, s); 59 | assertEquals(new String(new byte[] { (byte)r.nextLong() }, StandardCharsets.ISO_8859_1), s.toString()); 60 | } 61 | } 62 | 63 | } 64 | -------------------------------------------------------------------------------- /slow/it/unimi/dsi/big/util/ImmutableExternalPrefixMapSlowTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2002-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * 
http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.big.util; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | import static org.junit.Assert.assertTrue; 24 | 25 | import java.io.IOException; 26 | import java.util.Iterator; 27 | import java.util.NoSuchElementException; 28 | 29 | import org.junit.Test; 30 | 31 | import it.unimi.dsi.fastutil.objects.ObjectIterator; 32 | 33 | public class ImmutableExternalPrefixMapSlowTest { 34 | 35 | public void testBig(final int blockSize) throws IOException { 36 | final Iterable p = new Iterable() { 37 | private final static long INCREMENT= ((1L << 62) / 3000000000L); 38 | @Override 39 | public Iterator iterator() { 40 | return new ObjectIterator() { 41 | long curr = 0; 42 | @Override 43 | public boolean hasNext() { 44 | return curr < 3000000000L; 45 | } 46 | 47 | @Override 48 | public String next() { 49 | if (! 
hasNext()) throw new NoSuchElementException(); 50 | final long v = curr++ * INCREMENT ; 51 | final char[] a = new char[4]; 52 | a[0] = (char)(v >>> 48); 53 | a[1] = (char)(v >>> 32); 54 | a[2] = (char)(v >>> 16); 55 | a[3] = (char)v; 56 | return String.valueOf(a); 57 | } 58 | }; 59 | } 60 | }; 61 | 62 | final ImmutableExternalPrefixMap d = new ImmutableExternalPrefixMap(p, blockSize); 63 | 64 | int j = 0; 65 | for (final String s : p) { 66 | assertTrue(s, d.containsKey(s)); 67 | assertEquals(s, d.list().get(j++).toString()); 68 | } 69 | 70 | final Iterator k = d.iterator(); 71 | for(final Iterator i = p.iterator(); i.hasNext();) { 72 | assertTrue(i.hasNext() == k.hasNext()); 73 | assertEquals(i.next().toString(), k.next().toString()); 74 | } 75 | 76 | // Test negatives 77 | for(long i = 1000000000000L; i < 1000000002000L; i++) assertEquals(-1, d.getLong(Long.toBinaryString(i))); 78 | 79 | } 80 | 81 | @Test 82 | public void testBig1024() throws IOException { 83 | testBig(1024); 84 | } 85 | 86 | @Test 87 | public void testBig16384() throws IOException { 88 | testBig(16384); 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /slow/it/unimi/dsi/big/util/LiterallySignedStringMapSlowTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2002-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.big.util; 21 | 22 | import org.junit.Test; 23 | 24 | import it.unimi.dsi.fastutil.Size64; 25 | import it.unimi.dsi.fastutil.objects.AbstractObject2LongFunction; 26 | import it.unimi.dsi.fastutil.objects.AbstractObjectBigList; 27 | import it.unimi.dsi.lang.MutableString; 28 | 29 | public class LiterallySignedStringMapSlowTest { 30 | private final class LargeFunction extends AbstractObject2LongFunction implements Size64 { 31 | private static final long serialVersionUID = 1L; 32 | 33 | @Override 34 | public long getLong(final Object key) { 35 | try { 36 | final long l = Long.parseLong(key.toString()); 37 | return l < 1L << 31 ? l : -1; 38 | } 39 | catch(final Exception e) { 40 | return -1; 41 | } 42 | } 43 | 44 | @Override 45 | public boolean containsKey(final Object key) { 46 | try { 47 | final long l = Long.parseLong(key.toString()); 48 | return l < 1L << 31; 49 | } 50 | catch(final Exception e) { 51 | return false; 52 | } 53 | } 54 | 55 | @Override 56 | @Deprecated 57 | public int size() { 58 | return Integer.MAX_VALUE; 59 | } 60 | 61 | @Override 62 | public long size64() { 63 | return 1L << 31; 64 | } 65 | } 66 | 67 | @Test 68 | public void testLarge() { 69 | new LiterallySignedStringMap(new LargeFunction(), new AbstractObjectBigList() { 70 | 71 | @Override 72 | public MutableString get(final long index) { 73 | return new MutableString(Long.toString(index)); 74 | } 75 | 76 | @Override 77 | public long size64() { 78 | return 1L << 31; 79 | } 80 | }); 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /slow/it/unimi/dsi/big/util/ShiftAddXorSignedStringMapSlowTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2002-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 
7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.big.util; 21 | 22 | import org.junit.Test; 23 | 24 | import it.unimi.dsi.fastutil.Size64; 25 | import it.unimi.dsi.fastutil.objects.AbstractObject2LongFunction; 26 | import it.unimi.dsi.fastutil.objects.AbstractObjectBigList; 27 | 28 | public class ShiftAddXorSignedStringMapSlowTest { 29 | private final class LargeFunction extends AbstractObject2LongFunction implements Size64 { 30 | private static final long serialVersionUID = 1L; 31 | 32 | @Override 33 | public long getLong(final Object key) { 34 | try { 35 | final long l = Long.parseLong((String)key); 36 | return l < 1L << 31 ? 
l : -1; 37 | } 38 | catch(final Exception e) { 39 | return -1; 40 | } 41 | } 42 | 43 | @Override 44 | public boolean containsKey(final Object key) { 45 | try { 46 | final long l = Long.parseLong((String)key); 47 | return l < 1L << 31; 48 | } 49 | catch(final Exception e) { 50 | return false; 51 | } 52 | } 53 | 54 | @Override 55 | @Deprecated 56 | public int size() { 57 | return Integer.MAX_VALUE; 58 | } 59 | 60 | @Override 61 | public long size64() { 62 | return 1L << 31; 63 | } 64 | } 65 | 66 | @SuppressWarnings("deprecation") 67 | @Test 68 | public void testLarge() { 69 | new ShiftAddXorSignedStringMap(new AbstractObjectBigList() { 70 | 71 | @Override 72 | public String get(final long index) { 73 | return Long.toString(index); 74 | } 75 | 76 | @Override 77 | public long size64() { 78 | return 1L << 31; 79 | } 80 | }.iterator(), new LargeFunction(), 1); 81 | } 82 | 83 | } 84 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/big/io/package-info.java: -------------------------------------------------------------------------------- 1 | /** I/O big classes 2 | * 3 | *

Classes in this package are big versions of classes in {@link it.unimi.dsi.io}. 4 | */ 5 | 6 | package it.unimi.dsi.big.io; 7 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/big/util/PrefixMap.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2004-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.big.util; 21 | 22 | import it.unimi.dsi.fastutil.Size64; 23 | import it.unimi.dsi.fastutil.objects.Object2ObjectFunction; 24 | import it.unimi.dsi.util.LongInterval; 25 | 26 | /** A map from prefixes to string intervals (and possibly vice versa). 27 | * 28 | *

Instances of this class provide the services of a {@link StringMap}, but by assuming 29 | * the strings are lexicographically ordered, they can provide further information by 30 | * exposing a {@linkplain #rangeMap() function from string prefixes to intervals} and a 31 | * {@linkplain #prefixMap() function from intervals to string prefixes}. 32 | * 33 | *

In the first case, given a prefix, we can ask for the range of strings starting 34 | * with that prefix, expressed as a {@link LongInterval}. This information is very useful to 35 | * satisfy prefix queries (e.g., monitor*) with a brute-force approach. 36 | * 37 | *

Optionally, a prefix map may provide the opposite service: given an interval of terms, it 38 | * may provide the maximum common prefix. This feature can be checked for by calling 39 | * {@link #prefixMap()}. 40 | * 41 | * @author Sebastiano Vigna 42 | * @since 2.0 43 | */ 44 | 45 | public interface PrefixMap extends StringMap, Size64 { 46 | /** Returns a function mapping prefixes to ranges of strings. 47 | * 48 | * @return a function mapping prefixes to ranges of strings. 49 | */ 50 | Object2ObjectFunction rangeMap(); 51 | 52 | /** Returns a function mapping ranges of strings to common prefixes (optional operation). 53 | * 54 | * @return a function mapping ranges of strings to common prefixes, or {@code null} if this 55 | * map does not support prefixes. 56 | */ 57 | Object2ObjectFunction prefixMap(); 58 | } 59 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/big/util/StringMap.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2008-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.big.util; 21 | 22 | import java.io.Serializable; 23 | 24 | import it.unimi.dsi.fastutil.Size64; 25 | import it.unimi.dsi.fastutil.objects.Object2LongFunction; 26 | import it.unimi.dsi.fastutil.objects.ObjectBigList; 27 | 28 | /** A map from strings to longs (and possibly vice versa). 29 | * 30 | *

String maps represent mappings from strings (actually, any subclass of {@link CharSequence}) 31 | * to numbers; they can support {@linkplain #list() reverse 32 | * mapping}, too. The latter usually makes sense only if the map is minimal and perfect (i.e., a bijection between a set 33 | * of strings and an initial segment of the natural numbers of the same size). String maps are useful for 34 | * terms of an MG4J 35 | * inverted index, URLs of a WebGraph-compressed 36 | * web snapshot, and so on. 37 | * 38 | * @author Sebastiano Vigna 39 | * @since 2.0 40 | */ 41 | 42 | public interface StringMap extends Object2LongFunction, Size64, Serializable { 43 | public static final long serialVersionUID = 0L; 44 | 45 | /** Returns a list view of the domain of this string map (optional operation). 46 | * 47 | *

Note that the list view acts as an inverse of the mapping implemented by this map. 48 | * 49 | * @return a list view of the domain of this string map, or {@code null} if this map does 50 | * not support this operation. 51 | */ 52 | 53 | ObjectBigList list(); 54 | 55 | /** Returns the intended number of keys in this function, or -1 if no such number exists. 56 | * 57 | *

Most function implementations will have some knowledge of the intended number of keys 58 | * in their domain. In some cases, however, this might not be possible. This default 59 | * implementation, in particular, returns -1. 60 | * 61 | * @return the intended number of keys in this function, or -1 if that number is not available. 62 | */ 63 | @Override 64 | default long size64() { 65 | return -1; 66 | } 67 | 68 | /** {@inheritDoc} 69 | * @deprecated Please use {@link #size64()} instead. */ 70 | @Deprecated 71 | @Override 72 | default int size() { 73 | return (int) Math.min(Integer.MAX_VALUE, size64()); 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/big/util/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Collections and similar big classes 3 | * 4 | *

5 | * Some classes in this package are big versions of classes in {@link it.unimi.dsi.util}. Other 6 | * classes exist only in the big versions (e.g., 7 | * {@link it.unimi.dsi.big.util.MappedFrontCodedStringBigList}). 8 | */ 9 | 10 | package it.unimi.dsi.big.util; 11 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/bits/TransformationStrategy.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2007-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.bits; 21 | 22 | import java.io.Serializable; 23 | 24 | /** A generic transformation from objects of a given type to bit vectors. Most useful 25 | * when adding strings, etc. to a trie. 26 | */ 27 | 28 | public interface TransformationStrategy extends Serializable { 29 | /** Returns a bit vector representation of the given object. 30 | * 31 | * @param object the object to be turned into a bit-vector representation. 32 | * @return a bit-vector representation of object. 33 | */ 34 | BitVector toBitVector(T object); 35 | 36 | /** The (approximate) number of bits occupied by this transformation. 37 | * 38 | * @return the (approximate) number of bits occupied by this transformation.
39 | */ 40 | long numBits(); 41 | 42 | /** Returns a copy of this transformation strategy. 43 | * 44 | * @return a copy of this transformation strategy. 45 | */ 46 | TransformationStrategy copy(); 47 | 48 | /** Returns the length of the bit vector that would be computed by {@link #toBitVector(Object)}. 49 | * 50 | *

The raison d'être of this method is that it is often easy to know 51 | * the length of the representation without actually computing the representation. 52 | * 53 | * @param object the object whose representation length is to be known. 54 | * @return the length of the bit-vector representation of object (the one that would be returned by {@link #toBitVector(Object)}). 55 | */ 56 | long length(T object); 57 | } 58 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/bits/package-info.java: -------------------------------------------------------------------------------- 1 | /** Main classes manipulating bits 2 | * 3 | *

The {@link it.unimi.dsi.bits.BitVector} interface is the basis for bit vector manipulation. 4 | * The {@link it.unimi.dsi.bits.LongArrayBitVector} class is its main implementation. 5 | * The idea is to offer an efficient but easy-to-use bit-vector class by allowing access under many different views. For instance, 6 | * a bit vector can be seen as a {@link it.unimi.dsi.fastutil.longs.LongBigList} of integers of fixed width. Or as a sorted set of 7 | * integers, where the positions of the bits set to one represent elements. 8 | * 9 | *

Whenever another object has to be turned into a bit string, you can provide a 10 | * {@link it.unimi.dsi.bits.TransformationStrategy} to that purpose. The static container 11 | * {@link it.unimi.dsi.bits.TransformationStrategies} has several ready-made transformations, 12 | * and some useful wrapping methods. 13 | */ 14 | 15 | package it.unimi.dsi.bits; 16 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/compression/CodeWordCoder.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2005-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.compression; 21 | 22 | import java.io.IOException; 23 | import java.io.Serializable; 24 | 25 | import it.unimi.dsi.bits.BitVector; 26 | import it.unimi.dsi.fastutil.booleans.BooleanIterator; 27 | import it.unimi.dsi.fastutil.booleans.BooleanIterators; 28 | import it.unimi.dsi.io.OutputBitStream; 29 | 30 | /** A coder based on a set of codewords. */ 31 | 32 | public class CodeWordCoder implements PrefixCoder, Serializable { 33 | private static final long serialVersionUID = 1L; 34 | /** The array of codewords of this coder. 
*/ 35 | protected final BitVector[] codeWord; 36 | 37 | /** Creates a new codeword-based coder using the given vector of codewords. The 38 | * coder will be able to encode symbols numbered from 0 to codeWord.length-1, included. 39 | * 40 | * @param codeWord a vector of codewords. 41 | */ 42 | public CodeWordCoder(final BitVector[] codeWord) { 43 | this.codeWord = codeWord; 44 | } 45 | 46 | @Override 47 | public BooleanIterator encode(final int symbol) { 48 | return codeWord[symbol].iterator(); 49 | } 50 | 51 | @Override 52 | public int encode(final int symbol, final OutputBitStream obs) throws IOException { 53 | final BitVector w = codeWord[symbol]; 54 | final int length = (int) w.length(); 55 | for(int i = 0; i < length; i++) obs.writeBit(w.getBoolean(i)); 56 | return length; 57 | } 58 | 59 | @Override 60 | public int flush(final OutputBitStream unused) { return 0; } 61 | 62 | @Override 63 | public BooleanIterator flush() { return BooleanIterators.EMPTY_ITERATOR; } 64 | 65 | @Override 66 | public BitVector[] codeWords() { return codeWord; } 67 | } 68 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/compression/Codec.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2005-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.compression; 21 | 22 | /** An abstract factory corresponding to an instance of a specific compression technique. 23 | * 24 | *

An implementation of this interface provides coders and decoders. The 25 | * constructors must provide all data that is required to perform coding 26 | * and decoding. 27 | */ 28 | 29 | public interface Codec { 30 | /** Returns a coder for the compression technique represented by this codec. 31 | * 32 | * @return a coder for the compression technique represented by this codec. */ 33 | public Coder coder(); 34 | 35 | /** Returns a decoder for the compression technique represented by this codec. 36 | * 37 | * @return a decoder for the compression technique represented by this codec. */ 38 | public Decoder decoder(); 39 | 40 | /** Returns the number of symbols handled by this codec. 41 | * 42 | * @return the number of symbols handled by this codec. 43 | */ 44 | public int size(); 45 | } 46 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/compression/Coder.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2005-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.compression; 21 | 22 | import java.io.IOException; 23 | 24 | import it.unimi.dsi.fastutil.booleans.BooleanIterator; 25 | import it.unimi.dsi.io.OutputBitStream; 26 | 27 | /** Coding methods for a specific compression technique. */ 28 | public interface Coder { 29 | /** Encodes a symbol. 30 | * 31 | * @param symbol a symbol. 32 | * @return a boolean iterator returning the bits coding symbol. 33 | */ 34 | BooleanIterator encode(int symbol); 35 | 36 | /** Encodes a symbol. 37 | * 38 | * @param symbol a symbol. 39 | * @param obs the output bit stream where the encoded symbol will be written. 40 | * @return the number of bits written. 41 | */ 42 | int encode(int symbol, OutputBitStream obs) throws IOException; 43 | 44 | /** Flushes the coder. 45 | * 46 | * Warning: this method will not {@link OutputBitStream#flush() flush} obs. 47 | * 48 | * @param obs the output bit stream where the flushing bits will be written. 49 | * @return the number of bits written to flush the coder. 50 | */ 51 | 52 | int flush(OutputBitStream obs); 53 | 54 | /** Flushes the coder. 55 | * 56 | * @return a boolean iterator returning the bits used to flush this coder. 57 | */ 58 | 59 | BooleanIterator flush(); 60 | } 61 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/compression/Decoder.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2005-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 
12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.compression; 21 | 22 | import java.io.IOException; 23 | 24 | import it.unimi.dsi.fastutil.booleans.BooleanIterator; 25 | import it.unimi.dsi.io.InputBitStream; 26 | 27 | /** Decoding methods for a specific compression technique. */ 28 | public interface Decoder { 29 | 30 | /** Decodes the next symbol from the given boolean iterator. 31 | * 32 | *

Note that {@link InputBitStream} implements {@link BooleanIterator}. 33 | * 34 | * @param iterator a boolean iterator. 35 | * @return the next symbol decoded from the bits emitted by iterator. 36 | * @throws java.util.NoSuchElementException if iterator terminates before a symbol has been decoded. 37 | */ 38 | int decode(BooleanIterator iterator); 39 | 40 | /** Decodes the next symbol from the given input bit stream. 41 | * 42 | *

Note that {@link InputBitStream} implements {@link BooleanIterator}. 43 | * 44 | * @param ibs an input bit stream. 45 | * @return the next symbol decoded from ibs. 46 | */ 47 | int decode(InputBitStream ibs) throws IOException; 48 | } 49 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/compression/Fast64CodeWordCoder.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2007-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.compression; 21 | 22 | import java.io.IOException; 23 | 24 | import it.unimi.dsi.bits.BitVector; 25 | import it.unimi.dsi.io.OutputBitStream; 26 | 27 | /** A fast coder based on a set of codewords of length at most 64. */ 28 | 29 | public final class Fast64CodeWordCoder extends CodeWordCoder { 30 | private static final long serialVersionUID = 1L; 31 | /** An array parallel to {@link #codeWord} containing the codewords as longs (right aligned). */ 32 | private final long[] longCodeWord; 33 | /** A cached array, parallel to {@link #longCodeWord}, of codewords length. */ 34 | private final int[] length; 35 | 36 | /** Creates a new codeword-based coder using the given vector of codewords. 
The 37 | * coder will be able to encode symbols numbered from 0 to codeWord.length-1, included. 38 | * 39 | * @param codeWord a vector of codewords. 40 | * @param longCodeWord the same codewords as those specified in codeWord, but 41 | * as right-aligned longs written in left-to-right fashion. 42 | */ 43 | public Fast64CodeWordCoder(final BitVector[] codeWord, final long[] longCodeWord) { 44 | super(codeWord); 45 | this.longCodeWord = longCodeWord; 46 | length = new int[codeWord.length]; 47 | for(int i = length.length; i-- != 0;) length[i] = (int) codeWord[i].length(); 48 | } 49 | 50 | @Override 51 | public int encode(final int symbol, final OutputBitStream obs) throws IOException { 52 | return obs.writeLong(longCodeWord[symbol], length[symbol]); 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/compression/PrefixCodec.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2005-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.compression; 21 | 22 | import it.unimi.dsi.bits.BitVector; 23 | 24 | /** A codec based on a set of prefix-free codewords. 25 | * 26 | *

Prefix codecs work by building a vector of prefix-free codewords, one for each symbol. The 27 | * method {@link #codeWords()} returns that vector. Moreover, this interface 28 | * strengthens the return type of {@link #coder()} to {@link PrefixCoder}. 29 | */ 30 | public interface PrefixCodec extends Codec { 31 | /** Returns the vector of prefix-free codewords used by this prefix codec. 32 | * 33 | * @return the vector of prefix-free codewords used by this prefix codec. 34 | */ 35 | public BitVector[] codeWords(); 36 | 37 | @Override 38 | public PrefixCoder coder(); 39 | } 40 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/compression/PrefixCoder.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2005-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.compression; 21 | 22 | import it.unimi.dsi.bits.BitVector; 23 | 24 | /** A coder based on a set of prefix-free codewords. 25 | * 26 | *

Not all coders are codeword-based (for instance, arithmetic coding 27 | * is not codeword-based). However, coders that are based on prefix-free codewords are invited 28 | * to return by means of {@link it.unimi.dsi.compression.Codec#coder()} an 29 | * implementation of this interface. 30 | * 31 | *

Note that the {@linkplain PrefixCodec#coder() coder} returned by a {@link PrefixCodec} is 32 | * an implementation of this interface. 33 | */ 34 | public interface PrefixCoder extends Coder { 35 | 36 | /** Provides access to the codewords. 37 | * 38 | * Warning: bit 0 of each bit vector returned by {@link #codeWords()} is 39 | * the first (leftmost) bit of the corresponding codeword: in other words, codewords are stored in 40 | * right-to-left fashion. 41 | * 42 | * @return the codewords. 43 | */ 44 | 45 | BitVector[] codeWords(); 46 | } 47 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/compression/package-info.java: -------------------------------------------------------------------------------- 1 | /** Word-based compression/decompression classes 2 | * 3 | *

Classes in this package provide interfaces for the compression system, and implementations 4 | * for codeword-based compression. Their main usage is the construction of 5 | * {@linkplain it.unimi.dsi.util.ImmutableExternalPrefixMap prefix maps}, but 6 | * they are also used, for instance, for WebGraph label 7 | * compression. 8 | */ 9 | 10 | package it.unimi.dsi.compression; 11 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/io/LineWordReader.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2006-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.io; 21 | 22 | import java.io.IOException; 23 | import java.io.Reader; 24 | import java.io.Serializable; 25 | 26 | import it.unimi.dsi.lang.MutableString; 27 | 28 | /** A trivial {@link it.unimi.dsi.io.WordReader} that considers each line 29 | * of a document a single word. 30 | * 31 | *

The intended usage of this class is that of indexing stuff like lists of document 32 | * identifiers: if the identifiers contain nonalphabetical characters, the default 33 | * {@link it.unimi.dsi.io.FastBufferedReader} might do a poor job. 34 | * 35 | *

Note that the non-word returned by {@link #next(MutableString, MutableString)} is 36 | * always empty. 37 | */ 38 | 39 | public class LineWordReader implements WordReader, Serializable { 40 | private static final long serialVersionUID = 1L; 41 | /** A fast buffered reader wrapping the underlying reader. */ 42 | private final FastBufferedReader fastBufferedReader = new FastBufferedReader(); 43 | 44 | /** Empties nonWord, then reads one line into word; returns true unless the wrapped reader's readLine() returned null. */ @Override 45 | public boolean next(final MutableString word, final MutableString nonWord) throws IOException { 46 | nonWord.length(0); 47 | return fastBufferedReader.readLine(word) != null; 48 | } 49 | 50 | /** Passes the given reader to the wrapped fast buffered reader and returns this instance. */ @Override 51 | public LineWordReader setReader(final Reader reader) { 52 | fastBufferedReader.setReader(reader); 53 | return this; 54 | } 55 | 56 | /** Returns a newly constructed line word reader, which has its own fast buffered reader. */ @Override 57 | public LineWordReader copy() { 58 | return new LineWordReader(); 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/io/NullInputStream.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2003-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.io; 21 | 22 | import java.io.IOException; 23 | import java.io.Serializable; 24 | 25 | import it.unimi.dsi.fastutil.io.MeasurableInputStream; 26 | import it.unimi.dsi.fastutil.io.RepositionableStream; 27 | 28 | /** End-of-stream-only input stream. 29 | * 30 | *

This stream has length 0, and will always return end-of-file on any read attempt. 31 | * 32 | *

This class is a singleton. You cannot create a null input stream, 33 | * but you can obtain an instance of this class using {@link #getInstance()}. 34 | * 35 | * @author Sebastiano Vigna 36 | * @since 0.8 37 | */ 38 | 39 | public class NullInputStream extends MeasurableInputStream implements RepositionableStream, Serializable { 40 | private static final long serialVersionUID = 1L; 41 | private final static NullInputStream INSTANCE = new NullInputStream(); 42 | 43 | private NullInputStream() {} 44 | 45 | @Override 46 | public int read() { return -1; } 47 | 48 | /** Returns the only instance of this class. 49 | * 50 | * @return the only instance of this class. 51 | */ 52 | public static NullInputStream getInstance() { 53 | return INSTANCE; 54 | } 55 | 56 | private Object readResolve() { 57 | return INSTANCE; 58 | } 59 | 60 | @Override 61 | public long length() { 62 | return 0; 63 | } 64 | 65 | @Override 66 | public long position() { 67 | return 0; 68 | } 69 | 70 | @Override 71 | public void position(final long position) throws IOException { 72 | // TODO: we should specify the semantics out of bounds 73 | return; 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/io/NullOutputStream.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2003-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 
12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.io; 21 | 22 | import java.io.IOException; 23 | import java.io.OutputStream; 24 | 25 | import it.unimi.dsi.fastutil.io.RepositionableStream; 26 | 27 | /** Throw-it-away output stream. 28 | * 29 | *

This stream discards whatever is written into it. Its usefulness is in 30 | * previewing the length of some coding by wrapping it in an {@link 31 | * OutputBitStream} (it is a good idea, in this case, {@linkplain 32 | * OutputBitStream#OutputBitStream(java.io.OutputStream,int) to specify a 0-length buffer}). 33 | * 34 | *

This class is a singleton. You cannot create a null output stream, 35 | * but you can obtain an instance of this class using {@link #getInstance()}. 36 | * 37 | * @author Sebastiano Vigna 38 | * @since 0.6 39 | */ 40 | 41 | public class NullOutputStream extends OutputStream implements RepositionableStream { 42 | 43 | /** The only instance of this class. */ private final static NullOutputStream SINGLETON = new NullOutputStream(); 44 | 45 | /** Private, so that no instance other than the singleton can be created. */ private NullOutputStream() {} 46 | 47 | /** Does nothing: the byte is discarded. */ @Override 48 | public void write(final int discarded) {} 49 | 50 | /** Returns the only instance of this class. 
 * 
 * @return the only instance of this class. */ 51 | public static NullOutputStream getInstance() { 52 | return SINGLETON; 53 | } 54 | 55 | /** Always returns 0. */ @Override 56 | public long position() throws IOException { 57 | return 0; 58 | } 59 | 60 | /** Does nothing: the requested position is ignored. */ @Override 61 | public void position(final long newPosition) throws IOException {} 62 | } 63 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/io/NullReader.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2003-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.io; 21 | 22 | import java.io.Reader; 23 | import java.io.Serializable; 24 | 25 | 26 | /** End-of-stream-only reader. 27 | * 28 | *

This reader will always return end-of-file on any read attempt. 29 | * 30 | *

This class is a singleton. You cannot create a null reader, 31 | * but you can obtain an instance of this class using {@link #getInstance()}. 32 | * Deserialising an instance returns the singleton, too. 33 | * @author Sebastiano Vigna 34 | * @since 0.9.2 35 | */ 36 | 37 | public class NullReader extends Reader implements Serializable { 38 | private static final long serialVersionUID = 1L; 39 | /** The only instance of this class. */ 40 | private final static NullReader INSTANCE = new NullReader(); 41 | /** Private, so that no instance other than the singleton can be created. */ 42 | private NullReader() {} 43 | 44 | /** Returns the only instance of this class. 45 | * 46 | * @return the only instance of this class. 47 | */ 48 | public static NullReader getInstance() { 49 | return INSTANCE; 50 | } 51 | /** Replaces any deserialised copy with the singleton, so that serialisation cannot create a second instance. */ private Object readResolve() { return INSTANCE; } 52 | @Override 53 | public void close() {} 54 | /** Always reports end of stream; {@code cbuf} is never written to. */ 55 | @Override 56 | public int read(final char[] cbuf, final int off, final int len) { 57 | return -1; 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/io/SafelyCloseable.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2006-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.io; 21 | 22 | import java.io.Closeable; 23 | 24 | /** A marker interface for a closeable resource that implements safety measures to 25 | * make resource tracking easier. 26 | * 27 | *

Classes implementing this interface must provide a safety-net finaliser—a 28 | * finaliser that closes the resource and logs that the resource should have been closed. 29 | * 30 | *

When the implementing class is abstract, concrete subclasses must 31 | * call super.close() in their own {@link java.io.Closeable#close()} method 32 | * to let the abstract class correctly track the resource. Moreover, 33 | * they must run super.finalize() in 34 | * their own finaliser (if any), as finalisation chaining is not automatic. 35 | * 36 | *

Note that if a concrete subclass implements readResolve(), it must 37 | * call super.close(), or actually return this (i.e., the deserialised 38 | * instance); otherwise, a spurious log could be generated when the deserialised instance is collected. 39 | * This interface declares no methods of its own, so the contract above is not enforced by the compiler. 40 | * @author Sebastiano Vigna 41 | * @since 1.1 42 | */ 43 | 44 | public interface SafelyCloseable extends Closeable {} 45 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/io/WordReader.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2005-2023 Paolo Boldi and Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.io; 21 | 22 | import java.io.IOException; 23 | import java.io.Reader; 24 | import java.io.Serializable; 25 | 26 | import it.unimi.dsi.lang.MutableString; 27 | 28 | /** An interface providing methods to break the input from a reader into words. 29 | * 30 | *

The intended implementations of this interface should decorate 31 | * a given reader (see, for instance, {@link it.unimi.dsi.io.FastBufferedReader}). 32 | * The reader can be changed at any time using {@link #setReader(Reader)}. 33 | * 34 | *

This interface is heavily oriented towards reusability and 35 | * streaming. It is conceived so that at most one method call has 36 | * to be performed per word, rather than per character, 37 | * and that implementations may completely avoid object creation by 38 | * {@linkplain #setReader(Reader) setting explicitly the underlying reader}. 39 | * 40 | *

The standard implementation ({@link it.unimi.dsi.io.FastBufferedReader}) breaks 41 | * words in the trivial way. More complex implementations (e.g., for languages requiring 42 | * segmentation) can subclass {@link it.unimi.dsi.io.FastBufferedReader} or provide their 43 | * own implementation. 44 | */ 45 | 46 | public interface WordReader extends Serializable { 47 | /** Extracts the next word and non-word. 48 | * 49 | *

If this method returns true, a new non-empty word, and possibly 50 | * a new non-word, have been extracted. It is acceptable 51 | * that the first call to this method after creation 52 | * or after a call to {@link #setReader(Reader)} returns an empty 53 | * word. In other words, both word and nonWord are maximal. 54 | * 55 | * @param word the next word returned by the underlying reader. 56 | * @param nonWord the non-word following the next word returned by the underlying reader. 57 | * @return true if a new word was processed, false otherwise (in which 58 | * case both word and nonWord are unchanged). 59 | */ 60 | 61 | public abstract boolean next(MutableString word, MutableString nonWord) throws IOException; 62 | 63 | /** Resets the internal state of this word reader, which will start reading again from the given reader. 64 | * 65 | * @param reader the new reader providing characters. 66 | * @return this word reader. 67 | */ 68 | 69 | public abstract WordReader setReader(Reader reader); 70 | 71 | /** Returns a copy of this word reader. 72 | * 73 | *

This method must return a word reader with a behaviour that 74 | * matches exactly that of this word reader. 75 | * 76 | * @return a copy of this word reader. 77 | */ 78 | 79 | public abstract WordReader copy(); 80 | } 81 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/io/delta.in.16: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vigna/dsiutils/e5e53d568d406d49c8458bd8b189b72179e401ce/src/it/unimi/dsi/io/delta.in.16 -------------------------------------------------------------------------------- /src/it/unimi/dsi/io/delta.out.12: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vigna/dsiutils/e5e53d568d406d49c8458bd8b189b72179e401ce/src/it/unimi/dsi/io/delta.out.12 -------------------------------------------------------------------------------- /src/it/unimi/dsi/io/gamma.in.16: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vigna/dsiutils/e5e53d568d406d49c8458bd8b189b72179e401ce/src/it/unimi/dsi/io/gamma.in.16 -------------------------------------------------------------------------------- /src/it/unimi/dsi/io/gamma.out.12: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vigna/dsiutils/e5e53d568d406d49c8458bd8b189b72179e401ce/src/it/unimi/dsi/io/gamma.out.12 -------------------------------------------------------------------------------- /src/it/unimi/dsi/io/package-info.java: -------------------------------------------------------------------------------- 1 | /** I/O classes 2 | * 3 | *

Classes in this package fulfill needs that are not satisfied by the 4 | * standard I/O classes available. 5 | * 6 | *

Reading text

7 | * 8 | *

We provide replacement classes such as {@link 9 | * it.unimi.dsi.io.FastBufferedReader} and classes exposing the lines of 10 | * a file as an {@linkplain 11 | * it.unimi.dsi.io.FileLinesMutableStringIterable Iterable}. The general 12 | * {@link it.unimi.dsi.io.WordReader} interface is used by MG4J 13 | * to provide customizable word segmentation. 14 | * 15 | *

Bit-level I/O

16 | * 17 | *

The standard Java API lacks bit-level I/O classes: to this purpose, we 18 | * provide {@link it.unimi.dsi.io.InputBitStream} and {@link 19 | * it.unimi.dsi.io.OutputBitStream}, which can wrap any standard Java 20 | * corresponding stream and make it work at the bit level; moreover, they 21 | * provide support for several useful formats (such as unary, binary, minimal 22 | * binary, γ, δ and Golomb encoding). 23 | 24 | *

Bit input and output streams offer also efficient buffering and a way to 25 | * reposition the bit stream in case the underlying byte stream is a 26 | * file-based stream or a {@link it.unimi.dsi.fastutil.io.RepositionableStream}. 27 | * 28 | *

Conventions

29 | * 30 | *

All coding methods work on natural numbers. The 31 | * encoding of zero is very natural for some techniques, and much less natural 32 | * for others. To keep methods rationally organized, all methods are able to 33 | * encode any natural number. If, for instance, you want to write positive 34 | * numbers in unary encoding and you do not want to waste a bit, you have to 35 | * decrement them first (i.e., instead of p you must encode 36 | * p − 1). 37 | */ 38 | 39 | package it.unimi.dsi.io; 40 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/io/shiftedgamma.in.16: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vigna/dsiutils/e5e53d568d406d49c8458bd8b189b72179e401ce/src/it/unimi/dsi/io/shiftedgamma.in.16 -------------------------------------------------------------------------------- /src/it/unimi/dsi/io/shiftedgamma.out.12: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vigna/dsiutils/e5e53d568d406d49c8458bd8b189b72179e401ce/src/it/unimi/dsi/io/shiftedgamma.out.12 -------------------------------------------------------------------------------- /src/it/unimi/dsi/io/zeta3.in.16: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vigna/dsiutils/e5e53d568d406d49c8458bd8b189b72179e401ce/src/it/unimi/dsi/io/zeta3.in.16 -------------------------------------------------------------------------------- /src/it/unimi/dsi/io/zeta3.out.12: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vigna/dsiutils/e5e53d568d406d49c8458bd8b189b72179e401ce/src/it/unimi/dsi/io/zeta3.out.12 -------------------------------------------------------------------------------- /src/it/unimi/dsi/lang/EnumStringParser.java: 
-------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2016-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.lang; 21 | 22 | import com.martiansoftware.jsap.ParseException; 23 | import com.martiansoftware.jsap.StringParser; 24 | 25 | /** 26 | * A {@link com.martiansoftware.jsap.StringParser StringParser} that makes the user choose among 27 | * items of a Java {@code enum}. 28 | * 29 | *

Optionally, parsed strings can be normalized to upper case. 30 | * Thus, if the enum elements are defined in uppercase, the parser will be in practice 31 | * case-independent. 32 | * 33 | *

A typical usage example for an {@code ExampleEnum} with an item {@code A} that is going to be the default: 34 | *

35 |  * new FlaggedOption("example",
36 |  *     EnumStringParser.getParser(ExampleEnum.class, true),
37 |  *     ExampleEnum.A.name(), JSAP.NOT_REQUIRED, 'e', "example",
38 |  *     Arrays.toString(ExampleEnum.values()))
39 |  * 
40 | */ 41 | 42 | public class EnumStringParser<E extends Enum<E>> extends StringParser { 43 | private final Class<E> enumClass; 44 | private final boolean toUpper; 45 | 46 | /** Returns the enum item obtained by passing the argument to {@link Enum#valueOf(Class, String)}. 47 | * 48 | * @param s an enum item name. 49 | * @return the enum item returned by {@link Enum#valueOf(Class, String)} (possibly 50 | * after upper casing {@code s} with the locale-independent rules of {@link java.util.Locale#ROOT}). 51 | * @throws ParseException if {@code s} does not name an item of the enum. */ 52 | @Override 53 | @SuppressWarnings({ "unchecked" }) 54 | public E parse(final String s) throws ParseException { 55 | try { 56 | return (E) enumClass.getMethod("valueOf", String.class).invoke(null, toUpper ? s.toUpperCase(java.util.Locale.ROOT) : s); 57 | } catch (final Exception e) { 58 | throw (new ParseException("Unknown value '" + s + "'.", e)); 59 | } 60 | } 61 | 62 | private EnumStringParser(final Class<E> enumClass, final boolean toUpper) { 63 | this.enumClass = enumClass; 64 | this.toUpper = toUpper; 65 | } 66 | 67 | /** 68 | * Returns an enum parser. 69 | * 70 | * @param enumClass an {@code enum} class whose values will be returned by the parser. 71 | * @param toUpper tells the parser to upper case the strings to be parsed. 72 | * @return a parser for the items of {@code enumClass}. */ 73 | public static <E extends Enum<E>> EnumStringParser<E> getParser(final Class<E> enumClass, final boolean toUpper) throws IllegalArgumentException { 74 | return new EnumStringParser<>(enumClass, toUpper); 75 | } 76 | 77 | /** 78 | * Returns an enum parser that does not normalize to upper case. 
79 | * 80 | * @param enumClass an {@code enum} class whose values will be returned by the parser. 81 | * @return a parser for the items of {@code enumClass}. */ 82 | public static <E extends Enum<E>> EnumStringParser<E> getParser(final Class<E> enumClass) throws IllegalArgumentException { 83 | return getParser(enumClass, false); 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/lang/FlyweightPrototype.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2006-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.lang; 21 | 22 | /** A prototype providing flyweight copies. 23 | * 24 | *

Flyweight copies are useful to implement multithreading on read-only 25 | * (but maybe stateful) classes. An instance of a class implementing this interface 26 | * is not necessarily thread safe, 27 | * but it can be (thread-) safely copied many times (i.e., it can be used as a prototype). 28 | * All copies will share as much as possible of the class read-only 29 | * state (so they are flyweight). 30 | * 31 | *

In the case an implementation is stateless, it can of course always return the same singleton 32 | * instance as a copy. At the other extreme, a stateful class may decide to synchronise its 33 | * methods and return itself as a copy instead. Note that in general the object returned 34 | * by {@link #copy()} must replicate the current state of the object, not 35 | * the object state at creation time. This might require some calls to methods that 36 | * modify the class's internal state: in particular, one should always check whether such 37 | * methods are pointed out in the documentation of superclasses. 38 | * 39 | *

Warning: if {@link #copy()} accesses mutable internal state, setters 40 | * and {@link #copy()} must be suitably synchronised. 41 | * 42 | *

Implementing subclasses are invited to use covariant return-type overriding to 43 | * make {@link #copy()} return the right type. 44 | */ 45 | 46 | public interface FlyweightPrototype> { 47 | 48 | /** Returns a copy of this object, sharing state with this object as much as possible. 49 | * 50 | * @return a copy of this object, sharing state with this object as much as possible. */ 51 | public T copy(); 52 | } 53 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/lang/FlyweightPrototypes.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2006-2023 Paolo Boldi and Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.lang; 21 | 22 | import java.lang.reflect.Array; 23 | 24 | /** A class providing static methods and objects that do useful things 25 | * with {@linkplain it.unimi.dsi.lang.FlyweightPrototype flyweight protoypes}. 26 | */ 27 | 28 | public class FlyweightPrototypes { 29 | 30 | protected FlyweightPrototypes() {} 31 | 32 | /** Creates a flyweight copy of an array of {@linkplain it.unimi.dsi.lang.FlyweightPrototype flyweight prototypes}. 33 | * 34 | * @param the type of {@link FlyweightPrototype} you want to copy, that is, the 35 | * type of the elements of prototype. 
36 | * @param prototype an array of prototypes. 37 | * @return a flyweight copy of prototype, obtained by invoking 38 | * {@link FlyweightPrototype#copy()} on each element. 39 | */ 40 | 41 | @SuppressWarnings("unchecked") 42 | public static > T[] copy(final T[] prototype) { 43 | final T[] result = (T[])Array.newInstance(prototype.getClass().getComponentType(), prototype.length); 44 | for(int i = 0; i < result.length; i++) result[i] = prototype[i].copy(); 45 | return result; 46 | } 47 | 48 | /** Creates a flyweight copy of the given object, or returns {@code null} if the given object is {@code null}. 49 | * 50 | * @param the type of {@link FlyweightPrototype} you want to copy, that is, the 51 | * type of prototype. 52 | * @param prototype a prototype to be copied, or {@code null}. 53 | * @return {@code null}, if prototype is {@code null}; 54 | * otherwise,a flyweight copy of prototype. 55 | */ 56 | @SuppressWarnings("null") 57 | public static > T copy(final T prototype) { 58 | return prototype != null ? 
prototype.copy() : null; 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/lang/package-info.java: -------------------------------------------------------------------------------- 1 | /** Basic classes */ 2 | 3 | package it.unimi.dsi.lang; 4 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/logging/package-info.java: -------------------------------------------------------------------------------- 1 | /** Logging classes */ 2 | 3 | package it.unimi.dsi.logging; 4 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/package-info.java: -------------------------------------------------------------------------------- 1 | /** General utilities */ 2 | 3 | package it.unimi.dsi; 4 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/parser/Entity.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2005-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.parser; 21 | 22 | import it.unimi.dsi.lang.MutableString; 23 | 24 | /** 25 | * An SGML character entity. 
26 | * 27 | * @deprecated This class is obsolete and kept around for backward compatibility only. 28 | */ 29 | 30 | @Deprecated 31 | public final class Entity { 32 | 33 | /** The name of this entity. */ 34 | public final CharSequence name; 35 | /** The Unicode character corresponding to this entity. */ 36 | public final char character; 37 | 38 | /** Creates a new entity with the specified name and character. 39 | * 40 | * @param name the name of the new entity; it is copied into a new {@link MutableString}. 41 | * @param character its character value. 42 | */ 43 | public Entity(final CharSequence name, final char character) { 44 | this.name = new MutableString(name); 45 | this.character = character; 46 | } 47 | 48 | /** Returns the name of this entity. 49 | * @return the name of this entity. 50 | */ 51 | 52 | @Override 53 | public String toString() { 54 | return name.toString(); 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/parser/ParsingFactory.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2005-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.parser; 21 | 22 | import it.unimi.dsi.lang.MutableString; 23 | 24 | /** 25 | * A set of coherent methods to turn element-type, attribute and entity names to unique interned 26 | * instances. 27 | * 28 | *

29 | * The {@link it.unimi.dsi.parser.BulletParser} needs a way to turn a name (for an element type, 30 | * attribute, or entity) into a corresponding object of type {@link it.unimi.dsi.parser.Element}, 31 | * {@link it.unimi.dsi.parser.Attribute} or {@link it.unimi.dsi.parser.Entity}, respectively. The 32 | * returned element must be an interned, unique representation. 33 | * 34 | *

35 | * For instance, the {@linkplain it.unimi.dsi.parser.HTMLFactory standard factory for HTML} parsing 36 | * has ready-made interned versions of all names in the (X)HTML specification, and returns them upon 37 | * request, but other policies are possible. For instance, instances of {@link WellFormedXmlFactory} 38 | * intern every seen name, without reference to a data type (except for entities, in which case the 39 | * HTML set is used). 40 | * 41 | *

42 | * The idea of factoring out the creation of interned counterparts of SGML/XML syntactical objects 43 | * is due to Fabien Campagne. 44 | * 45 | * @author Sebastiano Vigna 46 | * @since 1.0.2 47 | * @deprecated This interface is obsolete and kept around for backward compatibility only. 48 | */ 49 | 50 | @Deprecated 51 | public interface ParsingFactory { 52 | 53 | /** Returns the {@link it.unimi.dsi.parser.Element} associated 54 | * with a name. 55 | * @param name the name of an element type. 56 | * @return the corresponding interned {@link Element} object. 57 | */ 58 | public Element getElement(final MutableString name); 59 | 60 | /** Returns the {@link it.unimi.dsi.parser.Attribute} associated 61 | * with a name. 62 | * @param name the name of an attribute. 63 | * @return the corresponding interned {@link Attribute} object. 64 | */ 65 | public Attribute getAttribute(final MutableString name); 66 | 67 | /** Returns the {@link it.unimi.dsi.parser.Entity} associated 68 | * with a name. 69 | * @param name the name of an entity. 70 | * @return the corresponding interned {@link Entity} object. 71 | */ 72 | public Entity getEntity(final MutableString name); 73 | } 74 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/parser/WellFormedXmlFactory.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2005-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 
12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.parser; 21 | 22 | import it.unimi.dsi.fastutil.Hash; 23 | import it.unimi.dsi.fastutil.objects.Object2ObjectOpenHashMap; 24 | import it.unimi.dsi.lang.MutableString; 25 | 26 | /** 27 | * A factory for well-formed XML documents. 28 | * 29 | *

30 | * This factory assumes that every new name of an element type or of an attribute is a new valid name. 31 | * For entities, instead, resolution is deferred to {@link it.unimi.dsi.parser.HTMLFactory}. 32 | * 33 | * @author Sebastiano Vigna 34 | * @since 1.0.2 35 | * @deprecated This class is obsolete and kept around for backward compatibility only. 36 | */ 37 | 38 | @Deprecated 39 | public class WellFormedXmlFactory implements ParsingFactory { 40 | /** The load factor for all maps. */ 41 | private static final float ONE_HALF = .5f; 42 | 43 | /** A (quick) map from attribute names to attributes. NOTE(review): this declaration and the next one appear to have lost their type arguments (the result of {@code get()} is assigned directly to an {@link Attribute}/{@link Element} below) — confirm against the original source. */ 44 | private final Object2ObjectOpenHashMap name2Attribute = new Object2ObjectOpenHashMap<>(Hash.DEFAULT_INITIAL_SIZE, ONE_HALF); 45 | 46 | /** A (quick) map from element-type names to element types. */ 47 | private final Object2ObjectOpenHashMap name2Element = new Object2ObjectOpenHashMap<>(Hash.DEFAULT_INITIAL_SIZE, ONE_HALF); 48 | 49 | public WellFormedXmlFactory() {} 50 | /** Returns the element stored under the given name, creating and storing a new one the first time the name is seen. */ 51 | @Override 52 | public Element getElement(final MutableString name) { 53 | Element element = name2Element.get(name); 54 | if (element == null) { 55 | element = new Element(name); 56 | name2Element.put(element.name, element); 57 | } 58 | return element; 59 | } 60 | /** Returns the attribute stored under the given name, creating and storing a new one the first time the name is seen. */ 61 | @Override 62 | public Attribute getAttribute(final MutableString name) { 63 | Attribute attribute = name2Attribute.get(name); 64 | if (attribute == null) { 65 | attribute = new Attribute(name); 66 | name2Attribute.put(attribute.name, attribute); 67 | } 68 | return attribute; 69 | } 70 | /** Delegates entity resolution to {@link HTMLFactory#INSTANCE}. */ 71 | @Override 72 | public Entity getEntity(final MutableString name) { 73 | return HTMLFactory.INSTANCE.getEntity(name); 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/parser/callback/DebugCallbackDecorator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2005-2023 Sebastiano Vigna 5 | * 
6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.parser.callback; 21 | 22 | import java.util.Map; 23 | 24 | import it.unimi.dsi.lang.MutableString; 25 | import it.unimi.dsi.parser.Attribute; 26 | import it.unimi.dsi.parser.BulletParser; 27 | import it.unimi.dsi.parser.Element; 28 | 29 | /** 30 | * A decorator that prints on standard error all calls to the underlying callback. 31 | * 32 | * @deprecated This class is obsolete and kept around for backward compatibility only. 33 | */ 34 | @Deprecated 35 | public class DebugCallbackDecorator implements Callback { 36 | 37 | /** The underlying callback. 
*/ 38 | private final Callback callback; 39 | 40 | public DebugCallbackDecorator(final Callback callback) { 41 | this.callback = callback; 42 | } 43 | 44 | @Override 45 | public boolean cdata(final Element element, final char[] text, final int offset, final int length) { 46 | System.err.println("cdata(" + new String(text, offset, length) + ")"); 47 | return callback.cdata(element, text, offset, length); 48 | } 49 | 50 | 51 | @Override 52 | public boolean characters(final char[] text, final int offset, final int length, final boolean flowBroken) { 53 | System.err.println("characters(" + new String(text, offset, length) + ", " + flowBroken + ")"); 54 | return callback.characters(text, offset, length, flowBroken); 55 | } 56 | 57 | 58 | @Override 59 | public void configure(final BulletParser parser) { 60 | System.err.println("configure()"); 61 | callback.configure(parser); 62 | } 63 | 64 | 65 | @Override 66 | public void endDocument() { 67 | System.err.println("endDocument()"); 68 | callback.endDocument(); 69 | } 70 | 71 | @Override 72 | public boolean endElement(final Element element) { 73 | System.err.println("endElement(" + element + ")"); 74 | return callback.endElement(element); 75 | } 76 | 77 | @Override 78 | public boolean equals(final Object obj) { 79 | return callback.equals(obj); 80 | } 81 | 82 | @Override 83 | public int hashCode() { 84 | return callback.hashCode(); 85 | } 86 | 87 | @Override 88 | public void startDocument() { 89 | System.err.println("startDocument()"); 90 | callback.startDocument(); 91 | } 92 | 93 | @Override 94 | public boolean startElement(final Element element, final Map attrMap) { 95 | System.err.println("endElement(" + element + ", " + attrMap + ")"); 96 | return callback.startElement(element, attrMap); 97 | } 98 | 99 | @Override 100 | public String toString() { 101 | return this.getClass().getName() + "(" + callback.toString() + ")"; 102 | } 103 | } 104 | -------------------------------------------------------------------------------- 
/src/it/unimi/dsi/parser/callback/DefaultCallback.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2005-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.parser.callback; 21 | 22 | import java.util.Map; 23 | 24 | import it.unimi.dsi.lang.MutableString; 25 | import it.unimi.dsi.parser.Attribute; 26 | import it.unimi.dsi.parser.BulletParser; 27 | import it.unimi.dsi.parser.Element; 28 | 29 | /** 30 | * A default, do-nothing-at-all callback. 31 | * 32 | *

33 | * Callbacks can inherit from this class and forget about methods they are not interested in. 34 | * 35 | *

36 | * This class has a protected constructor. If you need an instance of this class, use 37 | * {@link #getInstance()}. 38 | * 39 | * @deprecated This class is obsolete and kept around for backward compatibility only. 40 | */ 41 | @Deprecated 42 | public class DefaultCallback implements Callback { 43 | /** The shared instance returned by {@link #getInstance()}. */ private static final DefaultCallback SINGLETON = new DefaultCallback(); 44 | 45 | /** Does nothing; being protected, it is reachable from subclasses while other code obtains the shared instance through {@link #getInstance()}. */ protected DefaultCallback() {} 46 | 47 | /** 48 | * Returns the singleton instance of the default callback. 49 | * 50 | * @return the singleton instance of the default callback. 51 | */ 52 | public static DefaultCallback getInstance() { 53 | return SINGLETON; 54 | } 55 | 56 | /** Does nothing. */ @Override 57 | public void configure(final BulletParser parserUnused) {} 58 | 59 | /** Does nothing. */ @Override 60 | public void startDocument() {} 61 | 62 | /** Ignores its arguments and returns {@code true}. */ @Override 63 | public boolean startElement(final Element elementUnused, final Map attrMapUnused) { 64 | return true; 65 | } 66 | 67 | /** Ignores its argument and returns {@code true}. */ @Override 68 | public boolean endElement(final Element elementUnused) { 69 | return true; 70 | } 71 | 72 | /** Ignores its arguments and returns {@code true}. */ @Override 73 | public boolean characters(final char[] textUnused, final int offsetUnused, final int lengthUnused, final boolean flowBrokenUnused) { 74 | return true; 75 | } 76 | 77 | /** Ignores its arguments and returns {@code true}. */ @Override 78 | public boolean cdata(final Element elementUnused, final char[] textUnused, final int offsetUnused, final int lengthUnused) { 79 | return true; 80 | } 81 | 82 | /** Does nothing. */ @Override 83 | public void endDocument() {} 84 | } 85 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/parser/callback/TextExtractor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2005-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | *
http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.parser.callback; 21 | 22 | import java.util.Map; 23 | 24 | import it.unimi.dsi.lang.MutableString; 25 | import it.unimi.dsi.parser.Attribute; 26 | import it.unimi.dsi.parser.BulletParser; 27 | import it.unimi.dsi.parser.Element; 28 | 29 | /** 30 | * A callback extracting text and titles. 31 | * 32 | *

33 | * This callback extracts all text in the page, and the title. The resulting text is available 34 | * through {@link #text}, and the title through {@link #title}. 35 | *

37 | * Note that {@link #text} and {@link #title} are never trimmed. 38 | * 39 | * @deprecated This class is obsolete and kept around for backward compatibility only. 40 | */ 41 | 42 | 43 | @Deprecated 44 | public class TextExtractor extends DefaultCallback { 45 | 46 | /** The text resulting from the parsing process. */ 47 | public final MutableString text = new MutableString(); 48 | /** The title resulting from the parsing process. */ 49 | public final MutableString title = new MutableString(); 50 | /** True if we are in the middle of the title. */ 51 | private boolean inTitle; 52 | 53 | /** 54 | * Configures the parser to parse both text and tags; tags are enabled so that the title element can be detected. 55 | */ 56 | 57 | @Override 58 | public void configure(final BulletParser parser) { 59 | parser.parseText(true); 60 | // To get the title. 61 | parser.parseTags(true); 62 | } 63 | 64 | /** Sets the length of {@link #text} and {@link #title} to zero and clears the in-title flag, so that the same instance can be used for a new document. */ @Override 65 | public void startDocument() { 66 | text.length(0); 67 | title.length(0); 68 | inTitle = false; 69 | } 70 | 71 | /** Appends the given characters to {@link #text} and, while inside the title, also to {@link #title}; always returns {@code true}. */ @Override 72 | public boolean characters(final char[] characters, final int offset, final int length, final boolean flowBroken) { 73 | text.append(characters, offset, length); 74 | if (inTitle) title.append(characters, offset, length); 75 | return true; 76 | } 77 | 78 | /** Leaves the title (if we were in it) and appends a space to {@link #text} when the element breaks the flow; always returns {@code true}. */ @Override 79 | public boolean endElement(final Element element) { 80 | // No element is allowed inside a title. 81 | inTitle = false; 82 | if (element.breaksFlow) { 83 | /* NOTE(review): inTitle was set to false just above, so this test never succeeds and the title never receives a space here; confirm whether the reset was meant to come after this block. */ if (inTitle) title.append(' '); 84 | text.append(' '); 85 | } 86 | return true; 87 | } 88 | 89 | /** Enters the title exactly when the element is {@code Element.TITLE}, and appends a space to {@link #text} (and to {@link #title}, when in the title) if the element breaks the flow; always returns {@code true}. */ @Override 90 | public boolean startElement(final Element element, final Map attrMapUnused) { 91 | // No element is allowed inside a title.
92 | inTitle = element == Element.TITLE; 93 | if (element.breaksFlow) { 94 | if (inTitle) title.append(' '); 95 | text.append(' '); 96 | } 97 | return true; 98 | } 99 | 100 | } 101 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/parser/callback/package-info.java: -------------------------------------------------------------------------------- 1 | /** Callbacks for the {@link it.unimi.dsi.parser.BulletParser} */ 2 | 3 | package it.unimi.dsi.parser.callback; 4 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/parser/package-info.java: -------------------------------------------------------------------------------- 1 | /** A fast, lightweight, on-demand (X)HTML parser */ 2 | 3 | package it.unimi.dsi.parser; 4 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/stat/package-info.java: -------------------------------------------------------------------------------- 1 | /** Statistics classes */ 2 | 3 | package it.unimi.dsi.stat; 4 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/test/MutableStringLengthSpeedTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2012-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.test; 21 | 22 | import it.unimi.dsi.lang.MutableString; 23 | 24 | /** Micro-benchmark timing {@code length()} on a {@link String}, on a {@link MutableString} as created, on a {@link MutableString} after {@code loose()}, on a {@link StringBuffer} and on a {@link StringBuilder}. The first argument is the number n reported as the call count; each timed loop runs n / 2 iterations with two calls each, and ten rounds are run. */ public class MutableStringLengthSpeedTest { 25 | 26 | private MutableStringLengthSpeedTest() {} 27 | 28 | /** Runs the benchmark and prints one line per measurement to standard output. 
 * @param arg the first element is parsed as the number of calls. */ public static void main(final String[] arg) { 29 | 30 | long i, n; 31 | 32 | n = Long.parseLong(arg[0]); 33 | 34 | final MutableString s = new MutableString("foobar0"); 35 | final MutableString t = new MutableString("foobar1"); 36 | final String u = new String("foobar2"); 37 | final StringBuffer v = new StringBuffer("foobar3"); 38 | final StringBuilder w = new StringBuilder("foobar4"); 39 | 40 | int k = 10; 41 | /* Accumulator fed by every call, so that the calls have an observable result. */ int x = 0; 42 | 43 | while (k-- != 0) { 44 | /* Holds minus the start time first, then the elapsed nanoseconds once the end time is added. */ long start; 45 | 46 | System.out.println(); 47 | 48 | start = -System.nanoTime(); 49 | 50 | i = n / 2; 51 | while (i-- != 0) { 52 | // Using just ^= causes code elimination 53 | x ^= u.length(); 54 | x += u.length(); 55 | } 56 | 57 | start += System.nanoTime(); 58 | 59 | System.out.println("Called length() " + n + " times on a string in " + start + " ns (" + start / (double)n + " ns/call)"); 60 | 61 | 62 | start = -System.nanoTime(); 63 | 64 | i = n / 2; 65 | while (i-- != 0) { 66 | x ^= t.length(); 67 | x += t.length(); 68 | } 69 | 70 | start += System.nanoTime(); 71 | 72 | System.out.println("Called length() " + n + " times on a compact string in " + start + " ns (" + start / (double)n + " ns/call)"); 73 | 74 | start = -System.nanoTime(); 75 | 76 | /* NOTE(review): this store is overwritten two statements later without being read; the call to loose() below is also inside the timed region. */ i = n; 77 | s.loose(); 78 | i = n / 2; 79 | while (i-- != 0) { 80 | x ^= s.length(); 81 | x += s.length(); 82 | } 83 | 84 | start += System.nanoTime(); 85 | 86 | System.out.println("Called length() " + n + " times on a loose string in " + start + " ns (" + start / (double)n + " ns/call)"); 87 | 88 | start = -System.nanoTime(); 89 | 90 | i = n / 2; 91 | while (i-- != 0) { 92 | x ^= v.length(); 93 | x += v.length(); 94 | } 95 | 96 | start += System.nanoTime(); 97 | 98 | 
System.out.println("Called length() " + n + " times on a string buffer in " + start + " ns (" + start / (double)n + " ns/call)"); 99 | 100 | start = -System.nanoTime(); 101 | 102 | i = n / 2; 103 | while (i-- != 0) { 104 | x ^= w.length(); 105 | x += w.length(); 106 | } 107 | 108 | start += System.nanoTime(); 109 | /* Reads x so that the value accumulated by the loops above is actually used. */ if (x == 0) System.out.println(); 110 | System.out.println("Called length() " + n + " times on a string builder in " + start + " ns (" + start / (double)n + " ns/call)"); 111 | } 112 | 113 | } 114 | 115 | } 116 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/test/MutableStringReplaceSpeedTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2012-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.test; 21 | 22 | import java.io.BufferedReader; 23 | import java.io.IOException; 24 | import java.io.InputStreamReader; 25 | 26 | import it.unimi.dsi.lang.MutableString; 27 | 28 | /** Micro-benchmark comparing character replacement on a compact {@link MutableString}, a loose {@link MutableString}, a {@link StringBuilder} and a {@link String}. The text is read from standard input; the arguments are a one-character search string, the replacement string and the number of calls per measurement. Ten rounds are run. */ public class MutableStringReplaceSpeedTest { 29 | 30 | private MutableStringReplaceSpeedTest() {} 31 | 32 | /** Runs the benchmark and prints one line per measurement to standard output. 
 * @param arg search string (exactly one character), replacement string, number of calls. 
 * @throws IOException if reading standard input fails. 
 * @throws IllegalArgumentException if the search string is not one character long. */ public static void main(final String[] arg) throws IOException { 33 | 34 | String target = null; 35 | 36 | /* The reader is built without an explicit charset. */ final BufferedReader br = new BufferedReader(new InputStreamReader(System.in)); 37 | final MutableString ms = new MutableString(); 38 | String line; 39 | /* Every input line is prefixed with a newline, so the target starts with one. */ while ((line = br.readLine()) != null) ms.append("\n").append(line); 40 | target = ms.toString(); 41 | 42 | MutableString s; 43 | String st; 44 | final String searchString = arg[0]; 45 | if (searchString.length() != 1) throw new IllegalArgumentException(); 46 | final char searchChar = new MutableString(arg[0]).charAt(0); 47 | final String replaceString = arg[1]; 48 | final MutableString replace = new MutableString(replaceString); 49 | final int n = Integer.parseInt(arg[2]); 50 | long start; 51 | 52 | for (int k = 10; k-- != 0;) { 53 | System.out.println(); 54 | 55 | s = new MutableString(target).compact(); 56 | start = -System.nanoTime(); 57 | /* The same string is modified n times in a row, so every call after the first works on the output of the previous one. */ for(int i = n; i-- != 0;) s.replace(searchChar, replace); 58 | start += System.nanoTime(); 59 | System.out.println("Called replace() " + n + " times on a compact string in " + start + " ns (" + start / (double)n + " ns/call)"); 60 | 61 | s = new MutableString(target).loose(); 62 | start = -System.nanoTime(); 63 | for(int i = n; i-- != 0;) s.replace(searchChar, replace); 64 | start += System.nanoTime(); 65 | System.out.println("Called replace() " + n + " times on a loose string in " + start + " ns (" + start / (double)n + " ns/call)"); 66 | 67 | final StringBuilder sb = new StringBuilder(target); 68 | start = -System.nanoTime(); 69 | 70 | /* Scans backwards, so that text inserted by a replacement is never searched again within the same pass. */ for(int i = n; i-- != 0;) { 71 | int j = 
sb.length(); 72 | for (;;) { 73 | j = sb.lastIndexOf(searchString, j); 74 | if (j == -1) break; 75 | sb.replace(j, j + 1, replaceString); 76 | j--; 77 | } 78 | } 79 | 80 | start += System.nanoTime(); 81 | System.out.println("Called replace() " + n + " times on a string builder in " + start + " ns (" + start / (double)n + " ns/call)"); 82 | /* These cross-checks only run when assertions are enabled. */ assert sb.length() == s.length(); 83 | assert s.toString().equals(sb.toString()); 84 | 85 | st = new String(target); 86 | start = -System.nanoTime(); 87 | /* NOTE(review): replaceAll() interprets the search string as a regular expression and the replacement as a replacement pattern, whereas the variants above match literally; results differ for characters that are special in either role. */ for(int i = n; i-- != 0;) st = st.replaceAll(searchString, replaceString); 88 | start += System.nanoTime(); 89 | System.out.println("Called replaceAll() " + n + " times on a string in " + start + " ns (" + start / (double)n + " ns/call)"); 90 | assert sb.length() == st.length(); 91 | assert st.equals(sb.toString()); 92 | } 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/test/TextPatternSpeedTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2012-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.test; 21 | 22 | import java.io.BufferedReader; 23 | import java.io.IOException; 24 | import java.io.InputStreamReader; 25 | 26 | import it.unimi.dsi.Util; 27 | import it.unimi.dsi.lang.MutableString; 28 | import it.unimi.dsi.util.TextPattern; 29 | 30 | /** Micro-benchmark comparing three ways of finding all occurrences of a pattern in a text read from standard input: {@code String.indexOf()}, {@code TextPattern.search()} on a character array, and {@code MutableString.indexOf()}. The pattern is the first argument; each measurement repeats the full scan 10000 times and ten rounds are run. */ public class TextPatternSpeedTest { 31 | 32 | private TextPatternSpeedTest() {} 33 | 34 | /** Runs the benchmark and prints one line per measurement to standard output; exits with status 1 if the text cannot be read. 
 * @param arg the first element is the pattern to search for. */ public static void main(final String[] arg) { 35 | 36 | String target = null; 37 | final MutableString ms = new MutableString(); 38 | 39 | try { 40 | final BufferedReader br = new BufferedReader(new InputStreamReader(System.in)); 41 | String line; 42 | while ((line = br.readLine()) != null) ms.append("\n").append(line); 43 | ms.compact(); 44 | target = ms.toString(); 45 | } catch (final IOException e) { 46 | System.out.println("Problems while reading target"); 47 | e.printStackTrace(System.out); 48 | System.exit(1); 49 | } 50 | 51 | /* Accumulates every returned index, so that the searches have an observable result. */ int u = 0; 52 | 53 | final String p = arg[0]; 54 | 55 | int k; 56 | long elapsed; 57 | System.out.println("Searching for " + p); 58 | final int n = 10000; 59 | 60 | for(k = 10; k-- != 0;) { 61 | System.out.println(); 62 | 63 | elapsed = -System.nanoTime(); 64 | 65 | for (int r = n; r-- != 0;) { 66 | int i = -1; 67 | /* Restarts one position after each match until indexOf() returns -1. */ do u ^= (i = target.indexOf(p, i + 1)); while (i != -1); 68 | } 69 | 70 | elapsed += System.nanoTime(); 71 | 72 | System.out.println("Called indexOf() " + n + " times on a string in " + elapsed + " ns (" + Util.format(elapsed / (double)n) + " ns/call)"); 73 | final TextPattern tp = new TextPattern(p); 74 | /* Presumably the backing array of ms, which was compacted above; search(a, from) is assumed to scan the whole array. TODO confirm against MutableString and TextPattern. */ final char a[] = ms.array(); 75 | 76 | elapsed = -System.nanoTime(); 77 | 78 | for (int r = n; r-- != 0;) { 79 | int i = -1; 80 | do u ^= (i = tp.search(a, i + 1)); while (i != -1); 81 | } 82 | 83 | elapsed += System.nanoTime(); 84 | 85 | System.out.println("Called search() " + n + " times on a string in " + elapsed + " ns (" + Util.format(elapsed / (double)n) + " ns/call)"); 86 
| 87 | final MutableString pattern = new MutableString(p); 88 | elapsed = -System.nanoTime(); 89 | 90 | for (int r = n; r-- != 0;) { 91 | int i = -1; 92 | do u ^= (i = ms.indexOf(pattern, i + 1)); while (i != -1); 93 | } 94 | 95 | elapsed += System.nanoTime(); 96 | 97 | System.out.println("Called indexOf() " + n + " times on a mutable string in " + elapsed + " ns (" + Util.format(elapsed / (double)n) + " ns/call)"); 98 | } 99 | 100 | /* Reads u so that the value accumulated by the searches is actually used. */ if (u == 0) System.out.println((char)0); 101 | } 102 | } 103 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/test/WTF.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2012-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.test; 21 | 22 | import java.util.Random; 23 | 24 | /** Prints the first {@code nextInt(1 << shift)} of a {@link Random} seeded with each of the values 0 to 999. The shift is the first argument and defaults to 0, in which case the bound is 1 and every printed value is 0. */ public class WTF { 25 | /* From https://twitter.com/joshbloch/status/269478731238760448 26 | * 27 | * Note that ThreadLocalRandom uses the same algorithm as Random. 28 | */ 29 | public static void main(final String[] arg) { 30 | final int shift = arg.length == 0 ? 
0 : Integer.parseInt(arg[0]); 31 | for (int i = 0; i < 1000; i++) 32 | System.out.println(new Random(i).nextInt(1 << shift)); 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/test/XorShiftPoly116.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2012-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.test; 21 | 22 | import java.math.BigInteger; 23 | 24 | /** Generator of a script, printed to standard output, that raises {@code x} to 2<sup>{@value #BITS}</sup> and to each cofactor of 2<sup>{@value #BITS}</sup> − 1 by means of repeated squaring. */ public class XorShiftPoly116 { 25 | 26 | private XorShiftPoly116() {} 27 | 28 | /** The number of bits of state of the generator. */ 29 | public static final int BITS = 116; 30 | 31 | /** The period of the generator (2<sup>{@value #BITS}</sup> − 1). Assigned by {@link #main(String[])}; {@code null} until then. */ 32 | public static BigInteger twoToBitsMinus1; 33 | 34 | /** Factors of 2<sup>{@value #BITS}</sup> − 1. */ 35 | public static final BigInteger[] factor = { 36 | new BigInteger("3"), 37 | new BigInteger("5"), 38 | new BigInteger("59"), 39 | new BigInteger("233"), 40 | new BigInteger("1103"), 41 | new BigInteger("2089"), 42 | new BigInteger("3033169"), 43 | new BigInteger("107367629"), 44 | new BigInteger("536903681") 45 | }; 46 | 47 | /** An array of cofactors. Entry 0 ≤ {@code i} &lt; {@link #numCofactors} contains {@link #twoToBitsMinus1} divided by {@link #factor factor[i]}. 
Note that some 48 | * entries can be {@code null} if {@link #BITS} is less than 4096. */ 49 | public static final BigInteger[] cofactor = new BigInteger[factor.length]; 50 | 51 | /** The actual number of valid entries in {@link #cofactor}. NOTE(review): nothing in this class assigns this field, so it keeps its default value of zero even after {@link #main(String[])} has filled every entry of {@link #cofactor}; confirm whether it was meant to be set there. */ 52 | public static int numCofactors; 53 | 54 | /** Prints the statements that compute in {@code p} the product of the quadratures {@code q[i]} selected by the bits set in the exponent, that is, the power to the given exponent. 55 | * 56 | * @param e an exponent smaller than or equal to 2<sup>{@link #BITS}</sup>. 57 | */ 58 | public static void mPow(BigInteger e) { 59 | System.out.println("p := 1;"); 60 | for(int i = 0; ! e.equals(BigInteger.ZERO); i++) { 61 | if (e.testBit(0)) System.out.println("p := *p * q[" + i + "];"); 62 | e = e.shiftRight(1); 63 | } 64 | } 65 | 66 | /** Checks that the factors multiply to 2<sup>{@value #BITS}</sup> − 1, fills {@link #cofactor}, and prints the script: the array of quadratures {@code q[0]} to {@code q[BITS]}, a check on the last quadrature, and one exponentiation per cofactor. Prints a message to standard error and returns early if the factors do not match. */ public static void main(final String arg[]) { 67 | // Check factors 68 | BigInteger prod = BigInteger.ONE; 69 | for(final BigInteger f : factor) prod = prod.multiply(f); 70 | if (!prod.equals(BigInteger.valueOf(2).pow(BITS).subtract(BigInteger.ONE))) { 71 | System.err.println("Factors do not match"); 72 | return; 73 | } 74 | 75 | BigInteger result = BigInteger.ONE; 76 | twoToBitsMinus1 = BigInteger.valueOf(2).pow(BITS).subtract(BigInteger.ONE); 77 | int n; 78 | // Initialize cofactors. 79 | for(n = 0; n < factor.length; n++) { 80 | cofactor[n] = twoToBitsMinus1.divide(factor[n]); 81 | result = result.multiply(factor[n]); 82 | } 83 | 84 | // Safety check (you know, those numbers are LONG). 85 | if (! 
twoToBitsMinus1.equals(result)) throw new AssertionError(); 86 | 87 | System.out.println("Array q[" + (BITS + 1) + "];"); 88 | // Quadratures 89 | System.out.println("q[0] := x;"); 90 | for(int i1 = 1; i1 <= BITS; i1++) System.out.println("q[" + i1 + "] := q[" + (i1 - 1) + "] * q[" + (i1 - 1) + "];"); 91 | System.out.println("!!('Check: ', if q[" + BITS + "] = x then 1 else 0; &q fi);"); 92 | // Exponentiation to cofactors 93 | for (final BigInteger element : cofactor) { 94 | mPow(element); 95 | System.out.println("!!('Result: ', if p = 1 then 0; &q else 1 fi);"); 96 | } 97 | } 98 | } 99 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/util/KahanSummation.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2012-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.util; 21 | 22 | /** Kahan's 23 | * summation algorithm encapsulated in an object. */ 24 | 25 | public class KahanSummation { 26 | /** The current value of the sum. */ 27 | private double value; 28 | /** The current correction. */ 29 | private double c; 30 | 31 | /** Adds a value. 32 | * @param v the value to be added to the sum. 
33 | */ 34 | public void add(final double v) { 35 | /* Removes from the new addend the error carried over from the previous additions. */ final double y = v - c; 36 | /* Low-order bits of y can be lost in this addition. */ final double t = value + y; 37 | /* (t - value) is the part of y that was actually added; subtracting y leaves the rounding error, which the next call subtracts from its addend. */ c = (t - value) - y; 38 | value = t; 39 | } 40 | 41 | /** Returns the sum computed so far. 42 | * @return the sum computed so far. 43 | */ 44 | public double value() { 45 | /* The pending correction is not applied here: the running sum is returned as is. */ return value; 46 | } 47 | 48 | /** Resets the current value and correction to zero. */ 49 | public void reset() { 50 | value = c = 0; 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/util/LongIntervals.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2003-2023 Paolo Boldi and Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.util; 21 | 22 | import java.util.Comparator; 23 | 24 | 25 | /** A class providing static methods and objects that do useful things with intervals. 26 | * 27 | * @see LongInterval 28 | */ 29 | 30 | public class LongIntervals { 31 | 32 | private LongIntervals() {} 33 | 34 | public static final LongInterval[] EMPTY_ARRAY = {}; 35 | 36 | /** An empty (singleton) interval. */ 37 | public static final LongInterval EMPTY_INTERVAL = new LongInterval(1, 0); 38 | 39 | /** A singleton located at −∞. 
*/ 40 | /* NOTE(review): both endpoints are Integer.MIN_VALUE, although the endpoints of a LongInterval are compared as longs by the comparators below; confirm whether Long.MIN_VALUE was intended before relying on the numeric value of this constant. */ public static final LongInterval MINUS_INFINITY = new LongInterval(Integer.MIN_VALUE, Integer.MIN_VALUE); 41 | 42 | /** A comparator between intervals defined as follows: 43 | * [a..b] is less than [a'..b'] 44 | * iff the first interval starts after the second one, that is, 45 | * iff a' &lt; a. 46 | */ 47 | public static final Comparator<LongInterval> STARTS_AFTER = (i1, i2) -> Long.compare(i2.left, i1.left); 48 | 49 | /** A comparator between intervals defined as follows: 50 | * [a..b] is less than [a'..b'] 51 | * iff the first interval starts before the second one, that is, 52 | * iff a &lt; a'. 53 | */ 54 | public static final Comparator<LongInterval> STARTS_BEFORE = (i1, i2) -> Long.compare(i1.left, i2.left); 55 | 56 | /** A comparator between intervals defined as follows: 57 | * [a..b] is less than [a'..b'] 58 | * iff the first interval ends after the second one, that is, 59 | * iff b' &lt; b. 60 | */ 61 | public static final Comparator<LongInterval> ENDS_AFTER = (i1, i2) -> Long.compare(i2.right, i1.right); 62 | 63 | /** A comparator between intervals defined as follows: 64 | * [a..b] is less than [a'..b'] 65 | * iff the first interval ends before the second one, that is, 66 | * iff b &lt; b'. 67 | */ 68 | public static final Comparator<LongInterval> ENDS_BEFORE = (i1, i2) -> Long.compare(i1.right, i2.right); 69 | 70 | /** A comparator between intervals based on their length: shorter intervals come first. 
*/ 71 | public static final Comparator<LongInterval> LENGTH_COMPARATOR = (i1, i2) -> Long.compare(i1.length(), i2.length()); 72 | } 73 | 74 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/util/PrefixMap.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2004-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.util; 21 | 22 | import it.unimi.dsi.fastutil.objects.Object2ObjectFunction; 23 | 24 | /** A map from prefixes to string intervals (and possibly vice versa). 25 | * 26 | *

Instances of this class provide the services of a {@link StringMap}, but by assuming 27 | * the strings are lexicographically ordered, they can provide further information by 28 | * exposing a {@linkplain #rangeMap() function from string prefixes to intervals} and a 29 | * {@linkplain #prefixMap() function from intervals to string prefixes}. 30 | * 31 | *

In the first case, given a prefix, we can ask for the range of strings starting 32 | * with that prefix, expressed as an {@link Interval}. This information is very useful to 33 | * satisfy prefix queries (e.g., monitor*) with a brute-force approach. 34 | * 35 | *

Optionally, a prefix map may provide the opposite service: given an interval of terms, it 36 | * may provide the maximum common prefix. This feature can be checked for by calling 37 | * {@link #prefixMap()}. 38 | * 39 | * @author Sebastiano Vigna 40 | * @since 0.9.2 41 | */ 42 | 43 | /* NOTE(review): the supertype and both return types below are written as raw types; generic type arguments appear to be missing. Confirm against the declarations of StringMap and Object2ObjectFunction. */ public interface PrefixMap extends StringMap { 44 | /** Returns a function mapping prefixes to ranges of strings. 45 | * 46 | * @return a function mapping prefixes to ranges of strings. 47 | */ 48 | Object2ObjectFunction rangeMap(); 49 | 50 | /** Returns a function mapping ranges of strings to common prefixes (optional operation). 51 | * 52 | * @return a function mapping ranges of strings to common prefixes, or {@code null} if this 53 | * map does not support prefixes. 54 | */ 55 | Object2ObjectFunction prefixMap(); 56 | } 57 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/util/StringMap.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2008-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.util; 21 | 22 | import java.io.Serializable; 23 | 24 | import it.unimi.dsi.fastutil.objects.Object2LongFunction; 25 | import it.unimi.dsi.fastutil.objects.ObjectList; 26 | 27 | /** A map from strings to numbers (and possibly vice versa). 28 | * 29 | *

String maps represent mappings from strings (actually, any implementation of {@link CharSequence}) 30 | * to numbers; they can support {@linkplain #list() reverse 31 | * mapping}, too. The latter usually makes sense only if the map is minimal and perfect (i.e., a bijection between a set 32 | * of strings and an initial segment of the natural numbers of the same size). String maps are useful for 33 | * terms of an MG4J 34 | * inverted index, URLs of a WebGraph-compressed 35 | * web snapshot, and so on. 36 | * 37 | *

Warning: the return value of {@link #list()} is a fastutil {@link ObjectList}. 38 | * This in principle is not sensible, as string maps return longs (they extend 39 | * {@link Object2LongFunction}), and {@link ObjectList} has only integer index 40 | * support. If you need long indices, please consider using {@link it.unimi.dsi.big.util.StringMap}. 41 | * 42 | * @author Sebastiano Vigna 43 | * @since 0.2 44 | */ 45 | 46 | /* NOTE(review): Object2LongFunction and ObjectList are used below as raw types; generic type arguments appear to be missing. Confirm against the fastutil declarations. */ public interface StringMap extends Object2LongFunction, Serializable { 47 | 48 | /** Returns a list view of the domain of this string map (optional operation). 49 | * 50 | *

Note that the list view acts as an inverse of the mapping implemented by this map. 51 | * 52 | * @return a list view of the domain of this string map, or {@code null} if this map does 53 | * not support this operation. 54 | */ 55 | 56 | ObjectList list(); 57 | } 58 | -------------------------------------------------------------------------------- /src/it/unimi/dsi/util/concurrent/package-info.java: -------------------------------------------------------------------------------- 1 | /** Concurrent data structures */ 2 | 3 | package it.unimi.dsi.util.concurrent; 4 | -------------------------------------------------------------------------------- /src/overview.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | DSI utils 5 | 6 | 7 | 8 |

The DSI utilities are a mishmash of classes accumulated during the last 9 | ten years in projects developed at the former DSI (Dipartimento di Scienze dell'Informazione, 10 | i.e., Information Sciences Department), now DI (Dipartimento di Informatica, i.e., 11 | Informatics Department) of the Università degli Studi di Milano. 12 | They were originally distributed in several projects 13 | (mainly in MG4J) but we finally decided to 14 | gather all the material in a single place. 15 | 16 |

The DSI utilities are free software 17 | distributed under either the GNU Lesser General Public License 2.1+ or the Apache Software License 2.0. 18 | 19 |

Highlights

20 | 21 |

The implementations available are a bit eclectic due to the particular kind of applications 22 | we developed. Very broadly, we have: 23 | 24 |

    25 |
  • Implementations of pseudorandom number generators. See the {@linkplain it.unimi.dsi.util package documentation} for details.
  • 26 |
  • {@link it.unimi.dsi.bits.BitVector} and its implementations—a high-performance but flexible set of bit vector classes.
  • 27 |
  • A {@link it.unimi.dsi.compression} package containing codecs for several types of encodings.
  • 28 |
  • {@link it.unimi.dsi.logging.ProgressLogger}, a flexible logger with statistics marking the progress of the (many) classes 29 | we use that require hours of computation. 30 |
  • {@link it.unimi.dsi.lang.ObjectParser}, a class making it easy to specify complex objects on the command 31 | line.
  • 32 |
  • {@link it.unimi.dsi.lang.MutableString}, our answer to the Java {@link java.lang.String} class.
  • 33 |
  • The {@link it.unimi.dsi.io I/O package}, containing fast versions of several classes existing in {@link java.io}, 34 | many useful classes to easily read text data (e.g., {@link it.unimi.dsi.io.FileLinesMutableStringIterable}), {@linkplain it.unimi.dsi.io.InputBitStream bit streams}, 35 | classes providing large-size memory mapping such as {@link it.unimi.dsi.io.ByteBufferInputStream}, 36 | and {@link it.unimi.dsi.io.OfflineIterable}—the easy & fast way to store large sequences of objects on disk and iterate on them.
  • 37 |
  • The {@link it.unimi.dsi.util} package, containing {@linkplain it.unimi.dsi.util.ImmutableBinaryTrie tries}, 38 | {@linkplain it.unimi.dsi.util.ImmutableExternalPrefixMap immutable prefix maps}, {@linkplain it.unimi.dsi.util.BloomFilter Bloom filters}, 39 | a very comfortable {@link it.unimi.dsi.util.Properties} class and more.
  • 40 |
  • The {@link it.unimi.dsi.stat} package, containing a lightweight class for {@linkplain it.unimi.dsi.stat.SummaryStats computing basic statistics} and 41 | {@linkplain it.unimi.dsi.stat.Jackknife an arbitrary-precision implementation of the Jackknife method}.
  • 42 |
  • Lots of utility methods in {@link it.unimi.dsi.Util} (have a look!)
  • 43 |
  • {@link it.unimi.dsi.big.util.MappedFrontCodedStringBigList}, which provides compact memory-mapped storage of strings, possibly with some compression.
  • 44 |
  • Big versions of I/O and utility classes in {@link it.unimi.dsi.big.io} and {@link it.unimi.dsi.big.util}.
  • 45 |
46 | 47 | 48 | -------------------------------------------------------------------------------- /test/it/unimi/dsi/big/util/FrontCodedStringBigListTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2010-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.big.util; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | 24 | import java.util.Arrays; 25 | import java.util.Collections; 26 | import java.util.List; 27 | 28 | import org.junit.Test; 29 | 30 | import it.unimi.dsi.lang.MutableString; 31 |
/** Checks that a {@link FrontCodedStringBigList} gives back exactly the strings it was built from. */
public class FrontCodedStringBigListTest {

    /** Verifies every element through both {@code get(i)} and {@code get(i, MutableString)},
     * for each ratio from 1 to 7, with and without UTF-8 coding.
     *
     * <p>The first pass uses the words in their original order; the list is then sorted, so the
     * second pass exercises sorted input.
     */
    @Test
    public void test() {
        final List<String> words = Arrays.asList(TernaryIntervalSearchTreeTest.WORDS.clone());
        final MutableString buffer = new MutableString();
        for (int pass = 0; pass < 2; pass++) {
            for (final boolean utf8 : new boolean[] { false, true }) {
                for (int ratio = 1; ratio < 8; ratio++) {
                    final FrontCodedStringBigList fcl = new FrontCodedStringBigList(words.iterator(), ratio, utf8);
                    for (int i = 0; i < fcl.size64(); i++) {
                        final String label = Integer.toString(i);
                        assertEquals(label, words.get(i), fcl.get(i).toString());
                        fcl.get(i, buffer);
                        assertEquals(label, words.get(i), buffer.toString());
                    }
                }
            }

            // Sort in place so that the next pass sees the same words in sorted order.
            Collections.sort(words);
        }
    }

    /** Verifies that strings containing private-use characters and surrogate pairs survive a
     * round trip, for each ratio from 1 to 7, with and without UTF-8 coding.
     */
    @Test
    public void testSurrogatePairs() {
        final List<String> strings = Arrays.asList("a", "AB\uE000AB", "\uD800\uDF02", "\uD800\uDF03", "b");
        for (final boolean utf8 : new boolean[] { false, true }) {
            for (int ratio = 1; ratio < 8; ratio++) {
                final FrontCodedStringBigList fcl = new FrontCodedStringBigList(strings.iterator(), ratio, utf8);
                for (int i = 0; i < fcl.size64(); i++) {
                    assertEquals(Integer.toString(i), strings.get(i), fcl.get(i).toString());
                }
            }
        }
    }
}
65 | -------------------------------------------------------------------------------- /test/it/unimi/dsi/big/util/LiterallySignedStringMapTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2002-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.big.util; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | 24 | import java.io.File; 25 | import java.io.IOException; 26 | import java.io.Serializable; 27 | import java.util.Arrays; 28 | import java.util.Collections; 29 | 30 | import org.junit.Test; 31 | 32 | import it.unimi.dsi.fastutil.Hash; 33 | import it.unimi.dsi.fastutil.io.BinIO; 34 | import it.unimi.dsi.fastutil.objects.Object2LongOpenCustomHashMap; 35 | import it.unimi.dsi.fastutil.objects.ObjectBigLists; 36 | import it.unimi.dsi.lang.MutableString; 37 | import it.unimi.dsi.util.FrontCodedStringList; 38 | 39 | public class LiterallySignedStringMapTest { 40 | 41 | private final static class CharSequenceStrategy implements Hash.Strategy, Serializable { 42 | private static final long serialVersionUID = 1L; 43 | 44 | @Override 45 | public boolean equals(final CharSequence a, final CharSequence b) { 46 | if (a == null) return b == null; 47 | if (b == null) return false; 48 | return a.toString().equals(b.toString()); 49 | } 50 | 51 | @Override 52 | public int hashCode(final CharSequence o) { 53 | return o.toString().hashCode(); 54 | } 55 | } 56 | 57 | @Test 58 | public void testNumbers() throws IOException, ClassNotFoundException { 59 | for(int n = 10; n < 10000; n *= 10) { 60 | final String[] s = new String[n]; 61 | for(int i = s.length; i-- != 0;) s[i] = Integer.toString(i); 62 | Collections.shuffle(Arrays.asList(s)); 63 | 64 | final FrontCodedStringList fcl = new FrontCodedStringList(Arrays.asList(s), 8, true); 65 | // Test with mph 66 | final Object2LongOpenCustomHashMap mph = new Object2LongOpenCustomHashMap<>(new CharSequenceStrategy()); 67 | mph.defaultReturnValue(-1); 68 | for(int i = 0; i < s.length; i++) mph.put(new MutableString(s[i]), i); 69 | 70 | LiterallySignedStringMap map = new LiterallySignedStringMap(mph, ObjectBigLists.asBigList(fcl)); 71 | 72 | for(int i = s.length; i-- != 
0;) assertEquals(i, map.getLong(s[i])); 73 | for(int i = s.length + n; i-- != s.length;) assertEquals(-1, map.getLong(Integer.toString(i))); 74 | 75 | final File temp = File.createTempFile(getClass().getSimpleName(), "test"); 76 | temp.deleteOnExit(); 77 | BinIO.storeObject(map, temp); 78 | map = (LiterallySignedStringMap)BinIO.loadObject(temp); 79 | 80 | for(int i = s.length; i-- != 0;) assertEquals(i, map.getLong(s[i])); 81 | for(int i = s.length + n; i-- != s.length;) assertEquals(-1, map.getLong(Integer.toString(i))); 82 | } 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /test/it/unimi/dsi/big/util/LongBigArraySignedStringMapTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2002-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.big.util; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | 24 | import java.io.File; 25 | import java.io.IOException; 26 | import java.util.Arrays; 27 | 28 | import org.junit.Test; 29 | 30 | import it.unimi.dsi.fastutil.io.BinIO; 31 | import it.unimi.dsi.fastutil.longs.LongBigArrayBigList; 32 | import it.unimi.dsi.fastutil.objects.Object2LongOpenHashMap; 33 |
/** Checks {@link LongBigListSignedStringMap} on the decimal strings of the first {@code SIZE}
 * natural numbers, with signatures held in memory and read back from a file.
 *
 * <p>Earlier versions wrapped each test body in a {@code width} loop whose variable was never
 * used, so the same checks ran seven times; each test now runs its body once.
 */
public class LongBigArraySignedStringMapTest {

    /** Number of keys in the map under test. */
    private static final int SIZE = 100000;

    /** Returns the decimal strings of {@code 0..values.length - 1}, storing {@code i} into {@code values[i]}.
     *
     * @param values the array that will receive the value associated with each key.
     * @return the keys, with the key at index {@code i} being the decimal representation of {@code i}.
     */
    private static String[] keys(final long[] values) {
        final String[] keys = new String[values.length];
        for (int i = 0; i < keys.length; i++) {
            values[i] = i;
            keys[i] = Integer.toString(i);
        }
        return keys;
    }

    /** Asserts that the decimal strings of {@code 0..present - 1} are mapped to themselves and
     * that those of the following {@code absent} integers are mapped to -1.
     */
    private static void assertMapping(final LongBigListSignedStringMap map, final int present, final int absent) {
        for (int i = present; i-- != 0;) assertEquals(i, map.getLong(Integer.toString(i)));
        for (int i = present + absent; i-- != present;) assertEquals(-1, map.getLong(Integer.toString(i)));
    }

    /** Signs the keys in memory, checks the resulting map, then stores the signatures to a
     * temporary file and checks a map built from that file.
     */
    @SuppressWarnings("deprecation")
    @Test
    public void testNumbers() throws IOException {
        final long[] v = new long[SIZE];
        final String[] s = keys(v);

        // A plain hash map stands in for a minimal perfect hash.
        final Object2LongOpenHashMap<String> mph = new Object2LongOpenHashMap<>(s, v);
        final long[][] signatures = LongBigListSignedStringMap.sign(Arrays.asList(s).iterator(), mph);

        assertMapping(new LongBigListSignedStringMap(mph, LongBigArrayBigList.wrap(signatures)), s.length, 100);

        final File temp = File.createTempFile(getClass().getSimpleName(), "test");
        temp.deleteOnExit();
        try {
            BinIO.storeLongs(signatures, temp);
            assertMapping(new LongBigListSignedStringMap(mph, temp.toString()), s.length, 10000);
        } finally {
            temp.delete();
        }
    }

    /** Writes the signatures straight to a temporary file with the file-based {@code sign()}
     * variant and checks a map built from that file.
     */
    @SuppressWarnings("deprecation")
    @Test
    public void testSortedNumbers() throws IOException {
        final long[] v = new long[SIZE];
        final String[] s = keys(v);

        // A plain hash map stands in for a minimal perfect hash.
        final Object2LongOpenHashMap<String> mph = new Object2LongOpenHashMap<>(s, v);

        final File temp = File.createTempFile(getClass().getSimpleName(), "test");
        temp.deleteOnExit();
        try {
            LongBigListSignedStringMap.sign(Arrays.asList(s).iterator(), temp.toString());
            assertMapping(new LongBigListSignedStringMap(mph, temp.toString()), s.length, 10000);
        } finally {
            temp.delete();
        }
    }
}
94 | -------------------------------------------------------------------------------- /test/it/unimi/dsi/big/util/MappedFrontCodedStringBigListTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2010-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.big.util; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | 24 | import java.io.File; 25 | import java.io.IOException; 26 | import java.nio.charset.StandardCharsets; 27 | import java.util.ArrayList; 28 | import java.util.Arrays; 29 | import java.util.Collections; 30 | import java.util.List; 31 | 32 | import org.apache.commons.configuration2.ex.ConfigurationException; 33 | import org.apache.commons.lang3.StringUtils; 34 | import org.junit.Test; 35 | 36 | import it.unimi.dsi.lang.MutableString; 37 |
/** Checks that {@link MappedFrontCodedStringBigList} and {@link FrontCodedStringBigList} both
 * give back the sorted word list they were built from.
 */
public class MappedFrontCodedStringBigListTest {

    /** Builds both list types from the sorted test words plus four long strings and compares
     * every element through each accessor.
     *
     * <p>The temporary basename file and the three files derived from it are deleted when the
     * test ends, whether it passes or fails.
     */
    @Test
    public void test() throws IOException, ConfigurationException {
        final File basenameFile = File.createTempFile(this.getClass().getName(), ".basename");
        final String basename = basenameFile.toString();
        try {
            final List<String> c = new ArrayList<>(Arrays.asList(TernaryIntervalSearchTreeTest.WORDS.clone()));
            // Long strings with long common prefixes.
            c.add(StringUtils.repeat("a", 1000));
            c.add(StringUtils.repeat("a", 500) + StringUtils.repeat("b", 500));
            c.add(StringUtils.repeat("a", 1000) + StringUtils.repeat("b", 1000));
            c.add(StringUtils.repeat("a", 100) + StringUtils.repeat("b", 1000));
            final MutableString s = new MutableString();
            Collections.sort(c);

            for (int p = 0; p < 2; p++) {
                for (int ratio = 1; ratio < 8; ratio++) {
                    final FrontCodedStringBigList fcl = new FrontCodedStringBigList(c.iterator(), ratio, true);

                    // NOTE(review): the mapped list is always built with ratio 4, whatever the loop
                    // variable is; confirm whether `ratio` was intended here.
                    MappedFrontCodedStringBigList.build(basename, 4, c.stream().map(x -> x.getBytes(StandardCharsets.UTF_8)).iterator());
                    final MappedFrontCodedStringBigList mfcl = MappedFrontCodedStringBigList.load(basename);
                    for (int i = 0; i < fcl.size64(); i++) {
                        final String label = Integer.toString(i);
                        final String expected = c.get(i);
                        assertEquals(label, expected, mfcl.get(i).toString());
                        assertEquals(label, expected, mfcl.getString(i));
                        assertEquals(label, expected, new String(mfcl.getArray(i), StandardCharsets.UTF_8));
                        fcl.get(i, s);
                        assertEquals(label, expected, s.toString());
                    }
                }
            }
        } finally {
            // createTempFile() creates the basename file itself, so it must be removed too.
            basenameFile.delete();
            new File(basename + MappedFrontCodedStringBigList.PROPERTIES_EXTENSION).delete();
            new File(basename + MappedFrontCodedStringBigList.BYTE_ARRAY_EXTENSION).delete();
            new File(basename + MappedFrontCodedStringBigList.POINTERS_EXTENSION).delete();
        }
    }
}
71 | -------------------------------------------------------------------------------- /test/it/unimi/dsi/big/util/SemiExternalGammaBigListTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2002-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.big.util; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | import static org.junit.Assert.assertTrue; 24 | 25 | import java.io.IOException; 26 | 27 | import org.junit.Test; 28 | 29 | import it.unimi.dsi.fastutil.longs.LongArrayList; 30 | import it.unimi.dsi.fastutil.longs.LongList; 31 | import it.unimi.dsi.io.InputBitStream; 32 | import it.unimi.dsi.io.OutputBitStream; 33 |
/** Checks that {@link SemiExternalGammaBigList} gives back the values written to its bit stream.
 *
 * @author Fabien Campagne
 * @author Sebastiano Vigna
 */
public class SemiExternalGammaBigListTest {

    /** Writes the given values in gamma coding and returns a bit stream over the resulting bytes.
     *
     * @param longs the values to encode.
     * @return an input bit stream over an array trimmed to the number of bytes written.
     */
    private static InputBitStream buildInputStream(final LongList longs) throws IOException {
        // Four bytes per value is the scratch space allotted to the encoder.
        final byte[] scratch = new byte[longs.size() * 4];
        @SuppressWarnings("resource")
        final OutputBitStream encoder = new OutputBitStream(scratch);
        final int count = longs.size();
        for (int k = 0; k < count; k++) encoder.writeLongGamma(longs.getLong(k));

        // Whole bytes written, plus one more if a partial byte remains.
        int byteCount = (int)(encoder.writtenBits() / 8);
        if (encoder.writtenBits() % 8 != 0) byteCount++;

        final byte[] trimmed = new byte[byteCount];
        System.arraycopy(scratch, 0, trimmed, 0, byteCount);
        return new InputBitStream(trimmed);
    }

    /** Reads a fixed sequence back through lists built with steps 1, 2, 4, 7 and 8. */
    @Test
    public void testSemiExternalGammaBigListGammaCoding() throws IOException {
        final long[] longs = { 10, 300, 450, 650, 1000, 1290, 1699 };
        final LongList listLongs = new LongArrayList(longs);

        for (final int step : new int[] { 1, 2, 4, 7, 8 }) {
            final SemiExternalGammaBigList list = new SemiExternalGammaBigList(buildInputStream(listLongs), step, listLongs.size());
            for (int index = 0; index < longs.length; index++) {
                assertEquals("test failed for index: " + index, longs[index], list.getLong(index));
            }
        }
    }

    /** Checks that a list over an empty sequence can be constructed without throwing. */
    @Test
    public void testEmptySemiExternalGammaBigListGammaCoding() throws IOException {
        final LongList listOffsets = new LongArrayList(new long[] { });

        new SemiExternalGammaBigList(buildInputStream(listOffsets), 1, listOffsets.size());
        assertTrue(true);
    }

}
97 |
-------------------------------------------------------------------------------- /test/it/unimi/dsi/big/util/ShiftAddXorSignedStringMapTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2002-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.big.util; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | 24 | import java.io.File; 25 | import java.io.IOException; 26 | import java.util.Arrays; 27 | 28 | import org.junit.Test; 29 | 30 | import it.unimi.dsi.fastutil.io.BinIO; 31 | import it.unimi.dsi.fastutil.objects.Object2LongOpenHashMap; 32 |
/** Checks {@link ShiftAddXorSignedStringMap} for every signature width from 16 to 64 bits in
 * steps of 8, before and after serialization.
 */
public class ShiftAddXorSignedStringMapTest {

    /** Number of keys in the map under test. */
    private static final int SIZE = 1000;

    /** Number of absent keys probed after the present ones. */
    private static final int ABSENT = 100;

    /** Asserts that the decimal strings of {@code 0..SIZE - 1} are mapped to themselves and that
     * those of the following {@code ABSENT} integers are mapped to -1.
     */
    private static void assertMapping(final ShiftAddXorSignedStringMap map) {
        for (int i = SIZE; i-- != 0;) assertEquals(i, map.getLong(Integer.toString(i)));
        for (int i = SIZE + ABSENT; i-- != SIZE;) assertEquals(-1, map.getLong(Integer.toString(i)));
    }

    /** Builds a signed map for each width and checks it directly and after a store/load round trip. */
    @SuppressWarnings("deprecation")
    @Test
    public void testNumbers() throws IOException, ClassNotFoundException {
        for (int width = 16; width <= Long.SIZE; width += 8) {
            final String[] s = new String[SIZE];
            final long[] v = new long[SIZE];
            for (int i = 0; i < SIZE; i++) {
                v[i] = i;
                s[i] = Integer.toString(i);
            }

            // A plain hash map stands in for a minimal perfect hash.
            final Object2LongOpenHashMap<String> mph = new Object2LongOpenHashMap<>(s, v);
            ShiftAddXorSignedStringMap map = new ShiftAddXorSignedStringMap(Arrays.asList(s).iterator(), mph, width);
            assertMapping(map);

            final File temp = File.createTempFile(getClass().getSimpleName(), "test");
            temp.deleteOnExit();
            BinIO.storeObject(map, temp);
            map = (ShiftAddXorSignedStringMap)BinIO.loadObject(temp);
            assertMapping(map);
        }
    }
}
62 | -------------------------------------------------------------------------------- /test/it/unimi/dsi/bits/BitVectorsTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2010-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.bits; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | import static org.junit.Assert.assertFalse; 24 | 25 | import java.io.DataInputStream; 26 | import java.io.DataOutputStream; 27 | import java.io.IOException; 28 | import java.util.Arrays; 29 | import java.util.Iterator; 30 | 31 | import org.junit.Test; 32 | 33 | import it.unimi.dsi.fastutil.io.FastByteArrayInputStream; 34 | import it.unimi.dsi.fastutil.io.FastByteArrayOutputStream; 35 | import it.unimi.dsi.io.OfflineIterable; 36 |
/** Checks the fast serialization helpers and the offline serializer in {@link BitVectors}. */
public class BitVectorsTest {

    /** Returns the bit vectors used by both tests: the three constants plus vectors of
     * 64, 60, 128 and 124 bits.
     */
    private static BitVector[] sampleVectors() {
        return new BitVector[] { BitVectors.ZERO, BitVectors.ONE, BitVectors.EMPTY_VECTOR,
                LongArrayBitVector.wrap(new long[] { 0xAAAAAAAAAAAAAAAAL }, 64),
                LongArrayBitVector.wrap(new long[] { 0xAAAAAAAAAAAAAAAL }, 60),
                LongArrayBitVector.wrap(new long[] { 0xAAAAAAAAAAAAAAAAL, 0xAAAAAAAAAAAAAAAAL }, 128),
                LongArrayBitVector.wrap(new long[] { 0xAAAAAAAAAAAAAAAAL, 0xAAAAAAAAAAAAAAAL }, 124) };
    }

    /** Writes each sample vector with {@code writeFast()} and reads it back with both
     * {@code readFast()} variants, the second one reusing a caller-supplied vector.
     */
    @Test
    public void testReadWriteFast() throws IOException {
        final FastByteArrayOutputStream fbaos = new FastByteArrayOutputStream();
        final DataOutputStream dos = new DataOutputStream(fbaos);
        final LongArrayBitVector labv = LongArrayBitVector.getInstance();
        final BitVector[] a = sampleVectors();

        for (final BitVector bv : a) {
            BitVectors.writeFast(bv, dos);
            // The stream is reused on the next iteration, so flush it rather than close it.
            dos.flush();
            assertEquals(bv, BitVectors.readFast(new DataInputStream(new FastByteArrayInputStream(fbaos.array))));
            fbaos.reset();
        }

        for (final BitVector bv : a) {
            BitVectors.writeFast(bv, dos);
            dos.flush();
            assertEquals(bv, BitVectors.readFast(new DataInputStream(new FastByteArrayInputStream(fbaos.array)), labv));
            fbaos.reset();
        }
    }

    /** Stores the sample vectors in an {@link OfflineIterable} and checks that iteration returns
     * them in order and then ends.
     */
    @Test
    public void testMakeOffline() throws IOException {
        final BitVector[] a = sampleVectors();

        final OfflineIterable<BitVector, LongArrayBitVector> iterable = new OfflineIterable<>(BitVectors.OFFLINE_SERIALIZER, LongArrayBitVector.getInstance());
        try {
            iterable.addAll(Arrays.asList(a));

            final Iterator<? extends BitVector> iterator = iterable.iterator();
            for (final BitVector element : a) assertEquals(element, iterator.next());
            assertFalse(iterator.hasNext());
        } finally {
            // Close the iterable even when an assertion fails.
            iterable.close();
        }
    }
}
82 | -------------------------------------------------------------------------------- /test/it/unimi/dsi/bits/FixedLongTransformationStrategyTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2010-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.bits; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | import static org.junit.Assert.assertFalse; 24 | import static org.junit.Assert.assertTrue; 25 | 26 | import org.junit.Test; 27 | 28 | public class FixedLongTransformationStrategyTest { 29 | 30 | @Test 31 | public void testGetBoolean() { 32 | final TransformationStrategy fixedLong = TransformationStrategies.fixedLong(); 33 | BitVector p = fixedLong.toBitVector(Long.valueOf(0)); 34 | for (int i = Long.SIZE; i-- != 1;) assertFalse(p.getBoolean(i)); 35 | 36 | // Flipped bit 37 | assertTrue(p.getBoolean(0)); 38 | p = fixedLong.toBitVector(Long.valueOf(0xDEADBEEFDEADF00DL)); 39 | for (int i = Long.SIZE; i-- != 0;) assertTrue(p.getBoolean(i) == (((0xDEADBEEFDEADF00DL ^ 1L << 63) & 1L << Long.SIZE - 1 - i) != 0)); 40 | } 41 | 42 | @Test 43 | public void testGetLong() { 44 | final TransformationStrategy fixedLong = TransformationStrategies.fixedLong(); 45 | final BitVector p = fixedLong.toBitVector(Long.valueOf(0xDEADBEEFDEADF00DL)); 46 | for(int from = Long.SIZE; from-- != 0;) 47 | for (int to = Long.SIZE; from < to--;) 48 | assertEquals(LongArrayBitVector.wrap(new long[] { 49 | Long.reverse(0xDEADBEEFDEADF00DL) ^ 1 }).getLong(from, to), p.getLong(from, to)); 50 | } 51 | 52 | } 53 | -------------------------------------------------------------------------------- /test/it/unimi/dsi/bits/PrefixFreeTransformationStrategyTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2010-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 
11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.bits; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | import static org.junit.Assert.assertFalse; 24 | import static org.junit.Assert.assertTrue; 25 | 26 | import org.junit.Test; 27 |
/** Checks the bit vectors produced by {@link TransformationStrategies#prefixFree()}. */
public class PrefixFreeTransformationStrategyTest {

    /** Checks bit by bit, and then as a whole, that {@code 010} is transformed into {@code 1011100}. */
    @Test
    public void testGetBoolean() {
        final LongArrayBitVector source = LongArrayBitVector.of(0, 1, 0);
        final TransformationStrategy<BitVector> prefixFree = TransformationStrategies.prefixFree();
        final BitVector transformed = prefixFree.toBitVector(source);

        final boolean[] expectedBits = { true, false, true, true, true, false, false };
        for (int i = 0; i < expectedBits.length; i++) {
            assertTrue(transformed.getBoolean(i) == expectedBits[i]);
        }
        assertEquals(LongArrayBitVector.of(1, 0, 1, 1, 1, 0, 0), transformed);
    }

    /** Checks {@code getLong()} on the transformed vector for several contents of the source vector.
     *
     * <p>The transformed vector {@code p} is obtained once; the assertions made after
     * {@code v} is cleared and refilled expect {@code p} to reflect the new content, so the order
     * of the statements below matters.
     */
    @Test
    public void testGetLong() {
        LongArrayBitVector v = LongArrayBitVector.getInstance();
        v.append(0xFFFFFFFFL, 32);
        final TransformationStrategy<BitVector> prefixFree = TransformationStrategies.prefixFree();
        final BitVector p = prefixFree.toBitVector(v);
        assertEquals(0xFFFFFFFFFFFFFFFFL, p.getLong(0, 64));
        assertFalse(p.getBoolean(64));
        assertEquals(0, p.getLong(64, 64));

        // From here on p is read again after v has been modified.
        v.clear();
        v.append(0x0, 32);
        assertEquals(0x5555555555555555L, p.getLong(0, 64));
        assertEquals(0x5555555555555555L >>> 1, p.getLong(1, 64));
        assertFalse(p.getBoolean(64));

        v.clear();
        v.append(0x3, 32);
        assertEquals(0x555555555555555FL, p.getLong(0, 64));
        assertEquals(0x5FL, p.getLong(0, 7));

        // A fresh source vector needs a fresh transformation.
        v = LongArrayBitVector.of(0, 0, 0, 0, 1, 1, 1);
        assertEquals(LongArrayBitVector.of(1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0), prefixFree.toBitVector(v));
    }

}
71 | -------------------------------------------------------------------------------- /test/it/unimi/dsi/bits/RawByteArrayTransformationStrategyTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2010-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.bits; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | import static org.junit.Assert.assertFalse; 24 | import static org.junit.Assert.assertTrue; 25 | 26 | import org.junit.Test; 27 | 28 | public class RawByteArrayTransformationStrategyTest { 29 | 30 | @Test 31 | public void testGetLong() { 32 | byte[] a = new byte[] { 0x55, (byte)0xFF }; 33 | assertEquals(16, TransformationStrategies.rawByteArray().toBitVector(a).length()); 34 | assertEquals(0xFF55L, TransformationStrategies.rawByteArray().toBitVector(a).getLong(0, 16)); 35 | 36 | a = new byte[] { 1, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, 0 }; 37 | assertTrue(TransformationStrategies.rawByteArray().toBitVector(a).getBoolean(0)); 38 | assertFalse(TransformationStrategies.rawByteArray().toBitVector(a).getBoolean(1)); 39 | assertTrue(TransformationStrategies.rawByteArray().toBitVector(a).getBoolean(64)); 40 | assertEquals(0x1L, TransformationStrategies.rawByteArray().toBitVector(a).getLong(0, 56)); 41 | assertEquals(0x1L, TransformationStrategies.rawByteArray().toBitVector(a).getLong(0, 64)); 42 | assertEquals(-1L, TransformationStrategies.rawByteArray().toBitVector(a).getLong(64, 128)); 43 | 44 | for(int i = 1; i < 64; i++) 45 | assertEquals(1, TransformationStrategies.rawByteArray().toBitVector(a).getLong(0, i)); 46 | for(int i = 0; i < 63; i++) 47 | assertEquals(0, TransformationStrategies.rawByteArray().toBitVector(a).getLong(1, 1 + i)); 48 | for(int i = 64; i < 127; i++) 49 | assertEquals((1L << i - 64) - 1, TransformationStrategies.rawByteArray().toBitVector(a).getLong(64, i)); 50 | 51 | a = new byte[] { 1, 0, 0, 0, 0, 0, 0, 0, 0x55 }; 52 | assertEquals(0x55L << 57, TransformationStrategies.rawByteArray().toBitVector(a).getLong(7, 71)); 53 | assertEquals(0x15L << 57, TransformationStrategies.rawByteArray().toBitVector(a).getLong(7, 70)); 54 | assertEquals(0x15L << 57, 
TransformationStrategies.rawByteArray().toBitVector(a).getLong(7, 69)); 55 | 56 | } 57 | 58 | } 59 | -------------------------------------------------------------------------------- /test/it/unimi/dsi/bits/RawFixedLongTransformationStrategyTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2010-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.bits; 21 | 22 | import static org.junit.Assert.assertFalse; 23 | import static org.junit.Assert.assertTrue; 24 | 25 | import org.junit.Test; 26 | 27 | public class RawFixedLongTransformationStrategyTest { 28 | 29 | @Test 30 | public void testGetBoolean() { 31 | final TransformationStrategy rawFixedLong = TransformationStrategies.rawFixedLong(); 32 | BitVector p = rawFixedLong.toBitVector(Long.valueOf(0)); 33 | for(int i = Long.SIZE; i-- != 0;) assertFalse(p.getBoolean(i)); 34 | p = rawFixedLong.toBitVector(Long.valueOf(0xDEADBEEFDEADF00DL)); 35 | for(int i = Long.SIZE; i-- != 0;) assertTrue(p.getBoolean(i) == ((0xDEADBEEFDEADF00DL & 1L << i) != 0)); 36 | } 37 | 38 | @Test 39 | public void testGetLong() { 40 | final TransformationStrategy rawFixedLong = TransformationStrategies.rawFixedLong(); 41 | final BitVector p = rawFixedLong.toBitVector(Long.valueOf(0xDEADBEEFDEADF00DL)); 42 | 
for(int from = Long.SIZE; from-- != 0;) 43 | for(int to = Long.SIZE; from < to--;) 44 | assertTrue(p.getLong(from, to) == LongArrayBitVector.wrap(new long[] { 0xDEADBEEFDEADF00DL }).getLong(from, to)); 45 | } 46 | 47 | } 48 | -------------------------------------------------------------------------------- /test/it/unimi/dsi/bits/RawUtf32TransformationStrategyTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2010-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.bits; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | 24 | import org.junit.Test; 25 | 26 | public class RawUtf32TransformationStrategyTest { 27 | 28 | @Test 29 | public void testGetLong() { 30 | String s = new String(new char[] { '\u0001', '\u0002' }); 31 | assertEquals(64, TransformationStrategies.rawUtf32().toBitVector(s).length()); 32 | assertEquals(0x200000001L, TransformationStrategies.rawUtf32().toBitVector(s).getLong(0, 64)); 33 | s = new String(new char[] { '\u0001', '\u0002', '\u0003' }); 34 | assertEquals(96, TransformationStrategies.rawUtf32().toBitVector(s).length()); 35 | assertEquals(0x300000002L, TransformationStrategies.rawUtf32().toBitVector(s).getLong(32, 96)); 36 | s = new String(new char[] { '\u0001', '\u0002', '\u0003', '\u0004' }); 37 | assertEquals(128, TransformationStrategies.rawUtf32().toBitVector(s).length()); 38 | assertEquals(0x200000001L, TransformationStrategies.rawUtf32().toBitVector(s).getLong(0, 64)); 39 | assertEquals(0x400000003L, TransformationStrategies.rawUtf32().toBitVector(s).getLong(64, 128)); 40 | s = new String(new char[] { '\u0001', '\u0002', '\u0003', '\u0004' }); 41 | 42 | s = new String(new char[] { '\uD800', '\uDC00' }); 43 | assertEquals(32, TransformationStrategies.rawUtf32().length(s)); 44 | assertEquals(0x10000, TransformationStrategies.rawUtf32().toBitVector(s).getLong(0, 32)); 45 | } 46 | 47 | } 48 | -------------------------------------------------------------------------------- /test/it/unimi/dsi/bits/Utf32TransformationStrategyTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2010-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * 
http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.bits; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | 24 | import org.junit.Test; 25 | 26 | public class Utf32TransformationStrategyTest { 27 | 28 | @Test 29 | public void testGetLong() { 30 | String s = new String(new char[] { '\u0001', '\u0002' }); 31 | assertEquals(96, TransformationStrategies.prefixFreeUtf32().toBitVector(s).length()); 32 | assertEquals(0x4000000080000000L, TransformationStrategies.prefixFreeUtf32().toBitVector(s).getLong(0, 64)); 33 | assertEquals(0x0000000040000000L, TransformationStrategies.prefixFreeUtf32().toBitVector(s).getLong(32, 96)); 34 | s = new String(new char[] { '\u0001', '\u0002', '\u0003' }); 35 | assertEquals(128, TransformationStrategies.prefixFreeUtf32().toBitVector(s).length()); 36 | assertEquals(0x80000000L, TransformationStrategies.prefixFreeUtf32().toBitVector(s).getLong(0, 48)); 37 | assertEquals(0x4000000080000000L, TransformationStrategies.prefixFreeUtf32().toBitVector(s).getLong(0, 64)); 38 | s = new String(new char[] { '\u0001', '\u0002', '\u0003', '\u0004' }); 39 | assertEquals(160, TransformationStrategies.prefixFreeUtf32().toBitVector(s).length()); 40 | assertEquals(0, TransformationStrategies.prefixFreeUtf32().toBitVector(s).getLong(128, 160)); 41 | //System.err.println(Long.toHexString(TransformationStrategies.prefixFreeUtf32().toBitVector(s).getLong(16, 80))); 42 | assertEquals(0xC000000040000000L, TransformationStrategies.prefixFreeUtf32().toBitVector(s).getLong(32, 96)); 43 | s = new String(new char[] { 
'\uD800', '\uDC00' }); 44 | assertEquals(64, TransformationStrategies.prefixFreeUtf32().length(s)); 45 | assertEquals(0x8000, TransformationStrategies.prefixFreeUtf32().toBitVector(s).getLong(0, 64)); 46 | 47 | 48 | s = new String(new char[] { '\u0001', '\u0002' }); 49 | assertEquals(64, TransformationStrategies.utf32().toBitVector(s).length()); 50 | assertEquals(0x4000000080000000L, TransformationStrategies.utf32().toBitVector(s).getLong(0, 64)); 51 | s = new String(new char[] { '\u0001', '\u0002', '\u0003' }); 52 | assertEquals(96, TransformationStrategies.utf32().toBitVector(s).length()); 53 | assertEquals(0xC000000040000000L, TransformationStrategies.utf32().toBitVector(s).getLong(32, 96)); 54 | s = new String(new char[] { '\u0001', '\u0002', '\u0003', '\u0004' }); 55 | assertEquals(128, TransformationStrategies.utf32().toBitVector(s).length()); 56 | assertEquals(0x4000000080000000L, TransformationStrategies.utf32().toBitVector(s).getLong(0, 64)); 57 | assertEquals(0x20000000C0000000L, TransformationStrategies.utf32().toBitVector(s).getLong(64, 128)); 58 | 59 | s = new String(new char[] { '\uD800', '\uDC00' }); 60 | assertEquals(32, TransformationStrategies.utf32().length(s)); 61 | assertEquals(0x8000, TransformationStrategies.utf32().toBitVector(s).getLong(0, 32)); 62 | } 63 | 64 | } 65 | -------------------------------------------------------------------------------- /test/it/unimi/dsi/compression/CodecTestCase.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2010-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 
12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.compression; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | 24 | import java.io.IOException; 25 | import java.util.Random; 26 | 27 | import it.unimi.dsi.bits.BitVector; 28 | import it.unimi.dsi.fastutil.booleans.BooleanArrayList; 29 | import it.unimi.dsi.fastutil.booleans.BooleanIterator; 30 | import it.unimi.dsi.fastutil.io.FastByteArrayOutputStream; 31 | import it.unimi.dsi.io.InputBitStream; 32 | import it.unimi.dsi.io.OutputBitStream; 33 | 34 | public abstract class CodecTestCase { 35 | protected static void checkPrefixCodec(final PrefixCodec codec, final Random r) throws IOException { 36 | final int[] symbol = new int[100]; 37 | final BooleanArrayList bits = new BooleanArrayList(); 38 | for(int i = 0; i < symbol.length; i++) symbol[i] = r.nextInt(codec.size()); 39 | for (final int element : symbol) { 40 | final BitVector word = codec.codeWords()[element]; 41 | for(long j = 0; j < word.length(); j++) bits.add(word.getBoolean(j)); 42 | } 43 | 44 | final BooleanIterator booleanIterator = bits.iterator(); 45 | final Decoder decoder = codec.decoder(); 46 | for (final int element : symbol) { 47 | assertEquals(element, decoder.decode(booleanIterator)); 48 | } 49 | 50 | final FastByteArrayOutputStream fbaos = new FastByteArrayOutputStream(); 51 | @SuppressWarnings("resource") 52 | final 53 | OutputBitStream obs = new OutputBitStream(fbaos, 0); 54 | obs.write(bits.iterator()); 55 | obs.flush(); 56 | final InputBitStream ibs = new InputBitStream(fbaos.array); 57 | 58 | for (final int element : symbol) { 59 | assertEquals(element, decoder.decode(ibs)); 60 | } 61 | } 62 | 63 | protected void checkLengths(final int[] frequency, final int[] 
codeLength, final BitVector[] codeWord) { 64 | for(int i = 0; i < frequency.length; i++) 65 | assertEquals(Integer.toString(i), codeLength[i], codeWord[i].length()); 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /test/it/unimi/dsi/io/DelimitedWordReaderTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2010-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.io; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | 24 | import org.junit.Test; 25 | 26 | @SuppressWarnings("resource") 27 | public class DelimitedWordReaderTest { 28 | 29 | @Test 30 | public void testToSpec() { 31 | final String className = DelimitedWordReader.class.getName(); 32 | assertEquals(className + "(\"_\")", new DelimitedWordReader("_").toSpec()); 33 | assertEquals(className + "(100,\"_\")", new DelimitedWordReader("100", "_").toSpec()); 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /test/it/unimi/dsi/io/FileLinesByteArrayCollectionTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2016-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.io; 21 | 22 | import static org.junit.Assert.assertArrayEquals; 23 | import static org.junit.Assert.assertEquals; 24 | import static org.junit.Assert.assertFalse; 25 | import static org.junit.Assert.assertTrue; 26 | 27 | import java.io.File; 28 | import java.io.IOException; 29 | import java.util.Arrays; 30 | 31 | import org.junit.Test; 32 | 33 | import it.unimi.dsi.fastutil.io.BinIO; 34 | 35 | public class FileLinesByteArrayCollectionTest { 36 | 37 | @SuppressWarnings("deprecation") 38 | @Test 39 | public void test() throws IOException { 40 | final File file = File.createTempFile(FastBufferedReaderTest.class.getSimpleName(), "tmp"); 41 | file.deleteOnExit(); 42 | 43 | byte[] a = { '0', '\n', '1', '\n' }; 44 | BinIO.storeBytes(a, file); 45 | it.unimi.dsi.big.io.FileLinesByteArrayCollection flbac = new it.unimi.dsi.big.io.FileLinesByteArrayCollection(file.toString()); 46 | it.unimi.dsi.big.io.FileLinesByteArrayCollection.FileLinesIterator iterator = flbac.iterator(); 47 | assertArrayEquals(new byte[] { '0' }, iterator.next()); 48 | assertArrayEquals(new byte[] { '1' }, iterator.next()); 49 | assertFalse(iterator.hasNext()); 50 | assertEquals(2, flbac.size64()); 51 | 52 | a = new byte[] { '0', '\n', '1' }; 53 | BinIO.storeBytes(a, file); 54 | flbac = new it.unimi.dsi.big.io.FileLinesByteArrayCollection(file.toString()); 55 | assertEquals(2, flbac.size64()); 56 | iterator = flbac.iterator(); 57 | assertArrayEquals(new byte[] { '0' }, iterator.next()); 58 | assertTrue(iterator.hasNext()); 59 | assertArrayEquals(new byte[] { '1' }, iterator.next()); 60 | assertFalse(iterator.hasNext()); 61 | assertFalse(iterator.hasNext()); 62 | iterator.close(); 63 | 64 | a = new byte[1000000]; 65 | Arrays.fill(a, (byte)'A'); 66 | BinIO.storeBytes(a, file); 67 | flbac = new it.unimi.dsi.big.io.FileLinesByteArrayCollection(file.toString()); 68 | assertEquals(1, flbac.size64()); 
69 | iterator = flbac.iterator(); 70 | assertArrayEquals(a, iterator.next()); 71 | assertFalse(iterator.hasNext()); 72 | 73 | file.delete(); 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /test/it/unimi/dsi/io/OfflineIterableTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2010-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.io; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | import static org.junit.Assert.assertFalse; 24 | 25 | import java.io.DataInput; 26 | import java.io.DataOutput; 27 | import java.io.IOException; 28 | import java.io.InputStream; 29 | import java.io.OutputStream; 30 | 31 | import org.junit.Test; 32 | 33 | import it.unimi.dsi.fastutil.objects.ObjectIterator; 34 | import it.unimi.dsi.fastutil.objects.ObjectIterators; 35 | import it.unimi.dsi.lang.MutableString; 36 | 37 | public class OfflineIterableTest { 38 | public void doIt(final String[] strings) throws IOException { 39 | final OfflineIterable.Serializer stringSerializer = new OfflineIterable.Serializer() { 40 | @Override 41 | public void read(final DataInput dis, final MutableString x) throws IOException { 42 | x.readSelfDelimUTF8((InputStream)dis); 43 | } 44 | @Override 45 | public void write(final MutableString x, final DataOutput dos) throws IOException { 46 | x.writeSelfDelimUTF8((OutputStream)dos); 47 | } 48 | }; 49 | final OfflineIterable stringIterable = new OfflineIterable<>(stringSerializer, new MutableString()); 50 | for (final String s: strings) 51 | stringIterable.add(new MutableString(s)); 52 | ObjectIterator shouldBe = ObjectIterators.wrap(strings); 53 | for (final MutableString m: stringIterable) 54 | assertEquals(new MutableString(shouldBe.next()), m); 55 | assertFalse(shouldBe.hasNext()); 56 | 57 | // Let's do it again. 
58 | stringIterable.clear(); 59 | for (final String s: strings) 60 | stringIterable.add(new MutableString(s)); 61 | shouldBe = ObjectIterators.wrap(strings); 62 | for (final MutableString m: stringIterable) 63 | assertEquals(new MutableString(shouldBe.next()), m); 64 | assertFalse(shouldBe.hasNext()); 65 | 66 | stringIterable.close(); 67 | stringIterable.close(); // Twice, to test for safety 68 | } 69 | 70 | @Test 71 | public void testSimple() throws IOException { 72 | doIt(new String[] { "this", "is", "a", "test" }); 73 | } 74 | 75 | @Test 76 | public void testEmpty() throws IOException { 77 | doIt(new String[0]); 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /test/it/unimi/dsi/io/SegmentedInputStreamTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2010-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.io; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | 24 | import java.io.IOException; 25 | 26 | import org.junit.Before; 27 | import org.junit.Test; 28 | 29 | import it.unimi.dsi.fastutil.io.FastByteArrayInputStream; 30 | 31 | public class SegmentedInputStreamTest { 32 | 33 | private final FastByteArrayInputStream stream = new FastByteArrayInputStream( 34 | new byte[] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13 } 35 | ); 36 | 37 | private SegmentedInputStream sis; 38 | 39 | @Before 40 | public void setUp() throws IllegalArgumentException, IOException { 41 | sis = new SegmentedInputStream(stream); 42 | sis.addBlock(0, 1, 2); 43 | sis.addBlock(2, 3, 4); 44 | sis.addBlock(6, 7, 8); 45 | sis.addBlock(8, 11, 14); 46 | } 47 | 48 | @Test 49 | public void testResetClose() throws IOException { 50 | assertEquals(0, sis.read()); 51 | sis.reset(); 52 | assertEquals(1, sis.read()); 53 | sis.reset(); 54 | assertEquals(-1, sis.read()); 55 | 56 | sis.close(); 57 | assertEquals(2, sis.read()); 58 | sis.reset(); 59 | assertEquals(3, sis.read()); 60 | sis.reset(); 61 | assertEquals(-1, sis.read()); 62 | 63 | sis.close(); 64 | assertEquals(6, sis.read()); 65 | sis.reset(); 66 | assertEquals(7, sis.read()); 67 | sis.reset(); 68 | assertEquals(-1, sis.read()); 69 | } 70 | 71 | @Test 72 | public void testRead() throws IOException { 73 | final byte[] b = new byte[11]; 74 | assertEquals(1, sis.read(b, 0, 10)); 75 | assertEquals(0, b[0]); 76 | sis.reset(); 77 | assertEquals(1, sis.read(b, 1, 10)); 78 | assertEquals(1, b[1]); 79 | 80 | sis.close(); 81 | assertEquals(1, sis.read(b, 5, 5)); 82 | assertEquals(2, b[5]); 83 | } 84 | 85 | @Test 86 | public void testSkip() throws IOException { 87 | assertEquals(1, sis.skip(1)); 88 | sis.reset(); 89 | assertEquals(1, sis.skip(10)); 90 | sis.reset(); 91 | assertEquals(0, sis.skip(10)); 92 | 93 | sis.close(); 94 | sis.close(); 95 
| sis.close(); 96 | 97 | assertEquals(2, sis.skip(2)); 98 | assertEquals(1, sis.skip(2)); 99 | sis.reset(); 100 | assertEquals(3, sis.skip(10)); 101 | 102 | } 103 | } 104 | -------------------------------------------------------------------------------- /test/it/unimi/dsi/lang/EnumParserTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2010-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.lang; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | 24 | import org.junit.Test; 25 | 26 | import com.martiansoftware.jsap.ParseException; 27 | 28 | public class EnumParserTest { 29 | public enum TestEnum { 30 | A, 31 | b, 32 | C 33 | } 34 | 35 | @Test 36 | public void test() throws Exception { 37 | final EnumStringParser enumStringParser = EnumStringParser.getParser(TestEnum.class); 38 | assertEquals(TestEnum.A, enumStringParser.parse("A")); 39 | assertEquals(TestEnum.b, enumStringParser.parse("b")); 40 | assertEquals(TestEnum.C, enumStringParser.parse("C")); 41 | } 42 | 43 | @Test(expected=ParseException.class) 44 | public void testNoMatchBecauseOfCase() throws Exception { 45 | final EnumStringParser enumStringParser = EnumStringParser.getParser(TestEnum.class); 46 | enumStringParser.parse("a"); 47 | } 48 | 49 | @Test(expected=ParseException.class) 50 | public void testNoMatchBecauseWrong() throws Exception { 51 | final EnumStringParser enumStringParser = EnumStringParser.getParser(TestEnum.class); 52 | enumStringParser.parse("D"); 53 | } 54 | 55 | @Test 56 | public void testNorm() throws Exception { 57 | final EnumStringParser enumStringParser = EnumStringParser.getParser(TestEnum.class, true); 58 | assertEquals(TestEnum.A, enumStringParser.parse("a")); 59 | assertEquals(TestEnum.C, enumStringParser.parse("c")); 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /test/it/unimi/dsi/lang/MutableStringTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2010-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * 
http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.lang; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | import static org.junit.Assert.assertFalse; 24 | import static org.junit.Assert.assertTrue; 25 | 26 | import java.io.IOException; 27 | 28 | import org.junit.Test; 29 | 30 | import it.unimi.dsi.fastutil.io.FastByteArrayInputStream; 31 | import it.unimi.dsi.fastutil.io.FastByteArrayOutputStream; 32 | 33 | public class MutableStringTest { 34 | @Test 35 | public void testSqueezeSpace() { 36 | final MutableString s = new MutableString(new char[] { 32, 13, 10, 32, 32, 32, 13, 10, 32, 32, 32, 13, 10, 32, 32, 32, 32, 32 }); 37 | 38 | assertEquals(new MutableString(" \r\n \r\n \r\n "), s.squeezeSpace()); 39 | assertEquals(new MutableString(" "), s.squeezeWhitespace()); 40 | } 41 | 42 | @Test 43 | public void testSubsequence() { 44 | final MutableString s = new MutableString("abc"); 45 | final CharSequence ss = s.subSequence(1, 3); 46 | assertEquals(new MutableString("bc"), ss); 47 | assertEquals(1, ss.subSequence(1, 2).length()); 48 | } 49 | 50 | @Test 51 | public void testSkipSelfDelimUTF8() throws IOException { 52 | final FastByteArrayOutputStream fastByteArrayOutputStream = new FastByteArrayOutputStream(); 53 | new MutableString("a").writeSelfDelimUTF8(fastByteArrayOutputStream); 54 | new MutableString("b").writeSelfDelimUTF8(fastByteArrayOutputStream); 55 | new MutableString("\u221E").writeSelfDelimUTF8(fastByteArrayOutputStream); 56 | new MutableString("c").writeSelfDelimUTF8(fastByteArrayOutputStream); 57 | 
fastByteArrayOutputStream.flush(); 58 | final FastByteArrayInputStream fastByteArrayInputStream = new FastByteArrayInputStream(fastByteArrayOutputStream.array); 59 | assertEquals("a", new MutableString().readSelfDelimUTF8(fastByteArrayInputStream).toString()); 60 | assertEquals("b", new MutableString().readSelfDelimUTF8(fastByteArrayInputStream).toString()); 61 | assertEquals(1, MutableString.skipSelfDelimUTF8(fastByteArrayInputStream)); 62 | assertEquals("c", new MutableString().readSelfDelimUTF8(fastByteArrayInputStream).toString()); 63 | fastByteArrayInputStream.position(0); 64 | assertEquals("a", new MutableString().readSelfDelimUTF8(fastByteArrayInputStream).toString()); 65 | assertEquals(1, MutableString.skipSelfDelimUTF8(fastByteArrayInputStream)); 66 | assertEquals("\uu221E", new MutableString().readSelfDelimUTF8(fastByteArrayInputStream).toString()); 67 | assertEquals("c", new MutableString().readSelfDelimUTF8(fastByteArrayInputStream).toString()); 68 | } 69 | 70 | @Test 71 | public void testIsEmpty() { 72 | assertTrue(new MutableString().compact().isEmpty()); 73 | assertTrue(new MutableString().loose().isEmpty()); 74 | assertFalse(new MutableString(" ").compact().isEmpty()); 75 | assertFalse(new MutableString(" ").loose().isEmpty()); 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /test/it/unimi/dsi/lang/TwoStrings.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2010-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 
12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.lang; 21 | 22 | import java.util.Objects; 23 | /** Value holder for two strings and an optional context object; equality, hash code and string form are all derived from the three fields. */ 24 | public class TwoStrings { 25 | private final String a; 26 | private final String b; 27 | private final Object context; 28 | public void test() {} 29 | /** Builds an instance from two strings, with a null context. */ 30 | public TwoStrings(final String a, final String b) { 31 | this(null, a, b); 32 | } 33 | /** Array variant with a null context; delegates to the (context, array) constructor. */ 34 | public TwoStrings(final String... a) { 35 | this(null, a); 36 | } 37 | /** Factory that uses the given string for both fields. */ 38 | public static TwoStrings getInstance(final String a) { 39 | return new TwoStrings(a, a); 40 | } 41 | /** Factory that uses the decimal length of the argument array for both fields. */ 42 | public static TwoStrings getInstance(final String... a) { 43 | return getInstance(Integer.toString(a.length)); 44 | } 45 | /** Stores the context and the two strings as given. */ 46 | public TwoStrings(final Object context, final String a, final String b) { 47 | this.a = a; 48 | this.b = b; 49 | this.context = context; 50 | } 51 | /** Stores the first array element as the first string and the decimal array length as the second; an empty array fails on {@code a[0]}. */ 52 | public TwoStrings(final Object context, final String... a) { 53 | this.a = a[0]; 54 | this.b = Integer.toString(a.length); 55 | this.context = context; 56 | } 57 | /** Context-carrying factory that uses the given string for both fields. */ 58 | public static TwoStrings getInstance(final Object context, final String a) { 59 | return new TwoStrings(context, a, a); 60 | } 61 | /** Context-carrying factory that uses the decimal length of the argument array for both fields. */ 62 | public static TwoStrings getInstance(final Object context, final String... 
a) { 63 | return getInstance(context, Integer.toString(a.length)); 64 | } 65 | /** Two instances are equal when they have the same class and null-safely equal a, b and context. */ 66 | @Override 67 | public boolean equals(final Object obj) { 68 | if (this == obj) return true; 69 | if (obj == null) return false; 70 | if (getClass() != obj.getClass()) return false; 71 | final TwoStrings other = (TwoStrings)obj; 72 | if (a == null) { 73 | if (other.a != null) return false; 74 | } 75 | else if (!a.equals(other.a)) return false; 76 | if (b == null) { 77 | if (other.b != null) return false; 78 | } 79 | else if (!b.equals(other.b)) return false; 80 | if (context == null) { 81 | if (other.context != null) return false; 82 | } 83 | else if (!context.equals(other.context)) return false; 84 | return true; 85 | } 86 | /** Renders as ClassName(context, a, b). */ 87 | @Override 88 | public String toString() { 89 | return getClass().getName() + "(" + context + ", " + a + ", " + b + ")"; 90 | } 91 | /** Hashes the same three fields compared by equals. */ 92 | @Override 93 | public int hashCode() { 94 | return Objects.hash(a, b, context); 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /test/it/unimi/dsi/parser/callback/LinkExtractorTest.data: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | Chapter 2. Vocabulary 13 | 14 | 15 | 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /test/it/unimi/dsi/parser/callback/LinkExtractorTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2010-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 
12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.parser.callback; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | 24 | import java.io.IOException; 25 | import java.nio.ByteBuffer; 26 | import java.nio.charset.StandardCharsets; 27 | 28 | import org.junit.Ignore; 29 | import org.junit.Test; 30 | 31 | import com.google.common.io.ByteStreams; 32 | 33 | import it.unimi.dsi.fastutil.objects.ObjectLinkedOpenHashSet; 34 | import it.unimi.dsi.parser.BulletParser; 35 | /** Runs a LinkExtractor over the LinkExtractorTest.data resource and checks what it collected; the class is marked both deprecated and ignored. */ 36 | @Deprecated 37 | @Ignore 38 | public class LinkExtractorTest { 39 | /** Decodes the resource as UTF-8, parses it with a BulletParser that has the extractor as its callback, then verifies the results. */ 40 | @Test 41 | public void testExtractor() throws IOException { 42 | final char[] text = StandardCharsets.UTF_8.decode(ByteBuffer.wrap(ByteStreams.toByteArray(this.getClass().getResourceAsStream("LinkExtractorTest.data")))).toString().toCharArray(); 43 | 44 | final BulletParser parser = new BulletParser(); 45 | final LinkExtractor linkExtractor = new LinkExtractor(); 46 | parser.setCallback(linkExtractor); 47 | parser.parse(text); 48 | 49 | testExtractorResults(linkExtractor); 50 | } 51 | /** Asserts the expected URL set plus the base, meta-refresh and meta-location values. */ 52 | private void testExtractorResults(final LinkExtractor linkExtractor) { 53 | assertEquals(new ObjectLinkedOpenHashSet<>(new String[] { "manual.css", "http://link.com/", "http://anchor.com/", "http://badanchor.com/" }), linkExtractor.urls); 54 | assertEquals("http://base.com/", linkExtractor.base()); 55 | assertEquals("http://refresh.com/", linkExtractor.metaRefresh()); 56 | assertEquals("http://location.com/", linkExtractor.metaLocation()); 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /test/it/unimi/dsi/parser/callback/TextExtractorTest.java: 
-------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2010-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.parser.callback; 21 | 22 | import static org.junit.Assert.assertTrue; 23 | 24 | import org.junit.Ignore; 25 | import org.junit.Test; 26 | 27 | import it.unimi.dsi.parser.BulletParser; 28 | 29 | @Deprecated 30 | @Ignore 31 | public class TextExtractorTest { 32 | /** Parses two words separated by BR tags and expects the extracted text to contain a space. NOTE(review): the BR tags were reconstructed from the test name after the text extraction turned them into line breaks; confirm their exact case against upstream. */ 33 | @Test 34 | public void testBRBreaksFlow() { 35 | final char a[] = "ciao<BR>mamma<BR>".toCharArray(); 36 | final BulletParser bulletParser = new BulletParser(); 37 | final TextExtractor textExtractor = new TextExtractor(); 38 | bulletParser.setCallback(textExtractor); 39 | bulletParser.parse(a); 40 | assertTrue(textExtractor.text.toString(), textExtractor.text.indexOf(' ') != -1); 41 | } 42 | 43 | } 44 | -------------------------------------------------------------------------------- /test/it/unimi/dsi/parser/test.data: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Lilypond Snippet Repository ♪♫ 6 | 9 | 16 | 17 | 18 |
31 | 32 | -------------------------------------------------------------------------------- /test/it/unimi/dsi/stat/JackknifeTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2011-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.stat; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | 24 | import java.util.ArrayList; 25 | 26 | import org.junit.Test; 27 | 28 | 29 | public class JackknifeTest { 30 | @Test 31 | public void test() { 32 | final ArrayList samples = new ArrayList<>(); 33 | samples.add(new double[] { 1 }); 34 | samples.add(new double[] { 2 }); 35 | samples.add(new double[] { 3 }); 36 | // Linear statistics must pass through the jackknife without bias. 
37 | final Jackknife average = Jackknife.compute(samples, Jackknife.IDENTITY); 38 | assertEquals(2, average.estimate[0], 1E-30); 39 | assertEquals(Math.sqrt(((1 - 2) * (1 - 2) + (3 - 2) * (3 - 2)) / 6.), average.standardError[0], 1E-30); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /test/it/unimi/dsi/stat/SummaryStatsTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2011-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.stat; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | 24 | import org.junit.Test; 25 | /** Checks sum, mean, variance, min, max and size of a SummaryStats after adding 0 and then 1; all comparisons use a zero tolerance. */ 26 | public class SummaryStatsTest { 27 | @Test 28 | public void test() { 29 | final SummaryStats summaryStats = new SummaryStats(); 30 | summaryStats.add(0); 31 | assertEquals(0, summaryStats.sum(), 0); 32 | assertEquals(0, summaryStats.mean(), 0); 33 | assertEquals(0, summaryStats.variance(), 0); 34 | assertEquals(0, summaryStats.min(), 0); 35 | assertEquals(0, summaryStats.max(), 0); 36 | assertEquals(1, summaryStats.size64()); 37 | /* Second sample makes the data {0, 1}: the expected variance .25 is the population variance (divide by n), not the sample variance (.5). */ 38 | summaryStats.add(1); 39 | assertEquals(1, summaryStats.sum(), 0); 40 | assertEquals(.5, summaryStats.mean(), 0); 41 | assertEquals(.25, summaryStats.variance(), 0); 42 | assertEquals(0, summaryStats.min(), 0); 43 | assertEquals(1, summaryStats.max(), 0); 44 | assertEquals(2, summaryStats.size64()); 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /test/it/unimi/dsi/util/ByteBufferLongBigListTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2010-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.util; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | 24 | import java.io.File; 25 | import java.io.IOException; 26 | import java.io.RandomAccessFile; 27 | import java.nio.ByteBuffer; 28 | import java.nio.ByteOrder; 29 | import java.nio.channels.FileChannel.MapMode; 30 | 31 | import org.junit.Test; 32 | 33 | import it.unimi.dsi.fastutil.io.BinIO; 34 | import it.unimi.dsi.fastutil.longs.LongIterators; 35 | /** Set/get round trips on a ByteBufferLongBigList, once over a small heap buffer and once over a large memory-mapped file. */ 36 | @SuppressWarnings("deprecation") 37 | public class ByteBufferLongBigListTest { 38 | /** Writes and reads back one value in a list backed by a 1000-byte heap buffer. */ 39 | @Test 40 | public void testSetGetSmall() { 41 | final ByteBufferLongBigList b = new ByteBufferLongBigList(ByteBuffer.allocate(1000)); 42 | b.set(0, 10); 43 | assertEquals(10, b.getLong(0)); 44 | } 45 | /** Maps a temporary file holding 200000000 longs and checks set/get at indices 1 and 190000000; try-with-resources closes the file even when an assertion fails. */ 46 | @Test 47 | public void testSetGetBig() throws IOException { 48 | final File f = File.createTempFile(ByteBufferLongBigListTest.class.getSimpleName(), "buffer"); 49 | f.deleteOnExit(); 50 | BinIO.storeLongs(LongIterators.fromTo(0, 200000000), f); 51 | try (final RandomAccessFile c = new RandomAccessFile(f.toString(), "rw")) { 52 | final ByteBufferLongBigList b = ByteBufferLongBigList.map(c.getChannel(), ByteOrder.BIG_ENDIAN, MapMode.READ_WRITE); 53 | b.set(1, 10); 54 | assertEquals(10, b.getLong(1)); 55 | b.set(190000000, 10); 56 | assertEquals(10, b.getLong(190000000)); 57 | } 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /test/it/unimi/dsi/util/CircularCharArrayBufferTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2010-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or 
the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.util; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | 24 | import java.util.Iterator; 25 | import java.util.Random; 26 | 27 | import org.apache.commons.collections4.queue.CircularFifoQueue; 28 | import org.apache.commons.lang3.RandomStringUtils; 29 | import org.junit.Test; 30 | /** Compares CircularCharArrayBuffer against a CircularFifoQueue of characters used as the reference implementation. */ 31 | public class CircularCharArrayBufferTest { 32 | 33 | static Random r = new SplitMix64Random(0); 34 | static int[] sizes = { 1, 5, 10, 100, 500, 1000 }; 35 | /** Copies up to {@code length} queued characters, in iteration order, into {@code c} starting at {@code offset}. */ 36 | @SuppressWarnings("null") 37 | private static void copyInto(final CircularFifoQueue<Character> cfb, final char[] c, final int offset, final int length) { 38 | final int howMany = Math.min(length, cfb.size()); 39 | final Iterator it = cfb.iterator(); 40 | for (int i = 0; i < howMany; i++) 41 | c[offset + i] = ((Character)it.next()).charValue(); 42 | } 43 | /** For each capacity, feeds the same random slices to both structures and checks after every slice that their contents match. */ 44 | @Test 45 | public void testAdd() { 46 | for (final int size: sizes) { 47 | // System.out.printf("CIRCULAR BUFFER OF SIZE %d: ", size); 48 | final CircularFifoQueue<Character> cfb = new CircularFifoQueue<>(size); 49 | final CircularCharArrayBuffer ccab = new CircularCharArrayBuffer(size); 50 | final int times = r.nextInt(50); 51 | for (int j = 0; j < times; j++) { 52 | final char[] c = new char[1 + r.nextInt(1 + size * 10 / 2)]; 53 | final int offset = r.nextInt(c.length); 54 | final int len = r.nextInt(c.length - offset); 55 | System.arraycopy(RandomStringUtils.randomAlphanumeric(c.length).toCharArray(), 0, c, 0, c.length); 56 | for (int i = offset; i < offset + len; i++) 57 | cfb.add(Character.valueOf(c[i])); 58 | ccab.add(c, offset, len); 59 | final char[] res = 
new char[cfb.size()]; 60 | copyInto(cfb, res, 0, cfb.size()); 61 | final char[] res2 = new char[cfb.size()]; 62 | ccab.toCharArray(res2, 0, cfb.size()); 63 | assertEquals(new String(res), new String(res2)); 64 | } 65 | } 66 | } 67 | 68 | } 69 | -------------------------------------------------------------------------------- /test/it/unimi/dsi/util/FrontCodedStringListTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2010-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.util; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | 24 | import java.util.Arrays; 25 | import java.util.Collections; 26 | import java.util.List; 27 | 28 | import org.junit.Test; 29 | 30 | import it.unimi.dsi.lang.MutableString; 31 | /** Round-trip tests for FrontCodedStringList. NOTE(review): the raw List declarations below look like they lost a type argument (presumably String) in text extraction; confirm against upstream. */ 32 | public class FrontCodedStringListTest { 33 | /** Builds a list for every ratio from 1 to 7 and both values of the utf8 flag, and reads every element back both ways (returned object and caller-supplied MutableString); runs once in the original word order and once after sorting. */ 34 | @Test 35 | public void test() { 36 | final List c = Arrays.asList(TernaryIntervalSearchTreeTest.WORDS.clone()); 37 | final MutableString s = new MutableString(); 38 | for(int p = 0; p < 2; p++) { 39 | for(final boolean utf8: new boolean[] { false, true }) 40 | for(int ratio = 1; ratio < 8; ratio++) { 41 | final FrontCodedStringList fcl = new FrontCodedStringList(c.iterator(), ratio, utf8); 42 | for(int i = 0; i < fcl.size(); i++) { 43 | assertEquals(Integer.toString(i), c.get(i), fcl.get(i).toString()); 44 | fcl.get(i, s); 45 | assertEquals(Integer.toString(i), c.get(i), s.toString()); 46 | } 47 | } 48 | 49 | Collections.sort(c); 50 | } 51 | } 52 | /** Same round trip on strings containing a private-use character and surrogate pairs. */ 53 | @Test 54 | public void testSurrogatePairs() { 55 | final List c = Arrays.asList(new String[] { "a", "AB\uE000AB", "\uD800\uDF02", "\uD800\uDF03", "b" }); 56 | for(final boolean utf8: new boolean[] { false, true }) 57 | for(int ratio = 1; ratio < 8; ratio++) { 58 | final FrontCodedStringList fcl = new FrontCodedStringList(c.iterator(), ratio, utf8); 59 | for(int i = 0; i < fcl.size(); i++) { 60 | assertEquals(Integer.toString(i), c.get(i), fcl.get(i).toString()); 61 | } 62 | } 63 | } 64 | 65 | 66 | } 67 | -------------------------------------------------------------------------------- /test/it/unimi/dsi/util/HyperLogLogCounterArrayTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2010-2023 Paolo Boldi and Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of 
the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.util; 21 | 22 | import static org.junit.Assert.assertTrue; 23 | 24 | import org.junit.Test; 25 | /** Accuracy tests for HyperLogLogCounterArray: every configuration runs 10 trials and needs at least 9 estimates whose relative error is below twice the relative standard deviation reported for the given log2m. */ 26 | public class HyperLogLogCounterArrayTest { 27 | /** One counter: adds {@code size} values evenly spaced over the int range (step 2^32 / size, starting at Integer.MIN_VALUE) and checks the estimate of counter 0. The trial index is the last constructor argument (presumably a seed; confirm). */ 28 | @Test 29 | public void testSingle() { 30 | final int numTrials = 10; 31 | 32 | for(final int size: new int[] { 1, 10, 100, 1000, 100000 }) 33 | for(final int log2m: new int[] { 6, 8, 12 }) { 34 | final double rsd = HyperLogLogCounterArray.relativeStandardDeviation(log2m); 35 | int correct = 0; 36 | for (int trial = 0; trial < numTrials; trial++) { 37 | final HyperLogLogCounterArray a = new HyperLogLogCounterArray(1, size, log2m, trial); 38 | final int incr = (int)((1L << 32) / size); 39 | int x = Integer.MIN_VALUE; 40 | for(int i = 0; i < size; i++) { 41 | a.add(0, x); 42 | x += incr; 43 | } 44 | 45 | //System.err.println("Trial " + trial + ", size " + size + ", error: " + (size - a.count(0)) / size + " " + (Math.abs(size - a.count(0)) < 2 * rsd * size ? 
"(+)" : "(-)")); 46 | if (Math.abs(size - a.count(0)) / size < 2 * rsd) correct++; 47 | } 48 | 49 | //System.err.println("Correct trials for size " + size + ", rsd " + rsd + ": " + correct); 50 | assertTrue(correct + " < " + 9, correct >= 9); 51 | } 52 | } 53 | /** Two counters fed identical values; each counter's estimate must pass the same check independently. */ 54 | @Test 55 | public void testDouble() { 56 | final int numTrials = 10; 57 | 58 | for(final int size: new int[] { 1, 10, 100, 1000, 100000 }) 59 | for(final int log2m: new int[] { 4, 6, 8, 12 }) { 60 | final double rsd = HyperLogLogCounterArray.relativeStandardDeviation(log2m); 61 | int correct0 = 0, correct1 = 0; 62 | for (int trial = 0; trial < numTrials; trial++) { 63 | final HyperLogLogCounterArray a = new HyperLogLogCounterArray(2, size, log2m, trial); 64 | final int incr = (int)((1L << 32) / size); 65 | int x = Integer.MIN_VALUE; 66 | for(int i = 0; i < size; i++) { 67 | a.add(0, x); 68 | a.add(1, x); 69 | x += incr; 70 | } 71 | 72 | //System.err.println("Trial " + trial + " (0), size " + size + ", error: " + (size - a.count(0)) / size + " " + (Math.abs(size - a.count(0)) < 2 * rsd * size ? "(+)" : "(-)")); 73 | //System.err.println("Trial " + trial + " (1), size " + size + ", error: " + (size - a.count(1)) / size + " " + (Math.abs(size - a.count(1)) < 2 * rsd * size ? 
"(+)" : "(-)")); 74 | if (Math.abs(size - a.count(0)) / size < 2 * rsd) correct0++; 75 | if (Math.abs(size - a.count(1)) / size < 2 * rsd) correct1++; 76 | } 77 | 78 | //System.err.println("Correct trials (0) for size " + size + ", rsd " + rsd + ": " + correct0); 79 | //System.err.println("Correct trials (1) for size " + size + ", rsd " + rsd + ": " + correct1); 80 | assertTrue(correct0 + " < " + 9, correct0 >= 9); 81 | assertTrue(correct1 + " < " + 9, correct1 >= 9); 82 | } 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /test/it/unimi/dsi/util/KahanSummationTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2011-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.util; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | 24 | import org.junit.Test; 25 | 26 | 27 | //RELEASE-STATUS: DIST 28 | /** Exact-result checks for KahanSummation: a plain sum (1 + 2 + 3 = 6) and a cancellation case; both use a zero tolerance. */ 29 | public class KahanSummationTest { 30 | @Test 31 | public void testSum() { 32 | final KahanSummation sum = new KahanSummation(); 33 | sum.add(1); 34 | sum.add(2); 35 | sum.add(3); 36 | assertEquals(6, sum.value(), 0); 37 | } 38 | /** Adds Double.MIN_NORMAL twice, subtracts it once, and expects exactly Double.MIN_NORMAL. */ 39 | @Test 40 | public void testDifficult() { 41 | final KahanSummation sum = new KahanSummation(); 42 | sum.add(Double.MIN_NORMAL); 43 | sum.add(Double.MIN_NORMAL); 44 | sum.add(-Double.MIN_NORMAL); 45 | assertEquals(Double.MIN_NORMAL, sum.value(), 0); 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /test/it/unimi/dsi/util/LineIteratorTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2010-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.util; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | 24 | import java.io.StringReader; 25 | 26 | import org.junit.Test; 27 | 28 | import it.unimi.dsi.io.FastBufferedReader; 29 | import it.unimi.dsi.io.LineIterator; 30 | import it.unimi.dsi.logging.ProgressLogger; 31 | 32 | /** Checks that LineIterator returns the four lines of a fixed text in order, with and without a progress logger. */ 33 | public class LineIteratorTest { 34 | 35 | private static final String TEXT = "0\n1\n2\n3"; 36 | private static final CharSequence[] LINES = TEXT.split("\n"); 37 | 38 | @Test 39 | public void testLineIteratorProgressLogger() { 40 | testLineIterator(new ProgressLogger()); 41 | } 42 | 43 | @Test 44 | public void testLineIterator() { 45 | testLineIterator(null); 46 | } 47 | /** Shared body: iterates over TEXT, compares each line with LINES, then checks the line count (expected value first, so a failure message reads correctly). */ 48 | public void testLineIterator(final ProgressLogger pl) { 49 | final LineIterator lineIterator = new LineIterator(new FastBufferedReader(new StringReader(TEXT)), pl); 50 | int i = 0; 51 | while(lineIterator.hasNext()) 52 | assertEquals(LINES[i++].toString(), lineIterator.next().toString()); 53 | 54 | assertEquals(LINES.length, i); 55 | } 56 | 57 | } 58 | -------------------------------------------------------------------------------- /test/it/unimi/dsi/util/LiterallySignedStringMapTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2010-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 
12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.util; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | 24 | import java.io.File; 25 | import java.io.IOException; 26 | import java.io.Serializable; 27 | import java.util.Arrays; 28 | import java.util.Collections; 29 | 30 | import org.junit.Test; 31 | 32 | import it.unimi.dsi.fastutil.Hash; 33 | import it.unimi.dsi.fastutil.io.BinIO; 34 | import it.unimi.dsi.fastutil.objects.Object2LongOpenCustomHashMap; 35 | import it.unimi.dsi.lang.MutableString; 36 | 37 | public class LiterallySignedStringMapTest { 38 | /** Hash strategy that compares and hashes character sequences by their string content, so different CharSequence implementations holding the same characters are the same key. The type argument is required for the two overrides below to match the interface. */ 39 | private final static class CharSequenceStrategy implements Hash.Strategy<CharSequence>, Serializable { 40 | private static final long serialVersionUID = 1L; 41 | 42 | @Override 43 | public boolean equals(final CharSequence a, final CharSequence b) { 44 | if (a == null) return b == null; 45 | if (b == null) return false; 46 | return a.toString().equals(b.toString()); 47 | } 48 | 49 | @Override 50 | public int hashCode(final CharSequence o) { 51 | return o.toString().hashCode(); 52 | } 53 | } 54 | /** For n = 10, 100, 1000: shuffles the decimal strings 0..n-1, maps each to its position, and checks that the signed map returns that position for members and -1 for n non-members, both before and after serialization to a temporary file. */ 55 | @Test 56 | public void testNumbers() throws IOException, ClassNotFoundException { 57 | for(int n = 10; n < 10000; n *= 10) { 58 | final String[] s = new String[n]; 59 | for(int i = s.length; i-- != 0;) s[i] = Integer.toString(i); 60 | Collections.shuffle(Arrays.asList(s)); 61 | 62 | final FrontCodedStringList fcl = new FrontCodedStringList(Arrays.asList(s), 8, true); 63 | // Test with mph 64 | final Object2LongOpenCustomHashMap<CharSequence> mph = new Object2LongOpenCustomHashMap<>(new CharSequenceStrategy()); 65 | mph.defaultReturnValue(-1); 66 | for(int i = 0; i < s.length; i++) mph.put(new MutableString(s[i]), i); 67 | 68 | LiterallySignedStringMap map = new 
LiterallySignedStringMap(mph, fcl); 69 | 70 | for(int i = s.length; i-- != 0;) assertEquals(i, map.getLong(s[i])); 71 | for(int i = s.length + n; i-- != s.length;) assertEquals(-1, map.getLong(Integer.toString(i))); 72 | 73 | final File temp = File.createTempFile(getClass().getSimpleName(), "test"); 74 | temp.deleteOnExit(); 75 | BinIO.storeObject(map, temp); 76 | map = (LiterallySignedStringMap)BinIO.loadObject(temp); 77 | 78 | for(int i = s.length; i-- != 0;) assertEquals(i, map.getLong(s[i])); 79 | for(int i = s.length + n; i-- != s.length;) assertEquals(-1, map.getLong(Integer.toString(i))); 80 | } 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /test/it/unimi/dsi/util/SemiExternalGammaListTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2010-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.util; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | import static org.junit.Assert.assertTrue; 24 | 25 | import java.io.IOException; 26 | 27 | import org.junit.Test; 28 | 29 | import it.unimi.dsi.fastutil.longs.LongArrayList; 30 | import it.unimi.dsi.fastutil.longs.LongList; 31 | import it.unimi.dsi.io.InputBitStream; 32 | import it.unimi.dsi.io.OutputBitStream; 33 | 34 | /** 35 | * @author Fabien Campagne 36 | * @author Sebastiano Vigna 37 | */ 38 | public class SemiExternalGammaListTest { 39 | /** Gamma-codes the given values into a scratch array of 4 bytes per value and returns an input bit stream over exactly the bytes written (bit count rounded up to whole bytes). NOTE(review): the output stream is never flushed before the array is copied; presumably array-backed streams write through - confirm. */ 40 | private static InputBitStream buildInputStream(final LongList longs) throws IOException { 41 | final byte[] array = new byte[longs.size() * 4]; 42 | @SuppressWarnings("resource") 43 | final 44 | OutputBitStream streamer = new OutputBitStream(array); 45 | for (int i = 0; i < longs.size(); i++) streamer.writeLongGamma(longs.getLong(i)); 46 | final int size = (int)(streamer.writtenBits() / 8) + ((streamer.writtenBits() % 8) == 0 ? 
0 : 1); 47 | final byte[] smaller = new byte[size]; 48 | System.arraycopy(array, 0, smaller, 0, size); 49 | 50 | return new InputBitStream(smaller); 51 | 52 | } 53 | 54 | /** Reads the same seven values back through lists built with second-argument values 1, 2, 4, 7 and 8 (below, around and above the list length); every value must match at every setting. */ 55 | @Test 56 | public void testSemiExternalGammaListGammaCoding() throws IOException { 57 | 58 | final long[] longs = { 10, 300, 450, 650, 1000, 1290, 1699 }; 59 | final LongList listLongs = new LongArrayList(longs); 60 | 61 | SemiExternalGammaList list = new SemiExternalGammaList(buildInputStream(listLongs), 1, listLongs.size()); 62 | for (int i = 0; i < longs.length; ++i) { 63 | assertEquals(("test failed for index: " + i), longs[i], list.getLong(i)); 64 | } 65 | 66 | list = new SemiExternalGammaList(buildInputStream(listLongs), 2, listLongs.size()); 67 | for (int i = 0; i < longs.length; ++i) { 68 | assertEquals(("test failed for index: " + i), longs[i], list.getLong(i)); 69 | } 70 | 71 | list = new SemiExternalGammaList(buildInputStream(listLongs), 4, listLongs.size()); 72 | for (int i = 0; i < longs.length; ++i) { 73 | assertEquals(("test failed for index: " + i), longs[i], list.getLong(i)); 74 | } 75 | 76 | list = new SemiExternalGammaList(buildInputStream(listLongs), 7, listLongs.size()); 77 | for (int i = 0; i < longs.length; ++i) { 78 | assertEquals(("test failed for index: " + i), longs[i], list.getLong(i)); 79 | } 80 | 81 | list = new SemiExternalGammaList(buildInputStream(listLongs), 8, listLongs.size()); 82 | for (int i = 0; i < longs.length; ++i) { 83 | assertEquals(("test failed for index: " + i), longs[i], list.getLong(i)); 84 | } 85 | } 86 | /** Only checks that building a list over an empty stream does not throw; the trailing assertTrue(true) asserts nothing. */ 87 | @Test 88 | public void testEmptySemiExternalGammaListGammaCoding() throws IOException { 89 | 90 | final long[] longs = { }; 91 | final LongList listOffsets = new LongArrayList(longs); 92 | 93 | new SemiExternalGammaList(buildInputStream(listOffsets), 1, listOffsets.size()); 94 | assertTrue(true); 95 | } 96 | 97 | } 98 | -------------------------------------------------------------------------------- 
/test/it/unimi/dsi/util/ShiftAddXorSignedStringMapTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2010-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.util; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | 24 | import java.io.File; 25 | import java.io.IOException; 26 | import java.util.Arrays; 27 | 28 | import org.junit.Test; 29 | 30 | import it.unimi.dsi.fastutil.io.BinIO; 31 | import it.unimi.dsi.fastutil.objects.Object2LongOpenHashMap; 32 | 33 | public class ShiftAddXorSignedStringMapTest { 34 | /** For every signature width from 16 to 64 bits in steps of 8: maps the decimal strings 0..999 to their values and checks that the signed map returns the value for members and -1 for 100 non-members, both before and after serialization to a temporary file. */ 35 | @SuppressWarnings("deprecation") 36 | @Test 37 | public void testNumbers() throws IOException, ClassNotFoundException { 38 | 39 | for(int width = 16; width <= Long.SIZE; width += 8) { 40 | final String[] s = new String[1000]; 41 | final long[] v = new long[s.length]; 42 | for(int i = s.length; i-- != 0;) s[(int)(v[i] = i)] = Integer.toString(i); 43 | 44 | // Test with mph 45 | final Object2LongOpenHashMap<String> mph = new Object2LongOpenHashMap<>(s, v); 46 | ShiftAddXorSignedStringMap map = new ShiftAddXorSignedStringMap(Arrays.asList(s).iterator(), mph, width); 47 | 48 | for(int i = s.length; i-- != 0;) assertEquals(i, map.getLong(Integer.toString(i))); 49 | for(int i = s.length + 100; i-- != s.length;) 
assertEquals(-1, map.getLong(Integer.toString(i))); 50 | 51 | final File temp = File.createTempFile(getClass().getSimpleName(), "test"); 52 | temp.deleteOnExit(); 53 | BinIO.storeObject(map, temp); 54 | map = (ShiftAddXorSignedStringMap)BinIO.loadObject(temp); 55 | 56 | for(int i = s.length; i-- != 0;) assertEquals(i, map.getLong(Integer.toString(i))); 57 | for(int i = s.length + 100; i-- != s.length;) assertEquals(-1, map.getLong(Integer.toString(i))); 58 | 59 | } 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /test/it/unimi/dsi/util/SplitMix64RandomTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2015-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.util; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | import static org.junit.Assert.assertTrue; 24 | 25 | import org.junit.Test; 26 | 27 | 28 | public class SplitMix64RandomTest { 29 | private final static long seeds[] = { 0, 1, 1024, 0x5555555555555555L }; 30 | 31 | @Test 32 | public void testNextFloat() { 33 | for (final long seed : seeds) { 34 | final SplitMix64Random splitMixRandom = new SplitMix64Random(seed); 35 | double avg = 0; 36 | for (int i = 1000000; i-- != 0;) { 37 | final float d = splitMixRandom.nextFloat(); 38 | assertTrue(d < 1); 39 | assertTrue(d >= 0); 40 | avg += d; 41 | } 42 | 43 | assertEquals(500000, avg, 1000); 44 | } 45 | } 46 | 47 | @Test 48 | public void testNextDouble() { 49 | for (final long seed : seeds) { 50 | final SplitMix64Random splitMixRandom = new SplitMix64Random(seed); 51 | double avg = 0; 52 | for (int i = 1000000; i-- != 0;) { 53 | final double d = splitMixRandom.nextDouble(); 54 | assertTrue(d < 1); 55 | assertTrue(d >= 0); 56 | avg += d; 57 | } 58 | 59 | assertEquals(500000, avg, 500); 60 | } 61 | } 62 | 63 | @Test 64 | public void testNextInt() { 65 | for (final long seed : seeds) { 66 | final SplitMix64Random splitMixRandom = new SplitMix64Random(seed); 67 | double avg = 0; 68 | for (int i = 100000000; i-- != 0;) { 69 | final int d = splitMixRandom.nextInt(101); 70 | assertTrue(d <= 100); 71 | assertTrue(d >= 0); 72 | avg += d; 73 | } 74 | 75 | assertEquals(5000000000L, avg, 1000000); 76 | } 77 | } 78 | 79 | @Test 80 | public void testNextInt2() { 81 | for (final long seed : seeds) { 82 | final SplitMix64Random splitMixRandom = new SplitMix64Random(seed); 83 | final int[] count = new int[32]; 84 | long change = 0; 85 | int prev = 0; 86 | for (int i = 1000000; i-- != 0;) { 87 | final int d = splitMixRandom.nextInt(); 88 | change += Long.bitCount(d ^ prev); 89 | for (int b = 32; b-- != 0;) 90 | if ((d 
& (1 << b)) != 0) count[b]++; 91 | prev = d; 92 | } 93 | 94 | assertEquals(32 * 1000000L, change, 23000); 95 | for (int b = 32; b-- != 0;) assertEquals(500000, count[b], 1500); 96 | } 97 | } 98 | 99 | @Test 100 | public void testNextLong() { 101 | for (final long seed : seeds) { 102 | final SplitMix64Random splitMixRandom = new SplitMix64Random(seed); 103 | final int[] count = new int[64]; 104 | long change = 0; 105 | long prev = 0; 106 | for (int i = 1000000; i-- != 0;) { 107 | final long d = splitMixRandom.nextLong(); 108 | change += Long.bitCount(d ^ prev); 109 | for (int b = 64; b-- != 0;) 110 | if ((d & (1L << b)) != 0) count[b]++; 111 | prev = d; 112 | } 113 | 114 | assertEquals(32 * 1000000L, change, 4000); 115 | for (int b = 64; b-- != 0;) assertEquals(500000, count[b], 1500); 116 | } 117 | } 118 | } 119 | -------------------------------------------------------------------------------- /test/it/unimi/dsi/util/TextPatternTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2010-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE. 
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.util; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | import static org.junit.Assert.assertTrue; 24 | 25 | import org.junit.Test; 26 | 27 | import it.unimi.dsi.fastutil.chars.CharArrayList; 28 | 29 | /** Tests TextPattern.search() on byte arrays, strings, char arrays and wrapped CharArrayList instances: a one-character and a two-character pattern must be reported at index 1 on the whole input and as -1 when the searched range ends before the match. NOTE(review): in this listing the string fixture at the end of testSearch() and the trailing string constant look truncated (the embedded numbering jumps from 63 to 72 and the literals are nearly empty), presumably because HTML markup was stripped during extraction; confirm against the upstream file before relying on it. */ public class TextPatternTest { 30 | @Test 31 | public void testSingleCharacterSearch() { 32 | final byte[] b = new byte[] { 1, (byte)'A', 2 }; 33 | final String s = " A "; 34 | final TextPattern pattern = new TextPattern("A"); 35 | 36 | assertEquals(-1, pattern.search(b, 0, 1)); 37 | assertEquals(-1, pattern.search(s, 0, 1)); 38 | assertEquals(-1, pattern.search(s.toCharArray(), 0, 1)); 39 | assertEquals(-1, pattern.search(CharArrayList.wrap(s.toCharArray()), 0, 1)); 40 | 41 | assertEquals(1, pattern.search(b)); 42 | assertEquals(1, pattern.search(s)); 43 | assertEquals(1, pattern.search(s.toCharArray())); 44 | assertEquals(1, pattern.search(CharArrayList.wrap(s.toCharArray()))); 45 | } 46 | 47 | @Test 48 | public void testSearch() { 49 | final byte[] b = new byte[] { 1, (byte)'A', 'B', 2 }; 50 | final String s = " AB "; 51 | final TextPattern pattern = new TextPattern("AB"); 52 | 53 | assertEquals(-1, pattern.search(b, 0, 2)); 54 | assertEquals(-1, pattern.search(s, 0, 2)); 55 | assertEquals(-1, pattern.search(s.toCharArray(), 0, 2)); 56 | assertEquals(-1, pattern.search(CharArrayList.wrap(s.toCharArray()), 0, 2)); 57 | 58 | assertEquals(1, pattern.search(b)); 59 | assertEquals(1, pattern.search(s)); 60 | assertEquals(1, pattern.search(s.toCharArray())); 61 | assertEquals(1, pattern.search(CharArrayList.wrap(s.toCharArray()))); 62 | 63 | TextPattern patternMeta = new TextPattern("\n" + 72 | "\n" + 73 | "\n" + 74 | "\n" + 75 | "" + 79 | "" + 80 | "Sebastiano Vigna\n" + 81 | "\n" + 82 | "\n" + 83 | "" + 84 | "
\n" + 85 | "
    " + 86 | "
    Bye bye baby\n" + 87 | " and not this one\n" + 88 | "\n\n even whitespace counts \n\n" + 89 | "The frame source counts\n" + 90 | "\n" + 91 | "\n" + 92 | ""; 93 | 94 | 95 | } 96 | -------------------------------------------------------------------------------- /test/it/unimi/dsi/util/concurrent/ReorderingBlockingQueueTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * DSI utilities 3 | * 4 | * Copyright (C) 2017-2023 Sebastiano Vigna 5 | * 6 | * This program and the accompanying materials are made available under the 7 | * terms of the GNU Lesser General Public License v2.1 or later, 8 | * which is available at 9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html, 10 | * or the Apache Software License 2.0, which is available at 11 | * https://www.apache.org/licenses/LICENSE-2.0. 12 | * 13 | * This program is distributed in the hope that it will be useful, but 14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | * 17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0 18 | */ 19 | 20 | package it.unimi.dsi.util.concurrent; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | 24 | import org.junit.Test; 25 | 26 | import it.unimi.dsi.Util; 27 | import it.unimi.dsi.fastutil.ints.IntArrays; 28 | import it.unimi.dsi.util.XoRoShiRo128PlusRandom; 29 |
/**
 * Tests that a {@link ReorderingBlockingQueue} hands elements back in increasing order
 * regardless of the order in which they were enqueued. Each integer is enqueued with
 * itself as the second argument of {@code put()}, and the tests assert that successive
 * calls to {@code take()} return 0, 1, 2, &hellip;
 */
public class ReorderingBlockingQueueTest {

	/** Fills a queue as large as the input from a single thread, then drains it in order. */
	@Test
	public void testNoBlocking() throws InterruptedException {
		for (final int size : new int[] { 1, 10, 100, 128, 256 }) {
			final ReorderingBlockingQueue<Integer> q = new ReorderingBlockingQueue<>(size);
			final int[] perm = shuffledIdentity(size);
			for (int i = perm.length; i-- != 0;) q.put(Integer.valueOf(perm[i]), perm[i]);
			assertDrainsInOrder(q, perm.length);
		}
	}

	/**
	 * Enqueues every element from its own thread into a queue created with argument
	 * {@code size / d}, which for {@code d > 1} is smaller than the number of elements,
	 * while the test thread drains the queue in order.
	 */
	@Test
	public void testBlocking() throws InterruptedException {
		for (final int size : new int[] { 10, 100, 128, 256, 1024 }) {
			for (final int d : new int[] { 1, 2, 3, 4 }) {
				final ReorderingBlockingQueue<Integer> q = new ReorderingBlockingQueue<>(size / d);
				final int[] perm = shuffledIdentity(size);
				for (int i = perm.length; i-- != 0;) startProducer(q, perm[i]);
				assertDrainsInOrder(q, perm.length);
			}
		}
	}

	/**
	 * Returns a randomly shuffled permutation of the first {@code size} natural numbers.
	 *
	 * @param size the number of elements.
	 * @return the shuffled array.
	 */
	private static int[] shuffledIdentity(final int size) {
		final int[] perm = Util.identity(size);
		IntArrays.shuffle(perm, new XoRoShiRo128PlusRandom());
		return perm;
	}

	/**
	 * Starts a thread that enqueues {@code t} into {@code q} using {@code t} itself as
	 * second argument of {@code put()}.
	 *
	 * @param q the queue to feed.
	 * @param t the value to enqueue.
	 */
	private static void startProducer(final ReorderingBlockingQueue<Integer> q, final int t) {
		new Thread() {
			@Override
			public void run() {
				try {
					q.put(Integer.valueOf(t), t);
				}
				catch (final InterruptedException e) {
					// Restore the interrupt flag before turning the exception into an unchecked one.
					Thread.currentThread().interrupt();
					throw new RuntimeException(e.getMessage(), e);
				}
			}
		}.start();
	}

	/**
	 * Asserts that {@code n} successive calls to {@code take()} return 0 to {@code n - 1}
	 * in this order and that the queue is empty afterwards.
	 *
	 * @param q the queue to drain.
	 * @param n the number of elements expected.
	 * @throws InterruptedException if the calling thread is interrupted while waiting.
	 */
	private static void assertDrainsInOrder(final ReorderingBlockingQueue<Integer> q, final int n) throws InterruptedException {
		for (int i = 0; i < n; i++) assertEquals(i, q.take().intValue());
		assertEquals(0, q.size());
	}
}
70 | --------------------------------------------------------------------------------