${LABEL[$i]}"
9 | let i=i+1
10 | for r in Random ThreadLocalRandom SplittableRandom SplitMix64 XoRoShiRo128PlusPlus XoRoShiRo128StarStar XoRoShiRo128Plus XoShiRo256PlusPlus XoShiRo256StarStar XoShiRo256Plus XorShift1024StarPhi; do
11 | v=$(grep ^Benchmark$r.$m\ $1 | tr -s ' ' | cut -d' ' -f4)
12 | echo " *
$v"
13 | done
14 | echo " * "
15 | done
16 |
--------------------------------------------------------------------------------
/bash/permbfcl.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash -ex
2 |
3 | # Builds the permuted front-coded string big list FCL (first argument) from
4 | # the strings read from standard input, one per line.
5 |
6 | if [[ "$@" == "" ]]; then
7 |     echo "USAGE: $(basename "$0") FCL" 1>&2
8 |     echo "The list of string will be read from standard input in UTF-8 encoding." 1>&2
9 |     exit 1
10 | fi
11 |
12 | PERM=$(mktemp)
13 | LEXFCL=$(mktemp)
14 |
15 | # Remove the temporary files on every exit path: with -e, any failing command
16 | # below would otherwise terminate the script before the cleanup was reached.
17 | trap 'rm -f "$LEXFCL" "$PERM"' EXIT
18 |
19 | # Number the input lines from 0, sort them bytewise (C locale) by content,
20 | # save the resulting sequence of line numbers in $PERM and pipe the sorted
21 | # strings to FrontCodedStringBigList, which is given $LEXFCL as its argument.
22 | nl -v0 -nln | LC_ALL=C sort -S2G -T. -k2 | tee >(cut -f1 | tr -d ' ' >"$PERM") | cut -f2 | java -server it.unimi.dsi.big.util.FrontCodedStringBigList -u "$LEXFCL"
23 |
24 | # Combine the sorted list and the saved permutation into the list named by $1.
25 | java -server it.unimi.dsi.big.util.PermutedFrontCodedStringBigList -i -t "$LEXFCL" "$PERM" "$1"
26 |
--------------------------------------------------------------------------------
/bash/permfcl.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash -e
2 |
3 | # Builds the permuted front-coded string list FCL (first argument) from the
4 | # strings read from standard input, one per line.
5 |
6 | if [[ "$@" == "" ]]; then
7 |     echo "USAGE: $(basename "$0") FCL" 1>&2
8 |     echo "The list of string will be read from standard input in UTF-8 encoding." 1>&2
9 |     exit 1
10 | fi
11 |
12 | PERM=$(mktemp)
13 | LEXFCL=$(mktemp)
14 |
15 | # Remove the temporary files on every exit path: with -e, any failing command
16 | # below would otherwise terminate the script before the cleanup was reached.
17 | trap 'rm -f "$LEXFCL" "$PERM"' EXIT
18 |
19 | # Number the input lines from 0, sort them bytewise (C locale) by content,
20 | # save the resulting sequence of line numbers in $PERM and pipe the sorted
21 | # strings to FrontCodedStringList, which is given $LEXFCL as its argument.
22 | nl -v0 -nln | LC_ALL=C sort -S2G -T. -k2 | tee >(cut -f1 | tr -d ' ' >"$PERM") | cut -f2 | java -server it.unimi.dsi.util.FrontCodedStringList -u "$LEXFCL"
23 |
24 | # Combine the sorted list and the saved permutation into the list named by $1.
25 | java -server it.unimi.dsi.util.PermutedFrontCodedStringList -i -t "$LEXFCL" "$PERM" "$1"
26 |
--------------------------------------------------------------------------------
/bnd/biz.aQute.bnd-5.2.0.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vigna/dsiutils/e5e53d568d406d49c8458bd8b189b72179e401ce/bnd/biz.aQute.bnd-5.2.0.jar
--------------------------------------------------------------------------------
/build.properties:
--------------------------------------------------------------------------------
1 | version=2.7.4
2 |
3 | build.sysclasspath=ignore
4 |
5 | jar.base=/usr/share/java
6 | javadoc.base=/usr/share/javadoc
7 |
8 | dist=dist
9 | src=src
10 | test=test
11 | slow=slow
12 | reports=reports
13 | coverage=coverage
14 | checkstyle=checkstyle
15 | docs=docs
16 | build=build
17 | instrumented=instr
18 |
19 | j2se.apiurl=https://docs.oracle.com/javase/8/docs/api/
20 | fastutil.apiurl=https://fastutil.di.unimi.it/docs/
21 | jsap.apiurl=http://www.martiansoftware.com/jsap/doc/javadoc/
22 | junit.apiurl=https://junit.org/junit4/javadoc/latest/
23 | log4j.apiurl=https://logging.apache.org/log4j/1.2/apidocs/
24 | slf4j.apiurl=https://www.slf4j.org/apidocs/
25 | commons-configuration2.apiurl=https://commons.apache.org/proper/commons-configuration/apidocs/
26 | commons-io.apiurl=https://commons.apache.org/proper/commons-io/javadocs/api-release/
27 | commons-lang3.apiurl=https://commons.apache.org/proper/commons-lang/javadocs/api-release/
28 | commons-collections4.apiurl=https://commons.apache.org/proper/commons-collections/javadocs/api-4.4/
29 | commons-math3.apiurl=https://commons.apache.org/proper/commons-math/javadocs/api-3.6.1/
30 | guava.apiurl=https://javadoc.io/doc/com.google.guava/guava/latest/index.html
31 |
--------------------------------------------------------------------------------
/dsiutils.bnd:
--------------------------------------------------------------------------------
1 | Automatic-Module-Name: it.unimi.dsi.dsiutils
2 | Bundle-Name: it.unimi.dsi.dsiutils
3 | Bundle-SymbolicName: it.unimi.dsi.dsiutils
4 | Export-Package: it.unimi.dsi.*
5 | Bundle-Version: ${version}
6 |
--------------------------------------------------------------------------------
/ivy.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
--------------------------------------------------------------------------------
/makefile:
--------------------------------------------------------------------------------
1 | include build.properties
2 |
3 | TAR=tar
4 |
5 | source:
6 | rm -fr dsiutils-$(version)
7 | ant clean
8 | ln -s . dsiutils-$(version)
9 | $(TAR) chvf dsiutils-$(version)-src.tar --owner=0 --group=0 \
10 | dsiutils-$(version)/README.md \
11 | dsiutils-$(version)/CHANGES \
12 | dsiutils-$(version)/COPYING.LESSER \
13 | dsiutils-$(version)/LICENSE-2.0.txt \
14 | dsiutils-$(version)/build.xml \
15 | dsiutils-$(version)/ivy.xml \
16 | dsiutils-$(version)/dsiutils.bnd \
17 | dsiutils-$(version)/pom-model.xml \
18 | dsiutils-$(version)/build.properties \
19 | $$(find dsiutils-$(version)/src/it/unimi/dsi -iname \*.java -or -iname \*.html -or -iname \*.in.16 -or -iname \*.out.12) \
20 | $$(find dsiutils-$(version)/test/it/unimi/dsi -iname \*.java -or -iname \*.html -or -iname \*.data) \
21 | $$(find dsiutils-$(version)/slow/it/unimi/dsi -iname \*.java -or -iname \*.html) \
22 | dsiutils-$(version)/src/overview.html
23 | $(TAR) --delete --wildcards -v -f dsiutils-$(version)-src.tar \
24 | dsiutils-$(version)/src/it/unimi/dsi/test/*.java \
25 | dsiutils-$(version)/test/it/unimi/dsi/test/*.java \
26 | dsiutils-$(version)/src/it/unimi/dsi/util/IntParallel*.java \
27 | dsiutils-$(version)/src/it/unimi/dsi/util/XorGens*.java \
28 | dsiutils-$(version)/src/it/unimi/dsi/stat/Ziggurat.java
29 | gzip -f dsiutils-$(version)-src.tar
30 | rm dsiutils-$(version)
31 |
32 | binary:
33 | rm -fr dsiutils-$(version)
34 | $(TAR) zxvf dsiutils-$(version)-src.tar.gz
35 | (cd dsiutils-$(version) && unset CLASSPATH && unset LOCAL_IVY_SETTINGS && ant ivy-clean ivy-setupjars && ant junit && ant clean && ant jar javadoc)
36 | $(TAR) zcvf dsiutils-$(version)-bin.tar.gz --owner=0 --group=0 \
37 | dsiutils-$(version)/README.md \
38 | dsiutils-$(version)/CHANGES \
39 | dsiutils-$(version)/COPYING.LESSER \
40 | dsiutils-$(version)/LICENSE-2.0.txt \
41 | dsiutils-$(version)/dsiutils-$(version).jar \
42 | dsiutils-$(version)/docs
43 | $(TAR) zcvf dsiutils-$(version)-deps.tar.gz --owner=0 --group=0 --transform='s|.*/||' $$(find dsiutils-$(version)/jars/runtime -iname \*.jar -exec readlink {} \;)
44 |
45 | stage:
46 | rm -fr dsiutils-$(version)
47 | $(TAR) zxvf dsiutils-$(version)-src.tar.gz
48 | cp -fr bnd dsiutils-$(version)
49 | (cd dsiutils-$(version) && unset CLASSPATH && unset LOCAL_IVY_SETTINGS && ant ivy-clean ivy-setupjars && ant stage)
50 |
--------------------------------------------------------------------------------
/pom-model.xml:
--------------------------------------------------------------------------------
1 |
2 | 4.0.0
3 | ${ivy.pom.groupId}
4 | ${ivy.pom.artifactId}
5 | jar
6 | DSI Utilities
7 | ${ivy.pom.version}
8 | The DSI utilities are a mishmash of classes accumulated during the last twenty years in projects developed at the DSI (Dipartimento di Scienze dell'Informazione, i.e., Information Sciences Department), now DI (Dipartimento di Informatica, i.e., Informatics Department), of the Universita` degli Studi di Milano.
9 | http://dsiutils.di.unimi.it/
10 |
11 |
12 | GNU Lesser General Public License Version 2.1+
13 | https://www.gnu.org/licenses/old-licenses/lgpl-2.1.html
14 | repo
15 |
16 |
17 | Apache License v2.0
18 | https://www.apache.org/licenses/LICENSE-2.0
19 | source
20 |
21 |
22 |
23 | scm:git://github.com/vigna/dsiutils.git
24 | https://github.com/vigna/dsiutils
25 |
26 |
27 |
28 | boldi
29 | Paolo Boldi
30 | paolo.boldi@unimi.it
31 |
32 |
33 | vigna
34 | Sebastiano Vigna
35 | sebastiano.vigna@unimi.it
36 |
37 |
38 |
39 | 1.8
40 | 1.8
41 |
42 |
43 |
--------------------------------------------------------------------------------
/prngperf/README:
--------------------------------------------------------------------------------
1 | To perform a basic JMH run:
2 |
3 | mvn clean install && java -jar target/benchmarks.jar
4 |
--------------------------------------------------------------------------------
/setcp.sh:
--------------------------------------------------------------------------------
1 | # Prepends the dsiutils jar located in this script's directory, together with
2 | # the jars in its jars/test subdirectory, to CLASSPATH.
3 | # NOTE(review): the use of BASH_ARGV and of export suggests this file is meant
4 | # to be sourced from bash rather than executed -- confirm.
5 |
6 | JAR=dsiutils
7 |
8 | sourcedir=$(cd "$(dirname "${BASH_ARGV[0]}")" && pwd)
9 | # \ls bypasses any alias defined for ls in the calling shell.
10 | count=$(\ls -1 "$sourcedir"/$JAR-*.jar 2>/dev/null | wc -l)
11 |
12 | if (( count == 0 )); then
13 |     echo "WARNING: no $JAR jar file."
14 | elif (( count > 1 )); then
15 |     # List the jars from the same directory in which they were counted.
16 |     echo "WARNING: several $JAR jar files ($(\ls -m "$sourcedir"/$JAR-*.jar))"
17 | else
18 |     # If CLASSPATH already mentions slf4j, leave out the slf4j jars from jars/test.
19 |     if echo "$CLASSPATH" | grep -q slf4j; then
20 |         deps=$(\ls -1 "$sourcedir"/jars/test/*.jar | grep -v slf4j | paste -d: -s)
21 |     else
22 |         deps=$(\ls -1 "$sourcedir"/jars/test/*.jar | paste -d: -s)
23 |     fi
24 |
25 |     export CLASSPATH=$(\ls -1 "$sourcedir"/$JAR-*.jar | tail -n 1):$deps:$CLASSPATH
26 | fi
27 |
--------------------------------------------------------------------------------
/slow/it/unimi/dsi/big/util/FrontCodedStringBigListSlowTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DSI utilities
3 | *
4 | * Copyright (C) 2010-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.big.util;
21 |
22 | import static org.junit.Assert.assertEquals;
23 |
24 | import java.nio.charset.StandardCharsets;
25 | import java.util.Iterator;
26 | import java.util.SplittableRandom;
27 |
28 | import org.junit.Test;
29 |
30 | import it.unimi.dsi.lang.MutableString;
31 |
32 | public class FrontCodedStringBigListSlowTest {
33 | @Test
34 | public void testLarge() {
35 | final long size = (1L << 31) + 10000;
36 | final FrontCodedStringBigList byteArrayFrontCodedBigList = new FrontCodedStringBigList(new Iterator() {
37 | SplittableRandom r = new SplittableRandom(0);
38 | long i = 0;
39 |
40 | @Override
41 | public boolean hasNext() {
42 | return i < size;
43 | }
44 |
45 | @Override
46 | public String next() {
47 | i++;
48 | return new String(new byte[] { (byte)r.nextLong() }, StandardCharsets.ISO_8859_1);
49 | }
50 | }, 10, true);
51 | SplittableRandom r = new SplittableRandom(0);
52 | for (long i = 0; i < size; i++) {
53 | assertEquals(new String(new byte[] { (byte)r.nextLong() }, StandardCharsets.ISO_8859_1), byteArrayFrontCodedBigList.get(i).toString());
54 | }
55 | r = new SplittableRandom(0);
56 | final MutableString s = new MutableString();
57 | for (long i = 0; i < size; i++) {
58 | byteArrayFrontCodedBigList.get(i, s);
59 | assertEquals(new String(new byte[] { (byte)r.nextLong() }, StandardCharsets.ISO_8859_1), s.toString());
60 | }
61 | }
62 |
63 | }
64 |
--------------------------------------------------------------------------------
/slow/it/unimi/dsi/big/util/ImmutableExternalPrefixMapSlowTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DSI utilities
3 | *
4 | * Copyright (C) 2002-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.big.util;
21 |
22 | import static org.junit.Assert.assertEquals;
23 | import static org.junit.Assert.assertTrue;
24 |
25 | import java.io.IOException;
26 | import java.util.Iterator;
27 | import java.util.NoSuchElementException;
28 |
29 | import org.junit.Test;
30 |
31 | import it.unimi.dsi.fastutil.objects.ObjectIterator;
32 |
33 | /** Slow tests building an {@link ImmutableExternalPrefixMap} over 3,000,000,000 strings. */
34 | public class ImmutableExternalPrefixMapSlowTest {
35 |
36 | 	/** Builds a map over 3,000,000,000 four-character strings and checks membership,
37 | 	 * the list view, the iterator and a batch of absent keys.
38 | 	 *
39 | 	 * @param blockSize the block size passed to the map constructor.
40 | 	 */
41 | 	public void testBig(final int blockSize) throws IOException {
42 | 		final Iterable p = new Iterable() {
43 | 			private final static long INCREMENT= ((1L << 62) / 3000000000L);
44 | 			@Override
45 | 			public Iterator iterator() {
46 | 				return new ObjectIterator() {
47 | 					long curr = 0;
48 | 					@Override
49 | 					public boolean hasNext() {
50 | 						return curr < 3000000000L;
51 | 					}
52 |
53 | 					@Override
54 | 					public String next() {
55 | 						if (! hasNext()) throw new NoSuchElementException();
56 | 						// Spread the counter over the long range and split the value into four 16-bit chars.
57 | 						final long v = curr++ * INCREMENT ;
58 | 						final char[] a = new char[4];
59 | 						a[0] = (char)(v >>> 48);
60 | 						a[1] = (char)(v >>> 32);
61 | 						a[2] = (char)(v >>> 16);
62 | 						a[3] = (char)v;
63 | 						return String.valueOf(a);
64 | 					}
65 | 				};
66 | 			}
67 | 		};
68 |
69 | 		final ImmutableExternalPrefixMap d = new ImmutableExternalPrefixMap(p, blockSize);
70 |
71 | 		// The index must be a long: the list holds 3,000,000,000 entries, which is
72 | 		// more than Integer.MAX_VALUE, so an int counter would overflow to a negative index.
73 | 		long j = 0;
74 | 		for (final String s : p) {
75 | 			assertTrue(s, d.containsKey(s));
76 | 			assertEquals(s, d.list().get(j++).toString());
77 | 		}
78 |
79 | 		// The map iterator must return the same strings, in the same order, as the source.
80 | 		final Iterator k = d.iterator();
81 | 		for(final Iterator i = p.iterator(); i.hasNext();) {
82 | 			assertTrue(i.hasNext() == k.hasNext());
83 | 			assertEquals(i.next().toString(), k.next().toString());
84 | 		}
85 |
86 | 		// Test negatives
87 | 		for(long i = 1000000000000L; i < 1000000002000L; i++) assertEquals(-1, d.getLong(Long.toBinaryString(i)));
88 |
89 | 	}
90 |
91 | 	/** Runs {@link #testBig(int)} with a block size of 1024. */
92 | 	@Test
93 | 	public void testBig1024() throws IOException {
94 | 		testBig(1024);
95 | 	}
96 |
97 | 	/** Runs {@link #testBig(int)} with a block size of 16384. */
98 | 	@Test
99 | 	public void testBig16384() throws IOException {
100 | 		testBig(16384);
101 | 	}
102 | }
103 |
--------------------------------------------------------------------------------
/slow/it/unimi/dsi/big/util/LiterallySignedStringMapSlowTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DSI utilities
3 | *
4 | * Copyright (C) 2002-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.big.util;
21 |
22 | import org.junit.Test;
23 |
24 | import it.unimi.dsi.fastutil.Size64;
25 | import it.unimi.dsi.fastutil.objects.AbstractObject2LongFunction;
26 | import it.unimi.dsi.fastutil.objects.AbstractObjectBigList;
27 | import it.unimi.dsi.lang.MutableString;
28 |
29 | public class LiterallySignedStringMapSlowTest {
30 | private final class LargeFunction extends AbstractObject2LongFunction implements Size64 {
31 | private static final long serialVersionUID = 1L;
32 |
33 | @Override
34 | public long getLong(final Object key) {
35 | try {
36 | final long l = Long.parseLong(key.toString());
37 | return l < 1L << 31 ? l : -1;
38 | }
39 | catch(final Exception e) {
40 | return -1;
41 | }
42 | }
43 |
44 | @Override
45 | public boolean containsKey(final Object key) {
46 | try {
47 | final long l = Long.parseLong(key.toString());
48 | return l < 1L << 31;
49 | }
50 | catch(final Exception e) {
51 | return false;
52 | }
53 | }
54 |
55 | @Override
56 | @Deprecated
57 | public int size() {
58 | return Integer.MAX_VALUE;
59 | }
60 |
61 | @Override
62 | public long size64() {
63 | return 1L << 31;
64 | }
65 | }
66 |
67 | @Test
68 | public void testLarge() {
69 | new LiterallySignedStringMap(new LargeFunction(), new AbstractObjectBigList() {
70 |
71 | @Override
72 | public MutableString get(final long index) {
73 | return new MutableString(Long.toString(index));
74 | }
75 |
76 | @Override
77 | public long size64() {
78 | return 1L << 31;
79 | }
80 | });
81 | }
82 | }
83 |
--------------------------------------------------------------------------------
/slow/it/unimi/dsi/big/util/ShiftAddXorSignedStringMapSlowTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DSI utilities
3 | *
4 | * Copyright (C) 2002-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.big.util;
21 |
22 | import org.junit.Test;
23 |
24 | import it.unimi.dsi.fastutil.Size64;
25 | import it.unimi.dsi.fastutil.objects.AbstractObject2LongFunction;
26 | import it.unimi.dsi.fastutil.objects.AbstractObjectBigList;
27 |
28 | public class ShiftAddXorSignedStringMapSlowTest {
29 | private final class LargeFunction extends AbstractObject2LongFunction implements Size64 {
30 | private static final long serialVersionUID = 1L;
31 |
32 | @Override
33 | public long getLong(final Object key) {
34 | try {
35 | final long l = Long.parseLong((String)key);
36 | return l < 1L << 31 ? l : -1;
37 | }
38 | catch(final Exception e) {
39 | return -1;
40 | }
41 | }
42 |
43 | @Override
44 | public boolean containsKey(final Object key) {
45 | try {
46 | final long l = Long.parseLong((String)key);
47 | return l < 1L << 31;
48 | }
49 | catch(final Exception e) {
50 | return false;
51 | }
52 | }
53 |
54 | @Override
55 | @Deprecated
56 | public int size() {
57 | return Integer.MAX_VALUE;
58 | }
59 |
60 | @Override
61 | public long size64() {
62 | return 1L << 31;
63 | }
64 | }
65 |
66 | @SuppressWarnings("deprecation")
67 | @Test
68 | public void testLarge() {
69 | new ShiftAddXorSignedStringMap(new AbstractObjectBigList() {
70 |
71 | @Override
72 | public String get(final long index) {
73 | return Long.toString(index);
74 | }
75 |
76 | @Override
77 | public long size64() {
78 | return 1L << 31;
79 | }
80 | }.iterator(), new LargeFunction(), 1);
81 | }
82 |
83 | }
84 |
--------------------------------------------------------------------------------
/src/it/unimi/dsi/big/io/package-info.java:
--------------------------------------------------------------------------------
1 | /** I/O big classes
2 | *
3 | * <p>Classes in this package are big versions of classes in {@link it.unimi.dsi.io}.
4 | */
5 |
6 | package it.unimi.dsi.big.io;
7 |
--------------------------------------------------------------------------------
/src/it/unimi/dsi/big/util/PrefixMap.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DSI utilities
3 | *
4 | * Copyright (C) 2004-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.big.util;
21 |
22 | import it.unimi.dsi.fastutil.Size64;
23 | import it.unimi.dsi.fastutil.objects.Object2ObjectFunction;
24 | import it.unimi.dsi.util.LongInterval;
25 |
26 | /** A map from prefixes to string intervals (and possibly vice versa).
27 | *
28 | * <p>Instances of this class provide the services of a {@link StringMap}, but by assuming
29 | * the strings are lexicographically ordered, they can provide further information by
30 | * exposing a {@linkplain #rangeMap() function from string prefixes to intervals} and a
31 | * {@linkplain #prefixMap() function from intervals to string prefixes}.
32 | *
33 | * <p>In the first case, given a prefix, we can ask for the range of strings starting
34 | * with that prefix, expressed as a {@link LongInterval}. This information is very useful to
35 | * satisfy prefix queries (e.g., monitor*) with a brute-force approach.
36 | *
37 | * <p>Optionally, a prefix map may provide the opposite service: given an interval of terms, it
38 | * may provide the maximum common prefix. This feature can be checked for by calling
39 | * {@link #prefixMap()}.
40 | *
41 | * @author Sebastiano Vigna
42 | * @since 2.0
43 | */
44 |
45 | public interface PrefixMap extends StringMap, Size64 {
46 | /** Returns a function mapping prefixes to ranges of strings.
47 | *
48 | * @return a function mapping prefixes to ranges of strings.
49 | */
50 | Object2ObjectFunction rangeMap();
51 |
52 | /** Returns a function mapping ranges of strings to common prefixes (optional operation).
53 | *
54 | * @return a function mapping ranges of strings to common prefixes, or {@code null} if this
55 | * map does not support prefixes.
56 | */
57 | Object2ObjectFunction prefixMap();
58 | }
59 |
--------------------------------------------------------------------------------
/src/it/unimi/dsi/big/util/StringMap.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DSI utilities
3 | *
4 | * Copyright (C) 2008-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.big.util;
21 |
22 | import java.io.Serializable;
23 |
24 | import it.unimi.dsi.fastutil.Size64;
25 | import it.unimi.dsi.fastutil.objects.Object2LongFunction;
26 | import it.unimi.dsi.fastutil.objects.ObjectBigList;
27 |
28 | /** A map from strings to longs (and possibly vice versa).
29 | *
30 | * <p>String maps represent mappings from strings (actually, any subclass of {@link CharSequence})
31 | * to numbers; they can support {@linkplain #list() reverse
32 | * mapping}, too. The latter usually makes sense only if the map is minimal and perfect (e.g., a bijection of a set
33 | * of strings with an initial segment of the natural numbers of the same size). String maps are useful for
34 | * terms of an MG4J
35 | * inverted index, URLs of a WebGraph-compressed
36 | * web snapshot, and so on.
37 | *
38 | * @author Sebastiano Vigna
39 | * @since 2.0
40 | */
41 |
42 | public interface StringMap extends Object2LongFunction, Size64, Serializable {
43 | public static final long serialVersionUID = 0L;
44 |
45 | /** Returns a list view of the domain of this string map (optional operation).
46 | *
47 | * <p>Note that the list view acts as an inverse of the mapping implemented by this map.
48 | *
49 | * @return a list view of the domain of this string map, or {@code null} if this map does
50 | * not support this operation.
51 | */
52 |
53 | ObjectBigList extends S> list();
54 |
55 | /** Returns the intended number of keys in this function, or -1 if no such number exists.
56 | *
57 | * <p>Most function implementations will have some knowledge of the intended number of keys
58 | * in their domain. In some cases, however, this might not be possible. This default
59 | * implementation, in particular, returns -1.
60 | *
61 | * @return the intended number of keys in this function, or -1 if that number is not available.
62 | */
63 | @Override
64 | default long size64() {
65 | return -1;
66 | }
67 |
68 | /** {@inheritDoc}
69 | * @deprecated Please use {@link #size64()} instead. */
70 | @Deprecated
71 | @Override
72 | default int size() {
73 | return (int) Math.min(Integer.MAX_VALUE, size64());
74 | }
75 | }
76 |
--------------------------------------------------------------------------------
/src/it/unimi/dsi/big/util/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Collections and similar big classes
3 | *
4 | *
5 | * Some classes in this package are big versions of classes in {@link it.unimi.dsi.util}. Other
6 | * classes exist only in the big versions (e.g.,
7 | * {@link it.unimi.dsi.big.util.MappedFrontCodedStringBigList}).
8 | */
9 |
10 | package it.unimi.dsi.big.util;
11 |
--------------------------------------------------------------------------------
/src/it/unimi/dsi/bits/TransformationStrategy.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DSI utilities
3 | *
4 | * Copyright (C) 2007-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.bits;
21 |
22 | import java.io.Serializable;
23 |
24 | /** A generic transformation from objects of a given type to bit vector. Most useful
25 | * when adding strings, etc. to a trie.
26 | */
27 |
28 | public interface TransformationStrategy extends Serializable {
29 | /** Returns a bit vector representation of the given object.
30 | *
31 | * @param object the object to be turned into a bit-vector representation.
32 | * @return a bit-vector representation of object.
33 | */
34 | BitVector toBitVector(T object);
35 |
36 | /** The (approximate) number of bits occupied by this transformation.
37 | *
38 | * @return the (approximate) number of bits occupied by this transformation.
39 | */
40 | long numBits();
41 |
42 | /** Returns a copy of this transformation strategy.
43 | *
44 | * @return a copy of this transformation strategy.
45 | */
46 | TransformationStrategy copy();
47 |
48 | /** Returns the length of the bit vector that would be computed by {@link #toBitVector(Object)}.
49 | *
50 | * <p>The raison d'être of this method is that it is often easy to know
51 | * the length of the representation without actually computing the representation.
52 | *
53 | * @param object the object whose representation length is to be known.
54 | * @return the length of the bit-vector representation of object (the one that would be returned by {@link #toBitVector(Object)}).
55 | */
56 | long length(T object);
57 | }
58 |
--------------------------------------------------------------------------------
/src/it/unimi/dsi/bits/package-info.java:
--------------------------------------------------------------------------------
1 | /** Main classes manipulating bits
2 | *
3 | * <p>The {@link it.unimi.dsi.bits.BitVector} interface is the basis for bit vector manipulation.
4 | * The {@link it.unimi.dsi.bits.LongArrayBitVector} implementation is its main implementation.
5 | * The idea is to offer an efficient but easy-to-use bit-vector class by allowing access under many different views. For instance,
6 | * a bit vector can be seen as a {@link it.unimi.dsi.fastutil.longs.LongBigList} of integers of fixed width. Or as a sorted set of
7 | * integers, where the positions of the bits set to one represent elements.
8 | *
9 | * <p>Whenever another object has to be turned into a bit string, you can provide a
10 | * {@link it.unimi.dsi.bits.TransformationStrategy} to that purpose. The static container
11 | * {@link it.unimi.dsi.bits.TransformationStrategies} has several ready-made transformations,
12 | * and some useful wrapping methods.
13 | */
14 |
15 | package it.unimi.dsi.bits;
16 |
--------------------------------------------------------------------------------
/src/it/unimi/dsi/compression/CodeWordCoder.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DSI utilities
3 | *
4 | * Copyright (C) 2005-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.compression;
21 |
22 | import java.io.IOException;
23 | import java.io.Serializable;
24 |
25 | import it.unimi.dsi.bits.BitVector;
26 | import it.unimi.dsi.fastutil.booleans.BooleanIterator;
27 | import it.unimi.dsi.fastutil.booleans.BooleanIterators;
28 | import it.unimi.dsi.io.OutputBitStream;
29 |
30 | /** A coder based on a set of codewords. */
31 |
32 | public class CodeWordCoder implements PrefixCoder, Serializable {
33 | private static final long serialVersionUID = 1L;
34 | /** The array of codewords of this coder. */
35 | protected final BitVector[] codeWord;
36 |
37 | /** Creates a new codeword-based coder using the given vector of codewords. The
38 | * coder will be able to encode symbols numbered from 0 to codeWord.length-1, included.
39 | *
40 | * @param codeWord a vector of codewords.
41 | */
42 | public CodeWordCoder(final BitVector[] codeWord) {
43 | this.codeWord = codeWord;
44 | }
45 |
46 | @Override
47 | public BooleanIterator encode(final int symbol) {
48 | return codeWord[symbol].iterator();
49 | }
50 |
51 | @Override
52 | public int encode(final int symbol, final OutputBitStream obs) throws IOException {
53 | final BitVector w = codeWord[symbol];
54 | final int length = (int) w.length();
55 | for(int i = 0; i < length; i++) obs.writeBit(w.getBoolean(i));
56 | return length;
57 | }
58 |
59 | @Override
60 | public int flush(final OutputBitStream unused) { return 0; }
61 |
62 | @Override
63 | public BooleanIterator flush() { return BooleanIterators.EMPTY_ITERATOR; }
64 |
65 | @Override
66 | public BitVector[] codeWords() { return codeWord; }
67 | }
68 |
--------------------------------------------------------------------------------
/src/it/unimi/dsi/compression/Codec.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DSI utilities
3 | *
4 | * Copyright (C) 2005-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.compression;
21 |
22 | /** An abstract factory corresponding to an instance of a specific compression technique.
23 | *
24 | * <p>An implementation of this interface provides coders and decoders. The
25 | * constructors must provide all data that is required to perform coding
26 | * and decoding.
27 | */
28 |
29 | public interface Codec {
30 | /** Returns a coder for the compression technique represented by this codec.
31 | *
32 | * @return a coder for the compression technique represented by this codec. */
33 | public Coder coder();
34 |
35 | /** Returns a decoder for the compression technique represented by this codec.
36 | *
37 | * @return a decoder for the compression technique represented by this codec. */
38 | public Decoder decoder();
39 |
40 | /** Returns the number of symbols handled by this codec.
41 | *
42 | * @return the number of symbols handled by this codec.
43 | */
44 | public int size();
45 | }
46 |
--------------------------------------------------------------------------------
/src/it/unimi/dsi/compression/Coder.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DSI utilities
3 | *
4 | * Copyright (C) 2005-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.compression;
21 |
22 | import java.io.IOException;
23 |
24 | import it.unimi.dsi.fastutil.booleans.BooleanIterator;
25 | import it.unimi.dsi.io.OutputBitStream;
26 |
27 | /** Coding methods for a specific compression technique. */
28 | public interface Coder {
29 | /** Encodes a symbol.
30 | *
31 | * @param symbol a symbol.
32 | * @return a boolean iterator returning the bits coding symbol.
33 | */
34 | BooleanIterator encode(int symbol);
35 |
36 | /** Encodes a symbol.
37 | *
38 | * @param symbol a symbol.
39 | * @param obs the output bit stream where the encoded symbol will be written.
40 | * @return the number of bits written.
41 | */
42 | int encode(int symbol, OutputBitStream obs) throws IOException;
43 |
44 | /** Flushes the coder.
45 | *
46 | * Warning: this method will not {@link OutputBitStream#flush() flush} obs.
47 | *
48 | * @param obs the output bit stream where the flushing bits will be written.
49 | * @return the number of bits written to flush the coder.
50 | */
51 |
52 | int flush(OutputBitStream obs);
53 |
54 | /** Flushes the coder.
55 | *
56 | * @return a boolean iterator returning the bits used to flush this coder.
57 | */
58 |
59 | BooleanIterator flush();
60 | }
61 |
--------------------------------------------------------------------------------
/src/it/unimi/dsi/compression/Decoder.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DSI utilities
3 | *
4 | * Copyright (C) 2005-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.compression;
21 |
22 | import java.io.IOException;
23 |
24 | import it.unimi.dsi.fastutil.booleans.BooleanIterator;
25 | import it.unimi.dsi.io.InputBitStream;
26 |
27 | /** Decoding methods for a specific compression technique. */
28 | public interface Decoder {
29 |
30 | /** Decodes the next symbol from the given boolean iterator.
31 | *
32 | * <p>Note that {@link InputBitStream} implements {@link BooleanIterator}.
33 | *
34 | * @param iterator a boolean iterator.
35 | * @return the next symbol decoded from the bits emitted by iterator.
36 | * @throws java.util.NoSuchElementException if iterator terminates before a symbol has been decoded.
37 | */
38 | int decode(BooleanIterator iterator);
39 |
40 | /** Decodes the next symbol from the given input bit stream.
41 | *
42 | * <p>Note that {@link InputBitStream} implements {@link BooleanIterator}.
43 | *
44 | * @param ibs an input bit stream.
45 | * @return the next symbol decoded from ibs.
46 | */
47 | int decode(InputBitStream ibs) throws IOException;
48 | }
49 |
--------------------------------------------------------------------------------
/src/it/unimi/dsi/compression/Fast64CodeWordCoder.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DSI utilities
3 | *
4 | * Copyright (C) 2007-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.compression;
21 |
22 | import java.io.IOException;
23 |
24 | import it.unimi.dsi.bits.BitVector;
25 | import it.unimi.dsi.io.OutputBitStream;
26 |
27 | /** A fast coder based on a set of codewords of length at most 64. */
28 |
29 | public final class Fast64CodeWordCoder extends CodeWordCoder {
30 | private static final long serialVersionUID = 1L;
31 | /** An array parallel to {@link #codeWord} containing the codewords as longs (right aligned). */
32 | private final long[] longCodeWord;
33 | /** A cached array, parallel to {@link #longCodeWord}, of codeword lengths. */
34 | private final int[] length;
35 |
36 | /** Creates a new codeword-based coder using the given vector of codewords. The
37 | * coder will be able to encode symbols numbered from 0 to codeWord.length-1, included.
38 | *
39 | * @param codeWord a vector of codewords.
40 | * @param longCodeWord the same codewords as those specified in codeWord, but
41 | * as right-aligned longs written in left-to-right fashion.
42 | */
43 | public Fast64CodeWordCoder(final BitVector[] codeWord, final long[] longCodeWord) {
44 | super(codeWord);
45 | this.longCodeWord = longCodeWord;
46 | length = new int[codeWord.length];
47 | for(int i = length.length; i-- != 0;) length[i] = (int) codeWord[i].length();
48 | }
49 |
50 | @Override
51 | public int encode(final int symbol, final OutputBitStream obs) throws IOException {
52 | return obs.writeLong(longCodeWord[symbol], length[symbol]);
53 | }
54 | }
55 |
--------------------------------------------------------------------------------
/src/it/unimi/dsi/compression/PrefixCodec.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DSI utilities
3 | *
4 | * Copyright (C) 2005-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.compression;
21 |
22 | import it.unimi.dsi.bits.BitVector;
23 |
24 | /** A codec based on a set of prefix-free codewords.
25 | *
26 | * <p>Prefix codecs work by building a vector of prefix-free codewords, one for each symbol. The
27 | * method {@link #codeWords()} returns that vector. Moreover, this interface
28 | * strengthens the return type of {@link #coder()} to {@link PrefixCoder}.
29 | */
30 | public interface PrefixCodec extends Codec {
31 | /** Returns the vector of prefix-free codewords used by this prefix codec.
32 | *
33 | * @return the vector of prefix-free codewords used by this prefix codec.
34 | */
35 | public BitVector[] codeWords();
36 |
37 | @Override
38 | public PrefixCoder coder();
39 | }
40 |
--------------------------------------------------------------------------------
/src/it/unimi/dsi/compression/PrefixCoder.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DSI utilities
3 | *
4 | * Copyright (C) 2005-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.compression;
21 |
22 | import it.unimi.dsi.bits.BitVector;
23 |
24 | /** A coder based on a set of prefix-free codewords.
25 | *
26 | * <p>Not all coders are codeword-based (for instance, arithmetic coding
27 | * is not codeword-based). However, coders that are based on prefix-free codewords are invited
28 | * to return by means of {@link it.unimi.dsi.compression.Codec#coder()} an
29 | * implementation of this interface.
30 | *
31 | * <p>Note that the {@linkplain PrefixCodec#coder() coder} returned by a {@link PrefixCodec} is
32 | * an implementation of this interface.
33 | */
34 | public interface PrefixCoder extends Coder {
35 |
36 | /** Provides access to the codewords.
37 | *
38 | * Warning: bit 0 of each bit vector returned by {@link #codeWords()} is
39 | * the first (leftmost) bit of the corresponding codeword: in other words, codewords are stored in
40 | * right-to-left fashion.
41 | *
42 | * @return the codewords.
43 | */
44 |
45 | BitVector[] codeWords();
46 | }
47 |
--------------------------------------------------------------------------------
/src/it/unimi/dsi/compression/package-info.java:
--------------------------------------------------------------------------------
1 | /** Word-based compression/decompression classes
2 | *
3 | * <p>Classes in this package provide interfaces for the compression system, and implementations
4 | * for codeword-based compression. Their main usage is the construction of
5 | * {@linkplain it.unimi.dsi.util.ImmutableExternalPrefixMap prefix maps}, but
6 | * they are also used, for instance, for WebGraph label
7 | * compression.
8 | */
9 |
10 | package it.unimi.dsi.compression;
11 |
--------------------------------------------------------------------------------
/src/it/unimi/dsi/io/LineWordReader.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DSI utilities
3 | *
4 | * Copyright (C) 2006-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.io;
21 |
22 | import java.io.IOException;
23 | import java.io.Reader;
24 | import java.io.Serializable;
25 |
26 | import it.unimi.dsi.lang.MutableString;
27 |
28 | /** A trivial {@link it.unimi.dsi.io.WordReader} that considers each line
29 | * of a document a single word.
30 | *
31 | * <p>The intended usage of this class is that of indexing stuff like lists of document
32 | * identifiers: if the identifiers contain nonalphabetical characters, the default
33 | * {@link it.unimi.dsi.io.FastBufferedReader} might do a poor job.
34 | *
35 | * <p>Note that the non-word returned by {@link #next(MutableString, MutableString)} is
36 | * always empty.
37 | */
38 |
39 | public class LineWordReader implements WordReader, Serializable {
40 | private static final long serialVersionUID = 1L;
41 | /** A fast buffered reader wrapping the underlying reader. */
42 | private final FastBufferedReader fastBufferedReader = new FastBufferedReader();
43 |
44 | @Override
45 | public boolean next(final MutableString word, final MutableString nonWord) throws IOException {
46 | nonWord.length(0);
47 | return fastBufferedReader.readLine(word) != null;
48 | }
49 |
50 | @Override
51 | public LineWordReader setReader(final Reader reader) {
52 | fastBufferedReader.setReader(reader);
53 | return this;
54 | }
55 |
56 | @Override
57 | public LineWordReader copy() {
58 | return new LineWordReader();
59 | }
60 | }
61 |
--------------------------------------------------------------------------------
/src/it/unimi/dsi/io/NullInputStream.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DSI utilities
3 | *
4 | * Copyright (C) 2003-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.io;
21 |
22 | import java.io.IOException;
23 | import java.io.Serializable;
24 |
25 | import it.unimi.dsi.fastutil.io.MeasurableInputStream;
26 | import it.unimi.dsi.fastutil.io.RepositionableStream;
27 |
28 | /** End-of-stream-only input stream.
29 | *
30 | * <p>This stream has length 0, and will always return end-of-file on any read attempt.
31 | *
32 | * <p>This class is a singleton. You cannot create a null input stream,
33 | * but you can obtain an instance of this class using {@link #getInstance()}.
34 | *
35 | * @author Sebastiano Vigna
36 | * @since 0.8
37 | */
38 |
39 | public class NullInputStream extends MeasurableInputStream implements RepositionableStream, Serializable {
40 | private static final long serialVersionUID = 1L;
41 | private final static NullInputStream INSTANCE = new NullInputStream();
42 |
43 | private NullInputStream() {}
44 |
45 | @Override
46 | public int read() { return -1; }
47 |
48 | /** Returns the only instance of this class.
49 | *
50 | * @return the only instance of this class.
51 | */
52 | public static NullInputStream getInstance() {
53 | return INSTANCE;
54 | }
55 |
56 | private Object readResolve() {
57 | return INSTANCE;
58 | }
59 |
60 | @Override
61 | public long length() {
62 | return 0;
63 | }
64 |
65 | @Override
66 | public long position() {
67 | return 0;
68 | }
69 |
70 | @Override
71 | public void position(final long position) throws IOException {
72 | // TODO: we should specify the semantics out of bounds
73 | return;
74 | }
75 | }
76 |
--------------------------------------------------------------------------------
/src/it/unimi/dsi/io/NullOutputStream.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DSI utilities
3 | *
4 | * Copyright (C) 2003-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.io;
21 |
22 | import java.io.IOException;
23 | import java.io.OutputStream;
24 |
25 | import it.unimi.dsi.fastutil.io.RepositionableStream;
26 |
27 | /** Throw-it-away output stream.
28 | *
29 | * <p>This stream discards whatever is written into it. Its usefulness is in
30 | * previewing the length of some coding by wrapping it in an {@link
31 | * OutputBitStream} (it is a good idea, in this case, {@linkplain
32 | * OutputBitStream#OutputBitStream(java.io.OutputStream,int) to specify a 0-length buffer}).
33 | *
34 | * <p>This class is a singleton. You cannot create a null output stream,
35 | * but you can obtain an instance of this class using {@link #getInstance()}.
36 | *
37 | * @author Sebastiano Vigna
38 | * @since 0.6
39 | */
40 |
41 | public class NullOutputStream extends OutputStream implements RepositionableStream {
42 |
43 | private final static NullOutputStream SINGLETON = new NullOutputStream();
44 |
45 | private NullOutputStream() {}
46 |
47 | @Override
48 | public void write(final int discarded) {}
49 |
50 | /** Returns the only instance of this class. */
51 | public static NullOutputStream getInstance() {
52 | return SINGLETON;
53 | }
54 |
55 | @Override
56 | public long position() throws IOException {
57 | return 0;
58 | }
59 |
60 | @Override
61 | public void position(final long newPosition) throws IOException {}
62 | }
63 |
--------------------------------------------------------------------------------
/src/it/unimi/dsi/io/NullReader.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DSI utilities
3 | *
4 | * Copyright (C) 2003-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.io;
21 |
22 | import java.io.Reader;
23 | import java.io.Serializable;
24 |
25 |
26 | /** End-of-stream-only reader.
27 | *
28 | * <p>This reader will always return end-of-file on any read attempt.
29 | *
30 | * <p>This class is a singleton. You cannot create a null reader,
31 | * but you can obtain an instance of this class using {@link #getInstance()}.
32 | *
33 | * @author Sebastiano Vigna
34 | * @since 0.9.2
35 | */
36 |
37 | public class NullReader extends Reader implements Serializable {
38 | private static final long serialVersionUID = 1L;
39 |
40 | private final static NullReader INSTANCE = new NullReader();
41 |
42 | private NullReader() {}
43 |
44 | /** Returns the only instance of this class.
45 | *
46 | * @return the only instance of this class.
47 | */
48 | public static NullReader getInstance() {
49 | return INSTANCE;
50 | }
51 |
52 | @Override
53 | public void close() {}
54 |
55 | @Override
56 | public int read(final char[] cbuf, final int off, final int len) {
57 | return -1;
58 | }
59 | }
60 |
--------------------------------------------------------------------------------
/src/it/unimi/dsi/io/SafelyCloseable.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DSI utilities
3 | *
4 | * Copyright (C) 2006-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.io;
21 |
22 | import java.io.Closeable;
23 |
24 | /** A marker interface for a closeable resource that implements safety measures to
25 | * make resource tracking easier.
26 | *
27 | * <p>Classes implementing this interface must provide a safety-net finaliser—a
28 | * finaliser that closes the resource and logs that the resource should have been closed.
29 | *
30 | * <p>When the implementing class is abstract, concrete subclasses must
31 | * call super.close() in their own {@link java.io.Closeable#close()} method
32 | * to let the abstract class track correctly the resource. Moreover,
33 | * they must run super.finalize() in
34 | * their own finaliser (if any), as finalisation chaining is not automatic.
35 | *
36 | * <p>Note that if a concrete subclass implements readResolve(), it must
37 | * call super.close(), or actually return this (i.e., the deserialised
38 | * instance); otherwise, a spurious log could be generated when the deserialised instance is collected.
39 | *
40 | * @author Sebastiano Vigna
41 | * @since 1.1
42 | */
43 |
44 | public interface SafelyCloseable extends Closeable {}
45 |
--------------------------------------------------------------------------------
/src/it/unimi/dsi/io/WordReader.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DSI utilities
3 | *
4 | * Copyright (C) 2005-2023 Paolo Boldi and Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.io;
21 |
22 | import java.io.IOException;
23 | import java.io.Reader;
24 | import java.io.Serializable;
25 |
26 | import it.unimi.dsi.lang.MutableString;
27 |
28 | /** An interface providing methods to break the input from a reader into words.
29 | *
30 | * <p>The intended implementations of this interface should decorate
31 | * a given reader (see, for instance, {@link it.unimi.dsi.io.FastBufferedReader}).
32 | * The reader can be changed at any time using {@link #setReader(Reader)}.
33 | *
34 | * <p>This interface is heavily oriented towards reusability and
35 | * streaming. It is conceived so that at most one method call has
36 | * to be performed per word, rather than per character,
37 | * and that implementations may completely avoid object creation by
38 | * {@linkplain #setReader(Reader) setting explicitly the underlying reader}.
39 | *
40 | * <p>The standard implementation ({@link it.unimi.dsi.io.FastBufferedReader}) breaks
41 | * words in the trivial way. More complex implementations (e.g., for languages requiring
42 | * segmentation) can subclass {@link it.unimi.dsi.io.FastBufferedReader} or provide their
43 | * own implementation.
44 | */
45 |
46 | public interface WordReader extends Serializable {
47 | /** Extracts the next word and non-word.
48 | *
49 | * <p>If this method returns true, a new non-empty word, and possibly
50 | * a new non-word, have been extracted. It is acceptable
51 | * that the first call to this method after creation
52 | * or after a call to {@link #setReader(Reader)} returns an empty
53 | * word. In other words both word and nonWord are maximal.
54 | *
55 | * @param word the next word returned by the underlying reader.
56 | * @param nonWord the nonword following the next word returned by the underlying reader.
57 | * @return true if a new word was processed, false otherwise (in which
58 | * case both word and nonWord are unchanged).
59 | */
60 |
61 | public abstract boolean next(MutableString word, MutableString nonWord) throws IOException;
62 |
63 | /** Resets the internal state of this word reader, which will start again reading from the given reader.
64 | *
65 | * @param reader the new reader providing characters.
66 | * @return this word reader.
67 | */
68 |
69 | public abstract WordReader setReader(Reader reader);
70 |
71 | /** Returns a copy of this word reader.
72 | *
73 | * <p>This method must return a word reader with a behaviour that
74 | * matches exactly that of this word reader.
75 | *
76 | * @return a copy of this word reader.
77 | */
78 |
79 | public abstract WordReader copy();
80 | }
81 |
--------------------------------------------------------------------------------
/src/it/unimi/dsi/io/delta.in.16:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vigna/dsiutils/e5e53d568d406d49c8458bd8b189b72179e401ce/src/it/unimi/dsi/io/delta.in.16
--------------------------------------------------------------------------------
/src/it/unimi/dsi/io/delta.out.12:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vigna/dsiutils/e5e53d568d406d49c8458bd8b189b72179e401ce/src/it/unimi/dsi/io/delta.out.12
--------------------------------------------------------------------------------
/src/it/unimi/dsi/io/gamma.in.16:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vigna/dsiutils/e5e53d568d406d49c8458bd8b189b72179e401ce/src/it/unimi/dsi/io/gamma.in.16
--------------------------------------------------------------------------------
/src/it/unimi/dsi/io/gamma.out.12:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vigna/dsiutils/e5e53d568d406d49c8458bd8b189b72179e401ce/src/it/unimi/dsi/io/gamma.out.12
--------------------------------------------------------------------------------
/src/it/unimi/dsi/io/package-info.java:
--------------------------------------------------------------------------------
1 | /** I/O classes
2 | *
3 | * <p>Classes in this package fulfill needs that are not satisfied by the
4 | * standard I/O classes available.
5 | *
6 | * <h2>Reading text</h2>
7 | *
8 | * <p>We provide replacement classes such as {@link
9 | * it.unimi.dsi.io.FastBufferedReader} and classes exposing the lines of
10 | * a file as an {@linkplain
11 | * it.unimi.dsi.io.FileLinesMutableStringIterable Iterable}. The general
12 | * {@link it.unimi.dsi.io.WordReader} interface is used by MG4J
13 | * to provide customizable word segmentation.
14 | *
15 | * <h2>Bit-level I/O</h2>
16 | *
17 | * <p>The standard Java API lacks bit-level I/O classes: to this purpose, we
18 | * provide {@link it.unimi.dsi.io.InputBitStream} and {@link
19 | * it.unimi.dsi.io.OutputBitStream}, which can wrap any standard Java
20 | * corresponding stream and make it work at the bit level; moreover, they
21 | * provide support for several useful formats (such as unary, binary, minimal
22 | * binary, γ, δ and Golomb encoding).
23 |
24 | * <p>Bit input and output streams offer also efficient buffering and a way to
25 | * reposition the bit stream in case the underlying byte stream is a
26 | * file-based stream or a {@link it.unimi.dsi.fastutil.io.RepositionableStream}.
27 | *
28 | * <h2>Conventions</h2>
29 | *
30 | * <p>All coding methods work on natural numbers. The
31 | * encoding of zero is very natural for some techniques, and much less natural
32 | * for others. To keep methods rationally organized, all methods are able to
33 | * encode any natural number. If, for instance, you want to write positive
34 | * numbers in unary encoding and you do not want to waste a bit, you have to
35 | * decrement them first (i.e., instead of p you must encode
36 | * p − 1).
37 | */
38 |
39 | package it.unimi.dsi.io;
40 |
--------------------------------------------------------------------------------
/src/it/unimi/dsi/io/shiftedgamma.in.16:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vigna/dsiutils/e5e53d568d406d49c8458bd8b189b72179e401ce/src/it/unimi/dsi/io/shiftedgamma.in.16
--------------------------------------------------------------------------------
/src/it/unimi/dsi/io/shiftedgamma.out.12:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vigna/dsiutils/e5e53d568d406d49c8458bd8b189b72179e401ce/src/it/unimi/dsi/io/shiftedgamma.out.12
--------------------------------------------------------------------------------
/src/it/unimi/dsi/io/zeta3.in.16:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vigna/dsiutils/e5e53d568d406d49c8458bd8b189b72179e401ce/src/it/unimi/dsi/io/zeta3.in.16
--------------------------------------------------------------------------------
/src/it/unimi/dsi/io/zeta3.out.12:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vigna/dsiutils/e5e53d568d406d49c8458bd8b189b72179e401ce/src/it/unimi/dsi/io/zeta3.out.12
--------------------------------------------------------------------------------
/src/it/unimi/dsi/lang/EnumStringParser.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DSI utilities
3 | *
4 | * Copyright (C) 2016-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.lang;
21 |
22 | import com.martiansoftware.jsap.ParseException;
23 | import com.martiansoftware.jsap.StringParser;
24 |
25 | /**
26 | * A {@link com.martiansoftware.jsap.StringParser StringParser} that makes the user choose among
27 | * items of a Java {@code enum}.
28 | *
29 | *
Optionally, parsed strings can be normalized to upper case.
30 | * Thus, if the enum elements are defined in uppercase, the parser will be in practice
31 | * case-independent.
32 | *
33 | *
A typical usage example for an {@code ExampleEnum} with an item {@code A} that is going to be the default:
34 | *
40 | */
41 |
42 | public class EnumStringParser<E extends Enum<E>> extends StringParser {
43 | private final Class<E> enumClass;
44 | private final boolean toUpper;
45 |
46 | /** Returns the enum item obtained by passing the argument to {@link Enum#valueOf(Class, String)}.
47 | *
48 | * @param s an enum item name.
49 | * @return the enum item returned by {@link Enum#valueOf(Class, String)} (possibly
50 | * after upper casing {@code s} with the locale-independent rules of {@link java.util.Locale#ROOT}).
51 | * @throws ParseException if {@code s} does not name an item of the enum.
52 | */
53 | @Override
54 | public E parse(final String s) throws ParseException {
55 | try {
56 | return Enum.valueOf(enumClass, toUpper ? s.toUpperCase(java.util.Locale.ROOT) : s);
57 | } catch (final Exception e) {
58 | throw (new ParseException("Unknown value '" + s + "'.", e));
59 | }
60 | }
61 |
62 | private EnumStringParser(final Class<E> enumClass, final boolean toUpper) {
63 | this.enumClass = enumClass;
64 | this.toUpper = toUpper;
65 | }
66 |
67 | /** Returns an enum parser.
68 | *
69 | * @param enumClass an {@code enum} class whose items will be returned by the parser.
70 | * @param toUpper tells the parser to upper case the strings to be parsed.
71 | * @return a parser for the items of {@code enumClass}.
72 | */
73 | public static <E extends Enum<E>> EnumStringParser<E> getParser(final Class<E> enumClass, final boolean toUpper) throws IllegalArgumentException {
74 | return new EnumStringParser<>(enumClass, toUpper);
75 | }
76 |
77 | /** Returns an enum parser that does not normalize to upper case.
78 | *
79 | * @param enumClass an {@code enum} class whose items will be returned by the parser.
80 | * @return a parser for the items of {@code enumClass}.
81 | */
82 | public static <E extends Enum<E>> EnumStringParser<E> getParser(final Class<E> enumClass) throws IllegalArgumentException {
83 | return getParser(enumClass, false);
84 | }
85 | }
86 |
--------------------------------------------------------------------------------
/src/it/unimi/dsi/lang/FlyweightPrototype.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DSI utilities
3 | *
4 | * Copyright (C) 2006-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.lang;
21 |
22 | /** A prototype providing flyweight copies.
23 | *
24 | *
Flyweight copies are useful to implement multithreading on read-only
25 | * (but maybe stateful) classes. An instance of a class implementing this interface
26 | * is not necessarily thread safe,
27 | * but it can be (thread-) safely copied many times (i.e., it can be used as a prototype).
28 | * All copies will share as much as possible of the class read-only
29 | * state (so they are flyweight).
30 | *
31 | *
In the case an implementation is stateless, it can of course return always the same singleton
32 | * instance as a copy. At the other extreme, a stateful class may decide to synchronise its
33 | * methods and return itself as a copy instead. Note that in general the object returned
34 | * by {@link #copy()} must replicate the current state of the object, not
35 | * the object state at creation time. This might require some calls to methods that
36 | * modify the class internal state: in particular, one should always check whether such
37 | * methods are pointed out in the documentation of superclasses.
38 | *
39 | *
Warning: if {@link #copy()} accesses mutable internal state, setters
40 | * and {@link #copy()} must be suitably synchronised.
41 | *
42 | *
Implementing subclasses are invited to use covariant return-type overriding to
43 | * make {@link #copy()} return the right type.
44 | */
45 |
46 | public interface FlyweightPrototype<T extends FlyweightPrototype<T>> {
47 |
48 | /** Returns a copy of this object, sharing state with this object as much as possible.
49 | *
50 | * @return a copy of this object, sharing state with this object as much as possible. */
51 | public T copy();
52 | }
53 |
--------------------------------------------------------------------------------
/src/it/unimi/dsi/lang/FlyweightPrototypes.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DSI utilities
3 | *
4 | * Copyright (C) 2006-2023 Paolo Boldi and Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.lang;
21 |
22 | import java.lang.reflect.Array;
23 |
24 | /** A class providing static methods and objects that do useful things
25 | * with {@linkplain it.unimi.dsi.lang.FlyweightPrototype flyweight prototypes}.
26 | */
27 |
28 | public class FlyweightPrototypes {
29 |
30 | protected FlyweightPrototypes() {}
31 |
32 | /** Creates a flyweight copy of an array of {@linkplain it.unimi.dsi.lang.FlyweightPrototype flyweight prototypes}.
33 | *
34 | * @param <T> the type of {@link FlyweightPrototype} you want to copy, that is, the
35 | * type of the elements of {@code prototype}.
36 | * @param prototype an array of prototypes.
37 | * @return a flyweight copy of {@code prototype}, obtained by invoking
38 | * {@link FlyweightPrototype#copy()} on each element.
39 | */
40 |
41 | @SuppressWarnings("unchecked")
42 | public static <T extends FlyweightPrototype<T>> T[] copy(final T[] prototype) {
43 | final T[] result = (T[])Array.newInstance(prototype.getClass().getComponentType(), prototype.length);
44 | for(int i = 0; i < result.length; i++) result[i] = prototype[i].copy();
45 | return result;
46 | }
47 |
48 | /** Creates a flyweight copy of the given object, or returns {@code null} if the given object is {@code null}.
49 | *
50 | * @param <T> the type of {@link FlyweightPrototype} you want to copy, that is, the
51 | * type of {@code prototype}.
52 | * @param prototype a prototype to be copied, or {@code null}.
53 | * @return {@code null}, if {@code prototype} is {@code null};
54 | * otherwise, a flyweight copy of {@code prototype}.
55 | */
56 | @SuppressWarnings("null")
57 | public static <T extends FlyweightPrototype<T>> T copy(final T prototype) {
58 | return prototype != null ? prototype.copy() : null;
59 | }
60 | }
61 |
--------------------------------------------------------------------------------
/src/it/unimi/dsi/lang/package-info.java:
--------------------------------------------------------------------------------
1 | /** Basic classes */
2 |
3 | package it.unimi.dsi.lang;
4 |
--------------------------------------------------------------------------------
/src/it/unimi/dsi/logging/package-info.java:
--------------------------------------------------------------------------------
1 | /** Logging classes */
2 |
3 | package it.unimi.dsi.logging;
4 |
--------------------------------------------------------------------------------
/src/it/unimi/dsi/package-info.java:
--------------------------------------------------------------------------------
1 | /** General utilities */
2 |
3 | package it.unimi.dsi;
4 |
--------------------------------------------------------------------------------
/src/it/unimi/dsi/parser/Entity.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DSI utilities
3 | *
4 | * Copyright (C) 2005-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.parser;
21 |
22 | import it.unimi.dsi.lang.MutableString;
23 |
24 | /**
25 | * An SGML character entity.
26 | *
27 | * @deprecated This class is obsolete and kept around for backward compatibility only.
28 | */
29 |
30 | @Deprecated
31 | public final class Entity {
32 |
33 | /** The name of this entity. */
34 | public final CharSequence name;
35 | /** The Unicode character corresponding to this entity. */
36 | public final char character;
37 |
38 | /** Creates a new entity with the specified name and character.
39 | *
40 | * @param name the name of the new entity.
41 | * @param character its character value.
42 | */
43 | public Entity(final CharSequence name, final char character) {
44 | this.name = new MutableString(name);
45 | this.character = character;
46 | }
47 |
48 | /** Returns the name of this entity.
49 | * @return the name of this entity.
50 | */
51 |
52 | @Override
53 | public String toString() {
54 | return name.toString();
55 | }
56 | }
57 |
--------------------------------------------------------------------------------
/src/it/unimi/dsi/parser/ParsingFactory.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DSI utilities
3 | *
4 | * Copyright (C) 2005-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.parser;
21 |
22 | import it.unimi.dsi.lang.MutableString;
23 |
24 | /**
25 | * A set of coherent methods to turn element-type, attribute and entity names to unique interned
26 | * instances.
27 | *
28 | *
29 | * The {@link it.unimi.dsi.parser.BulletParser} needs a way to turn a name (for an element type,
30 | * attribute, or entity) into a corresponding object of type {@link it.unimi.dsi.parser.Element},
31 | * {@link it.unimi.dsi.parser.Attribute} or {@link it.unimi.dsi.parser.Entity}, respectively. The
32 | * returned element must be an interned, unique representation.
33 | *
34 | *
35 | * For instance, the {@linkplain it.unimi.dsi.parser.HTMLFactory standard factory for HTML} parsing
36 | * has ready-made interned versions of all names in the (X)HTML specification, and returns them upon
37 | * request, but other policies are possible. For instance, instances of {@link WellFormedXmlFactory}
38 | * intern every seen name, without reference to a data type (except for entities, in which case the
39 | * HTML set is used).
40 | *
41 | *
42 | * The idea of factoring out the creation of interned counterparts of SGML/XML syntactical objects
43 | * is due to Fabien Campagne.
44 | *
45 | * @author Sebastiano Vigna
46 | * @since 1.0.2
47 | * @deprecated This class is obsolete and kept around for backward compatibility only.
48 | */
49 |
50 | @Deprecated
51 | public interface ParsingFactory {
52 |
53 | /** Returns the {@link it.unimi.dsi.parser.Element} associated
54 | * to a name.
55 | * @param name the name of an element type.
56 | * @return the corresponding interned {@link Element} object.
57 | */
58 | public Element getElement(final MutableString name);
59 |
60 | /** Returns the {@link it.unimi.dsi.parser.Attribute} associated
61 | * to a name.
62 | * @param name the name of an attribute.
63 | * @return the corresponding interned {@link Attribute} object.
64 | */
65 | public Attribute getAttribute(final MutableString name);
66 |
67 | /** Returns the {@link it.unimi.dsi.parser.Entity} associated
68 | * to a name.
69 | * @param name the name of an entity.
70 | * @return the corresponding interned {@link Entity} object.
71 | */
72 | public Entity getEntity(final MutableString name);
73 | }
74 |
--------------------------------------------------------------------------------
/src/it/unimi/dsi/parser/WellFormedXmlFactory.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DSI utilities
3 | *
4 | * Copyright (C) 2005-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.parser;
21 |
22 | import it.unimi.dsi.fastutil.Hash;
23 | import it.unimi.dsi.fastutil.objects.Object2ObjectOpenHashMap;
24 | import it.unimi.dsi.lang.MutableString;
25 |
26 | /**
27 | * A factory for well-formed XML documents.
28 | *
29 | *
30 | * This factory assumes that every new name of an element type or of an attribute is a new valid name.
31 | * For entities, instead, resolution is deferred to {@link it.unimi.dsi.parser.HTMLFactory}.
32 | *
33 | * @author Sebastiano Vigna
34 | * @since 1.0.2
35 | * @deprecated This class is obsolete and kept around for backward compatibility only.
36 | */
37 |
38 | @Deprecated
39 | public class WellFormedXmlFactory implements ParsingFactory {
40 | /** The load factor for all maps. */
41 | private static final float ONE_HALF = .5f;
42 |
43 | /** A (quick) map from attribute names to attributes. */
44 | private final Object2ObjectOpenHashMap name2Attribute = new Object2ObjectOpenHashMap<>(Hash.DEFAULT_INITIAL_SIZE, ONE_HALF);
45 |
46 | /** A (quick) map from element-type names to element types. */
47 | private final Object2ObjectOpenHashMap name2Element = new Object2ObjectOpenHashMap<>(Hash.DEFAULT_INITIAL_SIZE, ONE_HALF);
48 |
49 | public WellFormedXmlFactory() {}
50 |
51 | @Override
52 | public Element getElement(final MutableString name) {
53 | Element element = name2Element.get(name);
54 | if (element == null) {
55 | element = new Element(name);
56 | name2Element.put(element.name, element);
57 | }
58 | return element;
59 | }
60 |
61 | @Override
62 | public Attribute getAttribute(final MutableString name) {
63 | Attribute attribute = name2Attribute.get(name);
64 | if (attribute == null) {
65 | attribute = new Attribute(name);
66 | name2Attribute.put(attribute.name, attribute);
67 | }
68 | return attribute;
69 | }
70 |
71 | @Override
72 | public Entity getEntity(final MutableString name) {
73 | return HTMLFactory.INSTANCE.getEntity(name);
74 | }
75 | }
76 |
--------------------------------------------------------------------------------
/src/it/unimi/dsi/parser/callback/DebugCallbackDecorator.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DSI utilities
3 | *
4 | * Copyright (C) 2005-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.parser.callback;
21 |
22 | import java.util.Map;
23 |
24 | import it.unimi.dsi.lang.MutableString;
25 | import it.unimi.dsi.parser.Attribute;
26 | import it.unimi.dsi.parser.BulletParser;
27 | import it.unimi.dsi.parser.Element;
28 |
29 | /**
30 | * A decorator that prints on standard error all calls to the underlying callback.
31 | *
32 | * @deprecated This class is obsolete and kept around for backward compatibility only.
33 | */
34 | @Deprecated
35 | public class DebugCallbackDecorator implements Callback {
36 |
37 | /** The underlying callback. */
38 | private final Callback callback;
39 |
40 | public DebugCallbackDecorator(final Callback callback) {
41 | this.callback = callback;
42 | }
43 |
44 | @Override
45 | public boolean cdata(final Element element, final char[] text, final int offset, final int length) {
46 | System.err.println("cdata(" + new String(text, offset, length) + ")");
47 | return callback.cdata(element, text, offset, length);
48 | }
49 |
50 |
51 | @Override
52 | public boolean characters(final char[] text, final int offset, final int length, final boolean flowBroken) {
53 | System.err.println("characters(" + new String(text, offset, length) + ", " + flowBroken + ")");
54 | return callback.characters(text, offset, length, flowBroken);
55 | }
56 |
57 |
58 | @Override
59 | public void configure(final BulletParser parser) {
60 | System.err.println("configure()");
61 | callback.configure(parser);
62 | }
63 |
64 |
65 | @Override
66 | public void endDocument() {
67 | System.err.println("endDocument()");
68 | callback.endDocument();
69 | }
70 |
71 | @Override
72 | public boolean endElement(final Element element) {
73 | System.err.println("endElement(" + element + ")");
74 | return callback.endElement(element);
75 | }
76 |
77 | @Override
78 | public boolean equals(final Object obj) {
79 | return callback.equals(obj);
80 | }
81 |
82 | @Override
83 | public int hashCode() {
84 | return callback.hashCode();
85 | }
86 |
87 | @Override
88 | public void startDocument() {
89 | System.err.println("startDocument()");
90 | callback.startDocument();
91 | }
92 |
93 | @Override
94 | public boolean startElement(final Element element, final Map attrMap) {
95 | System.err.println("startElement(" + element + ", " + attrMap + ")"); // Trace the call under its own name before delegating.
96 | return callback.startElement(element, attrMap);
97 | }
98 |
99 | @Override
100 | public String toString() {
101 | return this.getClass().getName() + "(" + callback.toString() + ")";
102 | }
103 | }
104 |
--------------------------------------------------------------------------------
/src/it/unimi/dsi/parser/callback/DefaultCallback.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DSI utilities
3 | *
4 | * Copyright (C) 2005-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.parser.callback;
21 |
22 | import java.util.Map;
23 |
24 | import it.unimi.dsi.lang.MutableString;
25 | import it.unimi.dsi.parser.Attribute;
26 | import it.unimi.dsi.parser.BulletParser;
27 | import it.unimi.dsi.parser.Element;
28 |
29 | /**
30 | * A default, do-nothing-at-all callback.
31 | *
32 | *
33 | * Callbacks can inherit from this class and forget about methods they are not interested in.
34 | *
35 | *
36 | * This class has a protected constructor. If you need an instance of this class, use
37 | * {@link #getInstance()}.
38 | *
39 | * @deprecated This class is obsolete and kept around for backward compatibility only.
40 | */
41 | @Deprecated
42 | public class DefaultCallback implements Callback {
43 | private static final DefaultCallback SINGLETON = new DefaultCallback();
44 |
45 | protected DefaultCallback() {}
46 |
47 | /**
48 | * Returns the singleton instance of the default callback.
49 | *
50 | * @return the singleton instance of the default callback.
51 | */
52 | public static DefaultCallback getInstance() {
53 | return SINGLETON;
54 | }
55 |
56 | @Override
57 | public void configure(final BulletParser parserUnused) {}
58 |
59 | @Override
60 | public void startDocument() {}
61 |
62 | @Override
63 | public boolean startElement(final Element elementUnused, final Map attrMapUnused) {
64 | return true;
65 | }
66 |
67 | @Override
68 | public boolean endElement(final Element elementUnused) {
69 | return true;
70 | }
71 |
72 | @Override
73 | public boolean characters(final char[] textUnused, final int offsetUnused, final int lengthUnused, final boolean flowBrokenUnused) {
74 | return true;
75 | }
76 |
77 | @Override
78 | public boolean cdata(final Element elementUnused, final char[] textUnused, final int offsetUnused, final int lengthUnused) {
79 | return true;
80 | }
81 |
82 | @Override
83 | public void endDocument() {}
84 | }
85 |
--------------------------------------------------------------------------------
/src/it/unimi/dsi/parser/callback/TextExtractor.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DSI utilities
3 | *
4 | * Copyright (C) 2005-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.parser.callback;
21 |
22 | import java.util.Map;
23 |
24 | import it.unimi.dsi.lang.MutableString;
25 | import it.unimi.dsi.parser.Attribute;
26 | import it.unimi.dsi.parser.BulletParser;
27 | import it.unimi.dsi.parser.Element;
28 |
29 | /**
30 | * A callback extracting text and titles.
31 | *
32 | *
33 | * This callback extracts all text in the page, and the title. The resulting text is available
34 | * through {@link #text}, and the title through {@link #title}.
35 | *
36 | *
37 | * Note that {@link #text} and {@link #title} are never trimmed.
38 | *
39 | * @deprecated This class is obsolete and kept around for backward compatibility only.
40 | */
41 |
42 |
43 | @Deprecated
44 | public class TextExtractor extends DefaultCallback {
45 |
46 | /** The text resulting from the parsing process. */
47 | public final MutableString text = new MutableString();
48 | /** The title resulting from the parsing process. */
49 | public final MutableString title = new MutableString();
50 | /** True if we are in the middle of the title. */
51 | private boolean inTitle;
52 |
53 | /**
54 | * Configure the parser to parse text.
55 | */
56 |
57 | @Override
58 | public void configure(final BulletParser parser) {
59 | parser.parseText(true);
60 | // To get the title.
61 | parser.parseTags(true);
62 | }
63 |
64 | @Override
65 | public void startDocument() {
66 | text.length(0);
67 | title.length(0);
68 | inTitle = false;
69 | }
70 |
71 | @Override
72 | public boolean characters(final char[] characters, final int offset, final int length, final boolean flowBroken) {
73 | text.append(characters, offset, length);
74 | if (inTitle) title.append(characters, offset, length);
75 | return true;
76 | }
77 |
78 | @Override
79 | public boolean endElement(final Element element) {
80 | // No element is allowed inside a title, so any closing tag ends it.
81 | inTitle = false;
82 | if (element.breaksFlow) {
83 | // The title has just been closed: only the text needs a separator.
84 | text.append(' ');
85 | }
86 | return true;
87 | }
88 |
89 | @Override
90 | public boolean startElement(final Element element, final Map attrMapUnused) {
91 | // No element is allowed inside a title.
92 | inTitle = element == Element.TITLE;
93 | if (element.breaksFlow) {
94 | if (inTitle) title.append(' ');
95 | text.append(' ');
96 | }
97 | return true;
98 | }
99 |
100 | }
101 |
--------------------------------------------------------------------------------
/src/it/unimi/dsi/parser/callback/package-info.java:
--------------------------------------------------------------------------------
1 | /** Callbacks for the {@link it.unimi.dsi.parser.BulletParser} */
2 |
3 | package it.unimi.dsi.parser.callback;
4 |
--------------------------------------------------------------------------------
/src/it/unimi/dsi/parser/package-info.java:
--------------------------------------------------------------------------------
1 | /** A fast, lightweight, on-demand (X)HTML parser */
2 |
3 | package it.unimi.dsi.parser;
4 |
--------------------------------------------------------------------------------
/src/it/unimi/dsi/stat/package-info.java:
--------------------------------------------------------------------------------
1 | /** Statistics classes */
2 |
3 | package it.unimi.dsi.stat;
4 |
--------------------------------------------------------------------------------
/src/it/unimi/dsi/test/MutableStringLengthSpeedTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DSI utilities
3 | *
4 | * Copyright (C) 2012-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.test;
21 |
22 | import it.unimi.dsi.lang.MutableString;
23 |
24 | public class MutableStringLengthSpeedTest {
25 |
26 | private MutableStringLengthSpeedTest() {}
27 |
28 | public static void main(final String[] arg) {
29 |
30 | long i, n;
31 |
32 | n = Long.parseLong(arg[0]);
33 |
34 | final MutableString s = new MutableString("foobar0");
35 | final MutableString t = new MutableString("foobar1");
36 | final String u = new String("foobar2");
37 | final StringBuffer v = new StringBuffer("foobar3");
38 | final StringBuilder w = new StringBuilder("foobar4");
39 |
40 | int k = 10;
41 | int x = 0;
42 |
43 | while (k-- != 0) {
44 | long start;
45 |
46 | System.out.println();
47 |
48 | start = -System.nanoTime();
49 |
50 | i = n / 2;
51 | while (i-- != 0) {
52 | // Using just ^= causes code elimination
53 | x ^= u.length();
54 | x += u.length();
55 | }
56 |
57 | start += System.nanoTime();
58 |
59 | System.out.println("Called length() " + n + " times on a string in " + start + " ns (" + start / (double)n + " ns/call)");
60 |
61 |
62 | start = -System.nanoTime();
63 |
64 | i = n / 2;
65 | while (i-- != 0) {
66 | x ^= t.length();
67 | x += t.length();
68 | }
69 |
70 | start += System.nanoTime();
71 |
72 | System.out.println("Called length() " + n + " times on a compact string in " + start + " ns (" + start / (double)n + " ns/call)");
73 |
74 | start = -System.nanoTime();
75 |
76 | i = n;
77 | s.loose();
78 | i = n / 2;
79 | while (i-- != 0) {
80 | x ^= s.length();
81 | x += s.length();
82 | }
83 |
84 | start += System.nanoTime();
85 |
86 | System.out.println("Called length() " + n + " times on a loose string in " + start + " ns (" + start / (double)n + " ns/call)");
87 |
88 | start = -System.nanoTime();
89 |
90 | i = n / 2;
91 | while (i-- != 0) {
92 | x ^= v.length();
93 | x += v.length();
94 | }
95 |
96 | start += System.nanoTime();
97 |
98 | System.out.println("Called length() " + n + " times on a string buffer in " + start + " ns (" + start / (double)n + " ns/call)");
99 |
100 | start = -System.nanoTime();
101 |
102 | i = n / 2;
103 | while (i-- != 0) {
104 | x ^= w.length();
105 | x += w.length();
106 | }
107 |
108 | start += System.nanoTime();
109 | if (x == 0) System.out.println();
110 | System.out.println("Called length() " + n + " times on a string builder in " + start + " ns (" + start / (double)n + " ns/call)");
111 | }
112 |
113 | }
114 |
115 | }
116 |
--------------------------------------------------------------------------------
/src/it/unimi/dsi/test/MutableStringReplaceSpeedTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DSI utilities
3 | *
4 | * Copyright (C) 2012-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.test;
21 |
22 | import java.io.BufferedReader;
23 | import java.io.IOException;
24 | import java.io.InputStreamReader;
25 |
26 | import it.unimi.dsi.lang.MutableString;
27 | /** Benchmarks repeated single-character replacement on a compact {@link MutableString}, a loose {@link MutableString}, a {@link StringBuilder} and a {@link String}, using a text read from standard input. */
28 | public class MutableStringReplaceSpeedTest {
29 |
30 | private MutableStringReplaceSpeedTest() {}
31 | /** Runs the benchmark. Arguments: a one-character search string, the replacement string, and the number of calls to time for each variant. */
32 | public static void main(final String[] arg) throws IOException {
33 | if (arg.length < 3) throw new IllegalArgumentException("Usage: MutableStringReplaceSpeedTest <search char> <replacement> <calls>");
34 | String target = null;
35 |
36 | final BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
37 | final MutableString ms = new MutableString();
38 | String line;
39 | while ((line = br.readLine()) != null) ms.append("\n").append(line);
40 | target = ms.toString();
41 |
42 | MutableString s;
43 | String st;
44 | final String searchString = arg[0];
45 | if (searchString.length() != 1) throw new IllegalArgumentException("The search string must be exactly one character long: " + searchString);
46 | final char searchChar = searchString.charAt(0);
47 | final String replaceString = arg[1];
48 | final MutableString replace = new MutableString(replaceString);
49 | final int n = Integer.parseInt(arg[2]);
50 | long start;
51 | final String quotedSearch = java.util.regex.Pattern.quote(searchString), quotedReplace = java.util.regex.Matcher.quoteReplacement(replaceString);
52 | for (int k = 10; k-- != 0;) {
53 | System.out.println();
54 | // In every section, start is set to minus the start time, so adding the end time leaves the elapsed nanoseconds.
55 | s = new MutableString(target).compact();
56 | start = -System.nanoTime();
57 | for(int i = n; i-- != 0;) s.replace(searchChar, replace);
58 | start += System.nanoTime();
59 | System.out.println("Called replace() " + n + " times on a compact string in " + start + " ns (" + start / (double)n + " ns/call)");
60 |
61 | s = new MutableString(target).loose();
62 | start = -System.nanoTime();
63 | for(int i = n; i-- != 0;) s.replace(searchChar, replace);
64 | start += System.nanoTime();
65 | System.out.println("Called replace() " + n + " times on a loose string in " + start + " ns (" + start / (double)n + " ns/call)");
66 |
67 | final StringBuilder sb = new StringBuilder(target);
68 | start = -System.nanoTime();
69 | // Scan backwards so that the text inserted by a replacement is never rescanned within the same pass.
70 | for(int i = n; i-- != 0;) {
71 | int j = sb.length();
72 | for (;;) {
73 | j = sb.lastIndexOf(searchString, j);
74 | if (j == -1) break;
75 | sb.replace(j, j + 1, replaceString);
76 | j--;
77 | }
78 | }
79 |
80 | start += System.nanoTime();
81 | System.out.println("Called replace() " + n + " times on a string builder in " + start + " ns (" + start / (double)n + " ns/call)");
82 | assert sb.length() == s.length();
83 | assert s.toString().equals(sb.toString());
84 | // replaceAll() interprets its arguments as a regular expression and a replacement template: the quoted forms make the replacement literal, as in the variants above.
85 | st = new String(target);
86 | start = -System.nanoTime();
87 | for(int i = n; i-- != 0;) st = st.replaceAll(quotedSearch, quotedReplace);
88 | start += System.nanoTime();
89 | System.out.println("Called replaceAll() " + n + " times on a string in " + start + " ns (" + start / (double)n + " ns/call)");
90 | assert sb.length() == st.length();
91 | assert st.equals(sb.toString());
92 | }
93 | }
94 | }
95 |
--------------------------------------------------------------------------------
/src/it/unimi/dsi/test/TextPatternSpeedTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DSI utilities
3 | *
4 | * Copyright (C) 2012-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.test;
21 |
22 | import java.io.BufferedReader;
23 | import java.io.IOException;
24 | import java.io.InputStreamReader;
25 |
26 | import it.unimi.dsi.Util;
27 | import it.unimi.dsi.lang.MutableString;
28 | import it.unimi.dsi.util.TextPattern;
29 | /** Benchmark timing {@code String.indexOf()}, {@code TextPattern.search()} and {@code MutableString.indexOf()} on a text read from standard input. */
30 | public class TextPatternSpeedTest {
31 |
32 | private TextPatternSpeedTest() {}
33 | /** Runs the benchmark; {@code arg[0]} is the pattern to search for. */
34 | public static void main(final String[] arg) {
35 |
36 | String target = null;
37 | final MutableString ms = new MutableString();
38 | // Read all of standard input into ms, prefixing every line with a newline; target is its String copy.
39 | try {
40 | final BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
41 | String line;
42 | while ((line = br.readLine()) != null) ms.append("\n").append(line);
43 | ms.compact(); // NOTE(review): presumably trims the backing array so that ms.array() below holds exactly the text — confirm
44 | target = ms.toString();
45 | } catch (final IOException e) {
46 | System.out.println("Problems while reading target");
47 | e.printStackTrace(System.out);
48 | System.exit(1);
49 | }
50 | // u folds in every search result and is tested at the end, so the results are actually used (presumably to keep the JIT from discarding the loops).
51 | int u = 0;
52 |
53 | final String p = arg[0];
54 |
55 | int k;
56 | long elapsed;
57 | System.out.println("Searching for " + p);
58 | final int n = 10000;
59 | // Ten rounds; in each round every variant performs n complete scans, and one "call" in the report is one complete scan (all occurrences).
60 | for(k = 10; k-- != 0;) {
61 | System.out.println();
62 | // elapsed is set to minus the start time, so adding the end time leaves the elapsed nanoseconds.
63 | elapsed = -System.nanoTime();
64 |
65 | for (int r = n; r-- != 0;) {
66 | int i = -1;
67 | do u ^= (i = target.indexOf(p, i + 1)); while (i != -1);
68 | }
69 |
70 | elapsed += System.nanoTime();
71 |
72 | System.out.println("Called indexOf() " + n + " times on a string in " + elapsed + " ns (" + Util.format(elapsed / (double)n) + " ns/call)");
73 | final TextPattern tp = new TextPattern(p);
74 | final char a[] = ms.array();
75 | // Same scan, but with TextPattern over the character array a (the report below nevertheless says "on a string").
76 | elapsed = -System.nanoTime();
77 |
78 | for (int r = n; r-- != 0;) {
79 | int i = -1;
80 | do u ^= (i = tp.search(a, i + 1)); while (i != -1);
81 | }
82 |
83 | elapsed += System.nanoTime();
84 |
85 | System.out.println("Called search() " + n + " times on a string in " + elapsed + " ns (" + Util.format(elapsed / (double)n) + " ns/call)");
86 | // Same scan, with both text and pattern as mutable strings.
87 | final MutableString pattern = new MutableString(p);
88 | elapsed = -System.nanoTime();
89 |
90 | for (int r = n; r-- != 0;) {
91 | int i = -1;
92 | do u ^= (i = ms.indexOf(pattern, i + 1)); while (i != -1);
93 | }
94 |
95 | elapsed += System.nanoTime();
96 |
97 | System.out.println("Called indexOf() " + n + " times on a mutable string in " + elapsed + " ns (" + Util.format(elapsed / (double)n) + " ns/call)");
98 | }
99 | // Make the accumulated value observable.
100 | if (u == 0) System.out.println((char)0);
101 | }
102 | }
103 |
--------------------------------------------------------------------------------
/src/it/unimi/dsi/test/WTF.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DSI utilities
3 | *
4 | * Copyright (C) 2012-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.test;
21 |
22 | import java.util.Random;
23 |
24 | public class WTF {
25 | /* From https://twitter.com/joshbloch/status/269478731238760448
26 | *
27 | * Note that ThreadLocalRandom uses the same algorithm as Random.
28 | */
29 | public static void main(final String[] arg) {
30 | final int shift = arg.length == 0 ? 0 : Integer.parseInt(arg[0]);
31 | for (int i = 0; i < 1000; i++)
32 | System.out.println(new Random(i).nextInt(1 << shift));
33 | }
34 | }
35 |
--------------------------------------------------------------------------------
/src/it/unimi/dsi/test/XorShiftPoly116.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DSI utilities
3 | *
4 | * Copyright (C) 2012-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.test;
21 |
22 | import java.math.BigInteger;
23 |
24 | public class XorShiftPoly116 {
25 |
26 | private XorShiftPoly116() {}
27 |
28 | /** The number of bits of state of the generator. */
29 | public static final int BITS = 116;
30 |
31 | /** The period of the generator (2<sup>{@value #BITS}</sup> − 1); set by {@link #main(String[])}. */
32 | public static BigInteger twoToBitsMinus1;
33 |
34 | /** Factors of 2<sup>{@value #BITS}</sup> − 1. */
35 | public static final BigInteger[] factor = {
36 | new BigInteger("3"),
37 | new BigInteger("5"),
38 | new BigInteger("59"),
39 | new BigInteger("233"),
40 | new BigInteger("1103"),
41 | new BigInteger("2089"),
42 | new BigInteger("3033169"),
43 | new BigInteger("107367629"),
44 | new BigInteger("536903681")
45 | };
46 |
47 | /** An array of cofactors. Entry 0 ≤ {@code i} &lt; {@link #numCofactors} contains {@link #twoToBitsMinus1} divided by {@link #factor factor[i]}.
48 | * All entries are {@code null} until {@link #main(String[])} has filled them. */
49 | public static final BigInteger[] cofactor = new BigInteger[factor.length];
50 |
51 | /** The actual number of valid entries in {@link #cofactor}; zero until {@link #main(String[])} has filled the array. */
52 | public static int numCofactors;
53 |
54 | /** Prints statements computing in {@code p} the power to a given exponent, given the quadratures: one multiplication by {@code q[i]} is printed for each bit {@code i} set in the exponent.
55 | *
56 | * @param e an exponent smaller than or equal to 2<sup>{@link #BITS}</sup>.
57 | */
58 | public static void mPow(BigInteger e) {
59 | System.out.println("p := 1;");
60 | for(int i = 0; ! e.equals(BigInteger.ZERO); i++) {
61 | if (e.testBit(0)) System.out.println("p := *p * q[" + i + "];");
62 | e = e.shiftRight(1);
63 | }
64 | }
65 | /** Checks that the product of {@link #factor} is 2<sup>{@value #BITS}</sup> − 1, fills {@link #cofactor}, and prints the quadrature and exponentiation statements to standard output. */
66 | public static void main(final String arg[]) {
67 | // Check factors
68 | BigInteger prod = BigInteger.ONE;
69 | for(final BigInteger f : factor) prod = prod.multiply(f);
70 | if (!prod.equals(BigInteger.valueOf(2).pow(BITS).subtract(BigInteger.ONE))) {
71 | System.err.println("Factors do not match");
72 | return;
73 | }
74 |
75 | BigInteger result = BigInteger.ONE;
76 | twoToBitsMinus1 = BigInteger.valueOf(2).pow(BITS).subtract(BigInteger.ONE);
77 | int n;
78 | // Initialize cofactors.
79 | for(n = 0; n < factor.length; n++) {
80 | cofactor[n] = twoToBitsMinus1.divide(factor[n]);
81 | result = result.multiply(factor[n]);
82 | }
83 | numCofactors = n; // Record how many cofactors are valid: this field was previously never assigned, so it always read 0.
84 | // Safety check (you know, those numbers are LONG).
85 | if (! twoToBitsMinus1.equals(result)) throw new AssertionError();
86 |
87 | System.out.println("Array q[" + (BITS + 1) + "];");
88 | // Quadratures
89 | System.out.println("q[0] := x;");
90 | for(int i1 = 1; i1 <= BITS; i1++) System.out.println("q[" + i1 + "] := q[" + (i1 - 1) + "] * q[" + (i1 - 1) + "];");
91 | System.out.println("!!('Check: ', if q[" + BITS + "] = x then 1 else 0; &q fi);");
92 | // Exponentiation to cofactors
93 | for (final BigInteger element : cofactor) {
94 | mPow(element);
95 | System.out.println("!!('Result: ', if p = 1 then 0; &q else 1 fi);");
96 | }
97 | }
98 | }
99 |
--------------------------------------------------------------------------------
/src/it/unimi/dsi/util/KahanSummation.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DSI utilities
3 | *
4 | * Copyright (C) 2012-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.util;
21 |
22 | /** Kahan's
23 | * summation algorithm encapsulated in an object: besides the running sum, it keeps a correction term that is fed back into the next addition. */
24 |
25 | public class KahanSummation {
26 | /** The current value of the sum. */
27 | private double value;
28 | /** The current correction, as left by the last call to {@link #add(double)}. */
29 | private double c;
30 |
31 | /** Adds a value, updating both the sum and the correction.
32 | * @param v the value to be added to the sum.
33 | */
34 | public void add(final double v) {
35 | final double y = v - c; // the new value, adjusted by the correction left by the previous addition
36 | final double t = value + y; // the new sum
37 | c = (t - value) - y; // what the sum actually gained minus what was meant to be added
38 | value = t;
39 | }
40 |
41 | /** Returns the sum computed so far.
42 | * @return the sum computed so far.
43 | */
44 | public double value() {
45 | return value;
46 | }
47 |
48 | /** Resets the current value and correction to zero. */
49 | public void reset() {
50 | value = c = 0;
51 | }
52 | }
53 |
--------------------------------------------------------------------------------
/src/it/unimi/dsi/util/LongIntervals.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DSI utilities
3 | *
4 | * Copyright (C) 2003-2023 Paolo Boldi and Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.util;
21 |
22 | import java.util.Comparator;
23 |
24 |
25 | /** A class providing static methods and objects that do useful things with {@linkplain LongInterval long intervals}.
26 | *
27 | * @see LongInterval
28 | */
29 |
30 | public class LongIntervals {
31 |
32 | private LongIntervals() {}
33 | /** An empty array of intervals. */
34 | public static final LongInterval[] EMPTY_ARRAY = {};
35 |
36 | /** An empty (singleton) interval. */
37 | public static final LongInterval EMPTY_INTERVAL = new LongInterval(1, 0);
38 |
39 | /** A singleton located at −∞. NOTE(review): the extremes are {@code Integer.MIN_VALUE}, not {@code Long.MIN_VALUE}; the value is left unchanged as callers may depend on it — confirm whether it is intended. */
40 | public static final LongInterval MINUS_INFINITY = new LongInterval(Integer.MIN_VALUE, Integer.MIN_VALUE);
41 |
42 | /** A comparator between intervals defined as follows:
43 | * [<var>a</var>..<var>b</var>] is less than [<var>a</var>'..<var>b</var>']
44 | * iff the first interval starts after the second one, that is,
45 | * iff <var>a</var>' &lt; <var>a</var>.
46 | */
47 | public static final Comparator<LongInterval> STARTS_AFTER = (i1, i2) -> Long.compare(i2.left, i1.left);
48 |
49 | /** A comparator between intervals defined as follows:
50 | * [<var>a</var>..<var>b</var>] is less than [<var>a</var>'..<var>b</var>']
51 | * iff the first interval starts before the second one, that is,
52 | * iff <var>a</var> &lt; <var>a</var>'.
53 | */
54 | public static final Comparator<LongInterval> STARTS_BEFORE = (i1, i2) -> Long.compare(i1.left, i2.left);
55 |
56 | /** A comparator between intervals defined as follows:
57 | * [<var>a</var>..<var>b</var>] is less than [<var>a</var>'..<var>b</var>']
58 | * iff the first interval ends after the second one, that is,
59 | * iff <var>b</var>' &lt; <var>b</var>.
60 | */
61 | public static final Comparator<LongInterval> ENDS_AFTER = (i1, i2) -> Long.compare(i2.right, i1.right);
62 |
63 | /** A comparator between intervals defined as follows:
64 | * [<var>a</var>..<var>b</var>] is less than [<var>a</var>'..<var>b</var>']
65 | * iff the first interval ends before the second one, that is,
66 | * iff <var>b</var> &lt; <var>b</var>'.
67 | */
68 | public static final Comparator<LongInterval> ENDS_BEFORE = (i1, i2) -> Long.compare(i1.right, i2.right);
69 |
70 | /** A comparator between intervals based on their length: shorter intervals come first. */
71 | public static final Comparator<LongInterval> LENGTH_COMPARATOR = (i1, i2) -> Long.compare(i1.length(), i2.length());
72 | }
73 |
74 |
--------------------------------------------------------------------------------
/src/it/unimi/dsi/util/PrefixMap.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DSI utilities
3 | *
4 | * Copyright (C) 2004-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.util;
21 |
22 | import it.unimi.dsi.fastutil.objects.Object2ObjectFunction;
23 |
24 | /** A map from prefixes to string intervals (and possibly vice versa).
25 | *
26 | * <p>Instances of this class provide the services of a {@link StringMap}, but by assuming
27 | * the strings are lexicographically ordered, they can provide further information by
28 | * exposing a {@linkplain #rangeMap() function from string prefixes to intervals} and a
29 | * {@linkplain #prefixMap() function from intervals to string prefixes}.
30 | *
31 | * <p>In the first case, given a prefix, we can ask for the range of strings starting
32 | * with that prefix, expressed as an {@link Interval}. This information is very useful to
33 | * satisfy prefix queries (e.g., <code>monitor*</code>) with a brute-force approach.
34 | *
35 | * <p>Optionally, a prefix map may provide the opposite service: given an interval of terms, it
36 | * may provide the maximum common prefix. This feature can be checked for by calling
37 | * {@link #prefixMap()}, which returns {@code null} when the service is not available.
38 | *
39 | * @author Sebastiano Vigna
40 | * @since 0.9.2
41 | */
42 | // NOTE(review): the raw types below (StringMap, Object2ObjectFunction) may be an artifact of this listing having lost its type parameters — confirm against the upstream sources.
43 | public interface PrefixMap extends StringMap {
44 | /** Returns a function mapping prefixes to ranges of strings.
45 | *
46 | * @return a function mapping prefixes to ranges of strings.
47 | */
48 | Object2ObjectFunction rangeMap();
49 |
50 | /** Returns a function mapping ranges of strings to common prefixes (optional operation).
51 | *
52 | * @return a function mapping ranges of strings to common prefixes, or {@code null} if this
53 | * map does not support prefixes.
54 | */
55 | Object2ObjectFunction prefixMap();
56 | }
57 |
--------------------------------------------------------------------------------
/src/it/unimi/dsi/util/StringMap.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DSI utilities
3 | *
4 | * Copyright (C) 2008-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.util;
21 |
22 | import java.io.Serializable;
23 |
24 | import it.unimi.dsi.fastutil.objects.Object2LongFunction;
25 | import it.unimi.dsi.fastutil.objects.ObjectList;
26 |
27 | /** A map from strings to numbers (and possibly vice versa).
28 | *
29 | * <p>String maps represent mappings from strings (actually, any subclass of {@link CharSequence})
30 | * to numbers; they can support {@linkplain #list() reverse
31 | * mapping}, too. The latter usually makes sense only if the map is minimal and perfect (e.g., a bijection of a set
32 | * of strings with an initial segment of the natural numbers of the same size). String maps are useful for
33 | * terms of an MG4J
34 | * inverted index, URLs of a WebGraph-compressed
35 | * web snapshot, and so on.
36 | *
37 | * <p><strong>Warning</strong>: the return value of {@link #list()} is a <code>fastutil</code> {@link ObjectList}.
38 | * This in principle is not sensible, as string maps return longs (they extend
39 | * {@link Object2LongFunction}), and {@link ObjectList} has only integer index
40 | * support. If you need long indices, please consider using {@link it.unimi.dsi.big.util.StringMap}.
41 | * @param <S> the type of the strings returned by the {@linkplain #list() list view}.
42 | * @author Sebastiano Vigna
43 | * @since 0.2
44 | */
45 |
46 | public interface StringMap<S extends CharSequence> extends Object2LongFunction<CharSequence>, Serializable {
47 |
48 | /** Returns a list view of the domain of this string map (optional operation).
49 | *
50 | * <p>Note that the list view acts as an inverse of the mapping implemented by this map.
51 | *
52 | * @return a list view of the domain of this string map, or {@code null} if this map does
53 | * not support this operation.
54 | */
55 |
56 | ObjectList<? extends S> list();
57 | }
58 |
--------------------------------------------------------------------------------
/src/it/unimi/dsi/util/concurrent/package-info.java:
--------------------------------------------------------------------------------
1 | /** Concurrent data structures */
2 |
3 | package it.unimi.dsi.util.concurrent;
4 |
--------------------------------------------------------------------------------
/src/overview.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | DSI utils
5 |
6 |
7 |
8 |
The DSI utilities are a mishmash of classes accumulated during the last
9 | ten years in projects developed at the former DSI (Dipartimento di Scienze dell'Informazione,
10 | i.e., Information Sciences Department), now DI (Dipartimento di Informatica, i.e.,
11 | Informatics Department) of the Università degli Studi di Milano.
12 | They were originally distributed in several projects
13 | (mainly in MG4J) but we finally decided to
14 | gather all the material in a single place.
15 |
16 |
The implementations available are a bit eclectic due to the particular kind of applications
22 | we developed. Very broadly, we have:
23 |
24 |
25 |
Implementations of pseudorandom number generators. See the {@linkplain it.unimi.dsi.util package documentation} for details.
26 |
{@link it.unimi.dsi.bits.BitVector} and its implementations—a high-performance but flexible set of bit vector classes.
27 |
A {@link it.unimi.dsi.compression} package containing codecs for several types of encodings.
28 |
{@link it.unimi.dsi.logging.ProgressLogger}, a flexible logger with statistics marking the progress of the (many) classes
29 | we use that require hours of computation.
30 |
{@link it.unimi.dsi.lang.ObjectParser}, a class making it easy to specify complex objects on the command
31 | line.
32 |
{@link it.unimi.dsi.lang.MutableString}, our answer to the Java {@link java.lang.String} class.
33 |
The {@link it.unimi.dsi.io I/O package}, containing fast versions of several classes existing in {@link java.io},
34 | many useful classes to easily read text data (e.g., {@link it.unimi.dsi.io.FileLinesMutableStringIterable}), {@linkplain it.unimi.dsi.io.InputBitStream bit streams},
35 | classes providing large-size memory mapping such as {@link it.unimi.dsi.io.ByteBufferInputStream},
36 | and {@link it.unimi.dsi.io.OfflineIterable}—the easy & fast way to store large sequences of objects on disk and iterate on them.
37 |
The {@link it.unimi.dsi.util} package, containing {@linkplain it.unimi.dsi.util.ImmutableBinaryTrie tries},
38 | {@linkplain it.unimi.dsi.util.ImmutableExternalPrefixMap immutable prefix maps}, {@linkplain it.unimi.dsi.util.BloomFilter Bloom filters},
39 | a very comfortable {@link it.unimi.dsi.util.Properties} class and more.
40 |
The {@link it.unimi.dsi.stat} package, containing a lightweight class for {@linkplain it.unimi.dsi.stat.SummaryStats computing basic statistics} and
41 | {@linkplain it.unimi.dsi.stat.Jackknife an arbitrary-precision implementation of the Jackknife method}.
42 |
Lots of utility methods in {@link it.unimi.dsi.Util} (have a look!)
43 |
{@link it.unimi.dsi.big.util.MappedFrontCodedStringBigList}, which provides compact memory-mapped storage of strings, possibly with some compression.
44 |
Big versions of I/O and utility classes in {@link it.unimi.dsi.big.io} and {@link it.unimi.dsi.big.util}.
45 |
46 |
47 |
48 |
--------------------------------------------------------------------------------
/test/it/unimi/dsi/big/util/FrontCodedStringBigListTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DSI utilities
3 | *
4 | * Copyright (C) 2010-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.big.util;
21 |
22 | import static org.junit.Assert.assertEquals;
23 |
24 | import java.util.Arrays;
25 | import java.util.Collections;
26 | import java.util.List;
27 |
28 | import org.junit.Test;
29 |
30 | import it.unimi.dsi.lang.MutableString;
31 |
32 | public class FrontCodedStringBigListTest {
33 | /** Checks that front-coded big lists built from a word list return every string unchanged, for both values of the UTF-8 flag and ratios from 1 to 7, first in the original order and then sorted. */
34 | @Test
35 | public void test() {
36 | final List c = Arrays.asList(TernaryIntervalSearchTreeTest.WORDS.clone());
37 | final MutableString s = new MutableString();
38 | for(int p = 0; p < 2; p++) {
39 | for(final boolean utf8: new boolean[] { false, true })
40 | for(int ratio = 1; ratio < 8; ratio++) {
41 | final FrontCodedStringBigList fcl = new FrontCodedStringBigList(c.iterator(), ratio, utf8);
42 | for (int i = 0; i < fcl.size64(); i++) {
43 | assertEquals(Integer.toString(i), c.get(i), fcl.get(i).toString());
44 | fcl.get(i, s); // same element, this time retrieved into the reusable mutable string
45 | assertEquals(Integer.toString(i), c.get(i), s.toString());
46 | }
47 | }
48 | // The second pass runs on the sorted list.
49 | Collections.sort(c);
50 | }
51 | }
52 | /** Same round-trip check on a short list that includes a private-use character and surrogate pairs. */
53 | @Test
54 | public void testSurrogatePairs() {
55 | final List c = Arrays.asList(new String[] { "a", "AB\uE000AB", "\uD800\uDF02", "\uD800\uDF03", "b" });
56 | for(final boolean utf8: new boolean[] { false, true })
57 | for(int ratio = 1; ratio < 8; ratio++) {
58 | final FrontCodedStringBigList fcl = new FrontCodedStringBigList(c.iterator(), ratio, utf8);
59 | for (int i = 0; i < fcl.size64(); i++) {
60 | assertEquals(Integer.toString(i), c.get(i), fcl.get(i).toString());
61 | }
62 | }
63 | }
64 | }
65 |
--------------------------------------------------------------------------------
/test/it/unimi/dsi/big/util/LiterallySignedStringMapTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DSI utilities
3 | *
4 | * Copyright (C) 2002-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.big.util;
21 |
22 | import static org.junit.Assert.assertEquals;
23 |
24 | import java.io.File;
25 | import java.io.IOException;
26 | import java.io.Serializable;
27 | import java.util.Arrays;
28 | import java.util.Collections;
29 |
30 | import org.junit.Test;
31 |
32 | import it.unimi.dsi.fastutil.Hash;
33 | import it.unimi.dsi.fastutil.io.BinIO;
34 | import it.unimi.dsi.fastutil.objects.Object2LongOpenCustomHashMap;
35 | import it.unimi.dsi.fastutil.objects.ObjectBigLists;
36 | import it.unimi.dsi.lang.MutableString;
37 | import it.unimi.dsi.util.FrontCodedStringList;
38 |
39 | public class LiterallySignedStringMapTest {
40 | /** A hash strategy that compares and hashes character sequences through their {@code toString()} value. */
41 | private final static class CharSequenceStrategy implements Hash.Strategy, Serializable {
42 | private static final long serialVersionUID = 1L;
43 |
44 | @Override
45 | public boolean equals(final CharSequence a, final CharSequence b) {
46 | if (a == null) return b == null;
47 | if (b == null) return false;
48 | return a.toString().equals(b.toString());
49 | }
50 |
51 | @Override
52 | public int hashCode(final CharSequence o) {
53 | return o.toString().hashCode();
54 | }
55 | }
56 | /** For n = 10, 100, 1000: maps the shuffled decimal strings of 0..n−1 to their positions, checks that known strings return their position and n unknown ones return −1, then repeats the checks after a serialization round trip. */
57 | @Test
58 | public void testNumbers() throws IOException, ClassNotFoundException {
59 | for(int n = 10; n < 10000; n *= 10) {
60 | final String[] s = new String[n];
61 | for(int i = s.length; i-- != 0;) s[i] = Integer.toString(i);
62 | Collections.shuffle(Arrays.asList(s)); // shuffles s itself, as the list is a view of the array
63 |
64 | final FrontCodedStringList fcl = new FrontCodedStringList(Arrays.asList(s), 8, true);
65 | // Test with mph
66 | final Object2LongOpenCustomHashMap mph = new Object2LongOpenCustomHashMap<>(new CharSequenceStrategy());
67 | mph.defaultReturnValue(-1);
68 | for(int i = 0; i < s.length; i++) mph.put(new MutableString(s[i]), i);
69 |
70 | LiterallySignedStringMap map = new LiterallySignedStringMap(mph, ObjectBigLists.asBigList(fcl));
71 | // Known strings map to their position; the decimal strings of n..2n−1 are not in the map.
72 | for(int i = s.length; i-- != 0;) assertEquals(i, map.getLong(s[i]));
73 | for(int i = s.length + n; i-- != s.length;) assertEquals(-1, map.getLong(Integer.toString(i)));
74 | // Store the map in a temporary file, load it back and run the same checks.
75 | final File temp = File.createTempFile(getClass().getSimpleName(), "test");
76 | temp.deleteOnExit();
77 | BinIO.storeObject(map, temp);
78 | map = (LiterallySignedStringMap)BinIO.loadObject(temp);
79 |
80 | for(int i = s.length; i-- != 0;) assertEquals(i, map.getLong(s[i]));
81 | for(int i = s.length + n; i-- != s.length;) assertEquals(-1, map.getLong(Integer.toString(i)));
82 | }
83 | }
84 | }
85 |
--------------------------------------------------------------------------------
/test/it/unimi/dsi/big/util/LongBigArraySignedStringMapTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DSI utilities
3 | *
4 | * Copyright (C) 2002-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.big.util;
21 |
22 | import static org.junit.Assert.assertEquals;
23 |
24 | import java.io.File;
25 | import java.io.IOException;
26 | import java.util.Arrays;
27 |
28 | import org.junit.Test;
29 |
30 | import it.unimi.dsi.fastutil.io.BinIO;
31 | import it.unimi.dsi.fastutil.longs.LongBigArrayBigList;
32 | import it.unimi.dsi.fastutil.objects.Object2LongOpenHashMap;
33 |
34 | public class LongBigArraySignedStringMapTest {
35 |
36 | @SuppressWarnings("deprecation")
37 | @Test
38 | public void testNumbers() throws IOException {
39 |
40 | for(int width = 16; width <= Long.SIZE; width += 8) {
41 | final String[] s = new String[100000];
42 | final long[] v = new long[s.length];
43 | for(int i = s.length; i-- != 0;) s[(int)(v[i] = i)] = Integer.toString(i);
44 |
45 | // Test with mph
46 | final Object2LongOpenHashMap mph = new Object2LongOpenHashMap<>(s, v);
47 | final long[][] signatures = LongBigListSignedStringMap.sign(Arrays.asList(s).iterator(), mph);
48 |
49 | LongBigListSignedStringMap map = new LongBigListSignedStringMap(mph, LongBigArrayBigList.wrap(signatures));
50 |
51 | for(int i = s.length; i-- != 0;) assertEquals(i, map.getLong(Integer.toString(i)));
52 | for(int i = s.length + 100; i-- != s.length;) assertEquals(-1, map.getLong(Integer.toString(i)));
53 |
54 | final File temp = File.createTempFile(getClass().getSimpleName(), "test");
55 | temp.deleteOnExit();
56 |
57 | BinIO.storeLongs(signatures, temp);
58 | map = new LongBigListSignedStringMap(mph, temp.toString());
59 |
60 | for(int i = s.length; i-- != 0;) assertEquals(i, map.getLong(Integer.toString(i)));
61 | for(int i = s.length + 10000; i-- != s.length;) assertEquals(-1, map.getLong(Integer.toString(i)));
62 |
63 | temp.delete();
64 |
65 | }
66 | }
67 |
68 | @SuppressWarnings("deprecation")
69 | @Test
70 | public void testSortedNumbers() throws IOException {
71 |
72 | for(int width = 16; width <= Long.SIZE; width += 8) {
73 | final String[] s = new String[100000];
74 | final long[] v = new long[s.length];
75 | for(int i = s.length; i-- != 0;) s[(int)(v[i] = i)] = Integer.toString(i);
76 |
77 | // Test with mph
78 | final Object2LongOpenHashMap mph = new Object2LongOpenHashMap<>(s, v);
79 |
80 | final File temp = File.createTempFile(getClass().getSimpleName(), "test");
81 | temp.deleteOnExit();
82 |
83 | LongBigListSignedStringMap.sign(Arrays.asList(s).iterator(), temp.toString());
84 | final LongBigListSignedStringMap map = new LongBigListSignedStringMap(mph, temp.toString());
85 |
86 | for(int i = s.length; i-- != 0;) assertEquals(i, map.getLong(Integer.toString(i)));
87 | for(int i = s.length + 10000; i-- != s.length;) assertEquals(-1, map.getLong(Integer.toString(i)));
88 |
89 | temp.delete();
90 |
91 | }
92 | }
93 | }
94 |
--------------------------------------------------------------------------------
/test/it/unimi/dsi/big/util/MappedFrontCodedStringBigListTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DSI utilities
3 | *
4 | * Copyright (C) 2010-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.big.util;
21 |
22 | import static org.junit.Assert.assertEquals;
23 |
24 | import java.io.File;
25 | import java.io.IOException;
26 | import java.nio.charset.StandardCharsets;
27 | import java.util.ArrayList;
28 | import java.util.Arrays;
29 | import java.util.Collections;
30 | import java.util.List;
31 |
32 | import org.apache.commons.configuration2.ex.ConfigurationException;
33 | import org.apache.commons.lang3.StringUtils;
34 | import org.junit.Test;
35 |
36 | import it.unimi.dsi.lang.MutableString;
37 |
38 | public class MappedFrontCodedStringBigListTest {
39 |
40 | @Test
41 | public void test() throws IOException, ConfigurationException {
42 | final String basename = File.createTempFile(this.getClass().getName(), ".basename").toString();
43 | final List c = new ArrayList<>(Arrays.asList(TernaryIntervalSearchTreeTest.WORDS.clone()));
44 | c.add(StringUtils.repeat("a", 1000));
45 | c.add(StringUtils.repeat("a", 500) + StringUtils.repeat("b", 500));
46 | c.add(StringUtils.repeat("a", 1000) + StringUtils.repeat("b", 1000));
47 | c.add(StringUtils.repeat("a", 100) + StringUtils.repeat("b", 1000));
48 | final MutableString s = new MutableString();
49 | Collections.sort(c);
50 | for (int p = 0; p < 2; p++) {
51 | for (int ratio = 1; ratio < 8; ratio++) {
52 | final FrontCodedStringBigList fcl = new FrontCodedStringBigList(c.iterator(), ratio, true);
53 |
54 | MappedFrontCodedStringBigList.build(basename, 4, c.stream().map(x -> x.getBytes(StandardCharsets.UTF_8)).iterator());
55 | final MappedFrontCodedStringBigList mfcl = MappedFrontCodedStringBigList.load(basename);
56 | for (int i = 0; i < fcl.size64(); i++) {
57 | assertEquals(Integer.toString(i), c.get(i), mfcl.get(i).toString());
58 | assertEquals(Integer.toString(i), c.get(i), mfcl.getString(i));
59 | assertEquals(Integer.toString(i), c.get(i), new String(mfcl.getArray(i), StandardCharsets.UTF_8));
60 | fcl.get(i, s);
61 | assertEquals(Integer.toString(i), c.get(i), s.toString());
62 | }
63 | }
64 | }
65 |
66 | new File(basename + MappedFrontCodedStringBigList.PROPERTIES_EXTENSION).delete();
67 | new File(basename + MappedFrontCodedStringBigList.BYTE_ARRAY_EXTENSION).delete();
68 | new File(basename + MappedFrontCodedStringBigList.POINTERS_EXTENSION).delete();
69 | }
70 | }
71 |
--------------------------------------------------------------------------------
/test/it/unimi/dsi/big/util/SemiExternalGammaBigListTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DSI utilities
3 | *
4 | * Copyright (C) 2002-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.big.util;
21 |
22 | import static org.junit.Assert.assertEquals;
23 | import static org.junit.Assert.assertTrue;
24 |
25 | import java.io.IOException;
26 |
27 | import org.junit.Test;
28 |
29 | import it.unimi.dsi.fastutil.longs.LongArrayList;
30 | import it.unimi.dsi.fastutil.longs.LongList;
31 | import it.unimi.dsi.io.InputBitStream;
32 | import it.unimi.dsi.io.OutputBitStream;
33 |
34 | /**
35 | * @author Fabien Campagne
36 | * @author Sebastiano Vigna
37 | */
38 | public class SemiExternalGammaBigListTest {
39 |
40 | private static InputBitStream buildInputStream(final LongList longs) throws IOException {
41 | final byte[] array = new byte[longs.size() * 4];
42 | @SuppressWarnings("resource")
43 | final
44 | OutputBitStream streamer = new OutputBitStream(array);
45 | for (int i = 0; i < longs.size(); i++) streamer.writeLongGamma(longs.getLong(i));
46 | final int size = (int)(streamer.writtenBits() / 8) + ((streamer.writtenBits() % 8) == 0 ? 0 : 1);
47 | final byte[] smaller = new byte[size];
48 | System.arraycopy(array, 0, smaller, 0, size);
49 |
50 | return new InputBitStream(smaller);
51 |
52 | }
53 |
54 | @Test
55 | public void testSemiExternalGammaBigListGammaCoding() throws IOException {
56 |
57 | final long[] longs = { 10, 300, 450, 650, 1000, 1290, 1699 };
58 | final LongList listLongs = new LongArrayList(longs);
59 |
60 | SemiExternalGammaBigList list = new SemiExternalGammaBigList(buildInputStream(listLongs), 1, listLongs.size());
61 | for (long i = 0; i < longs.length; ++i) {
62 | assertEquals(("test failed for index: " + i), longs[(int) i], list.getLong(i));
63 | }
64 |
65 | list = new SemiExternalGammaBigList(buildInputStream(listLongs), 2, listLongs.size());
66 | for (long i = 0; i < longs.length; ++i) {
67 | assertEquals(("test failed for index: " + i), longs[(int) i], list.getLong(i));
68 | }
69 |
70 | list = new SemiExternalGammaBigList(buildInputStream(listLongs), 4, listLongs.size());
71 | for (long i = 0; i < longs.length; ++i) {
72 | assertEquals(("test failed for index: " + i), longs[(int) i], list.getLong(i));
73 | }
74 |
75 | list = new SemiExternalGammaBigList(buildInputStream(listLongs), 7, listLongs.size());
76 | for (long i = 0; i < longs.length; ++i) {
77 | assertEquals(("test failed for index: " + i), longs[(int) i], list.getLong(i));
78 | }
79 |
80 | list = new SemiExternalGammaBigList(buildInputStream(listLongs), 8, listLongs.size());
81 | for (long i = 0; i < longs.length; ++i) {
82 | assertEquals(("test failed for index: " + i), longs[(int) i], list.getLong(i));
83 | }
84 | }
85 |
86 | @Test
87 | public void testEmptySemiExternalGammaBigListGammaCoding() throws IOException {
88 |
89 | final long[] longs = { };
90 | final LongList listOffsets = new LongArrayList(longs);
91 |
92 | new SemiExternalGammaBigList(buildInputStream(listOffsets), 1, listOffsets.size());
93 | assertTrue(true);
94 | }
95 |
96 | }
97 |
--------------------------------------------------------------------------------
/test/it/unimi/dsi/big/util/ShiftAddXorSignedStringMapTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DSI utilities
3 | *
4 | * Copyright (C) 2002-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.big.util;
21 |
22 | import static org.junit.Assert.assertEquals;
23 |
24 | import java.io.File;
25 | import java.io.IOException;
26 | import java.util.Arrays;
27 |
28 | import org.junit.Test;
29 |
30 | import it.unimi.dsi.fastutil.io.BinIO;
31 | import it.unimi.dsi.fastutil.objects.Object2LongOpenHashMap;
32 |
33 | public class ShiftAddXorSignedStringMapTest {
34 |
35 | @SuppressWarnings("deprecation")
36 | @Test
37 | public void testNumbers() throws IOException, ClassNotFoundException {
38 |
39 | for(int width = 16; width <= Long.SIZE; width += 8) {
40 | final String[] s = new String[1000];
41 | final long[] v = new long[s.length];
42 | for(int i = s.length; i-- != 0;) s[(int)(v[i] = i)] = Integer.toString(i);
43 |
44 | // Test with mph
45 | final Object2LongOpenHashMap mph = new Object2LongOpenHashMap<>(s, v);
46 | ShiftAddXorSignedStringMap map = new ShiftAddXorSignedStringMap(Arrays.asList(s).iterator(), mph, width);
47 |
48 | for(int i = s.length; i-- != 0;) assertEquals(i, map.getLong(Integer.toString(i)));
49 | for(int i = s.length + 100; i-- != s.length;) assertEquals(-1, map.getLong(Integer.toString(i)));
50 |
51 | final File temp = File.createTempFile(getClass().getSimpleName(), "test");
52 | temp.deleteOnExit();
53 | BinIO.storeObject(map, temp);
54 | map = (ShiftAddXorSignedStringMap)BinIO.loadObject(temp);
55 |
56 | for(int i = s.length; i-- != 0;) assertEquals(i, map.getLong(Integer.toString(i)));
57 | for(int i = s.length + 100; i-- != s.length;) assertEquals(-1, map.getLong(Integer.toString(i)));
58 |
59 | }
60 | }
61 | }
62 |
--------------------------------------------------------------------------------
/test/it/unimi/dsi/bits/BitVectorsTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DSI utilities
3 | *
4 | * Copyright (C) 2010-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.bits;
21 |
22 | import static org.junit.Assert.assertEquals;
23 | import static org.junit.Assert.assertFalse;
24 |
25 | import java.io.DataInputStream;
26 | import java.io.DataOutputStream;
27 | import java.io.IOException;
28 | import java.util.Arrays;
29 | import java.util.Iterator;
30 |
31 | import org.junit.Test;
32 |
33 | import it.unimi.dsi.fastutil.io.FastByteArrayInputStream;
34 | import it.unimi.dsi.fastutil.io.FastByteArrayOutputStream;
35 | import it.unimi.dsi.io.OfflineIterable;
36 |
37 | public class BitVectorsTest {
38 |
39 | @Test
40 | public void testReadWriteFast() throws IOException {
41 | final FastByteArrayOutputStream fbaos = new FastByteArrayOutputStream();
42 | final DataOutputStream dos = new DataOutputStream(fbaos);
43 | final LongArrayBitVector labv = LongArrayBitVector.getInstance();
44 | final BitVector[] a = new BitVector[] { BitVectors.ZERO, BitVectors.ONE, BitVectors.EMPTY_VECTOR,
45 | LongArrayBitVector.wrap(new long[] { 0xAAAAAAAAAAAAAAAAL }, 64),
46 | LongArrayBitVector.wrap(new long[] { 0xAAAAAAAAAAAAAAAL }, 60),
47 | LongArrayBitVector.wrap(new long[] { 0xAAAAAAAAAAAAAAAAL, 0xAAAAAAAAAAAAAAAAL }, 128),
48 | LongArrayBitVector.wrap(new long[] { 0xAAAAAAAAAAAAAAAAL, 0xAAAAAAAAAAAAAAAL }, 124) };
49 |
50 | for(final BitVector bv: a) {
51 | BitVectors.writeFast(bv, dos);
52 | dos.close();
53 | assertEquals(bv, BitVectors.readFast(new DataInputStream(new FastByteArrayInputStream(fbaos.array))));
54 | fbaos.reset();
55 | }
56 |
57 | for(final BitVector bv: a) {
58 | BitVectors.writeFast(bv, dos);
59 | dos.close();
60 | assertEquals(bv, BitVectors.readFast(new DataInputStream(new FastByteArrayInputStream(fbaos.array)), labv));
61 | fbaos.reset();
62 | }
63 | }
64 |
65 | @Test
66 | public void testMakeOffline() throws IOException {
67 | final BitVector[] a = new BitVector[] { BitVectors.ZERO, BitVectors.ONE, BitVectors.EMPTY_VECTOR,
68 | LongArrayBitVector.wrap(new long[] { 0xAAAAAAAAAAAAAAAAL }, 64),
69 | LongArrayBitVector.wrap(new long[] { 0xAAAAAAAAAAAAAAAL }, 60),
70 | LongArrayBitVector.wrap(new long[] { 0xAAAAAAAAAAAAAAAAL, 0xAAAAAAAAAAAAAAAAL }, 128),
71 | LongArrayBitVector.wrap(new long[] { 0xAAAAAAAAAAAAAAAAL, 0xAAAAAAAAAAAAAAAL }, 124) };
72 |
73 | final OfflineIterable iterable = new OfflineIterable<>(BitVectors.OFFLINE_SERIALIZER, LongArrayBitVector.getInstance());
74 | iterable.addAll(Arrays.asList(a));
75 |
76 | final Iterator iterator = iterable.iterator();
77 | for (final BitVector element : a) assertEquals(element, iterator.next());
78 | assertFalse(iterator.hasNext());
79 | iterable.close();
80 | }
81 | }
82 |
--------------------------------------------------------------------------------
/test/it/unimi/dsi/bits/FixedLongTransformationStrategyTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DSI utilities
3 | *
4 | * Copyright (C) 2010-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.bits;
21 |
22 | import static org.junit.Assert.assertEquals;
23 | import static org.junit.Assert.assertFalse;
24 | import static org.junit.Assert.assertTrue;
25 |
26 | import org.junit.Test;
27 |
28 | public class FixedLongTransformationStrategyTest {
29 |
30 | @Test
31 | public void testGetBoolean() {
32 | final TransformationStrategy fixedLong = TransformationStrategies.fixedLong();
33 | BitVector p = fixedLong.toBitVector(Long.valueOf(0));
34 | for (int i = Long.SIZE; i-- != 1;) assertFalse(p.getBoolean(i));
35 |
36 | // Flipped bit
37 | assertTrue(p.getBoolean(0));
38 | p = fixedLong.toBitVector(Long.valueOf(0xDEADBEEFDEADF00DL));
39 | for (int i = Long.SIZE; i-- != 0;) assertTrue(p.getBoolean(i) == (((0xDEADBEEFDEADF00DL ^ 1L << 63) & 1L << Long.SIZE - 1 - i) != 0));
40 | }
41 |
42 | @Test
43 | public void testGetLong() {
44 | final TransformationStrategy fixedLong = TransformationStrategies.fixedLong();
45 | final BitVector p = fixedLong.toBitVector(Long.valueOf(0xDEADBEEFDEADF00DL));
46 | for(int from = Long.SIZE; from-- != 0;)
47 | for (int to = Long.SIZE; from < to--;)
48 | assertEquals(LongArrayBitVector.wrap(new long[] {
49 | Long.reverse(0xDEADBEEFDEADF00DL) ^ 1 }).getLong(from, to), p.getLong(from, to));
50 | }
51 |
52 | }
53 |
--------------------------------------------------------------------------------
/test/it/unimi/dsi/bits/PrefixFreeTransformationStrategyTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DSI utilities
3 | *
4 | * Copyright (C) 2010-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.bits;
21 |
22 | import static org.junit.Assert.assertEquals;
23 | import static org.junit.Assert.assertFalse;
24 | import static org.junit.Assert.assertTrue;
25 |
26 | import org.junit.Test;
27 |
28 | public class PrefixFreeTransformationStrategyTest {
29 |
30 | @Test
31 | public void testGetBoolean() {
32 | final LongArrayBitVector v = LongArrayBitVector.of(0, 1, 0);
33 | final TransformationStrategy prefixFree = TransformationStrategies.prefixFree();
34 | final BitVector p = prefixFree.toBitVector(v);
35 | assertTrue(p.getBoolean(0));
36 | assertFalse(p.getBoolean(1));
37 | assertTrue(p.getBoolean(2));
38 | assertTrue(p.getBoolean(3));
39 | assertTrue(p.getBoolean(4));
40 | assertFalse(p.getBoolean(5));
41 | assertFalse(p.getBoolean(6));
42 | assertEquals(LongArrayBitVector.of(1, 0, 1, 1, 1, 0, 0), p);
43 | }
44 |
45 | @Test
46 | public void testGetLong() {
47 | LongArrayBitVector v = LongArrayBitVector.getInstance();
48 | v.append(0xFFFFFFFFL, 32);
49 | final TransformationStrategy prefixFree = TransformationStrategies.prefixFree();
50 | final BitVector p = prefixFree.toBitVector(v);
51 | assertEquals(0xFFFFFFFFFFFFFFFFL, p.getLong(0, 64));
52 | assertFalse(p.getBoolean(64));
53 | assertEquals(0, p.getLong(64, 64));
54 |
55 | v.clear();
56 | v.append(0x0, 32);
57 | assertEquals(0x5555555555555555L, p.getLong(0, 64));
58 | assertEquals(0x5555555555555555L >>> 1, p.getLong(1, 64));
59 | assertFalse(p.getBoolean(64));
60 |
61 | v.clear();
62 | v.append(0x3, 32);
63 | assertEquals(0x555555555555555FL, p.getLong(0, 64));
64 | assertEquals(0x5FL, p.getLong(0, 7));
65 |
66 | v = LongArrayBitVector.of(0, 0, 0, 0, 1, 1, 1);
67 | assertEquals(LongArrayBitVector.of(1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0), prefixFree.toBitVector(v));
68 | }
69 |
70 | }
71 |
--------------------------------------------------------------------------------
/test/it/unimi/dsi/bits/RawByteArrayTransformationStrategyTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DSI utilities
3 | *
4 | * Copyright (C) 2010-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.bits;
21 |
22 | import static org.junit.Assert.assertEquals;
23 | import static org.junit.Assert.assertFalse;
24 | import static org.junit.Assert.assertTrue;
25 |
26 | import org.junit.Test;
27 |
28 | public class RawByteArrayTransformationStrategyTest {
29 |
30 | @Test
31 | public void testGetLong() {
32 | byte[] a = new byte[] { 0x55, (byte)0xFF };
33 | assertEquals(16, TransformationStrategies.rawByteArray().toBitVector(a).length());
34 | assertEquals(0xFF55L, TransformationStrategies.rawByteArray().toBitVector(a).getLong(0, 16));
35 |
36 | a = new byte[] { 1, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, 0 };
37 | assertTrue(TransformationStrategies.rawByteArray().toBitVector(a).getBoolean(0));
38 | assertFalse(TransformationStrategies.rawByteArray().toBitVector(a).getBoolean(1));
39 | assertTrue(TransformationStrategies.rawByteArray().toBitVector(a).getBoolean(64));
40 | assertEquals(0x1L, TransformationStrategies.rawByteArray().toBitVector(a).getLong(0, 56));
41 | assertEquals(0x1L, TransformationStrategies.rawByteArray().toBitVector(a).getLong(0, 64));
42 | assertEquals(-1L, TransformationStrategies.rawByteArray().toBitVector(a).getLong(64, 128));
43 |
44 | for(int i = 1; i < 64; i++)
45 | assertEquals(1, TransformationStrategies.rawByteArray().toBitVector(a).getLong(0, i));
46 | for(int i = 0; i < 63; i++)
47 | assertEquals(0, TransformationStrategies.rawByteArray().toBitVector(a).getLong(1, 1 + i));
48 | for(int i = 64; i < 127; i++)
49 | assertEquals((1L << i - 64) - 1, TransformationStrategies.rawByteArray().toBitVector(a).getLong(64, i));
50 |
51 | a = new byte[] { 1, 0, 0, 0, 0, 0, 0, 0, 0x55 };
52 | assertEquals(0x55L << 57, TransformationStrategies.rawByteArray().toBitVector(a).getLong(7, 71));
53 | assertEquals(0x15L << 57, TransformationStrategies.rawByteArray().toBitVector(a).getLong(7, 70));
54 | assertEquals(0x15L << 57, TransformationStrategies.rawByteArray().toBitVector(a).getLong(7, 69));
55 |
56 | }
57 |
58 | }
59 |
--------------------------------------------------------------------------------
/test/it/unimi/dsi/bits/RawFixedLongTransformationStrategyTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DSI utilities
3 | *
4 | * Copyright (C) 2010-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.bits;
21 |
22 | import static org.junit.Assert.assertFalse;
23 | import static org.junit.Assert.assertTrue;
24 |
25 | import org.junit.Test;
26 |
27 | public class RawFixedLongTransformationStrategyTest {
28 |
29 | @Test
30 | public void testGetBoolean() {
31 | final TransformationStrategy rawFixedLong = TransformationStrategies.rawFixedLong();
32 | BitVector p = rawFixedLong.toBitVector(Long.valueOf(0));
33 | for(int i = Long.SIZE; i-- != 0;) assertFalse(p.getBoolean(i));
34 | p = rawFixedLong.toBitVector(Long.valueOf(0xDEADBEEFDEADF00DL));
35 | for(int i = Long.SIZE; i-- != 0;) assertTrue(p.getBoolean(i) == ((0xDEADBEEFDEADF00DL & 1L << i) != 0));
36 | }
37 |
38 | @Test
39 | public void testGetLong() {
40 | final TransformationStrategy rawFixedLong = TransformationStrategies.rawFixedLong();
41 | final BitVector p = rawFixedLong.toBitVector(Long.valueOf(0xDEADBEEFDEADF00DL));
42 | for(int from = Long.SIZE; from-- != 0;)
43 | for(int to = Long.SIZE; from < to--;)
44 | assertTrue(p.getLong(from, to) == LongArrayBitVector.wrap(new long[] { 0xDEADBEEFDEADF00DL }).getLong(from, to));
45 | }
46 |
47 | }
48 |
--------------------------------------------------------------------------------
/test/it/unimi/dsi/bits/RawUtf32TransformationStrategyTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DSI utilities
3 | *
4 | * Copyright (C) 2010-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.bits;
21 |
22 | import static org.junit.Assert.assertEquals;
23 |
24 | import org.junit.Test;
25 |
26 | public class RawUtf32TransformationStrategyTest {
27 |
28 | @Test
29 | public void testGetLong() {
30 | String s = new String(new char[] { '\u0001', '\u0002' });
31 | assertEquals(64, TransformationStrategies.rawUtf32().toBitVector(s).length());
32 | assertEquals(0x200000001L, TransformationStrategies.rawUtf32().toBitVector(s).getLong(0, 64));
33 | s = new String(new char[] { '\u0001', '\u0002', '\u0003' });
34 | assertEquals(96, TransformationStrategies.rawUtf32().toBitVector(s).length());
35 | assertEquals(0x300000002L, TransformationStrategies.rawUtf32().toBitVector(s).getLong(32, 96));
36 | s = new String(new char[] { '\u0001', '\u0002', '\u0003', '\u0004' });
37 | assertEquals(128, TransformationStrategies.rawUtf32().toBitVector(s).length());
38 | assertEquals(0x200000001L, TransformationStrategies.rawUtf32().toBitVector(s).getLong(0, 64));
39 | assertEquals(0x400000003L, TransformationStrategies.rawUtf32().toBitVector(s).getLong(64, 128));
40 | s = new String(new char[] { '\u0001', '\u0002', '\u0003', '\u0004' });
41 |
42 | s = new String(new char[] { '\uD800', '\uDC00' });
43 | assertEquals(32, TransformationStrategies.rawUtf32().length(s));
44 | assertEquals(0x10000, TransformationStrategies.rawUtf32().toBitVector(s).getLong(0, 32));
45 | }
46 |
47 | }
48 |
--------------------------------------------------------------------------------
/test/it/unimi/dsi/bits/Utf32TransformationStrategyTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DSI utilities
3 | *
4 | * Copyright (C) 2010-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.bits;
21 |
22 | import static org.junit.Assert.assertEquals;
23 |
24 | import org.junit.Test;
25 |
26 | public class Utf32TransformationStrategyTest {
27 |
28 | @Test
29 | public void testGetLong() {
30 | String s = new String(new char[] { '\u0001', '\u0002' });
31 | assertEquals(96, TransformationStrategies.prefixFreeUtf32().toBitVector(s).length());
32 | assertEquals(0x4000000080000000L, TransformationStrategies.prefixFreeUtf32().toBitVector(s).getLong(0, 64));
33 | assertEquals(0x0000000040000000L, TransformationStrategies.prefixFreeUtf32().toBitVector(s).getLong(32, 96));
34 | s = new String(new char[] { '\u0001', '\u0002', '\u0003' });
35 | assertEquals(128, TransformationStrategies.prefixFreeUtf32().toBitVector(s).length());
36 | assertEquals(0x80000000L, TransformationStrategies.prefixFreeUtf32().toBitVector(s).getLong(0, 48));
37 | assertEquals(0x4000000080000000L, TransformationStrategies.prefixFreeUtf32().toBitVector(s).getLong(0, 64));
38 | s = new String(new char[] { '\u0001', '\u0002', '\u0003', '\u0004' });
39 | assertEquals(160, TransformationStrategies.prefixFreeUtf32().toBitVector(s).length());
40 | assertEquals(0, TransformationStrategies.prefixFreeUtf32().toBitVector(s).getLong(128, 160));
41 | //System.err.println(Long.toHexString(TransformationStrategies.prefixFreeUtf32().toBitVector(s).getLong(16, 80)));
42 | assertEquals(0xC000000040000000L, TransformationStrategies.prefixFreeUtf32().toBitVector(s).getLong(32, 96));
43 | s = new String(new char[] { '\uD800', '\uDC00' });
44 | assertEquals(64, TransformationStrategies.prefixFreeUtf32().length(s));
45 | assertEquals(0x8000, TransformationStrategies.prefixFreeUtf32().toBitVector(s).getLong(0, 64));
46 |
47 |
48 | s = new String(new char[] { '\u0001', '\u0002' });
49 | assertEquals(64, TransformationStrategies.utf32().toBitVector(s).length());
50 | assertEquals(0x4000000080000000L, TransformationStrategies.utf32().toBitVector(s).getLong(0, 64));
51 | s = new String(new char[] { '\u0001', '\u0002', '\u0003' });
52 | assertEquals(96, TransformationStrategies.utf32().toBitVector(s).length());
53 | assertEquals(0xC000000040000000L, TransformationStrategies.utf32().toBitVector(s).getLong(32, 96));
54 | s = new String(new char[] { '\u0001', '\u0002', '\u0003', '\u0004' });
55 | assertEquals(128, TransformationStrategies.utf32().toBitVector(s).length());
56 | assertEquals(0x4000000080000000L, TransformationStrategies.utf32().toBitVector(s).getLong(0, 64));
57 | assertEquals(0x20000000C0000000L, TransformationStrategies.utf32().toBitVector(s).getLong(64, 128));
58 |
59 | s = new String(new char[] { '\uD800', '\uDC00' });
60 | assertEquals(32, TransformationStrategies.utf32().length(s));
61 | assertEquals(0x8000, TransformationStrategies.utf32().toBitVector(s).getLong(0, 32));
62 | }
63 |
64 | }
65 |
--------------------------------------------------------------------------------
/test/it/unimi/dsi/compression/CodecTestCase.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DSI utilities
3 | *
4 | * Copyright (C) 2010-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.compression;
21 |
22 | import static org.junit.Assert.assertEquals;
23 |
24 | import java.io.IOException;
25 | import java.util.Random;
26 |
27 | import it.unimi.dsi.bits.BitVector;
28 | import it.unimi.dsi.fastutil.booleans.BooleanArrayList;
29 | import it.unimi.dsi.fastutil.booleans.BooleanIterator;
30 | import it.unimi.dsi.fastutil.io.FastByteArrayOutputStream;
31 | import it.unimi.dsi.io.InputBitStream;
32 | import it.unimi.dsi.io.OutputBitStream;
33 |
34 | public abstract class CodecTestCase {
35 | protected static void checkPrefixCodec(final PrefixCodec codec, final Random r) throws IOException {
36 | final int[] symbol = new int[100];
37 | final BooleanArrayList bits = new BooleanArrayList();
38 | for(int i = 0; i < symbol.length; i++) symbol[i] = r.nextInt(codec.size());
39 | for (final int element : symbol) {
40 | final BitVector word = codec.codeWords()[element];
41 | for(long j = 0; j < word.length(); j++) bits.add(word.getBoolean(j));
42 | }
43 |
44 | final BooleanIterator booleanIterator = bits.iterator();
45 | final Decoder decoder = codec.decoder();
46 | for (final int element : symbol) {
47 | assertEquals(element, decoder.decode(booleanIterator));
48 | }
49 |
50 | final FastByteArrayOutputStream fbaos = new FastByteArrayOutputStream();
51 | @SuppressWarnings("resource")
52 | final
53 | OutputBitStream obs = new OutputBitStream(fbaos, 0);
54 | obs.write(bits.iterator());
55 | obs.flush();
56 | final InputBitStream ibs = new InputBitStream(fbaos.array);
57 |
58 | for (final int element : symbol) {
59 | assertEquals(element, decoder.decode(ibs));
60 | }
61 | }
62 |
63 | protected void checkLengths(final int[] frequency, final int[] codeLength, final BitVector[] codeWord) {
64 | for(int i = 0; i < frequency.length; i++)
65 | assertEquals(Integer.toString(i), codeLength[i], codeWord[i].length());
66 | }
67 | }
68 |
--------------------------------------------------------------------------------
/test/it/unimi/dsi/io/DelimitedWordReaderTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DSI utilities
3 | *
4 | * Copyright (C) 2010-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.io;
21 |
22 | import static org.junit.Assert.assertEquals;
23 |
24 | import org.junit.Test;
25 |
26 | @SuppressWarnings("resource")
27 | public class DelimitedWordReaderTest {
28 |
29 | @Test
30 | public void testToSpec() {
31 | final String className = DelimitedWordReader.class.getName();
32 | assertEquals(className + "(\"_\")", new DelimitedWordReader("_").toSpec());
33 | assertEquals(className + "(100,\"_\")", new DelimitedWordReader("100", "_").toSpec());
34 | }
35 | }
36 |
--------------------------------------------------------------------------------
/test/it/unimi/dsi/io/FileLinesByteArrayCollectionTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DSI utilities
3 | *
4 | * Copyright (C) 2016-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.io;
21 |
22 | import static org.junit.Assert.assertArrayEquals;
23 | import static org.junit.Assert.assertEquals;
24 | import static org.junit.Assert.assertFalse;
25 | import static org.junit.Assert.assertTrue;
26 |
27 | import java.io.File;
28 | import java.io.IOException;
29 | import java.util.Arrays;
30 |
31 | import org.junit.Test;
32 |
33 | import it.unimi.dsi.fastutil.io.BinIO;
34 |
35 | public class FileLinesByteArrayCollectionTest {
36 |
37 | @SuppressWarnings("deprecation")
38 | @Test
39 | public void test() throws IOException {
40 | final File file = File.createTempFile(FastBufferedReaderTest.class.getSimpleName(), "tmp");
41 | file.deleteOnExit();
42 |
43 | byte[] a = { '0', '\n', '1', '\n' };
44 | BinIO.storeBytes(a, file);
45 | it.unimi.dsi.big.io.FileLinesByteArrayCollection flbac = new it.unimi.dsi.big.io.FileLinesByteArrayCollection(file.toString());
46 | it.unimi.dsi.big.io.FileLinesByteArrayCollection.FileLinesIterator iterator = flbac.iterator();
47 | assertArrayEquals(new byte[] { '0' }, iterator.next());
48 | assertArrayEquals(new byte[] { '1' }, iterator.next());
49 | assertFalse(iterator.hasNext());
50 | assertEquals(2, flbac.size64());
51 |
52 | a = new byte[] { '0', '\n', '1' };
53 | BinIO.storeBytes(a, file);
54 | flbac = new it.unimi.dsi.big.io.FileLinesByteArrayCollection(file.toString());
55 | assertEquals(2, flbac.size64());
56 | iterator = flbac.iterator();
57 | assertArrayEquals(new byte[] { '0' }, iterator.next());
58 | assertTrue(iterator.hasNext());
59 | assertArrayEquals(new byte[] { '1' }, iterator.next());
60 | assertFalse(iterator.hasNext());
61 | assertFalse(iterator.hasNext());
62 | iterator.close();
63 |
64 | a = new byte[1000000];
65 | Arrays.fill(a, (byte)'A');
66 | BinIO.storeBytes(a, file);
67 | flbac = new it.unimi.dsi.big.io.FileLinesByteArrayCollection(file.toString());
68 | assertEquals(1, flbac.size64());
69 | iterator = flbac.iterator();
70 | assertArrayEquals(a, iterator.next());
71 | assertFalse(iterator.hasNext());
72 |
73 | file.delete();
74 | }
75 | }
76 |
--------------------------------------------------------------------------------
/test/it/unimi/dsi/io/OfflineIterableTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DSI utilities
3 | *
4 | * Copyright (C) 2010-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.io;
21 |
22 | import static org.junit.Assert.assertEquals;
23 | import static org.junit.Assert.assertFalse;
24 |
25 | import java.io.DataInput;
26 | import java.io.DataOutput;
27 | import java.io.IOException;
28 | import java.io.InputStream;
29 | import java.io.OutputStream;
30 |
31 | import org.junit.Test;
32 |
33 | import it.unimi.dsi.fastutil.objects.ObjectIterator;
34 | import it.unimi.dsi.fastutil.objects.ObjectIterators;
35 | import it.unimi.dsi.lang.MutableString;
36 |
37 | public class OfflineIterableTest {
38 | public void doIt(final String[] strings) throws IOException {
39 | final OfflineIterable.Serializer stringSerializer = new OfflineIterable.Serializer() {
40 | @Override
41 | public void read(final DataInput dis, final MutableString x) throws IOException {
42 | x.readSelfDelimUTF8((InputStream)dis);
43 | }
44 | @Override
45 | public void write(final MutableString x, final DataOutput dos) throws IOException {
46 | x.writeSelfDelimUTF8((OutputStream)dos);
47 | }
48 | };
49 | final OfflineIterable stringIterable = new OfflineIterable<>(stringSerializer, new MutableString());
50 | for (final String s: strings)
51 | stringIterable.add(new MutableString(s));
52 | ObjectIterator shouldBe = ObjectIterators.wrap(strings);
53 | for (final MutableString m: stringIterable)
54 | assertEquals(new MutableString(shouldBe.next()), m);
55 | assertFalse(shouldBe.hasNext());
56 |
57 | // Let's do it again.
58 | stringIterable.clear();
59 | for (final String s: strings)
60 | stringIterable.add(new MutableString(s));
61 | shouldBe = ObjectIterators.wrap(strings);
62 | for (final MutableString m: stringIterable)
63 | assertEquals(new MutableString(shouldBe.next()), m);
64 | assertFalse(shouldBe.hasNext());
65 |
66 | stringIterable.close();
67 | stringIterable.close(); // Twice, to test for safety
68 | }
69 |
70 | @Test
71 | public void testSimple() throws IOException {
72 | doIt(new String[] { "this", "is", "a", "test" });
73 | }
74 |
75 | @Test
76 | public void testEmpty() throws IOException {
77 | doIt(new String[0]);
78 | }
79 | }
80 |
--------------------------------------------------------------------------------
/test/it/unimi/dsi/io/SegmentedInputStreamTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DSI utilities
3 | *
4 | * Copyright (C) 2010-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.io;
21 |
22 | import static org.junit.Assert.assertEquals;
23 |
24 | import java.io.IOException;
25 |
26 | import org.junit.Before;
27 | import org.junit.Test;
28 |
29 | import it.unimi.dsi.fastutil.io.FastByteArrayInputStream;
30 |
31 | public class SegmentedInputStreamTest {
32 |
33 | private final FastByteArrayInputStream stream = new FastByteArrayInputStream(
34 | new byte[] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13 }
35 | );
36 |
37 | private SegmentedInputStream sis;
38 |
39 | @Before
40 | public void setUp() throws IllegalArgumentException, IOException {
41 | sis = new SegmentedInputStream(stream);
42 | sis.addBlock(0, 1, 2);
43 | sis.addBlock(2, 3, 4);
44 | sis.addBlock(6, 7, 8);
45 | sis.addBlock(8, 11, 14);
46 | }
47 |
48 | @Test
49 | public void testResetClose() throws IOException {
50 | assertEquals(0, sis.read());
51 | sis.reset();
52 | assertEquals(1, sis.read());
53 | sis.reset();
54 | assertEquals(-1, sis.read());
55 |
56 | sis.close();
57 | assertEquals(2, sis.read());
58 | sis.reset();
59 | assertEquals(3, sis.read());
60 | sis.reset();
61 | assertEquals(-1, sis.read());
62 |
63 | sis.close();
64 | assertEquals(6, sis.read());
65 | sis.reset();
66 | assertEquals(7, sis.read());
67 | sis.reset();
68 | assertEquals(-1, sis.read());
69 | }
70 |
71 | @Test
72 | public void testRead() throws IOException {
73 | final byte[] b = new byte[11];
74 | assertEquals(1, sis.read(b, 0, 10));
75 | assertEquals(0, b[0]);
76 | sis.reset();
77 | assertEquals(1, sis.read(b, 1, 10));
78 | assertEquals(1, b[1]);
79 |
80 | sis.close();
81 | assertEquals(1, sis.read(b, 5, 5));
82 | assertEquals(2, b[5]);
83 | }
84 |
85 | @Test
86 | public void testSkip() throws IOException {
87 | assertEquals(1, sis.skip(1));
88 | sis.reset();
89 | assertEquals(1, sis.skip(10));
90 | sis.reset();
91 | assertEquals(0, sis.skip(10));
92 |
93 | sis.close();
94 | sis.close();
95 | sis.close();
96 |
97 | assertEquals(2, sis.skip(2));
98 | assertEquals(1, sis.skip(2));
99 | sis.reset();
100 | assertEquals(3, sis.skip(10));
101 |
102 | }
103 | }
104 |
--------------------------------------------------------------------------------
/test/it/unimi/dsi/lang/EnumParserTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DSI utilities
3 | *
4 | * Copyright (C) 2010-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.lang;
21 |
22 | import static org.junit.Assert.assertEquals;
23 |
24 | import org.junit.Test;
25 |
26 | import com.martiansoftware.jsap.ParseException;
27 |
28 | public class EnumParserTest {
29 | public enum TestEnum {
30 | A,
31 | b,
32 | C
33 | }
34 |
35 | @Test
36 | public void test() throws Exception {
37 | final EnumStringParser enumStringParser = EnumStringParser.getParser(TestEnum.class);
38 | assertEquals(TestEnum.A, enumStringParser.parse("A"));
39 | assertEquals(TestEnum.b, enumStringParser.parse("b"));
40 | assertEquals(TestEnum.C, enumStringParser.parse("C"));
41 | }
42 |
43 | @Test(expected=ParseException.class)
44 | public void testNoMatchBecauseOfCase() throws Exception {
45 | final EnumStringParser enumStringParser = EnumStringParser.getParser(TestEnum.class);
46 | enumStringParser.parse("a");
47 | }
48 |
49 | @Test(expected=ParseException.class)
50 | public void testNoMatchBecauseWrong() throws Exception {
51 | final EnumStringParser enumStringParser = EnumStringParser.getParser(TestEnum.class);
52 | enumStringParser.parse("D");
53 | }
54 |
55 | @Test
56 | public void testNorm() throws Exception {
57 | final EnumStringParser enumStringParser = EnumStringParser.getParser(TestEnum.class, true);
58 | assertEquals(TestEnum.A, enumStringParser.parse("a"));
59 | assertEquals(TestEnum.C, enumStringParser.parse("c"));
60 | }
61 | }
62 |
--------------------------------------------------------------------------------
/test/it/unimi/dsi/lang/MutableStringTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DSI utilities
3 | *
4 | * Copyright (C) 2010-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.lang;
21 |
22 | import static org.junit.Assert.assertEquals;
23 | import static org.junit.Assert.assertFalse;
24 | import static org.junit.Assert.assertTrue;
25 |
26 | import java.io.IOException;
27 |
28 | import org.junit.Test;
29 |
30 | import it.unimi.dsi.fastutil.io.FastByteArrayInputStream;
31 | import it.unimi.dsi.fastutil.io.FastByteArrayOutputStream;
32 |
33 | public class MutableStringTest {
34 | @Test
35 | public void testSqueezeSpace() {
36 | final MutableString s = new MutableString(new char[] { 32, 13, 10, 32, 32, 32, 13, 10, 32, 32, 32, 13, 10, 32, 32, 32, 32, 32 });
37 |
38 | assertEquals(new MutableString(" \r\n \r\n \r\n "), s.squeezeSpace());
39 | assertEquals(new MutableString(" "), s.squeezeWhitespace());
40 | }
41 |
42 | @Test
43 | public void testSubsequence() {
44 | final MutableString s = new MutableString("abc");
45 | final CharSequence ss = s.subSequence(1, 3);
46 | assertEquals(new MutableString("bc"), ss);
47 | assertEquals(1, ss.subSequence(1, 2).length());
48 | }
49 |
50 | @Test
51 | public void testSkipSelfDelimUTF8() throws IOException {
52 | final FastByteArrayOutputStream fastByteArrayOutputStream = new FastByteArrayOutputStream();
53 | new MutableString("a").writeSelfDelimUTF8(fastByteArrayOutputStream);
54 | new MutableString("b").writeSelfDelimUTF8(fastByteArrayOutputStream);
55 | new MutableString("\u221E").writeSelfDelimUTF8(fastByteArrayOutputStream);
56 | new MutableString("c").writeSelfDelimUTF8(fastByteArrayOutputStream);
57 | fastByteArrayOutputStream.flush();
58 | final FastByteArrayInputStream fastByteArrayInputStream = new FastByteArrayInputStream(fastByteArrayOutputStream.array);
59 | assertEquals("a", new MutableString().readSelfDelimUTF8(fastByteArrayInputStream).toString());
60 | assertEquals("b", new MutableString().readSelfDelimUTF8(fastByteArrayInputStream).toString());
61 | assertEquals(1, MutableString.skipSelfDelimUTF8(fastByteArrayInputStream));
62 | assertEquals("c", new MutableString().readSelfDelimUTF8(fastByteArrayInputStream).toString());
63 | fastByteArrayInputStream.position(0);
64 | assertEquals("a", new MutableString().readSelfDelimUTF8(fastByteArrayInputStream).toString());
65 | assertEquals(1, MutableString.skipSelfDelimUTF8(fastByteArrayInputStream));
66 | assertEquals("\uu221E", new MutableString().readSelfDelimUTF8(fastByteArrayInputStream).toString());
67 | assertEquals("c", new MutableString().readSelfDelimUTF8(fastByteArrayInputStream).toString());
68 | }
69 |
70 | @Test
71 | public void testIsEmpty() {
72 | assertTrue(new MutableString().compact().isEmpty());
73 | assertTrue(new MutableString().loose().isEmpty());
74 | assertFalse(new MutableString(" ").compact().isEmpty());
75 | assertFalse(new MutableString(" ").loose().isEmpty());
76 | }
77 | }
78 |
--------------------------------------------------------------------------------
/test/it/unimi/dsi/lang/TwoStrings.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DSI utilities
3 | *
4 | * Copyright (C) 2010-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.lang;
21 |
22 | import java.util.Objects;
23 |
/**
 * A simple value class made of two strings and an optional context object,
 * offering several overloaded constructors and static factory methods.
 */
public class TwoStrings {
	/** The first string. */
	private final String a;
	/** The second string. */
	private final String b;
	/** The context object, or {@code null} when built without one. */
	private final Object context;

	/** Does nothing. */
	public void test() {}

	/** Creates an instance with the given strings and no context. */
	public TwoStrings(final String a, final String b) {
		this(null, a, b);
	}

	/** Creates an instance with no context from a string array: first element and array length. */
	public TwoStrings(final String... a) {
		this(null, a);
	}

	/** Returns an instance with no context whose two strings are both the given one. */
	public static TwoStrings getInstance(final String a) {
		return new TwoStrings(a, a);
	}

	/** Returns an instance with no context whose two strings are both the array length. */
	public static TwoStrings getInstance(final String... a) {
		final String length = Integer.toString(a.length);
		return getInstance(length);
	}

	/** Creates an instance with the given context and strings. */
	public TwoStrings(final Object context, final String a, final String b) {
		this.context = context;
		this.a = a;
		this.b = b;
	}

	/** Creates an instance with the given context from a string array: first element and array length. */
	public TwoStrings(final Object context, final String... a) {
		this.context = context;
		this.a = a[0];
		this.b = Integer.toString(a.length);
	}

	/** Returns an instance with the given context whose two strings are both the given one. */
	public static TwoStrings getInstance(final Object context, final String a) {
		return new TwoStrings(context, a, a);
	}

	/** Returns an instance with the given context whose two strings are both the array length. */
	public static TwoStrings getInstance(final Object context, final String... a) {
		final String length = Integer.toString(a.length);
		return getInstance(context, length);
	}

	/** Two instances are equal when they have the same class and equal (or both null) strings and context. */
	@Override
	public boolean equals(final Object obj) {
		if (this == obj) return true;
		if (obj == null || getClass() != obj.getClass()) return false;
		final TwoStrings that = (TwoStrings)obj;
		return Objects.equals(a, that.a)
				&& Objects.equals(b, that.b)
				&& Objects.equals(context, that.context);
	}

	/** Returns the class name followed by context and strings in parentheses. */
	@Override
	public String toString() {
		final StringBuilder description = new StringBuilder(getClass().getName());
		description.append("(").append(context).append(", ").append(a).append(", ").append(b).append(")");
		return description.toString();
	}

	@Override
	public int hashCode() {
		return Objects.hash(a, b, context);
	}
}
97 |
--------------------------------------------------------------------------------
/test/it/unimi/dsi/parser/callback/LinkExtractorTest.data:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 | Chapter 2. Vocabulary
13 |
14 |
15 |
16 |
17 |
18 |
19 |
--------------------------------------------------------------------------------
/test/it/unimi/dsi/parser/callback/LinkExtractorTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DSI utilities
3 | *
4 | * Copyright (C) 2010-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.parser.callback;
21 |
22 | import static org.junit.Assert.assertEquals;
23 |
24 | import java.io.IOException;
25 | import java.nio.ByteBuffer;
26 | import java.nio.charset.StandardCharsets;
27 |
28 | import org.junit.Ignore;
29 | import org.junit.Test;
30 |
31 | import com.google.common.io.ByteStreams;
32 |
33 | import it.unimi.dsi.fastutil.objects.ObjectLinkedOpenHashSet;
34 | import it.unimi.dsi.parser.BulletParser;
35 |
36 | @Deprecated
37 | @Ignore
38 | public class LinkExtractorTest {
39 |
40 | @Test
41 | public void testExtractor() throws IOException {
42 | final char[] text = StandardCharsets.UTF_8.decode(ByteBuffer.wrap(ByteStreams.toByteArray(this.getClass().getResourceAsStream("LinkExtractorTest.data")))).toString().toCharArray();
43 |
44 | final BulletParser parser = new BulletParser();
45 | final LinkExtractor linkExtractor = new LinkExtractor();
46 | parser.setCallback(linkExtractor);
47 | parser.parse(text);
48 |
49 | testExtractorResults(linkExtractor);
50 | }
51 |
52 | private void testExtractorResults(final LinkExtractor linkExtractor) {
53 | assertEquals(new ObjectLinkedOpenHashSet<>(new String[] { "manual.css", "http://link.com/", "http://anchor.com/", "http://badanchor.com/" }), linkExtractor.urls);
54 | assertEquals("http://base.com/", linkExtractor.base());
55 | assertEquals("http://refresh.com/", linkExtractor.metaRefresh());
56 | assertEquals("http://location.com/", linkExtractor.metaLocation());
57 | }
58 | }
59 |
--------------------------------------------------------------------------------
/test/it/unimi/dsi/parser/callback/TextExtractorTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * DSI utilities
3 | *
4 | * Copyright (C) 2010-2023 Sebastiano Vigna
5 | *
6 | * This program and the accompanying materials are made available under the
7 | * terms of the GNU Lesser General Public License v2.1 or later,
8 | * which is available at
9 | * http://www.gnu.org/licenses/old-licenses/lgpl-2.1-standalone.html,
10 | * or the Apache Software License 2.0, which is available at
11 | * https://www.apache.org/licenses/LICENSE-2.0.
12 | *
13 | * This program is distributed in the hope that it will be useful, but
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 | * or FITNESS FOR A PARTICULAR PURPOSE.
16 | *
17 | * SPDX-License-Identifier: LGPL-2.1-or-later OR Apache-2.0
18 | */
19 |
20 | package it.unimi.dsi.parser.callback;
21 |
22 | import static org.junit.Assert.assertTrue;
23 |
24 | import org.junit.Ignore;
25 | import org.junit.Test;
26 |
27 | import it.unimi.dsi.parser.BulletParser;
28 |
29 | @Deprecated
30 | @Ignore
31 | public class TextExtractorTest {
32 |
33 | @Test
34 | public void testBRBreaksFlow() {
35 | final char a[] = "ciao mamma ".toCharArray();
36 | final BulletParser bulletParser = new BulletParser();
37 | final TextExtractor textExtractor = new TextExtractor();
38 | bulletParser.setCallback(textExtractor);
39 | bulletParser.parse(a);
40 | assertTrue(textExtractor.text.toString(), textExtractor.text.indexOf(' ') != -1);
41 | }
42 |
43 | }
44 |
--------------------------------------------------------------------------------
/test/it/unimi/dsi/parser/test.data:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Lilypond Snippet Repository ♪♫
6 |
9 |
16 |
17 |
18 |