├── .gitignore
├── .travis.yml
├── src
├── main
│ ├── java
│ │ └── com
│ │ │ └── spotify
│ │ │ └── sparkey
│ │ │ ├── CorruptedIndexException.java
│ │ │ ├── SparkeyReaderClosedException.java
│ │ │ ├── ReadWriteData.java
│ │ │ ├── SnappyWriter.java
│ │ │ ├── RandomAccessData.java
│ │ │ ├── FileFlushingData.java
│ │ │ ├── BlockPositionedInputStream.java
│ │ │ ├── CompressionType.java
│ │ │ ├── BlockOutput.java
│ │ │ ├── ArrayUtil.java
│ │ │ ├── RandomAccessDataStateless.java
│ │ │ ├── extra
│ │ │ ├── ThreadLocalSparkeyReader.java
│ │ │ ├── AbstractDelegatingSparkeyReader.java
│ │ │ ├── SparkeyValidator.java
│ │ │ └── ReloadableSparkeyReader.java
│ │ │ ├── CommonHeader.java
│ │ │ ├── UncompressedBlockPositionedInputStream.java
│ │ │ ├── UncompressedBlockRandomInput.java
│ │ │ ├── BlockRandomInput.java
│ │ │ ├── CompressorType.java
│ │ │ ├── FileReadWriteData.java
│ │ │ ├── EmptyInputStream.java
│ │ │ ├── UncompressedBlockOutput.java
│ │ │ ├── AddressSize.java
│ │ │ ├── CompressionTypeBackend.java
│ │ │ ├── HashType.java
│ │ │ ├── SparkeyReader.java
│ │ │ ├── SparkeyImplSelector.java
│ │ │ ├── CompressedReader.java
│ │ │ ├── CompressedOutputStream.java
│ │ │ ├── LogWriter.java
│ │ │ ├── SparkeyWriter.java
│ │ │ ├── CompressedWriter.java
│ │ │ ├── InMemoryData.java
│ │ │ ├── CompressedRandomReader.java
│ │ │ ├── SingleThreadedSparkeyReader.java
│ │ │ ├── SingleThreadedSparkeyWriter.java
│ │ │ ├── MurmurHash3.java
│ │ │ └── ByteBufferCleaner.java
│ ├── java9
│ │ └── com
│ │ │ └── spotify
│ │ │ └── sparkey
│ │ │ └── ArrayUtil.java
│ └── java22
│ │ └── com
│ │ └── spotify
│ │ └── sparkey
│ │ ├── CompressionTypeBackendJ22.java
│ │ ├── UncompressedUtilJ22.java
│ │ ├── UncompressedBlockRandomInputJ22.java
│ │ ├── MemorySegmentInputStream.java
│ │ ├── UncompressedLogReaderJ22.java
│ │ ├── SingleThreadedSparkeyReaderJ22.java
│ │ └── SparkeyImplSelector.java
└── test
│ └── java
│ └── com
│ └── spotify
│ └── sparkey
│ ├── SortHelperTest.java
│ ├── OpenMapsAsserter.java
│ ├── CompressedOutputStreamTest.java
│ ├── CommonHeaderTest.java
│ ├── TestSparkeyWriter.java
│ ├── SparkeyTestHelper.java
│ ├── AddressSizeTest.java
│ ├── extra
│ ├── DelegatingSparkeyReaderTest.java
│ └── ReloadableSparkeyReaderTest.java
│ ├── IndexHashTest.java
│ ├── SortHelperBenchmark.java
│ ├── SparkeyTest.java
│ ├── BytesWrittenTest.java
│ ├── system
│ ├── BaseSystemTest.java
│ ├── WriteHashBenchmark.java
│ ├── AppendBenchmark.java
│ ├── RandomLookupProfiling.java
│ ├── FsyncBenchmark.java
│ ├── ReloadableReaderExample.java
│ ├── LargeFilesTest.java
│ ├── LookupBenchmark.java
│ ├── QuickLookupBenchmark.java
│ ├── SparkeyExample.java
│ ├── MemoryLock.java
│ └── ReaderParametrizedLargeFilesTest.java
│ ├── ReadOnlyMemMapTest.java
│ └── CompressedReaderTest.java
├── NOTICE
├── cleanup-failed-release.sh
├── RELEASE
├── POST-RELEASE-CHECKLIST.md
└── BENCHMARK.md
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea
2 | target/
3 | *.iml
4 | profiling.mph/
5 | *.spi
6 | *.spl
7 | benchmark-results/
8 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: java
2 |
3 | jdk:
4 | - oraclejdk11
5 |
6 | install:
7 | - mvn -B install -DskipTests=true -Dgpg.skip=true
8 |
9 | sudo: false
10 |
--------------------------------------------------------------------------------
/src/main/java/com/spotify/sparkey/CorruptedIndexException.java:
--------------------------------------------------------------------------------
1 | package com.spotify.sparkey;
2 |
3 | import java.io.IOException;
4 |
5 | public class CorruptedIndexException extends IOException {
6 | public CorruptedIndexException(String message) {
7 | super(message);
8 | }
9 | }
10 |
--------------------------------------------------------------------------------
/src/main/java/com/spotify/sparkey/SparkeyReaderClosedException.java:
--------------------------------------------------------------------------------
1 | package com.spotify.sparkey;
2 |
3 | import java.io.IOException;
4 |
5 | public class SparkeyReaderClosedException extends IOException {
6 | public SparkeyReaderClosedException(String message) {
7 | super(message);
8 | }
9 | }
10 |
--------------------------------------------------------------------------------
/NOTICE:
--------------------------------------------------------------------------------
1 | Sparkey-java
2 | Copyright 2012-2013 Spotify AB
3 |
4 | This product includes software developed at
5 | Spotify AB (http://www.spotify.com/).
6 |
7 | This project includes MurmurHash3, written by Austin Appleby, which is
8 | placed in the public domain. The original software is available from
9 | https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp
10 |
--------------------------------------------------------------------------------
/src/main/java/com/spotify/sparkey/ReadWriteData.java:
--------------------------------------------------------------------------------
1 | package com.spotify.sparkey;
2 |
3 | import java.io.IOException;
4 |
5 | interface ReadWriteData extends RandomAccessData {
6 |
7 | void writeLittleEndianLong(long value) throws IOException;
8 |
9 | void writeLittleEndianInt(int value) throws IOException;
10 |
11 | void close() throws IOException;
12 |
13 | void writeUnsignedByte(int value) throws IOException;
14 | }
15 |
--------------------------------------------------------------------------------
/src/test/java/com/spotify/sparkey/SortHelperTest.java:
--------------------------------------------------------------------------------
1 | package com.spotify.sparkey;
2 |
3 | import com.carrotsearch.sizeof.RamUsageEstimator;
4 | import org.junit.Test;
5 |
6 | import static org.junit.Assert.assertEquals;
7 |
8 | public class SortHelperTest {
9 |
10 | @Test
11 | public void testEntrySize() {
12 | long size = RamUsageEstimator.sizeOf(SortHelper.Entry.fromHash(123, 456, 789));
13 | assertEquals(SortHelper.ENTRY_SIZE, size);
14 |
15 | }
16 | }
--------------------------------------------------------------------------------
/src/test/java/com/spotify/sparkey/OpenMapsAsserter.java:
--------------------------------------------------------------------------------
1 | package com.spotify.sparkey;
2 |
3 | import org.junit.After;
4 | import org.junit.Before;
5 |
6 | import static org.junit.Assert.assertEquals;
7 | import static org.junit.Assume.assumeTrue;
8 |
9 | public class OpenMapsAsserter {
10 |
11 | private int openMaps;
12 | private int openFiles;
13 |
14 | @Before
15 | public void setUp() throws Exception {
16 | openMaps = Sparkey.getOpenMaps();
17 | openFiles = Sparkey.getOpenFiles();
18 | }
19 |
20 | @After
21 | public void tearDown() throws Exception {
22 | assertEquals(openMaps, Sparkey.getOpenMaps());
23 | assertEquals(openFiles, Sparkey.getOpenFiles());
24 | }
25 | }
26 |
--------------------------------------------------------------------------------
/src/test/java/com/spotify/sparkey/CompressedOutputStreamTest.java:
--------------------------------------------------------------------------------
1 | package com.spotify.sparkey;
2 |
3 | import org.junit.Test;
4 |
5 | import java.io.*;
6 |
7 | /**
8 | * Tests CompressedOutputStream
9 | */
10 | public class CompressedOutputStreamTest {
11 | @Test
12 | public void testLargeWrite() throws IOException {
13 | for (CompressorType compressor : CompressorType.values()) {
14 | File testFile = File.createTempFile("sparkey-test", "");
15 | testFile.deleteOnExit();
16 | byte[] buf = new byte[1000 * 1000];
17 | // Close the file stream even if the write throws, so no descriptor leaks per compressor.
18 | try (FileOutputStream fos = new FileOutputStream(testFile)) {
19 | CompressedOutputStream os = new CompressedOutputStream(compressor, 10, fos, fos.getFD());
20 | os.write(buf);
21 | }
22 | testFile.delete();
23 | }
24 | }
25 | }
26 |
--------------------------------------------------------------------------------
/src/main/java/com/spotify/sparkey/SnappyWriter.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2011-2013 Spotify AB
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 | * use this file except in compliance with the License. You may obtain a copy of
6 | * the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 | * License for the specific language governing permissions and limitations under
14 | * the License.
15 | */
16 | package com.spotify.sparkey;
17 |
18 | @Deprecated
19 | public class SnappyWriter extends CompressedWriter {
20 | public SnappyWriter(CompressedOutputStream compressedOutputStream, int maxEntriesPerBlock) {
21 | super(compressedOutputStream, maxEntriesPerBlock);
22 | }
23 | }
24 |
--------------------------------------------------------------------------------
/src/main/java/com/spotify/sparkey/RandomAccessData.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2011-2013 Spotify AB
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 | * use this file except in compliance with the License. You may obtain a copy of
6 | * the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 | * License for the specific language governing permissions and limitations under
14 | * the License.
15 | */
16 | package com.spotify.sparkey;
17 |
18 | import java.io.IOException;
19 |
20 | interface RandomAccessData {
21 |
22 | void seek(long pos) throws IOException;
23 |
24 | int readUnsignedByte() throws IOException;
25 |
26 | int readLittleEndianInt() throws IOException;
27 |
28 | long readLittleEndianLong() throws IOException;
29 | }
30 |
--------------------------------------------------------------------------------
/src/test/java/com/spotify/sparkey/CommonHeaderTest.java:
--------------------------------------------------------------------------------
1 | package com.spotify.sparkey;
2 |
3 | import org.junit.Assert;
4 | import org.junit.Test;
5 |
6 | import java.io.IOException;
7 |
8 | /**
9 | * Tests CommonHeader
10 | */
11 | public class CommonHeaderTest {
12 |
13 | @Test
14 | public void testCommonHeader() throws IOException {
15 | new CommonHeader(1,2,3,4,5,6,7) { }; // all lengths valid: must not throw
16 | try {
17 | new CommonHeader(1,2,3,4,5,-1,7) { }; // sixth argument is maxValueLen
18 | Assert.fail("Negative value len size should trigger IOException");
19 | } catch (IOException expected) {
20 | // pass
21 | }
22 | try {
23 | new CommonHeader(1,2,3,4,4294967296L,6,7) { }; // fifth argument is maxKeyLen
24 | Assert.fail("Key len size larger than Integer.MAX_VALUE should trigger IOException");
25 | } catch (IOException expected) {
26 | // pass
27 | }
28 | try {
29 | new CommonHeader(1,2,3,4,-1,6,7) { }; // fifth argument is maxKeyLen
30 | Assert.fail("Negative key len size should trigger IOException");
31 | } catch (IOException expected) {
32 | // pass
33 | }
34 | }
35 | }
36 |
--------------------------------------------------------------------------------
/src/main/java/com/spotify/sparkey/FileFlushingData.java:
--------------------------------------------------------------------------------
1 | package com.spotify.sparkey;
2 |
3 | import java.io.File;
4 | import java.io.FileOutputStream;
5 | import java.io.IOException;
6 |
7 | class FileFlushingData extends InMemoryData {
8 |
9 | private final File file;
10 | private final IndexHeader header;
11 | private final boolean fsync;
12 |
13 | FileFlushingData(final long size, final File file, final IndexHeader header, final boolean fsync) {
14 | super(size);
15 | this.file = file;
16 | this.header = header;
17 | this.fsync = fsync;
18 | }
19 |
20 | @Override
21 | public void close() throws IOException {
22 | try (FileOutputStream stream = new FileOutputStream(file)) {
23 | stream.write(header.asBytes());
24 | for (byte[] chunk : chunks) {
25 | stream.write(chunk);
26 | }
27 | stream.flush(); // Not needed for FileOutputStream, but still semantically correct
28 | if (fsync) {
29 | stream.getFD().sync();
30 | }
31 | } finally {
32 | super.close();
33 | }
34 | }
35 | }
36 |
--------------------------------------------------------------------------------
/src/main/java/com/spotify/sparkey/BlockPositionedInputStream.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2011-2013 Spotify AB
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 | * use this file except in compliance with the License. You may obtain a copy of
6 | * the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 | * License for the specific language governing permissions and limitations under
14 | * the License.
15 | */
16 | package com.spotify.sparkey;
17 |
18 | import java.io.IOException;
19 | import java.io.InputStream;
20 |
21 | abstract class BlockPositionedInputStream extends InputStream {
22 |
23 | protected final InputStream input;
24 |
25 | public BlockPositionedInputStream(InputStream input) {
26 | this.input = input;
27 | }
28 |
29 | abstract long getBlockPosition();
30 |
31 | @Override
32 | public void close() throws IOException {
33 | input.close();
34 | }
35 | }
36 |
--------------------------------------------------------------------------------
/src/main/java/com/spotify/sparkey/CompressionType.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2011-2013 Spotify AB
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 | * use this file except in compliance with the License. You may obtain a copy of
6 | * the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 | * License for the specific language governing permissions and limitations under
14 | * the License.
15 | */
16 | package com.spotify.sparkey;
17 |
18 | public enum CompressionType {
19 | NONE(new CompressionTypeBackendUncompressed()),
20 | SNAPPY(new CompressionTypeBackendCompressed(CompressorType.SNAPPY)),
21 | ZSTD(new CompressionTypeBackendCompressed(CompressorType.ZSTD)),;
22 |
23 | private final CompressionTypeBackend backend;
24 |
25 | CompressionType(CompressionTypeBackend backend) {
26 | this.backend = backend;
27 | }
28 |
29 | CompressionTypeBackend getBackend() {
30 | return backend;
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/src/main/java/com/spotify/sparkey/BlockOutput.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2011-2013 Spotify AB
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 | * use this file except in compliance with the License. You may obtain a copy of
6 | * the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 | * License for the specific language governing permissions and limitations under
14 | * the License.
15 | */
16 | package com.spotify.sparkey;
17 |
18 | import java.io.IOException;
19 | import java.io.InputStream;
20 |
21 | interface BlockOutput {
22 | void put(byte[] key, int keyLen, byte[] value, int valueLen) throws IOException;
23 |
24 | void put(byte[] key, int keyLen, InputStream value, long valueLen) throws IOException;
25 |
26 | void delete(byte[] key, int keyLen) throws IOException;
27 |
28 | void flush(boolean fsync) throws IOException;
29 |
30 | void close(boolean fsync) throws IOException;
31 |
32 | int getMaxEntriesPerBlock();
33 | }
34 |
--------------------------------------------------------------------------------
/src/main/java/com/spotify/sparkey/ArrayUtil.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2011-2013 Spotify AB
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 | * use this file except in compliance with the License. You may obtain a copy of
6 | * the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 | * License for the specific language governing permissions and limitations under
14 | * the License.
15 | */
16 | package com.spotify.sparkey;
17 |
18 | /**
19 | * Array comparison utilities.
20 | * Overridden in Java 9+ MRJAR layer to use Arrays.equals() intrinsic (SIMD optimized).
21 | */
22 | class ArrayUtil {
23 |
24 | /**
25 | * Compare byte array ranges.
26 | * Java 8: Manual loop.
27 | * Java 9+: Arrays.equals() intrinsic with SIMD optimization.
28 | */
29 | static boolean equals(int len, byte[] a, int aOffset, byte[] b, int bOffset) {
30 | for (int i = 0; i < len; i++) {
31 | if (a[aOffset + i] != b[bOffset + i]) {
32 | return false;
33 | }
34 | }
35 | return true;
36 | }
37 |
38 | private ArrayUtil() {
39 | // Utility class
40 | }
41 | }
42 |
--------------------------------------------------------------------------------
/src/test/java/com/spotify/sparkey/TestSparkeyWriter.java:
--------------------------------------------------------------------------------
1 | package com.spotify.sparkey;
2 |
3 | import com.google.common.io.Files;
4 | import java.io.File;
5 | import java.io.IOException;
6 |
7 | public class TestSparkeyWriter {
8 |
9 | public static void writeHashAndCompare(final SparkeyWriter writer2) throws IOException {
10 | final SingleThreadedSparkeyWriter writer = (SingleThreadedSparkeyWriter) writer2;
11 |
12 | final File indexFile = writer.indexFile;
13 | final File memFile = Sparkey.setEnding(indexFile, ".mem.spi");
14 |
15 | try {
16 | writer.setConstructionMethod(SparkeyWriter.ConstructionMethod.IN_MEMORY);
17 | writer.writeHash();
18 | indexFile.renameTo(memFile);
19 | final IndexHeader memHeader = IndexHeader.read(memFile);
20 |
21 | writer.setHashSeed(memHeader.getHashSeed());
22 |
23 | writer.setConstructionMethod(SparkeyWriter.ConstructionMethod.SORTING);
24 | writer.writeHash();
25 | final IndexHeader sortHeader = IndexHeader.read(indexFile);
26 |
27 | if (!Files.equal(indexFile, memFile)) {
28 | throw new RuntimeException(
29 | "Files are not equal: " + indexFile + ", " + memFile + "\n" +
30 | sortHeader.toString() + "\n" + memHeader.toString());
31 | }
32 | } finally {
33 | writer.setConstructionMethod(SparkeyWriter.ConstructionMethod.AUTO);
34 | memFile.delete();
35 | }
36 | }
37 | }
38 |
--------------------------------------------------------------------------------
/src/main/java9/com/spotify/sparkey/ArrayUtil.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2011-2013 Spotify AB
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 | * use this file except in compliance with the License. You may obtain a copy of
6 | * the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 | * License for the specific language governing permissions and limitations under
14 | * the License.
15 | */
16 | package com.spotify.sparkey;
17 |
18 | import java.util.Arrays;
19 |
20 | /**
21 | * Java 9+ optimized array comparison using Arrays.equals() intrinsic.
22 | *
23 | * The JIT compiler recognizes Arrays.equals() with ranges and generates
24 | * vectorized code (AVX2/AVX-512) for ~2-4x speedup on modern CPUs.
25 | */
26 | class ArrayUtil {
27 |
28 | /**
29 | * Compare byte array ranges using Java 9+ Arrays.equals() intrinsic.
30 | * JIT-compiled to SIMD instructions (AVX2/AVX-512).
31 | */
32 | static boolean equals(int len, byte[] a, int aOffset, byte[] b, int bOffset) {
33 | return Arrays.equals(a, aOffset, aOffset + len, b, bOffset, bOffset + len);
34 | }
35 |
36 | private ArrayUtil() {
37 | // Utility class
38 | }
39 | }
40 |
--------------------------------------------------------------------------------
/src/main/java/com/spotify/sparkey/RandomAccessDataStateless.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2011-2013 Spotify AB
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 | * use this file except in compliance with the License. You may obtain a copy of
6 | * the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 | * License for the specific language governing permissions and limitations under
14 | * the License.
15 | */
16 | package com.spotify.sparkey;
17 |
18 | import java.io.IOException;
19 |
20 | /**
21 | * Stateless random access data interface with position-based reads.
22 | *
23 | * Unlike {@link RandomAccessData} which uses seek() followed by reads,
24 | * this interface passes position directly to each read method, enabling
25 | * truly immutable implementations without mutable position state.
26 | *
27 | * This design is inherently thread-safe and easier to reason about since
28 | * there's no shared mutable state.
29 | */
30 | interface RandomAccessDataStateless {
31 |
32 | int readUnsignedByte(long pos) throws IOException;
33 |
34 | int readLittleEndianInt(long pos) throws IOException;
35 |
36 | long readLittleEndianLong(long pos) throws IOException;
37 | }
38 |
--------------------------------------------------------------------------------
/src/test/java/com/spotify/sparkey/SparkeyTestHelper.java:
--------------------------------------------------------------------------------
1 | package com.spotify.sparkey;
2 |
3 | import java.io.File;
4 | import java.io.IOException;
5 |
6 | /**
7 | * Test helper class for accessing package-private SparkeyImplSelector methods.
8 | * This class is in the same package as SparkeyImplSelector to access package-private members.
9 | */
10 | public class SparkeyTestHelper {
11 |
12 | /**
13 | * Open an uncompressed reader using Java 22+ MemorySegment API.
14 | * Delegates to SparkeyImplSelector.openUncompressedJ22().
15 | *
16 | * @param file File base to use
17 | * @return UncompressedSparkeyReaderJ22 (on Java 22+)
18 | * @throws UnsupportedOperationException on Java < 22
19 | * @throws IOException if the file cannot be opened
20 | */
21 | public static SparkeyReader openUncompressedJ22(File file) throws IOException {
22 | return SparkeyImplSelector.openUncompressedJ22(file);
23 | }
24 |
25 | /**
26 | * Open a single-threaded reader using Java 22+ MemorySegment API.
27 | * Delegates to SparkeyImplSelector.openSingleThreadedJ22().
28 | *
29 | * @param file File base to use
30 | * @return SingleThreadedSparkeyReaderJ22 (on Java 22+)
31 | * @throws UnsupportedOperationException on Java < 22
32 | * @throws IOException if the file cannot be opened
33 | */
34 | public static SparkeyReader openSingleThreadedJ22(File file) throws IOException {
35 | return SparkeyImplSelector.openSingleThreadedJ22(file);
36 | }
37 | }
38 |
--------------------------------------------------------------------------------
/src/test/java/com/spotify/sparkey/AddressSizeTest.java:
--------------------------------------------------------------------------------
1 | package com.spotify.sparkey;
2 |
3 | import org.junit.Assert;
4 | import org.junit.Test;
5 |
6 | import java.io.IOException;
7 |
8 | /**
9 | * Tests AddressSize
10 | */
11 | public class AddressSizeTest extends OpenMapsAsserter {
12 |
13 |
14 | // The expected constants treat the first written byte as the least significant one.
15 | @Test
16 | public void testAddressSizeLong() throws IOException {
17 | byte[] expectedBytes = new byte[] {0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08};
18 | InMemoryData imd = new InMemoryData(expectedBytes.length);
19 | for (byte b : expectedBytes) {
20 | imd.writeUnsignedByte(b);
21 | }
22 | imd.seek(0);
23 | Assert.assertEquals(0x0807060504030201L, AddressSize.LONG.readAddress(imd));
24 | imd.seek(0);
25 | AddressSize.LONG.writeAddress(0x0807060504030201L, imd);
26 | imd.seek(0);
27 | for (byte b : expectedBytes) {
28 | Assert.assertEquals(b, imd.readUnsignedByte()); // JUnit order: expected, actual
29 | }
30 | }
31 |
32 |
33 | @Test
34 | public void testAddressSizeInt() throws IOException {
35 | byte[] expectedBytes = new byte[] {0x01, 0x02, 0x03, 0x04};
36 | InMemoryData imd = new InMemoryData(expectedBytes.length);
37 | for (byte b : expectedBytes) {
38 | imd.writeUnsignedByte(b);
39 | }
40 | imd.seek(0);
41 | Assert.assertEquals(0x04030201L, AddressSize.INT.readAddress(imd));
42 | imd.seek(0);
43 | AddressSize.INT.writeAddress(0x04030201L, imd);
44 | imd.seek(0);
45 | for (byte b : expectedBytes) {
46 | Assert.assertEquals(b, imd.readUnsignedByte()); // JUnit order: expected, actual
47 | }
48 | }
49 | }
50 |
--------------------------------------------------------------------------------
/src/main/java/com/spotify/sparkey/extra/ThreadLocalSparkeyReader.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2011-2013 Spotify AB
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 | * use this file except in compliance with the License. You may obtain a copy of
6 | * the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 | * License for the specific language governing permissions and limitations under
14 | * the License.
15 | */
16 | package com.spotify.sparkey.extra;
17 |
18 | import com.spotify.sparkey.*;
19 |
20 | import java.io.File;
21 | import java.io.IOException;
22 |
23 | /**
24 | * A thread-safe Sparkey Reader.
25 | *
26 | *
26 | * <p>This class extends {@link PooledSparkeyReader}, providing all the benefits of
27 | * bounded memory usage and virtual thread compatibility while maintaining backward
28 | * compatibility with existing code.
29 | *
30 | * @deprecated Use {@link PooledSparkeyReader} directly for better clarity.
31 | * This class is maintained for backward compatibility.
32 | *
33 | * @see PooledSparkeyReader the recommended implementation
34 | */
35 | @Deprecated
36 | public class ThreadLocalSparkeyReader extends PooledSparkeyReader {
37 |
38 | public ThreadLocalSparkeyReader(File indexFile) throws IOException {
39 | super(Sparkey.openSingleThreadedReader(indexFile), computeDefaultPoolSize());
40 | }
41 |
42 | }
43 |
--------------------------------------------------------------------------------
/src/test/java/com/spotify/sparkey/extra/DelegatingSparkeyReaderTest.java:
--------------------------------------------------------------------------------
1 | package com.spotify.sparkey.extra;
2 |
3 | import com.spotify.sparkey.SparkeyReader;
4 |
5 | import org.junit.Test;
6 |
7 | import java.io.IOException;
8 |
9 | import static org.mockito.Mockito.*;
10 |
11 | public class DelegatingSparkeyReaderTest {
12 |
13 | private static final class MockDelegatingSparkeyReader extends AbstractDelegatingSparkeyReader {
14 | private final SparkeyReader delegate = mock(SparkeyReader.class);
15 |
16 | @Override
17 | protected SparkeyReader getDelegateReader() {
18 | return this.delegate;
19 | }
20 | }
21 |
22 | @Test
23 | public void testDelegation() throws IOException {
24 | final MockDelegatingSparkeyReader reader = new MockDelegatingSparkeyReader();
25 | final SparkeyReader delegate = reader.getDelegateReader();
26 | final String key = "key";
27 |
28 | reader.getAsString(key);
29 | verify(delegate).getAsString(key);
30 |
31 | reader.getAsByteArray(key.getBytes());
32 | verify(delegate).getAsByteArray(key.getBytes());
33 |
34 | reader.getAsEntry(key.getBytes());
35 | verify(delegate).getAsEntry(key.getBytes());
36 |
37 | reader.getIndexHeader();
38 | verify(delegate).getIndexHeader();
39 |
40 | reader.getLogHeader();
41 | verify(delegate).getLogHeader();
42 |
43 | reader.duplicate();
44 | verify(delegate).duplicate();
45 |
46 | reader.iterator();
47 | verify(delegate).iterator();
48 |
49 | reader.getTotalBytes();
50 | verify(delegate).getTotalBytes();
51 |
52 | reader.close();
53 | verify(delegate).close();
54 | }
55 |
56 | }
57 |
--------------------------------------------------------------------------------
/src/test/java/com/spotify/sparkey/IndexHashTest.java:
--------------------------------------------------------------------------------
1 | package com.spotify.sparkey;
2 |
3 | import com.spotify.sparkey.system.BaseSystemTest;
4 | import org.junit.After;
5 | import org.junit.Before;
6 | import org.junit.Test;
7 |
8 | import java.io.File;
9 | import java.io.IOException;
10 | import java.io.RandomAccessFile;
11 |
12 | import static org.junit.Assert.assertEquals;
13 | import static org.junit.Assert.fail;
14 |
15 | public class IndexHashTest extends BaseSystemTest {
16 | @Before
17 | public void setUp() throws Exception {
18 | super.setUp();
19 | }
20 |
21 | @After
22 | public void tearDown() throws Exception {
23 | super.tearDown();
24 | }
25 |
26 | @Test
27 | public void testCorruptHashFile() throws Exception {
28 | SparkeyWriter writer = Sparkey.createNew(indexFile, CompressionType.NONE, 1);
29 | for (int i = 0; i < 100; i++) {
30 | writer.put("key" + i, "value" + i);
31 | }
32 | writer.close();
33 | TestSparkeyWriter.writeHashAndCompare(writer);
34 | // Truncate the freshly written index before trying to open it.
35 | corruptFile(indexFile);
36 |
37 | assertEquals(0, Sparkey.getOpenFiles());
38 | assertEquals(0, Sparkey.getOpenMaps());
39 |
40 | try {
41 | Sparkey.open(indexFile);
42 | fail();
43 | } catch (Exception e) {
44 | assertEquals(RuntimeException.class, e.getClass());
45 | }
46 | // The failed open must not leave any file or memory map open.
47 | assertEquals(0, Sparkey.getOpenFiles());
48 | assertEquals(0, Sparkey.getOpenMaps());
49 | }
50 | // Shortens the file by 100 bytes; the handle is closed even if length()/setLength() throws.
51 | private void corruptFile(File indexFile) throws IOException {
52 | try (RandomAccessFile truncated = new RandomAccessFile(indexFile, "rw")) {
53 | truncated.setLength(truncated.length() - 100);
54 | }
55 | }
56 | }
57 |
--------------------------------------------------------------------------------
/src/main/java/com/spotify/sparkey/CommonHeader.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2011-2013 Spotify AB
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 | * use this file except in compliance with the License. You may obtain a copy of
6 | * the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 | * License for the specific language governing permissions and limitations under
14 | * the License.
15 | */
16 | package com.spotify.sparkey;
17 |
18 | import java.io.IOException;
19 |
/**
 * Holds the version numbers, file identifier and size statistics that header
 * subclasses have in common.
 */
abstract class CommonHeader {
  final int majorVersion;
  final int minorVersion;
  final int fileIdentifier;

  long dataEnd;
  long maxKeyLen;
  long maxValueLen;
  long numPuts;

  /**
   * Stores the given values after validating the length limits.
   *
   * @throws IOException if {@code maxKeyLen} is negative or exceeds {@link Integer#MAX_VALUE},
   *                     or if {@code maxValueLen} is negative
   */
  CommonHeader(int majorVersion, int minorVersion, int fileIdentifier, long dataEnd, long maxKeyLen, long maxValueLen, long numPuts) throws IOException {
    // Guard clauses first; the key length is checked before the value length.
    final boolean keyLenInRange = maxKeyLen >= 0 && maxKeyLen <= Integer.MAX_VALUE;
    if (!keyLenInRange) {
      throw new IOException("Too large max key len: " + maxKeyLen);
    }
    if (maxValueLen < 0) {
      throw new IOException("Too large max value len: " + maxValueLen);
    }

    this.majorVersion = majorVersion;
    this.minorVersion = minorVersion;
    this.fileIdentifier = fileIdentifier;
    this.dataEnd = dataEnd;
    this.maxKeyLen = maxKeyLen;
    this.maxValueLen = maxValueLen;
    this.numPuts = numPuts;
  }

}
47 |
--------------------------------------------------------------------------------
/src/main/java/com/spotify/sparkey/UncompressedBlockPositionedInputStream.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2011-2013 Spotify AB
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 | * use this file except in compliance with the License. You may obtain a copy of
6 | * the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 | * License for the specific language governing permissions and limitations under
14 | * the License.
15 | */
16 | package com.spotify.sparkey;
17 |
18 | import java.io.IOException;
19 | import java.io.InputStream;
20 |
21 | final class UncompressedBlockPositionedInputStream extends BlockPositionedInputStream {
22 |
23 | private long position;
24 |
25 | public UncompressedBlockPositionedInputStream(InputStream data, long start) {
26 | super(data);
27 | position = start;
28 | }
29 |
30 | @Override
31 | long getBlockPosition() {
32 | return position;
33 | }
34 |
35 | @Override
36 | public int read() throws IOException {
37 | position++;
38 | return input.read();
39 | }
40 |
41 | @Override
42 | public int read(byte[] b) throws IOException {
43 | return read(b, 0, b.length);
44 | }
45 |
46 | @Override
47 | public int read(byte[] b, int off, int len) throws IOException {
48 | position += len;
49 | return input.read(b, off, len);
50 | }
51 |
52 | @Override
53 | public long skip(long n) throws IOException {
54 | long skipped = input.skip(n);
55 | position += skipped;
56 | return skipped;
57 | }
58 | }
59 |
--------------------------------------------------------------------------------
/src/main/java22/com/spotify/sparkey/CompressionTypeBackendJ22.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2011-2013 Spotify AB
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 | * use this file except in compliance with the License. You may obtain a copy of
6 | * the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 | * License for the specific language governing permissions and limitations under
14 | * the License.
15 | */
16 | package com.spotify.sparkey;
17 |
18 | /**
19 | * Java 22+ version of CompressionTypeBackend that works with J22 types.
20 | */
21 | interface CompressionTypeBackendJ22 {
22 | BlockRandomInput createRandomAccessData(ReadOnlyMemMapJ22 data, int maxBlockSize);
23 | }
24 |
25 | class CompressionTypeBackendJ22Uncompressed implements CompressionTypeBackendJ22 {
26 | @Override
27 | public BlockRandomInput createRandomAccessData(ReadOnlyMemMapJ22 data, int maxBlockSize) {
28 | return new UncompressedBlockRandomInputJ22(data);
29 | }
30 | }
31 |
32 | class CompressionTypeBackendJ22Compressed implements CompressionTypeBackendJ22 {
33 | private final CompressorType compressor;
34 |
35 | public CompressionTypeBackendJ22Compressed(CompressorType compressor) {
36 | this.compressor = compressor;
37 | }
38 |
39 | @Override
40 | public BlockRandomInput createRandomAccessData(ReadOnlyMemMapJ22 data, int maxBlockSize) {
41 | return new CompressedRandomReader(compressor, new UncompressedBlockRandomInputJ22(data), maxBlockSize);
42 | }
43 | }
44 |
--------------------------------------------------------------------------------
/src/test/java/com/spotify/sparkey/SortHelperBenchmark.java:
--------------------------------------------------------------------------------
1 | package com.spotify.sparkey;
2 |
3 | import org.openjdk.jmh.annotations.Benchmark;
4 | import org.openjdk.jmh.annotations.BenchmarkMode;
5 | import org.openjdk.jmh.annotations.Fork;
6 | import org.openjdk.jmh.annotations.Measurement;
7 | import org.openjdk.jmh.annotations.Mode;
8 | import org.openjdk.jmh.annotations.OutputTimeUnit;
9 | import org.openjdk.jmh.annotations.Scope;
10 | import org.openjdk.jmh.annotations.State;
11 | import org.openjdk.jmh.annotations.Warmup;
12 |
13 | import java.util.concurrent.TimeUnit;
14 |
15 | import static com.spotify.sparkey.SortHelper.ENTRY_COMPARATOR;
16 |
17 | @BenchmarkMode(Mode.AverageTime)
18 | @OutputTimeUnit(TimeUnit.NANOSECONDS)
19 | @State(Scope.Thread)
20 | @Fork(value = 1, warmups = 1)
21 | @Measurement(iterations = 5, time = 10)
22 | @Warmup(iterations = 5, time = 10)
23 | public class SortHelperBenchmark {
24 | private static final SortHelper.Entry E1 = new SortHelper.Entry(123, 456, 1);
25 | private static final SortHelper.Entry E2 = new SortHelper.Entry(123, 456, 2);
26 | private static final SortHelper.Entry E3 = new SortHelper.Entry(7567, 222, 1);
27 | private static final SortHelper.Entry E4 = new SortHelper.Entry(7567, 222, 2);
28 |
29 | @Benchmark
30 | public int measureRealE1_E1() {
31 | return ENTRY_COMPARATOR.compare(E1, E1);
32 | }
33 |
34 | @Benchmark
35 | public int measureRealE1_E2() {
36 | return ENTRY_COMPARATOR.compare(E1, E2);
37 | }
38 |
39 | @Benchmark
40 | public int measureRealE2_E1() {
41 | return ENTRY_COMPARATOR.compare(E2, E2);
42 | }
43 |
44 | @Benchmark
45 | public int measureRealE1_E3() {
46 | return ENTRY_COMPARATOR.compare(E1, E3);
47 | }
48 |
49 | @Benchmark
50 | public int measureRealE1_E4() {
51 | return ENTRY_COMPARATOR.compare(E1, E4);
52 | }
53 |
54 | }
55 |
--------------------------------------------------------------------------------
/src/test/java/com/spotify/sparkey/SparkeyTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2015 Spotify AB
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 | * use this file except in compliance with the License. You may obtain a copy of
6 | * the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 | * License for the specific language governing permissions and limitations under
14 | * the License.
15 | */
16 | package com.spotify.sparkey;
17 |
18 | import org.junit.Test;
19 |
20 | import java.io.File;
21 |
22 | import static org.junit.Assert.*;
23 |
24 | public class SparkeyTest {
25 |
26 | @Test
27 | public void testFilenames() throws Exception {
28 | assertEquals(new File("foo.bar.spi"), Sparkey.getIndexFile(new File("foo.bar")));
29 | assertEquals(new File("foo.bar.spi"), Sparkey.getIndexFile(new File("foo.bar.")));
30 | assertEquals(new File("foo.bar.spi"), Sparkey.getIndexFile(new File("foo.bar.spi")));
31 | assertEquals(new File("foo.bar.spi"), Sparkey.getIndexFile(new File("foo.bar.spl")));
32 | assertEquals(new File("foo.bar.baz.spi"), Sparkey.getIndexFile(new File("foo.bar.baz")));
33 |
34 | assertEquals(new File("foo.bar.spl"), Sparkey.getLogFile(new File("foo.bar")));
35 | assertEquals(new File("foo.bar.spl"), Sparkey.getLogFile(new File("foo.bar.")));
36 | assertEquals(new File("foo.bar.spl"), Sparkey.getLogFile(new File("foo.bar.spi")));
37 | assertEquals(new File("foo.bar.spl"), Sparkey.getLogFile(new File("foo.bar.spl")));
38 | assertEquals(new File("foo.bar.baz.spl"), Sparkey.getLogFile(new File("foo.bar.baz")));
39 |
40 | }
41 | }
42 |
--------------------------------------------------------------------------------
/src/test/java/com/spotify/sparkey/BytesWrittenTest.java:
--------------------------------------------------------------------------------
1 | package com.spotify.sparkey;
2 |
3 | import org.junit.After;
4 | import org.junit.Before;
5 | import org.junit.Test;
6 |
7 | import java.io.File;
8 | import java.io.IOException;
9 |
10 | import static org.junit.Assert.assertEquals;
11 |
12 | public class BytesWrittenTest extends OpenMapsAsserter {
13 |
14 | private File file;
15 |
16 | @Before
17 | public void setUp() throws Exception {
18 | super.setUp();
19 | file = File.createTempFile("sparkey_test_", ".spl");
20 | }
21 |
22 | @After
23 | public void tearDown() throws Exception {
24 | file.delete();
25 | super.tearDown();
26 | }
27 |
28 | @Test
29 | public void testNone() throws Exception {
30 | test(CompressionType.NONE);
31 | }
32 |
33 | @Test
34 | public void testSnappy() throws Exception {
35 | test(CompressionType.SNAPPY);
36 | }
37 |
38 | @Test
39 | public void testZstd() throws Exception {
40 | test(CompressionType.ZSTD);
41 | }
42 |
43 | private void test(CompressionType compressionType) throws IOException {
44 | SparkeyWriter writer = Sparkey.createNew(file, compressionType, 20);
45 | for (int i = 0; i < 13; i++) {
46 | writer.put(size(17), size(47));
47 | }
48 | for (int i = 0; i < 19; i++) {
49 | writer.put(size(130), size(32000));
50 | }
51 | for (int i = 0; i < 3; i++) {
52 | writer.delete(size(130));
53 | }
54 | writer.close();
55 | assertEquals(13 * (17 + 47 + 1 + 1) + 19 * (130 + 32000 + 2 + 3), LogHeader.read(file).getPutSize());
56 | assertEquals(3 * (130 + 2 + 1), LogHeader.read(file).getDeleteSize());
57 | }
58 |
59 | private String size(int size) {
60 | StringBuilder stringBuilder = new StringBuilder();
61 | for (int i = 0; i < size; i++) {
62 | stringBuilder.append("x");
63 | }
64 | return stringBuilder.toString();
65 | }
66 | }
67 |
--------------------------------------------------------------------------------
/src/main/java/com/spotify/sparkey/UncompressedBlockRandomInput.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2011-2013 Spotify AB
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 | * use this file except in compliance with the License. You may obtain a copy of
6 | * the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 | * License for the specific language governing permissions and limitations under
14 | * the License.
15 | */
16 | package com.spotify.sparkey;
17 |
18 | import java.io.IOException;
19 |
20 | class UncompressedBlockRandomInput implements BlockRandomInput {
21 | private final ReadOnlyMemMap data;
22 |
23 | UncompressedBlockRandomInput(ReadOnlyMemMap data) {
24 | this.data = data;
25 | }
26 |
27 | @Override
28 | public void close() {
29 | data.close();
30 | }
31 |
32 | @Override
33 | public void seek(long pos) throws IOException {
34 | data.seek(pos);
35 | }
36 |
37 | @Override
38 | public int readUnsignedByte() throws IOException {
39 | return data.readUnsignedByte();
40 | }
41 |
42 | @Override
43 | public void readFully(byte[] buffer, int offset, int length) throws IOException {
44 | data.readFully(buffer, offset, length);
45 | }
46 |
47 | @Override
48 | public boolean readFullyCompare(int length, byte[] key) throws IOException {
49 | return data.readFullyCompare(length, key);
50 | }
51 |
52 | @Override
53 | public void skipBytes(long amount) throws IOException {
54 | data.skipBytes(amount);
55 | }
56 |
57 | @Override
58 | public UncompressedBlockRandomInput duplicate() {
59 | return new UncompressedBlockRandomInput(data.duplicate());
60 | }
61 |
62 | @Override
63 | public void closeDuplicate() {
64 | data.closeDuplicate();
65 | }
66 |
67 | @Override
68 | public long getLoadedBytes() {
69 | return data.getLoadedBytes();
70 | }
71 | }
72 |
--------------------------------------------------------------------------------
/src/main/java/com/spotify/sparkey/BlockRandomInput.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2011-2013 Spotify AB
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 | * use this file except in compliance with the License. You may obtain a copy of
6 | * the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 | * License for the specific language governing permissions and limitations under
14 | * the License.
15 | */
16 | package com.spotify.sparkey;
17 |
18 | import java.io.IOException;
19 |
/**
 * Random-access input with a current position, supporting seeking, sequential reads,
 * and duplication.
 */
interface BlockRandomInput {

  // --- positioning ---

  /** Moves the current position to {@code pos}. */
  void seek(long pos) throws IOException;

  /** Advances the current position by {@code amount} bytes without returning data. */
  void skipBytes(long amount) throws IOException;

  // --- reading ---

  /** Reads one byte at the current position and returns it as an unsigned value. */
  int readUnsignedByte() throws IOException;

  /** Reads exactly {@code length} bytes into {@code buffer}, starting at index {@code offset}. */
  void readFully(byte[] buffer, int offset, int length) throws IOException;

  /**
   * Compare bytes at current position with the provided byte array, advancing position by length bytes.
   *
   * This method always advances the current position by {@code length} bytes, regardless of whether
   * the comparison succeeds or fails. This matches the semantics of {@link #readFully(byte[], int, int)}.
   *
   * This is more efficient than calling {@code readFully()} followed by {@code Arrays.equals()}, as it:
   * - Avoids allocating a temporary buffer
   * - Avoids copying data from memory-mapped storage
   * - Uses vectorized comparison (SIMD) on supporting implementations
   *
   * @param length number of bytes to read and compare
   * @param key byte array to compare against (only first {@code length} bytes are compared)
   * @return true if the bytes at current position match the first {@code length} bytes of {@code key}
   */
  boolean readFullyCompare(int length, byte[] key) throws IOException;

  // --- lifecycle and statistics ---

  /** Returns another input over the same data. */
  BlockRandomInput duplicate();

  /** Counterpart of {@link #close()}, presumably for instances obtained from {@link #duplicate()} - TODO confirm. */
  void closeDuplicate();

  /** Closes this input. */
  void close();

  /** Returns the loaded-bytes figure reported by the implementation. */
  long getLoadedBytes();
}
55 |
--------------------------------------------------------------------------------
/src/test/java/com/spotify/sparkey/system/BaseSystemTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2011-2013 Spotify AB
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 | * use this file except in compliance with the License. You may obtain a copy of
6 | * the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 | * License for the specific language governing permissions and limitations under
14 | * the License.
15 | */
16 | package com.spotify.sparkey.system;
17 |
18 | import com.spotify.sparkey.OpenMapsAsserter;
19 | import com.spotify.sparkey.Sparkey;
20 | import com.spotify.sparkey.UtilTest;
21 | import com.sun.management.UnixOperatingSystemMXBean;
22 | import org.junit.After;
23 | import org.junit.Before;
24 | import org.junit.Test;
25 |
26 | import java.io.File;
27 | import java.lang.management.ManagementFactory;
28 | import java.lang.management.OperatingSystemMXBean;
29 |
30 | public class BaseSystemTest extends OpenMapsAsserter {
31 | protected File indexFile;
32 | protected File logFile;
33 |
34 | @Before
35 | public void setUp() throws Exception {
36 | super.setUp();
37 | UtilTest.setMapBits(10);
38 | indexFile = File.createTempFile("sparkey", ".spi");
39 | logFile = Sparkey.getLogFile(indexFile);
40 | indexFile.deleteOnExit();
41 | logFile.deleteOnExit();
42 | }
43 |
44 | @After
45 | public void tearDown() throws Exception {
46 | UtilTest.delete(indexFile);
47 | UtilTest.delete(logFile);
48 | super.tearDown();
49 | }
50 |
51 | @Test
52 | public void testDummy() throws Exception {
53 | }
54 |
55 | static long countOpenFileDescriptors() {
56 | OperatingSystemMXBean os = ManagementFactory.getOperatingSystemMXBean();
57 | if(os instanceof UnixOperatingSystemMXBean){
58 | long openFileDescriptorCount = ((UnixOperatingSystemMXBean) os).getOpenFileDescriptorCount();
59 | return openFileDescriptorCount;
60 | }
61 | return -1;
62 | }
63 | }
64 |
--------------------------------------------------------------------------------
/cleanup-failed-release.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Cleanup script for failed maven releases.
#
# Removes leftover release files, deletes the local release tag, optionally deletes
# the remote tag (after confirmation), and reports whether pom.xml still carries a
# -SNAPSHOT version.

set -e

echo "=== Cleaning up failed release artifacts ==="
echo

# Get current version from pom.xml: take the first <version> element and strip the
# surrounding tags and the -SNAPSHOT suffix.
CURRENT_VERSION=$(grep -m 1 "<version>" pom.xml | sed 's/.*<version>\(.*\)-SNAPSHOT<\/version>.*/\1/')
VERSION_TAG="sparkey-$CURRENT_VERSION"

echo "Current version: $CURRENT_VERSION"
echo "Expected tag: $VERSION_TAG"
echo

# 1. Delete release files
echo "1. Cleaning up release files..."
if [ -f release.properties ] || [ -f pom.xml.releaseBackup ]; then
    rm -f release.properties pom.xml.releaseBackup
    echo " ✓ Deleted release.properties and pom.xml.releaseBackup"
else
    echo " ✓ No release files to clean"
fi
echo

# 2. Delete local tag
echo "2. Checking for local tag..."
# -F -x: match the tag name literally and as a whole line, so the dots in the
# version number are not treated as regex wildcards.
if git tag | grep -Fxq -- "$VERSION_TAG"; then
    git tag -d "$VERSION_TAG"
    echo " ✓ Deleted local tag: $VERSION_TAG"
else
    echo " ✓ No local tag to delete"
fi
echo

# 3. Check for remote tag
echo "3. Checking for remote tag..."
if git ls-remote --tags origin | grep -q "refs/tags/$VERSION_TAG\$"; then
    echo " ⚠ Remote tag exists: $VERSION_TAG"
    read -p " Delete remote tag? (y/N) " -n 1 -r
    echo
    if [[ $REPLY =~ ^[Yy]$ ]]; then
        git push --delete origin "$VERSION_TAG"
        echo " ✓ Deleted remote tag: $VERSION_TAG"
    else
        echo " ⚠ Remote tag NOT deleted (you can delete it later with: git push --delete origin $VERSION_TAG)"
    fi
else
    echo " ✓ No remote tag to delete"
fi
echo

# 4. Reset pom.xml if needed
echo "4. Checking pom.xml version..."
# Full contents of the first <version> element, including any -SNAPSHOT suffix.
POM_VERSION=$(grep -m 1 "<version>" pom.xml | sed 's/.*<version>\(.*\)<\/version>.*/\1/')
if [[ "$POM_VERSION" != *"-SNAPSHOT" ]]; then
    echo " ⚠ WARNING: pom.xml version is $POM_VERSION (not a SNAPSHOT)"
    echo " You may need to manually reset to $CURRENT_VERSION-SNAPSHOT"
    echo " Or run: mvn release:rollback"
else
    echo " ✓ pom.xml version is correct: $POM_VERSION"
fi
echo

echo "=== Cleanup complete ==="
echo
echo "Run ./verify-release-ready.sh to check if everything is ready for release"
69 |
--------------------------------------------------------------------------------
/src/main/java22/com/spotify/sparkey/UncompressedUtilJ22.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2025 Spotify AB
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 | * use this file except in compliance with the License. You may obtain a copy of
6 | * the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 | * License for the specific language governing permissions and limitations under
14 | * the License.
15 | */
16 | package com.spotify.sparkey;
17 |
18 | import java.io.IOException;
19 |
20 | /**
21 | * Utility methods for reading from immutable memory-mapped files.
22 | * All methods take explicit positions. Use Util.unsignedVLQSize() to determine byte count.
23 | */
24 | final class UncompressedUtilJ22 {
25 |
26 | private UncompressedUtilJ22() {}
27 |
28 | /**
29 | * Read variable-length quantity (VLQ) integer at given position.
30 | * Returns the decoded value. Use Util.unsignedVLQSize(value) to determine bytes consumed.
31 | */
32 | static int readVLQInt(ReadOnlyMemMapJ22 data, long position) throws IOException {
33 | long p = position;
34 | int value = 0;
35 | int shift = 0;
36 |
37 | while (true) {
38 | int b = data.readUnsignedByte(p++);
39 | value |= (b & 0x7F) << shift;
40 | if ((b & 0x80) == 0) {
41 | break;
42 | }
43 | shift += 7;
44 | }
45 |
46 | return value;
47 | }
48 |
49 | /**
50 | * Read variable-length quantity (VLQ) as long at given position.
51 | * Supports values larger than Integer.MAX_VALUE.
52 | * Returns the decoded value. Use Util.unsignedVLQSize(value) to determine bytes consumed.
53 | */
54 | static long readVLQLong(ReadOnlyMemMapJ22 data, long position) throws IOException {
55 | long p = position;
56 | long value = 0;
57 | int shift = 0;
58 |
59 | while (true) {
60 | int b = data.readUnsignedByte(p++);
61 | value |= (long)(b & 0x7F) << shift;
62 | if ((b & 0x80) == 0) {
63 | break;
64 | }
65 | shift += 7;
66 | if (shift >= 64) {
67 | throw new RuntimeException("VLQ overflow - value too large for long");
68 | }
69 | }
70 |
71 | return value;
72 | }
73 | }
74 |
--------------------------------------------------------------------------------
/src/main/java/com/spotify/sparkey/CompressorType.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2011-2013 Spotify AB
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 | * use this file except in compliance with the License. You may obtain a copy of
6 | * the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 | * License for the specific language governing permissions and limitations under
14 | * the License.
15 | */
16 | package com.spotify.sparkey;
17 |
18 | import java.io.IOException;
19 |
20 | import com.github.luben.zstd.Zstd;
21 |
22 | import org.xerial.snappy.Snappy;
23 |
24 | enum CompressorType {
25 | SNAPPY {
26 | @Override
27 | int maxCompressedLength(int blockSize) {
28 | return Snappy.maxCompressedLength(blockSize);
29 | }
30 |
31 | @Override
32 | int uncompress(byte[] compressed, int compressedSize, byte[] uncompressed) throws IOException {
33 | return Snappy.uncompress(compressed, 0, compressedSize, uncompressed, 0);
34 | }
35 |
36 | @Override
37 | int compress(byte[] uncompressed, int uncompressedSize, byte[] compressed) throws IOException {
38 | return Snappy.compress(uncompressed, 0, uncompressedSize, compressed, 0);
39 | }
40 | },
41 |
42 | ZSTD {
43 | @Override
44 | int maxCompressedLength(int blockSize) {
45 | return (int)Zstd.compressBound(blockSize);
46 | }
47 |
48 | @Override
49 | int uncompress(byte[] compressed, int compressedSize, byte[] uncompressed) throws IOException {
50 | return (int)Zstd.decompressByteArray(uncompressed, 0, uncompressed.length, compressed, 0, compressedSize);
51 | }
52 |
53 | @Override
54 | int compress(byte[] uncompressed, int uncompressedSize, byte[] compressed) throws IOException {
55 | return (int)Zstd.compressByteArray(compressed, 0, compressed.length, uncompressed, 0, uncompressedSize, 3);
56 | }
57 | },;
58 |
59 | abstract int maxCompressedLength(int blockSize);
60 |
61 | abstract int uncompress(byte[] compressed, int compressedSize, byte[] uncompressed) throws IOException;
62 |
63 | abstract int compress(byte[] uncompressed, int uncompressedSize, byte[] compressed) throws IOException;
64 | }
--------------------------------------------------------------------------------
/src/main/java/com/spotify/sparkey/FileReadWriteData.java:
--------------------------------------------------------------------------------
1 | package com.spotify.sparkey;
2 |
3 | import java.io.File;
4 | import java.io.IOException;
5 | import java.io.RandomAccessFile;
6 |
7 | /**
8 | * Slow implementation - Use {@link ReadWriteMemMap} instead. Implemented for reference and performance comparisons.
9 | */
10 | @Deprecated
11 | class FileReadWriteData implements ReadWriteData {
12 |
13 | private final RandomAccessFile file;
14 | private final IndexHeader header;
15 | private final boolean fsync;
16 | private final int offset;
17 | private boolean closed = false;
18 |
19 | FileReadWriteData(final long size, final File file, final IndexHeader header, final boolean fsync) throws IOException {
20 | offset = header.size();
21 | this.file = new RandomAccessFile(file, "rw");
22 | Sparkey.incrOpenFiles();
23 | this.file.setLength(offset + size);
24 | this.header = header;
25 | this.fsync = fsync;
26 | }
27 |
28 | public void writeLittleEndianLong(long value) throws IOException {
29 | // RandomAccessFile uses big-endian so this needs to be reversed
30 | file.writeLong(Long.reverseBytes(value));
31 | }
32 |
33 | public void writeLittleEndianInt(int value) throws IOException {
34 | // RandomAccessFile uses big-endian so this needs to be reversed
35 | file.writeInt(Integer.reverseBytes(value));
36 | }
37 |
38 | @Override
39 | public void close() throws IOException {
40 | if (closed) {
41 | return;
42 | }
43 | closed = true;
44 | file.seek(0);
45 | file.write(header.asBytes());
46 | if (fsync) {
47 | file.getFD().sync();
48 | }
49 | Sparkey.decrOpenFiles();
50 | file.close();
51 | }
52 |
53 | @Override
54 | public void writeUnsignedByte(final int value) throws IOException {
55 | file.writeByte(value);
56 | }
57 |
58 | @Override
59 | public void seek(final long pos) throws IOException {
60 | file.seek(offset + pos);
61 | }
62 |
63 | @Override
64 | public int readUnsignedByte() throws IOException {
65 | return file.readUnsignedByte();
66 | }
67 |
68 | @Override
69 | public int readLittleEndianInt() throws IOException {
70 | // RandomAccessFile uses big-endian so this needs to be reversed
71 | return Integer.reverseBytes(file.readInt());
72 | }
73 |
74 | @Override
75 | public long readLittleEndianLong() throws IOException {
76 | // RandomAccessFile uses big-endian so this needs to be reversed
77 | return Long.reverseBytes(file.readLong());
78 | }
79 | }
80 |
--------------------------------------------------------------------------------
/src/main/java/com/spotify/sparkey/EmptyInputStream.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2025 Spotify AB
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 | * use this file except in compliance with the License. You may obtain a copy of
6 | * the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 | * License for the specific language governing permissions and limitations under
14 | * the License.
15 | */
16 | package com.spotify.sparkey;
17 |
18 | import java.io.InputStream;
19 |
/**
 * Immutable singleton empty InputStream for DELETE entries and zero-length values.
 * More efficient than wrapping an empty MemorySegment.
 *
 * Similar to InputStream.nullInputStream() from Java 11+, but works on Java 8+.
 */
final class EmptyInputStream extends InputStream {

  /**
   * Singleton instance - completely thread-safe since there's no mutable state.
   */
  static final InputStream INSTANCE = new EmptyInputStream();

  private EmptyInputStream() {
    // Private constructor - use INSTANCE
  }

  @Override
  public int read() {
    return -1; // Always EOF
  }

  /**
   * @return 0 if {@code b} has length zero, otherwise -1 (end of stream)
   * @throws NullPointerException if {@code b} is null
   */
  @Override
  public int read(byte[] b) {
    if (b == null) {
      throw new NullPointerException();
    }
    // InputStream contract: a zero-length read reads nothing and returns 0, even at EOF.
    return b.length == 0 ? 0 : -1;
  }

  /**
   * @return 0 if {@code len} is zero, otherwise -1 (end of stream)
   * @throws NullPointerException      if {@code b} is null
   * @throws IndexOutOfBoundsException if {@code off} or {@code len} is negative, or
   *                                   {@code len} exceeds {@code b.length - off}
   */
  @Override
  public int read(byte[] b, int off, int len) {
    if (b == null) {
      throw new NullPointerException();
    }
    if (off < 0 || len < 0 || len > b.length - off) {
      throw new IndexOutOfBoundsException();
    }
    // InputStream contract: a zero-length read reads nothing and returns 0, even at EOF.
    return len == 0 ? 0 : -1;
  }

  @Override
  public long skip(long n) {
    return 0; // Nothing to skip
  }

  @Override
  public int available() {
    return 0; // No bytes available
  }

  @Override
  public void close() {
    // No-op
  }

  @Override
  public boolean markSupported() {
    return true; // Mark/reset are trivial for empty stream
  }

  @Override
  public void mark(int readlimit) {
    // No-op - no state to save
  }

  @Override
  public void reset() {
    // No-op - nothing to reset to
  }
}
91 |
--------------------------------------------------------------------------------
/src/test/java/com/spotify/sparkey/system/WriteHashBenchmark.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2011-2014 Spotify AB
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 | * use this file except in compliance with the License. You may obtain a copy of
6 | * the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 | * License for the specific language governing permissions and limitations under
14 | * the License.
15 | */
16 | package com.spotify.sparkey.system;
17 |
18 | import com.spotify.sparkey.CompressionType;
19 | import com.spotify.sparkey.Sparkey;
20 | import com.spotify.sparkey.SparkeyWriter;
21 | import com.spotify.sparkey.UtilTest;
22 | import org.openjdk.jmh.annotations.*;
23 |
24 | import java.io.File;
25 | import java.io.IOException;
26 | import java.util.concurrent.TimeUnit;
27 |
28 | @State(Scope.Benchmark)
29 | @Warmup(iterations = 2)
30 | @Measurement(iterations = 4)
31 | @Fork(value = 1, warmups = 0)
32 | public class WriteHashBenchmark {
33 |
34 | private File indexFile;
35 | private File logFile;
36 | private SparkeyWriter writer;
37 |
38 | @Setup(Level.Trial)
39 | public void setup() throws IOException {
40 | indexFile = new File("test.spi");
41 | logFile = Sparkey.getLogFile(indexFile);
42 |
43 | CompressionType compressionType = CompressionType.NONE;
44 |
45 | indexFile.deleteOnExit();
46 | logFile.deleteOnExit();
47 | UtilTest.delete(indexFile);
48 | UtilTest.delete(logFile);
49 |
50 | writer = Sparkey.createNew(indexFile, compressionType, 1024);
51 |
52 | for (int i = 0; i < numElements; i++) {
53 | writer.put("key_" + i, "value_" + i);
54 | }
55 | }
56 |
57 | @TearDown(Level.Trial)
58 | public void tearDown() throws IOException {
59 | writer.close();
60 | UtilTest.delete(indexFile);
61 | UtilTest.delete(logFile);
62 | }
63 |
64 | @Param({"1000", "10000", "100000", "1000000", "10000000"})
65 | public int numElements;
66 |
67 | @Param({"IN_MEMORY", "SORTING"})
68 | public SparkeyWriter.ConstructionMethod constructionMethod;
69 |
70 | @Benchmark
71 | @BenchmarkMode(Mode.SingleShotTime)
72 | @OutputTimeUnit(TimeUnit.SECONDS)
73 | public void test() throws IOException {
74 | writer.setConstructionMethod(constructionMethod);
75 | writer.writeHash();
76 | }
77 | }
78 |
--------------------------------------------------------------------------------
/src/main/java/com/spotify/sparkey/UncompressedBlockOutput.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2011-2013 Spotify AB
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 | * use this file except in compliance with the License. You may obtain a copy of
6 | * the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 | * License for the specific language governing permissions and limitations under
14 | * the License.
15 | */
16 | package com.spotify.sparkey;
17 |
18 | import java.io.FileDescriptor;
19 | import java.io.IOException;
20 | import java.io.InputStream;
21 | import java.io.OutputStream;
22 |
23 | final class UncompressedBlockOutput implements BlockOutput {
24 | private final byte[] buf = new byte[1024*1024];
25 | private final OutputStream outputStream;
26 | private final FileDescriptor fileDescriptor;
27 |
28 | UncompressedBlockOutput(OutputStream outputStream, FileDescriptor fileDescriptor) {
29 | this.outputStream = outputStream;
30 | this.fileDescriptor = fileDescriptor;
31 | }
32 |
33 | @Override
34 | public void put(byte[] key, int keyLen, byte[] value, int valueLen) throws IOException {
35 | Util.writeUnsignedVLQ(keyLen + 1, outputStream);
36 | Util.writeUnsignedVLQ(valueLen, outputStream);
37 | outputStream.write(key, 0, keyLen);
38 | outputStream.write(value, 0, valueLen);
39 | }
40 |
41 | @Override
42 | public void put(byte[] key, int keyLen, InputStream value, long valueLen) throws IOException {
43 | Util.writeUnsignedVLQ(keyLen + 1, outputStream);
44 | Util.writeUnsignedVLQ(valueLen, outputStream);
45 | outputStream.write(key, 0, keyLen);
46 | Util.copy(valueLen, value, outputStream, buf);
47 | }
48 |
49 | @Override
50 | public void delete(byte[] key, int keyLen) throws IOException {
51 | outputStream.write(0);
52 | Util.writeUnsignedVLQ(keyLen, outputStream);
53 | outputStream.write(key, 0, keyLen);
54 | }
55 |
56 | @Override
57 | public void flush(boolean fsync) throws IOException {
58 | outputStream.flush();
59 | if (fsync) {
60 | fileDescriptor.sync();
61 | }
62 | }
63 |
64 | @Override
65 | public void close(boolean fsync) throws IOException {
66 | flush(fsync);
67 | outputStream.close();
68 | }
69 |
70 | @Override
71 | public int getMaxEntriesPerBlock() {
72 | return 1;
73 | }
74 | }
75 |
--------------------------------------------------------------------------------
/src/main/java/com/spotify/sparkey/extra/AbstractDelegatingSparkeyReader.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2013 Spotify AB
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 | * use this file except in compliance with the License. You may obtain a copy of
6 | * the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 | * License for the specific language governing permissions and limitations under
14 | * the License.
15 | */
16 | package com.spotify.sparkey.extra;
17 |
18 | import com.spotify.sparkey.IndexHeader;
19 | import com.spotify.sparkey.LogHeader;
20 | import com.spotify.sparkey.SparkeyReader;
21 |
22 | import java.io.IOException;
23 | import java.util.Iterator;
24 |
25 | /**
26 | * A superclass for Sparkey readers that delegate to another {@link SparkeyReader}.
27 | *
28 | * Subclasses must override the {@link AbstractDelegatingSparkeyReader#getDelegateReader()}
29 | * method.
30 | */
31 | public abstract class AbstractDelegatingSparkeyReader implements SparkeyReader {
32 |
33 | protected abstract SparkeyReader getDelegateReader();
34 |
35 | @Override
36 | public String getAsString(String key) throws IOException {
37 | return getDelegateReader().getAsString(key);
38 | }
39 |
40 | @Override
41 | public byte[] getAsByteArray(byte[] key) throws IOException {
42 | return getDelegateReader().getAsByteArray(key);
43 | }
44 |
45 | @Override
46 | public Entry getAsEntry(byte[] key) throws IOException {
47 | return getDelegateReader().getAsEntry(key);
48 | }
49 |
50 | @Override
51 | public void close() {
52 | getDelegateReader().close();
53 | }
54 |
55 | @Override
56 | public IndexHeader getIndexHeader() {
57 | return getDelegateReader().getIndexHeader();
58 | }
59 |
60 | @Override
61 | public LogHeader getLogHeader() {
62 | return getDelegateReader().getLogHeader();
63 | }
64 |
65 | @Override
66 | public SparkeyReader duplicate() {
67 | return getDelegateReader().duplicate();
68 | }
69 |
70 | @Override
71 | public Iterator iterator() {
72 | return getDelegateReader().iterator();
73 | }
74 |
75 | @Override
76 | public long getLoadedBytes() {
77 | return getDelegateReader().getLoadedBytes();
78 | }
79 |
80 | @Override
81 | public long getTotalBytes() {
82 | return getDelegateReader().getTotalBytes();
83 | }
84 | }
85 |
--------------------------------------------------------------------------------
/src/test/java/com/spotify/sparkey/system/AppendBenchmark.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2011-2014 Spotify AB
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 | * use this file except in compliance with the License. You may obtain a copy of
6 | * the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 | * License for the specific language governing permissions and limitations under
14 | * the License.
15 | */
16 | package com.spotify.sparkey.system;
17 |
18 | import com.spotify.sparkey.CompressionType;
19 | import com.spotify.sparkey.Sparkey;
20 | import com.spotify.sparkey.SparkeyWriter;
21 | import com.spotify.sparkey.UtilTest;
22 | import org.openjdk.jmh.annotations.*;
23 |
24 | import java.io.File;
25 | import java.io.IOException;
26 | import java.util.concurrent.TimeUnit;
27 |
28 | @State(Scope.Benchmark)
29 | @Warmup(iterations = 2)
30 | @Measurement(iterations = 4)
31 | @Fork(value = 1, warmups = 0)
32 | public class AppendBenchmark {
33 |
34 | private File indexFile;
35 | private File logFile;
36 | private SparkeyWriter writer;
37 |
38 | @Setup(Level.Trial)
39 | public void setup() throws IOException {
40 | indexFile = new File("test.spi");
41 | logFile = Sparkey.getLogFile(indexFile);
42 |
43 | CompressionType compressionType = CompressionType.valueOf(type);
44 |
45 | indexFile.deleteOnExit();
46 | logFile.deleteOnExit();
47 | UtilTest.delete(indexFile);
48 | UtilTest.delete(logFile);
49 |
50 | writer = Sparkey.createNew(indexFile, compressionType, 1024);
51 | }
52 |
53 | @TearDown(Level.Trial)
54 | public void tearDown() throws IOException {
55 | writer.close();
56 | UtilTest.delete(indexFile);
57 | UtilTest.delete(logFile);
58 | }
59 |
60 | @Param({"NONE", "SNAPPY", "ZSTD"})
61 | public String type;
62 |
63 | @Benchmark
64 | @BenchmarkMode(Mode.Throughput)
65 | @OutputTimeUnit(TimeUnit.SECONDS)
66 | public void testSmall() throws IOException {
67 | writer.put("key" , "value");
68 | }
69 |
70 | private static final String MEDIUM_KEY = String.format("%200s", "key");
71 | private static final String MEDIUM_VALUE = String.format("%200s", "value");
72 |
73 | @Benchmark
74 | @BenchmarkMode(Mode.Throughput)
75 | @OutputTimeUnit(TimeUnit.SECONDS)
76 | public void testMedium() throws IOException {
77 | writer.put(MEDIUM_KEY , MEDIUM_VALUE);
78 | }
79 |
80 | }
81 |
--------------------------------------------------------------------------------
/src/test/java/com/spotify/sparkey/ReadOnlyMemMapTest.java:
--------------------------------------------------------------------------------
1 | package com.spotify.sparkey;
2 |
3 | import org.junit.Test;
4 |
5 | import java.io.File;
6 | import java.io.IOException;
7 | import java.util.ArrayList;
8 | import java.util.Collections;
9 | import java.util.List;
10 | import java.util.concurrent.atomic.AtomicBoolean;
11 |
12 | import static org.junit.Assert.assertEquals;
13 | import static org.junit.Assert.fail;
14 |
15 | public class ReadOnlyMemMapTest extends OpenMapsAsserter {
16 |
17 | @Test
18 | public void testDontRunOutOfFileDescriptors() throws Exception {
19 | for (int iter = 0; iter < 100; iter++) {
20 | ReadOnlyMemMap memMap = new ReadOnlyMemMap(new File("README.md"));
21 | ArrayList maps = new ArrayList<>();
22 | for (int i = 0; i < 100; i++) {
23 | maps.add(memMap.duplicate());
24 | }
25 | memMap.close();
26 | for (ReadOnlyMemMap map : maps) {
27 | try {
28 | map.readUnsignedByte();
29 | fail();
30 | } catch (SparkeyReaderClosedException e) {
31 | }
32 | try {
33 | map.seek(1);
34 | fail();
35 | } catch (SparkeyReaderClosedException e) {
36 | }
37 | try {
38 | map.skipBytes(1);
39 | fail();
40 | } catch (SparkeyReaderClosedException e) {
41 | }
42 | }
43 | assertEquals(0, Sparkey.getOpenFiles());
44 | assertEquals(0, Sparkey.getOpenMaps());
45 | }
46 | }
47 |
48 | @Test
49 | public void testConcurrentReadWhileClosing() throws Exception {
50 | final AtomicBoolean running = new AtomicBoolean(true);
51 | final ReadOnlyMemMap memMap = new ReadOnlyMemMap(new File("README.md"));
52 | final List failures = Collections.synchronizedList(new ArrayList<>());
53 | List threads = new ArrayList<>();
54 | for (int i = 0; i < 100; i++) {
55 | Thread thread = new Thread(() -> {
56 | ReadOnlyMemMap map = memMap.duplicate();
57 | while (running.get()) {
58 | try {
59 | map.seek(1);
60 | map.readUnsignedByte();
61 | map.skipBytes(1);
62 | } catch (IOException e) {
63 | if (!e.getMessage().equals("Reader has been closed")) {
64 | e.printStackTrace();
65 | failures.add(e);
66 | }
67 | }
68 | }
69 | });
70 | threads.add(thread);
71 | thread.start();
72 | }
73 | memMap.close();
74 | Thread.sleep(100);
75 | running.set(false);
76 | for (Thread thread : threads) {
77 | thread.join();
78 | }
79 | assertEquals(0, failures.size());
80 |
81 | }
82 | }
83 |
--------------------------------------------------------------------------------
/src/test/java/com/spotify/sparkey/system/RandomLookupProfiling.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2011-2013 Spotify AB
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 | * use this file except in compliance with the License. You may obtain a copy of
6 | * the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 | * License for the specific language governing permissions and limitations under
14 | * the License.
15 | */
16 | package com.spotify.sparkey.system;
17 |
18 | import com.spotify.sparkey.*;
19 | import org.junit.Test;
20 |
21 | import java.io.File;
22 | import java.io.IOException;
23 | import java.util.Random;
24 |
25 | public class RandomLookupProfiling {
26 |
27 | private static final int NUM_ENTRIES = 100 * 1024;
28 |
29 | public static void main(String[] args) throws IOException {
30 | File indexFile = new File("profiling.spi");
31 | File logFile = Sparkey.getLogFile(indexFile);
32 | indexFile.deleteOnExit();
33 | logFile.deleteOnExit();
34 |
35 | fillWithData(indexFile, CompressionType.NONE, NUM_ENTRIES);
36 |
37 | int runs = 0;
38 | double speedSum = 0;
39 | while (true) {
40 | long t3 = System.currentTimeMillis();
41 |
42 | int numLookups = 1000 * 1000;
43 | randomLookup(indexFile, numLookups);
44 | long t4 = System.currentTimeMillis();
45 | double speed = 1000.0 * (double) numLookups / (t4 - t3);
46 | speedSum += speed;
47 | runs++;
48 | System.out.println("Random lookups / sec: " + speed);
49 | System.out.println("Average: " + speedSum / runs);
50 | }
51 | }
52 |
53 | private static void randomLookup(File indexFile, int numLookups) throws IOException {
54 | SparkeyReader reader = Sparkey.open(indexFile);
55 | Random random = new Random();
56 | for (int i = 0; i < numLookups; i++) {
57 | String s = reader.getAsString("Key" + random.nextInt(NUM_ENTRIES));
58 | }
59 | }
60 |
61 | private static void fillWithData(File indexFile, CompressionType compression, int numEntries) throws IOException {
62 | SparkeyWriter writer = Sparkey.createNew(indexFile, compression, 32 * 1024);
63 | String smallValue = String.format("%d", 0);
64 | for (int i = 0; i < numEntries; i++) {
65 | writer.put("Key" + i, smallValue);
66 | }
67 | writer.writeHash(HashType.HASH_64_BITS);
68 | writer.close();
69 | }
70 |
71 | @Test
72 | public void dummy() {
73 | // Just to make the junit test runner work
74 | }
75 | }
76 |
--------------------------------------------------------------------------------
/src/main/java/com/spotify/sparkey/AddressSize.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2011-2013 Spotify AB
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 | * use this file except in compliance with the License. You may obtain a copy of
6 | * the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 | * License for the specific language governing permissions and limitations under
14 | * the License.
15 | */
16 | package com.spotify.sparkey;
17 |
18 | import java.io.DataOutputStream;
19 | import java.io.IOException;
20 |
21 | enum AddressSize {
22 | LONG(8) {
23 | @Override
24 | long readAddress(RandomAccessData data) throws IOException {
25 | return data.readLittleEndianLong();
26 | }
27 |
28 | @Override
29 | long readAddress(RandomAccessDataStateless data, long pos) throws IOException {
30 | return data.readLittleEndianLong(pos);
31 | }
32 |
33 | @Override
34 | void writeAddress(long address, ReadWriteData data) throws IOException {
35 | data.writeLittleEndianLong(address);
36 | }
37 |
38 | @Override
39 | void writeAddress(final long address, final DataOutputStream data) throws IOException {
40 | data.writeLong(address);
41 | }
42 | },
43 | INT(4) {
44 | @Override
45 | long readAddress(RandomAccessData data) throws IOException {
46 | return data.readLittleEndianInt() & INT_MASK;
47 | }
48 |
49 | @Override
50 | long readAddress(RandomAccessDataStateless data, long pos) throws IOException {
51 | return data.readLittleEndianInt(pos) & INT_MASK;
52 | }
53 |
54 | @Override
55 | void writeAddress(long address, ReadWriteData data) throws IOException {
56 | data.writeLittleEndianInt((int) address);
57 | }
58 |
59 | @Override
60 | void writeAddress(final long address, final DataOutputStream data) throws IOException {
61 | data.writeInt((int) address); // TODO: overflow?
62 | }
63 | };
64 |
65 | private static final long INT_MASK = (1L << 32) - 1;
66 | private final int size;
67 |
68 | AddressSize(int size) {
69 | this.size = size;
70 | }
71 |
72 | abstract long readAddress(RandomAccessData data) throws IOException;
73 | abstract long readAddress(RandomAccessDataStateless data, long pos) throws IOException;
74 |
75 | abstract void writeAddress(long address, ReadWriteData data) throws IOException;
76 | abstract void writeAddress(long address, DataOutputStream data) throws IOException;
77 |
78 | public int size() {
79 | return size;
80 | }
81 | }
82 |
--------------------------------------------------------------------------------
/src/test/java/com/spotify/sparkey/CompressedReaderTest.java:
--------------------------------------------------------------------------------
1 | package com.spotify.sparkey;
2 |
3 | import org.junit.Test;
4 |
5 | import java.io.*;
6 |
7 | import static org.junit.Assert.assertEquals;
8 |
9 | /**
10 | * Tests CompressedReader
11 | */
12 | public class CompressedReaderTest {
13 | // A stream that reads the same array repeatedly, forever.
14 | private class RepeatingInputStream extends InputStream {
15 | private byte[] buffer;
16 | private int pos = 0;
17 |
18 | public RepeatingInputStream(byte[] buf) throws IOException {
19 | buffer = buf;
20 | }
21 |
22 | public int read() throws IOException {
23 | int ret = buffer[pos];
24 | skip(1);
25 | return ret;
26 | }
27 |
28 | public int read(byte[] b, int off, int len) throws IOException {
29 | int remain = len;
30 | while (remain > 0) {
31 | int avail = buffer.length - pos;
32 | int copy = Math.min(avail, remain);
33 | System.arraycopy(buffer, pos, b, off, copy);
34 | skip(copy);
35 | off += copy;
36 | remain -= copy;
37 | }
38 | return len;
39 | }
40 |
41 | public long skip(long n) throws IOException {
42 | pos = (int)((n + pos) % buffer.length);
43 | return n;
44 | }
45 | }
46 |
47 | private CompressedReader reader(CompressorType compressor) throws IOException {
48 | byte[] uncompressed = new byte[10];
49 | for (int i = 0; i < uncompressed.length; ++i) {
50 | uncompressed[i] = (byte)i;
51 | }
52 |
53 | ByteArrayOutputStream bytes = new ByteArrayOutputStream();
54 | byte[] compressed = new byte[compressor.maxCompressedLength(uncompressed.length)];
55 | int length = compressor.compress(uncompressed, uncompressed.length, compressed);
56 | Util.writeUnsignedVLQ(length, bytes);
57 | bytes.write(compressed, 0, length);
58 |
59 | InputStream buf = new RepeatingInputStream(bytes.toByteArray());
60 | return new CompressedReader(compressor, buf, uncompressed.length, 0);
61 | }
62 |
63 | @Test
64 | public void testLargeSkip() throws IOException {
65 | for (CompressorType compressor : CompressorType.values()) {
66 | long ret = reader(compressor).skip(1000 * 1000);
67 | assertEquals(1000 * 1000, ret);
68 | }
69 | }
70 |
71 | @Test
72 | public void testLargeRead() throws IOException {
73 | for (CompressorType compressor : CompressorType.values()) {
74 | byte[] buf = new byte[1000 * 1000];
75 | int ret = reader(compressor).read(buf);
76 | assertEquals(1000 * 1000, ret);
77 | }
78 | }
79 | }
80 |
--------------------------------------------------------------------------------
/src/main/java/com/spotify/sparkey/CompressionTypeBackend.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2011-2013 Spotify AB
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 | * use this file except in compliance with the License. You may obtain a copy of
6 | * the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 | * License for the specific language governing permissions and limitations under
14 | * the License.
15 | */
16 | package com.spotify.sparkey;
17 |
18 | import java.io.FileDescriptor;
19 | import java.io.IOException;
20 | import java.io.InputStream;
21 | import java.io.OutputStream;
22 |
23 | interface CompressionTypeBackend {
24 | BlockOutput createBlockOutput(FileDescriptor fd, OutputStream outputStream, int maxBlockSize, int maxEntriesPerBlock) throws IOException;
25 | BlockPositionedInputStream createBlockInput(InputStream inputStream, int maxBlockSize, long start);
26 | BlockRandomInput createRandomAccessData(ReadOnlyMemMap data, int maxBlockSize);
27 | }
28 |
29 | class CompressionTypeBackendUncompressed implements CompressionTypeBackend {
30 | @Override
31 | public BlockPositionedInputStream createBlockInput(InputStream inputStream, int maxBlockSize, long start) {
32 | return new UncompressedBlockPositionedInputStream(inputStream, start);
33 | }
34 |
35 | @Override
36 | public BlockRandomInput createRandomAccessData(ReadOnlyMemMap data, int maxBlockSize) {
37 | return new UncompressedBlockRandomInput(data);
38 | }
39 |
40 | @Override
41 | public BlockOutput createBlockOutput(FileDescriptor fd, OutputStream outputStream, int maxBlockSize, int maxEntriesPerBlock) throws IOException {
42 | return new UncompressedBlockOutput(outputStream, fd);
43 | }
44 | }
45 |
46 | class CompressionTypeBackendCompressed implements CompressionTypeBackend {
47 | private final CompressorType compressor;
48 |
49 | public CompressionTypeBackendCompressed(CompressorType compressor) {
50 | this.compressor = compressor;
51 | }
52 |
53 | @Override
54 | public BlockPositionedInputStream createBlockInput(InputStream inputStream, int maxBlockSize, long start) {
55 | return new CompressedReader(compressor, inputStream, maxBlockSize, start);
56 | }
57 |
58 | @Override
59 | public BlockRandomInput createRandomAccessData(ReadOnlyMemMap data, int maxBlockSize) {
60 | return new CompressedRandomReader(compressor, new UncompressedBlockRandomInput(data), maxBlockSize);
61 | }
62 |
63 | @Override
64 | public BlockOutput createBlockOutput(FileDescriptor fd, OutputStream outputStream, int maxBlockSize, int maxEntriesPerBlock) throws IOException {
65 | return new CompressedWriter(new CompressedOutputStream(compressor, maxBlockSize, outputStream, fd), maxEntriesPerBlock);
66 | }
67 | }
68 |
69 |
--------------------------------------------------------------------------------
/src/test/java/com/spotify/sparkey/system/FsyncBenchmark.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2011-2014 Spotify AB
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 | * use this file except in compliance with the License. You may obtain a copy of
6 | * the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 | * License for the specific language governing permissions and limitations under
14 | * the License.
15 | */
16 | package com.spotify.sparkey.system;
17 |
18 | import com.spotify.sparkey.CompressionType;
19 | import com.spotify.sparkey.Sparkey;
20 | import com.spotify.sparkey.SparkeyWriter;
21 | import com.spotify.sparkey.UtilTest;
22 | import org.openjdk.jmh.annotations.Benchmark;
23 | import org.openjdk.jmh.annotations.BenchmarkMode;
24 | import org.openjdk.jmh.annotations.Fork;
25 | import org.openjdk.jmh.annotations.Level;
26 | import org.openjdk.jmh.annotations.Measurement;
27 | import org.openjdk.jmh.annotations.Mode;
28 | import org.openjdk.jmh.annotations.OperationsPerInvocation;
29 | import org.openjdk.jmh.annotations.OutputTimeUnit;
30 | import org.openjdk.jmh.annotations.Param;
31 | import org.openjdk.jmh.annotations.Scope;
32 | import org.openjdk.jmh.annotations.Setup;
33 | import org.openjdk.jmh.annotations.State;
34 | import org.openjdk.jmh.annotations.TearDown;
35 | import org.openjdk.jmh.annotations.Warmup;
36 |
37 | import java.io.File;
38 | import java.io.IOException;
39 | import java.util.concurrent.TimeUnit;
40 |
41 | @State(Scope.Benchmark)
42 | @Warmup(iterations = 2)
43 | @Measurement(iterations = 4)
44 | @Fork(value = 1, warmups = 0)
45 | public class FsyncBenchmark {
46 |
47 | private File indexFile;
48 | private File logFile;
49 | private SparkeyWriter writer;
50 |
51 | @Param({"NONE", "SNAPPY", "ZSTD"})
52 | public String type;
53 |
54 | @Param({"true", "false"})
55 | public boolean fsync;
56 |
57 | @Setup(Level.Trial)
58 | public void setup() throws IOException {
59 | indexFile = new File("test.spi");
60 | logFile = Sparkey.getLogFile(indexFile);
61 |
62 | CompressionType compressionType = CompressionType.valueOf(type);
63 |
64 | indexFile.deleteOnExit();
65 | logFile.deleteOnExit();
66 | UtilTest.delete(indexFile);
67 | UtilTest.delete(logFile);
68 |
69 | writer = Sparkey.createNew(indexFile, compressionType, 1024);
70 | writer.setFsync(fsync);
71 | }
72 |
73 | @TearDown(Level.Trial)
74 | public void tearDown() throws IOException {
75 | writer.close();
76 | UtilTest.delete(indexFile);
77 | UtilTest.delete(logFile);
78 | }
79 |
80 | @Benchmark
81 | @BenchmarkMode(Mode.Throughput)
82 | @OutputTimeUnit(TimeUnit.SECONDS)
83 | @OperationsPerInvocation(1000)
84 | public void testFsync() throws IOException {
85 | for (int i = 0; i < 1000; i++) {
86 | writer.put("key" , "value");
87 | }
88 | writer.flush();
89 | }
90 | }
91 |
--------------------------------------------------------------------------------
/src/test/java/com/spotify/sparkey/system/ReloadableReaderExample.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2011-2013 Spotify AB
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 | * use this file except in compliance with the License. You may obtain a copy of
6 | * the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 | * License for the specific language governing permissions and limitations under
14 | * the License.
15 | */
16 | package com.spotify.sparkey.system;
17 |
18 | import com.google.common.util.concurrent.ListeningExecutorService;
19 | import com.google.common.util.concurrent.MoreExecutors;
20 | import com.spotify.sparkey.CompressionType;
21 | import com.spotify.sparkey.extra.ReloadableSparkeyReader;
22 | import com.spotify.sparkey.Sparkey;
23 | import com.spotify.sparkey.SparkeyWriter;
24 | import org.junit.Ignore;
25 |
26 | import java.io.File;
27 | import java.io.IOException;
28 | import java.util.concurrent.ExecutionException;
29 | import java.util.concurrent.Executors;
30 | import java.util.concurrent.TimeUnit;
31 |
32 | @Ignore
33 | public class ReloadableReaderExample {
34 |
35 | private static final int ENTRIES = 1000;
36 | private static final CompressionType TYPE = CompressionType.NONE;
37 |
38 | public static void main(String[] args)
39 | throws IOException, InterruptedException, ExecutionException {
40 | run();
41 | }
42 |
43 | private static void run() throws IOException, InterruptedException, ExecutionException {
44 | ListeningExecutorService executorService = MoreExecutors.listeningDecorator(Executors.newSingleThreadExecutor());
45 |
46 | // create dummy log/index files, and load the reader from them
47 | final File logFile = new File("reloadabletest.spl");
48 | create(Sparkey.getIndexFile(logFile));
49 | final ReloadableSparkeyReader reader = ReloadableSparkeyReader.fromLogFile(logFile, executorService).toCompletableFuture().get();
50 |
51 | // should be ignored (same file)
52 | reader.load(logFile);
53 |
54 | // should load from second file now
55 | final File logFile2 = new File("reloadabletest2.spl");
56 | create(Sparkey.getIndexFile(logFile2));
57 | reader.load(logFile2);
58 |
59 | reader.close();
60 | executorService.shutdown();
61 | executorService.awaitTermination(10, TimeUnit.SECONDS);
62 |
63 | Sparkey.getIndexFile(logFile).delete();
64 | logFile.delete();
65 | Sparkey.getIndexFile(logFile2).delete();
66 | logFile2.delete();
67 |
68 | System.out.println("Done!");
69 | }
70 |
71 | private static void create(File indexFile) throws IOException {
72 | final SparkeyWriter writer = Sparkey.createNew(indexFile, TYPE, 512);
73 | for (int i = 0; i < ENTRIES; i++) {
74 | writer.put("Key" + i, "Value" + i);
75 | }
76 | writer.flush();
77 | writer.writeHash();
78 | writer.close();
79 | }
80 |
81 | }
82 |
--------------------------------------------------------------------------------
/src/test/java/com/spotify/sparkey/extra/ReloadableSparkeyReaderTest.java:
--------------------------------------------------------------------------------
1 | package com.spotify.sparkey.extra;
2 |
3 | import com.google.common.util.concurrent.ListeningExecutorService;
4 | import com.google.common.util.concurrent.MoreExecutors;
5 | import com.spotify.sparkey.CompressionType;
6 | import com.spotify.sparkey.OpenMapsAsserter;
7 | import com.spotify.sparkey.Sparkey;
8 | import com.spotify.sparkey.SparkeyWriter;
9 | import org.junit.After;
10 | import org.junit.Before;
11 | import org.junit.Test;
12 |
13 | import java.io.File;
14 | import java.io.IOException;
15 | import java.util.concurrent.ExecutionException;
16 | import java.util.concurrent.Executors;
17 |
18 | import static org.junit.Assert.assertEquals;
19 |
20 | public class ReloadableSparkeyReaderTest extends OpenMapsAsserter {
21 | private final ListeningExecutorService executorService = MoreExecutors.listeningDecorator(Executors.newSingleThreadExecutor());
22 | private File logFile1;
23 | private File logFile2;
24 |
25 | @Before
26 | public void setUp() throws Exception {
27 | super.setUp();
28 | logFile1 = createLogFile("key1", "value1");
29 | logFile2 = createLogFile("key2", "value2");
30 |
31 | logFile1.deleteOnExit();
32 | logFile2.deleteOnExit();
33 | }
34 |
35 | @After
36 | public void tearDown() throws Exception {
37 | logFile1.delete();
38 | Sparkey.getIndexFile(logFile1).delete();
39 | logFile2.delete();
40 | Sparkey.getIndexFile(logFile2).delete();
41 | super.tearDown();
42 | }
43 |
44 | private static File createLogFile(String key, String value) throws IOException {
45 | final File logFile = File.createTempFile("sparkey", ".spl");
46 |
47 | SparkeyWriter writer = Sparkey.createNew(logFile, CompressionType.NONE, 1024);
48 | writer.put(key, value);
49 | writer.writeHash();
50 | writer.close();
51 |
52 | return logFile;
53 | }
54 |
55 | @Test
56 | public void testFromLogFile() throws ExecutionException, InterruptedException, IOException {
57 | try (ReloadableSparkeyReader reader = ReloadableSparkeyReader.fromLogFile(logFile1, executorService)
58 | .toCompletableFuture().get()) {
59 | assertEquals("value1", reader.getAsString("key1"));
60 | }
61 | }
62 |
63 | @Test
64 | public void testReload() throws ExecutionException, InterruptedException, IOException {
65 | try (ReloadableSparkeyReader reader = ReloadableSparkeyReader.fromLogFile(logFile1, executorService)
66 | .toCompletableFuture().get()) {
67 | reader.load(logFile2).toCompletableFuture().get();
68 | assertEquals("value2", reader.getAsString("key2"));
69 |
70 | reader.load(logFile1).toCompletableFuture().get();
71 | assertEquals("value1", reader.getAsString("key1"));
72 | }
73 | }
74 |
75 | @Test(expected = IllegalArgumentException.class)
76 | public void testNullExecutorService() {
77 | ReloadableSparkeyReader.fromLogFile(logFile1, null);
78 | }
79 |
80 | @Test(expected = IllegalArgumentException.class)
81 | public void testInvalidLogFile() {
82 | ReloadableSparkeyReader.fromLogFile(new File("some-nonexisting-file"), executorService);
83 | }
84 |
85 | }
86 |
--------------------------------------------------------------------------------
/src/main/java/com/spotify/sparkey/HashType.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2011-2013 Spotify AB
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 | * use this file except in compliance with the License. You may obtain a copy of
6 | * the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 | * License for the specific language governing permissions and limitations under
14 | * the License.
15 | */
16 | package com.spotify.sparkey;
17 |
18 | import java.io.DataOutputStream;
19 | import java.io.IOException;
20 |
21 | public enum HashType {
22 | HASH_64_BITS(8) {
23 | @Override
24 | long readHash(RandomAccessData data) throws IOException {
25 | return data.readLittleEndianLong();
26 | }
27 |
28 | @Override
29 | long readHash(RandomAccessDataStateless data, long pos) throws IOException {
30 | return data.readLittleEndianLong(pos);
31 | }
32 |
33 | @Override
34 | void writeHash(long hash, ReadWriteData data) throws IOException {
35 | data.writeLittleEndianLong(hash);
36 | }
37 |
38 | @Override
39 | void writeHash(final long hash, final DataOutputStream data) throws IOException {
40 | data.writeLong(hash);
41 | }
42 |
43 | @Override
44 | long hash(int keyLen, byte[] key, int seed) {
45 | return MurmurHash3.murmurHash3_x64_64(key, keyLen, seed);
46 | }
47 | },
48 | HASH_32_BITS(4) {
49 | @Override
50 | long readHash(RandomAccessData data) throws IOException {
51 | return data.readLittleEndianInt() & INT_MASK;
52 | }
53 |
54 | @Override
55 | long readHash(RandomAccessDataStateless data, long pos) throws IOException {
56 | return data.readLittleEndianInt(pos) & INT_MASK;
57 | }
58 |
59 | @Override
60 | void writeHash(long hash, ReadWriteData data) throws IOException {
61 | data.writeLittleEndianInt((int) hash);
62 | }
63 |
64 | @Override
65 | void writeHash(final long hash, final DataOutputStream data) throws IOException {
66 | data.writeInt((int) hash);
67 | }
68 |
69 | @Override
70 | long hash(int keyLen, byte[] key, int seed) {
71 | return MurmurHash3.murmurHash3_x86_32(key, keyLen, seed) & BITS_32;
72 | }
73 | };
74 |
75 | private static final long BITS_32 = ((1L << 32) - 1);
76 | private static final long INT_MASK = (1L << 32) - 1;
77 | private final int size;
78 |
79 | HashType(int size) {
80 | this.size = size;
81 | }
82 |
83 |
84 | abstract long readHash(RandomAccessData data) throws IOException;
85 | abstract long readHash(RandomAccessDataStateless data, long pos) throws IOException;
86 |
87 | abstract void writeHash(long hash, ReadWriteData data) throws IOException;
88 | abstract void writeHash(long hash, DataOutputStream data) throws IOException;
89 |
90 | abstract long hash(int keyLen, byte[] key, int seed);
91 |
92 | public int size() {
93 | return size;
94 | }
95 | }
96 |
--------------------------------------------------------------------------------
/src/main/java22/com/spotify/sparkey/UncompressedBlockRandomInputJ22.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2011-2013 Spotify AB
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 | * use this file except in compliance with the License. You may obtain a copy of
6 | * the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 | * License for the specific language governing permissions and limitations under
14 | * the License.
15 | */
16 | package com.spotify.sparkey;
17 |
18 | import java.io.IOException;
19 | import java.lang.foreign.MemorySegment;
20 |
21 | /**
22 | * Java 22+ uncompressed block random input.
23 | *
24 | * Wraps immutable ReadOnlyMemMapJ22 with stateful position tracking.
25 | */
26 | class UncompressedBlockRandomInputJ22 implements BlockRandomInput {
27 | private final ReadOnlyMemMapJ22 data;
28 | private long position;
29 |
30 | UncompressedBlockRandomInputJ22(ReadOnlyMemMapJ22 data) {
31 | this.data = data;
32 | this.position = 0;
33 | }
34 |
35 | @Override
36 | public void close() {
37 | data.close();
38 | }
39 |
40 | @Override
41 | public void seek(long pos) throws IOException {
42 | this.position = pos;
43 | }
44 |
45 | @Override
46 | public int readUnsignedByte() throws IOException {
47 | int result = data.readUnsignedByte(position);
48 | position++;
49 | return result;
50 | }
51 |
52 | @Override
53 | public void readFully(byte[] buffer, int offset, int length) throws IOException {
54 | data.readFully(position, buffer, offset, length);
55 | position += length;
56 | }
57 |
58 | @Override
59 | public void skipBytes(long amount) throws IOException {
60 | position += amount;
61 | }
62 |
63 | @Override
64 | public UncompressedBlockRandomInputJ22 duplicate() {
65 | return new UncompressedBlockRandomInputJ22(data.duplicate());
66 | }
67 |
68 | @Override
69 | public void closeDuplicate() {
70 | data.closeDuplicate();
71 | }
72 |
73 | @Override
74 | public long getLoadedBytes() {
75 | // ReadOnlyMemMapJ22 doesn't track loaded bytes (MemorySegment is all-or-nothing)
76 | // Return 0 as conservative estimate
77 | return 0;
78 | }
79 |
80 | @Override
81 | public boolean readFullyCompare(int length, byte[] key) throws IOException {
82 | boolean result = data.readFullyCompare(position, length, key);
83 | position += length;
84 | return result;
85 | }
86 |
87 | /**
88 | * Get current position in the data stream.
89 | * Used for tracking value positions for lazy access.
90 | */
91 | long getPosition() {
92 | return position;
93 | }
94 |
95 | /**
96 | * Get a zero-copy slice of the underlying data.
97 | * Used for lazy value access - no allocation, no copying.
98 | */
99 | MemorySegment asSlice(long pos, long length) throws IOException {
100 | return data.asSlice(pos, length);
101 | }
102 | }
103 |
--------------------------------------------------------------------------------
/src/test/java/com/spotify/sparkey/system/LargeFilesTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2011-2013 Spotify AB
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 | * use this file except in compliance with the License. You may obtain a copy of
6 | * the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 | * License for the specific language governing permissions and limitations under
14 | * the License.
15 | */
16 | package com.spotify.sparkey.system;
17 |
18 | import com.spotify.sparkey.*;
19 | import org.junit.Test;
20 |
21 | import java.io.IOException;
22 |
23 | import static org.junit.Assert.*;
24 |
25 | public class LargeFilesTest extends BaseSystemTest {
26 | @Test
27 | public void testLargeLogFile() throws IOException {
28 | UtilTest.setMapBits(10);
29 | String expectedValue = "value";
30 | while (expectedValue.length() < 5*1024) { // Larger than a map chunk
31 | expectedValue += expectedValue;
32 | }
33 |
34 | byte[] value = expectedValue.getBytes();
35 |
36 | SparkeyWriter writer = Sparkey.createNew(indexFile, CompressionType.NONE, 1024);
37 | for (int i = 0; i < 2000; i++) {
38 | writer.put(("key_" + i).getBytes(), value);
39 | }
40 | TestSparkeyWriter.writeHashAndCompare(writer);
41 | writer.close();
42 |
43 | assertTrue(logFile.length() > 2000 * 5 * 1024);
44 | SparkeyReader reader = Sparkey.open(indexFile);
45 | assertEquals(indexFile.length() + logFile.length(), reader.getTotalBytes());
46 | for (int i = 0; i < 2000; i += 100) {
47 | assertEquals(expectedValue, reader.getAsString("key_" + i));
48 | }
49 | assertEquals(null, reader.getAsString("key_" + 2000));
50 | reader.close();
51 | }
52 |
53 | @Test
54 | public void testSmallIndexFile() throws IOException {
55 | testLargeIndexFileInner(7000);
56 | }
57 |
58 | @Test
59 | public void testMediumIndexFile() throws IOException {
60 | testLargeIndexFileInner(150000);
61 | }
62 |
63 | @Test
64 | public void testLargeIndexFile() throws IOException {
65 | testLargeIndexFileInner(500000);
66 | }
67 |
68 | private void testLargeIndexFileInner(final long size) throws IOException {
69 | SparkeyWriter writer = Sparkey.createNew(indexFile, CompressionType.NONE, 1024);
70 | for (int i = 0; i < size; i++) {
71 | writer.put(("key_" + i), "" + (i % 13));
72 | }
73 | writer.setHashType(HashType.HASH_64_BITS);
74 | TestSparkeyWriter.writeHashAndCompare(writer);
75 | writer.close();
76 |
77 | assertTrue(indexFile.length() > size * 8L);
78 | SparkeyReader reader = Sparkey.open(indexFile);
79 | assertTrue(0 <= reader.getLoadedBytes());
80 | assertTrue(reader.getLoadedBytes() <= reader.getTotalBytes());
81 | for (int i = 0; i < 1000; i++) {
82 | long key = i * size / 1000L;
83 | assertEquals("" + (key % 13), reader.getAsString("key_" + key));
84 | }
85 | assertEquals(null, reader.getAsString("key_" + size));
86 | reader.close();
87 | }
88 | }
89 |
--------------------------------------------------------------------------------
/src/test/java/com/spotify/sparkey/system/LookupBenchmark.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2011-2014 Spotify AB
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 | * use this file except in compliance with the License. You may obtain a copy of
6 | * the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 | * License for the specific language governing permissions and limitations under
14 | * the License.
15 | */
16 | package com.spotify.sparkey.system;
17 |
18 | import com.spotify.sparkey.CompressionType;
19 | import com.spotify.sparkey.Sparkey;
20 | import com.spotify.sparkey.SparkeyReader;
21 | import com.spotify.sparkey.SparkeyWriter;
22 | import com.spotify.sparkey.UtilTest;
23 | import org.openjdk.jmh.annotations.Benchmark;
24 | import org.openjdk.jmh.annotations.BenchmarkMode;
25 | import org.openjdk.jmh.annotations.Fork;
26 | import org.openjdk.jmh.annotations.Level;
27 | import org.openjdk.jmh.annotations.Measurement;
28 | import org.openjdk.jmh.annotations.Mode;
29 | import org.openjdk.jmh.annotations.OutputTimeUnit;
30 | import org.openjdk.jmh.annotations.Param;
31 | import org.openjdk.jmh.annotations.Scope;
32 | import org.openjdk.jmh.annotations.Setup;
33 | import org.openjdk.jmh.annotations.State;
34 | import org.openjdk.jmh.annotations.TearDown;
35 | import org.openjdk.jmh.annotations.Warmup;
36 |
37 | import java.io.File;
38 | import java.io.IOException;
39 | import java.util.Random;
40 | import java.util.concurrent.TimeUnit;
41 |
42 | @State(Scope.Benchmark)
43 | @Warmup(iterations = 2)
44 | @Measurement(iterations = 4)
45 | @Fork(value = 1, warmups = 0)
46 | public class LookupBenchmark {
47 |
48 | private File indexFile;
49 | private File logFile;
50 | private SparkeyReader reader;
51 | private Random random;
52 |
53 | @Setup(Level.Trial)
54 | public void setup() throws IOException {
55 | indexFile = new File("test.spi");
56 | logFile = Sparkey.getLogFile(indexFile);
57 |
58 | CompressionType compressionType = CompressionType.valueOf(type);
59 |
60 | indexFile.deleteOnExit();
61 | logFile.deleteOnExit();
62 | UtilTest.delete(indexFile);
63 | UtilTest.delete(logFile);
64 |
65 | SparkeyWriter writer = Sparkey.createNew(indexFile, compressionType, 1024);
66 | for (int i = 0; i < numElements; i++) {
67 | writer.put("key_" + i, "value_" + i);
68 | }
69 | writer.writeHash();
70 | writer.close();
71 |
72 | reader = Sparkey.open(indexFile);
73 | random = new Random(891273791623L);
74 |
75 | }
76 |
77 | @TearDown(Level.Trial)
78 | public void tearDown() throws IOException {
79 | reader.close();
80 | UtilTest.delete(indexFile);
81 | UtilTest.delete(logFile);
82 | }
83 |
84 | @Param({"1000", "10000", "100000", "1000000", "10000000", "100000000"})
85 | public int numElements;
86 |
87 | @Param({"NONE", "SNAPPY", "ZSTD"})
88 | public String type;
89 |
90 | @Benchmark
91 | @BenchmarkMode(Mode.Throughput)
92 | @OutputTimeUnit(TimeUnit.SECONDS)
93 | public String test() throws IOException {
94 | return reader.getAsString("key_" + random.nextInt(numElements));
95 | }
96 | }
97 |
--------------------------------------------------------------------------------
/src/main/java/com/spotify/sparkey/SparkeyReader.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2011-2013 Spotify AB
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 | * use this file except in compliance with the License. You may obtain a copy of
6 | * the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 | * License for the specific language governing permissions and limitations under
14 | * the License.
15 | */
16 | package com.spotify.sparkey;
17 |
18 | import java.io.Closeable;
19 | import java.io.IOException;
20 | import java.io.InputStream;
21 | import java.util.Iterator;
22 |
23 | public interface SparkeyReader extends Iterable, Closeable {
24 | /**
25 | * @param key the key to search for, interpreted as an UTF-8 string.
26 | * @return null if the key/value pair was not found, otherwise the value interpreted as an UTF-8 string.
27 | */
28 | String getAsString(String key) throws IOException;
29 |
30 | /**
31 | * @param key the key to search for
32 | * @return null if the key/value pair was not found, otherwise the raw byte array value
33 | */
34 | byte[] getAsByteArray(byte[] key) throws IOException;
35 |
36 | /**
37 | * This is mostly useful for retrieving large values that don't fit in a byte array.
38 | *
39 | * @param key the key to search for
40 | * @return null if the key/value pair was not found, otherwise the entry.
41 | *
42 | */
43 | Entry getAsEntry(byte[] key) throws IOException;
44 |
45 | IndexHeader getIndexHeader();
46 | LogHeader getLogHeader();
47 |
48 | /**
49 | * Create a duplicate of the reader. Useful for using the reader from another thread.
50 | * @return a duplicate of the reader.
51 | */
52 | SparkeyReader duplicate();
53 |
54 | // Deliberately override to avoid throwing IOException
55 | @Override
56 | void close();
57 |
58 | /**
59 | * Get an iterator over all the live entries.
60 | *
61 | * The iterator object is not thread safe,
62 | * and the entry objects are highly volatile
63 | * and will be invalidated by the next
64 | * iteration step. Don't leak this entry,
65 | * copy whatever data you want from it instead.
66 | *
67 | * @return an iterator
68 | */
69 | @Override
70 | Iterator iterator();
71 |
72 | interface Entry {
73 | int getKeyLength();
74 | byte[] getKey();
75 | String getKeyAsString();
76 |
77 | long getValueLength();
78 | byte[] getValue() throws IOException;
79 | String getValueAsString() throws IOException;
80 | InputStream getValueAsStream();
81 |
82 | Type getType();
83 | }
84 |
85 | /**
86 | * Get the number of index and log file bytes loaded in memory.
87 | *
88 | * This number is based on MappedByteBuffer.isLoaded() and the resolution is
89 | * in increments of the memory chunk size (1 GB)
90 | *
91 | * @deprecated because it won't always be possible to compute the correct value
92 | */
93 | @Deprecated
94 | long getLoadedBytes();
95 |
96 | /**
97 | * Get the total number of index and log file bytes.
98 | */
99 | long getTotalBytes();
100 |
101 | enum Type {
102 | PUT, DELETE
103 | }
104 | }
105 |
--------------------------------------------------------------------------------
/src/main/java22/com/spotify/sparkey/MemorySegmentInputStream.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2025 Spotify AB
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 | * use this file except in compliance with the License. You may obtain a copy of
6 | * the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 | * License for the specific language governing permissions and limitations under
14 | * the License.
15 | */
16 | package com.spotify.sparkey;
17 |
18 | import java.io.InputStream;
19 | import java.lang.foreign.MemorySegment;
20 | import java.lang.foreign.ValueLayout;
21 |
22 | /**
23 | * Zero-copy InputStream backed by a MemorySegment.
24 | * Supports values larger than 2GB (limited only by MemorySegment size).
25 | */
26 | final class MemorySegmentInputStream extends InputStream {
27 | private static final ValueLayout.OfByte JAVA_BYTE = ValueLayout.JAVA_BYTE;
28 |
29 | private final MemorySegment segment;
30 | private final long size;
31 | private long position;
32 | private long mark;
33 |
34 | MemorySegmentInputStream(MemorySegment segment) {
35 | this.segment = segment;
36 | this.size = segment.byteSize();
37 | this.position = 0;
38 | this.mark = 0;
39 | }
40 |
41 | @Override
42 | public int read() {
43 | if (position >= size) {
44 | return -1;
45 | }
46 | byte b = segment.get(JAVA_BYTE, position);
47 | position++;
48 | return ((int) b) & 0xFF;
49 | }
50 |
51 | @Override
52 | public int read(byte[] b, int off, int len) {
53 | if (b == null) {
54 | throw new NullPointerException();
55 | }
56 | if (off < 0 || len < 0 || len > b.length - off) {
57 | throw new IndexOutOfBoundsException();
58 | }
59 | if (len == 0) {
60 | return 0;
61 | }
62 | if (position >= size) {
63 | return -1;
64 | }
65 |
66 | // Calculate how much we can actually read
67 | long remaining = size - position;
68 | // Cast is safe: result never exceeds len (which is already an int)
69 | int toRead = (int) Math.min(len, remaining);
70 |
71 | // Zero-copy read from MemorySegment to byte array
72 | MemorySegment.copy(segment, JAVA_BYTE, position, b, off, toRead);
73 | position += toRead;
74 |
75 | return toRead;
76 | }
77 |
78 | @Override
79 | public long skip(long n) {
80 | if (n <= 0) {
81 | return 0;
82 | }
83 | long remaining = size - position;
84 | long skipped = Math.min(n, remaining);
85 | position += skipped;
86 | return skipped;
87 | }
88 |
89 | @Override
90 | public int available() {
91 | long remaining = size - position;
92 | // Clamp to Integer.MAX_VALUE for API compatibility
93 | return (int) Math.min(remaining, Integer.MAX_VALUE);
94 | }
95 |
96 | @Override
97 | public boolean markSupported() {
98 | return true;
99 | }
100 |
101 | @Override
102 | public synchronized void mark(int readlimit) {
103 | mark = position;
104 | }
105 |
106 | @Override
107 | public synchronized void reset() {
108 | position = mark;
109 | }
110 |
111 | @Override
112 | public void close() {
113 | // No-op: MemorySegment lifecycle is managed externally
114 | }
115 | }
116 |
--------------------------------------------------------------------------------
/src/test/java/com/spotify/sparkey/system/QuickLookupBenchmark.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2011-2014 Spotify AB
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 | * use this file except in compliance with the License. You may obtain a copy of
6 | * the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 | * License for the specific language governing permissions and limitations under
14 | * the License.
15 | */
16 | package com.spotify.sparkey.system;
17 |
18 | import com.spotify.sparkey.CompressionType;
19 | import com.spotify.sparkey.Sparkey;
20 | import com.spotify.sparkey.SparkeyReader;
21 | import com.spotify.sparkey.SparkeyWriter;
22 | import com.spotify.sparkey.UtilTest;
23 | import org.openjdk.jmh.annotations.Benchmark;
24 | import org.openjdk.jmh.annotations.BenchmarkMode;
25 | import org.openjdk.jmh.annotations.Fork;
26 | import org.openjdk.jmh.annotations.Level;
27 | import org.openjdk.jmh.annotations.Measurement;
28 | import org.openjdk.jmh.annotations.Mode;
29 | import org.openjdk.jmh.annotations.OutputTimeUnit;
30 | import org.openjdk.jmh.annotations.Param;
31 | import org.openjdk.jmh.annotations.Scope;
32 | import org.openjdk.jmh.annotations.Setup;
33 | import org.openjdk.jmh.annotations.State;
34 | import org.openjdk.jmh.annotations.TearDown;
35 | import org.openjdk.jmh.annotations.Warmup;
36 |
37 | import java.io.File;
38 | import java.io.IOException;
39 | import java.util.ArrayList;
40 | import java.util.Arrays;
41 | import java.util.Collections;
42 | import java.util.List;
43 | import java.util.Random;
44 | import java.util.concurrent.TimeUnit;
45 | import java.util.concurrent.atomic.AtomicInteger;
46 |
47 | @State(Scope.Benchmark)
48 | @Warmup(iterations = 4, time = 3)
49 | @Measurement(iterations = 10, time = 3)
50 | @Fork(value = 1, warmups = 0)
51 | public class QuickLookupBenchmark {
52 |
53 | private File indexFile;
54 | private File logFile;
55 | private SparkeyReader reader;
56 | private String[] keysArray;
57 | private int counter = 0;
58 | private int bitmask;
59 |
60 | @Setup(Level.Trial)
61 | public void setup() throws IOException {
62 | indexFile = new File("test.spi");
63 | logFile = Sparkey.getLogFile(indexFile);
64 |
65 | CompressionType compressionType = CompressionType.valueOf(type);
66 |
67 | indexFile.deleteOnExit();
68 | logFile.deleteOnExit();
69 | UtilTest.delete(indexFile);
70 | UtilTest.delete(logFile);
71 |
72 | bitmask = numElements - 1;
73 |
74 | SparkeyWriter writer = Sparkey.createNew(indexFile, compressionType, 1024);
75 | final List keys = new ArrayList<>();
76 | for (int i = 0; i < numElements; i++) {
77 | final String key = "key_" + i;
78 | writer.put(key, "value_" + i);
79 | keys.add(key);
80 | }
81 | Collections.shuffle(keys, new Random(891273791623L));
82 | keysArray = keys.toArray(new String[0]);
83 | writer.writeHash();
84 | writer.close();
85 |
86 | reader = Sparkey.open(indexFile);
87 | }
88 |
89 | @TearDown(Level.Trial)
90 | public void tearDown() throws IOException {
91 | reader.close();
92 | UtilTest.delete(indexFile);
93 | UtilTest.delete(logFile);
94 | }
95 |
96 | @Param({"1048576"})
97 | public int numElements;
98 |
99 | @Param({"NONE"})
100 | public String type;
101 |
102 | @Benchmark
103 | @BenchmarkMode(Mode.Throughput)
104 | @OutputTimeUnit(TimeUnit.SECONDS)
105 | public String test() throws IOException {
106 | return reader.getAsString("key_" + keysArray[++counter & bitmask]);
107 | }
108 | }
109 |
--------------------------------------------------------------------------------
/RELEASE:
--------------------------------------------------------------------------------
1 | How to release:
2 |
3 | ## Pre-Release Verification
4 |
5 | Before starting the release, run the verification script to catch common issues:
6 |
7 | ```bash
8 | ./verify-release-ready.sh
9 | ```
10 |
11 | This checks:
12 | - Java 8 is active
13 | - Git working directory is clean
14 | - On master branch
15 | - Maven settings configured
16 | - Javadoc builds without errors
17 | - SCM URLs match git remote
18 | - Tests pass
19 | - GPG key is available
20 | - Bytecode version is Java 8
21 |
22 | If you have a custom SSH alias for GitHub (like `github.com-spotify`), configure git URL rewriting:
23 |
24 | ```bash
25 | git config --local url."git@github.com-spotify:".insteadOf "git@github.com:"
26 | ```
27 |
28 | ## Maven Settings
29 |
30 | **IMPORTANT**: Sonatype has migrated from legacy OSSRH (oss.sonatype.org) to the
31 | Central Publishing Portal (central.sonatype.com). You need a user token, not a password.
32 |
33 | ### Get User Token
34 |
35 | 1. Go to https://central.sonatype.com/account
36 | 2. Generate or view your user token
37 | 3. Add the token to ~/.m2/settings.xml:
38 |
39 | ```xml
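40 | <settings>
41 |   <servers>
42 |     <server>
43 |       <id>central</id>
44 |       <username>YOUR_TOKEN_USERNAME</username>
45 |       <password>YOUR_TOKEN_PASSWORD</password>
46 |     </server>
47 |   </servers>
48 | </settings>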
49 | ```
50 |
51 | Optional (for GPG passphrase):
52 | ```xml
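53 | <profiles>
54 |   <profile>
55 |     <id>gpg</id>
56 |     <activation>
57 |       <activeByDefault>true</activeByDefault>
58 |     </activation>
59 |     <properties>
60 |       <gpg.executable>gpg</gpg.executable>
61 |       <gpg.passphrase>YOUR_GPG_PASSPHRASE</gpg.passphrase>
62 |     </properties>
63 |   </profile>
64 | </profiles>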
65 | ```
66 |
67 | ## Release Process
68 |
69 | ```bash
70 | # 1. Run pre-release verification
71 | ./verify-release-ready.sh
72 |
73 | # 2. Execute release (both prepare and perform can be run together)
74 | mvn -B release:prepare release:perform -Darguments="-DskipTests=true"
75 |
76 | # Note: Tests are skipped during release since they were already verified in step 1
77 | ```
78 |
79 | **What happens during release:**
80 | - `release:prepare` - Creates release tag, bumps version, commits to git, pushes to GitHub
81 | - `release:perform` - Builds artifacts, signs with GPG, uploads to Maven Central, auto-publishes
82 |
83 | **Important notes:**
84 | - The `-Psonatype-oss-release` profile is NO longer needed with Central Publishing Portal
85 | - Artifacts are automatically published (no manual staging/closing required)
86 | - Release takes ~1 minute total
87 |
88 | ## Post-Release Verification
89 |
90 | After release completes successfully:
91 |
92 | ```bash
93 | # Quick verification
94 | git fetch --tags
95 | git tag | grep sparkey-X.X.X
96 | grep "<version>" pom.xml | head -1 # Should show X.X.X+1-SNAPSHOT
97 | ```
98 |
99 | **Check deployment status:**
100 | https://central.sonatype.com/publishing/deployments
101 |
102 | **Full post-release checklist:**
103 | See [POST-RELEASE-CHECKLIST.md](POST-RELEASE-CHECKLIST.md) for complete verification steps.
104 |
105 | **Create GitHub release (optional):**
106 | ```bash
107 | gh release create sparkey-X.X.X --title "Version X.X.X" --notes "See CHANGELOG.md"
108 | ```
109 |
110 | ## Troubleshooting Failed Releases
111 |
112 | If the release fails partway through:
113 |
114 | ```bash
115 | # Run cleanup script
116 | ./cleanup-failed-release.sh
117 |
118 | # Review what went wrong
119 | # Fix the issue (update pom.xml, fix tests, etc.)
120 |
121 | # Re-run verification
122 | ./verify-release-ready.sh
123 |
124 | # Retry release
125 | mvn -B release:prepare release:perform -Darguments="-DskipTests=true"
126 | ```
127 |
128 | Common issues and solutions documented in [POST-RELEASE-CHECKLIST.md](POST-RELEASE-CHECKLIST.md#troubleshooting)
129 |
130 |
--------------------------------------------------------------------------------
/src/main/java/com/spotify/sparkey/SparkeyImplSelector.java:
--------------------------------------------------------------------------------
1 | package com.spotify.sparkey;
2 |
3 | import com.spotify.sparkey.extra.PooledSparkeyReader;
4 |
5 | import java.io.File;
6 | import java.io.IOException;
7 |
8 | /**
9 | * Selects the appropriate Sparkey implementation based on the Java version.
10 | *
11 | * This class is overridden via Multi-Release JAR to provide optimized
12 | * implementations on Java 22+. The base implementation uses Java 8-compatible
13 | * FileChannel-based readers.
14 | */
15 | class SparkeyImplSelector {
16 |
17 | /**
18 | * Open a SparkeyReader with the optimal implementation for the current Java version.
19 | *
20 | * Base implementation (Java 8-21): Returns PooledSparkeyReader using FileChannel.
21 | * Java 22+ override: Returns optimized implementations using MemorySegment API.
22 | *
23 | * @param file File base to use, the actual file endings will be set to .spi and .spl
24 | * @return an optimal SparkeyReader for the current Java version
25 | * @throws IOException if the file cannot be opened
26 | */
27 | static SparkeyReader open(File file) throws IOException {
28 | return PooledSparkeyReader.open(file);
29 | }
30 |
31 | /**
32 | * Open a single-threaded SparkeyReader.
33 | *
34 | * This is not thread-safe and should only be used from one thread.
35 | *
36 | * @param file File base to use, the actual file endings will be set to .spi and .spl
37 | * @return a single-threaded SparkeyReader
38 | * @throws IOException if the file cannot be opened
39 | */
40 | static SparkeyReader openSingleThreaded(File file) throws IOException {
41 | return SingleThreadedSparkeyReader.open(file);
42 | }
43 |
44 | /**
45 | * Open a pooled SparkeyReader with default pool size.
46 | *
47 | * @param file File base to use, the actual file endings will be set to .spi and .spl
48 | * @return a pooled SparkeyReader
49 | * @throws IOException if the file cannot be opened
50 | */
51 | static SparkeyReader openPooled(File file) throws IOException {
52 | return PooledSparkeyReader.open(file);
53 | }
54 |
55 | /**
56 | * Open a pooled SparkeyReader with the specified pool size.
57 | *
58 | * @param file File base to use, the actual file endings will be set to .spi and .spl
59 | * @param poolSize number of reader instances (minimum 1)
60 | * @return a pooled SparkeyReader
61 | * @throws IOException if the file cannot be opened
62 | */
63 | static SparkeyReader openPooled(File file, int poolSize) throws IOException {
64 | return PooledSparkeyReader.open(file, poolSize);
65 | }
66 |
67 | /**
68 | * Open an uncompressed reader using Java 22+ MemorySegment API.
69 | * Only available on Java 22+.
70 | *
71 | * @param file File base to use, the actual file endings will be set to .spi and .spl
72 | * @return UncompressedSparkeyReaderJ22 (on Java 22+)
73 | * @throws UnsupportedOperationException on Java < 22
74 | * @throws IOException if the file cannot be opened
75 | */
76 | static SparkeyReader openUncompressedJ22(File file) throws IOException {
77 | throw new UnsupportedOperationException(
78 | "UncompressedSparkeyReaderJ22 requires Java 22+, currently running " +
79 | System.getProperty("java.version"));
80 | }
81 |
82 | /**
83 | * Open a single-threaded reader using Java 22+ MemorySegment API.
84 | * Only available on Java 22+.
85 | *
86 | * @param file File base to use, the actual file endings will be set to .spi and .spl
87 | * @return SingleThreadedSparkeyReaderJ22 (on Java 22+)
88 | * @throws UnsupportedOperationException on Java < 22
89 | * @throws IOException if the file cannot be opened
90 | */
91 | static SparkeyReader openSingleThreadedJ22(File file) throws IOException {
92 | throw new UnsupportedOperationException(
93 | "SingleThreadedSparkeyReaderJ22 requires Java 22+, currently running " +
94 | System.getProperty("java.version"));
95 | }
96 | }
97 |
--------------------------------------------------------------------------------
/src/main/java/com/spotify/sparkey/CompressedReader.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2011-2013 Spotify AB
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 | * use this file except in compliance with the License. You may obtain a copy of
6 | * the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 | * License for the specific language governing permissions and limitations under
14 | * the License.
15 | */
16 | package com.spotify.sparkey;
17 |
18 | import java.io.IOException;
19 | import java.io.InputStream;
20 |
21 | final class CompressedReader extends BlockPositionedInputStream {
22 | private final CompressorType compressor;
23 | private final byte[] uncompressedBuf;
24 | private final byte[] compressedBuf;
25 | private int bufPos;
26 | private int blockSize;
27 |
28 | private long curBlockStart;
29 | private long nextBlockStart;
30 |
31 | public CompressedReader(CompressorType compressor, InputStream data, int maxBlockSize, long start) {
32 | super(data);
33 | this.compressor = compressor;
34 | blockSize = 0;
35 | bufPos = 0;
36 | curBlockStart = start;
37 | nextBlockStart = start;
38 | uncompressedBuf = new byte[maxBlockSize];
39 | compressedBuf = new byte[compressor.maxCompressedLength(maxBlockSize)];
40 | }
41 |
42 | @Override
43 | public int read() throws IOException {
44 | if (bufPos == blockSize) {
45 | fetchBlock();
46 | }
47 | return ((int) uncompressedBuf[bufPos++]) & 0xFF;
48 | }
49 |
50 | private void fetchBlock() throws IOException {
51 | int compressedSize = Util.readUnsignedVLQInt(input);
52 | input.read(compressedBuf, 0, compressedSize);
53 | int uncompressedSize = compressor.uncompress(compressedBuf, compressedSize, uncompressedBuf);
54 | bufPos = 0;
55 | blockSize = uncompressedSize;
56 |
57 | curBlockStart = nextBlockStart;
58 | nextBlockStart = curBlockStart + Util.unsignedVLQSize(compressedSize) + compressedSize;
59 | }
60 |
61 | @Override
62 | public int read(byte[] b) throws IOException {
63 | return read(b, 0, b.length);
64 | }
65 |
66 | @Override
67 | public int read(byte[] b, int off, int len) throws IOException {
68 | int remain = len;
69 | while (remain > 0) {
70 | int didRead = readImpl(b, off, remain);
71 | off += didRead;
72 | remain -= didRead;
73 | }
74 | return len;
75 | }
76 |
77 | private int readImpl(byte[] b, int off, int len) throws IOException {
78 | int available = available();
79 | if (len <= available) {
80 | System.arraycopy(uncompressedBuf, bufPos, b, off, len);
81 | bufPos += len;
82 | return len;
83 | } else {
84 | System.arraycopy(uncompressedBuf, bufPos, b, off, available);
85 | bufPos = blockSize;
86 | fetchBlock();
87 | return available;
88 | }
89 | }
90 |
91 | @Override
92 | public long skip(long n) throws IOException {
93 | long remain = n;
94 | while (remain > 0) {
95 | remain -= skipImpl(remain);
96 | }
97 | return n;
98 | }
99 |
100 | private long skipImpl(long n) throws IOException {
101 | int available = available();
102 | if (n <= available) {
103 | bufPos += n;
104 | return n;
105 | } else {
106 | bufPos = blockSize;
107 | fetchBlock();
108 | return available;
109 | }
110 | }
111 |
112 | @Override
113 | long getBlockPosition() {
114 | if (bufPos == blockSize) {
115 | return nextBlockStart;
116 | }
117 | return curBlockStart;
118 | }
119 |
120 | @Override
121 | public int available() throws IOException {
122 | return blockSize - bufPos;
123 | }
124 | }
125 |
--------------------------------------------------------------------------------
/src/main/java/com/spotify/sparkey/CompressedOutputStream.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2011-2013 Spotify AB
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 | * use this file except in compliance with the License. You may obtain a copy of
6 | * the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 | * License for the specific language governing permissions and limitations under
14 | * the License.
15 | */
16 | package com.spotify.sparkey;
17 |
18 | import java.io.FileDescriptor;
19 | import java.io.IOException;
20 | import java.io.OutputStream;
21 | import java.io.SyncFailedException;
22 |
23 | final class CompressedOutputStream extends OutputStream {
24 | private final CompressorType compressor;
25 | private final int maxBlockSize;
26 | private final OutputStream output;
27 |
28 | private final byte[] uncompressedBuffer;
29 | private final byte[] compressedBuffer;
30 | private final FileDescriptor fileDescriptor;
31 | private int pending;
32 | private CompressedWriter listener = CompressedWriter.DUMMY;
33 |
34 | CompressedOutputStream(CompressorType compressor, int maxBlockSize, OutputStream output, FileDescriptor fileDescriptor) throws IOException {
35 | this.compressor = compressor;
36 | this.fileDescriptor = fileDescriptor;
37 | if (maxBlockSize < 10) {
38 | throw new IOException("Too small block size - won't be able to fit keylen + valuelen in a single block");
39 | }
40 | this.maxBlockSize = maxBlockSize;
41 | this.output = output;
42 | uncompressedBuffer = new byte[maxBlockSize];
43 | compressedBuffer = new byte[compressor.maxCompressedLength(maxBlockSize)];
44 | }
45 |
46 | @Override
47 | public void flush() throws IOException {
48 | if (pending == 0) {
49 | return;
50 | }
51 |
52 | int compressedSize = compressor.compress(uncompressedBuffer, pending, compressedBuffer);
53 | Util.writeUnsignedVLQ(compressedSize, output);
54 | output.write(compressedBuffer, 0, compressedSize);
55 | output.flush();
56 | pending = 0;
57 | listener.afterFlush();
58 | }
59 |
60 | public void fsync() throws SyncFailedException {
61 | fileDescriptor.sync();
62 | }
63 |
64 | @Override
65 | public void close() throws IOException {
66 | flush();
67 | output.close();
68 | }
69 |
70 | @Override
71 | public void write(byte[] b) throws IOException {
72 | write(b, 0, b.length);
73 | }
74 |
75 | @Override
76 | public void write(byte[] b, int off, int len) throws IOException {
77 | while (len > 0) {
78 | int written = writeImpl(b, off, len);
79 | off += written;
80 | len -= written;
81 | }
82 | }
83 |
84 | private int writeImpl(byte[] b, int off, int len) throws IOException {
85 | int remaining = remaining();
86 | if (len < remaining) {
87 | System.arraycopy(b, off, uncompressedBuffer, pending, len);
88 | pending += len;
89 | return len;
90 | } else {
91 | System.arraycopy(b, off, uncompressedBuffer, pending, remaining);
92 | pending = maxBlockSize;
93 | flush();
94 | return remaining;
95 | }
96 | }
97 |
98 | @Override
99 | public void write(int b) throws IOException {
100 | uncompressedBuffer[pending++] = (byte) b;
101 | if (pending == maxBlockSize) {
102 | flush();
103 | }
104 | }
105 |
106 | int getPending() {
107 | return pending;
108 | }
109 |
110 | int remaining() {
111 | return maxBlockSize - pending;
112 | }
113 |
114 | void setListener(CompressedWriter compressedWriter) {
115 | listener = compressedWriter;
116 | }
117 |
118 | int getMaxBlockSize() {
119 | return maxBlockSize;
120 | }
121 | }
122 |
--------------------------------------------------------------------------------
/POST-RELEASE-CHECKLIST.md:
--------------------------------------------------------------------------------
1 | # Post-Release Checklist
2 |
3 | After a successful `mvn release:perform`, verify everything completed correctly.
4 |
5 | ## Immediate Verification (< 5 minutes)
6 |
7 | ### 1. Check Git State
8 |
9 | ```bash
10 | # Verify tag was created and pushed
11 | git fetch --tags
12 | git tag | grep sparkey-X.X.X
13 | git ls-remote --tags origin | grep sparkey-X.X.X
14 |
15 | # Verify version was bumped
16 | grep "<version>" pom.xml | head -1
17 | # Should show X.X.X+1-SNAPSHOT
18 |
19 | # Check release commits
20 | git log --oneline -3
21 | # Should show:
22 | # - [maven-release-plugin] prepare for next development iteration
23 | # - [maven-release-plugin] prepare release sparkey-X.X.X
24 |
25 | # Verify working tree is clean
26 | git status
27 | ```
28 |
29 | ### 2. Verify Central Publishing Portal
30 |
31 | Check deployment status:
32 | https://central.sonatype.com/publishing/deployments
33 |
34 | Look for your deployment:
35 | - **Status**: Should be "PUBLISHED" or "PUBLISHING"
36 | - **Deployment ID**: From the release log
37 | - **No validation errors**
38 |
39 | ### 3. Check Local Artifacts
40 |
41 | Verify artifacts were installed to local Maven repo:
42 | ```bash
43 | ls -lh ~/.m2/repository/com/spotify/sparkey/sparkey/X.X.X/
44 | ```
45 |
46 | Should contain:
47 | - `sparkey-X.X.X.jar`
48 | - `sparkey-X.X.X-sources.jar`
49 | - `sparkey-X.X.X-javadoc.jar`
50 | - `sparkey-X.X.X.pom`
51 | - All `.asc` signature files
52 |
53 | ## Maven Central Verification (30 minutes - 2 hours)
54 |
55 | ### 4. Check Maven Central Search
56 |
57 | After ~30 minutes, verify artifact appears:
58 | - https://central.sonatype.com/artifact/com.spotify.sparkey/sparkey/X.X.X
59 | - https://search.maven.org/artifact/com.spotify.sparkey/sparkey/X.X.X
60 |
61 | ### 5. Verify Downloadable
62 |
63 | Try downloading the artifact:
64 | ```bash
65 | curl -O https://repo1.maven.org/maven2/com/spotify/sparkey/sparkey/X.X.X/sparkey-X.X.X.pom
66 | cat sparkey-X.X.X.pom | grep -A 2 "<version>"
67 | rm sparkey-X.X.X.pom
68 | ```
69 |
70 | ## GitHub Release (Optional but Recommended)
71 |
72 | ### 6. Create GitHub Release
73 |
74 | ```bash
75 | # View CHANGELOG for release notes
76 | cat CHANGELOG.md | head -30
77 |
78 | # Create GitHub release
79 | gh release create sparkey-X.X.X \
80 | --title "Version X.X.X" \
81 | --notes "$(sed -n '/^#### X.X.X$/,/^#### [0-9]/p' CHANGELOG.md | head -n -1)"
82 | ```
83 |
84 | Or manually at: https://github.com/spotify/sparkey-java/releases/new
85 |
86 | ## Communication (If Needed)
87 |
88 | ### 7. Announce Release
89 |
90 | If this is a major release or contains important fixes:
91 | - Update README.md if needed
92 | - Post announcement (Slack, mailing list, etc.)
93 | - Update dependent projects
94 |
95 | ## Rollback (If Problems Found)
96 |
97 | If you discover issues AFTER release:
98 |
99 | **DO NOT delete from Maven Central** (artifacts are immutable)
100 |
101 | Instead:
102 | 1. Fix the issue
103 | 2. Release a new patch version immediately (X.X.X+1)
104 | 3. Document the issue in CHANGELOG.md
105 |
106 | ## Troubleshooting
107 |
108 | ### Deployment shows "FAILED" on Central Portal
109 |
110 | 1. Check the error message on https://central.sonatype.com/publishing/deployments
111 | 2. Common issues:
112 | - Missing metadata (name, description, url)
113 | - Invalid POM structure
114 | - Missing or invalid signatures
115 |
116 | ### Tag pushed but no artifacts on Maven Central
117 |
118 | 1. Check if `release:perform` completed successfully
119 | 2. Look for deployment errors in the Maven output
120 | 3. Check https://central.sonatype.com/publishing/deployments for the deployment
121 |
122 | ### Version not bumped correctly
123 |
124 | The release plugin should have bumped the version. If not:
125 | ```bash
126 | # Manually bump version
127 | # Edit pom.xml: X.X.X-SNAPSHOT → X.X.X+1-SNAPSHOT
128 | git add pom.xml
129 | git commit -m "Bump version to X.X.X+1-SNAPSHOT"
130 | git push
131 | ```
132 |
--------------------------------------------------------------------------------
/src/main/java22/com/spotify/sparkey/UncompressedLogReaderJ22.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2025 Spotify AB
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 | * use this file except in compliance with the License. You may obtain a copy of
6 | * the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 | * License for the specific language governing permissions and limitations under
14 | * the License.
15 | */
16 | package com.spotify.sparkey;
17 |
18 | import java.io.IOException;
19 | import java.io.InputStream;
20 | import java.lang.foreign.MemorySegment;
21 | import java.nio.charset.StandardCharsets;
22 |
23 | /**
24 | * Fully immutable log reader for UNCOMPRESSED files.
25 | * Optimized version that skips entry block handling.
26 | *
27 | * For uncompressed files:
28 | * - No entry blocks - each position points directly to an entry
29 | * - No need to skip entries within blocks
30 | */
31 | final class UncompressedLogReaderJ22 {
32 | final ReadOnlyMemMapJ22 data; // Package-private for inlined access from UncompressedIndexHashJ22
33 | private final LogHeader logHeader;
34 |
35 | UncompressedLogReaderJ22(ReadOnlyMemMapJ22 data, LogHeader logHeader) {
36 | this.data = data;
37 | this.logHeader = logHeader;
38 | }
39 |
40 | /**
41 | * Simple immutable entry that reads value data on demand.
42 | * Supports values larger than 2GB via getValueAsStream().
43 | * getValue() throws IllegalStateException for values > Integer.MAX_VALUE.
44 | */
45 | static final class ImmutableEntry implements SparkeyReader.Entry {
46 | private final int keyLen;
47 | private final byte[] key;
48 | private final long valueLen;
49 | private final long valuePosition;
50 | private final ReadOnlyMemMapJ22 data;
51 |
52 | ImmutableEntry(int keyLen, byte[] key,
53 | long valueLen, long valuePosition, ReadOnlyMemMapJ22 data) {
54 | this.keyLen = keyLen;
55 | // Defensive copy: ensure immutability even if caller reuses the key array
56 | this.key = java.util.Arrays.copyOf(key, keyLen);
57 | this.valueLen = valueLen;
58 | this.valuePosition = valuePosition;
59 | this.data = data;
60 | }
61 |
62 | @Override
63 | public int getKeyLength() {
64 | return keyLen;
65 | }
66 |
67 | @Override
68 | public byte[] getKey() {
69 | return key;
70 | }
71 |
72 | @Override
73 | public String getKeyAsString() {
74 | return new String(key, StandardCharsets.UTF_8);
75 | }
76 |
77 | @Override
78 | public long getValueLength() {
79 | return valueLen;
80 | }
81 |
82 | @Override
83 | public byte[] getValue() throws IOException {
84 | if (valueLen > Integer.MAX_VALUE) {
85 | throw new IllegalStateException("Value size is " + valueLen +
86 | " bytes, exceeds byte[] limit. Use getValueAsStream() instead.");
87 | }
88 | return data.readBytes(valuePosition, (int) valueLen);
89 | }
90 |
91 | @Override
92 | public String getValueAsString() throws IOException {
93 | return new String(getValue(), StandardCharsets.UTF_8);
94 | }
95 |
96 | @Override
97 | public InputStream getValueAsStream() {
98 | // Zero-copy stream backed by MemorySegment - no allocation, supports values > 2GB
99 | if (valueLen == 0) {
100 | // Use singleton for empty streams (edge case, but possible)
101 | return EmptyInputStream.INSTANCE;
102 | }
103 | try {
104 | MemorySegment valueSegment = data.asSlice(valuePosition, valueLen);
105 | return new MemorySegmentInputStream(valueSegment);
106 | } catch (IOException e) {
107 | throw new RuntimeException(e);
108 | }
109 | }
110 |
111 | @Override
112 | public SparkeyReader.Type getType() {
113 | return SparkeyReader.Type.PUT;
114 | }
115 | }
116 | }
117 |
--------------------------------------------------------------------------------
/src/main/java/com/spotify/sparkey/extra/SparkeyValidator.java:
--------------------------------------------------------------------------------
1 | package com.spotify.sparkey.extra;
2 |
3 | import com.spotify.sparkey.LogHeader;
4 | import com.spotify.sparkey.Sparkey;
5 | import com.spotify.sparkey.SparkeyLogIterator;
6 | import com.spotify.sparkey.SparkeyReader;
7 |
8 | import java.io.File;
9 | import java.io.IOException;
10 | import java.util.Arrays;
11 |
12 | public class SparkeyValidator {
13 | public static void main(String[] args) throws IOException {
14 | if (args.length < 1) {
15 | System.err.println("Usage: sparkey-validator ");
16 | System.exit(1);
17 | return;
18 | }
19 |
20 | File input = new File(args[0]);
21 | final File logFile = Sparkey.getLogFile(input);
22 | if (!logFile.exists()) {
23 | System.err.println(logFile.getAbsolutePath() + " does not exist");
24 | System.exit(1);
25 | return;
26 | }
27 | if (!logFile.isFile()) {
28 | System.err.println(logFile.getAbsolutePath() + " is not a file");
29 | System.exit(1);
30 | return;
31 | }
32 |
33 | final File indexFile = Sparkey.getIndexFile(input);
34 | if (!indexFile.exists()) {
35 | System.err.println(indexFile.getAbsolutePath() + " does not exist");
36 | System.exit(1);
37 | return;
38 | }
39 | if (!indexFile.isFile()) {
40 | System.err.println(indexFile.getAbsolutePath() + " is not a file");
41 | System.exit(1);
42 | return;
43 | }
44 |
45 | LogHeader logHeader = Sparkey.getLogHeader(logFile);
46 |
47 | final SparkeyReader reader = Sparkey.open(indexFile);
48 | final SparkeyReader reader2 = Sparkey.open(indexFile);
49 |
50 | boolean inconsistent = false;
51 |
52 | System.out.println("Validating log iterator.");
53 |
54 | final SparkeyLogIterator iterator = new SparkeyLogIterator(logFile);
55 | for (SparkeyReader.Entry entry : iterator) {
56 | switch (entry.getType()) {
57 | case PUT:
58 | validateKey(logHeader, entry);
59 | validateValue(logHeader, entry);
60 |
61 | // Just make sure this doesn't crash
62 | reader.getAsByteArray(entry.getKey());
63 |
64 | break;
65 | case DELETE:
66 | validateKey(logHeader, entry);
67 | checkState(0 == entry.getValueLength());
68 |
69 | // Just make sure this doesn't crash
70 | reader.getAsByteArray(entry.getKey());
71 |
72 | break;
73 | default:
74 | throw new RuntimeException("Unknown type: " + entry.getType());
75 | }
76 | }
77 |
78 | System.out.println("Validating hash iterator and random lookups.");
79 | for (SparkeyReader.Entry entry : reader) {
80 | final byte[] value = entry.getValue();
81 |
82 | validateKey(logHeader, entry);
83 | validateValue(logHeader, entry, value);
84 |
85 |
86 | final byte[] value2 = reader2.getAsByteArray(entry.getKey());
87 | if (!Arrays.equals(value, value2)) {
88 | System.err.println("Inconsistency for key: " + entry.getKeyAsString() + " when iterating and doing a lookup");
89 | inconsistent = true;
90 | }
91 |
92 | }
93 |
94 | System.out.println("Done!");
95 |
96 | if (inconsistent) {
97 | System.exit(1);
98 | }
99 | }
100 |
101 | private static void validateKey(LogHeader logHeader, SparkeyReader.Entry entry) {
102 | checkState(entry.getKeyLength() <= logHeader.getMaxKeyLen());
103 | checkState(entry.getKeyLength() == entry.getKey().length);
104 | }
105 |
106 | private static void validateValue(LogHeader logHeader, SparkeyReader.Entry entry) throws IOException {
107 | final byte[] value = entry.getValue();
108 | validateValue(logHeader, entry, value);
109 | }
110 |
111 | private static void validateValue(LogHeader logHeader, SparkeyReader.Entry entry, byte[] value) {
112 | checkState(entry.getValueLength() <= logHeader.getMaxValueLen());
113 | checkState(entry.getValueLength() == value.length);
114 | }
115 |
116 | private static void checkState(boolean b) {
117 | if (!b) {
118 | throw new IllegalStateException();
119 | }
120 | }
121 |
122 |
123 | }
124 |
--------------------------------------------------------------------------------
/src/test/java/com/spotify/sparkey/system/SparkeyExample.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2011-2013 Spotify AB
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 | * use this file except in compliance with the License. You may obtain a copy of
6 | * the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 | * License for the specific language governing permissions and limitations under
14 | * the License.
15 | */
16 | package com.spotify.sparkey.system;
17 |
18 | import com.spotify.sparkey.*;
19 | import org.junit.Test;
20 |
21 | import java.io.File;
22 | import java.io.IOException;
23 | import java.util.Random;
24 |
25 | public class SparkeyExample {
26 | private static final int NUM_RANDOM_READS = 1000;
27 | private static final int N = 2000;
28 |
29 | public static void main(String[] args) throws Exception {
30 | final File indexFile = new File("test.spi");
31 |
32 | create(indexFile);
33 |
34 | final Random random = new Random(11234);
35 | final SparkeyReader reader = Sparkey.open(indexFile);
36 |
37 | randomReads(random, reader);
38 |
39 | rawIteration(new SparkeyLogIterator(Sparkey.getLogFile(indexFile)));
40 |
41 | iteration(reader);
42 |
43 | reader.close();
44 | }
45 |
46 | private static void randomReads(Random random, SparkeyReader reader) throws IOException {
47 | for (int i = 0; i < NUM_RANDOM_READS; i++) {
48 | int k = random.nextInt(N);
49 | String key = "Key" + k;
50 | String entry = reader.getAsString(key);
51 | if (!("Value" + k).equals(entry)) {
52 | throw new RuntimeException("Expected " + "Value" + k + " but got " + entry);
53 | }
54 | }
55 | }
56 |
57 | private static void create(File indexFile) throws IOException {
58 | final SparkeyWriter writer = Sparkey.createNew(indexFile, CompressionType.SNAPPY, 512);
59 | for (int i = 0; i < N; i++) {
60 | writer.put("Key" + i, "Value" + i);
61 | }
62 | writer.flush();
63 | writer.writeHash();
64 | writer.close();
65 | }
66 |
67 | private static void iteration(final SparkeyReader reader) throws IOException {
68 | int i = 0;
69 | for (SparkeyReader.Entry entry : reader) {
70 | String key = entry.getKeyAsString();
71 | String value = entry.getValueAsString();
72 |
73 | String expectedKey = "Key" + i;
74 | String expectedValue = "Value" + i;
75 |
76 | if (!key.equals(expectedKey)) {
77 | throw new RuntimeException("Expected " + expectedKey + " but got " + key);
78 | }
79 | if (!value.equals(expectedValue)) {
80 | throw new RuntimeException("Expected '" + expectedValue + "' but got '" + value + "' for key '" + key + "'");
81 | }
82 | i++;
83 | }
84 | if (i != N) {
85 | throw new RuntimeException("Only got " + i + " entries, expected " + N);
86 | }
87 | }
88 |
89 | private static void rawIteration(final SparkeyLogIterator logIterator) throws IOException {
90 | int i = 0;
91 | for (SparkeyReader.Entry entry : logIterator) {
92 | if (entry.getType() == SparkeyReader.Type.PUT) {
93 | String key = entry.getKeyAsString();
94 | String value = entry.getValueAsString();
95 |
96 | String expectedKey = "Key" + (i % N);
97 | String expectedValue = "Value" + (i);
98 | if (!key.equals(expectedKey)) {
99 | throw new RuntimeException("Expected " + expectedKey + " but got " + key);
100 | }
101 | if (!value.equals(expectedValue)) {
102 | throw new RuntimeException("Expected " + expectedValue + " but got " + value);
103 | }
104 |
105 | i++;
106 | }
107 | }
108 | if (i != N) {
109 | throw new RuntimeException("Only got " + i + " entries, expected " + 2 * N);
110 | }
111 | }
112 |
113 | @Test
114 | public void dummy() {
115 | // Just to make the junit test runner work
116 | }
117 |
118 | }
119 |
--------------------------------------------------------------------------------
/src/main/java/com/spotify/sparkey/LogWriter.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2011-2013 Spotify AB
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 | * use this file except in compliance with the License. You may obtain a copy of
6 | * the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 | * License for the specific language governing permissions and limitations under
14 | * the License.
15 | */
16 | package com.spotify.sparkey;
17 |
18 | import java.io.*;
19 | import java.nio.charset.StandardCharsets;
20 |
21 | final class LogWriter {
22 | private final LogHeader header;
23 | private final File file;
24 | private final BlockOutput logStream;
25 | private boolean closed;
26 |
27 | private LogWriter(File file, CompressionType compressionType, int compressionBlockSize) throws IOException {
28 | this.file = file;
29 | header = new LogHeader(compressionType, compressionBlockSize);
30 | header.write(file, false);
31 | logStream = setup(header, file);
32 | }
33 |
34 | private LogWriter(File file) throws IOException {
35 | this.file = file;
36 | if (!file.exists()) {
37 | throw new FileNotFoundException(file.getCanonicalPath());
38 | }
39 | header = LogHeader.read(file);
40 | logStream = setup(header, file);
41 | }
42 |
43 | File getFile() {
44 | return file;
45 | }
46 |
47 | private static BlockOutput setup(LogHeader header, File file) throws IOException {
48 | truncate(file, header.getDataEnd());
49 | FileOutputStream fileOutputStream = new FileOutputStream(file, true);
50 | Sparkey.incrOpenFiles();
51 | FileDescriptor fd = fileOutputStream.getFD();
52 | OutputStream stream = new BufferedOutputStream(fileOutputStream, 1024 * 1024);
53 | return header.getCompressionTypeBackend().createBlockOutput(fd, stream, header.getCompressionBlockSize(),
54 | header.getMaxEntriesPerBlock());
55 | }
56 |
57 | private static void truncate(File file, long size) throws IOException {
58 | try (RandomAccessFile rw = new RandomAccessFile(file, "rw")) {
59 | rw.setLength(size);
60 | }
61 | }
62 |
63 | static LogWriter createNew(File file, CompressionType compressionType, int compressionBlockSize) throws IOException {
64 | return new LogWriter(file, compressionType, compressionBlockSize);
65 | }
66 |
67 | static LogWriter openExisting(File file) throws IOException {
68 | return new LogWriter(file);
69 | }
70 |
71 | void flush(boolean fsync) throws IOException {
72 | logStream.flush(fsync);
73 | writeHeader(fsync);
74 | }
75 |
76 | private void writeHeader(boolean fsync) throws IOException {
77 | header.setMaxEntriesPerBlock(logStream.getMaxEntriesPerBlock());
78 | header.setDataEnd(file.length());
79 | header.write(file, fsync);
80 | }
81 |
82 | void close(boolean fsync) throws IOException {
83 | if (closed) {
84 | return;
85 | }
86 | closed = true;
87 | logStream.close(fsync);
88 | Sparkey.decrOpenFiles();
89 | writeHeader(fsync);
90 | }
91 |
92 | void put(String key, String value) throws IOException {
93 | put(key.getBytes(StandardCharsets.UTF_8), value.getBytes(StandardCharsets.UTF_8));
94 | }
95 |
96 | void put(byte[] key, byte[] value) throws IOException {
97 | logStream.put(key, key.length, value, value.length);
98 | header.put(key.length, value.length);
99 | }
100 |
101 | void put(byte[] key, InputStream value, long valueLen) throws IOException {
102 | logStream.put(key, key.length, value, valueLen);
103 | header.put(key.length, valueLen);
104 | }
105 |
106 | void delete(String key) throws IOException {
107 | delete(key.getBytes(StandardCharsets.UTF_8));
108 | }
109 |
110 | void delete(byte[] key) throws IOException {
111 | if (key.length <= header.getMaxKeyLen()) {
112 | logStream.delete(key, key.length);
113 | header.delete(key.length);
114 | }
115 | }
116 |
117 | }
118 |
--------------------------------------------------------------------------------
/src/test/java/com/spotify/sparkey/system/MemoryLock.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2025 Spotify AB
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 | * use this file except in compliance with the License. You may obtain a copy of
6 | * the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 | * License for the specific language governing permissions and limitations under
14 | * the License.
15 | */
16 | package com.spotify.sparkey.system;
17 |
18 | import java.lang.foreign.*;
19 | import java.lang.invoke.MethodHandle;
20 |
21 | /**
22 | * Wrapper for native mlock() syscall using Foreign Function & Memory API.
23 | * Allows locking memory-mapped pages in RAM to prevent page faults during benchmarking.
24 | */
25 | public class MemoryLock {
26 |
27 | private static final Linker LINKER = Linker.nativeLinker();
28 | private static final MethodHandle MLOCK;
29 | private static final MethodHandle MUNLOCK;
30 |
31 | static {
32 | // Look up native mlock and munlock functions
33 | // int mlock(const void *addr, size_t len);
34 | // int munlock(const void *addr, size_t len);
35 |
36 | SymbolLookup stdlib = LINKER.defaultLookup();
37 |
38 | FunctionDescriptor mlockDesc = FunctionDescriptor.of(
39 | ValueLayout.JAVA_INT, // return type: int
40 | ValueLayout.ADDRESS, // addr: void*
41 | ValueLayout.JAVA_LONG // len: size_t
42 | );
43 |
44 | MemorySegment mlockAddr = stdlib.find("mlock")
45 | .orElseThrow(() -> new UnsupportedOperationException("mlock not available"));
46 | MemorySegment munlockAddr = stdlib.find("munlock")
47 | .orElseThrow(() -> new UnsupportedOperationException("munlock not available"));
48 |
49 | MLOCK = LINKER.downcallHandle(mlockAddr, mlockDesc);
50 | MUNLOCK = LINKER.downcallHandle(munlockAddr, mlockDesc);
51 | }
52 |
53 | /**
54 | * Lock a MemorySegment in RAM, preventing it from being paged out.
55 | * Per mlock(2) man page: "All pages that contain a part of the specified address
56 | * range are guaranteed to be resident in RAM when the call returns successfully."
57 | *
58 | * @param segment The memory segment to lock
59 | * @return true if successful, false if mlock failed (e.g., insufficient privileges)
60 | */
61 | public static boolean lock(MemorySegment segment) {
62 | try {
63 | int result = (int) MLOCK.invoke(segment, segment.byteSize());
64 | return result == 0;
65 | } catch (Throwable e) {
66 | return false;
67 | }
68 | }
69 |
70 | /**
71 | * Unlock a previously locked MemorySegment, allowing it to be paged out.
72 | *
73 | * @param segment The memory segment to unlock
74 | * @return true if successful, false if munlock failed
75 | */
76 | public static boolean unlock(MemorySegment segment) {
77 | try {
78 | int result = (int) MUNLOCK.invoke(segment, segment.byteSize());
79 | return result == 0;
80 | } catch (Throwable e) {
81 | return false;
82 | }
83 | }
84 |
85 | /**
86 | * Check if mlock is likely to work by testing current ulimit -l.
87 | * Returns the maximum lockable memory in bytes, or -1 if unlimited.
88 | */
89 | public static long getMaxLockedMemory() {
90 | try {
91 | // This would require another FFI call to getrlimit(RLIMIT_MEMLOCK)
92 | // For now, just suggest checking manually
93 | ProcessBuilder pb = new ProcessBuilder("sh", "-c", "ulimit -l");
94 | Process p = pb.start();
95 | byte[] output = p.getInputStream().readAllBytes();
96 | p.waitFor();
97 |
98 | String result = new String(output).trim();
99 | if (result.equals("unlimited")) {
100 | return -1;
101 | }
102 |
103 | // ulimit -l returns KB, convert to bytes
104 | return Long.parseLong(result) * 1024;
105 | } catch (Exception e) {
106 | System.err.println("Could not check ulimit -l: " + e.getMessage());
107 | return 0;
108 | }
109 | }
110 | }
111 |
--------------------------------------------------------------------------------
/src/main/java/com/spotify/sparkey/SparkeyWriter.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2011-2013 Spotify AB
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 | * use this file except in compliance with the License. You may obtain a copy of
6 | * the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 | * License for the specific language governing permissions and limitations under
14 | * the License.
15 | */
16 | package com.spotify.sparkey;
17 |
18 | import java.io.Closeable;
19 | import java.io.IOException;
20 | import java.io.InputStream;
21 |
/**
 * Writer interface for appending puts and deletes, and for creating or rewriting
 * the hash index that makes those entries visible to random lookups.
 *
 * <p>The setters configure how subsequent {@link #flush()} and
 * {@link #writeHash()} calls behave.
 */
public interface SparkeyWriter extends Closeable {
  /**
   * Append the key/value pair to the writer, as UTF-8.
   *
   * @param key the key
   * @param value the value
   * @throws IOException declared by the interface; the conditions depend on the implementation
   */
  void put(String key, String value) throws IOException;

  /**
   * Append the key/value pair to the writer.
   *
   * @param key the key bytes
   * @param value the value bytes
   * @throws IOException declared by the interface; the conditions depend on the implementation
   */
  void put(byte[] key, byte[] value) throws IOException;

  /**
   * Append the key/value pair to the writer, reading the value from a stream.
   *
   * Only uses the first valueLen bytes from valueStream.
   *
   * @param key the key bytes
   * @param valueStream stream to read the value from
   * @param valueLen number of bytes of valueStream that make up the value
   * @throws IOException declared by the interface; the conditions depend on the implementation
   */
  void put(byte[] key, InputStream valueStream, long valueLen) throws IOException;

  /**
   * Deletes the key from the writer, as UTF-8.
   *
   * @param key the key to delete
   * @throws IOException declared by the interface; the conditions depend on the implementation
   */
  void delete(String key) throws IOException;

  /**
   * Deletes the key from the writer.
   *
   * @param key the key bytes to delete
   * @throws IOException declared by the interface; the conditions depend on the implementation
   */
  void delete(byte[] key) throws IOException;

  /**
   * Flush all pending writes to file.
   *
   * @throws IOException declared by the interface; the conditions depend on the implementation
   */
  void flush() throws IOException;

  /**
   * Flush and close the writer.
   *
   * @throws IOException declared by the interface; the conditions depend on the implementation
   */
  @Override
  void close() throws IOException;

  /**
   * Create or rewrite the index,
   * which is required for random lookups to be visible.
   *
   * @throws IOException declared by the interface; the conditions depend on the implementation
   */
  void writeHash() throws IOException;

  /**
   * Create or rewrite the index,
   * which is required for random lookups to be visible.
   *
   * @param hashType choice of hash type, can be 32 or 64 bits.
   * @throws IOException declared by the interface; the conditions depend on the implementation
   * @deprecated Use writer.setHashType(hashType); writer.writeHash(); instead
   */
  @Deprecated
  void writeHash(HashType hashType) throws IOException;

  /**
   * Set whether or not flushes and hash writes should be synced to disk.
   *
   * @param fsync whether or not flushes and hash writes should be synced to disk
   */
  void setFsync(boolean fsync);

  /**
   * Set the hash type for all subsequent writeHash operations.
   *
   * @param hashType choice of hash type, can be 32 or 64 bits.
   *                 if null, will use the default.
   */
  void setHashType(HashType hashType);

  /**
   * Set the sparsity for all subsequent writeHash operations.
   * A sparsity of 1.0 would mean that every slot in the hash table is occupied.
   * The actual minimum sparsity level is 1.3, values lower than this are ignored.
   *
   * @param sparsity the requested sparsity; values below 1.3 are ignored
   */
  void setHashSparsity(double sparsity);

  /**
   * Set the hash seed to use. Default: a random seed.
   * If set to 0, a random seed will be used.
   *
   * @param hashSeed the seed to use, or 0 to request a random seed
   */
  void setHashSeed(int hashSeed);

  /**
   * Set the maximum amount of memory to use for index construction.
   * Default: Runtime.freeMemory() / 2
   *
   * @param maxMemory the memory limit; presumably in bytes, matching the unit of
   *                  Runtime.freeMemory() used for the default - TODO confirm
   */
  void setMaxMemory(long maxMemory);

  /**
   * Set which construction method to use to create the hash index.
   * Default: AUTO
   *
   * @param method the construction method to use
   */
  void setConstructionMethod(ConstructionMethod method);

  /**
   * Strategies for building the hash index.
   */
  enum ConstructionMethod {
    /**
     * Chooses construction method dynamically based on size of data and available memory.
     */
    AUTO,

    /**
     * Write hash index in memory
     */
    IN_MEMORY,

    /**
     * Sort hash entries before writing to the hash index.
     */
    SORTING
  }
}
137 |
--------------------------------------------------------------------------------
/src/main/java/com/spotify/sparkey/CompressedWriter.java:
--------------------------------------------------------------------------------
1 | package com.spotify.sparkey;
2 |
3 | import java.io.IOException;
4 | import java.io.InputStream;
5 |
6 | class CompressedWriter implements BlockOutput {
7 | public static final CompressedWriter DUMMY = new CompressedWriter();
8 |
9 | private final byte[] buf = new byte[1024*1024];
10 | private final CompressedOutputStream compressedOutputStream;
11 |
12 | private int currentNumEntries;
13 | private int maxEntriesPerBlock;
14 | private boolean flushed;
15 | private final int maxBlockSize;
16 |
17 | // Only used to initialize dummy
18 | private CompressedWriter() {
19 | compressedOutputStream = null;
20 | maxBlockSize = 0;
21 | }
22 |
23 | public CompressedWriter(CompressedOutputStream compressedOutputStream, int maxEntriesPerBlock) {
24 | this.compressedOutputStream = compressedOutputStream;
25 | this.maxEntriesPerBlock = maxEntriesPerBlock;
26 | compressedOutputStream.setListener(this);
27 | maxBlockSize = this.compressedOutputStream.getMaxBlockSize();
28 | }
29 |
30 | public void afterFlush() {
31 | maxEntriesPerBlock = Math.max(currentNumEntries, maxEntriesPerBlock);
32 | currentNumEntries = 0;
33 | flushed = true;
34 | }
35 |
36 | @Override
37 | public void flush(boolean fsync) throws IOException {
38 | compressedOutputStream.flush();
39 | if (fsync) {
40 | compressedOutputStream.fsync();
41 | }
42 | }
43 |
44 | @Override
45 | public void put(byte[] key, int keyLen, byte[] value, int valueLen) throws IOException {
46 | int keySize = Util.unsignedVLQSize(keyLen + 1) + Util.unsignedVLQSize(valueLen);
47 | int totalSize = keySize + keyLen + valueLen;
48 |
49 | smartFlush(keySize, totalSize);
50 | flushed = false;
51 | currentNumEntries++;
52 |
53 | Util.writeUnsignedVLQ(keyLen + 1, compressedOutputStream);
54 | Util.writeUnsignedVLQ(valueLen, compressedOutputStream);
55 | compressedOutputStream.write(key, 0, keyLen);
56 | compressedOutputStream.write(value, 0, valueLen);
57 |
58 |
59 | // Make sure that the beginning of each block is the start of a key/value pair
60 | if (flushed && compressedOutputStream.getPending() > 0) {
61 | compressedOutputStream.flush();
62 | }
63 | }
64 |
65 | @Override
66 | public void put(byte[] key, int keyLen, InputStream value, long valueLen) throws IOException {
67 | int keySize = Util.unsignedVLQSize(keyLen + 1) + Util.unsignedVLQSize(valueLen);
68 | long totalSize = keySize + keyLen + valueLen;
69 |
70 | smartFlush(keySize, totalSize);
71 | flushed = false;
72 | currentNumEntries++;
73 |
74 | Util.writeUnsignedVLQ(keyLen + 1, compressedOutputStream);
75 | Util.writeUnsignedVLQ(valueLen, compressedOutputStream);
76 | compressedOutputStream.write(key, 0, keyLen);
77 | Util.copy(valueLen, value, compressedOutputStream, buf);
78 |
79 | // Make sure that the beginning of each block is the start of a key/value pair
80 | if (flushed && compressedOutputStream.getPending() > 0) {
81 | compressedOutputStream.flush();
82 | }
83 | }
84 |
85 | private void smartFlush(int keySize, long totalSize) throws IOException {
86 | int remaining = compressedOutputStream.remaining();
87 | if (remaining < keySize) {
88 | flush(false);
89 | } else if (remaining < totalSize && totalSize < maxBlockSize - remaining) {
90 | flush(false);
91 | }
92 | }
93 |
94 | @Override
95 | public void delete(byte[] key, int keyLen) throws IOException {
96 | int keySize = 1 + Util.unsignedVLQSize(keyLen + 1);
97 | smartFlush(keySize, keySize + keyLen);
98 |
99 | flushed = false;
100 | currentNumEntries++;
101 |
102 | compressedOutputStream.write(0);
103 | Util.writeUnsignedVLQ(keyLen, compressedOutputStream);
104 | compressedOutputStream.write(key, 0, keyLen);
105 |
106 | // Make sure that the beginning of each block is the start of a key/value pair
107 | if (flushed && compressedOutputStream.getPending() > 0) {
108 | compressedOutputStream.flush();
109 | }
110 | }
111 |
112 | @Override
113 | public void close(boolean fsync) throws IOException {
114 | flush(fsync);
115 | compressedOutputStream.close();
116 | }
117 |
118 | public int getMaxEntriesPerBlock() {
119 | return maxEntriesPerBlock;
120 | }
121 | }
122 |
--------------------------------------------------------------------------------
/src/main/java/com/spotify/sparkey/InMemoryData.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2011-2013 Spotify AB
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 | * use this file except in compliance with the License. You may obtain a copy of
6 | * the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 | * License for the specific language governing permissions and limitations under
14 | * the License.
15 | */
16 | package com.spotify.sparkey;
17 |
18 | import java.io.IOException;
19 | import java.util.ArrayList;
20 |
21 | class InMemoryData implements ReadWriteData {
22 | private static final int CHUNK_SIZE = 1 << 30;
23 | private static final int BITMASK_30 = ((1 << 30) - 1);
24 |
25 | protected final byte[][] chunks;
26 | private final long size;
27 | private final int numChunks;
28 |
29 | private int curChunkIndex;
30 | private byte[] curChunk;
31 | private int curChunkPos;
32 |
33 | InMemoryData(long size) {
34 | this.size = size;
35 | if (size < 0) {
36 | throw new IllegalArgumentException("Negative size: " + size);
37 | }
38 |
39 | final ArrayList chunksBuffer = new ArrayList<>();
40 | long offset = 0;
41 | while (offset < size) {
42 | long remaining = size - offset;
43 | int chunkSize = (int) Math.min(remaining, CHUNK_SIZE);
44 | chunksBuffer.add(new byte[chunkSize]);
45 | offset += CHUNK_SIZE;
46 | }
47 | chunks = chunksBuffer.toArray(new byte[chunksBuffer.size()][]);
48 | numChunks = chunks.length;
49 |
50 | curChunkIndex = 0;
51 | curChunk = chunks[0];
52 | }
53 |
54 | public void writeLittleEndianLong(long value) throws IOException {
55 | writeUnsignedByte((int) ((value) & 0xFF));
56 | writeUnsignedByte((int) ((value >>> 8) & 0xFF));
57 | writeUnsignedByte((int) ((value >>> 16) & 0xFF));
58 | writeUnsignedByte((int) ((value >>> 24) & 0xFF));
59 | writeUnsignedByte((int) ((value >>> 32) & 0xFF));
60 | writeUnsignedByte((int) ((value >>> 40) & 0xFF));
61 | writeUnsignedByte((int) ((value >>> 48) & 0xFF));
62 | writeUnsignedByte((int) ((value >>> 56) & 0xFF));
63 | }
64 |
65 | public void writeLittleEndianInt(int value) throws IOException {
66 | writeUnsignedByte((value) & 0xFF);
67 | writeUnsignedByte((value >>> 8) & 0xFF);
68 | writeUnsignedByte((value >>> 16) & 0xFF);
69 | writeUnsignedByte((value >>> 24) & 0xFF);
70 | }
71 |
72 | @Override
73 | public void close() throws IOException {
74 | for (int i = 0; i < numChunks; i++) {
75 | chunks[i] = null;
76 | }
77 | curChunk = null;
78 | }
79 |
80 | @Override
81 | public void seek(long pos) throws IOException {
82 | if (pos > size) {
83 | throw new IOException("Corrupt index: referencing data outside of range");
84 | }
85 | int chunkIndex = (int) (pos >>> 30);
86 | curChunkIndex = chunkIndex;
87 | curChunk = chunks[chunkIndex];
88 | curChunkPos = ((int) pos) & BITMASK_30;
89 | }
90 |
91 | @Override
92 | public void writeUnsignedByte(int value) throws IOException {
93 | if (curChunkPos == CHUNK_SIZE) {
94 | next();
95 | }
96 | curChunk[curChunkPos++] = (byte) value;
97 | }
98 |
99 | private void next() throws IOException {
100 | curChunkIndex++;
101 | if (curChunkIndex >= chunks.length) {
102 | throw new IOException("Corrupt index: referencing data outside of range");
103 | }
104 | curChunk = chunks[curChunkIndex];
105 | curChunkPos = 0;
106 | }
107 |
108 | @Override
109 | public int readUnsignedByte() throws IOException {
110 | if (curChunkPos == CHUNK_SIZE) {
111 | next();
112 | }
113 | return Util.unsignedByte(curChunk[curChunkPos++]);
114 | }
115 |
116 | @Override
117 | public int readLittleEndianInt() throws IOException {
118 | return Util.readLittleEndianIntSlowly(this);
119 | }
120 |
121 | @Override
122 | public long readLittleEndianLong() throws IOException {
123 | return Util.readLittleEndianLongSlowly(this);
124 | }
125 |
126 | @Override
127 | public String toString() {
128 | return "InMemoryData{" +
129 | "size=" + size +
130 | '}';
131 | }
132 | }
133 |
--------------------------------------------------------------------------------
/src/test/java/com/spotify/sparkey/system/ReaderParametrizedLargeFilesTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2025 Spotify AB
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not
5 | * use this file except in compliance with the License. You may obtain a copy of
6 | * the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13 | * License for the specific language governing permissions and limitations under
14 | * the License.
15 | */
16 | package com.spotify.sparkey.system;
17 |
18 | import com.spotify.sparkey.CompressionType;
19 | import com.spotify.sparkey.HashType;
20 | import com.spotify.sparkey.Sparkey;
21 | import com.spotify.sparkey.SparkeyReader;
22 | import com.spotify.sparkey.SparkeyWriter;
23 | import com.spotify.sparkey.TestSparkeyWriter;
24 | import com.spotify.sparkey.UtilTest;
25 | import org.junit.Test;
26 | import org.junit.runner.RunWith;
27 | import org.junit.runners.Parameterized;
28 | import org.junit.runners.Parameterized.Parameters;
29 |
30 | import java.io.IOException;
31 | import java.util.Collection;
32 |
33 | import static org.junit.Assert.assertEquals;
34 | import static org.junit.Assert.assertNull;
35 | import static org.junit.Assert.assertTrue;
36 |
37 | /**
38 | * Parametrized large file tests that run against all available reader implementations.
39 | * Tests verify that all reader types correctly handle large files (>2GB chunks, large indices).
40 | */
41 | @RunWith(Parameterized.class)
42 | public class ReaderParametrizedLargeFilesTest extends BaseSystemTest {
43 |
44 | private final ReaderType readerType;
45 |
46 | public ReaderParametrizedLargeFilesTest(ReaderType readerType) {
47 | this.readerType = readerType;
48 | }
49 |
50 | @Parameters(name = "{0}")
51 | public static Collection