├── .gitignore ├── .travis.yml ├── src ├── main │ ├── java │ │ └── com │ │ │ └── spotify │ │ │ └── sparkey │ │ │ ├── CorruptedIndexException.java │ │ │ ├── SparkeyReaderClosedException.java │ │ │ ├── ReadWriteData.java │ │ │ ├── SnappyWriter.java │ │ │ ├── RandomAccessData.java │ │ │ ├── FileFlushingData.java │ │ │ ├── BlockPositionedInputStream.java │ │ │ ├── CompressionType.java │ │ │ ├── BlockOutput.java │ │ │ ├── ArrayUtil.java │ │ │ ├── RandomAccessDataStateless.java │ │ │ ├── extra │ │ │ ├── ThreadLocalSparkeyReader.java │ │ │ ├── AbstractDelegatingSparkeyReader.java │ │ │ ├── SparkeyValidator.java │ │ │ └── ReloadableSparkeyReader.java │ │ │ ├── CommonHeader.java │ │ │ ├── UncompressedBlockPositionedInputStream.java │ │ │ ├── UncompressedBlockRandomInput.java │ │ │ ├── BlockRandomInput.java │ │ │ ├── CompressorType.java │ │ │ ├── FileReadWriteData.java │ │ │ ├── EmptyInputStream.java │ │ │ ├── UncompressedBlockOutput.java │ │ │ ├── AddressSize.java │ │ │ ├── CompressionTypeBackend.java │ │ │ ├── HashType.java │ │ │ ├── SparkeyReader.java │ │ │ ├── SparkeyImplSelector.java │ │ │ ├── CompressedReader.java │ │ │ ├── CompressedOutputStream.java │ │ │ ├── LogWriter.java │ │ │ ├── SparkeyWriter.java │ │ │ ├── CompressedWriter.java │ │ │ ├── InMemoryData.java │ │ │ ├── CompressedRandomReader.java │ │ │ ├── SingleThreadedSparkeyReader.java │ │ │ ├── SingleThreadedSparkeyWriter.java │ │ │ ├── MurmurHash3.java │ │ │ └── ByteBufferCleaner.java │ ├── java9 │ │ └── com │ │ │ └── spotify │ │ │ └── sparkey │ │ │ └── ArrayUtil.java │ └── java22 │ │ └── com │ │ └── spotify │ │ └── sparkey │ │ ├── CompressionTypeBackendJ22.java │ │ ├── UncompressedUtilJ22.java │ │ ├── UncompressedBlockRandomInputJ22.java │ │ ├── MemorySegmentInputStream.java │ │ ├── UncompressedLogReaderJ22.java │ │ ├── SingleThreadedSparkeyReaderJ22.java │ │ └── SparkeyImplSelector.java └── test │ └── java │ └── com │ └── spotify │ └── sparkey │ ├── SortHelperTest.java │ ├── OpenMapsAsserter.java │ ├── 
/**
 * Checked {@link IOException} subtype; judging by its name it is meant for reporting a
 * corrupted index (NOTE(review): the throw sites are not visible here — confirm).
 */
public class CorruptedIndexException extends IOException {

  /**
   * Creates the exception with a detail message and no cause.
   *
   * @param message detail message, later available through {@link #getMessage()}
   */
  public CorruptedIndexException(String message) {
    super(message);
  }

  /**
   * Creates the exception with a detail message and the underlying failure, so that callers
   * translating a lower-level error do not have to drop the original stack trace.
   *
   * @param message detail message, later available through {@link #getMessage()}
   * @param cause the failure that led to this exception; may be {@code null}
   */
  public CorruptedIndexException(String message, Throwable cause) {
    super(message, cause);
  }
}
/**
 * Checked {@link IOException} subtype; judging by its name it is meant for signalling use of a
 * reader after it was closed (NOTE(review): the throw sites are not visible here — confirm).
 */
public class SparkeyReaderClosedException extends IOException {

  /**
   * Creates the exception with a detail message and no cause.
   *
   * @param message detail message, later available through {@link #getMessage()}
   */
  public SparkeyReaderClosedException(String message) {
    super(message);
  }

  /**
   * Creates the exception with a detail message and the underlying failure, so that callers
   * translating a lower-level error do not have to drop the original stack trace.
   *
   * @param message detail message, later available through {@link #getMessage()}
   * @param cause the failure that led to this exception; may be {@code null}
   */
  public SparkeyReaderClosedException(String message, Throwable cause) {
    super(message, cause);
  }
}
RamUsageEstimator.sizeOf(SortHelper.Entry.fromHash(123, 456, 789)); 13 | assertEquals(SortHelper.ENTRY_SIZE, size); 14 | 15 | } 16 | } -------------------------------------------------------------------------------- /src/test/java/com/spotify/sparkey/OpenMapsAsserter.java: -------------------------------------------------------------------------------- 1 | package com.spotify.sparkey; 2 | 3 | import org.junit.After; 4 | import org.junit.Before; 5 | 6 | import static org.junit.Assert.assertEquals; 7 | import static org.junit.Assume.assumeTrue; 8 | 9 | public class OpenMapsAsserter { 10 | 11 | private int openMaps; 12 | private int openFiles; 13 | 14 | @Before 15 | public void setUp() throws Exception { 16 | openMaps = Sparkey.getOpenMaps(); 17 | openFiles = Sparkey.getOpenFiles(); 18 | } 19 | 20 | @After 21 | public void tearDown() throws Exception { 22 | assertEquals(openMaps, Sparkey.getOpenMaps()); 23 | assertEquals(openFiles, Sparkey.getOpenFiles()); 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /src/test/java/com/spotify/sparkey/CompressedOutputStreamTest.java: -------------------------------------------------------------------------------- 1 | package com.spotify.sparkey; 2 | 3 | import org.junit.Test; 4 | 5 | import java.io.*; 6 | 7 | /** 8 | * Tests CompressedOutputStream 9 | */ 10 | public class CompressedOutputStreamTest { 11 | @Test 12 | public void testLargeWrite() throws IOException { 13 | for (CompressorType compressor : CompressorType.values()) { 14 | File testFile = File.createTempFile("sparkey-test", ""); 15 | testFile.deleteOnExit(); 16 | FileOutputStream fos = new FileOutputStream(testFile); 17 | 18 | byte[] buf = new byte[1000 * 1000]; 19 | CompressedOutputStream os = new CompressedOutputStream(compressor, 10, fos, fos.getFD()); 20 | os.write(buf); 21 | 22 | testFile.delete(); 23 | } 24 | } 25 | } 26 | -------------------------------------------------------------------------------- 
/src/main/java/com/spotify/sparkey/SnappyWriter.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011-2013 Spotify AB 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | package com.spotify.sparkey; 17 | 18 | @Deprecated 19 | public class SnappyWriter extends CompressedWriter { 20 | public SnappyWriter(CompressedOutputStream compressedOutputStream, int maxEntriesPerBlock) { 21 | super(compressedOutputStream, maxEntriesPerBlock); 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /src/main/java/com/spotify/sparkey/RandomAccessData.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011-2013 Spotify AB 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 
15 | */ 16 | package com.spotify.sparkey; 17 | 18 | import java.io.IOException; 19 | 20 | interface RandomAccessData { 21 | 22 | void seek(long pos) throws IOException; 23 | 24 | int readUnsignedByte() throws IOException; 25 | 26 | int readLittleEndianInt() throws IOException; 27 | 28 | long readLittleEndianLong() throws IOException; 29 | } 30 | -------------------------------------------------------------------------------- /src/test/java/com/spotify/sparkey/CommonHeaderTest.java: -------------------------------------------------------------------------------- 1 | package com.spotify.sparkey; 2 | 3 | import org.junit.Assert; 4 | import org.junit.Test; 5 | 6 | import java.io.IOException; 7 | 8 | /** 9 | * Tests CommonHeader 10 | */ 11 | public class CommonHeaderTest { 12 | 13 | @Test 14 | public void testCommonHeader() throws IOException { 15 | CommonHeader ch = new CommonHeader(1,2,3,4,5,6,7) { }; 16 | try { 17 | ch = new CommonHeader(1,2,3,4,5,-1,7) { }; 18 | Assert.fail("Negative key len size should trigger IOException"); 19 | } catch (IOException e) { 20 | // pass 21 | } 22 | try { 23 | ch = new CommonHeader(1,2,3,4,4294967296L,6,7) { }; 24 | Assert.fail("Key len size larger than 2**31 should trigger IOException"); 25 | } catch (IOException e) { 26 | // pass 27 | } 28 | try { 29 | ch = new CommonHeader(1,2,3,4,-1,6,7) { }; 30 | Assert.fail("Value len size smaller than 0 should trigger IOException"); 31 | } catch (IOException e) { 32 | // pass 33 | } 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /src/main/java/com/spotify/sparkey/FileFlushingData.java: -------------------------------------------------------------------------------- 1 | package com.spotify.sparkey; 2 | 3 | import java.io.File; 4 | import java.io.FileOutputStream; 5 | import java.io.IOException; 6 | 7 | class FileFlushingData extends InMemoryData { 8 | 9 | private final File file; 10 | private final IndexHeader header; 11 | private final 
boolean fsync; 12 | 13 | FileFlushingData(final long size, final File file, final IndexHeader header, final boolean fsync) { 14 | super(size); 15 | this.file = file; 16 | this.header = header; 17 | this.fsync = fsync; 18 | } 19 | 20 | @Override 21 | public void close() throws IOException { 22 | try (FileOutputStream stream = new FileOutputStream(file)) { 23 | stream.write(header.asBytes()); 24 | for (byte[] chunk : chunks) { 25 | stream.write(chunk); 26 | } 27 | stream.flush(); // Not needed for FileOutputStream, but still semantically correct 28 | if (fsync) { 29 | stream.getFD().sync(); 30 | } 31 | } finally { 32 | super.close(); 33 | } 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /src/main/java/com/spotify/sparkey/BlockPositionedInputStream.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011-2013 Spotify AB 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 
/**
 * Base class for input streams that wrap another {@link InputStream} and can report a block
 * position. What the position means is left to each subclass.
 */
abstract class BlockPositionedInputStream extends InputStream {

  /** The wrapped stream; subclasses read from it and {@link #close()} closes it. */
  protected final InputStream input;

  /**
   * Stores the stream to wrap; nothing is read at construction time.
   *
   * @param input the underlying stream
   */
  public BlockPositionedInputStream(final InputStream input) {
    this.input = input;
  }

  /**
   * Returns the current block position as defined by the concrete subclass.
   */
  abstract long getBlockPosition();

  /**
   * Closes the wrapped stream.
   *
   * @throws IOException if closing the wrapped stream fails
   */
  @Override
  public void close() throws IOException {
    this.input.close();
  }
}
15 | */ 16 | package com.spotify.sparkey; 17 | 18 | public enum CompressionType { 19 | NONE(new CompressionTypeBackendUncompressed()), 20 | SNAPPY(new CompressionTypeBackendCompressed(CompressorType.SNAPPY)), 21 | ZSTD(new CompressionTypeBackendCompressed(CompressorType.ZSTD)),; 22 | 23 | private final CompressionTypeBackend backend; 24 | 25 | CompressionType(CompressionTypeBackend backend) { 26 | this.backend = backend; 27 | } 28 | 29 | CompressionTypeBackend getBackend() { 30 | return backend; 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /src/main/java/com/spotify/sparkey/BlockOutput.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011-2013 Spotify AB 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 
/**
 * Sink for put and delete operations with explicit flush and close control.
 *
 * <p>NOTE(review): no implementation is visible from this file, so the per-method notes below
 * describe only what the signatures show; confirm details against the implementing classes.
 */
interface BlockOutput {

  /**
   * Records a put whose value is supplied as a byte array.
   *
   * @param key array holding the key; presumably only the first {@code keyLen} bytes are used
   * @param keyLen key length in bytes
   * @param value array holding the value; presumably only the first {@code valueLen} bytes are used
   * @param valueLen value length in bytes
   * @throws IOException declared by the interface for implementations that perform I/O
   */
  void put(byte[] key, int keyLen, byte[] value, int valueLen) throws IOException;

  /**
   * Records a put whose value is supplied as a stream.
   *
   * @param key array holding the key; presumably only the first {@code keyLen} bytes are used
   * @param keyLen key length in bytes
   * @param value stream supplying the value
   * @param valueLen value length in bytes; a {@code long}, unlike the array overload
   * @throws IOException declared by the interface for implementations that perform I/O
   */
  void put(byte[] key, int keyLen, InputStream value, long valueLen) throws IOException;

  /**
   * Records a delete for the given key.
   *
   * @param key array holding the key; presumably only the first {@code keyLen} bytes are used
   * @param keyLen key length in bytes
   * @throws IOException declared by the interface for implementations that perform I/O
   */
  void delete(byte[] key, int keyLen) throws IOException;

  /**
   * Flushes pending output.
   *
   * @param fsync presumably requests a sync to the storage device as well — confirm
   * @throws IOException declared by the interface for implementations that perform I/O
   */
  void flush(boolean fsync) throws IOException;

  /**
   * Closes this output.
   *
   * @param fsync presumably requests a sync to the storage device before closing — confirm
   * @throws IOException declared by the interface for implementations that perform I/O
   */
  void close(boolean fsync) throws IOException;

  /** Returns the maximum number of entries per block used by this output. */
  int getMaxEntriesPerBlock();
}
28 | */ 29 | static boolean equals(int len, byte[] a, int aOffset, byte[] b, int bOffset) { 30 | for (int i = 0; i < len; i++) { 31 | if (a[aOffset + i] != b[bOffset + i]) { 32 | return false; 33 | } 34 | } 35 | return true; 36 | } 37 | 38 | private ArrayUtil() { 39 | // Utility class 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/test/java/com/spotify/sparkey/TestSparkeyWriter.java: -------------------------------------------------------------------------------- 1 | package com.spotify.sparkey; 2 | 3 | import com.google.common.io.Files; 4 | import java.io.File; 5 | import java.io.IOException; 6 | 7 | public class TestSparkeyWriter { 8 | 9 | public static void writeHashAndCompare(final SparkeyWriter writer2) throws IOException { 10 | final SingleThreadedSparkeyWriter writer = (SingleThreadedSparkeyWriter) writer2; 11 | 12 | final File indexFile = writer.indexFile; 13 | final File memFile = Sparkey.setEnding(indexFile, ".mem.spi"); 14 | 15 | try { 16 | writer.setConstructionMethod(SparkeyWriter.ConstructionMethod.IN_MEMORY); 17 | writer.writeHash(); 18 | indexFile.renameTo(memFile); 19 | final IndexHeader memHeader = IndexHeader.read(memFile); 20 | 21 | writer.setHashSeed(memHeader.getHashSeed()); 22 | 23 | writer.setConstructionMethod(SparkeyWriter.ConstructionMethod.SORTING); 24 | writer.writeHash(); 25 | final IndexHeader sortHeader = IndexHeader.read(indexFile); 26 | 27 | if (!Files.equal(indexFile, memFile)) { 28 | throw new RuntimeException( 29 | "Files are not equal: " + indexFile + ", " + memFile + "\n" + 30 | sortHeader.toString() + "\n" + memHeader.toString()); 31 | } 32 | } finally { 33 | writer.setConstructionMethod(SparkeyWriter.ConstructionMethod.AUTO); 34 | memFile.delete(); 35 | } 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /src/main/java9/com/spotify/sparkey/ArrayUtil.java: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011-2013 Spotify AB 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | package com.spotify.sparkey; 17 | 18 | import java.util.Arrays; 19 | 20 | /** 21 | * Java 9+ optimized array comparison using Arrays.equals() intrinsic. 22 | * 23 | * The JIT compiler recognizes Arrays.equals() with ranges and generates 24 | * vectorized code (AVX2/AVX-512) for ~2-4x speedup on modern CPUs. 25 | */ 26 | class ArrayUtil { 27 | 28 | /** 29 | * Compare byte array ranges using Java 9+ Arrays.equals() intrinsic. 30 | * JIT-compiled to SIMD instructions (AVX2/AVX-512). 31 | */ 32 | static boolean equals(int len, byte[] a, int aOffset, byte[] b, int bOffset) { 33 | return Arrays.equals(a, aOffset, aOffset + len, b, bOffset, bOffset + len); 34 | } 35 | 36 | private ArrayUtil() { 37 | // Utility class 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/main/java/com/spotify/sparkey/RandomAccessDataStateless.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011-2013 Spotify AB 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. 
/**
 * Position-based counterpart of {@link RandomAccessData}.
 *
 * <p>{@link RandomAccessData} reads relative to a position set earlier through {@code seek()}.
 * Here every read receives its position as an argument, so an implementation needs no mutable
 * cursor and can be fully immutable, which makes it straightforward to share between threads.
 */
interface RandomAccessDataStateless {

  /**
   * Reads one byte at {@code pos}; going by the method name, the result is the unsigned value.
   *
   * @param pos position to read from
   * @throws IOException declared by the interface for implementations that perform I/O
   */
  int readUnsignedByte(long pos) throws IOException;

  /**
   * Reads an {@code int} at {@code pos}; going by the method name, stored in little-endian order.
   *
   * @param pos position to read from
   * @throws IOException declared by the interface for implementations that perform I/O
   */
  int readLittleEndianInt(long pos) throws IOException;

  /**
   * Reads a {@code long} at {@code pos}; going by the method name, stored in little-endian order.
   *
   * @param pos position to read from
   * @throws IOException declared by the interface for implementations that perform I/O
   */
  long readLittleEndianLong(long pos) throws IOException;
}
15 | * 16 | * @param file File base to use 17 | * @return UncompressedSparkeyReaderJ22 (on Java 22+) 18 | * @throws UnsupportedOperationException on Java < 22 19 | * @throws IOException if the file cannot be opened 20 | */ 21 | public static SparkeyReader openUncompressedJ22(File file) throws IOException { 22 | return SparkeyImplSelector.openUncompressedJ22(file); 23 | } 24 | 25 | /** 26 | * Open a single-threaded reader using Java 22+ MemorySegment API. 27 | * Delegates to SparkeyImplSelector.openSingleThreadedJ22(). 28 | * 29 | * @param file File base to use 30 | * @return SingleThreadedSparkeyReaderJ22 (on Java 22+) 31 | * @throws UnsupportedOperationException on Java < 22 32 | * @throws IOException if the file cannot be opened 33 | */ 34 | public static SparkeyReader openSingleThreadedJ22(File file) throws IOException { 35 | return SparkeyImplSelector.openSingleThreadedJ22(file); 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /src/test/java/com/spotify/sparkey/AddressSizeTest.java: -------------------------------------------------------------------------------- 1 | package com.spotify.sparkey; 2 | 3 | import org.junit.Assert; 4 | import org.junit.Test; 5 | 6 | import java.io.IOException; 7 | 8 | /** 9 | * Tests AddressSize 10 | */ 11 | public class AddressSizeTest extends OpenMapsAsserter { 12 | 13 | 14 | 15 | @Test 16 | public void testAddressSizeLong() throws IOException { 17 | byte[] BYTES = new byte[] {0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08}; 18 | InMemoryData imd = new InMemoryData(BYTES.length); 19 | for (byte b : BYTES) { 20 | imd.writeUnsignedByte(b); 21 | } 22 | imd.seek(0); 23 | Assert.assertEquals(0x0807060504030201L, AddressSize.LONG.readAddress(imd)); 24 | imd.seek(0); 25 | AddressSize.LONG.writeAddress(0x0807060504030201L, imd); 26 | imd.seek(0); 27 | for (byte b : BYTES) { 28 | Assert.assertEquals(imd.readUnsignedByte(), b); 29 | } 30 | } 31 | 32 | 33 | @Test 34 | public void 
testAddressSizeInt() throws IOException { 35 | byte[] BYTES = new byte[] {0x01, 0x02, 0x03, 0x04}; 36 | InMemoryData imd = new InMemoryData(BYTES.length); 37 | for (byte b : BYTES) { 38 | imd.writeUnsignedByte(b); 39 | } 40 | imd.seek(0); 41 | Assert.assertEquals(0x04030201L, AddressSize.INT.readAddress(imd)); 42 | imd.seek(0); 43 | AddressSize.INT.writeAddress(0x04030201L, imd); 44 | imd.seek(0); 45 | for (byte b : BYTES) { 46 | Assert.assertEquals(imd.readUnsignedByte(), b); 47 | } 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/main/java/com/spotify/sparkey/extra/ThreadLocalSparkeyReader.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011-2013 Spotify AB 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | package com.spotify.sparkey.extra; 17 | 18 | import com.spotify.sparkey.*; 19 | 20 | import java.io.File; 21 | import java.io.IOException; 22 | 23 | /** 24 | * A thread-safe Sparkey Reader. 25 | * 26 | *

This class extends {@link PooledSparkeyReader}, providing all the benefits of 27 | * bounded memory usage and virtual thread compatibility while maintaining backward 28 | * compatibility with existing code. 29 | * 30 | * @deprecated Use {@link PooledSparkeyReader} directly for better clarity. 31 | * This class is maintained for backward compatibility. 32 | * 33 | * @see PooledSparkeyReader the recommended implementation 34 | */ 35 | @Deprecated 36 | public class ThreadLocalSparkeyReader extends PooledSparkeyReader { 37 | 38 | public ThreadLocalSparkeyReader(File indexFile) throws IOException { 39 | super(Sparkey.openSingleThreadedReader(indexFile), computeDefaultPoolSize()); 40 | } 41 | 42 | } 43 | -------------------------------------------------------------------------------- /src/test/java/com/spotify/sparkey/extra/DelegatingSparkeyReaderTest.java: -------------------------------------------------------------------------------- 1 | package com.spotify.sparkey.extra; 2 | 3 | import com.spotify.sparkey.SparkeyReader; 4 | 5 | import org.junit.Test; 6 | 7 | import java.io.IOException; 8 | 9 | import static org.mockito.Mockito.*; 10 | 11 | public class DelegatingSparkeyReaderTest { 12 | 13 | private static final class MockDelegatingSparkeyReader extends AbstractDelegatingSparkeyReader { 14 | private final SparkeyReader delegate = mock(SparkeyReader.class); 15 | 16 | @Override 17 | protected SparkeyReader getDelegateReader() { 18 | return this.delegate; 19 | } 20 | } 21 | 22 | @Test 23 | public void testDelegation() throws IOException { 24 | final MockDelegatingSparkeyReader reader = new MockDelegatingSparkeyReader(); 25 | final SparkeyReader delegate = reader.getDelegateReader(); 26 | final String key = "key"; 27 | 28 | reader.getAsString(key); 29 | verify(delegate).getAsString(key); 30 | 31 | reader.getAsByteArray(key.getBytes()); 32 | verify(delegate).getAsByteArray(key.getBytes()); 33 | 34 | reader.getAsEntry(key.getBytes()); 35 | 
verify(delegate).getAsEntry(key.getBytes()); 36 | 37 | reader.getIndexHeader(); 38 | verify(delegate).getIndexHeader(); 39 | 40 | reader.getLogHeader(); 41 | verify(delegate).getLogHeader(); 42 | 43 | reader.duplicate(); 44 | verify(delegate).duplicate(); 45 | 46 | reader.iterator(); 47 | verify(delegate).iterator(); 48 | 49 | reader.getTotalBytes(); 50 | verify(delegate).getTotalBytes(); 51 | 52 | reader.close(); 53 | verify(delegate).close(); 54 | } 55 | 56 | } 57 | -------------------------------------------------------------------------------- /src/test/java/com/spotify/sparkey/IndexHashTest.java: -------------------------------------------------------------------------------- 1 | package com.spotify.sparkey; 2 | 3 | import com.spotify.sparkey.system.BaseSystemTest; 4 | import org.junit.After; 5 | import org.junit.Before; 6 | import org.junit.Test; 7 | 8 | import java.io.File; 9 | import java.io.IOException; 10 | import java.io.RandomAccessFile; 11 | 12 | import static org.junit.Assert.assertEquals; 13 | import static org.junit.Assert.fail; 14 | 15 | public class IndexHashTest extends BaseSystemTest { 16 | @Before 17 | public void setUp() throws Exception { 18 | super.setUp(); 19 | } 20 | 21 | @After 22 | public void tearDown() throws Exception { 23 | super.tearDown(); 24 | } 25 | 26 | @Test 27 | public void testCorruptHashFile() throws Exception { 28 | SparkeyWriter writer = Sparkey.createNew(indexFile, CompressionType.NONE, 1); 29 | for (int i = 0; i < 100; i++) { 30 | writer.put("key" + i, "value" + i); 31 | } 32 | writer.close(); 33 | TestSparkeyWriter.writeHashAndCompare(writer); 34 | 35 | corruptFile(indexFile); 36 | 37 | assertEquals(0, Sparkey.getOpenFiles()); 38 | assertEquals(0, Sparkey.getOpenMaps()); 39 | 40 | try { 41 | Sparkey.open(indexFile); 42 | fail(); 43 | } catch (Exception e) { 44 | assertEquals(RuntimeException.class, e.getClass()); 45 | } 46 | 47 | assertEquals(0, Sparkey.getOpenFiles()); 48 | assertEquals(0, Sparkey.getOpenMaps()); 49 | 
} 50 | 51 | private void corruptFile(File indexFile) throws IOException { 52 | RandomAccessFile randomAccessFile = new RandomAccessFile(indexFile, "rw"); 53 | randomAccessFile.setLength(randomAccessFile.length() - 100); 54 | randomAccessFile.close(); 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /src/main/java/com/spotify/sparkey/CommonHeader.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011-2013 Spotify AB 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 
/**
 * Base class for header objects. It stores the format version, the file identifier and a set
 * of size counters, and refuses to be constructed with out-of-range key/value length maxima.
 */
abstract class CommonHeader {
  final int majorVersion;
  final int minorVersion;
  final int fileIdentifier;

  // The counters below are deliberately non-final.
  long dataEnd;
  long maxKeyLen;
  long maxValueLen;
  long numPuts;

  /**
   * Validates the length maxima and stores every argument in the field of the same name.
   *
   * @throws IOException if {@code maxKeyLen} is negative or greater than
   *                     {@link Integer#MAX_VALUE}, or if {@code maxValueLen} is negative
   */
  CommonHeader(int majorVersion, int minorVersion, int fileIdentifier, long dataEnd, long maxKeyLen, long maxValueLen, long numPuts) throws IOException {
    // NOTE(review): negative values are reported as "Too large" - presumably because a negative
    // long stands for an unsigned 64-bit value that does not fit; confirm against the file format.
    final boolean keyLenOutOfRange = maxKeyLen < 0 || maxKeyLen > Integer.MAX_VALUE;
    if (keyLenOutOfRange) {
      throw new IOException("Too large max key len: " + maxKeyLen);
    }
    final boolean valueLenOutOfRange = maxValueLen < 0;
    if (valueLenOutOfRange) {
      throw new IOException("Too large max value len: " + maxValueLen);
    }

    this.majorVersion = majorVersion;
    this.minorVersion = minorVersion;
    this.fileIdentifier = fileIdentifier;
    this.dataEnd = dataEnd;
    this.maxKeyLen = maxKeyLen;
    this.maxValueLen = maxValueLen;
    this.numPuts = numPuts;
  }

}
15 | */ 16 | package com.spotify.sparkey; 17 | 18 | import java.io.IOException; 19 | import java.io.InputStream; 20 | 21 | final class UncompressedBlockPositionedInputStream extends BlockPositionedInputStream { 22 | 23 | private long position; 24 | 25 | public UncompressedBlockPositionedInputStream(InputStream data, long start) { 26 | super(data); 27 | position = start; 28 | } 29 | 30 | @Override 31 | long getBlockPosition() { 32 | return position; 33 | } 34 | 35 | @Override 36 | public int read() throws IOException { 37 | position++; 38 | return input.read(); 39 | } 40 | 41 | @Override 42 | public int read(byte[] b) throws IOException { 43 | return read(b, 0, b.length); 44 | } 45 | 46 | @Override 47 | public int read(byte[] b, int off, int len) throws IOException { 48 | position += len; 49 | return input.read(b, off, len); 50 | } 51 | 52 | @Override 53 | public long skip(long n) throws IOException { 54 | long skipped = input.skip(n); 55 | position += skipped; 56 | return skipped; 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /src/main/java22/com/spotify/sparkey/CompressionTypeBackendJ22.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011-2013 Spotify AB 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 
15 | */ 16 | package com.spotify.sparkey; 17 | 18 | /** 19 | * Java 22+ version of CompressionTypeBackend that works with J22 types. 20 | */ 21 | interface CompressionTypeBackendJ22 { 22 | BlockRandomInput createRandomAccessData(ReadOnlyMemMapJ22 data, int maxBlockSize); 23 | } 24 | 25 | class CompressionTypeBackendJ22Uncompressed implements CompressionTypeBackendJ22 { 26 | @Override 27 | public BlockRandomInput createRandomAccessData(ReadOnlyMemMapJ22 data, int maxBlockSize) { 28 | return new UncompressedBlockRandomInputJ22(data); 29 | } 30 | } 31 | 32 | class CompressionTypeBackendJ22Compressed implements CompressionTypeBackendJ22 { 33 | private final CompressorType compressor; 34 | 35 | public CompressionTypeBackendJ22Compressed(CompressorType compressor) { 36 | this.compressor = compressor; 37 | } 38 | 39 | @Override 40 | public BlockRandomInput createRandomAccessData(ReadOnlyMemMapJ22 data, int maxBlockSize) { 41 | return new CompressedRandomReader(compressor, new UncompressedBlockRandomInputJ22(data), maxBlockSize); 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /src/test/java/com/spotify/sparkey/SortHelperBenchmark.java: -------------------------------------------------------------------------------- 1 | package com.spotify.sparkey; 2 | 3 | import org.openjdk.jmh.annotations.Benchmark; 4 | import org.openjdk.jmh.annotations.BenchmarkMode; 5 | import org.openjdk.jmh.annotations.Fork; 6 | import org.openjdk.jmh.annotations.Measurement; 7 | import org.openjdk.jmh.annotations.Mode; 8 | import org.openjdk.jmh.annotations.OutputTimeUnit; 9 | import org.openjdk.jmh.annotations.Scope; 10 | import org.openjdk.jmh.annotations.State; 11 | import org.openjdk.jmh.annotations.Warmup; 12 | 13 | import java.util.concurrent.TimeUnit; 14 | 15 | import static com.spotify.sparkey.SortHelper.ENTRY_COMPARATOR; 16 | 17 | @BenchmarkMode(Mode.AverageTime) 18 | @OutputTimeUnit(TimeUnit.NANOSECONDS) 19 | @State(Scope.Thread) 20 | 
@Fork(value = 1, warmups = 1) 21 | @Measurement(iterations = 5, time = 10) 22 | @Warmup(iterations = 5, time = 10) 23 | public class SortHelperBenchmark { 24 | private static final SortHelper.Entry E1 = new SortHelper.Entry(123, 456, 1); 25 | private static final SortHelper.Entry E2 = new SortHelper.Entry(123, 456, 2); 26 | private static final SortHelper.Entry E3 = new SortHelper.Entry(7567, 222, 1); 27 | private static final SortHelper.Entry E4 = new SortHelper.Entry(7567, 222, 2); 28 | 29 | @Benchmark 30 | public int measureRealE1_E1() { 31 | return ENTRY_COMPARATOR.compare(E1, E1); 32 | } 33 | 34 | @Benchmark 35 | public int measureRealE1_E2() { 36 | return ENTRY_COMPARATOR.compare(E1, E2); 37 | } 38 | 39 | @Benchmark 40 | public int measureRealE2_E1() { 41 | return ENTRY_COMPARATOR.compare(E2, E2); 42 | } 43 | 44 | @Benchmark 45 | public int measureRealE1_E3() { 46 | return ENTRY_COMPARATOR.compare(E1, E3); 47 | } 48 | 49 | @Benchmark 50 | public int measureRealE1_E4() { 51 | return ENTRY_COMPARATOR.compare(E1, E4); 52 | } 53 | 54 | } 55 | -------------------------------------------------------------------------------- /src/test/java/com/spotify/sparkey/SparkeyTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015 Spotify AB 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 
15 | */ 16 | package com.spotify.sparkey; 17 | 18 | import org.junit.Test; 19 | 20 | import java.io.File; 21 | 22 | import static org.junit.Assert.*; 23 | 24 | public class SparkeyTest { 25 | 26 | @Test 27 | public void testFilenames() throws Exception { 28 | assertEquals(new File("foo.bar.spi"), Sparkey.getIndexFile(new File("foo.bar"))); 29 | assertEquals(new File("foo.bar.spi"), Sparkey.getIndexFile(new File("foo.bar."))); 30 | assertEquals(new File("foo.bar.spi"), Sparkey.getIndexFile(new File("foo.bar.spi"))); 31 | assertEquals(new File("foo.bar.spi"), Sparkey.getIndexFile(new File("foo.bar.spl"))); 32 | assertEquals(new File("foo.bar.baz.spi"), Sparkey.getIndexFile(new File("foo.bar.baz"))); 33 | 34 | assertEquals(new File("foo.bar.spl"), Sparkey.getLogFile(new File("foo.bar"))); 35 | assertEquals(new File("foo.bar.spl"), Sparkey.getLogFile(new File("foo.bar."))); 36 | assertEquals(new File("foo.bar.spl"), Sparkey.getLogFile(new File("foo.bar.spi"))); 37 | assertEquals(new File("foo.bar.spl"), Sparkey.getLogFile(new File("foo.bar.spl"))); 38 | assertEquals(new File("foo.bar.baz.spl"), Sparkey.getLogFile(new File("foo.bar.baz"))); 39 | 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/test/java/com/spotify/sparkey/BytesWrittenTest.java: -------------------------------------------------------------------------------- 1 | package com.spotify.sparkey; 2 | 3 | import org.junit.After; 4 | import org.junit.Before; 5 | import org.junit.Test; 6 | 7 | import java.io.File; 8 | import java.io.IOException; 9 | 10 | import static org.junit.Assert.assertEquals; 11 | 12 | public class BytesWrittenTest extends OpenMapsAsserter { 13 | 14 | private File file; 15 | 16 | @Before 17 | public void setUp() throws Exception { 18 | super.setUp(); 19 | file = File.createTempFile("sparkey_test_", ".spl"); 20 | } 21 | 22 | @After 23 | public void tearDown() throws Exception { 24 | file.delete(); 25 | super.tearDown(); 26 | } 27 | 28 
| @Test 29 | public void testNone() throws Exception { 30 | test(CompressionType.NONE); 31 | } 32 | 33 | @Test 34 | public void testSnappy() throws Exception { 35 | test(CompressionType.SNAPPY); 36 | } 37 | 38 | @Test 39 | public void testZstd() throws Exception { 40 | test(CompressionType.ZSTD); 41 | } 42 | 43 | private void test(CompressionType compressionType) throws IOException { 44 | SparkeyWriter writer = Sparkey.createNew(file, compressionType, 20); 45 | for (int i = 0; i < 13; i++) { 46 | writer.put(size(17), size(47)); 47 | } 48 | for (int i = 0; i < 19; i++) { 49 | writer.put(size(130), size(32000)); 50 | } 51 | for (int i = 0; i < 3; i++) { 52 | writer.delete(size(130)); 53 | } 54 | writer.close(); 55 | assertEquals(13 * (17 + 47 + 1 + 1) + 19 * (130 + 32000 + 2 + 3), LogHeader.read(file).getPutSize()); 56 | assertEquals(3 * (130 + 2 + 1), LogHeader.read(file).getDeleteSize()); 57 | } 58 | 59 | private String size(int size) { 60 | StringBuilder stringBuilder = new StringBuilder(); 61 | for (int i = 0; i < size; i++) { 62 | stringBuilder.append("x"); 63 | } 64 | return stringBuilder.toString(); 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /src/main/java/com/spotify/sparkey/UncompressedBlockRandomInput.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011-2013 Spotify AB 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | package com.spotify.sparkey; 17 | 18 | import java.io.IOException; 19 | 20 | class UncompressedBlockRandomInput implements BlockRandomInput { 21 | private final ReadOnlyMemMap data; 22 | 23 | UncompressedBlockRandomInput(ReadOnlyMemMap data) { 24 | this.data = data; 25 | } 26 | 27 | @Override 28 | public void close() { 29 | data.close(); 30 | } 31 | 32 | @Override 33 | public void seek(long pos) throws IOException { 34 | data.seek(pos); 35 | } 36 | 37 | @Override 38 | public int readUnsignedByte() throws IOException { 39 | return data.readUnsignedByte(); 40 | } 41 | 42 | @Override 43 | public void readFully(byte[] buffer, int offset, int length) throws IOException { 44 | data.readFully(buffer, offset, length); 45 | } 46 | 47 | @Override 48 | public boolean readFullyCompare(int length, byte[] key) throws IOException { 49 | return data.readFullyCompare(length, key); 50 | } 51 | 52 | @Override 53 | public void skipBytes(long amount) throws IOException { 54 | data.skipBytes(amount); 55 | } 56 | 57 | @Override 58 | public UncompressedBlockRandomInput duplicate() { 59 | return new UncompressedBlockRandomInput(data.duplicate()); 60 | } 61 | 62 | @Override 63 | public void closeDuplicate() { 64 | data.closeDuplicate(); 65 | } 66 | 67 | @Override 68 | public long getLoadedBytes() { 69 | return data.getLoadedBytes(); 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /src/main/java/com/spotify/sparkey/BlockRandomInput.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011-2013 Spotify AB 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. 
/**
 * Random-access byte source with a current position.
 * NOTE(review): the per-method summaries below (other than {@code readFullyCompare}) are
 * read off the method names and signatures; confirm details against the implementations.
 */
interface BlockRandomInput {

  /** Closes this input. */
  void close();

  /** Moves the current position to {@code pos}. */
  void seek(long pos) throws IOException;

  /** Reads one byte at the current position and returns it as an int. */
  int readUnsignedByte() throws IOException;

  /** Reads {@code length} bytes into {@code buffer}, starting at index {@code offset}. */
  void readFully(byte[] buffer, int offset, int length) throws IOException;

  /**
   * Compares the bytes at the current position with {@code key} and moves the position forward
   * by {@code length} bytes.
   *
   * The position always advances by {@code length}, whether or not the bytes match - the same
   * positioning behaviour as {@link #readFully(byte[], int, int)}.
   *
   * Compared with {@code readFully()} followed by {@code Arrays.equals()} this:
   * - needs no temporary buffer
   * - does not copy data out of memory-mapped storage
   * - can use vectorized (SIMD) comparison on implementations that support it
   *
   * @param length number of bytes to read and compare
   * @param key byte array to compare against (only the first {@code length} bytes are compared)
   * @return true if the bytes at the current position equal the first {@code length} bytes of {@code key}
   */
  boolean readFullyCompare(int length, byte[] key) throws IOException;

  /** Advances the current position by {@code amount} bytes. */
  void skipBytes(long amount) throws IOException;

  /** Returns another {@code BlockRandomInput} derived from this one. */
  BlockRandomInput duplicate();

  /** Closes an input that was obtained through {@link #duplicate()} - TODO confirm exact contract. */
  void closeDuplicate();

  /** Returns the loaded-bytes figure reported by the implementation. */
  long getLoadedBytes();
}
15 | */ 16 | package com.spotify.sparkey.system; 17 | 18 | import com.spotify.sparkey.OpenMapsAsserter; 19 | import com.spotify.sparkey.Sparkey; 20 | import com.spotify.sparkey.UtilTest; 21 | import com.sun.management.UnixOperatingSystemMXBean; 22 | import org.junit.After; 23 | import org.junit.Before; 24 | import org.junit.Test; 25 | 26 | import java.io.File; 27 | import java.lang.management.ManagementFactory; 28 | import java.lang.management.OperatingSystemMXBean; 29 | 30 | public class BaseSystemTest extends OpenMapsAsserter { 31 | protected File indexFile; 32 | protected File logFile; 33 | 34 | @Before 35 | public void setUp() throws Exception { 36 | super.setUp(); 37 | UtilTest.setMapBits(10); 38 | indexFile = File.createTempFile("sparkey", ".spi"); 39 | logFile = Sparkey.getLogFile(indexFile); 40 | indexFile.deleteOnExit(); 41 | logFile.deleteOnExit(); 42 | } 43 | 44 | @After 45 | public void tearDown() throws Exception { 46 | UtilTest.delete(indexFile); 47 | UtilTest.delete(logFile); 48 | super.tearDown(); 49 | } 50 | 51 | @Test 52 | public void testDummy() throws Exception { 53 | } 54 | 55 | static long countOpenFileDescriptors() { 56 | OperatingSystemMXBean os = ManagementFactory.getOperatingSystemMXBean(); 57 | if(os instanceof UnixOperatingSystemMXBean){ 58 | long openFileDescriptorCount = ((UnixOperatingSystemMXBean) os).getOpenFileDescriptorCount(); 59 | return openFileDescriptorCount; 60 | } 61 | return -1; 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /cleanup-failed-release.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Cleanup script for failed maven releases 3 | 4 | set -e 5 | 6 | echo "=== Cleaning up failed release artifacts ===" 7 | echo 8 | 9 | # Get current version from pom.xml 10 | CURRENT_VERSION=$(grep -m 1 "" pom.xml | sed 's/.*\(.*\)-SNAPSHOT<\/version>.*/\1/') 11 | VERSION_TAG="sparkey-$CURRENT_VERSION" 12 | 13 | echo 
"Current version: $CURRENT_VERSION" 14 | echo "Expected tag: $VERSION_TAG" 15 | echo 16 | 17 | # 1. Delete release files 18 | echo "1. Cleaning up release files..." 19 | if [ -f release.properties ] || [ -f pom.xml.releaseBackup ]; then 20 | rm -f release.properties pom.xml.releaseBackup 21 | echo " ✓ Deleted release.properties and pom.xml.releaseBackup" 22 | else 23 | echo " ✓ No release files to clean" 24 | fi 25 | echo 26 | 27 | # 2. Delete local tag 28 | echo "2. Checking for local tag..." 29 | if git tag | grep -q "^$VERSION_TAG\$"; then 30 | git tag -d "$VERSION_TAG" 31 | echo " ✓ Deleted local tag: $VERSION_TAG" 32 | else 33 | echo " ✓ No local tag to delete" 34 | fi 35 | echo 36 | 37 | # 3. Check for remote tag 38 | echo "3. Checking for remote tag..." 39 | if git ls-remote --tags origin | grep -q "refs/tags/$VERSION_TAG\$"; then 40 | echo " ⚠ Remote tag exists: $VERSION_TAG" 41 | read -p " Delete remote tag? (y/N) " -n 1 -r 42 | echo 43 | if [[ $REPLY =~ ^[Yy]$ ]]; then 44 | git push --delete origin "$VERSION_TAG" 45 | echo " ✓ Deleted remote tag: $VERSION_TAG" 46 | else 47 | echo " ⚠ Remote tag NOT deleted (you can delete it later with: git push --delete origin $VERSION_TAG)" 48 | fi 49 | else 50 | echo " ✓ No remote tag to delete" 51 | fi 52 | echo 53 | 54 | # 4. Reset pom.xml if needed 55 | echo "4. Checking pom.xml version..." 
56 | POM_VERSION=$(grep -m 1 "" pom.xml | sed 's/.*\(.*\)<\/version>.*/\1/') 57 | if [[ "$POM_VERSION" != *"-SNAPSHOT" ]]; then 58 | echo " ⚠ WARNING: pom.xml version is $POM_VERSION (not a SNAPSHOT)" 59 | echo " You may need to manually reset to $CURRENT_VERSION-SNAPSHOT" 60 | echo " Or run: mvn release:rollback" 61 | else 62 | echo " ✓ pom.xml version is correct: $POM_VERSION" 63 | fi 64 | echo 65 | 66 | echo "=== Cleanup complete ===" 67 | echo 68 | echo "Run ./verify-release-ready.sh to check if everything is ready for release" 69 | -------------------------------------------------------------------------------- /src/main/java22/com/spotify/sparkey/UncompressedUtilJ22.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2025 Spotify AB 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | package com.spotify.sparkey; 17 | 18 | import java.io.IOException; 19 | 20 | /** 21 | * Utility methods for reading from immutable memory-mapped files. 22 | * All methods take explicit positions. Use Util.unsignedVLQSize() to determine byte count. 23 | */ 24 | final class UncompressedUtilJ22 { 25 | 26 | private UncompressedUtilJ22() {} 27 | 28 | /** 29 | * Read variable-length quantity (VLQ) integer at given position. 30 | * Returns the decoded value. Use Util.unsignedVLQSize(value) to determine bytes consumed. 
31 | */ 32 | static int readVLQInt(ReadOnlyMemMapJ22 data, long position) throws IOException { 33 | long p = position; 34 | int value = 0; 35 | int shift = 0; 36 | 37 | while (true) { 38 | int b = data.readUnsignedByte(p++); 39 | value |= (b & 0x7F) << shift; 40 | if ((b & 0x80) == 0) { 41 | break; 42 | } 43 | shift += 7; 44 | } 45 | 46 | return value; 47 | } 48 | 49 | /** 50 | * Read variable-length quantity (VLQ) as long at given position. 51 | * Supports values larger than Integer.MAX_VALUE. 52 | * Returns the decoded value. Use Util.unsignedVLQSize(value) to determine bytes consumed. 53 | */ 54 | static long readVLQLong(ReadOnlyMemMapJ22 data, long position) throws IOException { 55 | long p = position; 56 | long value = 0; 57 | int shift = 0; 58 | 59 | while (true) { 60 | int b = data.readUnsignedByte(p++); 61 | value |= (long)(b & 0x7F) << shift; 62 | if ((b & 0x80) == 0) { 63 | break; 64 | } 65 | shift += 7; 66 | if (shift >= 64) { 67 | throw new RuntimeException("VLQ overflow - value too large for long"); 68 | } 69 | } 70 | 71 | return value; 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /src/main/java/com/spotify/sparkey/CompressorType.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011-2013 Spotify AB 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 
15 | */ 16 | package com.spotify.sparkey; 17 | 18 | import java.io.IOException; 19 | 20 | import com.github.luben.zstd.Zstd; 21 | 22 | import org.xerial.snappy.Snappy; 23 | 24 | enum CompressorType { 25 | SNAPPY { 26 | @Override 27 | int maxCompressedLength(int blockSize) { 28 | return Snappy.maxCompressedLength(blockSize); 29 | } 30 | 31 | @Override 32 | int uncompress(byte[] compressed, int compressedSize, byte[] uncompressed) throws IOException { 33 | return Snappy.uncompress(compressed, 0, compressedSize, uncompressed, 0); 34 | } 35 | 36 | @Override 37 | int compress(byte[] uncompressed, int uncompressedSize, byte[] compressed) throws IOException { 38 | return Snappy.compress(uncompressed, 0, uncompressedSize, compressed, 0); 39 | } 40 | }, 41 | 42 | ZSTD { 43 | @Override 44 | int maxCompressedLength(int blockSize) { 45 | return (int)Zstd.compressBound(blockSize); 46 | } 47 | 48 | @Override 49 | int uncompress(byte[] compressed, int compressedSize, byte[] uncompressed) throws IOException { 50 | return (int)Zstd.decompressByteArray(uncompressed, 0, uncompressed.length, compressed, 0, compressedSize); 51 | } 52 | 53 | @Override 54 | int compress(byte[] uncompressed, int uncompressedSize, byte[] compressed) throws IOException { 55 | return (int)Zstd.compressByteArray(compressed, 0, compressed.length, uncompressed, 0, uncompressedSize, 3); 56 | } 57 | },; 58 | 59 | abstract int maxCompressedLength(int blockSize); 60 | 61 | abstract int uncompress(byte[] compressed, int compressedSize, byte[] uncompressed) throws IOException; 62 | 63 | abstract int compress(byte[] uncompressed, int uncompressedSize, byte[] compressed) throws IOException; 64 | } -------------------------------------------------------------------------------- /src/main/java/com/spotify/sparkey/FileReadWriteData.java: -------------------------------------------------------------------------------- 1 | package com.spotify.sparkey; 2 | 3 | import java.io.File; 4 | import java.io.IOException; 5 | import 
java.io.RandomAccessFile; 6 | 7 | /** 8 | * Slow implementation - Use {@link ReadWriteMemMap} instead. Implemented for reference and performance comparisons. 9 | */ 10 | @Deprecated 11 | class FileReadWriteData implements ReadWriteData { 12 | 13 | private final RandomAccessFile file; 14 | private final IndexHeader header; 15 | private final boolean fsync; 16 | private final int offset; 17 | private boolean closed = false; 18 | 19 | FileReadWriteData(final long size, final File file, final IndexHeader header, final boolean fsync) throws IOException { 20 | offset = header.size(); 21 | this.file = new RandomAccessFile(file, "rw"); 22 | Sparkey.incrOpenFiles(); 23 | this.file.setLength(offset + size); 24 | this.header = header; 25 | this.fsync = fsync; 26 | } 27 | 28 | public void writeLittleEndianLong(long value) throws IOException { 29 | // RandomAccessFile uses big-endian so this needs to be reversed 30 | file.writeLong(Long.reverseBytes(value)); 31 | } 32 | 33 | public void writeLittleEndianInt(int value) throws IOException { 34 | // RandomAccessFile uses big-endian so this needs to be reversed 35 | file.writeInt(Integer.reverseBytes(value)); 36 | } 37 | 38 | @Override 39 | public void close() throws IOException { 40 | if (closed) { 41 | return; 42 | } 43 | closed = true; 44 | file.seek(0); 45 | file.write(header.asBytes()); 46 | if (fsync) { 47 | file.getFD().sync(); 48 | } 49 | Sparkey.decrOpenFiles(); 50 | file.close(); 51 | } 52 | 53 | @Override 54 | public void writeUnsignedByte(final int value) throws IOException { 55 | file.writeByte(value); 56 | } 57 | 58 | @Override 59 | public void seek(final long pos) throws IOException { 60 | file.seek(offset + pos); 61 | } 62 | 63 | @Override 64 | public int readUnsignedByte() throws IOException { 65 | return file.readUnsignedByte(); 66 | } 67 | 68 | @Override 69 | public int readLittleEndianInt() throws IOException { 70 | // RandomAccessFile uses big-endian so this needs to be reversed 71 | return 
Integer.reverseBytes(file.readInt()); 72 | } 73 | 74 | @Override 75 | public long readLittleEndianLong() throws IOException { 76 | // RandomAccessFile uses big-endian so this needs to be reversed 77 | return Long.reverseBytes(file.readLong()); 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /src/main/java/com/spotify/sparkey/EmptyInputStream.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2025 Spotify AB 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | package com.spotify.sparkey; 17 | 18 | import java.io.InputStream; 19 | 20 | /** 21 | * Immutable singleton empty InputStream for DELETE entries and zero-length values. 22 | * More efficient than wrapping an empty MemorySegment. 23 | * 24 | * Similar to InputStream.nullInputStream() from Java 11+, but works on Java 8+. 25 | */ 26 | final class EmptyInputStream extends InputStream { 27 | 28 | /** 29 | * Singleton instance - completely thread-safe since there's no mutable state. 
30 | */ 31 | static final InputStream INSTANCE = new EmptyInputStream(); 32 | 33 | private EmptyInputStream() { 34 | // Private constructor - use INSTANCE 35 | } 36 | 37 | @Override 38 | public int read() { 39 | return -1; // Always EOF 40 | } 41 | 42 | @Override 43 | public int read(byte[] b) { 44 | if (b == null) { 45 | throw new NullPointerException(); 46 | } 47 | return -1; // Always EOF 48 | } 49 | 50 | @Override 51 | public int read(byte[] b, int off, int len) { 52 | if (b == null) { 53 | throw new NullPointerException(); 54 | } 55 | if (off < 0 || len < 0 || len > b.length - off) { 56 | throw new IndexOutOfBoundsException(); 57 | } 58 | return -1; // Always EOF 59 | } 60 | 61 | @Override 62 | public long skip(long n) { 63 | return 0; // Nothing to skip 64 | } 65 | 66 | @Override 67 | public int available() { 68 | return 0; // No bytes available 69 | } 70 | 71 | @Override 72 | public void close() { 73 | // No-op 74 | } 75 | 76 | @Override 77 | public boolean markSupported() { 78 | return true; // Mark/reset are trivial for empty stream 79 | } 80 | 81 | @Override 82 | public void mark(int readlimit) { 83 | // No-op - no state to save 84 | } 85 | 86 | @Override 87 | public void reset() { 88 | // No-op - nothing to reset to 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /src/test/java/com/spotify/sparkey/system/WriteHashBenchmark.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011-2014 Spotify AB 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. 
You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | package com.spotify.sparkey.system; 17 | 18 | import com.spotify.sparkey.CompressionType; 19 | import com.spotify.sparkey.Sparkey; 20 | import com.spotify.sparkey.SparkeyWriter; 21 | import com.spotify.sparkey.UtilTest; 22 | import org.openjdk.jmh.annotations.*; 23 | 24 | import java.io.File; 25 | import java.io.IOException; 26 | import java.util.concurrent.TimeUnit; 27 | 28 | @State(Scope.Benchmark) 29 | @Warmup(iterations = 2) 30 | @Measurement(iterations = 4) 31 | @Fork(value = 1, warmups = 0) 32 | public class WriteHashBenchmark { 33 | 34 | private File indexFile; 35 | private File logFile; 36 | private SparkeyWriter writer; 37 | 38 | @Setup(Level.Trial) 39 | public void setup() throws IOException { 40 | indexFile = new File("test.spi"); 41 | logFile = Sparkey.getLogFile(indexFile); 42 | 43 | CompressionType compressionType = CompressionType.NONE; 44 | 45 | indexFile.deleteOnExit(); 46 | logFile.deleteOnExit(); 47 | UtilTest.delete(indexFile); 48 | UtilTest.delete(logFile); 49 | 50 | writer = Sparkey.createNew(indexFile, compressionType, 1024); 51 | 52 | for (int i = 0; i < numElements; i++) { 53 | writer.put("key_" + i, "value_" + i); 54 | } 55 | } 56 | 57 | @TearDown(Level.Trial) 58 | public void tearDown() throws IOException { 59 | writer.close(); 60 | UtilTest.delete(indexFile); 61 | UtilTest.delete(logFile); 62 | } 63 | 64 | @Param({"1000", "10000", "100000", "1000000", "10000000"}) 65 | public int numElements; 66 | 67 | @Param({"IN_MEMORY", "SORTING"}) 68 | public SparkeyWriter.ConstructionMethod 
constructionMethod; 69 | 70 | @Benchmark 71 | @BenchmarkMode(Mode.SingleShotTime) 72 | @OutputTimeUnit(TimeUnit.SECONDS) 73 | public void test() throws IOException { 74 | writer.setConstructionMethod(constructionMethod); 75 | writer.writeHash(); 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /src/main/java/com/spotify/sparkey/UncompressedBlockOutput.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011-2013 Spotify AB 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 
*/
package com.spotify.sparkey;

import java.io.FileDescriptor;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

/**
 * {@link BlockOutput} that writes log entries straight to the underlying
 * stream without compression.
 *
 * <p>A put record is written as {@code VLQ(keyLen + 1), VLQ(valueLen), key, value}.
 * A delete record is written as a single {@code 0} byte followed by
 * {@code VLQ(keyLen), key}. Because the first field of a put is {@code keyLen + 1},
 * it is never 0 for a non-negative key length.
 */
final class UncompressedBlockOutput implements BlockOutput {
  /** Scratch buffer passed to {@code Util.copy} when the value comes from a stream. */
  private final byte[] buf = new byte[1024*1024];
  private final OutputStream outputStream;
  private final FileDescriptor fileDescriptor;

  /**
   * @param outputStream   destination of all records
   * @param fileDescriptor descriptor that is synced when an fsync is requested
   */
  UncompressedBlockOutput(OutputStream outputStream, FileDescriptor fileDescriptor) {
    this.outputStream = outputStream;
    this.fileDescriptor = fileDescriptor;
  }

  /** Writes a put record whose value is the first {@code valueLen} bytes of {@code value}. */
  @Override
  public void put(byte[] key, int keyLen, byte[] value, int valueLen) throws IOException {
    Util.writeUnsignedVLQ(keyLen + 1, outputStream);
    Util.writeUnsignedVLQ(valueLen, outputStream);
    outputStream.write(key, 0, keyLen);
    outputStream.write(value, 0, valueLen);
  }

  /** Writes a put record whose value is copied from {@code value} via {@code Util.copy}. */
  @Override
  public void put(byte[] key, int keyLen, InputStream value, long valueLen) throws IOException {
    Util.writeUnsignedVLQ(keyLen + 1, outputStream);
    Util.writeUnsignedVLQ(valueLen, outputStream);
    outputStream.write(key, 0, keyLen);
    Util.copy(valueLen, value, outputStream, buf);
  }

  /** Writes a delete record for the first {@code keyLen} bytes of {@code key}. */
  @Override
  public void delete(byte[] key, int keyLen) throws IOException {
    outputStream.write(0);
    Util.writeUnsignedVLQ(keyLen, outputStream);
    outputStream.write(key, 0, keyLen);
  }

  /** Flushes the stream and, when {@code fsync} is set, syncs the file descriptor. */
  @Override
  public void flush(boolean fsync) throws IOException {
    outputStream.flush();
    if (fsync) {
      fileDescriptor.sync();
    }
  }

  /**
   * Flushes (optionally syncing) and then closes the stream.
   *
   * <p>The stream is closed even when the flush or sync fails, so a failing
   * flush no longer leaks the underlying stream. A failure from the flush stays
   * the primary exception; a failure from the close is attached as suppressed.
   */
  @Override
  public void close(boolean fsync) throws IOException {
    try (OutputStream closing = outputStream) {
      flush(fsync);
    }
  }

  /** Every entry is its own block in the uncompressed format. */
  @Override
  public int getMaxEntriesPerBlock() {
    return 1;
  }
}
--------------------------------------------------------------------------------
/src/main/java/com/spotify/sparkey/extra/AbstractDelegatingSparkeyReader.java:
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2013 Spotify AB 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | package com.spotify.sparkey.extra; 17 | 18 | import com.spotify.sparkey.IndexHeader; 19 | import com.spotify.sparkey.LogHeader; 20 | import com.spotify.sparkey.SparkeyReader; 21 | 22 | import java.io.IOException; 23 | import java.util.Iterator; 24 | 25 | /** 26 | * A superclass for Sparkey readers that delegate to another {@link SparkeyReader}. 27 | * 28 | * Subclasses must override the {@link AbstractDelegatingSparkeyReader#getDelegateReader()} 29 | * method. 
30 | */ 31 | public abstract class AbstractDelegatingSparkeyReader implements SparkeyReader { 32 | 33 | protected abstract SparkeyReader getDelegateReader(); 34 | 35 | @Override 36 | public String getAsString(String key) throws IOException { 37 | return getDelegateReader().getAsString(key); 38 | } 39 | 40 | @Override 41 | public byte[] getAsByteArray(byte[] key) throws IOException { 42 | return getDelegateReader().getAsByteArray(key); 43 | } 44 | 45 | @Override 46 | public Entry getAsEntry(byte[] key) throws IOException { 47 | return getDelegateReader().getAsEntry(key); 48 | } 49 | 50 | @Override 51 | public void close() { 52 | getDelegateReader().close(); 53 | } 54 | 55 | @Override 56 | public IndexHeader getIndexHeader() { 57 | return getDelegateReader().getIndexHeader(); 58 | } 59 | 60 | @Override 61 | public LogHeader getLogHeader() { 62 | return getDelegateReader().getLogHeader(); 63 | } 64 | 65 | @Override 66 | public SparkeyReader duplicate() { 67 | return getDelegateReader().duplicate(); 68 | } 69 | 70 | @Override 71 | public Iterator iterator() { 72 | return getDelegateReader().iterator(); 73 | } 74 | 75 | @Override 76 | public long getLoadedBytes() { 77 | return getDelegateReader().getLoadedBytes(); 78 | } 79 | 80 | @Override 81 | public long getTotalBytes() { 82 | return getDelegateReader().getTotalBytes(); 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /src/test/java/com/spotify/sparkey/system/AppendBenchmark.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011-2014 Spotify AB 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. 
You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | package com.spotify.sparkey.system; 17 | 18 | import com.spotify.sparkey.CompressionType; 19 | import com.spotify.sparkey.Sparkey; 20 | import com.spotify.sparkey.SparkeyWriter; 21 | import com.spotify.sparkey.UtilTest; 22 | import org.openjdk.jmh.annotations.*; 23 | 24 | import java.io.File; 25 | import java.io.IOException; 26 | import java.util.concurrent.TimeUnit; 27 | 28 | @State(Scope.Benchmark) 29 | @Warmup(iterations = 2) 30 | @Measurement(iterations = 4) 31 | @Fork(value = 1, warmups = 0) 32 | public class AppendBenchmark { 33 | 34 | private File indexFile; 35 | private File logFile; 36 | private SparkeyWriter writer; 37 | 38 | @Setup(Level.Trial) 39 | public void setup() throws IOException { 40 | indexFile = new File("test.spi"); 41 | logFile = Sparkey.getLogFile(indexFile); 42 | 43 | CompressionType compressionType = CompressionType.valueOf(type); 44 | 45 | indexFile.deleteOnExit(); 46 | logFile.deleteOnExit(); 47 | UtilTest.delete(indexFile); 48 | UtilTest.delete(logFile); 49 | 50 | writer = Sparkey.createNew(indexFile, compressionType, 1024); 51 | } 52 | 53 | @TearDown(Level.Trial) 54 | public void tearDown() throws IOException { 55 | writer.close(); 56 | UtilTest.delete(indexFile); 57 | UtilTest.delete(logFile); 58 | } 59 | 60 | @Param({"NONE", "SNAPPY", "ZSTD"}) 61 | public String type; 62 | 63 | @Benchmark 64 | @BenchmarkMode(Mode.Throughput) 65 | @OutputTimeUnit(TimeUnit.SECONDS) 66 | public void testSmall() throws IOException { 67 | writer.put("key" , "value"); 68 | } 69 | 70 | private static 
final String MEDIUM_KEY = String.format("%200s", "key"); 71 | private static final String MEDIUM_VALUE = String.format("%200s", "value"); 72 | 73 | @Benchmark 74 | @BenchmarkMode(Mode.Throughput) 75 | @OutputTimeUnit(TimeUnit.SECONDS) 76 | public void testMedium() throws IOException { 77 | writer.put(MEDIUM_KEY , MEDIUM_VALUE); 78 | } 79 | 80 | } 81 | -------------------------------------------------------------------------------- /src/test/java/com/spotify/sparkey/ReadOnlyMemMapTest.java: -------------------------------------------------------------------------------- 1 | package com.spotify.sparkey; 2 | 3 | import org.junit.Test; 4 | 5 | import java.io.File; 6 | import java.io.IOException; 7 | import java.util.ArrayList; 8 | import java.util.Collections; 9 | import java.util.List; 10 | import java.util.concurrent.atomic.AtomicBoolean; 11 | 12 | import static org.junit.Assert.assertEquals; 13 | import static org.junit.Assert.fail; 14 | 15 | public class ReadOnlyMemMapTest extends OpenMapsAsserter { 16 | 17 | @Test 18 | public void testDontRunOutOfFileDescriptors() throws Exception { 19 | for (int iter = 0; iter < 100; iter++) { 20 | ReadOnlyMemMap memMap = new ReadOnlyMemMap(new File("README.md")); 21 | ArrayList maps = new ArrayList<>(); 22 | for (int i = 0; i < 100; i++) { 23 | maps.add(memMap.duplicate()); 24 | } 25 | memMap.close(); 26 | for (ReadOnlyMemMap map : maps) { 27 | try { 28 | map.readUnsignedByte(); 29 | fail(); 30 | } catch (SparkeyReaderClosedException e) { 31 | } 32 | try { 33 | map.seek(1); 34 | fail(); 35 | } catch (SparkeyReaderClosedException e) { 36 | } 37 | try { 38 | map.skipBytes(1); 39 | fail(); 40 | } catch (SparkeyReaderClosedException e) { 41 | } 42 | } 43 | assertEquals(0, Sparkey.getOpenFiles()); 44 | assertEquals(0, Sparkey.getOpenMaps()); 45 | } 46 | } 47 | 48 | @Test 49 | public void testConcurrentReadWhileClosing() throws Exception { 50 | final AtomicBoolean running = new AtomicBoolean(true); 51 | final ReadOnlyMemMap memMap = 
new ReadOnlyMemMap(new File("README.md")); 52 | final List failures = Collections.synchronizedList(new ArrayList<>()); 53 | List threads = new ArrayList<>(); 54 | for (int i = 0; i < 100; i++) { 55 | Thread thread = new Thread(() -> { 56 | ReadOnlyMemMap map = memMap.duplicate(); 57 | while (running.get()) { 58 | try { 59 | map.seek(1); 60 | map.readUnsignedByte(); 61 | map.skipBytes(1); 62 | } catch (IOException e) { 63 | if (!e.getMessage().equals("Reader has been closed")) { 64 | e.printStackTrace(); 65 | failures.add(e); 66 | } 67 | } 68 | } 69 | }); 70 | threads.add(thread); 71 | thread.start(); 72 | } 73 | memMap.close(); 74 | Thread.sleep(100); 75 | running.set(false); 76 | for (Thread thread : threads) { 77 | thread.join(); 78 | } 79 | assertEquals(0, failures.size()); 80 | 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /src/test/java/com/spotify/sparkey/system/RandomLookupProfiling.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011-2013 Spotify AB 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 
*/
package com.spotify.sparkey.system;

import com.spotify.sparkey.*;
import org.junit.Test;

import java.io.File;
import java.io.IOException;
import java.util.Random;

/**
 * Endless random-lookup loop intended to be run under a profiler.
 *
 * <p>{@link #main} writes {@link #NUM_ENTRIES} small entries once and then
 * repeats batches of one million random lookups forever, printing the speed of
 * each batch and the running average.
 */
public class RandomLookupProfiling {

  private static final int NUM_ENTRIES = 100 * 1024;

  public static void main(String[] args) throws IOException {
    File indexFile = new File("profiling.spi");
    File logFile = Sparkey.getLogFile(indexFile);
    indexFile.deleteOnExit();
    logFile.deleteOnExit();

    fillWithData(indexFile, CompressionType.NONE, NUM_ENTRIES);

    int runs = 0;
    double speedSum = 0;
    while (true) {
      long t3 = System.currentTimeMillis();

      int numLookups = 1000 * 1000;
      randomLookup(indexFile, numLookups);
      long t4 = System.currentTimeMillis();
      double speed = 1000.0 * (double) numLookups / (t4 - t3);
      speedSum += speed;
      runs++;
      System.out.println("Random lookups / sec: " + speed);
      System.out.println("Average: " + speedSum / runs);
    }
  }

  /**
   * Opens the index, performs {@code numLookups} lookups of random existing
   * keys and closes the reader again.
   *
   * <p>The reader is closed in a {@code finally} block. This method runs once
   * per pass of the endless loop in {@link #main}, so a reader left open here
   * would accumulate for as long as the process runs.
   */
  private static void randomLookup(File indexFile, int numLookups) throws IOException {
    SparkeyReader reader = Sparkey.open(indexFile);
    try {
      Random random = new Random();
      for (int i = 0; i < numLookups; i++) {
        // Only the lookup cost matters; the returned value is discarded.
        reader.getAsString("Key" + random.nextInt(NUM_ENTRIES));
      }
    } finally {
      reader.close();
    }
  }

  /**
   * Creates a new log with {@code numEntries} entries {@code Key0..KeyN-1}, all
   * with the value {@code "0"}, and writes a 64-bit hash index for it. The
   * writer is closed even when writing fails.
   */
  private static void fillWithData(File indexFile, CompressionType compression, int numEntries) throws IOException {
    SparkeyWriter writer = Sparkey.createNew(indexFile, compression, 32 * 1024);
    try {
      String smallValue = String.format("%d", 0);
      for (int i = 0; i < numEntries; i++) {
        writer.put("Key" + i, smallValue);
      }
      writer.writeHash(HashType.HASH_64_BITS);
    } finally {
      writer.close();
    }
  }

  @Test
  public void dummy() {
    // Just to make the junit test runner work
  }
}
-------------------------------------------------------------------------------- /src/main/java/com/spotify/sparkey/AddressSize.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011-2013 Spotify AB 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | package com.spotify.sparkey; 17 | 18 | import java.io.DataOutputStream; 19 | import java.io.IOException; 20 | 21 | enum AddressSize { 22 | LONG(8) { 23 | @Override 24 | long readAddress(RandomAccessData data) throws IOException { 25 | return data.readLittleEndianLong(); 26 | } 27 | 28 | @Override 29 | long readAddress(RandomAccessDataStateless data, long pos) throws IOException { 30 | return data.readLittleEndianLong(pos); 31 | } 32 | 33 | @Override 34 | void writeAddress(long address, ReadWriteData data) throws IOException { 35 | data.writeLittleEndianLong(address); 36 | } 37 | 38 | @Override 39 | void writeAddress(final long address, final DataOutputStream data) throws IOException { 40 | data.writeLong(address); 41 | } 42 | }, 43 | INT(4) { 44 | @Override 45 | long readAddress(RandomAccessData data) throws IOException { 46 | return data.readLittleEndianInt() & INT_MASK; 47 | } 48 | 49 | @Override 50 | long readAddress(RandomAccessDataStateless data, long pos) throws IOException { 51 | return data.readLittleEndianInt(pos) & INT_MASK; 52 | } 53 | 54 | @Override 55 | void writeAddress(long address, ReadWriteData 
data) throws IOException { 56 | data.writeLittleEndianInt((int) address); 57 | } 58 | 59 | @Override 60 | void writeAddress(final long address, final DataOutputStream data) throws IOException { 61 | data.writeInt((int) address); // TODO: overflow? 62 | } 63 | }; 64 | 65 | private static final long INT_MASK = (1L << 32) - 1; 66 | private final int size; 67 | 68 | AddressSize(int size) { 69 | this.size = size; 70 | } 71 | 72 | abstract long readAddress(RandomAccessData data) throws IOException; 73 | abstract long readAddress(RandomAccessDataStateless data, long pos) throws IOException; 74 | 75 | abstract void writeAddress(long address, ReadWriteData data) throws IOException; 76 | abstract void writeAddress(long address, DataOutputStream data) throws IOException; 77 | 78 | public int size() { 79 | return size; 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /src/test/java/com/spotify/sparkey/CompressedReaderTest.java: -------------------------------------------------------------------------------- 1 | package com.spotify.sparkey; 2 | 3 | import org.junit.Test; 4 | 5 | import java.io.*; 6 | 7 | import static org.junit.Assert.assertEquals; 8 | 9 | /** 10 | * Tests CompressedReader 11 | */ 12 | public class CompressedReaderTest { 13 | // A stream that reads the same array repeatedly, forever. 
14 | private class RepeatingInputStream extends InputStream { 15 | private byte[] buffer; 16 | private int pos = 0; 17 | 18 | public RepeatingInputStream(byte[] buf) throws IOException { 19 | buffer = buf; 20 | } 21 | 22 | public int read() throws IOException { 23 | int ret = buffer[pos]; 24 | skip(1); 25 | return ret; 26 | } 27 | 28 | public int read(byte[] b, int off, int len) throws IOException { 29 | int remain = len; 30 | while (remain > 0) { 31 | int avail = buffer.length - pos; 32 | int copy = Math.min(avail, remain); 33 | System.arraycopy(buffer, pos, b, off, copy); 34 | skip(copy); 35 | off += copy; 36 | remain -= copy; 37 | } 38 | return len; 39 | } 40 | 41 | public long skip(long n) throws IOException { 42 | pos = (int)((n + pos) % buffer.length); 43 | return n; 44 | } 45 | } 46 | 47 | private CompressedReader reader(CompressorType compressor) throws IOException { 48 | byte[] uncompressed = new byte[10]; 49 | for (int i = 0; i < uncompressed.length; ++i) { 50 | uncompressed[i] = (byte)i; 51 | } 52 | 53 | ByteArrayOutputStream bytes = new ByteArrayOutputStream(); 54 | byte[] compressed = new byte[compressor.maxCompressedLength(uncompressed.length)]; 55 | int length = compressor.compress(uncompressed, uncompressed.length, compressed); 56 | Util.writeUnsignedVLQ(length, bytes); 57 | bytes.write(compressed, 0, length); 58 | 59 | InputStream buf = new RepeatingInputStream(bytes.toByteArray()); 60 | return new CompressedReader(compressor, buf, uncompressed.length, 0); 61 | } 62 | 63 | @Test 64 | public void testLargeSkip() throws IOException { 65 | for (CompressorType compressor : CompressorType.values()) { 66 | long ret = reader(compressor).skip(1000 * 1000); 67 | assertEquals(1000 * 1000, ret); 68 | } 69 | } 70 | 71 | @Test 72 | public void testLargeRead() throws IOException { 73 | for (CompressorType compressor : CompressorType.values()) { 74 | byte[] buf = new byte[1000 * 1000]; 75 | int ret = reader(compressor).read(buf); 76 | assertEquals(1000 * 1000, 
ret); 77 | } 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /src/main/java/com/spotify/sparkey/CompressionTypeBackend.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011-2013 Spotify AB 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | package com.spotify.sparkey; 17 | 18 | import java.io.FileDescriptor; 19 | import java.io.IOException; 20 | import java.io.InputStream; 21 | import java.io.OutputStream; 22 | 23 | interface CompressionTypeBackend { 24 | BlockOutput createBlockOutput(FileDescriptor fd, OutputStream outputStream, int maxBlockSize, int maxEntriesPerBlock) throws IOException; 25 | BlockPositionedInputStream createBlockInput(InputStream inputStream, int maxBlockSize, long start); 26 | BlockRandomInput createRandomAccessData(ReadOnlyMemMap data, int maxBlockSize); 27 | } 28 | 29 | class CompressionTypeBackendUncompressed implements CompressionTypeBackend { 30 | @Override 31 | public BlockPositionedInputStream createBlockInput(InputStream inputStream, int maxBlockSize, long start) { 32 | return new UncompressedBlockPositionedInputStream(inputStream, start); 33 | } 34 | 35 | @Override 36 | public BlockRandomInput createRandomAccessData(ReadOnlyMemMap data, int maxBlockSize) { 37 | return new UncompressedBlockRandomInput(data); 38 | } 39 | 40 | @Override 41 | public BlockOutput 
createBlockOutput(FileDescriptor fd, OutputStream outputStream, int maxBlockSize, int maxEntriesPerBlock) throws IOException { 42 | return new UncompressedBlockOutput(outputStream, fd); 43 | } 44 | } 45 | 46 | class CompressionTypeBackendCompressed implements CompressionTypeBackend { 47 | private final CompressorType compressor; 48 | 49 | public CompressionTypeBackendCompressed(CompressorType compressor) { 50 | this.compressor = compressor; 51 | } 52 | 53 | @Override 54 | public BlockPositionedInputStream createBlockInput(InputStream inputStream, int maxBlockSize, long start) { 55 | return new CompressedReader(compressor, inputStream, maxBlockSize, start); 56 | } 57 | 58 | @Override 59 | public BlockRandomInput createRandomAccessData(ReadOnlyMemMap data, int maxBlockSize) { 60 | return new CompressedRandomReader(compressor, new UncompressedBlockRandomInput(data), maxBlockSize); 61 | } 62 | 63 | @Override 64 | public BlockOutput createBlockOutput(FileDescriptor fd, OutputStream outputStream, int maxBlockSize, int maxEntriesPerBlock) throws IOException { 65 | return new CompressedWriter(new CompressedOutputStream(compressor, maxBlockSize, outputStream, fd), maxEntriesPerBlock); 66 | } 67 | } 68 | 69 | -------------------------------------------------------------------------------- /src/test/java/com/spotify/sparkey/system/FsyncBenchmark.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011-2014 Spotify AB 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | package com.spotify.sparkey.system; 17 | 18 | import com.spotify.sparkey.CompressionType; 19 | import com.spotify.sparkey.Sparkey; 20 | import com.spotify.sparkey.SparkeyWriter; 21 | import com.spotify.sparkey.UtilTest; 22 | import org.openjdk.jmh.annotations.Benchmark; 23 | import org.openjdk.jmh.annotations.BenchmarkMode; 24 | import org.openjdk.jmh.annotations.Fork; 25 | import org.openjdk.jmh.annotations.Level; 26 | import org.openjdk.jmh.annotations.Measurement; 27 | import org.openjdk.jmh.annotations.Mode; 28 | import org.openjdk.jmh.annotations.OperationsPerInvocation; 29 | import org.openjdk.jmh.annotations.OutputTimeUnit; 30 | import org.openjdk.jmh.annotations.Param; 31 | import org.openjdk.jmh.annotations.Scope; 32 | import org.openjdk.jmh.annotations.Setup; 33 | import org.openjdk.jmh.annotations.State; 34 | import org.openjdk.jmh.annotations.TearDown; 35 | import org.openjdk.jmh.annotations.Warmup; 36 | 37 | import java.io.File; 38 | import java.io.IOException; 39 | import java.util.concurrent.TimeUnit; 40 | 41 | @State(Scope.Benchmark) 42 | @Warmup(iterations = 2) 43 | @Measurement(iterations = 4) 44 | @Fork(value = 1, warmups = 0) 45 | public class FsyncBenchmark { 46 | 47 | private File indexFile; 48 | private File logFile; 49 | private SparkeyWriter writer; 50 | 51 | @Param({"NONE", "SNAPPY", "ZSTD"}) 52 | public String type; 53 | 54 | @Param({"true", "false"}) 55 | public boolean fsync; 56 | 57 | @Setup(Level.Trial) 58 | public void setup() throws IOException { 59 | indexFile = new File("test.spi"); 60 | logFile = Sparkey.getLogFile(indexFile); 61 | 62 | CompressionType compressionType = CompressionType.valueOf(type); 63 | 64 | indexFile.deleteOnExit(); 65 | logFile.deleteOnExit(); 66 | UtilTest.delete(indexFile); 67 | UtilTest.delete(logFile); 68 | 69 | writer = Sparkey.createNew(indexFile, compressionType, 1024); 70 | 
writer.setFsync(fsync); 71 | } 72 | 73 | @TearDown(Level.Trial) 74 | public void tearDown() throws IOException { 75 | writer.close(); 76 | UtilTest.delete(indexFile); 77 | UtilTest.delete(logFile); 78 | } 79 | 80 | @Benchmark 81 | @BenchmarkMode(Mode.Throughput) 82 | @OutputTimeUnit(TimeUnit.SECONDS) 83 | @OperationsPerInvocation(1000) 84 | public void testFsync() throws IOException { 85 | for (int i = 0; i < 1000; i++) { 86 | writer.put("key" , "value"); 87 | } 88 | writer.flush(); 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /src/test/java/com/spotify/sparkey/system/ReloadableReaderExample.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011-2013 Spotify AB 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 
15 | */ 16 | package com.spotify.sparkey.system; 17 | 18 | import com.google.common.util.concurrent.ListeningExecutorService; 19 | import com.google.common.util.concurrent.MoreExecutors; 20 | import com.spotify.sparkey.CompressionType; 21 | import com.spotify.sparkey.extra.ReloadableSparkeyReader; 22 | import com.spotify.sparkey.Sparkey; 23 | import com.spotify.sparkey.SparkeyWriter; 24 | import org.junit.Ignore; 25 | 26 | import java.io.File; 27 | import java.io.IOException; 28 | import java.util.concurrent.ExecutionException; 29 | import java.util.concurrent.Executors; 30 | import java.util.concurrent.TimeUnit; 31 | 32 | @Ignore 33 | public class ReloadableReaderExample { 34 | 35 | private static final int ENTRIES = 1000; 36 | private static final CompressionType TYPE = CompressionType.NONE; 37 | 38 | public static void main(String[] args) 39 | throws IOException, InterruptedException, ExecutionException { 40 | run(); 41 | } 42 | 43 | private static void run() throws IOException, InterruptedException, ExecutionException { 44 | ListeningExecutorService executorService = MoreExecutors.listeningDecorator(Executors.newSingleThreadExecutor()); 45 | 46 | // create dummy log/index files, and load the reader from them 47 | final File logFile = new File("reloadabletest.spl"); 48 | create(Sparkey.getIndexFile(logFile)); 49 | final ReloadableSparkeyReader reader = ReloadableSparkeyReader.fromLogFile(logFile, executorService).toCompletableFuture().get(); 50 | 51 | // should be ignored (same file) 52 | reader.load(logFile); 53 | 54 | // should load from second file now 55 | final File logFile2 = new File("reloadabletest2.spl"); 56 | create(Sparkey.getIndexFile(logFile2)); 57 | reader.load(logFile2); 58 | 59 | reader.close(); 60 | executorService.shutdown(); 61 | executorService.awaitTermination(10, TimeUnit.SECONDS); 62 | 63 | Sparkey.getIndexFile(logFile).delete(); 64 | logFile.delete(); 65 | Sparkey.getIndexFile(logFile2).delete(); 66 | logFile2.delete(); 67 | 68 | 
System.out.println("Done!"); 69 | } 70 | 71 | private static void create(File indexFile) throws IOException { 72 | final SparkeyWriter writer = Sparkey.createNew(indexFile, TYPE, 512); 73 | for (int i = 0; i < ENTRIES; i++) { 74 | writer.put("Key" + i, "Value" + i); 75 | } 76 | writer.flush(); 77 | writer.writeHash(); 78 | writer.close(); 79 | } 80 | 81 | } 82 | -------------------------------------------------------------------------------- /src/test/java/com/spotify/sparkey/extra/ReloadableSparkeyReaderTest.java: -------------------------------------------------------------------------------- 1 | package com.spotify.sparkey.extra; 2 | 3 | import com.google.common.util.concurrent.ListeningExecutorService; 4 | import com.google.common.util.concurrent.MoreExecutors; 5 | import com.spotify.sparkey.CompressionType; 6 | import com.spotify.sparkey.OpenMapsAsserter; 7 | import com.spotify.sparkey.Sparkey; 8 | import com.spotify.sparkey.SparkeyWriter; 9 | import org.junit.After; 10 | import org.junit.Before; 11 | import org.junit.Test; 12 | 13 | import java.io.File; 14 | import java.io.IOException; 15 | import java.util.concurrent.ExecutionException; 16 | import java.util.concurrent.Executors; 17 | 18 | import static org.junit.Assert.assertEquals; 19 | 20 | public class ReloadableSparkeyReaderTest extends OpenMapsAsserter { 21 | private final ListeningExecutorService executorService = MoreExecutors.listeningDecorator(Executors.newSingleThreadExecutor()); 22 | private File logFile1; 23 | private File logFile2; 24 | 25 | @Before 26 | public void setUp() throws Exception { 27 | super.setUp(); 28 | logFile1 = createLogFile("key1", "value1"); 29 | logFile2 = createLogFile("key2", "value2"); 30 | 31 | logFile1.deleteOnExit(); 32 | logFile2.deleteOnExit(); 33 | } 34 | 35 | @After 36 | public void tearDown() throws Exception { 37 | logFile1.delete(); 38 | Sparkey.getIndexFile(logFile1).delete(); 39 | logFile2.delete(); 40 | Sparkey.getIndexFile(logFile2).delete(); 41 | 
super.tearDown(); 42 | } 43 | 44 | private static File createLogFile(String key, String value) throws IOException { 45 | final File logFile = File.createTempFile("sparkey", ".spl"); 46 | 47 | SparkeyWriter writer = Sparkey.createNew(logFile, CompressionType.NONE, 1024); 48 | writer.put(key, value); 49 | writer.writeHash(); 50 | writer.close(); 51 | 52 | return logFile; 53 | } 54 | 55 | @Test 56 | public void testFromLogFile() throws ExecutionException, InterruptedException, IOException { 57 | try (ReloadableSparkeyReader reader = ReloadableSparkeyReader.fromLogFile(logFile1, executorService) 58 | .toCompletableFuture().get()) { 59 | assertEquals("value1", reader.getAsString("key1")); 60 | } 61 | } 62 | 63 | @Test 64 | public void testReload() throws ExecutionException, InterruptedException, IOException { 65 | try (ReloadableSparkeyReader reader = ReloadableSparkeyReader.fromLogFile(logFile1, executorService) 66 | .toCompletableFuture().get()) { 67 | reader.load(logFile2).toCompletableFuture().get(); 68 | assertEquals("value2", reader.getAsString("key2")); 69 | 70 | reader.load(logFile1).toCompletableFuture().get(); 71 | assertEquals("value1", reader.getAsString("key1")); 72 | } 73 | } 74 | 75 | @Test(expected = IllegalArgumentException.class) 76 | public void testNullExecutorService() { 77 | ReloadableSparkeyReader.fromLogFile(logFile1, null); 78 | } 79 | 80 | @Test(expected = IllegalArgumentException.class) 81 | public void testInvalidLogFile() { 82 | ReloadableSparkeyReader.fromLogFile(new File("some-nonexisting-file"), executorService); 83 | } 84 | 85 | } 86 | -------------------------------------------------------------------------------- /src/main/java/com/spotify/sparkey/HashType.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011-2013 Spotify AB 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. 
You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | package com.spotify.sparkey; 17 | 18 | import java.io.DataOutputStream; 19 | import java.io.IOException; 20 | 21 | public enum HashType { 22 | HASH_64_BITS(8) { 23 | @Override 24 | long readHash(RandomAccessData data) throws IOException { 25 | return data.readLittleEndianLong(); 26 | } 27 | 28 | @Override 29 | long readHash(RandomAccessDataStateless data, long pos) throws IOException { 30 | return data.readLittleEndianLong(pos); 31 | } 32 | 33 | @Override 34 | void writeHash(long hash, ReadWriteData data) throws IOException { 35 | data.writeLittleEndianLong(hash); 36 | } 37 | 38 | @Override 39 | void writeHash(final long hash, final DataOutputStream data) throws IOException { 40 | data.writeLong(hash); 41 | } 42 | 43 | @Override 44 | long hash(int keyLen, byte[] key, int seed) { 45 | return MurmurHash3.murmurHash3_x64_64(key, keyLen, seed); 46 | } 47 | }, 48 | HASH_32_BITS(4) { 49 | @Override 50 | long readHash(RandomAccessData data) throws IOException { 51 | return data.readLittleEndianInt() & INT_MASK; 52 | } 53 | 54 | @Override 55 | long readHash(RandomAccessDataStateless data, long pos) throws IOException { 56 | return data.readLittleEndianInt(pos) & INT_MASK; 57 | } 58 | 59 | @Override 60 | void writeHash(long hash, ReadWriteData data) throws IOException { 61 | data.writeLittleEndianInt((int) hash); 62 | } 63 | 64 | @Override 65 | void writeHash(final long hash, final DataOutputStream data) throws IOException { 66 | data.writeInt((int) hash); 67 | } 68 | 69 | @Override 70 | long hash(int keyLen, byte[] key, 
int seed) { 71 | return MurmurHash3.murmurHash3_x86_32(key, keyLen, seed) & BITS_32; 72 | } 73 | }; 74 | 75 | private static final long BITS_32 = ((1L << 32) - 1); 76 | private static final long INT_MASK = (1L << 32) - 1; 77 | private final int size; 78 | 79 | HashType(int size) { 80 | this.size = size; 81 | } 82 | 83 | 84 | abstract long readHash(RandomAccessData data) throws IOException; 85 | abstract long readHash(RandomAccessDataStateless data, long pos) throws IOException; 86 | 87 | abstract void writeHash(long hash, ReadWriteData data) throws IOException; 88 | abstract void writeHash(long hash, DataOutputStream data) throws IOException; 89 | 90 | abstract long hash(int keyLen, byte[] key, int seed); 91 | 92 | public int size() { 93 | return size; 94 | } 95 | } 96 | -------------------------------------------------------------------------------- /src/main/java22/com/spotify/sparkey/UncompressedBlockRandomInputJ22.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011-2013 Spotify AB 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | package com.spotify.sparkey; 17 | 18 | import java.io.IOException; 19 | import java.lang.foreign.MemorySegment; 20 | 21 | /** 22 | * Java 22+ uncompressed block random input. 23 | * 24 | * Wraps immutable ReadOnlyMemMapJ22 with stateful position tracking. 
25 | */ 26 | class UncompressedBlockRandomInputJ22 implements BlockRandomInput { 27 | private final ReadOnlyMemMapJ22 data; 28 | private long position; 29 | 30 | UncompressedBlockRandomInputJ22(ReadOnlyMemMapJ22 data) { 31 | this.data = data; 32 | this.position = 0; 33 | } 34 | 35 | @Override 36 | public void close() { 37 | data.close(); 38 | } 39 | 40 | @Override 41 | public void seek(long pos) throws IOException { 42 | this.position = pos; 43 | } 44 | 45 | @Override 46 | public int readUnsignedByte() throws IOException { 47 | int result = data.readUnsignedByte(position); 48 | position++; 49 | return result; 50 | } 51 | 52 | @Override 53 | public void readFully(byte[] buffer, int offset, int length) throws IOException { 54 | data.readFully(position, buffer, offset, length); 55 | position += length; 56 | } 57 | 58 | @Override 59 | public void skipBytes(long amount) throws IOException { 60 | position += amount; 61 | } 62 | 63 | @Override 64 | public UncompressedBlockRandomInputJ22 duplicate() { 65 | return new UncompressedBlockRandomInputJ22(data.duplicate()); 66 | } 67 | 68 | @Override 69 | public void closeDuplicate() { 70 | data.closeDuplicate(); 71 | } 72 | 73 | @Override 74 | public long getLoadedBytes() { 75 | // ReadOnlyMemMapJ22 doesn't track loaded bytes (MemorySegment is all-or-nothing) 76 | // Return 0 as conservative estimate 77 | return 0; 78 | } 79 | 80 | @Override 81 | public boolean readFullyCompare(int length, byte[] key) throws IOException { 82 | boolean result = data.readFullyCompare(position, length, key); 83 | position += length; 84 | return result; 85 | } 86 | 87 | /** 88 | * Get current position in the data stream. 89 | * Used for tracking value positions for lazy access. 90 | */ 91 | long getPosition() { 92 | return position; 93 | } 94 | 95 | /** 96 | * Get a zero-copy slice of the underlying data. 97 | * Used for lazy value access - no allocation, no copying. 
98 | */ 99 | MemorySegment asSlice(long pos, long length) throws IOException { 100 | return data.asSlice(pos, length); 101 | } 102 | } 103 | -------------------------------------------------------------------------------- /src/test/java/com/spotify/sparkey/system/LargeFilesTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011-2013 Spotify AB 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 
15 | */ 16 | package com.spotify.sparkey.system; 17 | 18 | import com.spotify.sparkey.*; 19 | import org.junit.Test; 20 | 21 | import java.io.IOException; 22 | 23 | import static org.junit.Assert.*; 24 | 25 | public class LargeFilesTest extends BaseSystemTest { 26 | @Test 27 | public void testLargeLogFile() throws IOException { 28 | UtilTest.setMapBits(10); 29 | String expectedValue = "value"; 30 | while (expectedValue.length() < 5*1024) { // Larger than a map chunk 31 | expectedValue += expectedValue; 32 | } 33 | 34 | byte[] value = expectedValue.getBytes(); 35 | 36 | SparkeyWriter writer = Sparkey.createNew(indexFile, CompressionType.NONE, 1024); 37 | for (int i = 0; i < 2000; i++) { 38 | writer.put(("key_" + i).getBytes(), value); 39 | } 40 | TestSparkeyWriter.writeHashAndCompare(writer); 41 | writer.close(); 42 | 43 | assertTrue(logFile.length() > 2000 * 5 * 1024); 44 | SparkeyReader reader = Sparkey.open(indexFile); 45 | assertEquals(indexFile.length() + logFile.length(), reader.getTotalBytes()); 46 | for (int i = 0; i < 2000; i += 100) { 47 | assertEquals(expectedValue, reader.getAsString("key_" + i)); 48 | } 49 | assertEquals(null, reader.getAsString("key_" + 2000)); 50 | reader.close(); 51 | } 52 | 53 | @Test 54 | public void testSmallIndexFile() throws IOException { 55 | testLargeIndexFileInner(7000); 56 | } 57 | 58 | @Test 59 | public void testMediumIndexFile() throws IOException { 60 | testLargeIndexFileInner(150000); 61 | } 62 | 63 | @Test 64 | public void testLargeIndexFile() throws IOException { 65 | testLargeIndexFileInner(500000); 66 | } 67 | 68 | private void testLargeIndexFileInner(final long size) throws IOException { 69 | SparkeyWriter writer = Sparkey.createNew(indexFile, CompressionType.NONE, 1024); 70 | for (int i = 0; i < size; i++) { 71 | writer.put(("key_" + i), "" + (i % 13)); 72 | } 73 | writer.setHashType(HashType.HASH_64_BITS); 74 | TestSparkeyWriter.writeHashAndCompare(writer); 75 | writer.close(); 76 | 77 | 
assertTrue(indexFile.length() > size * 8L); 78 | SparkeyReader reader = Sparkey.open(indexFile); 79 | assertTrue(0 <= reader.getLoadedBytes()); 80 | assertTrue(reader.getLoadedBytes() <= reader.getTotalBytes()); 81 | for (int i = 0; i < 1000; i++) { 82 | long key = i * size / 1000L; 83 | assertEquals("" + (key % 13), reader.getAsString("key_" + key)); 84 | } 85 | assertEquals(null, reader.getAsString("key_" + size)); 86 | reader.close(); 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /src/test/java/com/spotify/sparkey/system/LookupBenchmark.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011-2014 Spotify AB 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 
15 | */ 16 | package com.spotify.sparkey.system; 17 | 18 | import com.spotify.sparkey.CompressionType; 19 | import com.spotify.sparkey.Sparkey; 20 | import com.spotify.sparkey.SparkeyReader; 21 | import com.spotify.sparkey.SparkeyWriter; 22 | import com.spotify.sparkey.UtilTest; 23 | import org.openjdk.jmh.annotations.Benchmark; 24 | import org.openjdk.jmh.annotations.BenchmarkMode; 25 | import org.openjdk.jmh.annotations.Fork; 26 | import org.openjdk.jmh.annotations.Level; 27 | import org.openjdk.jmh.annotations.Measurement; 28 | import org.openjdk.jmh.annotations.Mode; 29 | import org.openjdk.jmh.annotations.OutputTimeUnit; 30 | import org.openjdk.jmh.annotations.Param; 31 | import org.openjdk.jmh.annotations.Scope; 32 | import org.openjdk.jmh.annotations.Setup; 33 | import org.openjdk.jmh.annotations.State; 34 | import org.openjdk.jmh.annotations.TearDown; 35 | import org.openjdk.jmh.annotations.Warmup; 36 | 37 | import java.io.File; 38 | import java.io.IOException; 39 | import java.util.Random; 40 | import java.util.concurrent.TimeUnit; 41 | 42 | @State(Scope.Benchmark) 43 | @Warmup(iterations = 2) 44 | @Measurement(iterations = 4) 45 | @Fork(value = 1, warmups = 0) 46 | public class LookupBenchmark { 47 | 48 | private File indexFile; 49 | private File logFile; 50 | private SparkeyReader reader; 51 | private Random random; 52 | 53 | @Setup(Level.Trial) 54 | public void setup() throws IOException { 55 | indexFile = new File("test.spi"); 56 | logFile = Sparkey.getLogFile(indexFile); 57 | 58 | CompressionType compressionType = CompressionType.valueOf(type); 59 | 60 | indexFile.deleteOnExit(); 61 | logFile.deleteOnExit(); 62 | UtilTest.delete(indexFile); 63 | UtilTest.delete(logFile); 64 | 65 | SparkeyWriter writer = Sparkey.createNew(indexFile, compressionType, 1024); 66 | for (int i = 0; i < numElements; i++) { 67 | writer.put("key_" + i, "value_" + i); 68 | } 69 | writer.writeHash(); 70 | writer.close(); 71 | 72 | reader = Sparkey.open(indexFile); 73 | random = 
new Random(891273791623L); 74 | 75 | } 76 | 77 | @TearDown(Level.Trial) 78 | public void tearDown() throws IOException { 79 | reader.close(); 80 | UtilTest.delete(indexFile); 81 | UtilTest.delete(logFile); 82 | } 83 | 84 | @Param({"1000", "10000", "100000", "1000000", "10000000", "100000000"}) 85 | public int numElements; 86 | 87 | @Param({"NONE", "SNAPPY", "ZSTD"}) 88 | public String type; 89 | 90 | @Benchmark 91 | @BenchmarkMode(Mode.Throughput) 92 | @OutputTimeUnit(TimeUnit.SECONDS) 93 | public String test() throws IOException { 94 | return reader.getAsString("key_" + random.nextInt(numElements)); 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /src/main/java/com/spotify/sparkey/SparkeyReader.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011-2013 Spotify AB 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | package com.spotify.sparkey; 17 | 18 | import java.io.Closeable; 19 | import java.io.IOException; 20 | import java.io.InputStream; 21 | import java.util.Iterator; 22 | 23 | public interface SparkeyReader extends Iterable, Closeable { 24 | /** 25 | * @param key the key to search for, interpreted as an UTF-8 string. 26 | * @return null if the key/value pair was not found, otherwise the value interpreted as an UTF-8 string. 
27 | */ 28 | String getAsString(String key) throws IOException; 29 | 30 | /** 31 | * @param key the key to search for 32 | * @return null if the key/value pair was not found, otherwise the raw byte array value 33 | */ 34 | byte[] getAsByteArray(byte[] key) throws IOException; 35 | 36 | /** 37 | * This is mostly useful for retrieving large values that don't fit in a byte array. 38 | * 39 | * @param key the key to search for 40 | * @return null if the key/value pair was not found, otherwise the entry. 41 | * 42 | */ 43 | Entry getAsEntry(byte[] key) throws IOException; 44 | 45 | IndexHeader getIndexHeader(); 46 | LogHeader getLogHeader(); 47 | 48 | /** 49 | * Create a duplicate of the reader. Useful for using the reader from another thread. 50 | * @return a duplicate of the reader. 51 | */ 52 | SparkeyReader duplicate(); 53 | 54 | // Deliberately override to avoid throwing IOException 55 | @Override 56 | void close(); 57 | 58 | /** 59 | * Get an iterator over all the live entries. 60 | * 61 | * The iterator object is not thread safe, 62 | * and the entry objects are highly volatile 63 | * and will be invalidated by the next 64 | * iteration step. Don't leak this entry, 65 | * copy whatever data you want from it instead. 66 | * 67 | * @return an iterator 68 | */ 69 | @Override 70 | Iterator iterator(); 71 | 72 | interface Entry { 73 | int getKeyLength(); 74 | byte[] getKey(); 75 | String getKeyAsString(); 76 | 77 | long getValueLength(); 78 | byte[] getValue() throws IOException; 79 | String getValueAsString() throws IOException; 80 | InputStream getValueAsStream(); 81 | 82 | Type getType(); 83 | } 84 | 85 | /** 86 | * Get the number of index and log file bytes loaded in memory. 
87 | * 88 | * This number is based on MappedByteBuffer.isLoaded() and the resolution is 89 | * in increments of the memory chunk size (1 GB) 90 | * 91 | * @deprecated because it won't always be possible to compute the correct value 92 | */ 93 | @Deprecated 94 | long getLoadedBytes(); 95 | 96 | /** 97 | * Get the total number of index and log file bytes. 98 | */ 99 | long getTotalBytes(); 100 | 101 | enum Type { 102 | PUT, DELETE 103 | } 104 | } 105 | -------------------------------------------------------------------------------- /src/main/java22/com/spotify/sparkey/MemorySegmentInputStream.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2025 Spotify AB 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | package com.spotify.sparkey; 17 | 18 | import java.io.InputStream; 19 | import java.lang.foreign.MemorySegment; 20 | import java.lang.foreign.ValueLayout; 21 | 22 | /** 23 | * Zero-copy InputStream backed by a MemorySegment. 24 | * Supports values larger than 2GB (limited only by MemorySegment size). 
25 | */ 26 | final class MemorySegmentInputStream extends InputStream { 27 | private static final ValueLayout.OfByte JAVA_BYTE = ValueLayout.JAVA_BYTE; 28 | 29 | private final MemorySegment segment; 30 | private final long size; 31 | private long position; 32 | private long mark; 33 | 34 | MemorySegmentInputStream(MemorySegment segment) { 35 | this.segment = segment; 36 | this.size = segment.byteSize(); 37 | this.position = 0; 38 | this.mark = 0; 39 | } 40 | 41 | @Override 42 | public int read() { 43 | if (position >= size) { 44 | return -1; 45 | } 46 | byte b = segment.get(JAVA_BYTE, position); 47 | position++; 48 | return ((int) b) & 0xFF; 49 | } 50 | 51 | @Override 52 | public int read(byte[] b, int off, int len) { 53 | if (b == null) { 54 | throw new NullPointerException(); 55 | } 56 | if (off < 0 || len < 0 || len > b.length - off) { 57 | throw new IndexOutOfBoundsException(); 58 | } 59 | if (len == 0) { 60 | return 0; 61 | } 62 | if (position >= size) { 63 | return -1; 64 | } 65 | 66 | // Calculate how much we can actually read 67 | long remaining = size - position; 68 | // Cast is safe: result never exceeds len (which is already an int) 69 | int toRead = (int) Math.min(len, remaining); 70 | 71 | // Zero-copy read from MemorySegment to byte array 72 | MemorySegment.copy(segment, JAVA_BYTE, position, b, off, toRead); 73 | position += toRead; 74 | 75 | return toRead; 76 | } 77 | 78 | @Override 79 | public long skip(long n) { 80 | if (n <= 0) { 81 | return 0; 82 | } 83 | long remaining = size - position; 84 | long skipped = Math.min(n, remaining); 85 | position += skipped; 86 | return skipped; 87 | } 88 | 89 | @Override 90 | public int available() { 91 | long remaining = size - position; 92 | // Clamp to Integer.MAX_VALUE for API compatibility 93 | return (int) Math.min(remaining, Integer.MAX_VALUE); 94 | } 95 | 96 | @Override 97 | public boolean markSupported() { 98 | return true; 99 | } 100 | 101 | @Override 102 | public synchronized void mark(int readlimit) { 
103 | mark = position; 104 | } 105 | 106 | @Override 107 | public synchronized void reset() { 108 | position = mark; 109 | } 110 | 111 | @Override 112 | public void close() { 113 | // No-op: MemorySegment lifecycle is managed externally 114 | } 115 | } 116 | -------------------------------------------------------------------------------- /src/test/java/com/spotify/sparkey/system/QuickLookupBenchmark.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011-2014 Spotify AB 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 
15 | */ 16 | package com.spotify.sparkey.system; 17 | 18 | import com.spotify.sparkey.CompressionType; 19 | import com.spotify.sparkey.Sparkey; 20 | import com.spotify.sparkey.SparkeyReader; 21 | import com.spotify.sparkey.SparkeyWriter; 22 | import com.spotify.sparkey.UtilTest; 23 | import org.openjdk.jmh.annotations.Benchmark; 24 | import org.openjdk.jmh.annotations.BenchmarkMode; 25 | import org.openjdk.jmh.annotations.Fork; 26 | import org.openjdk.jmh.annotations.Level; 27 | import org.openjdk.jmh.annotations.Measurement; 28 | import org.openjdk.jmh.annotations.Mode; 29 | import org.openjdk.jmh.annotations.OutputTimeUnit; 30 | import org.openjdk.jmh.annotations.Param; 31 | import org.openjdk.jmh.annotations.Scope; 32 | import org.openjdk.jmh.annotations.Setup; 33 | import org.openjdk.jmh.annotations.State; 34 | import org.openjdk.jmh.annotations.TearDown; 35 | import org.openjdk.jmh.annotations.Warmup; 36 | 37 | import java.io.File; 38 | import java.io.IOException; 39 | import java.util.ArrayList; 40 | import java.util.Arrays; 41 | import java.util.Collections; 42 | import java.util.List; 43 | import java.util.Random; 44 | import java.util.concurrent.TimeUnit; 45 | import java.util.concurrent.atomic.AtomicInteger; 46 | 47 | @State(Scope.Benchmark) 48 | @Warmup(iterations = 4, time = 3) 49 | @Measurement(iterations = 10, time = 3) 50 | @Fork(value = 1, warmups = 0) 51 | public class QuickLookupBenchmark { 52 | 53 | private File indexFile; 54 | private File logFile; 55 | private SparkeyReader reader; 56 | private String[] keysArray; 57 | private int counter = 0; 58 | private int bitmask; 59 | 60 | @Setup(Level.Trial) 61 | public void setup() throws IOException { 62 | indexFile = new File("test.spi"); 63 | logFile = Sparkey.getLogFile(indexFile); 64 | 65 | CompressionType compressionType = CompressionType.valueOf(type); 66 | 67 | indexFile.deleteOnExit(); 68 | logFile.deleteOnExit(); 69 | UtilTest.delete(indexFile); 70 | UtilTest.delete(logFile); 71 | 72 | bitmask 
= numElements - 1; 73 | 74 | SparkeyWriter writer = Sparkey.createNew(indexFile, compressionType, 1024); 75 | final List keys = new ArrayList<>(); 76 | for (int i = 0; i < numElements; i++) { 77 | final String key = "key_" + i; 78 | writer.put(key, "value_" + i); 79 | keys.add(key); 80 | } 81 | Collections.shuffle(keys, new Random(891273791623L)); 82 | keysArray = keys.toArray(new String[0]); 83 | writer.writeHash(); 84 | writer.close(); 85 | 86 | reader = Sparkey.open(indexFile); 87 | } 88 | 89 | @TearDown(Level.Trial) 90 | public void tearDown() throws IOException { 91 | reader.close(); 92 | UtilTest.delete(indexFile); 93 | UtilTest.delete(logFile); 94 | } 95 | 96 | @Param({"1048576"}) 97 | public int numElements; 98 | 99 | @Param({"NONE"}) 100 | public String type; 101 | 102 | @Benchmark 103 | @BenchmarkMode(Mode.Throughput) 104 | @OutputTimeUnit(TimeUnit.SECONDS) 105 | public String test() throws IOException { 106 | return reader.getAsString("key_" + keysArray[++counter & bitmask]); 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /RELEASE: -------------------------------------------------------------------------------- 1 | How to release: 2 | 3 | ## Pre-Release Verification 4 | 5 | Before starting the release, run the verification script to catch common issues: 6 | 7 | ```bash 8 | ./verify-release-ready.sh 9 | ``` 10 | 11 | This checks: 12 | - Java 8 is active 13 | - Git working directory is clean 14 | - On master branch 15 | - Maven settings configured 16 | - Javadoc builds without errors 17 | - SCM URLs match git remote 18 | - Tests pass 19 | - GPG key is available 20 | - Bytecode version is Java 8 21 | 22 | If you have a custom SSH alias for GitHub (like `github.com-spotify`), configure git URL rewriting: 23 | 24 | ```bash 25 | git config --local url."git@github.com-spotify:".insteadOf "git@github.com:" 26 | ``` 27 | 28 | ## Maven Settings 29 | 30 | **IMPORTANT**: Sonatype has migrated from legacy OSSRH 
(oss.sonatype.org) to the 31 | Central Publishing Portal (central.sonatype.com). You need a user token, not a password. 32 | 33 | ### Get User Token 34 | 35 | 1. Go to https://central.sonatype.com/account 36 | 2. Generate or view your user token 37 | 3. Add the token to ~/.m2/settings.xml: 38 | 39 | ```xml 40 | <settings> 41 | <servers> 42 | <server> 43 | <id>central</id> 44 | <username>YOUR_TOKEN_USERNAME</username> 45 | <password>YOUR_TOKEN_PASSWORD</password> 46 | </server> 47 | </servers> 48 | </settings> 49 | ``` 50 | 51 | Optional (for GPG passphrase): 52 | ```xml 53 | <profiles> 54 | <profile> 55 | <id>gpg</id> 56 | <activation> 57 | <activeByDefault>true</activeByDefault> 58 | </activation> 59 | <properties> 60 | <gpg.executable>gpg</gpg.executable> 61 | <gpg.passphrase>YOUR_GPG_PASSPHRASE</gpg.passphrase> 62 | </properties> 63 | </profile> 64 | </profiles> 65 | ``` 66 | 67 | ## Release Process 68 | 69 | ```bash 70 | # 1. Run pre-release verification 71 | ./verify-release-ready.sh 72 | 73 | # 2. Execute release (both prepare and perform can be run together) 74 | mvn -B release:prepare release:perform -Darguments="-DskipTests=true" 75 | 76 | # Note: Tests are skipped during release since they were already verified in step 1 77 | ``` 78 | 79 | **What happens during release:** 80 | - `release:prepare` - Creates release tag, bumps version, commits to git, pushes to GitHub 81 | - `release:perform` - Builds artifacts, signs with GPG, uploads to Maven Central, auto-publishes 82 | 83 | **Important notes:** 84 | - The `-Psonatype-oss-release` profile is NO longer needed with Central Publishing Portal 85 | - Artifacts are automatically published (no manual staging/closing required) 86 | - Release takes ~1 minute total 87 | 88 | ## Post-Release Verification 89 | 90 | After release completes successfully: 91 | 92 | ```bash 93 | # Quick verification 94 | git fetch --tags 95 | git tag | grep sparkey-X.X.X 96 | grep "<version>" pom.xml | head -1 # Should show X.X.X+1-SNAPSHOT 97 | ``` 98 | 99 | **Check deployment status:** 100 | https://central.sonatype.com/publishing/deployments 101 | 102 | **Full post-release checklist:** 103 | See [POST-RELEASE-CHECKLIST.md](POST-RELEASE-CHECKLIST.md) for complete verification steps. 
104 | 105 | **Create GitHub release (optional):** 106 | ```bash 107 | gh release create sparkey-X.X.X --title "Version X.X.X" --notes "See CHANGELOG.md" 108 | ``` 109 | 110 | ## Troubleshooting Failed Releases 111 | 112 | If the release fails partway through: 113 | 114 | ```bash 115 | # Run cleanup script 116 | ./cleanup-failed-release.sh 117 | 118 | # Review what went wrong 119 | # Fix the issue (update pom.xml, fix tests, etc.) 120 | 121 | # Re-run verification 122 | ./verify-release-ready.sh 123 | 124 | # Retry release 125 | mvn -B release:prepare release:perform -Darguments="-DskipTests=true" 126 | ``` 127 | 128 | Common issues and solutions documented in [POST-RELEASE-CHECKLIST.md](POST-RELEASE-CHECKLIST.md#troubleshooting) 129 | 130 | -------------------------------------------------------------------------------- /src/main/java/com/spotify/sparkey/SparkeyImplSelector.java: -------------------------------------------------------------------------------- 1 | package com.spotify.sparkey; 2 | 3 | import com.spotify.sparkey.extra.PooledSparkeyReader; 4 | 5 | import java.io.File; 6 | import java.io.IOException; 7 | 8 | /** 9 | * Selects the appropriate Sparkey implementation based on the Java version. 10 | * 11 | * This class is overridden via Multi-Release JAR to provide optimized 12 | * implementations on Java 22+. The base implementation uses Java 8-compatible 13 | * FileChannel-based readers. 14 | */ 15 | class SparkeyImplSelector { 16 | 17 | /** 18 | * Open a SparkeyReader with the optimal implementation for the current Java version. 19 | * 20 | * Base implementation (Java 8-21): Returns PooledSparkeyReader using FileChannel. 21 | * Java 22+ override: Returns optimized implementations using MemorySegment API. 
22 | * 23 | * @param file File base to use, the actual file endings will be set to .spi and .spl 24 | * @return an optimal SparkeyReader for the current Java version 25 | * @throws IOException if the file cannot be opened 26 | */ 27 | static SparkeyReader open(File file) throws IOException { 28 | return PooledSparkeyReader.open(file); 29 | } 30 | 31 | /** 32 | * Open a single-threaded SparkeyReader. 33 | * 34 | * This is not thread-safe and should only be used from one thread. 35 | * 36 | * @param file File base to use, the actual file endings will be set to .spi and .spl 37 | * @return a single-threaded SparkeyReader 38 | * @throws IOException if the file cannot be opened 39 | */ 40 | static SparkeyReader openSingleThreaded(File file) throws IOException { 41 | return SingleThreadedSparkeyReader.open(file); 42 | } 43 | 44 | /** 45 | * Open a pooled SparkeyReader with default pool size. 46 | * 47 | * @param file File base to use, the actual file endings will be set to .spi and .spl 48 | * @return a pooled SparkeyReader 49 | * @throws IOException if the file cannot be opened 50 | */ 51 | static SparkeyReader openPooled(File file) throws IOException { 52 | return PooledSparkeyReader.open(file); 53 | } 54 | 55 | /** 56 | * Open a pooled SparkeyReader with the specified pool size. 57 | * 58 | * @param file File base to use, the actual file endings will be set to .spi and .spl 59 | * @param poolSize number of reader instances (minimum 1) 60 | * @return a pooled SparkeyReader 61 | * @throws IOException if the file cannot be opened 62 | */ 63 | static SparkeyReader openPooled(File file, int poolSize) throws IOException { 64 | return PooledSparkeyReader.open(file, poolSize); 65 | } 66 | 67 | /** 68 | * Open an uncompressed reader using Java 22+ MemorySegment API. 69 | * Only available on Java 22+. 
70 | * 71 | * @param file File base to use, the actual file endings will be set to .spi and .spl 72 | * @return UncompressedSparkeyReaderJ22 (on Java 22+) 73 | * @throws UnsupportedOperationException on Java < 22 74 | * @throws IOException if the file cannot be opened 75 | */ 76 | static SparkeyReader openUncompressedJ22(File file) throws IOException { 77 | throw new UnsupportedOperationException( 78 | "UncompressedSparkeyReaderJ22 requires Java 22+, currently running " + 79 | System.getProperty("java.version")); 80 | } 81 | 82 | /** 83 | * Open a single-threaded reader using Java 22+ MemorySegment API. 84 | * Only available on Java 22+. 85 | * 86 | * @param file File base to use, the actual file endings will be set to .spi and .spl 87 | * @return SingleThreadedSparkeyReaderJ22 (on Java 22+) 88 | * @throws UnsupportedOperationException on Java < 22 89 | * @throws IOException if the file cannot be opened 90 | */ 91 | static SparkeyReader openSingleThreadedJ22(File file) throws IOException { 92 | throw new UnsupportedOperationException( 93 | "SingleThreadedSparkeyReaderJ22 requires Java 22+, currently running " + 94 | System.getProperty("java.version")); 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /src/main/java/com/spotify/sparkey/CompressedReader.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011-2013 Spotify AB 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | package com.spotify.sparkey; 17 | 18 | import java.io.IOException; 19 | import java.io.InputStream; 20 | 21 | final class CompressedReader extends BlockPositionedInputStream { 22 | private final CompressorType compressor; 23 | private final byte[] uncompressedBuf; 24 | private final byte[] compressedBuf; 25 | private int bufPos; 26 | private int blockSize; 27 | 28 | private long curBlockStart; 29 | private long nextBlockStart; 30 | 31 | public CompressedReader(CompressorType compressor, InputStream data, int maxBlockSize, long start) { 32 | super(data); 33 | this.compressor = compressor; 34 | blockSize = 0; 35 | bufPos = 0; 36 | curBlockStart = start; 37 | nextBlockStart = start; 38 | uncompressedBuf = new byte[maxBlockSize]; 39 | compressedBuf = new byte[compressor.maxCompressedLength(maxBlockSize)]; 40 | } 41 | 42 | @Override 43 | public int read() throws IOException { 44 | if (bufPos == blockSize) { 45 | fetchBlock(); 46 | } 47 | return ((int) uncompressedBuf[bufPos++]) & 0xFF; 48 | } 49 | 50 | private void fetchBlock() throws IOException { 51 | int compressedSize = Util.readUnsignedVLQInt(input); 52 | input.read(compressedBuf, 0, compressedSize); 53 | int uncompressedSize = compressor.uncompress(compressedBuf, compressedSize, uncompressedBuf); 54 | bufPos = 0; 55 | blockSize = uncompressedSize; 56 | 57 | curBlockStart = nextBlockStart; 58 | nextBlockStart = curBlockStart + Util.unsignedVLQSize(compressedSize) + compressedSize; 59 | } 60 | 61 | @Override 62 | public int read(byte[] b) throws IOException { 63 | return read(b, 0, b.length); 64 | } 65 | 66 | @Override 67 | public int read(byte[] b, int off, int len) throws IOException { 68 | int remain = len; 69 | while (remain > 0) { 70 | int didRead = readImpl(b, off, remain); 71 | off += didRead; 72 | remain -= didRead; 73 | } 74 | return len; 75 | } 76 | 77 | private int readImpl(byte[] 
b, int off, int len) throws IOException { 78 | int available = available(); 79 | if (len <= available) { 80 | System.arraycopy(uncompressedBuf, bufPos, b, off, len); 81 | bufPos += len; 82 | return len; 83 | } else { 84 | System.arraycopy(uncompressedBuf, bufPos, b, off, available); 85 | bufPos = blockSize; 86 | fetchBlock(); 87 | return available; 88 | } 89 | } 90 | 91 | @Override 92 | public long skip(long n) throws IOException { 93 | long remain = n; 94 | while (remain > 0) { 95 | remain -= skipImpl(remain); 96 | } 97 | return n; 98 | } 99 | 100 | private long skipImpl(long n) throws IOException { 101 | int available = available(); 102 | if (n <= available) { 103 | bufPos += n; 104 | return n; 105 | } else { 106 | bufPos = blockSize; 107 | fetchBlock(); 108 | return available; 109 | } 110 | } 111 | 112 | @Override 113 | long getBlockPosition() { 114 | if (bufPos == blockSize) { 115 | return nextBlockStart; 116 | } 117 | return curBlockStart; 118 | } 119 | 120 | @Override 121 | public int available() throws IOException { 122 | return blockSize - bufPos; 123 | } 124 | } 125 | -------------------------------------------------------------------------------- /src/main/java/com/spotify/sparkey/CompressedOutputStream.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011-2013 Spotify AB 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 
15 | */ 16 | package com.spotify.sparkey; 17 | 18 | import java.io.FileDescriptor; 19 | import java.io.IOException; 20 | import java.io.OutputStream; 21 | import java.io.SyncFailedException; 22 | 23 | final class CompressedOutputStream extends OutputStream { 24 | private final CompressorType compressor; 25 | private final int maxBlockSize; 26 | private final OutputStream output; 27 | 28 | private final byte[] uncompressedBuffer; 29 | private final byte[] compressedBuffer; 30 | private final FileDescriptor fileDescriptor; 31 | private int pending; 32 | private CompressedWriter listener = CompressedWriter.DUMMY; 33 | 34 | CompressedOutputStream(CompressorType compressor, int maxBlockSize, OutputStream output, FileDescriptor fileDescriptor) throws IOException { 35 | this.compressor = compressor; 36 | this.fileDescriptor = fileDescriptor; 37 | if (maxBlockSize < 10) { 38 | throw new IOException("Too small block size - won't be able to fit keylen + valuelen in a single block"); 39 | } 40 | this.maxBlockSize = maxBlockSize; 41 | this.output = output; 42 | uncompressedBuffer = new byte[maxBlockSize]; 43 | compressedBuffer = new byte[compressor.maxCompressedLength(maxBlockSize)]; 44 | } 45 | 46 | @Override 47 | public void flush() throws IOException { 48 | if (pending == 0) { 49 | return; 50 | } 51 | 52 | int compressedSize = compressor.compress(uncompressedBuffer, pending, compressedBuffer); 53 | Util.writeUnsignedVLQ(compressedSize, output); 54 | output.write(compressedBuffer, 0, compressedSize); 55 | output.flush(); 56 | pending = 0; 57 | listener.afterFlush(); 58 | } 59 | 60 | public void fsync() throws SyncFailedException { 61 | fileDescriptor.sync(); 62 | } 63 | 64 | @Override 65 | public void close() throws IOException { 66 | flush(); 67 | output.close(); 68 | } 69 | 70 | @Override 71 | public void write(byte[] b) throws IOException { 72 | write(b, 0, b.length); 73 | } 74 | 75 | @Override 76 | public void write(byte[] b, int off, int len) throws IOException { 77 | 
while (len > 0) { 78 | int written = writeImpl(b, off, len); 79 | off += written; 80 | len -= written; 81 | } 82 | } 83 | 84 | private int writeImpl(byte[] b, int off, int len) throws IOException { 85 | int remaining = remaining(); 86 | if (len < remaining) { 87 | System.arraycopy(b, off, uncompressedBuffer, pending, len); 88 | pending += len; 89 | return len; 90 | } else { 91 | System.arraycopy(b, off, uncompressedBuffer, pending, remaining); 92 | pending = maxBlockSize; 93 | flush(); 94 | return remaining; 95 | } 96 | } 97 | 98 | @Override 99 | public void write(int b) throws IOException { 100 | uncompressedBuffer[pending++] = (byte) b; 101 | if (pending == maxBlockSize) { 102 | flush(); 103 | } 104 | } 105 | 106 | int getPending() { 107 | return pending; 108 | } 109 | 110 | int remaining() { 111 | return maxBlockSize - pending; 112 | } 113 | 114 | void setListener(CompressedWriter compressedWriter) { 115 | listener = compressedWriter; 116 | } 117 | 118 | int getMaxBlockSize() { 119 | return maxBlockSize; 120 | } 121 | } 122 | -------------------------------------------------------------------------------- /POST-RELEASE-CHECKLIST.md: -------------------------------------------------------------------------------- 1 | # Post-Release Checklist 2 | 3 | After a successful `mvn release:perform`, verify everything completed correctly. 4 | 5 | ## Immediate Verification (< 5 minutes) 6 | 7 | ### 1. Check Git State 8 | 9 | ```bash 10 | # Verify tag was created and pushed 11 | git fetch --tags 12 | git tag | grep sparkey-X.X.X 13 | git ls-remote --tags origin | grep sparkey-X.X.X 14 | 15 | # Verify version was bumped 16 | grep "" pom.xml | head -1 17 | # Should show X.X.X+1-SNAPSHOT 18 | 19 | # Check release commits 20 | git log --oneline -3 21 | # Should show: 22 | # - [maven-release-plugin] prepare for next development iteration 23 | # - [maven-release-plugin] prepare release sparkey-X.X.X 24 | 25 | # Verify working tree is clean 26 | git status 27 | ``` 28 | 29 | ### 2. 
Verify Central Publishing Portal 30 | 31 | Check deployment status: 32 | https://central.sonatype.com/publishing/deployments 33 | 34 | Look for your deployment: 35 | - **Status**: Should be "PUBLISHED" or "PUBLISHING" 36 | - **Deployment ID**: From the release log 37 | - **No validation errors** 38 | 39 | ### 3. Check Local Artifacts 40 | 41 | Verify artifacts were installed to local Maven repo: 42 | ```bash 43 | ls -lh ~/.m2/repository/com/spotify/sparkey/sparkey/X.X.X/ 44 | ``` 45 | 46 | Should contain: 47 | - `sparkey-X.X.X.jar` 48 | - `sparkey-X.X.X-sources.jar` 49 | - `sparkey-X.X.X-javadoc.jar` 50 | - `sparkey-X.X.X.pom` 51 | - All `.asc` signature files 52 | 53 | ## Maven Central Verification (30 minutes - 2 hours) 54 | 55 | ### 4. Check Maven Central Search 56 | 57 | After ~30 minutes, verify artifact appears: 58 | - https://central.sonatype.com/artifact/com.spotify.sparkey/sparkey/X.X.X 59 | - https://search.maven.org/artifact/com.spotify.sparkey/sparkey/X.X.X 60 | 61 | ### 5. Verify Downloadable 62 | 63 | Try downloading the artifact: 64 | ```bash 65 | curl -O https://repo1.maven.org/maven2/com/spotify/sparkey/sparkey/X.X.X/sparkey-X.X.X.pom 66 | cat sparkey-X.X.X.pom | grep -A 2 "" 67 | rm sparkey-X.X.X.pom 68 | ``` 69 | 70 | ## GitHub Release (Optional but Recommended) 71 | 72 | ### 6. Create GitHub Release 73 | 74 | ```bash 75 | # View CHANGELOG for release notes 76 | cat CHANGELOG.md | head -30 77 | 78 | # Create GitHub release 79 | gh release create sparkey-X.X.X \ 80 | --title "Version X.X.X" \ 81 | --notes "$(sed -n '/^#### X.X.X$/,/^#### [0-9]/p' CHANGELOG.md | head -n -1)" 82 | ``` 83 | 84 | Or manually at: https://github.com/spotify/sparkey-java/releases/new 85 | 86 | ## Communication (If Needed) 87 | 88 | ### 7. Announce Release 89 | 90 | If this is a major release or contains important fixes: 91 | - Update README.md if needed 92 | - Post announcement (Slack, mailing list, etc.) 
93 | - Update dependent projects 94 | 95 | ## Rollback (If Problems Found) 96 | 97 | If you discover issues AFTER release: 98 | 99 | **DO NOT delete from Maven Central** (artifacts are immutable) 100 | 101 | Instead: 102 | 1. Fix the issue 103 | 2. Release a new patch version immediately (X.X.X+1) 104 | 3. Document the issue in CHANGELOG.md 105 | 106 | ## Troubleshooting 107 | 108 | ### Deployment shows "FAILED" on Central Portal 109 | 110 | 1. Check the error message on https://central.sonatype.com/publishing/deployments 111 | 2. Common issues: 112 | - Missing metadata (name, description, url) 113 | - Invalid POM structure 114 | - Missing or invalid signatures 115 | 116 | ### Tag pushed but no artifacts on Maven Central 117 | 118 | 1. Check if `release:perform` completed successfully 119 | 2. Look for deployment errors in the Maven output 120 | 3. Check https://central.sonatype.com/publishing/deployments for the deployment 121 | 122 | ### Version not bumped correctly 123 | 124 | The release plugin should have bumped the version. If not: 125 | ```bash 126 | # Manually bump version 127 | # Edit pom.xml: <version>X.X.X-SNAPSHOT</version> -> <version>X.X.X+1-SNAPSHOT</version> 128 | git add pom.xml 129 | git commit -m "Bump version to X.X.X+1-SNAPSHOT" 130 | git push 131 | ``` 132 | -------------------------------------------------------------------------------- /src/main/java22/com/spotify/sparkey/UncompressedLogReaderJ22.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2025 Spotify AB 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License.
You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | package com.spotify.sparkey; 17 | 18 | import java.io.IOException; 19 | import java.io.InputStream; 20 | import java.lang.foreign.MemorySegment; 21 | import java.nio.charset.StandardCharsets; 22 | 23 | /** 24 | * Fully immutable log reader for UNCOMPRESSED files. 25 | * Optimized version that skips entry block handling. 26 | * 27 | * For uncompressed files: 28 | * - No entry blocks - each position points directly to an entry 29 | * - No need to skip entries within blocks 30 | */ 31 | final class UncompressedLogReaderJ22 { 32 | final ReadOnlyMemMapJ22 data; // Package-private for inlined access from UncompressedIndexHashJ22 33 | private final LogHeader logHeader; 34 | 35 | UncompressedLogReaderJ22(ReadOnlyMemMapJ22 data, LogHeader logHeader) { 36 | this.data = data; 37 | this.logHeader = logHeader; 38 | } 39 | 40 | /** 41 | * Simple immutable entry that reads value data on demand. 42 | * Supports values larger than 2GB via getValueAsStream(). 43 | * getValue() throws IllegalStateException for values > Integer.MAX_VALUE. 
44 | */ 45 | static final class ImmutableEntry implements SparkeyReader.Entry { 46 | private final int keyLen; 47 | private final byte[] key; 48 | private final long valueLen; 49 | private final long valuePosition; 50 | private final ReadOnlyMemMapJ22 data; 51 | 52 | ImmutableEntry(int keyLen, byte[] key, 53 | long valueLen, long valuePosition, ReadOnlyMemMapJ22 data) { 54 | this.keyLen = keyLen; 55 | // Defensive copy: ensure immutability even if caller reuses the key array 56 | this.key = java.util.Arrays.copyOf(key, keyLen); 57 | this.valueLen = valueLen; 58 | this.valuePosition = valuePosition; 59 | this.data = data; 60 | } 61 | 62 | @Override 63 | public int getKeyLength() { 64 | return keyLen; 65 | } 66 | 67 | @Override 68 | public byte[] getKey() { 69 | return key; 70 | } 71 | 72 | @Override 73 | public String getKeyAsString() { 74 | return new String(key, StandardCharsets.UTF_8); 75 | } 76 | 77 | @Override 78 | public long getValueLength() { 79 | return valueLen; 80 | } 81 | 82 | @Override 83 | public byte[] getValue() throws IOException { 84 | if (valueLen > Integer.MAX_VALUE) { 85 | throw new IllegalStateException("Value size is " + valueLen + 86 | " bytes, exceeds byte[] limit. 
Use getValueAsStream() instead."); 87 | } 88 | return data.readBytes(valuePosition, (int) valueLen); 89 | } 90 | 91 | @Override 92 | public String getValueAsString() throws IOException { 93 | return new String(getValue(), StandardCharsets.UTF_8); 94 | } 95 | 96 | @Override 97 | public InputStream getValueAsStream() { 98 | // Zero-copy stream backed by MemorySegment - no allocation, supports values > 2GB 99 | if (valueLen == 0) { 100 | // Use singleton for empty streams (edge case, but possible) 101 | return EmptyInputStream.INSTANCE; 102 | } 103 | try { 104 | MemorySegment valueSegment = data.asSlice(valuePosition, valueLen); 105 | return new MemorySegmentInputStream(valueSegment); 106 | } catch (IOException e) { 107 | throw new RuntimeException(e); 108 | } 109 | } 110 | 111 | @Override 112 | public SparkeyReader.Type getType() { 113 | return SparkeyReader.Type.PUT; 114 | } 115 | } 116 | } 117 | -------------------------------------------------------------------------------- /src/main/java/com/spotify/sparkey/extra/SparkeyValidator.java: -------------------------------------------------------------------------------- 1 | package com.spotify.sparkey.extra; 2 | 3 | import com.spotify.sparkey.LogHeader; 4 | import com.spotify.sparkey.Sparkey; 5 | import com.spotify.sparkey.SparkeyLogIterator; 6 | import com.spotify.sparkey.SparkeyReader; 7 | 8 | import java.io.File; 9 | import java.io.IOException; 10 | import java.util.Arrays; 11 | 12 | public class SparkeyValidator { 13 | public static void main(String[] args) throws IOException { 14 | if (args.length < 1) { 15 | System.err.println("Usage: sparkey-validator "); 16 | System.exit(1); 17 | return; 18 | } 19 | 20 | File input = new File(args[0]); 21 | final File logFile = Sparkey.getLogFile(input); 22 | if (!logFile.exists()) { 23 | System.err.println(logFile.getAbsolutePath() + " does not exist"); 24 | System.exit(1); 25 | return; 26 | } 27 | if (!logFile.isFile()) { 28 | System.err.println(logFile.getAbsolutePath() + " 
is not a file"); 29 | System.exit(1); 30 | return; 31 | } 32 | 33 | final File indexFile = Sparkey.getIndexFile(input); 34 | if (!indexFile.exists()) { 35 | System.err.println(indexFile.getAbsolutePath() + " does not exist"); 36 | System.exit(1); 37 | return; 38 | } 39 | if (!indexFile.isFile()) { 40 | System.err.println(indexFile.getAbsolutePath() + " is not a file"); 41 | System.exit(1); 42 | return; 43 | } 44 | 45 | LogHeader logHeader = Sparkey.getLogHeader(logFile); 46 | 47 | final SparkeyReader reader = Sparkey.open(indexFile); 48 | final SparkeyReader reader2 = Sparkey.open(indexFile); 49 | 50 | boolean inconsistent = false; 51 | 52 | System.out.println("Validating log iterator."); 53 | 54 | final SparkeyLogIterator iterator = new SparkeyLogIterator(logFile); 55 | for (SparkeyReader.Entry entry : iterator) { 56 | switch (entry.getType()) { 57 | case PUT: 58 | validateKey(logHeader, entry); 59 | validateValue(logHeader, entry); 60 | 61 | // Just make sure this doesn't crash 62 | reader.getAsByteArray(entry.getKey()); 63 | 64 | break; 65 | case DELETE: 66 | validateKey(logHeader, entry); 67 | checkState(0 == entry.getValueLength()); 68 | 69 | // Just make sure this doesn't crash 70 | reader.getAsByteArray(entry.getKey()); 71 | 72 | break; 73 | default: 74 | throw new RuntimeException("Unknown type: " + entry.getType()); 75 | } 76 | } 77 | 78 | System.out.println("Validating hash iterator and random lookups."); 79 | for (SparkeyReader.Entry entry : reader) { 80 | final byte[] value = entry.getValue(); 81 | 82 | validateKey(logHeader, entry); 83 | validateValue(logHeader, entry, value); 84 | 85 | 86 | final byte[] value2 = reader2.getAsByteArray(entry.getKey()); 87 | if (!Arrays.equals(value, value2)) { 88 | System.err.println("Inconsistency for key: " + entry.getKeyAsString() + " when iterating and doing a lookup"); 89 | inconsistent = true; 90 | } 91 | 92 | } 93 | 94 | System.out.println("Done!"); 95 | 96 | if (inconsistent) { 97 | System.exit(1); 98 | } 99 | 
} 100 | 101 | private static void validateKey(LogHeader logHeader, SparkeyReader.Entry entry) { 102 | checkState(entry.getKeyLength() <= logHeader.getMaxKeyLen()); 103 | checkState(entry.getKeyLength() == entry.getKey().length); 104 | } 105 | 106 | private static void validateValue(LogHeader logHeader, SparkeyReader.Entry entry) throws IOException { 107 | final byte[] value = entry.getValue(); 108 | validateValue(logHeader, entry, value); 109 | } 110 | 111 | private static void validateValue(LogHeader logHeader, SparkeyReader.Entry entry, byte[] value) { 112 | checkState(entry.getValueLength() <= logHeader.getMaxValueLen()); 113 | checkState(entry.getValueLength() == value.length); 114 | } 115 | 116 | private static void checkState(boolean b) { 117 | if (!b) { 118 | throw new IllegalStateException(); 119 | } 120 | } 121 | 122 | 123 | } 124 | -------------------------------------------------------------------------------- /src/test/java/com/spotify/sparkey/system/SparkeyExample.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011-2013 Spotify AB 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 
15 | */ 16 | package com.spotify.sparkey.system; 17 | 18 | import com.spotify.sparkey.*; 19 | import org.junit.Test; 20 | 21 | import java.io.File; 22 | import java.io.IOException; 23 | import java.util.Random; 24 | 25 | public class SparkeyExample { 26 | private static final int NUM_RANDOM_READS = 1000; 27 | private static final int N = 2000; 28 | 29 | public static void main(String[] args) throws Exception { 30 | final File indexFile = new File("test.spi"); 31 | 32 | create(indexFile); 33 | 34 | final Random random = new Random(11234); 35 | final SparkeyReader reader = Sparkey.open(indexFile); 36 | 37 | randomReads(random, reader); 38 | 39 | rawIteration(new SparkeyLogIterator(Sparkey.getLogFile(indexFile))); 40 | 41 | iteration(reader); 42 | 43 | reader.close(); 44 | } 45 | 46 | private static void randomReads(Random random, SparkeyReader reader) throws IOException { 47 | for (int i = 0; i < NUM_RANDOM_READS; i++) { 48 | int k = random.nextInt(N); 49 | String key = "Key" + k; 50 | String entry = reader.getAsString(key); 51 | if (!("Value" + k).equals(entry)) { 52 | throw new RuntimeException("Expected " + "Value" + k + " but got " + entry); 53 | } 54 | } 55 | } 56 | 57 | private static void create(File indexFile) throws IOException { 58 | final SparkeyWriter writer = Sparkey.createNew(indexFile, CompressionType.SNAPPY, 512); 59 | for (int i = 0; i < N; i++) { 60 | writer.put("Key" + i, "Value" + i); 61 | } 62 | writer.flush(); 63 | writer.writeHash(); 64 | writer.close(); 65 | } 66 | 67 | private static void iteration(final SparkeyReader reader) throws IOException { 68 | int i = 0; 69 | for (SparkeyReader.Entry entry : reader) { 70 | String key = entry.getKeyAsString(); 71 | String value = entry.getValueAsString(); 72 | 73 | String expectedKey = "Key" + i; 74 | String expectedValue = "Value" + i; 75 | 76 | if (!key.equals(expectedKey)) { 77 | throw new RuntimeException("Expected " + expectedKey + " but got " + key); 78 | } 79 | if (!value.equals(expectedValue)) 
{ 80 | throw new RuntimeException("Expected '" + expectedValue + "' but got '" + value + "' for key '" + key + "'"); 81 | } 82 | i++; 83 | } 84 | if (i != N) { 85 | throw new RuntimeException("Only got " + i + " entries, expected " + N); 86 | } 87 | } 88 | 89 | private static void rawIteration(final SparkeyLogIterator logIterator) throws IOException { 90 | int i = 0; 91 | for (SparkeyReader.Entry entry : logIterator) { 92 | if (entry.getType() == SparkeyReader.Type.PUT) { 93 | String key = entry.getKeyAsString(); 94 | String value = entry.getValueAsString(); 95 | 96 | String expectedKey = "Key" + (i % N); 97 | String expectedValue = "Value" + (i); 98 | if (!key.equals(expectedKey)) { 99 | throw new RuntimeException("Expected " + expectedKey + " but got " + key); 100 | } 101 | if (!value.equals(expectedValue)) { 102 | throw new RuntimeException("Expected " + expectedValue + " but got " + value); 103 | } 104 | 105 | i++; 106 | } 107 | } 108 | if (i != N) { 109 | throw new RuntimeException("Only got " + i + " entries, expected " + 2 * N); 110 | } 111 | } 112 | 113 | @Test 114 | public void dummy() { 115 | // Just to make the junit test runner work 116 | } 117 | 118 | } 119 | -------------------------------------------------------------------------------- /src/main/java/com/spotify/sparkey/LogWriter.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011-2013 Spotify AB 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | package com.spotify.sparkey; 17 | 18 | import java.io.*; 19 | import java.nio.charset.StandardCharsets; 20 | 21 | final class LogWriter { 22 | private final LogHeader header; 23 | private final File file; 24 | private final BlockOutput logStream; 25 | private boolean closed; 26 | 27 | private LogWriter(File file, CompressionType compressionType, int compressionBlockSize) throws IOException { 28 | this.file = file; 29 | header = new LogHeader(compressionType, compressionBlockSize); 30 | header.write(file, false); 31 | logStream = setup(header, file); 32 | } 33 | 34 | private LogWriter(File file) throws IOException { 35 | this.file = file; 36 | if (!file.exists()) { 37 | throw new FileNotFoundException(file.getCanonicalPath()); 38 | } 39 | header = LogHeader.read(file); 40 | logStream = setup(header, file); 41 | } 42 | 43 | File getFile() { 44 | return file; 45 | } 46 | 47 | private static BlockOutput setup(LogHeader header, File file) throws IOException { 48 | truncate(file, header.getDataEnd()); 49 | FileOutputStream fileOutputStream = new FileOutputStream(file, true); 50 | Sparkey.incrOpenFiles(); 51 | FileDescriptor fd = fileOutputStream.getFD(); 52 | OutputStream stream = new BufferedOutputStream(fileOutputStream, 1024 * 1024); 53 | return header.getCompressionTypeBackend().createBlockOutput(fd, stream, header.getCompressionBlockSize(), 54 | header.getMaxEntriesPerBlock()); 55 | } 56 | 57 | private static void truncate(File file, long size) throws IOException { 58 | try (RandomAccessFile rw = new RandomAccessFile(file, "rw")) { 59 | rw.setLength(size); 60 | } 61 | } 62 | 63 | static LogWriter createNew(File file, CompressionType compressionType, int compressionBlockSize) throws IOException { 64 | return new LogWriter(file, compressionType, compressionBlockSize); 65 | } 66 | 67 | static LogWriter openExisting(File file) throws IOException 
{ 68 | return new LogWriter(file); 69 | } 70 | 71 | void flush(boolean fsync) throws IOException { 72 | logStream.flush(fsync); 73 | writeHeader(fsync); 74 | } 75 | 76 | private void writeHeader(boolean fsync) throws IOException { 77 | header.setMaxEntriesPerBlock(logStream.getMaxEntriesPerBlock()); 78 | header.setDataEnd(file.length()); 79 | header.write(file, fsync); 80 | } 81 | 82 | void close(boolean fsync) throws IOException { 83 | if (closed) { 84 | return; 85 | } 86 | closed = true; 87 | logStream.close(fsync); 88 | Sparkey.decrOpenFiles(); 89 | writeHeader(fsync); 90 | } 91 | 92 | void put(String key, String value) throws IOException { 93 | put(key.getBytes(StandardCharsets.UTF_8), value.getBytes(StandardCharsets.UTF_8)); 94 | } 95 | 96 | void put(byte[] key, byte[] value) throws IOException { 97 | logStream.put(key, key.length, value, value.length); 98 | header.put(key.length, value.length); 99 | } 100 | 101 | void put(byte[] key, InputStream value, long valueLen) throws IOException { 102 | logStream.put(key, key.length, value, valueLen); 103 | header.put(key.length, valueLen); 104 | } 105 | 106 | void delete(String key) throws IOException { 107 | delete(key.getBytes(StandardCharsets.UTF_8)); 108 | } 109 | 110 | void delete(byte[] key) throws IOException { 111 | if (key.length <= header.getMaxKeyLen()) { 112 | logStream.delete(key, key.length); 113 | header.delete(key.length); 114 | } 115 | } 116 | 117 | } 118 | -------------------------------------------------------------------------------- /src/test/java/com/spotify/sparkey/system/MemoryLock.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2025 Spotify AB 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. 
You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | package com.spotify.sparkey.system; 17 | 18 | import java.lang.foreign.*; 19 | import java.lang.invoke.MethodHandle; 20 | 21 | /** 22 | * Wrapper for native mlock() syscall using Foreign Function & Memory API. 23 | * Allows locking memory-mapped pages in RAM to prevent page faults during benchmarking. 24 | */ 25 | public class MemoryLock { 26 | 27 | private static final Linker LINKER = Linker.nativeLinker(); 28 | private static final MethodHandle MLOCK; 29 | private static final MethodHandle MUNLOCK; 30 | 31 | static { 32 | // Look up native mlock and munlock functions 33 | // int mlock(const void *addr, size_t len); 34 | // int munlock(const void *addr, size_t len); 35 | 36 | SymbolLookup stdlib = LINKER.defaultLookup(); 37 | 38 | FunctionDescriptor mlockDesc = FunctionDescriptor.of( 39 | ValueLayout.JAVA_INT, // return type: int 40 | ValueLayout.ADDRESS, // addr: void* 41 | ValueLayout.JAVA_LONG // len: size_t 42 | ); 43 | 44 | MemorySegment mlockAddr = stdlib.find("mlock") 45 | .orElseThrow(() -> new UnsupportedOperationException("mlock not available")); 46 | MemorySegment munlockAddr = stdlib.find("munlock") 47 | .orElseThrow(() -> new UnsupportedOperationException("munlock not available")); 48 | 49 | MLOCK = LINKER.downcallHandle(mlockAddr, mlockDesc); 50 | MUNLOCK = LINKER.downcallHandle(munlockAddr, mlockDesc); 51 | } 52 | 53 | /** 54 | * Lock a MemorySegment in RAM, preventing it from being paged out. 
55 | * Per mlock(2) man page: "All pages that contain a part of the specified address 56 | * range are guaranteed to be resident in RAM when the call returns successfully." 57 | * 58 | * @param segment The memory segment to lock 59 | * @return true if successful, false if mlock failed (e.g., insufficient privileges) 60 | */ 61 | public static boolean lock(MemorySegment segment) { 62 | try { 63 | int result = (int) MLOCK.invoke(segment, segment.byteSize()); 64 | return result == 0; 65 | } catch (Throwable e) { 66 | return false; 67 | } 68 | } 69 | 70 | /** 71 | * Unlock a previously locked MemorySegment, allowing it to be paged out. 72 | * 73 | * @param segment The memory segment to unlock 74 | * @return true if successful, false if munlock failed 75 | */ 76 | public static boolean unlock(MemorySegment segment) { 77 | try { 78 | int result = (int) MUNLOCK.invoke(segment, segment.byteSize()); 79 | return result == 0; 80 | } catch (Throwable e) { 81 | return false; 82 | } 83 | } 84 | 85 | /** 86 | * Check if mlock is likely to work by testing current ulimit -l. 87 | * Returns the maximum lockable memory in bytes, or -1 if unlimited. 
88 | */ 89 | public static long getMaxLockedMemory() { 90 | try { 91 | // This would require another FFI call to getrlimit(RLIMIT_MEMLOCK) 92 | // For now, just suggest checking manually 93 | ProcessBuilder pb = new ProcessBuilder("sh", "-c", "ulimit -l"); 94 | Process p = pb.start(); 95 | byte[] output = p.getInputStream().readAllBytes(); 96 | p.waitFor(); 97 | 98 | String result = new String(output).trim(); 99 | if (result.equals("unlimited")) { 100 | return -1; 101 | } 102 | 103 | // ulimit -l returns KB, convert to bytes 104 | return Long.parseLong(result) * 1024; 105 | } catch (Exception e) { 106 | System.err.println("Could not check ulimit -l: " + e.getMessage()); 107 | return 0; 108 | } 109 | } 110 | } 111 | -------------------------------------------------------------------------------- /src/main/java/com/spotify/sparkey/SparkeyWriter.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011-2013 Spotify AB 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | package com.spotify.sparkey; 17 | 18 | import java.io.Closeable; 19 | import java.io.IOException; 20 | import java.io.InputStream; 21 | 22 | public interface SparkeyWriter extends Closeable { 23 | /** 24 | * Append the key/value pair to the writer, as UTF-8. 25 | */ 26 | void put(String key, String value) throws IOException; 27 | 28 | /** 29 | * Append the key/value pair to the writer. 
/**
 * Writer for a sparkey log and its hash index.
 *
 * <p>Entries are appended with the {@code put} and {@code delete} methods. The index must be
 * created or rewritten with {@link #writeHash()} before random lookups can see them.
 */
public interface SparkeyWriter extends Closeable {
  /**
   * Append the key/value pair to the writer, as UTF-8.
   */
  void put(String key, String value) throws IOException;

  /**
   * Append the key/value pair to the writer.
   */
  void put(byte[] key, byte[] value) throws IOException;

  /**
   * Append the key/value pair to the writer.
   *
   * Only uses the first valueLen bytes from valueStream.
   *
   * @param key the key bytes
   * @param valueStream stream to read the value from
   * @param valueLen number of bytes to read from {@code valueStream}
   */
  void put(byte[] key, InputStream valueStream, long valueLen) throws IOException;

  /**
   * Deletes the key from the writer, as UTF-8
   */
  void delete(String key) throws IOException;

  /**
   * Deletes the key from the writer.
   */
  void delete(byte[] key) throws IOException;

  /**
   * Flush all pending writes to file.
   */
  void flush() throws IOException;

  /**
   * Flush and close the writer.
   */
  @Override
  void close() throws IOException;

  /**
   * Create or rewrite the index,
   * which is required for random lookups to be visible.
   */
  void writeHash() throws IOException;

  /**
   * Create or rewrite the index,
   * which is required for random lookups to be visible.
   *
   * @param hashType choice of hash type, can be 32 or 64 bits.
   * @deprecated Use writer.setHashType(hashType); writer.writeHash(); instead
   */
  @Deprecated
  void writeHash(HashType hashType) throws IOException;

  /**
   * Set whether or not flushes and hash writes should be synced to disk.
   *
   * @param fsync whether or not flushes and hash writes should be synced to disk
   */
  void setFsync(boolean fsync);

  /**
   * Set the hash type for all subsequent writeHash operations.
   * @param hashType choice of hash type, can be 32 or 64 bits.
   *                 if null, will use the default.
   */
  void setHashType(HashType hashType);

  /**
   * Set the sparsity for all subsequent writeHash operations.
   * A sparsity of 1.0 would mean that every slot in the hash table is occupied.
   * The actual minimum sparsity level is 1.3, values lower than this are ignored.
   * @param sparsity the sparsity to use; values lower than 1.3 are ignored
   */
  void setHashSparsity(double sparsity);

  /**
   * Set the hash seed to use. Default: a random seed
   * If set to 0, a random seed will be used.
   * @param hashSeed the seed to use, or 0 to have a random seed used
   */
  void setHashSeed(int hashSeed);

  /**
   * Set the maximum amount of memory to use for index construction.
   * Default: Runtime.freeMemory() / 2
   * @param maxMemory the memory limit; presumably in bytes, matching Runtime.freeMemory()
   *                  (confirm against the implementation)
   */
  void setMaxMemory(long maxMemory);

  /**
   * Set which construction method to use to create the hash index.
   * Default: AUTO
   * @param method the construction method to use
   */
  void setConstructionMethod(ConstructionMethod method);

  /**
   * Strategies for building the hash index.
   */
  enum ConstructionMethod {
    /**
     * Chooses construction method dynamically based on size of data and available memory.
     */
    AUTO,

    /**
     * Write hash index in memory
     */
    IN_MEMORY,

    /**
     * Sort hash entries before writing to the hash index.
     */
    SORTING
  }
}
24 | this.compressedOutputStream = compressedOutputStream; 25 | this.maxEntriesPerBlock = maxEntriesPerBlock; 26 | compressedOutputStream.setListener(this); 27 | maxBlockSize = this.compressedOutputStream.getMaxBlockSize(); 28 | } 29 | 30 | public void afterFlush() { 31 | maxEntriesPerBlock = Math.max(currentNumEntries, maxEntriesPerBlock); 32 | currentNumEntries = 0; 33 | flushed = true; 34 | } 35 | 36 | @Override 37 | public void flush(boolean fsync) throws IOException { 38 | compressedOutputStream.flush(); 39 | if (fsync) { 40 | compressedOutputStream.fsync(); 41 | } 42 | } 43 | 44 | @Override 45 | public void put(byte[] key, int keyLen, byte[] value, int valueLen) throws IOException { 46 | int keySize = Util.unsignedVLQSize(keyLen + 1) + Util.unsignedVLQSize(valueLen); 47 | int totalSize = keySize + keyLen + valueLen; 48 | 49 | smartFlush(keySize, totalSize); 50 | flushed = false; 51 | currentNumEntries++; 52 | 53 | Util.writeUnsignedVLQ(keyLen + 1, compressedOutputStream); 54 | Util.writeUnsignedVLQ(valueLen, compressedOutputStream); 55 | compressedOutputStream.write(key, 0, keyLen); 56 | compressedOutputStream.write(value, 0, valueLen); 57 | 58 | 59 | // Make sure that the beginning of each block is the start of a key/value pair 60 | if (flushed && compressedOutputStream.getPending() > 0) { 61 | compressedOutputStream.flush(); 62 | } 63 | } 64 | 65 | @Override 66 | public void put(byte[] key, int keyLen, InputStream value, long valueLen) throws IOException { 67 | int keySize = Util.unsignedVLQSize(keyLen + 1) + Util.unsignedVLQSize(valueLen); 68 | long totalSize = keySize + keyLen + valueLen; 69 | 70 | smartFlush(keySize, totalSize); 71 | flushed = false; 72 | currentNumEntries++; 73 | 74 | Util.writeUnsignedVLQ(keyLen + 1, compressedOutputStream); 75 | Util.writeUnsignedVLQ(valueLen, compressedOutputStream); 76 | compressedOutputStream.write(key, 0, keyLen); 77 | Util.copy(valueLen, value, compressedOutputStream, buf); 78 | 79 | // Make sure that the 
beginning of each block is the start of a key/value pair 80 | if (flushed && compressedOutputStream.getPending() > 0) { 81 | compressedOutputStream.flush(); 82 | } 83 | } 84 | 85 | private void smartFlush(int keySize, long totalSize) throws IOException { 86 | int remaining = compressedOutputStream.remaining(); 87 | if (remaining < keySize) { 88 | flush(false); 89 | } else if (remaining < totalSize && totalSize < maxBlockSize - remaining) { 90 | flush(false); 91 | } 92 | } 93 | 94 | @Override 95 | public void delete(byte[] key, int keyLen) throws IOException { 96 | int keySize = 1 + Util.unsignedVLQSize(keyLen + 1); 97 | smartFlush(keySize, keySize + keyLen); 98 | 99 | flushed = false; 100 | currentNumEntries++; 101 | 102 | compressedOutputStream.write(0); 103 | Util.writeUnsignedVLQ(keyLen, compressedOutputStream); 104 | compressedOutputStream.write(key, 0, keyLen); 105 | 106 | // Make sure that the beginning of each block is the start of a key/value pair 107 | if (flushed && compressedOutputStream.getPending() > 0) { 108 | compressedOutputStream.flush(); 109 | } 110 | } 111 | 112 | @Override 113 | public void close(boolean fsync) throws IOException { 114 | flush(fsync); 115 | compressedOutputStream.close(); 116 | } 117 | 118 | public int getMaxEntriesPerBlock() { 119 | return maxEntriesPerBlock; 120 | } 121 | } 122 | -------------------------------------------------------------------------------- /src/main/java/com/spotify/sparkey/InMemoryData.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011-2013 Spotify AB 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. 
You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | package com.spotify.sparkey; 17 | 18 | import java.io.IOException; 19 | import java.util.ArrayList; 20 | 21 | class InMemoryData implements ReadWriteData { 22 | private static final int CHUNK_SIZE = 1 << 30; 23 | private static final int BITMASK_30 = ((1 << 30) - 1); 24 | 25 | protected final byte[][] chunks; 26 | private final long size; 27 | private final int numChunks; 28 | 29 | private int curChunkIndex; 30 | private byte[] curChunk; 31 | private int curChunkPos; 32 | 33 | InMemoryData(long size) { 34 | this.size = size; 35 | if (size < 0) { 36 | throw new IllegalArgumentException("Negative size: " + size); 37 | } 38 | 39 | final ArrayList chunksBuffer = new ArrayList<>(); 40 | long offset = 0; 41 | while (offset < size) { 42 | long remaining = size - offset; 43 | int chunkSize = (int) Math.min(remaining, CHUNK_SIZE); 44 | chunksBuffer.add(new byte[chunkSize]); 45 | offset += CHUNK_SIZE; 46 | } 47 | chunks = chunksBuffer.toArray(new byte[chunksBuffer.size()][]); 48 | numChunks = chunks.length; 49 | 50 | curChunkIndex = 0; 51 | curChunk = chunks[0]; 52 | } 53 | 54 | public void writeLittleEndianLong(long value) throws IOException { 55 | writeUnsignedByte((int) ((value) & 0xFF)); 56 | writeUnsignedByte((int) ((value >>> 8) & 0xFF)); 57 | writeUnsignedByte((int) ((value >>> 16) & 0xFF)); 58 | writeUnsignedByte((int) ((value >>> 24) & 0xFF)); 59 | writeUnsignedByte((int) ((value >>> 32) & 0xFF)); 60 | writeUnsignedByte((int) ((value >>> 40) & 0xFF)); 61 | writeUnsignedByte((int) ((value >>> 48) & 0xFF)); 62 | 
writeUnsignedByte((int) ((value >>> 56) & 0xFF)); 63 | } 64 | 65 | public void writeLittleEndianInt(int value) throws IOException { 66 | writeUnsignedByte((value) & 0xFF); 67 | writeUnsignedByte((value >>> 8) & 0xFF); 68 | writeUnsignedByte((value >>> 16) & 0xFF); 69 | writeUnsignedByte((value >>> 24) & 0xFF); 70 | } 71 | 72 | @Override 73 | public void close() throws IOException { 74 | for (int i = 0; i < numChunks; i++) { 75 | chunks[i] = null; 76 | } 77 | curChunk = null; 78 | } 79 | 80 | @Override 81 | public void seek(long pos) throws IOException { 82 | if (pos > size) { 83 | throw new IOException("Corrupt index: referencing data outside of range"); 84 | } 85 | int chunkIndex = (int) (pos >>> 30); 86 | curChunkIndex = chunkIndex; 87 | curChunk = chunks[chunkIndex]; 88 | curChunkPos = ((int) pos) & BITMASK_30; 89 | } 90 | 91 | @Override 92 | public void writeUnsignedByte(int value) throws IOException { 93 | if (curChunkPos == CHUNK_SIZE) { 94 | next(); 95 | } 96 | curChunk[curChunkPos++] = (byte) value; 97 | } 98 | 99 | private void next() throws IOException { 100 | curChunkIndex++; 101 | if (curChunkIndex >= chunks.length) { 102 | throw new IOException("Corrupt index: referencing data outside of range"); 103 | } 104 | curChunk = chunks[curChunkIndex]; 105 | curChunkPos = 0; 106 | } 107 | 108 | @Override 109 | public int readUnsignedByte() throws IOException { 110 | if (curChunkPos == CHUNK_SIZE) { 111 | next(); 112 | } 113 | return Util.unsignedByte(curChunk[curChunkPos++]); 114 | } 115 | 116 | @Override 117 | public int readLittleEndianInt() throws IOException { 118 | return Util.readLittleEndianIntSlowly(this); 119 | } 120 | 121 | @Override 122 | public long readLittleEndianLong() throws IOException { 123 | return Util.readLittleEndianLongSlowly(this); 124 | } 125 | 126 | @Override 127 | public String toString() { 128 | return "InMemoryData{" + 129 | "size=" + size + 130 | '}'; 131 | } 132 | } 133 | 
-------------------------------------------------------------------------------- /src/test/java/com/spotify/sparkey/system/ReaderParametrizedLargeFilesTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2025 Spotify AB 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | package com.spotify.sparkey.system; 17 | 18 | import com.spotify.sparkey.CompressionType; 19 | import com.spotify.sparkey.HashType; 20 | import com.spotify.sparkey.Sparkey; 21 | import com.spotify.sparkey.SparkeyReader; 22 | import com.spotify.sparkey.SparkeyWriter; 23 | import com.spotify.sparkey.TestSparkeyWriter; 24 | import com.spotify.sparkey.UtilTest; 25 | import org.junit.Test; 26 | import org.junit.runner.RunWith; 27 | import org.junit.runners.Parameterized; 28 | import org.junit.runners.Parameterized.Parameters; 29 | 30 | import java.io.IOException; 31 | import java.util.Collection; 32 | 33 | import static org.junit.Assert.assertEquals; 34 | import static org.junit.Assert.assertNull; 35 | import static org.junit.Assert.assertTrue; 36 | 37 | /** 38 | * Parametrized large file tests that run against all available reader implementations. 39 | * Tests verify that all reader types correctly handle large files (>2GB chunks, large indices). 
40 | */ 41 | @RunWith(Parameterized.class) 42 | public class ReaderParametrizedLargeFilesTest extends BaseSystemTest { 43 | 44 | private final ReaderType readerType; 45 | 46 | public ReaderParametrizedLargeFilesTest(ReaderType readerType) { 47 | this.readerType = readerType; 48 | } 49 | 50 | @Parameters(name = "{0}") 51 | public static Collection readerTypes() { 52 | return java.util.Arrays.asList(ReaderType.availableAsParameters()); 53 | } 54 | 55 | @Test 56 | public void testLargeLogFile() throws IOException { 57 | if (!readerType.supports(CompressionType.NONE)) { 58 | return; 59 | } 60 | 61 | UtilTest.setMapBits(10); 62 | String expectedValue = "value"; 63 | while (expectedValue.length() < 5 * 1024) { // Larger than a map chunk 64 | expectedValue += expectedValue; 65 | } 66 | 67 | byte[] value = expectedValue.getBytes(); 68 | 69 | SparkeyWriter writer = Sparkey.createNew(indexFile, CompressionType.NONE, 1024); 70 | for (int i = 0; i < 2000; i++) { 71 | writer.put(("key_" + i).getBytes(), value); 72 | } 73 | TestSparkeyWriter.writeHashAndCompare(writer); 74 | writer.close(); 75 | 76 | assertTrue(logFile.length() > 2000 * 5 * 1024); 77 | 78 | try (SparkeyReader reader = readerType.open(indexFile)) { 79 | assertEquals(indexFile.length() + logFile.length(), reader.getTotalBytes()); 80 | for (int i = 0; i < 2000; i += 100) { 81 | assertEquals(expectedValue, reader.getAsString("key_" + i)); 82 | } 83 | assertNull(reader.getAsString("key_" + 2000)); 84 | } 85 | } 86 | 87 | @Test 88 | public void testSmallIndexFile() throws IOException { 89 | if (!readerType.supports(CompressionType.NONE)) { 90 | return; 91 | } 92 | testLargeIndexFileInner(7000); 93 | } 94 | 95 | @Test 96 | public void testMediumIndexFile() throws IOException { 97 | if (!readerType.supports(CompressionType.NONE)) { 98 | return; 99 | } 100 | testLargeIndexFileInner(150000); 101 | } 102 | 103 | @Test 104 | public void testLargeIndexFile() throws IOException { 105 | if 
(!readerType.supports(CompressionType.NONE)) { 106 | return; 107 | } 108 | testLargeIndexFileInner(500000); 109 | } 110 | 111 | private void testLargeIndexFileInner(final long size) throws IOException { 112 | SparkeyWriter writer = Sparkey.createNew(indexFile, CompressionType.NONE, 1024); 113 | for (int i = 0; i < size; i++) { 114 | writer.put(("key_" + i), "" + (i % 13)); 115 | } 116 | writer.setHashType(HashType.HASH_64_BITS); 117 | TestSparkeyWriter.writeHashAndCompare(writer); 118 | writer.close(); 119 | 120 | assertTrue(indexFile.length() > size * 8L); 121 | 122 | try (SparkeyReader reader = readerType.open(indexFile)) { 123 | assertTrue(0 <= reader.getLoadedBytes()); 124 | assertTrue(reader.getLoadedBytes() <= reader.getTotalBytes()); 125 | for (int i = 0; i < 1000; i++) { 126 | long key = i * size / 1000L; 127 | assertEquals("" + (key % 13), reader.getAsString("key_" + key)); 128 | } 129 | assertNull(reader.getAsString("key_" + size)); 130 | } 131 | } 132 | } 133 | -------------------------------------------------------------------------------- /src/main/java/com/spotify/sparkey/extra/ReloadableSparkeyReader.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011-2013 Spotify AB 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 
15 | */ 16 | package com.spotify.sparkey.extra; 17 | 18 | import com.spotify.sparkey.*; 19 | import org.slf4j.Logger; 20 | import org.slf4j.LoggerFactory; 21 | 22 | import java.io.File; 23 | import java.io.IOException; 24 | import java.util.Objects; 25 | import java.util.concurrent.CompletableFuture; 26 | import java.util.concurrent.CompletionStage; 27 | import java.util.concurrent.ExecutorService; 28 | 29 | /** 30 | * A sparkey reader that can switch between log files at runtime. 31 | * 32 | * This reader is thread-safe. 33 | */ 34 | public class ReloadableSparkeyReader extends AbstractDelegatingSparkeyReader { 35 | private static final Logger log = LoggerFactory.getLogger(ReloadableSparkeyReader.class); 36 | 37 | private final ExecutorService executorService; 38 | 39 | private volatile SparkeyReader reader; 40 | private volatile File currentLogFile; 41 | 42 | /** 43 | * Creates a new {@link ReloadableSparkeyReader} from a log file. 44 | * 45 | * @param logFile The log file to start with. 46 | * @param executorService An executor service that is used to run reload tasks on. 47 | * @return A future that resolves to the sparkey reader once it has loaded the log file. 48 | */ 49 | public static CompletionStage fromLogFile(File logFile, ExecutorService executorService) { 50 | ReloadableSparkeyReader reader = new ReloadableSparkeyReader(executorService); 51 | return reader.load(logFile); 52 | } 53 | 54 | private ReloadableSparkeyReader(ExecutorService executorService) { 55 | if (executorService == null) { 56 | throw new IllegalArgumentException("executor service must not be null"); 57 | } 58 | this.executorService = executorService; 59 | } 60 | 61 | /** 62 | * Load a new log file into this reader. 63 | * @param logFile the log file to load. 64 | * @return A future that resolves to the sparkey reader once it has loaded the new log file. 
65 | */ 66 | public CompletionStage load(final File logFile) { 67 | checkArgument(isValidLogFile(logFile)); 68 | CompletableFuture result = new CompletableFuture<>(); 69 | this.executorService.submit(() -> { 70 | switchReader(logFile); 71 | result.complete(this); 72 | }); 73 | return result; 74 | } 75 | 76 | private void checkArgument(boolean b) { 77 | if (!b) { 78 | throw new IllegalArgumentException(); 79 | } 80 | } 81 | 82 | @Override 83 | protected SparkeyReader getDelegateReader() { 84 | return this.reader; 85 | } 86 | 87 | private boolean isValidLogFile(File logFile) { 88 | return logFile != null && logFile.exists() && logFile.getName().endsWith(".spl"); 89 | } 90 | 91 | private SparkeyReader createFromLogFile(File logFile) { 92 | Objects.requireNonNull(logFile); 93 | checkArgument(logFile.exists()); 94 | checkArgument(logFile.getName().endsWith(".spl")); 95 | 96 | File indexFile = Sparkey.getIndexFile(logFile); 97 | if (!indexFile.exists()) { 98 | log.info("create sparkey index for log file {}", logFile.getAbsolutePath()); 99 | try { 100 | SparkeyWriter w = Sparkey.append(indexFile); 101 | w.writeHash(); 102 | w.close(); 103 | } catch (IOException ex) { 104 | throw new ReloadableSparkeyReaderException("couldn't create index file", ex); 105 | } 106 | } 107 | 108 | try { 109 | return Sparkey.open(indexFile); 110 | } catch (IOException ex) { 111 | throw new ReloadableSparkeyReaderException("couldn't create sparkey reader", ex); 112 | } 113 | } 114 | 115 | private synchronized void switchReader(File logFile) { 116 | if (this.currentLogFile != null && this.currentLogFile.equals(logFile)) { 117 | log.debug("ignore reload (same log file)"); 118 | return; 119 | } 120 | 121 | SparkeyReader newReader = createFromLogFile(logFile); 122 | SparkeyReader toClose = this.reader; 123 | 124 | this.currentLogFile = logFile; 125 | this.reader = newReader; 126 | 127 | long keys = reader.getLogHeader().getNumPuts() - reader.getLogHeader().getNumDeletes(); 128 | log.info("loaded 
sparkey index {}, {} keys", logFile.getAbsolutePath(), keys); 129 | 130 | if (toClose != null) { 131 | toClose.close(); 132 | } 133 | } 134 | 135 | public static class ReloadableSparkeyReaderException extends RuntimeException { 136 | public ReloadableSparkeyReaderException(String msg, Throwable t) { 137 | super(msg, t); 138 | } 139 | } 140 | } 141 | -------------------------------------------------------------------------------- /src/main/java/com/spotify/sparkey/CompressedRandomReader.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011-2013 Spotify AB 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 
15 | */ 16 | package com.spotify.sparkey; 17 | 18 | import java.io.IOException; 19 | 20 | final class CompressedRandomReader implements BlockRandomInput { 21 | private final CompressorType compressor; 22 | 23 | private long position; 24 | 25 | private final BlockRandomInput data; 26 | private final int maxBlockSize; 27 | 28 | private final byte[] uncompressedBuf; 29 | private final byte[] compressedBuf; 30 | private int bufPos; 31 | private int blockSize; 32 | 33 | CompressedRandomReader(CompressorType compressor, BlockRandomInput data, int maxBlockSize) { 34 | this.compressor = compressor; 35 | this.data = data; 36 | this.maxBlockSize = maxBlockSize; 37 | blockSize = 0; 38 | bufPos = 0; 39 | uncompressedBuf = new byte[maxBlockSize]; 40 | compressedBuf = new byte[compressor.maxCompressedLength(maxBlockSize)]; 41 | } 42 | 43 | @Override 44 | public int readUnsignedByte() throws IOException { 45 | return ((int) readSignedByte()) & 0xFF; 46 | } 47 | 48 | private byte readSignedByte() throws IOException { 49 | if (bufPos >= blockSize) { 50 | fetchBlock(); 51 | } 52 | return uncompressedBuf[bufPos++]; 53 | } 54 | 55 | private void fetchBlock() throws IOException { 56 | int compressedSize = Util.readUnsignedVLQInt(data); 57 | data.readFully(compressedBuf, 0, compressedSize); 58 | bufPos = 0; 59 | blockSize = compressor.uncompress(compressedBuf, compressedSize, uncompressedBuf); 60 | position = -1; 61 | } 62 | 63 | @Override 64 | public void readFully(byte[] b, int off, int len) throws IOException { 65 | int remaining = blockSize - bufPos; 66 | if (remaining >= len) { 67 | System.arraycopy(uncompressedBuf, bufPos, b, off, len); 68 | bufPos += len; 69 | } else { 70 | System.arraycopy(uncompressedBuf, bufPos, b, off, remaining); 71 | fetchBlock(); 72 | readFully(b, off + remaining, len - remaining); 73 | } 74 | } 75 | 76 | @Override 77 | public boolean readFullyCompare(int length, byte[] key) throws IOException { 78 | int remaining = blockSize - bufPos; 79 | if (remaining 
>= length) { 80 | // Fast path: all bytes are in current buffer 81 | boolean result = Util.equals(length, key, 0, uncompressedBuf, bufPos); 82 | bufPos += length; // Always advance position (matches readFully semantics) 83 | return result; 84 | } else { 85 | // Slow path: comparison spans multiple blocks - need to fetch 86 | int offset = 0; 87 | while (offset < length) { 88 | if (bufPos >= blockSize) { 89 | fetchBlock(); 90 | } 91 | int available = Math.min(blockSize - bufPos, length - offset); 92 | if (!Util.equals(available, key, offset, uncompressedBuf, bufPos)) { 93 | // Continue advancing even on mismatch (matches readFully semantics) 94 | bufPos += available; 95 | offset += available; 96 | // Skip remaining bytes to fully advance position 97 | skipBytes(length - offset); 98 | return false; 99 | } 100 | bufPos += available; 101 | offset += available; 102 | } 103 | return true; 104 | } 105 | } 106 | 107 | @Override 108 | public void close() { 109 | data.close(); 110 | } 111 | 112 | /** 113 | * It's only valid to seek to known block starts. 
114 | * 115 | * @param position 116 | */ 117 | @Override 118 | public void seek(long position) throws IOException { 119 | if (position != this.position) { 120 | this.position = position; 121 | blockSize = 0; 122 | data.seek(position); 123 | } 124 | bufPos = 0; 125 | } 126 | 127 | @Override 128 | public void skipBytes(long n) throws IOException { 129 | int remaining = blockSize - bufPos; 130 | if (n < remaining) { 131 | bufPos += n; 132 | } else { 133 | fetchBlock(); 134 | skipBytes(n - remaining); 135 | } 136 | } 137 | 138 | @Override 139 | public CompressedRandomReader duplicate() { 140 | CompressedRandomReader duplicate = new CompressedRandomReader(compressor, data.duplicate(), maxBlockSize); 141 | duplicate.bufPos = this.bufPos; 142 | duplicate.blockSize = this.blockSize; 143 | duplicate.position = this.position; 144 | System.arraycopy(this.uncompressedBuf, 0, duplicate.uncompressedBuf, 0, this.blockSize); 145 | return duplicate; 146 | } 147 | 148 | @Override 149 | public void closeDuplicate() { 150 | data.closeDuplicate(); 151 | } 152 | 153 | @Override 154 | public long getLoadedBytes() { 155 | return data.getLoadedBytes(); 156 | } 157 | } 158 | -------------------------------------------------------------------------------- /src/main/java/com/spotify/sparkey/SingleThreadedSparkeyReader.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011-2013 Spotify AB 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 15 | */ 16 | package com.spotify.sparkey; 17 | 18 | import java.io.*; 19 | import java.nio.charset.StandardCharsets; 20 | import java.util.Iterator; 21 | import java.util.NoSuchElementException; 22 | 23 | final class SingleThreadedSparkeyReader implements SparkeyReader { 24 | private final IndexHash index; 25 | private final File indexFile; 26 | private final File logFile; 27 | private final IndexHeader header; 28 | private final LogHeader logHeader; 29 | 30 | private SingleThreadedSparkeyReader(File indexFile, File logFile) throws IOException { 31 | this(indexFile, logFile, IndexHash.open(indexFile, logFile)); 32 | } 33 | 34 | private SingleThreadedSparkeyReader(File indexFile, File logFile, IndexHash index) { 35 | this.index = index; 36 | this.indexFile = indexFile; 37 | this.logFile = logFile; 38 | header = index.header; 39 | logHeader = index.logHeader; 40 | } 41 | 42 | @Override 43 | public SingleThreadedSparkeyReader duplicate() { 44 | return new SingleThreadedSparkeyReader(indexFile, logFile, index.duplicate()); 45 | } 46 | 47 | static SingleThreadedSparkeyReader open(File file) throws IOException { 48 | return new SingleThreadedSparkeyReader(Sparkey.getIndexFile(file), Sparkey.getLogFile(file)); 49 | } 50 | 51 | @Override 52 | public void close() { 53 | index.close(); 54 | } 55 | 56 | @Override 57 | public String getAsString(String key) throws IOException { 58 | byte[] keyBytes = key.getBytes(StandardCharsets.UTF_8); 59 | Entry res = getAsEntry(keyBytes); 60 | if (res == null) { 61 | return null; 62 | } 63 | return new String(res.getValue(), StandardCharsets.UTF_8); 64 | } 65 | 66 | @Override 67 | public byte[] getAsByteArray(byte[] key) throws IOException { 68 | Entry entry = getAsEntry(key); 69 | if (entry == null) { 70 | return null; 71 | } 72 | return entry.getValue(); 73 | } 74 | 75 | @Override 76 | public SparkeyReader.Entry 
getAsEntry(byte[] key) throws IOException { 77 | return index.get(key.length, key); 78 | } 79 | 80 | 81 | /** 82 | * @return a new iterator that can be safely used from a single thread. 83 | * Note that entries will be reused and modified, so any data you want from it must be consumed before 84 | * continuing iteration. You should not pass this entry on in any way. 85 | */ 86 | @Override 87 | public Iterator iterator() { 88 | SparkeyLogIterator logIterator; 89 | final IndexHash indexHash; 90 | try { 91 | logIterator = new SparkeyLogIterator(logFile, -1, index.header.getDataEnd()); 92 | indexHash = index.duplicate(); 93 | } catch (IOException e) { 94 | throw new RuntimeException(e); 95 | } 96 | final Iterator iterator = logIterator.iterator(); 97 | 98 | return new Iterator() { 99 | private SparkeyReader.Entry entry; 100 | private boolean ready; 101 | 102 | public boolean hasNext() { 103 | if (ready) { 104 | return true; 105 | } 106 | while (iterator.hasNext()) { 107 | // Safe cast, since the iterator is guaranteed to be a SparkeyLogIterator 108 | SparkeyLogIterator.Entry next = (SparkeyLogIterator.Entry) iterator.next(); 109 | 110 | if (next.getType() == SparkeyReader.Type.PUT) { 111 | int keyLen = next.getKeyLength(); 112 | try { 113 | if (isValid(keyLen, next.getKeyBuf(), next.getPosition(), next.getEntryIndex(), indexHash)) { 114 | entry = next; 115 | ready = true; 116 | return true; 117 | } 118 | } catch (IOException e) { 119 | throw new RuntimeException(e); 120 | } 121 | } 122 | } 123 | indexHash.closeDuplicate(); 124 | 125 | return false; 126 | } 127 | 128 | public Entry next() { 129 | if (!hasNext()) { 130 | throw new NoSuchElementException(); 131 | } 132 | ready = false; 133 | Entry localEntry = entry; 134 | entry = null; 135 | return localEntry; 136 | } 137 | 138 | public void remove() { 139 | throw new UnsupportedOperationException(); 140 | } 141 | }; 142 | } 143 | 144 | @Override 145 | public long getLoadedBytes() { 146 | return index.getLoadedBytes(); 
147 | } 148 | 149 | @Override 150 | public long getTotalBytes() { 151 | return indexFile.length() + logFile.length(); 152 | } 153 | 154 | private static boolean isValid(int keyLen, byte[] keyBuf, long position, int entryIndex, IndexHash indexHash) throws IOException { 155 | return indexHash.isAt(keyLen, keyBuf, position, entryIndex); 156 | } 157 | 158 | @Override 159 | public IndexHeader getIndexHeader() { 160 | return header; 161 | } 162 | 163 | @Override 164 | public LogHeader getLogHeader() { 165 | return logHeader; 166 | } 167 | 168 | } 169 | -------------------------------------------------------------------------------- /src/main/java/com/spotify/sparkey/SingleThreadedSparkeyWriter.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011-2013 Spotify AB 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 
15 | */ 16 | package com.spotify.sparkey; 17 | 18 | import java.io.File; 19 | import java.io.FileNotFoundException; 20 | import java.io.IOException; 21 | import java.io.InputStream; 22 | import java.util.Random; 23 | import java.util.UUID; 24 | 25 | class SingleThreadedSparkeyWriter implements SparkeyWriter { 26 | final LogWriter logWriter; 27 | final File logFile; 28 | final File indexFile; 29 | double sparsity; 30 | HashType hashType; 31 | boolean fsync; 32 | int hashSeed; 33 | long maxMemory = -1; 34 | ConstructionMethod method = ConstructionMethod.AUTO; 35 | 36 | SingleThreadedSparkeyWriter(File indexFile, LogWriter logWriter) { 37 | this.logFile = logWriter.getFile(); 38 | this.indexFile = indexFile; 39 | this.logWriter = logWriter; 40 | } 41 | 42 | static SingleThreadedSparkeyWriter createNew(File file) throws IOException { 43 | return createNew(file, CompressionType.NONE, 0); 44 | } 45 | 46 | static SingleThreadedSparkeyWriter createNew(File file, CompressionType compressionType, int blockSize) throws IOException { 47 | File indexFile = Sparkey.getIndexFile(file); 48 | if (indexFile.exists()) { 49 | indexFile.delete(); 50 | } 51 | File logFile = Sparkey.getLogFile(file); 52 | if (logFile.exists()) { 53 | logFile.delete(); 54 | } 55 | LogWriter logWriter = LogWriter.createNew(logFile, compressionType, blockSize); 56 | return new SingleThreadedSparkeyWriter(indexFile, logWriter); 57 | } 58 | 59 | static SingleThreadedSparkeyWriter append(File file) throws IOException { 60 | File logFile = Sparkey.getLogFile(file); 61 | if (!logFile.exists()) { 62 | throw new FileNotFoundException("File not found: " + logFile); 63 | } 64 | LogWriter writer = LogWriter.openExisting(logFile); 65 | 66 | File indexFile = Sparkey.getIndexFile(file); 67 | return new SingleThreadedSparkeyWriter(indexFile, writer); 68 | } 69 | 70 | static SingleThreadedSparkeyWriter appendOrCreate(File file, CompressionType type, int compressionBlockSize) throws IOException { 71 | File indexFile = 
Sparkey.getIndexFile(file); 72 | File logFile = Sparkey.getLogFile(file); 73 | 74 | LogWriter logWriter; 75 | if (logFile.exists()) { 76 | logWriter = LogWriter.openExisting(logFile); 77 | } else { 78 | logWriter = LogWriter.createNew(logFile, type, compressionBlockSize); 79 | } 80 | return new SingleThreadedSparkeyWriter(indexFile, logWriter); 81 | } 82 | 83 | @Override 84 | public void close() throws IOException { 85 | logWriter.close(fsync); 86 | } 87 | 88 | @Override 89 | public void writeHash() throws IOException { 90 | flush(); 91 | 92 | File parentFile = indexFile.getCanonicalFile().getParentFile(); 93 | File newFile = new File(parentFile, indexFile.getName() + "-tmp" + UUID.randomUUID().toString()); 94 | try { 95 | int hashSeed = this.hashSeed; 96 | if (hashSeed == 0) { 97 | hashSeed = new Random().nextInt(); 98 | } 99 | long maxMemory = this.maxMemory; 100 | if (maxMemory < 0) { 101 | maxMemory = Runtime.getRuntime().freeMemory() / 2; 102 | } 103 | IndexHash.createNew(newFile, logFile, hashType, sparsity, fsync, hashSeed, Math.max(maxMemory, 10*1024*1024L), method); 104 | Util.renameFile(newFile, indexFile); 105 | } finally { 106 | boolean deleted = newFile.delete(); 107 | } 108 | } 109 | 110 | @Override 111 | public void writeHash(HashType hashType) throws IOException { 112 | setHashType(hashType); 113 | writeHash(); 114 | } 115 | 116 | public void setFsync(boolean fsync) { 117 | this.fsync = fsync; 118 | } 119 | 120 | @Override 121 | public void setHashType(HashType hashType) { 122 | this.hashType = hashType; 123 | } 124 | 125 | @Override 126 | public void setHashSparsity(double sparsity) { 127 | this.sparsity = sparsity; 128 | } 129 | 130 | @Override 131 | public void setHashSeed(final int hashSeed) { 132 | this.hashSeed = hashSeed; 133 | } 134 | 135 | @Override 136 | public void setMaxMemory(final long maxMemory) { 137 | this.maxMemory = maxMemory; 138 | } 139 | 140 | @Override 141 | public void setConstructionMethod(final ConstructionMethod method) { 
142 | this.method = method; 143 | } 144 | 145 | @Override 146 | public void put(String key, String value) throws IOException { 147 | logWriter.put(key, value); 148 | } 149 | 150 | @Override 151 | public void put(byte[] key, byte[] value) throws IOException { 152 | logWriter.put(key, value); 153 | } 154 | 155 | @Override 156 | public void put(byte[] key, InputStream valueStream, long valueLen) throws IOException { 157 | logWriter.put(key, valueStream, valueLen); 158 | } 159 | 160 | @Override 161 | public void delete(String key) throws IOException { 162 | logWriter.delete(key); 163 | } 164 | 165 | @Override 166 | public void delete(byte[] key) throws IOException { 167 | logWriter.delete(key); 168 | } 169 | 170 | @Override 171 | public void flush() throws IOException { 172 | logWriter.flush(fsync); 173 | } 174 | } 175 | -------------------------------------------------------------------------------- /src/main/java/com/spotify/sparkey/MurmurHash3.java: -------------------------------------------------------------------------------- 1 | //----------------------------------------------------------------------------- 2 | // MurmurHash3 was written by Austin Appleby, and is placed in the public 3 | // domain. The author hereby disclaims copyright to this source code. 4 | 5 | // Note - The x86 and x64 versions do _not_ produce the same results, as the 6 | // algorithms are optimized for their respective platforms. You can still 7 | // compile and run any of them on any platform, but your performance with the 8 | // non-native version will be less than optimal. 9 | 10 | package com.spotify.sparkey; 11 | 12 | /** 13 | * Java port of the MurmurHash3 algorithm. 
14 | * Copied from http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp 15 | */ 16 | final class MurmurHash3 { 17 | 18 | static int murmurHash3_x86_32(byte[] data, int len, int seed) { 19 | final int nblocks = len / 4; 20 | 21 | int h1 = seed; 22 | 23 | int c1 = 0xcc9e2d51; 24 | int c2 = 0x1b873593; 25 | 26 | //---------- 27 | // body 28 | 29 | for (int i = 0; i < nblocks; i++) { 30 | int k1 = getBlock32(data, 4 * i); 31 | 32 | k1 *= c1; 33 | k1 = (k1 << 15) | (k1 >>> (32 - 15)); 34 | k1 *= c2; 35 | 36 | h1 ^= k1; 37 | h1 = (h1 << 13) | (h1 >>> (32 - 13)); 38 | h1 = h1 * 5 + 0xe6546b64; 39 | } 40 | 41 | //---------- 42 | // tail 43 | 44 | int tail = 4 * nblocks; 45 | 46 | int k1 = 0; 47 | 48 | switch (len & 3) { 49 | case 3: 50 | k1 ^= Util.unsignedByte(data[tail + 2]) << 16; 51 | case 2: 52 | k1 ^= Util.unsignedByte(data[tail + 1]) << 8; 53 | case 1: 54 | k1 ^= Util.unsignedByte(data[tail]); 55 | k1 *= c1; 56 | k1 = (k1 << 15) | (k1 >>> (32 - 15)); 57 | k1 *= c2; 58 | h1 ^= k1; 59 | } 60 | 61 | //---------- 62 | // finalization 63 | 64 | h1 ^= len; 65 | 66 | int h = h1; 67 | h ^= h >>> 16; 68 | h *= 0x85ebca6b; 69 | h ^= h >>> 13; 70 | h *= 0xc2b2ae35; 71 | h ^= h >>> 16; 72 | h1 = h; 73 | 74 | return h1; 75 | } 76 | 77 | private static int getBlock32(byte[] data, int i) { 78 | return Util.unsignedByte(data[i]) | 79 | Util.unsignedByte(data[i + 1]) << 8 | 80 | Util.unsignedByte(data[i + 2]) << 16 | 81 | Util.unsignedByte(data[i + 3]) << 24; 82 | } 83 | 84 | private static long getBlock64(byte[] data, int i) { 85 | long low = ((long) getBlock32(data, 8 * i)) & 0xFFFFFFFFL; 86 | long high = ((long) getBlock32(data, 8 * i + 4)) & 0xFFFFFFFFL; 87 | return low | high << 32; 88 | } 89 | 90 | private static long fmix64(long k) { 91 | k ^= k >>> 33; 92 | k *= 0xff51afd7ed558ccdL; 93 | k ^= k >>> 33; 94 | k *= 0xc4ceb9fe1a85ec53L; 95 | k ^= k >>> 33; 96 | 97 | return k; 98 | } 99 | 100 | static long murmurHash3_x64_64(byte[] data, int len, int seed) { 101 
| final int nblocks = len / 16; 102 | 103 | long h1 = ((long) seed) & 0xFFFFFFFFL; 104 | long h2 = h1; 105 | 106 | final long c1 = 0x87c37b91114253d5L; 107 | final long c2 = 0x4cf5ad432745937fL; 108 | 109 | //---------- 110 | // body 111 | 112 | for (int i = 0; i < nblocks; i++) { 113 | long k1 = getBlock64(data, 2 * i); 114 | long k2 = getBlock64(data, 2 * i + 1); 115 | 116 | k1 *= c1; 117 | k1 = ROTL64(k1, 31); 118 | k1 *= c2; 119 | h1 ^= k1; 120 | 121 | h1 = ROTL64(h1, 27); 122 | h1 += h2; 123 | h1 = h1 * 5 + 0x52dce729L; 124 | 125 | k2 *= c2; 126 | k2 = ROTL64(k2, 33); 127 | k2 *= c1; 128 | h2 ^= k2; 129 | 130 | h2 = ROTL64(h2, 31); 131 | h2 += h1; 132 | h2 = h2 * 5 + 0x38495ab5L; 133 | } 134 | 135 | //---------- 136 | // tail 137 | 138 | int tail = 16 * nblocks; 139 | 140 | long k1 = 0; 141 | long k2 = 0; 142 | 143 | switch (len & 15) { 144 | case 15: 145 | k2 ^= (long) (Util.unsignedByte(data[tail + 14])) << 48; 146 | case 14: 147 | k2 ^= (long) (Util.unsignedByte(data[tail + 13])) << 40; 148 | case 13: 149 | k2 ^= (long) (Util.unsignedByte(data[tail + 12])) << 32; 150 | case 12: 151 | k2 ^= (long) (Util.unsignedByte(data[tail + 11])) << 24; 152 | case 11: 153 | k2 ^= (long) (Util.unsignedByte(data[tail + 10])) << 16; 154 | case 10: 155 | k2 ^= (long) (Util.unsignedByte(data[tail + 9])) << 8; 156 | case 9: 157 | k2 ^= (long) (Util.unsignedByte(data[tail + 8])) << 0; 158 | k2 *= c2; 159 | k2 = ROTL64(k2, 33); 160 | k2 *= c1; 161 | h2 ^= k2; 162 | 163 | case 8: 164 | k1 ^= (long) (Util.unsignedByte(data[tail + 7])) << 56; 165 | case 7: 166 | k1 ^= (long) (Util.unsignedByte(data[tail + 6])) << 48; 167 | case 6: 168 | k1 ^= (long) (Util.unsignedByte(data[tail + 5])) << 40; 169 | case 5: 170 | k1 ^= (long) (Util.unsignedByte(data[tail + 4])) << 32; 171 | case 4: 172 | k1 ^= (long) (Util.unsignedByte(data[tail + 3])) << 24; 173 | case 3: 174 | k1 ^= (long) (Util.unsignedByte(data[tail + 2])) << 16; 175 | case 2: 176 | k1 ^= (long) (Util.unsignedByte(data[tail + 
1])) << 8; 177 | case 1: 178 | k1 ^= (long) (Util.unsignedByte(data[tail + 0])) << 0; 179 | k1 *= c1; 180 | k1 = ROTL64(k1, 31); 181 | k1 *= c2; 182 | h1 ^= k1; 183 | } 184 | 185 | //---------- 186 | // finalization 187 | 188 | h1 ^= len; 189 | h2 ^= len; 190 | 191 | h1 += h2; 192 | h2 += h1; 193 | 194 | h1 = fmix64(h1); 195 | h2 = fmix64(h2); 196 | 197 | h1 += h2; 198 | h2 += h1; 199 | 200 | return h1; 201 | } 202 | 203 | private static long ROTL64(long x, int r) { 204 | return (x << r) | (x >>> (64 - r)); 205 | } 206 | 207 | } 208 | -------------------------------------------------------------------------------- /src/main/java22/com/spotify/sparkey/SingleThreadedSparkeyReaderJ22.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011-2013 Spotify AB 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 | * use this file except in compliance with the License. You may obtain a copy of 6 | * the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 | * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 | * License for the specific language governing permissions and limitations under 14 | * the License. 
15 | */ 16 | package com.spotify.sparkey; 17 | 18 | import java.io.*; 19 | import java.nio.charset.StandardCharsets; 20 | import java.util.Iterator; 21 | import java.util.NoSuchElementException; 22 | 23 | public final class SingleThreadedSparkeyReaderJ22 implements SparkeyReader { 24 | private final IndexHashJ22 index; 25 | private final File indexFile; 26 | private final File logFile; 27 | private final IndexHeader header; 28 | private final LogHeader logHeader; 29 | 30 | private SingleThreadedSparkeyReaderJ22(File indexFile, File logFile) throws IOException { 31 | this(indexFile, logFile, IndexHashJ22.open(indexFile, logFile)); 32 | } 33 | 34 | private SingleThreadedSparkeyReaderJ22(File indexFile, File logFile, IndexHashJ22 index) { 35 | this.index = index; 36 | this.indexFile = indexFile; 37 | this.logFile = logFile; 38 | header = index.header; 39 | logHeader = index.logHeader; 40 | } 41 | 42 | @Override 43 | public SingleThreadedSparkeyReaderJ22 duplicate() { 44 | return new SingleThreadedSparkeyReaderJ22(indexFile, logFile, index.duplicate()); 45 | } 46 | 47 | public static SparkeyReader open(File file) throws IOException { 48 | return new SingleThreadedSparkeyReaderJ22(Sparkey.getIndexFile(file), Sparkey.getLogFile(file)); 49 | } 50 | 51 | @Override 52 | public void close() { 53 | index.close(); 54 | } 55 | 56 | @Override 57 | public String getAsString(String key) throws IOException { 58 | byte[] keyBytes = key.getBytes(StandardCharsets.UTF_8); 59 | Entry res = getAsEntry(keyBytes); 60 | if (res == null) { 61 | return null; 62 | } 63 | return new String(res.getValue(), StandardCharsets.UTF_8); 64 | } 65 | 66 | @Override 67 | public byte[] getAsByteArray(byte[] key) throws IOException { 68 | Entry entry = getAsEntry(key); 69 | if (entry == null) { 70 | return null; 71 | } 72 | return entry.getValue(); 73 | } 74 | 75 | /** 76 | * Get entry for the given key. 
77 | * 78 | * IMPORTANT: The returned entry must be consumed immediately (call getValue() or 79 | * getValueAsStream()) before making another lookup, as the underlying stream 80 | * position will be invalidated. 81 | */ 82 | @Override 83 | public SparkeyReader.Entry getAsEntry(byte[] key) throws IOException { 84 | return index.get(key.length, key); 85 | } 86 | 87 | 88 | /** 89 | * @return a new iterator that can be safely used from a single thread. 90 | * Note that entries will be reused and modified, so any data you want from it must be consumed before 91 | * continuing iteration. You should not pass this entry on in any way. 92 | */ 93 | @Override 94 | public Iterator iterator() { 95 | SparkeyLogIterator logIterator; 96 | final IndexHashJ22 indexHash; 97 | try { 98 | logIterator = new SparkeyLogIterator(logFile, -1, index.header.getDataEnd()); 99 | indexHash = index.duplicate(); 100 | } catch (IOException e) { 101 | throw new RuntimeException(e); 102 | } 103 | final Iterator iterator = logIterator.iterator(); 104 | 105 | return new Iterator() { 106 | private SparkeyReader.Entry entry; 107 | private boolean ready; 108 | 109 | public boolean hasNext() { 110 | if (ready) { 111 | return true; 112 | } 113 | while (iterator.hasNext()) { 114 | // Safe cast, since the iterator is guaranteed to be a SparkeyLogIterator 115 | SparkeyLogIterator.Entry next = (SparkeyLogIterator.Entry) iterator.next(); 116 | 117 | if (next.getType() == SparkeyReader.Type.PUT) { 118 | int keyLen = next.getKeyLength(); 119 | try { 120 | if (isValid(keyLen, next.getKeyBuf(), next.getPosition(), next.getEntryIndex(), indexHash)) { 121 | entry = next; 122 | ready = true; 123 | return true; 124 | } 125 | } catch (IOException e) { 126 | throw new RuntimeException(e); 127 | } 128 | } 129 | } 130 | indexHash.closeDuplicate(); 131 | 132 | return false; 133 | } 134 | 135 | public Entry next() { 136 | if (!hasNext()) { 137 | throw new NoSuchElementException(); 138 | } 139 | ready = false; 140 | Entry 
localEntry = entry; 141 | entry = null; 142 | return localEntry; 143 | } 144 | 145 | public void remove() { 146 | throw new UnsupportedOperationException(); 147 | } 148 | }; 149 | } 150 | 151 | @Override 152 | public long getLoadedBytes() { 153 | return index.getLoadedBytes(); 154 | } 155 | 156 | @Override 157 | public long getTotalBytes() { 158 | return indexFile.length() + logFile.length(); 159 | } 160 | 161 | private static boolean isValid(int keyLen, byte[] keyBuf, long position, int entryIndex, IndexHashJ22 indexHash) throws IOException { 162 | return indexHash.isAt(keyLen, keyBuf, position, entryIndex); 163 | } 164 | 165 | @Override 166 | public IndexHeader getIndexHeader() { 167 | return header; 168 | } 169 | 170 | @Override 171 | public LogHeader getLogHeader() { 172 | return logHeader; 173 | } 174 | 175 | } 176 | -------------------------------------------------------------------------------- /src/main/java22/com/spotify/sparkey/SparkeyImplSelector.java: -------------------------------------------------------------------------------- 1 | package com.spotify.sparkey; 2 | 3 | import com.spotify.sparkey.extra.PooledSparkeyReader; 4 | 5 | import java.io.File; 6 | import java.io.IOException; 7 | 8 | /** 9 | * Java 22+ implementation selector. 10 | * 11 | * This Multi-Release JAR override provides optimized Sparkey implementations 12 | * using Java 22 features like the Foreign Function & Memory API (MemorySegment, Arena). 
13 | * 14 | * Benefits of Java 22 implementation: 15 | * - No 2GB chunk limit (unlike FileChannel/MappedByteBuffer) 16 | * - Vectorized comparison using MemorySegment.mismatch() (SIMD optimization) 17 | * - Arena-based deterministic memory management 18 | * - Better performance for large files 19 | * - UncompressedSparkeyReaderJ22 for zero-overhead uncompressed reads 20 | * - Smart reader selection based on compression type 21 | */ 22 | class SparkeyImplSelector { 23 | 24 | /** 25 | * Open a thread-safe SparkeyReader with the optimal implementation for Java 22+. 26 | * Uses UncompressedSparkeyReaderJ22 for uncompressed files (zero-overhead, immutable, thread-safe). 27 | * Uses PooledSparkeyReader wrapping SingleThreadedSparkeyReaderJ22 for compressed files (thread-safe pool). 28 | * 29 | * @param file File base to use, the actual file endings will be set to .spi and .spl 30 | * @return a thread-safe SparkeyReader 31 | * @throws IOException if the file cannot be opened 32 | */ 33 | static SparkeyReader open(File file) throws IOException { 34 | return openPooled(file); 35 | } 36 | 37 | /** 38 | * Open a single-threaded SparkeyReader using Java 22 MemorySegment API. 39 | * For uncompressed files, returns UncompressedSparkeyReaderJ22 (already thread-safe). 40 | * For compressed files, returns SingleThreadedSparkeyReaderJ22. 
41 | * 42 | * @param file File base to use, the actual file endings will be set to .spi and .spl 43 | * @return a single-threaded SparkeyReader 44 | * @throws IOException if the file cannot be opened 45 | */ 46 | static SparkeyReader openSingleThreaded(File file) throws IOException { 47 | File logFile = Sparkey.getLogFile(file); 48 | LogHeader logHeader = LogHeader.read(logFile); 49 | 50 | if (logHeader.getCompressionType() == CompressionType.NONE) { 51 | return UncompressedSparkeyReaderJ22.open(file); 52 | } 53 | 54 | return SingleThreadedSparkeyReaderJ22.open(file); 55 | } 56 | 57 | /** 58 | * Open a pooled SparkeyReader with default pool size using Java 22 optimizations. 59 | * For uncompressed files, returns UncompressedSparkeyReaderJ22 (already zero-overhead thread-safe, pooling not needed). 60 | * For compressed files, returns PooledSparkeyReader wrapping SingleThreadedSparkeyReaderJ22 instances. 61 | * 62 | * @param file File base to use, the actual file endings will be set to .spi and .spl 63 | * @return a pooled SparkeyReader 64 | * @throws IOException if the file cannot be opened 65 | */ 66 | static SparkeyReader openPooled(File file) throws IOException { 67 | File logFile = Sparkey.getLogFile(file); 68 | LogHeader logHeader = LogHeader.read(logFile); 69 | 70 | // For uncompressed files, the uncompressed reader is already zero-overhead thread-safe (immutable) 71 | // Pooling would just add unnecessary overhead, so return it directly 72 | if (logHeader.getCompressionType() == CompressionType.NONE) { 73 | return UncompressedSparkeyReaderJ22.open(file); 74 | } 75 | 76 | // For compressed files, pool SingleThreadedSparkeyReaderJ22 instances 77 | SparkeyReader baseReader = SingleThreadedSparkeyReaderJ22.open(file); 78 | return PooledSparkeyReader.fromReader(baseReader); 79 | } 80 | 81 | /** 82 | * Open a pooled SparkeyReader with the specified pool size using Java 22 optimizations. 
83 | * For uncompressed files, returns UncompressedSparkeyReaderJ22 (already zero-overhead thread-safe, pooling not needed). 84 | * For compressed files, returns PooledSparkeyReader wrapping SingleThreadedSparkeyReaderJ22 instances. 85 | * 86 | * @param file File base to use, the actual file endings will be set to .spi and .spl 87 | * @param poolSize number of reader instances (minimum 1, ignored for uncompressed files) 88 | * @return a pooled SparkeyReader 89 | * @throws IOException if the file cannot be opened 90 | */ 91 | static SparkeyReader openPooled(File file, int poolSize) throws IOException { 92 | File logFile = Sparkey.getLogFile(file); 93 | LogHeader logHeader = LogHeader.read(logFile); 94 | 95 | // For uncompressed files, the uncompressed reader is already zero-overhead thread-safe (immutable) 96 | // Pooling would just add unnecessary overhead, so return it directly 97 | if (logHeader.getCompressionType() == CompressionType.NONE) { 98 | return UncompressedSparkeyReaderJ22.open(file); 99 | } 100 | 101 | // For compressed files, pool SingleThreadedSparkeyReaderJ22 instances 102 | SparkeyReader baseReader = SingleThreadedSparkeyReaderJ22.open(file); 103 | return PooledSparkeyReader.fromReader(baseReader, poolSize); 104 | } 105 | 106 | /** 107 | * Open an uncompressed reader using Java 22+ MemorySegment API. 108 | * 109 | * @param file File base to use, the actual file endings will be set to .spi and .spl 110 | * @return UncompressedSparkeyReaderJ22 111 | * @throws IOException if the file cannot be opened 112 | */ 113 | static SparkeyReader openUncompressedJ22(File file) throws IOException { 114 | return UncompressedSparkeyReaderJ22.open(file); 115 | } 116 | 117 | /** 118 | * Open a single-threaded reader using Java 22+ MemorySegment API. 
119 | * 120 | * @param file File base to use, the actual file endings will be set to .spi and .spl 121 | * @return SingleThreadedSparkeyReaderJ22 122 | * @throws IOException if the file cannot be opened 123 | */ 124 | static SparkeyReader openSingleThreadedJ22(File file) throws IOException { 125 | return SingleThreadedSparkeyReaderJ22.open(file); 126 | } 127 | } 128 | -------------------------------------------------------------------------------- /BENCHMARK.md: -------------------------------------------------------------------------------- 1 | # Sparkey Performance Benchmark 2 | 3 | This directory contains performance benchmarking tools for comparing different Sparkey reader implementations. 4 | 5 | ## Quick Start 6 | 7 | ### Running the Benchmark 8 | 9 | For scientific, statistically rigorous benchmarking using JMH: 10 | 11 | ```bash 12 | # Quick smoke test (~5 minutes) 13 | ./run-performance-report.sh --quick 14 | 15 | # Full benchmark (~15 minutes) 16 | ./run-performance-report.sh --full 17 | ``` 18 | 19 | This produces publication-quality results with proper warmup, statistical analysis, and confidence intervals. 
20 | 21 | ## What It Measures 22 | 23 | The benchmark tests all available reader implementations across multiple dimensions: 24 | 25 | ### Reader Types 26 | 27 | The benchmark automatically discovers and tests all available reader implementations: 28 | 29 | **Java 8+ (always available):** 30 | - **SINGLE_THREADED_MMAP_JDK8** - Generic single-threaded reader using MappedByteBuffer 31 | - **POOLED_MMAP_JDK8** - Thread-safe pooled reader using MappedByteBuffer 32 | 33 | **Java 22+ (when running on Java 22+):** 34 | - **UNCOMPRESSED_MEMORYSEGMENT_J22** - Uncompressed-only reader using MemorySegment API (zero-copy) 35 | - **SINGLE_THREADED_MEMORYSEGMENT_J22** - Single-threaded reader using MemorySegment API 36 | - **POOLED_MEMORYSEGMENT_J22** - Thread-safe pooled reader using MemorySegment API 37 | 38 | ### Test Parameters 39 | 40 | - **Compression**: NONE (uncompressed), SNAPPY 41 | - **Value sizes**: Small (~6 bytes), Large (~56 bytes) 42 | - **Concurrency**: Single-threaded, Multi-threaded (8, 16, 32 threads) 43 | - **Entries**: 100,000 key-value pairs 44 | 45 | ## Benchmark Configuration 46 | 47 | The benchmark is implemented as a JMH test: `src/test/java/com/spotify/sparkey/system/ReaderComparisonBenchmark.java` 48 | 49 | ### Default Settings 50 | 51 | **Quick mode (`--quick`):** 52 | - Warmup: 1 iteration × 1 second = 1s per benchmark 53 | - Measurement: 5 iterations × 1 second = 5s per benchmark 54 | - Total per benchmark: ~6 seconds 55 | - Total time: ~1 minute for all configurations 56 | - Expected error: 5-10% (good for smoke testing) 57 | 58 | **Full mode (`--full`, default):** 59 | - Warmup: 3 iterations × 2 seconds = 6s per benchmark 60 | - Measurement: 10 iterations × 2 seconds = 20s per benchmark 61 | - Total per benchmark: ~26 seconds 62 | - Total time: ~4 minutes for all configurations 63 | - Expected error: <5% for most benchmarks, <10% for high-contention multithreaded 64 | 65 | Each iteration runs millions of operations on modern hardware, providing 
excellent statistical significance with predictable runtime. 66 | 67 | ### Output 68 | 69 | Results are saved to timestamped files in `benchmark-results/`: 70 | ``` 71 | benchmark-results/performance-report-20250115-143022.txt 72 | ``` 73 | 74 | ## Sample Output 75 | 76 | ``` 77 | Benchmark Mode Cnt Score Error Units 78 | ReaderComparisonBenchmark.lookupRandomSingleThreaded avgt 5 89.234 ± 2.156 ns/op 79 | (compressionType=NONE, readerType=UNCOMPRESSED_MEMORYSEGMENT_J22, valuePadding=0) 80 | ReaderComparisonBenchmark.lookupRandomSingleThreaded avgt 5 91.456 ± 1.823 ns/op 81 | (compressionType=NONE, readerType=SINGLE_THREADED_MMAP_JDK8, valuePadding=0) 82 | ReaderComparisonBenchmark.lookupRandomSingleThreaded avgt 5 156.340 ± 12.456 ns/op 83 | (compressionType=SNAPPY, readerType=SINGLE_THREADED_MMAP_JDK8, valuePadding=0) 84 | ReaderComparisonBenchmark.lookupRandomMultithreaded avgt 5 112.567 ± 8.234 ns/op 85 | (compressionType=NONE, readerType=POOLED_MEMORYSEGMENT_J22, valuePadding=0, threads=8) 86 | ``` 87 | 88 | ## Interpretation 89 | 90 | - **Score**: Average time per lookup in nanoseconds (lower is better) 91 | - **Error**: 99.9% confidence interval 92 | - **Mode**: `avgt` = average time 93 | 94 | ### Expected Results 95 | 96 | **Uncompressed (NONE):** 97 | - Java 22+ MemorySegment readers: ~85-95 ns/lookup (fastest, zero-copy) 98 | - Java 8 MappedByteBuffer readers: ~90-100 ns/lookup (baseline) 99 | - Performance gap widens with larger values due to zero-copy streaming 100 | 101 | **Compressed (SNAPPY):** 102 | - All readers similar performance (~150-170 ns/lookup) 103 | - Decompression overhead dominates, memory access optimization less visible 104 | 105 | **Multi-threaded:** 106 | - Pooled readers scale well across threads 107 | - Single-threaded readers not available in multi-threaded benchmarks 108 | 109 | ## Advanced Usage 110 | 111 | ### Customizing Parameters 112 | 113 | Edit `src/test/java/com/spotify/sparkey/system/ReaderComparisonBenchmark.java`: 114 
| 115 | ```java 116 | @Param({"100000"}) // Number of entries 117 | public int numElements; 118 | 119 | @Param({"NONE", "SNAPPY"}) // Compression types 120 | public String compressionType; 121 | 122 | @Param({"SINGLE_THREADED_MMAP_JDK8", "POOLED_MMAP_JDK8", ...}) 123 | public String readerType; 124 | 125 | @Param({"0", "50"}) // Value padding (0=small, 50=large) 126 | public int valuePadding; 127 | ``` 128 | 129 | ### Running Specific Benchmarks 130 | 131 | To run only specific parameter combinations, modify `run-performance-report.sh` to add JMH filters: 132 | 133 | ```bash 134 | # Only test uncompressed 135 | -p compressionType=NONE 136 | 137 | # Only test specific reader 138 | -p readerType=UNCOMPRESSED_MEMORYSEGMENT_J22 139 | 140 | # Multiple filters 141 | -p compressionType=NONE -p valuePadding=0 142 | ``` 143 | 144 | ### Manual JMH Invocation 145 | 146 | If you need full control, run JMH manually after building: 147 | 148 | ```bash 149 | mvn clean package -DskipTests 150 | java -cp "target/test-classes:target/sparkey-*.jar:..." \ 151 | org.openjdk.jmh.Main \ 152 | ReaderComparisonBenchmark \ 153 | -wi 3 -w 2 \ 154 | -i 10 -r 2 155 | ``` 156 | 157 | Options: 158 | - `-wi <count>` - Number of warmup iterations 159 | - `-w <seconds>` - Time per warmup iteration 160 | - `-i <count>` - Number of measurement iterations 161 | - `-r <seconds>` - Time per measurement iteration 162 | - Add `-p <param>=<value>` to filter specific configurations 163 | 164 | See `run-performance-report.sh` for the exact classpath construction.
/**
 * Explicitly unmaps {@link MappedByteBuffer}s where the running JVM offers a way to do so.
 *
 * This code was taken from
 * https://stackoverflow.com/questions/2972986/how-to-unmap-a-file-from-memory-mapped-using-filechannel-in-java/19447758#19447758
 */
class ByteBufferCleaner {

  private static final Cleaner CLEANER = findCleaner();

  /**
   * Picks the cleaner strategy from the {@code java.specification.version} system property
   * (values look like 1.6, 1.7, 1.8, 9, 10, ...; a missing property is treated as 99).
   *
   * <p>If the version cannot be parsed or the reflective set-up of the chosen strategy fails,
   * the no-op cleaner is used instead of failing class initialisation: explicit unmapping is
   * only an optimisation, and the no-op strategy is what Java 19+ runs with anyway.
   */
  private static Cleaner findCleaner() {
    try {
      String javaVersion = System.getProperty("java.specification.version", "99");
      if (javaVersion.startsWith("1.")) {
        // Java 8 and earlier
        return new OldCleaner();
      }
      int version = Integer.parseInt(javaVersion);
      if (version >= 19) {
        // Java 19+: sun.misc.Unsafe.invokeCleaner is deprecated, so no explicit cleaning is done
        return new Java19Cleaner();
      }
      // Java 9-18: Use sun.misc.Unsafe.invokeCleaner
      return new NewCleaner();
    } catch (ReflectiveOperationException | RuntimeException e) {
      // Could not set up explicit cleaning; rely on the JVM to unmap when buffers are collected.
      return new Java19Cleaner();
    }
  }

  /**
   * Unmaps a single buffer using the strategy selected for this JVM.
   *
   * @param buffer the buffer to unmap; {@code null} is ignored
   */
  public static void cleanMapping(final MappedByteBuffer buffer) {
    if (buffer != null) {
      CLEANER.clean(buffer);
    }
  }

  /**
   * Clean an array of MappedByteBuffers, with optional sleep for multi-threaded scenarios.
   * On Java 19+, this is a no-op (no sleep, no clean).
   * On Java 8-18, sleeps if needed to allow other threads to see null assignments, then cleans.
   *
   * <p>If the sleep is interrupted, the interrupt flag is restored and the buffers are NOT
   * cleaned: the grace period did not elapse, and unmapping while another thread may still
   * touch a buffer can crash the JVM. The mappings are then left to the garbage collector.
   *
   * @param chunks the array of buffers to clean; {@code null} and {@code null} elements are skipped
   * @param otherRefsExist true if other threads might still hold references to the buffers
   */
  public static void cleanChunks(MappedByteBuffer[] chunks, boolean otherRefsExist) {
    if (chunks == null || !CLEANER.needsClean()) {
      // Nothing to clean, or Java 19+: no cleaning needed, so no sleep needed either
      return;
    }

    // Java 8-18: Need to clean, so sleep first if other references exist
    if (otherRefsExist) {
      try {
        // Wait for other threads to see that chunks are null before cleaning
        // If we clean too early, the JVM can crash
        Thread.sleep(100);
      } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        // The wait was cut short, so cleaning now would be "too early"; skip it.
        return;
      }
    }

    // Clean all buffers
    for (MappedByteBuffer chunk : chunks) {
      if (chunk != null) {
        CLEANER.clean(chunk);
      }
    }
  }

  /** Strategy for unmapping a buffer on a particular family of Java versions. */
  private interface Cleaner {
    /** Unmaps the given buffer, or does nothing if this strategy does not clean. */
    void clean(MappedByteBuffer byteBuffer);

    /** @return {@code true} if {@link #clean} actually unmaps, {@code false} if it is a no-op */
    boolean needsClean();
  }

  /** Java 8 and earlier: reflectively calls {@code buffer.cleaner().clean()}. */
  private static class OldCleaner implements Cleaner {

    @Override
    public void clean(final MappedByteBuffer byteBuffer) {
      try {
        final Class<?> clazz = byteBuffer.getClass();
        final Method getCleanerMethod = clazz.getMethod("cleaner");
        getCleanerMethod.setAccessible(true);
        final Object cleaner = getCleanerMethod.invoke(byteBuffer);
        if (cleaner != null) {
          cleaner.getClass().getMethod("clean").invoke(cleaner);
        }
      } catch (IllegalAccessException | InvocationTargetException | NoSuchMethodException e) {
        throw new RuntimeException(e);
      }
    }

    @Override
    public boolean needsClean() {
      return true;
    }
  }

  /** Java 9-18: reflectively calls {@code Unsafe.invokeCleaner(ByteBuffer)}. */
  private static class NewCleaner implements Cleaner {

    private final Method clean;
    private final Object theUnsafe;

    private NewCleaner() throws ClassNotFoundException, NoSuchFieldException, IllegalAccessException, NoSuchMethodException {
      Class<?> unsafeClass = getUnsafeClass();
      clean = unsafeClass.getMethod("invokeCleaner", ByteBuffer.class);
      clean.setAccessible(true);
      Field theUnsafeField = unsafeClass.getDeclaredField("theUnsafe");
      theUnsafeField.setAccessible(true);
      theUnsafe = theUnsafeField.get(null);
    }

    private static Class<?> getUnsafeClass() throws ClassNotFoundException {
      try {
        return Class.forName("sun.misc.Unsafe");
      } catch (ClassNotFoundException | RuntimeException ex) {
        // jdk.internal.misc.Unsafe doesn't yet have an invokeCleaner() method,
        // but that method should be added if sun.misc.Unsafe is removed.
        return Class.forName("jdk.internal.misc.Unsafe");
      }
    }

    @Override
    public void clean(MappedByteBuffer byteBuffer) {
      try {
        clean.invoke(theUnsafe, byteBuffer);
      } catch (IllegalAccessException | InvocationTargetException e) {
        throw new RuntimeException(e);
      }
    }

    @Override
    public boolean needsClean() {
      return true;
    }
  }

  /**
   * Java 19+ cleaner that avoids deprecated sun.misc.Unsafe.invokeCleaner.
   * Since Java 9+ automatically unmaps buffers when GC'd via the Cleaner API,
   * manual cleaning is no longer necessary. This is a no-op implementation.
   *
   * Note: jdk.internal.misc.Unsafe exists but is not exported from java.base,
   * so it cannot be accessed via reflection without --add-exports JVM flag.
   */
  private static class Java19Cleaner implements Cleaner {

    private Java19Cleaner() {
      // No initialization needed for no-op cleaner
    }

    @Override
    public void clean(MappedByteBuffer byteBuffer) {
      // No-op: Java 9+ automatically unmaps buffers when GC'd
      // The buffer will be cleaned up by the JVM's internal Cleaner mechanism
      // This avoids the deprecated sun.misc.Unsafe.invokeCleaner warning
    }

    @Override
    public boolean needsClean() {
      // Java 19+ doesn't need manual cleanup - it happens automatically
      return false;
    }
  }
}