├── .gitignore ├── README.md ├── pom.xml └── src ├── main └── java │ └── org │ └── apache │ └── minibase │ ├── BloomFilter.java │ ├── Bytes.java │ ├── Config.java │ ├── DiskFile.java │ ├── DiskStore.java │ ├── KeyValue.java │ ├── MStore.java │ ├── MemStore.java │ └── MiniBase.java └── test └── java └── org └── apache └── minibase ├── TestBloomFilter.java ├── TestBytes.java ├── TestDiskFile.java ├── TestKeyValue.java ├── TestMemstoreFlush.java ├── TestMiniBase.java └── TestMultiIter.java /.gitignore: -------------------------------------------------------------------------------- 1 | /.externalToolBuilders 2 | .project 3 | *.settings/ 4 | .classpath 5 | /build 6 | /.idea/ 7 | /logs 8 | *target/ 9 | *.orig 10 | *~ 11 | hbase-*/test 12 | *.iws 13 | *.iml 14 | *.ipr 15 | patchprocess/ 16 | dependency-reduced-pom.xml 17 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | MiniBase is an embedded KV storage engine. It's quite simple and not for production env; it's just for better understanding HBase or 2 | other LSM-related index algorithms. 3 | 4 | In MiniBase, we use these basic algorithms and data structures: 5 | 6 | * BloomFilter: it helps a lot by filtering out much useless IO. 7 | * ConcurrentSkipListMap: Yeah, it's quite suitable when designing memstore. It can maintain a sorted key-value set in 8 | high-concurrency scenarios. 9 | * LSM Index Algorithm: the memstore part and disk store part. 10 | 11 | ### How to use? 12 | 13 | ```java 14 | Config conf = new Config().setDataDir(dataDir).setMaxMemstoreSize(1).setFlushMaxRetries(1) 15 | .setMaxDiskFiles(10); 16 | MiniBase db = MiniBaseImpl.create(conf).open(); 17 | 18 | // Put 19 | db.put(Bytes.toBytes(1), Bytes.toBytes(1)); 20 | 21 | // Scan 22 | Iter<KeyValue> it = db.scan(); 23 | while (it.hasNext()) { 24 | KeyValue kv = it.next(); 25 | //... 26 | } 27 | ``` 28 | 29 | ### How to build and test? 
30 | 31 | ```shell 32 | git clone git@github.com:openinx/minibase.git 33 | mvn clean package 34 | ``` 35 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | org.apache.minibase 8 | minibase 9 | 1.0-SNAPSHOT 10 | 11 | 12 | 13 | 14 | org.apache.maven.plugins 15 | maven-compiler-plugin 16 | 17 | 1.8 18 | 1.8 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | junit 27 | junit 28 | 4.12 29 | test 30 | 31 | 32 | log4j 33 | log4j 34 | 1.2.17 35 | 36 | 37 | 38 | 39 | -------------------------------------------------------------------------------- /src/main/java/org/apache/minibase/BloomFilter.java: -------------------------------------------------------------------------------- 1 | package org.apache.minibase; 2 | 3 | public class BloomFilter { 4 | private int k; 5 | private int bitsPerKey; 6 | private int bitLen; 7 | private byte[] result; 8 | 9 | public BloomFilter(int k, int bitsPerKey) { 10 | this.k = k; 11 | this.bitsPerKey = bitsPerKey; 12 | } 13 | 14 | public byte[] generate(byte[][] keys) { 15 | assert keys != null; 16 | bitLen = keys.length * bitsPerKey; 17 | bitLen = ((bitLen + 7) / 8) << 3; // align the bitLen. 18 | bitLen = bitLen < 64 ? 
64 : bitLen; 19 | result = new byte[bitLen >> 3]; 20 | for (int i = 0; i < keys.length; i++) { 21 | assert keys[i] != null; 22 | int h = Bytes.hash(keys[i]); 23 | for (int t = 0; t < k; t++) { 24 | int idx = (h % bitLen + bitLen) % bitLen; 25 | result[idx / 8] |= (1 << (idx % 8)); 26 | int delta = (h >> 17) | (h << 15); 27 | h += delta; 28 | } 29 | } 30 | return result; 31 | } 32 | 33 | public boolean contains(byte[] key) { 34 | assert result != null; 35 | int h = Bytes.hash(key); 36 | for (int t = 0; t < k; t++) { 37 | int idx = (h % bitLen + bitLen) % bitLen; 38 | if ((result[idx / 8] & (1 << (idx % 8))) == 0) { 39 | return false; 40 | } 41 | int delta = (h >> 17) | (h << 15); 42 | h += delta; 43 | } 44 | return true; 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /src/main/java/org/apache/minibase/Bytes.java: -------------------------------------------------------------------------------- 1 | package org.apache.minibase; 2 | 3 | import java.io.IOException; 4 | 5 | public class Bytes { 6 | 7 | public final static byte[] EMPTY_BYTES = new byte[0]; 8 | public final static String HEX_TMP = "0123456789ABCDEF"; 9 | 10 | public static byte[] toBytes(byte b) { 11 | return new byte[] { b }; 12 | } 13 | 14 | public static byte[] toBytes(String s) throws IOException { 15 | if (s == null) return new byte[0]; 16 | return s.getBytes("UTF-8"); 17 | } 18 | 19 | public static byte[] toBytes(int x) { 20 | byte[] b = new byte[4]; 21 | b[3] = (byte) (x & 0xFF); 22 | b[2] = (byte) ((x >> 8) & 0xFF); 23 | b[1] = (byte) ((x >> 16) & 0xFF); 24 | b[0] = (byte) ((x >> 24) & 0xFF); 25 | return b; 26 | } 27 | 28 | public static byte[] toBytes(long x) { 29 | byte[] b = new byte[8]; 30 | for (int i = 7; i >= 0; i--) { 31 | int j = (7 - i) << 3; 32 | b[i] = (byte) ((x >> j) & 0xFF); 33 | } 34 | return b; 35 | } 36 | 37 | public static String toHex(byte[] buf){ 38 | return toHex(buf, 0, buf.length); 39 | } 40 | 41 | public static String 
toHex(byte[] buf, int offset, int len) { 42 | StringBuilder sb = new StringBuilder(); 43 | for (int i = offset; i < offset + len; i++) { 44 | int x = buf[i]; 45 | if (x > 32 && x < 127) { 46 | sb.append((char) x); 47 | } else { 48 | sb.append("\\x").append(HEX_TMP.charAt((x >> 4) & 0x0F)).append(HEX_TMP.charAt(x & 0x0F)); 49 | } 50 | } 51 | return sb.toString(); 52 | } 53 | 54 | public static byte[] toBytes(byte[] a, byte[] b) { 55 | if (a == null) return b; 56 | if (b == null) return a; 57 | byte[] result = new byte[a.length + b.length]; 58 | System.arraycopy(a, 0, result, 0, a.length); 59 | System.arraycopy(b, 0, result, a.length, b.length); 60 | return result; 61 | } 62 | 63 | public static int toInt(byte[] a) { 64 | return (a[0] << 24) & 0xFF000000 | (a[1] << 16) & 0x00FF0000 | (a[2] << 8) & 0x0000FF00 65 | | (a[3] << 0) & 0x000000FF; 66 | } 67 | 68 | public static long toLong(byte[] a) { 69 | long x = 0; 70 | for (int i = 0; i < 8; i++) { 71 | int j = (7 - i) << 3; 72 | x |= ((0xFFL << j) & ((long) a[i] << j)); 73 | } 74 | return x; 75 | } 76 | 77 | public static byte[] slice(byte[] buf, int offset, int len) throws IOException { 78 | if (buf == null) { 79 | throw new IOException("buffer is null"); 80 | } 81 | if (offset < 0 || len < 0) { 82 | throw new IOException("Invalid offset: " + offset + " or len: " + len); 83 | } 84 | if (offset + len > buf.length) { 85 | throw new IOException("Buffer overflow, offset: " + offset + ", len: " + len 86 | + ", buf.length:" + buf.length); 87 | } 88 | byte[] result = new byte[len]; 89 | System.arraycopy(buf, offset, result, 0, len); 90 | return result; 91 | } 92 | 93 | public static int hash(byte[] key) { 94 | if (key == null) return 0; 95 | int h = 1; 96 | for (int i = 0; i < key.length; i++) { 97 | h = (h << 5) + h + key[i]; 98 | } 99 | return h; 100 | } 101 | 102 | public static int compare(byte[] a, byte[] b) { 103 | if (a == b) return 0; 104 | if (a == null) return -1; 105 | if (b == null) return 1; 106 | for (int i = 
0, j = 0; i < a.length && j < b.length; i++, j++) { 107 | int x = a[i] & 0xFF; 108 | int y = b[i] & 0xFF; 109 | if (x != y) { 110 | return x - y; 111 | } 112 | } 113 | return a.length - b.length; 114 | } 115 | } 116 | -------------------------------------------------------------------------------- /src/main/java/org/apache/minibase/Config.java: -------------------------------------------------------------------------------- 1 | package org.apache.minibase; 2 | 3 | public class Config { 4 | 5 | private long maxMemstoreSize = 16 * 1024 * 1024; 6 | private int flushMaxRetries = 10; 7 | private String dataDir = "MiniBase"; 8 | private int maxDiskFiles = 10; 9 | private int maxThreadPoolSize = 5; 10 | 11 | private static final Config DEFAULT = new Config(); 12 | 13 | public Config setMaxMemstoreSize(long maxMemstoreSize) { 14 | this.maxMemstoreSize = maxMemstoreSize; 15 | return this; 16 | } 17 | 18 | public long getMaxMemstoreSize() { 19 | return this.maxMemstoreSize; 20 | } 21 | 22 | public Config setFlushMaxRetries(int flushMaxRetries) { 23 | this.flushMaxRetries = flushMaxRetries; 24 | return this; 25 | } 26 | 27 | public int getFlushMaxRetries() { 28 | return this.flushMaxRetries; 29 | } 30 | 31 | public Config setDataDir(String dataDir) { 32 | this.dataDir = dataDir; 33 | return this; 34 | } 35 | 36 | public String getDataDir() { 37 | return this.dataDir; 38 | } 39 | 40 | public Config setMaxDiskFiles(int maxDiskFiles) { 41 | this.maxDiskFiles = maxDiskFiles; 42 | return this; 43 | } 44 | 45 | public int getMaxDiskFiles() { 46 | return this.maxDiskFiles; 47 | } 48 | 49 | public Config setMaxThreadPoolSize(int maxThreadPoolSize) { 50 | this.maxThreadPoolSize = maxThreadPoolSize; 51 | return this; 52 | } 53 | 54 | public int getMaxThreadPoolSize() { 55 | return this.maxThreadPoolSize; 56 | } 57 | 58 | public static Config getDefault() { 59 | return DEFAULT; 60 | } 61 | } 62 | -------------------------------------------------------------------------------- 
/src/main/java/org/apache/minibase/DiskFile.java: -------------------------------------------------------------------------------- 1 | package org.apache.minibase; 2 | 3 | import org.apache.minibase.MStore.SeekIter; 4 | 5 | import java.io.Closeable; 6 | import java.io.File; 7 | import java.io.FileDescriptor; 8 | import java.io.FileOutputStream; 9 | import java.io.IOException; 10 | import java.io.RandomAccessFile; 11 | import java.util.ArrayList; 12 | import java.util.Iterator; 13 | import java.util.List; 14 | import java.util.SortedSet; 15 | import java.util.TreeSet; 16 | import java.util.zip.CRC32; 17 | import java.util.zip.Checksum; 18 | 19 | public class DiskFile implements Closeable { 20 | 21 | public static final int BLOCK_SIZE_UP_LIMIT = 1024 * 1024 * 2; 22 | public static final int BLOOM_FILTER_HASH_COUNT = 3; 23 | public static final int BLOOM_FILTER_BITS_PER_KEY = 10; 24 | 25 | // fileSize(8B)+ blockCount(4B) + blockIndexOffset(8B) + blockIndexSize(8B) + DISK_FILE_MAGIC 26 | // (8B) 27 | public static final int TRAILER_SIZE = 8 + 4 + 8 + 8 + 8; 28 | public static final long DISK_FILE_MAGIC = 0xFAC881234221FFA9L; 29 | 30 | private String fname; 31 | private RandomAccessFile in; 32 | private SortedSet blockMetaSet = new TreeSet<>(); 33 | 34 | private long fileSize; 35 | private int blockCount; 36 | private long blockIndexOffset; 37 | private long blockIndexSize; 38 | 39 | public static class BlockMeta implements Comparable { 40 | 41 | private static final int OFFSET_SIZE = 8; 42 | private static final int SIZE_SIZE = 8; 43 | private static final int BF_LEN_SIZE = 4; 44 | 45 | private KeyValue lastKV; 46 | private long blockOffset; 47 | private long blockSize; 48 | private byte[] bloomFilter; 49 | 50 | /** 51 | * Only used for {@link SeekIter} to seek a target block meta. We only care about the lastKV, so 52 | * the other fields can be anything. 53 | * 54 | * @param lastKV the last key value to construct the dummy block meta. 
55 | * @return the dummy block meta. 56 | */ 57 | private static BlockMeta createSeekDummy(KeyValue lastKV) { 58 | return new BlockMeta(lastKV, 0L, 0L, Bytes.EMPTY_BYTES); 59 | } 60 | 61 | public BlockMeta(KeyValue lastKV, long offset, long size, byte[] bloomFilter) { 62 | this.lastKV = lastKV; 63 | this.blockOffset = offset; 64 | this.blockSize = size; 65 | this.bloomFilter = bloomFilter; 66 | } 67 | 68 | public KeyValue getLastKV() { 69 | return this.lastKV; 70 | } 71 | 72 | public long getBlockOffset() { 73 | return this.blockOffset; 74 | } 75 | 76 | public long getBlockSize() { 77 | return this.blockSize; 78 | } 79 | 80 | public byte[] getBloomFilter() { 81 | return this.bloomFilter; 82 | } 83 | 84 | public int getSerializeSize() { 85 | // TODO the meta no need the value of last kv, will save much bytes. 86 | return lastKV.getSerializeSize() + OFFSET_SIZE + SIZE_SIZE + BF_LEN_SIZE + bloomFilter.length; 87 | } 88 | 89 | public byte[] toBytes() throws IOException { 90 | byte[] bytes = new byte[getSerializeSize()]; 91 | int pos = 0; 92 | 93 | // Encode last kv 94 | byte[] kvBytes = lastKV.toBytes(); 95 | System.arraycopy(kvBytes, 0, bytes, pos, kvBytes.length); 96 | pos += kvBytes.length; 97 | 98 | // Encode blockOffset 99 | byte[] offsetBytes = Bytes.toBytes(blockOffset); 100 | System.arraycopy(offsetBytes, 0, bytes, pos, offsetBytes.length); 101 | pos += offsetBytes.length; 102 | 103 | // Encode blockSize 104 | byte[] sizeBytes = Bytes.toBytes(blockSize); 105 | System.arraycopy(sizeBytes, 0, bytes, pos, sizeBytes.length); 106 | pos += sizeBytes.length; 107 | 108 | // Encode length of bloom filter 109 | byte[] bfLenBytes = Bytes.toBytes(bloomFilter.length); 110 | System.arraycopy(bfLenBytes, 0, bytes, pos, bfLenBytes.length); 111 | pos += bfLenBytes.length; 112 | 113 | // Encode bytes of bloom filter. 
114 | System.arraycopy(bloomFilter, 0, bytes, pos, bloomFilter.length); 115 | pos += bloomFilter.length; 116 | 117 | if (pos != bytes.length) { 118 | throw new IOException( 119 | "pos(" + pos + ") should be equal to length of bytes (" + bytes.length + ")"); 120 | } 121 | return bytes; 122 | } 123 | 124 | public static BlockMeta parseFrom(byte[] buf, int offset) throws IOException { 125 | int pos = offset; 126 | 127 | // Decode last key value. 128 | KeyValue lastKV = KeyValue.parseFrom(buf, offset); 129 | pos += lastKV.getSerializeSize(); 130 | 131 | // Decode block blockOffset 132 | long blockOffset = Bytes.toLong(Bytes.slice(buf, pos, OFFSET_SIZE)); 133 | pos += OFFSET_SIZE; 134 | 135 | // Decode block blockSize 136 | long blockSize = Bytes.toLong(Bytes.slice(buf, pos, SIZE_SIZE)); 137 | pos += SIZE_SIZE; 138 | 139 | // Decode blockSize of block bloom filter 140 | int bloomFilterSize = Bytes.toInt(Bytes.slice(buf, pos, BF_LEN_SIZE)); 141 | pos += BF_LEN_SIZE; 142 | 143 | // Decode bytes of block bloom filter 144 | byte[] bloomFilter = Bytes.slice(buf, pos, bloomFilterSize); 145 | pos += bloomFilterSize; 146 | 147 | assert pos <= buf.length; 148 | return new BlockMeta(lastKV, blockOffset, blockSize, bloomFilter); 149 | } 150 | 151 | @Override 152 | public int compareTo(BlockMeta o) { 153 | return this.lastKV.compareTo(o.lastKV); 154 | } 155 | } 156 | 157 | public static class BlockIndexWriter { 158 | 159 | private List blockMetas = new ArrayList<>(); 160 | private int totalBytes = 0; 161 | 162 | public void append(KeyValue lastKV, long offset, long size, byte[] bloomFilter) { 163 | BlockMeta meta = new BlockMeta(lastKV, offset, size, bloomFilter); 164 | blockMetas.add(meta); 165 | totalBytes += meta.getSerializeSize(); 166 | } 167 | 168 | public byte[] serialize() throws IOException { 169 | byte[] buffer = new byte[totalBytes]; 170 | int pos = 0; 171 | for (BlockMeta meta : blockMetas) { 172 | byte[] metaBytes = meta.toBytes(); 173 | System.arraycopy(metaBytes, 0, 
buffer, pos, metaBytes.length); 174 | pos += metaBytes.length; 175 | } 176 | assert pos == totalBytes; 177 | return buffer; 178 | } 179 | } 180 | 181 | public static class BlockWriter { 182 | public static final int KV_SIZE_LEN = 4; 183 | public static final int CHECKSUM_LEN = 4; 184 | 185 | private int totalSize; 186 | private List kvBuf; 187 | private BloomFilter bloomFilter; 188 | private Checksum crc32; 189 | private KeyValue lastKV; 190 | private int keyValueCount; 191 | 192 | public BlockWriter() { 193 | totalSize = 0; 194 | kvBuf = new ArrayList<>(); 195 | bloomFilter = new BloomFilter(BLOOM_FILTER_HASH_COUNT, BLOOM_FILTER_BITS_PER_KEY); 196 | crc32 = new CRC32(); 197 | } 198 | 199 | public void append(KeyValue kv) throws IOException { 200 | // Update key value buffer 201 | kvBuf.add(kv); 202 | lastKV = kv; 203 | 204 | // Update checksum 205 | byte[] buf = kv.toBytes(); 206 | crc32.update(buf, 0, buf.length); 207 | 208 | totalSize += kv.getSerializeSize(); 209 | keyValueCount += 1; 210 | } 211 | 212 | public byte[] getBloomFilter() { 213 | byte[][] bytes = new byte[kvBuf.size()][]; 214 | for (int i = 0; i < kvBuf.size(); i++) { 215 | bytes[i] = kvBuf.get(i).getKey(); 216 | } 217 | return bloomFilter.generate(bytes); 218 | } 219 | 220 | public int getChecksum() { 221 | return (int) crc32.getValue(); 222 | } 223 | 224 | public KeyValue getLastKV() { 225 | return this.lastKV; 226 | } 227 | 228 | public int size() { 229 | return KV_SIZE_LEN + totalSize + CHECKSUM_LEN; 230 | } 231 | 232 | public int getKeyValueCount() { 233 | return keyValueCount; 234 | } 235 | 236 | public byte[] serialize() throws IOException { 237 | byte[] buffer = new byte[size()]; 238 | int pos = 0; 239 | 240 | // Append kv getSerializeSize. 
241 | byte[] kvSize = Bytes.toBytes(kvBuf.size()); 242 | System.arraycopy(kvSize, 0, buffer, pos, kvSize.length); 243 | pos += kvSize.length; 244 | 245 | // Append all the key value 246 | for (int i = 0; i < kvBuf.size(); i++) { 247 | byte[] kv = kvBuf.get(i).toBytes(); 248 | System.arraycopy(kv, 0, buffer, pos, kv.length); 249 | pos += kv.length; 250 | } 251 | 252 | // Append checksum. 253 | byte[] checksum = Bytes.toBytes(this.getChecksum()); 254 | System.arraycopy(checksum, 0, buffer, pos, checksum.length); 255 | pos += checksum.length; 256 | 257 | assert pos == size(); 258 | return buffer; 259 | } 260 | } 261 | 262 | public static class BlockReader { 263 | 264 | private List kvBuf; 265 | 266 | public BlockReader(List kvBuf) { 267 | this.kvBuf = kvBuf; 268 | } 269 | 270 | public static BlockReader parseFrom(byte[] buffer, int offset, int size) throws IOException { 271 | int pos = 0; 272 | List kvBuf = new ArrayList(); 273 | Checksum crc32 = new CRC32(); 274 | 275 | // Parse kv getSerializeSize 276 | int kvSize = Bytes.toInt(Bytes.slice(buffer, offset + pos, BlockWriter.KV_SIZE_LEN)); 277 | pos += BlockWriter.KV_SIZE_LEN; 278 | 279 | // Parse all key value. 
280 | for (int i = 0; i < kvSize; i++) { 281 | KeyValue kv = KeyValue.parseFrom(buffer, offset + pos); 282 | kvBuf.add(kv); 283 | crc32.update(buffer, offset + pos, kv.getSerializeSize()); 284 | pos += kv.getSerializeSize(); 285 | } 286 | 287 | // Parse checksum 288 | int checksum = Bytes.toInt(Bytes.slice(buffer, offset + pos, BlockWriter.CHECKSUM_LEN)); 289 | pos += BlockWriter.CHECKSUM_LEN; 290 | assert checksum == (int) (crc32.getValue() & 0xFFFFFFFF); 291 | 292 | assert pos == size : "pos: " + pos + ", getSerializeSize: " + size; 293 | 294 | return new BlockReader(kvBuf); 295 | } 296 | 297 | public List getKeyValues() { 298 | return kvBuf; 299 | } 300 | } 301 | 302 | public static class DiskFileWriter implements Closeable { 303 | private String fname; 304 | 305 | private long currentOffset; 306 | private BlockIndexWriter indexWriter; 307 | private BlockWriter currentWriter; 308 | private FileOutputStream out; 309 | 310 | private long fileSize = 0; 311 | private int blockCount = 0; 312 | private long blockIndexOffset = 0; 313 | private long blockIndexSize = 0; 314 | 315 | public DiskFileWriter(String fname) throws IOException { 316 | this.fname = fname; 317 | 318 | File f = new File(this.fname); 319 | f.createNewFile(); 320 | out = new FileOutputStream(f, true); 321 | currentOffset = 0; 322 | indexWriter = new BlockIndexWriter(); 323 | currentWriter = new BlockWriter(); 324 | } 325 | 326 | private void switchNextBlockWriter() throws IOException { 327 | assert currentWriter.getLastKV() != null; 328 | 329 | byte[] buffer = currentWriter.serialize(); 330 | out.write(buffer); 331 | indexWriter.append(currentWriter.getLastKV(), currentOffset, buffer.length, 332 | currentWriter.getBloomFilter()); 333 | 334 | currentOffset += buffer.length; 335 | blockCount += 1; 336 | 337 | // switch to the next block. 
338 | currentWriter = new BlockWriter(); 339 | } 340 | 341 | public void append(KeyValue kv) throws IOException { 342 | if (kv == null) return; 343 | 344 | assert kv.getSerializeSize() + BlockWriter.KV_SIZE_LEN + BlockWriter.CHECKSUM_LEN < BLOCK_SIZE_UP_LIMIT; 345 | 346 | if ((currentWriter.getKeyValueCount() > 0) 347 | && (kv.getSerializeSize() + currentWriter.size() >= BLOCK_SIZE_UP_LIMIT)) { 348 | switchNextBlockWriter(); 349 | } 350 | 351 | currentWriter.append(kv); 352 | } 353 | 354 | public void appendIndex() throws IOException { 355 | if (currentWriter.getKeyValueCount() > 0) { 356 | switchNextBlockWriter(); 357 | } 358 | 359 | byte[] buffer = indexWriter.serialize(); 360 | blockIndexOffset = currentOffset; 361 | blockIndexSize = buffer.length; 362 | 363 | out.write(buffer); 364 | 365 | currentOffset += buffer.length; 366 | } 367 | 368 | public void appendTrailer() throws IOException { 369 | fileSize = currentOffset + TRAILER_SIZE; 370 | 371 | // fileSize(8B) 372 | byte[] buffer = Bytes.toBytes(fileSize); 373 | out.write(buffer); 374 | 375 | // blockCount(4B) 376 | buffer = Bytes.toBytes(blockCount); 377 | out.write(buffer); 378 | 379 | // blockIndexOffset(8B) 380 | buffer = Bytes.toBytes(blockIndexOffset); 381 | out.write(buffer); 382 | 383 | // blockIndexSize(8B) 384 | buffer = Bytes.toBytes(blockIndexSize); 385 | out.write(buffer); 386 | 387 | // DISK_FILE_MAGIC(8B) 388 | buffer = Bytes.toBytes(DISK_FILE_MAGIC); 389 | out.write(buffer); 390 | } 391 | 392 | public void close() throws IOException { 393 | if (out != null) { 394 | try { 395 | out.flush(); 396 | FileDescriptor fd = out.getFD(); 397 | fd.sync(); 398 | } finally { 399 | out.close(); 400 | } 401 | } 402 | } 403 | } 404 | 405 | public void open(String filename) throws IOException { 406 | this.fname = filename; 407 | 408 | File f = new File(fname); 409 | this.in = new RandomAccessFile(f, "r"); 410 | 411 | this.fileSize = f.length(); 412 | assert fileSize > TRAILER_SIZE; 413 | in.seek(fileSize - 
TRAILER_SIZE); 414 | 415 | byte[] buffer = new byte[8]; 416 | in.readFully(buffer); // Read outside of assert: assertions are disabled by default, which would skip the read. 417 | assert this.fileSize == Bytes.toLong(buffer); 418 | 419 | buffer = new byte[4]; 420 | in.readFully(buffer); 421 | this.blockCount = Bytes.toInt(buffer); 422 | 423 | buffer = new byte[8]; 424 | in.readFully(buffer); 425 | this.blockIndexOffset = Bytes.toLong(buffer); 426 | 427 | buffer = new byte[8]; 428 | in.readFully(buffer); 429 | this.blockIndexSize = Bytes.toLong(buffer); 430 | 431 | buffer = new byte[8]; 432 | in.readFully(buffer); 433 | assert DISK_FILE_MAGIC == Bytes.toLong(buffer); 434 | 435 | // TODO Maybe a large memory, and overflow 436 | buffer = new byte[(int) blockIndexSize]; 437 | in.seek(blockIndexOffset); 438 | in.readFully(buffer); 439 | 440 | // TODO blockOffset may overflow. 441 | int offset = 0; 442 | 443 | do { 444 | BlockMeta meta = BlockMeta.parseFrom(buffer, offset); 445 | offset += meta.getSerializeSize(); 446 | blockMetaSet.add(meta); 447 | } while (offset < buffer.length); 448 | 449 | assert blockMetaSet.size() == this.blockCount : "blockMetaSet.getSerializeSize:" + blockMetaSet.size() 450 | + ", blockCount: " + blockCount; 451 | } 452 | 453 | public String getFileName() { 454 | return fname; 455 | } 456 | 457 | private BlockReader load(BlockMeta meta) throws IOException { 458 | in.seek(meta.getBlockOffset()); 459 | 460 | // TODO Maybe overflow. 
461 | byte[] buffer = new byte[(int) meta.getBlockSize()]; 462 | 463 | in.readFully(buffer); // Must not live inside an assert: assertions are off by default, which would skip the read. 464 | return BlockReader.parseFrom(buffer, 0, buffer.length); 465 | } 466 | 467 | private class InternalIterator implements SeekIter { 468 | 469 | private int currentKVIndex = 0; 470 | private BlockReader currentReader; 471 | private Iterator blockMetaIter; 472 | 473 | public InternalIterator() { 474 | currentReader = null; 475 | blockMetaIter = blockMetaSet.iterator(); 476 | } 477 | 478 | private boolean nextBlockReader() throws IOException { 479 | if (blockMetaIter.hasNext()) { 480 | currentReader = load(blockMetaIter.next()); 481 | currentKVIndex = 0; 482 | return true; 483 | } else { 484 | return false; 485 | } 486 | } 487 | 488 | @Override 489 | public boolean hasNext() throws IOException { 490 | if (currentReader == null) { 491 | return nextBlockReader(); 492 | } else { 493 | if (currentKVIndex < currentReader.getKeyValues().size()) { 494 | return true; 495 | } else { 496 | return nextBlockReader(); 497 | } 498 | } 499 | } 500 | 501 | @Override 502 | public KeyValue next() throws IOException { 503 | return currentReader.getKeyValues().get(currentKVIndex++); 504 | } 505 | 506 | @Override 507 | public void seekTo(KeyValue target) throws IOException { 508 | // Locate the smallest block meta which has the lastKV >= target. 509 | blockMetaIter = blockMetaSet.tailSet(BlockMeta.createSeekDummy(target)).iterator(); 510 | currentReader = null; 511 | if (blockMetaIter.hasNext()) { 512 | currentReader = load(blockMetaIter.next()); 513 | currentKVIndex = 0; 514 | // Locate the smallest KV which is greater than or equals to the given KV. We're sure that 515 | // we can find the currentKVIndex, because lastKV of the block is greater than or equals 516 | // to the target KV. 
517 | while (currentKVIndex < currentReader.getKeyValues().size()) { 518 | KeyValue curKV = currentReader.getKeyValues().get(currentKVIndex); 519 | if (curKV.compareTo(target) >= 0) { 520 | break; 521 | } 522 | currentKVIndex++; 523 | } 524 | if (currentKVIndex >= currentReader.getKeyValues().size()) { 525 | throw new IOException("Data block mis-encoded, lastKV of the currentReader >= kv, but " + 526 | "we found all kv < target"); 527 | } 528 | } 529 | } 530 | } 531 | 532 | public SeekIter iterator() { 533 | return new InternalIterator(); 534 | } 535 | 536 | @Override 537 | public void close() throws IOException { 538 | if (in != null) { 539 | in.close(); 540 | } 541 | } 542 | } 543 | -------------------------------------------------------------------------------- /src/main/java/org/apache/minibase/DiskStore.java: -------------------------------------------------------------------------------- 1 | package org.apache.minibase; 2 | 3 | import org.apache.log4j.Logger; 4 | import org.apache.minibase.DiskFile.DiskFileWriter; 5 | import org.apache.minibase.MStore.SeekIter; 6 | import org.apache.minibase.MiniBase.Compactor; 7 | import org.apache.minibase.MiniBase.Flusher; 8 | import org.apache.minibase.MiniBase.Iter; 9 | 10 | import java.io.Closeable; 11 | import java.io.File; 12 | import java.io.IOException; 13 | import java.util.ArrayList; 14 | import java.util.Collection; 15 | import java.util.List; 16 | import java.util.PriorityQueue; 17 | import java.util.concurrent.atomic.AtomicLong; 18 | import java.util.regex.Matcher; 19 | import java.util.regex.Pattern; 20 | 21 | public class DiskStore implements Closeable { 22 | 23 | private static final Logger LOG = Logger.getLogger(DiskStore.class); 24 | private static final String FILE_NAME_TMP_SUFFIX = ".tmp"; 25 | private static final String FILE_NAME_ARCHIVE_SUFFIX = ".archive"; 26 | private static final Pattern DATA_FILE_RE = Pattern.compile("data\\.([0-9]+)"); // data.1 27 | 28 | private String dataDir; 29 | private List 
diskFiles; 30 | 31 | private int maxDiskFiles; 32 | private volatile AtomicLong maxFileId; 33 | 34 | public DiskStore(String dataDir, int maxDiskFiles) { 35 | this.dataDir = dataDir; 36 | this.diskFiles = new ArrayList<>(); 37 | this.maxDiskFiles = maxDiskFiles; 38 | } 39 | 40 | private File[] listDiskFiles() { 41 | File f = new File(this.dataDir); 42 | return f.listFiles(fname -> DATA_FILE_RE.matcher(fname.getName()).matches()); 43 | } 44 | 45 | public synchronized long getMaxDiskId() { 46 | // TODO can we save the maxFileId ? and next time, need not to traverse the disk file. 47 | File[] files = listDiskFiles(); 48 | long maxFileId = -1L; 49 | for (File f : files) { 50 | Matcher matcher = DATA_FILE_RE.matcher(f.getName()); 51 | if (matcher.matches()) { 52 | maxFileId = Math.max(Long.parseLong(matcher.group(1)), maxFileId); 53 | } 54 | } 55 | return maxFileId; 56 | } 57 | 58 | public synchronized long nextDiskFileId() { 59 | return maxFileId.incrementAndGet(); 60 | } 61 | 62 | public void addDiskFile(DiskFile df) { 63 | synchronized (diskFiles) { 64 | diskFiles.add(df); 65 | } 66 | } 67 | 68 | public synchronized void addDiskFile(String filename) throws IOException { 69 | DiskFile df = new DiskFile(); 70 | df.open(filename); 71 | addDiskFile(df); 72 | } 73 | 74 | public synchronized String getNextDiskFileName() { 75 | return new File(this.dataDir, String.format("data.%020d", nextDiskFileId())).toString(); 76 | } 77 | 78 | public void open() throws IOException { 79 | File[] files = listDiskFiles(); 80 | for (File f : files) { 81 | DiskFile df = new DiskFile(); 82 | df.open(f.getAbsolutePath()); 83 | diskFiles.add(df); 84 | } 85 | maxFileId = new AtomicLong(getMaxDiskId()); 86 | } 87 | 88 | public List getDiskFiles() { 89 | synchronized (diskFiles) { 90 | return new ArrayList<>(diskFiles); 91 | } 92 | } 93 | 94 | public void removeDiskFiles(Collection files) { 95 | synchronized (diskFiles) { 96 | diskFiles.removeAll(files); 97 | } 98 | } 99 | 100 | public long 
getMaxDiskFiles() { 101 | return this.maxDiskFiles; 102 | } 103 | 104 | @Override 105 | public void close() throws IOException { 106 | IOException closedException = null; 107 | for (DiskFile df : diskFiles) { 108 | try { 109 | df.close(); 110 | } catch (IOException e) { 111 | closedException = e; 112 | } 113 | } 114 | if (closedException != null) { 115 | throw closedException; 116 | } 117 | } 118 | 119 | public SeekIter createIterator(List diskFiles) throws IOException { 120 | List> iters = new ArrayList<>(); 121 | diskFiles.forEach(df -> iters.add(df.iterator())); 122 | return new MultiIter(iters); 123 | } 124 | 125 | public SeekIter createIterator() throws IOException { 126 | return createIterator(getDiskFiles()); 127 | } 128 | 129 | public static class DefaultFlusher implements Flusher { 130 | private DiskStore diskStore; 131 | 132 | public DefaultFlusher(DiskStore diskStore) { 133 | this.diskStore = diskStore; 134 | } 135 | 136 | @Override 137 | public void flush(Iter it) throws IOException { 138 | String fileName = diskStore.getNextDiskFileName(); 139 | String fileTempName = fileName + FILE_NAME_TMP_SUFFIX; 140 | try { 141 | try (DiskFileWriter writer = new DiskFileWriter(fileTempName)) { 142 | while (it.hasNext()) { 143 | writer.append(it.next()); 144 | } 145 | writer.appendIndex(); 146 | writer.appendTrailer(); 147 | } 148 | File f = new File(fileTempName); 149 | if (!f.renameTo(new File(fileName))) { 150 | throw new IOException( 151 | "Rename " + fileTempName + " to " + fileName + " failed when flushing"); 152 | } 153 | // TODO any concurrent issue ? 
154 | diskStore.addDiskFile(fileName); 155 | } finally { 156 | File f = new File(fileTempName); 157 | if (f.exists()) { 158 | f.delete(); 159 | } 160 | } 161 | } 162 | } 163 | 164 | public static class DefaultCompactor extends Compactor { 165 | private DiskStore diskStore; 166 | private volatile boolean running = true; 167 | 168 | public DefaultCompactor(DiskStore diskStore) { 169 | this.diskStore = diskStore; 170 | this.setDaemon(true); 171 | } 172 | 173 | private void performCompact(List filesToCompact) throws IOException { 174 | String fileName = diskStore.getNextDiskFileName(); 175 | String fileTempName = fileName + FILE_NAME_TMP_SUFFIX; 176 | try { 177 | try (DiskFileWriter writer = new DiskFileWriter(fileTempName)) { 178 | for (Iter it = diskStore.createIterator(filesToCompact); it.hasNext();) { 179 | writer.append(it.next()); 180 | } 181 | writer.appendIndex(); 182 | writer.appendTrailer(); 183 | } 184 | File f = new File(fileTempName); 185 | if (!f.renameTo(new File(fileName))) { 186 | throw new IOException("Rename " + fileTempName + " to " + fileName + " failed"); 187 | } 188 | 189 | // Rename the data files to archive files. 190 | // TODO when rename the files, will we effect the scan ? 191 | for (DiskFile df : filesToCompact) { 192 | df.close(); 193 | File file = new File(df.getFileName()); 194 | File archiveFile = new File(df.getFileName() + FILE_NAME_ARCHIVE_SUFFIX); 195 | if (!file.renameTo(archiveFile)) { 196 | LOG.error("Rename " + df.getFileName() + " to " + archiveFile.getName() + " failed."); 197 | } 198 | } 199 | diskStore.removeDiskFiles(filesToCompact); 200 | 201 | // TODO any concurrent issue ? 
202 | diskStore.addDiskFile(fileName); 203 | } finally { 204 | File f = new File(fileTempName); 205 | if (f.exists()) { 206 | f.delete(); 207 | } 208 | } 209 | } 210 | 211 | @Override 212 | public void compact() throws IOException { 213 | List filesToCompact = new ArrayList<>(); 214 | filesToCompact.addAll(diskStore.getDiskFiles()); 215 | performCompact(filesToCompact); 216 | } 217 | 218 | public void run() { 219 | while (running) { 220 | try { 221 | boolean isCompacted = false; 222 | if (diskStore.getDiskFiles().size() > diskStore.getMaxDiskFiles()) { 223 | performCompact(diskStore.getDiskFiles()); 224 | isCompacted = true; 225 | } 226 | if (!isCompacted) { 227 | Thread.sleep(1000); 228 | } 229 | } catch (IOException e) { 230 | e.printStackTrace(); 231 | LOG.error("Major compaction failed: ", e); 232 | } catch (InterruptedException ie) { 233 | LOG.error("InterruptedException happened, stop running: ", ie); 234 | break; 235 | } 236 | } 237 | } 238 | 239 | public void stopRunning() { 240 | this.running = false; 241 | } 242 | } 243 | 244 | public static class MultiIter implements SeekIter { 245 | 246 | private class IterNode { 247 | KeyValue kv; 248 | SeekIter iter; 249 | 250 | public IterNode(KeyValue kv, SeekIter it) { 251 | this.kv = kv; 252 | this.iter = it; 253 | } 254 | } 255 | 256 | private SeekIter iters[]; 257 | private PriorityQueue queue; 258 | 259 | public MultiIter(SeekIter iters[]) throws IOException { 260 | assert iters != null; 261 | this.iters = iters; // Used for seekTo 262 | this.queue = new PriorityQueue<>(((o1, o2) -> o1.kv.compareTo(o2.kv))); 263 | for (int i = 0; i < iters.length; i++) { 264 | if (iters[i] != null && iters[i].hasNext()) { 265 | queue.add(new IterNode(iters[i].next(), iters[i])); 266 | } 267 | } 268 | } 269 | 270 | @SuppressWarnings("unchecked") 271 | public MultiIter(List> iters) throws IOException { 272 | this(iters.toArray(new SeekIter[0])); 273 | } 274 | 275 | @Override 276 | public boolean hasNext() throws IOException { 277 
| return queue.size() > 0; 278 | } 279 | 280 | @Override 281 | public KeyValue next() throws IOException { 282 | while (!queue.isEmpty()) { 283 | IterNode first = queue.poll(); 284 | if (first.kv != null && first.iter != null) { 285 | if (first.iter.hasNext()) { 286 | queue.add(new IterNode(first.iter.next(), first.iter)); 287 | } 288 | return first.kv; 289 | } 290 | } 291 | return null; 292 | } 293 | 294 | @Override 295 | public void seekTo(KeyValue kv) throws IOException { 296 | queue.clear(); 297 | for (SeekIter it : iters) { 298 | it.seekTo(kv); 299 | if (it.hasNext()) { 300 | // Only the iterator which has some elements should be enqueued. 301 | queue.add(new IterNode(it.next(), it)); 302 | } 303 | } 304 | } 305 | } 306 | } 307 | -------------------------------------------------------------------------------- /src/main/java/org/apache/minibase/KeyValue.java: -------------------------------------------------------------------------------- 1 | package org.apache.minibase; 2 | 3 | import java.io.IOException; 4 | import java.util.Comparator; 5 | 6 | public class KeyValue implements Comparable { 7 | 8 | public static final int RAW_KEY_LEN_SIZE = 4; 9 | public static final int VAL_LEN_SIZE = 4; 10 | public static final int OP_SIZE = 1; 11 | public static final int SEQ_ID_SIZE = 8; 12 | public static final KeyValueComparator KV_CMP = new KeyValueComparator(); 13 | 14 | private byte[] key; 15 | private byte[] value; 16 | private Op op; 17 | private long sequenceId; 18 | 19 | public enum Op { 20 | Put((byte) 0), 21 | Delete((byte) 1); 22 | 23 | private byte code; 24 | 25 | Op(byte code) { 26 | this.code = code; 27 | } 28 | 29 | public static Op code2Op(byte code) { 30 | switch (code) { 31 | case 0: 32 | return Put; 33 | case 1: 34 | return Delete; 35 | default: 36 | throw new IllegalArgumentException("Unknown code: " + code); 37 | } 38 | } 39 | 40 | public byte getCode() { 41 | return this.code; 42 | } 43 | } 44 | 45 | public static KeyValue create(byte[] key, byte[] 
value, Op op, long sequenceId) { 46 | return new KeyValue(key, value, op, sequenceId); 47 | } 48 | 49 | public static KeyValue createPut(byte[] key, byte[] value, long sequenceId) { 50 | return KeyValue.create(key, value, Op.Put, sequenceId); 51 | } 52 | 53 | public static KeyValue createDelete(byte[] key, long sequenceId) { 54 | return KeyValue.create(key, Bytes.EMPTY_BYTES, Op.Delete, sequenceId); 55 | } 56 | 57 | private KeyValue(byte[] key, byte[] value, Op op, long sequenceId) { 58 | assert key != null; 59 | assert value != null; 60 | assert op != null; 61 | assert sequenceId >= 0; 62 | this.key = key; 63 | this.value = value; 64 | this.op = op; 65 | this.sequenceId = sequenceId; 66 | } 67 | 68 | public byte[] getKey() { 69 | return key; 70 | } 71 | 72 | public byte[] getValue() { 73 | return value; 74 | } 75 | 76 | public Op getOp() { 77 | return this.op; 78 | } 79 | 80 | public long getSequenceId() { 81 | return this.sequenceId; 82 | } 83 | 84 | private int getRawKeyLen() { 85 | return key.length + OP_SIZE + SEQ_ID_SIZE; 86 | } 87 | 88 | public byte[] toBytes() throws IOException { 89 | int rawKeyLen = getRawKeyLen(); 90 | int pos = 0; 91 | byte[] bytes = new byte[getSerializeSize()]; 92 | 93 | // Encode raw key length 94 | byte[] rawKeyLenBytes = Bytes.toBytes(rawKeyLen); 95 | System.arraycopy(rawKeyLenBytes, 0, bytes, pos, RAW_KEY_LEN_SIZE); 96 | pos += RAW_KEY_LEN_SIZE; 97 | 98 | // Encode value length. 
99 | byte[] valLen = Bytes.toBytes(value.length); 100 | System.arraycopy(valLen, 0, bytes, pos, VAL_LEN_SIZE); 101 | pos += VAL_LEN_SIZE; 102 | 103 | // Encode key 104 | System.arraycopy(key, 0, bytes, pos, key.length); 105 | pos += key.length; 106 | 107 | // Encode Op 108 | bytes[pos] = op.getCode(); 109 | pos += 1; 110 | 111 | // Encode sequenceId 112 | byte[] seqIdBytes = Bytes.toBytes(sequenceId); 113 | System.arraycopy(seqIdBytes, 0, bytes, pos, seqIdBytes.length); 114 | pos += seqIdBytes.length; 115 | 116 | // Encode value 117 | System.arraycopy(value, 0, bytes, pos, value.length); 118 | return bytes; 119 | } 120 | 121 | @Override 122 | public int compareTo(KeyValue kv) { 123 | if (kv == null) { 124 | throw new IllegalArgumentException("kv to compare should be null"); 125 | } 126 | int ret = Bytes.compare(this.key, kv.key); 127 | if (ret != 0) { 128 | return ret; 129 | } 130 | if (this.sequenceId != kv.sequenceId) { 131 | return this.sequenceId > kv.sequenceId ? -1 : 1; 132 | } 133 | if (this.op != kv.op) { 134 | return this.op.getCode() > kv.op.getCode() ? 
-1 : 1; 135 | } 136 | return 0; 137 | } 138 | 139 | @Override 140 | public boolean equals(Object kv) { 141 | if (kv == null) return false; 142 | if (!(kv instanceof KeyValue)) return false; 143 | KeyValue that = (KeyValue) kv; 144 | return this.compareTo(that) == 0; 145 | } 146 | 147 | public int getSerializeSize() { 148 | return RAW_KEY_LEN_SIZE + VAL_LEN_SIZE + getRawKeyLen() + value.length; 149 | } 150 | 151 | @Override 152 | public String toString() { 153 | StringBuilder sb = new StringBuilder(); 154 | sb.append("key=").append(Bytes.toHex(this.key)).append("/op=").append(op).append 155 | ("/sequenceId=").append(this.sequenceId).append("/value=").append(Bytes.toHex(this 156 | .value)); 157 | return sb.toString(); 158 | } 159 | 160 | public static KeyValue parseFrom(byte[] bytes, int offset) throws IOException { 161 | if (bytes == null) { 162 | throw new IOException("buffer is null"); 163 | } 164 | if (offset + RAW_KEY_LEN_SIZE + VAL_LEN_SIZE >= bytes.length) { 165 | throw new IOException("Invalid offset or len. offset: " + offset + ", len: " + bytes.length); 166 | } 167 | // Decode raw key length 168 | int pos = offset; 169 | int rawKeyLen = Bytes.toInt(Bytes.slice(bytes, pos, RAW_KEY_LEN_SIZE)); 170 | pos += RAW_KEY_LEN_SIZE; 171 | 172 | // Decode value length 173 | int valLen = Bytes.toInt(Bytes.slice(bytes, pos, VAL_LEN_SIZE)); 174 | pos += VAL_LEN_SIZE; 175 | 176 | // Decode key 177 | int keyLen = rawKeyLen - OP_SIZE - SEQ_ID_SIZE; 178 | byte[] key = Bytes.slice(bytes, pos, keyLen); 179 | pos += keyLen; 180 | 181 | // Decode Op 182 | Op op = Op.code2Op(bytes[pos]); 183 | pos += 1; 184 | 185 | // Decode sequenceId 186 | long sequenceId = Bytes.toLong(Bytes.slice(bytes, pos, SEQ_ID_SIZE)); 187 | pos += SEQ_ID_SIZE; 188 | 189 | // Decode value. 
190 | byte[] val = Bytes.slice(bytes, pos, valLen); 191 | return create(key, val, op, sequenceId); 192 | } 193 | 194 | public static KeyValue parseFrom(byte[] bytes) throws IOException { 195 | return parseFrom(bytes, 0); 196 | } 197 | 198 | private static class KeyValueComparator implements Comparator { 199 | 200 | @Override 201 | public int compare(KeyValue a, KeyValue b) { 202 | if (a == b) return 0; 203 | if (a == null) return -1; 204 | if (b == null) return 1; 205 | return a.compareTo(b); 206 | } 207 | } 208 | } 209 | -------------------------------------------------------------------------------- /src/main/java/org/apache/minibase/MStore.java: -------------------------------------------------------------------------------- 1 | package org.apache.minibase; 2 | 3 | import org.apache.minibase.DiskStore.DefaultCompactor; 4 | import org.apache.minibase.DiskStore.DefaultFlusher; 5 | import org.apache.minibase.DiskStore.MultiIter; 6 | import org.apache.minibase.KeyValue.Op; 7 | 8 | import java.io.IOException; 9 | import java.util.ArrayList; 10 | import java.util.List; 11 | import java.util.concurrent.ExecutorService; 12 | import java.util.concurrent.Executors; 13 | import java.util.concurrent.atomic.AtomicLong; 14 | 15 | public class MStore implements MiniBase { 16 | 17 | private ExecutorService pool; 18 | private MemStore memStore; 19 | private DiskStore diskStore; 20 | private Compactor compactor; 21 | private AtomicLong sequenceId; 22 | 23 | private Config conf; 24 | 25 | public MiniBase open() throws IOException { 26 | assert conf != null; 27 | 28 | // initialize the thread pool; 29 | this.pool = Executors.newFixedThreadPool(conf.getMaxThreadPoolSize()); 30 | 31 | // initialize the disk store. 32 | this.diskStore = new DiskStore(conf.getDataDir(), conf.getMaxDiskFiles()); 33 | this.diskStore.open(); 34 | // TODO initialize the max sequence id here. 35 | this.sequenceId = new AtomicLong(0); 36 | 37 | // initialize the memstore. 
38 | this.memStore = new MemStore(conf, new DefaultFlusher(diskStore), pool); 39 | 40 | this.compactor = new DefaultCompactor(diskStore); 41 | this.compactor.start(); 42 | return this; 43 | } 44 | 45 | private MStore(Config conf) { 46 | this.conf = conf; 47 | } 48 | 49 | public static MStore create(Config conf) { 50 | return new MStore(conf); 51 | } 52 | 53 | public static MStore create() { 54 | return create(Config.getDefault()); 55 | } 56 | 57 | @Override 58 | public void put(byte[] key, byte[] value) throws IOException { 59 | this.memStore.add(KeyValue.createPut(key, value, sequenceId.incrementAndGet())); 60 | } 61 | 62 | @Override 63 | public KeyValue get(byte[] key) throws IOException { 64 | KeyValue result = null; 65 | Iter it = scan(key, Bytes.EMPTY_BYTES); 66 | if (it.hasNext()) { 67 | KeyValue kv = it.next(); 68 | if (Bytes.compare(kv.getKey(), key) == 0) { 69 | result = kv; 70 | } 71 | } 72 | return result; 73 | } 74 | 75 | @Override 76 | public void delete(byte[] key) throws IOException { 77 | this.memStore.add(KeyValue.createDelete(key, sequenceId.incrementAndGet())); 78 | } 79 | 80 | @Override 81 | public Iter scan(byte[] start, byte[] stop) throws IOException { 82 | List> iterList = new ArrayList<>(); 83 | iterList.add(memStore.createIterator()); 84 | iterList.add(diskStore.createIterator()); 85 | MultiIter it = new MultiIter(iterList); 86 | 87 | // with start being EMPTY_BYTES means minus infinity, will skip to seek. 88 | if (Bytes.compare(start, Bytes.EMPTY_BYTES) != 0) { 89 | it.seekTo(KeyValue.createDelete(start, sequenceId.get())); 90 | } 91 | 92 | KeyValue stopKV = null; 93 | if (Bytes.compare(stop, Bytes.EMPTY_BYTES) != 0) { 94 | // the smallest kv in all KeyValue with the same key. 
95 | stopKV = KeyValue.createDelete(stop, Long.MAX_VALUE); 96 | } 97 | return new ScanIter(stopKV, it); 98 | } 99 | 100 | static class ScanIter implements Iter { 101 | 102 | private KeyValue stopKV; 103 | private Iter storeIt; 104 | // Last KV is the last key value which has the largest sequence id in key values with the 105 | // same key, but diff sequence id or op. 106 | private KeyValue lastKV = null; 107 | private KeyValue pendingKV = null; 108 | 109 | public ScanIter(KeyValue stopKV, SeekIter it) { 110 | this.stopKV = stopKV; 111 | this.storeIt = it; 112 | } 113 | 114 | @Override 115 | public boolean hasNext() throws IOException { 116 | if (pendingKV == null) { 117 | switchToNewKey(); 118 | } 119 | return pendingKV != null; 120 | } 121 | 122 | private boolean shouldStop(KeyValue kv) { 123 | return stopKV != null && Bytes.compare(stopKV.getKey(), kv.getKey()) <= 0; 124 | } 125 | 126 | private void switchToNewKey() throws IOException { 127 | if (lastKV != null && shouldStop(lastKV)) { 128 | return; 129 | } 130 | KeyValue curKV; 131 | while (storeIt.hasNext()) { 132 | curKV = storeIt.next(); 133 | if (shouldStop(curKV)) { 134 | return; 135 | } 136 | if (curKV.getOp() == Op.Put) { 137 | if (lastKV == null) { 138 | lastKV = pendingKV = curKV; 139 | return; 140 | } 141 | int ret = Bytes.compare(lastKV.getKey(), curKV.getKey()); 142 | if (ret < 0) { 143 | lastKV = pendingKV = curKV; 144 | return; 145 | } else if (ret > 0) { 146 | String msg = "KV mis-encoded, curKV < lastKV, curKV:" + Bytes.toHex(curKV.getKey()) + 147 | ", lastKV:" + Bytes.toHex(lastKV.getKey()); 148 | throw new IOException(msg); 149 | } 150 | // Same key with lastKV, should continue to fetch the next key value. 
151 | } else if (curKV.getOp() == Op.Delete) { 152 | if (lastKV == null || Bytes.compare(lastKV.getKey(), curKV.getKey()) != 0) { 153 | lastKV = curKV; 154 | } 155 | } else { 156 | throw new IOException("Unknown op code: " + curKV.getOp()); 157 | } 158 | } 159 | } 160 | 161 | @Override 162 | public KeyValue next() throws IOException { 163 | if (pendingKV == null) { 164 | switchToNewKey(); 165 | } 166 | lastKV = pendingKV; 167 | pendingKV = null; 168 | return lastKV; 169 | } 170 | } 171 | 172 | @Override 173 | public void close() throws IOException { 174 | memStore.close(); 175 | diskStore.close(); 176 | compactor.interrupt(); 177 | } 178 | 179 | interface SeekIter extends Iter { 180 | 181 | /** 182 | * Seek to the smallest key value which is greater than or equals to the given key value. 183 | * 184 | * @param kv 185 | */ 186 | void seekTo(KeyValue kv) throws IOException; 187 | } 188 | } 189 | -------------------------------------------------------------------------------- /src/main/java/org/apache/minibase/MemStore.java: -------------------------------------------------------------------------------- 1 | package org.apache.minibase; 2 | 3 | import org.apache.log4j.Logger; 4 | import org.apache.minibase.DiskStore.MultiIter; 5 | import org.apache.minibase.MStore.SeekIter; 6 | import org.apache.minibase.MiniBase.Flusher; 7 | 8 | import java.io.Closeable; 9 | import java.io.IOException; 10 | import java.util.ArrayList; 11 | import java.util.Iterator; 12 | import java.util.List; 13 | import java.util.NavigableMap; 14 | import java.util.SortedMap; 15 | import java.util.concurrent.ConcurrentSkipListMap; 16 | import java.util.concurrent.ExecutorService; 17 | import java.util.concurrent.atomic.AtomicBoolean; 18 | import java.util.concurrent.atomic.AtomicLong; 19 | import java.util.concurrent.locks.ReentrantReadWriteLock; 20 | 21 | public class MemStore implements Closeable { 22 | 23 | private static final Logger LOG = Logger.getLogger(MemStore.class); 24 | 25 | private 
final AtomicLong dataSize = new AtomicLong(); 26 | 27 | private volatile ConcurrentSkipListMap kvMap; 28 | private volatile ConcurrentSkipListMap snapshot; 29 | 30 | private final ReentrantReadWriteLock updateLock = new ReentrantReadWriteLock(); 31 | private final AtomicBoolean isSnapshotFlushing = new AtomicBoolean(false); 32 | private ExecutorService pool; 33 | 34 | private Config conf; 35 | private Flusher flusher; 36 | 37 | public MemStore(Config conf, Flusher flusher, ExecutorService pool) { 38 | this.conf = conf; 39 | this.flusher = flusher; 40 | this.pool = pool; 41 | 42 | dataSize.set(0); 43 | this.kvMap = new ConcurrentSkipListMap<>(); 44 | this.snapshot = null; 45 | } 46 | 47 | public void add(KeyValue kv) throws IOException { 48 | flushIfNeeded(true); 49 | updateLock.readLock().lock(); 50 | try { 51 | KeyValue prevKeyValue; 52 | if ((prevKeyValue = kvMap.put(kv, kv)) == null) { 53 | dataSize.addAndGet(kv.getSerializeSize()); 54 | } else { 55 | dataSize.addAndGet(kv.getSerializeSize() - prevKeyValue.getSerializeSize()); 56 | } 57 | } finally { 58 | updateLock.readLock().unlock(); 59 | } 60 | flushIfNeeded(false); 61 | } 62 | 63 | private void flushIfNeeded(boolean shouldBlocking) throws IOException { 64 | if (getDataSize() > conf.getMaxMemstoreSize()) { 65 | if (isSnapshotFlushing.get() && shouldBlocking) { 66 | throw new IOException( 67 | "Memstore is full, currentDataSize=" + dataSize.get() + "B, maxMemstoreSize=" 68 | + conf.getMaxMemstoreSize() + "B, please wait until the flushing is finished."); 69 | } else if (isSnapshotFlushing.compareAndSet(false, true)) { 70 | pool.submit(new FlusherTask()); 71 | } 72 | } 73 | } 74 | 75 | public long getDataSize() { 76 | return dataSize.get(); 77 | } 78 | 79 | public boolean isFlushing() { 80 | return this.isSnapshotFlushing.get(); 81 | } 82 | 83 | @Override 84 | public void close() throws IOException { 85 | } 86 | 87 | private class FlusherTask implements Runnable { 88 | @Override 89 | public void run() { 90 | 
// Step.1 memstore snpashot 91 | updateLock.writeLock().lock(); 92 | try { 93 | snapshot = kvMap; 94 | // TODO MemStoreIter may find the kvMap changed ? should synchronize ? 95 | kvMap = new ConcurrentSkipListMap<>(); 96 | dataSize.set(0); 97 | } finally { 98 | updateLock.writeLock().unlock(); 99 | } 100 | 101 | // Step.2 Flush the memstore to disk file. 102 | boolean success = false; 103 | for (int i = 0; i < conf.getFlushMaxRetries(); i++) { 104 | try { 105 | flusher.flush(new IteratorWrapper(snapshot)); 106 | success = true; 107 | } catch (IOException e) { 108 | LOG.error("Failed to flush memstore, retries=" + i + ", maxFlushRetries=" 109 | + conf.getFlushMaxRetries(), 110 | e); 111 | if (i >= conf.getFlushMaxRetries()) { 112 | break; 113 | } 114 | } 115 | } 116 | 117 | // Step.3 clear the snapshot. 118 | if (success) { 119 | // TODO MemStoreIter may get a NPE because we set null here ? should synchronize ? 120 | snapshot = null; 121 | isSnapshotFlushing.compareAndSet(true, false); 122 | } 123 | } 124 | } 125 | 126 | public SeekIter createIterator() throws IOException { 127 | return new MemStoreIter(kvMap, snapshot); 128 | } 129 | 130 | public static class IteratorWrapper implements SeekIter { 131 | 132 | private SortedMap sortedMap; 133 | private Iterator it; 134 | 135 | public IteratorWrapper(SortedMap sortedMap) { 136 | this.sortedMap = sortedMap; 137 | this.it = sortedMap.values().iterator(); 138 | } 139 | 140 | @Override 141 | public boolean hasNext() throws IOException { 142 | return it != null && it.hasNext(); 143 | } 144 | 145 | @Override 146 | public KeyValue next() throws IOException { 147 | return it.next(); 148 | } 149 | 150 | @Override 151 | public void seekTo(KeyValue kv) throws IOException { 152 | it = sortedMap.tailMap(kv).values().iterator(); 153 | } 154 | } 155 | 156 | private class MemStoreIter implements SeekIter { 157 | 158 | private MultiIter it; 159 | 160 | public MemStoreIter(NavigableMap kvSet, 161 | NavigableMap snapshot) throws 
IOException { 162 | List inputs = new ArrayList<>(); 163 | if (kvSet != null && kvSet.size() > 0) { 164 | inputs.add(new IteratorWrapper(kvMap)); 165 | } 166 | if (snapshot != null && snapshot.size() > 0) { 167 | inputs.add(new IteratorWrapper(snapshot)); 168 | } 169 | it = new MultiIter(inputs.toArray(new IteratorWrapper[0])); 170 | } 171 | 172 | @Override 173 | public boolean hasNext() throws IOException { 174 | return it.hasNext(); 175 | } 176 | 177 | @Override 178 | public KeyValue next() throws IOException { 179 | return it.next(); 180 | } 181 | 182 | @Override 183 | public void seekTo(KeyValue kv) throws IOException { 184 | it.seekTo(kv); 185 | } 186 | } 187 | } 188 | -------------------------------------------------------------------------------- /src/main/java/org/apache/minibase/MiniBase.java: -------------------------------------------------------------------------------- 1 | package org.apache.minibase; 2 | 3 | import java.io.Closeable; 4 | import java.io.IOException; 5 | 6 | public interface MiniBase extends Closeable { 7 | 8 | void put(byte[] key, byte[] value) throws IOException; 9 | 10 | KeyValue get(byte[] key) throws IOException; 11 | 12 | void delete(byte[] key) throws IOException; 13 | 14 | /** 15 | * Fetch all the key values whose key located in the range [startKey, stopKey) 16 | * 17 | * @param startKey start key to scan (inclusive), if start is byte[0], it means negative 18 | * infinity. 19 | * @param stopKey to stop the scan. (exclusive), if stopKey is byte[0], it means positive 20 | * infinity. 21 | * @return Iterator for fetching the key value one by one. 22 | */ 23 | Iter scan(byte[] startKey, byte[] stopKey) throws IOException; 24 | 25 | /** 26 | * Full scan the Key Value store. 27 | * 28 | * @return Iterator to fetch the key value one by one. 
29 | * @throws IOException 30 | */ 31 | default Iter scan() throws IOException { 32 | return scan(Bytes.EMPTY_BYTES, Bytes.EMPTY_BYTES); 33 | } 34 | 35 | interface Iter { 36 | boolean hasNext() throws IOException; 37 | 38 | KeyValue next() throws IOException; 39 | } 40 | 41 | interface Flusher { 42 | void flush(Iter it) throws IOException; 43 | } 44 | 45 | abstract class Compactor extends Thread { 46 | public abstract void compact() throws IOException; 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /src/test/java/org/apache/minibase/TestBloomFilter.java: -------------------------------------------------------------------------------- 1 | package org.apache.minibase; 2 | 3 | import org.junit.Assert; 4 | import org.junit.Test; 5 | 6 | import java.io.IOException; 7 | 8 | public class TestBloomFilter { 9 | 10 | @Test 11 | public void testBloomFilter() throws IOException { 12 | String[] keys = { "hello world", "hi", "bloom", "filter", "key", "value", "1", "value" }; 13 | BloomFilter bf = new BloomFilter(3, 10); 14 | byte[][] keyBytes = new byte[keys.length][]; 15 | for (int i = 0; i < keys.length; i++) { 16 | keyBytes[i] = keys[i].getBytes(); 17 | } 18 | bf.generate(keyBytes); 19 | Assert.assertTrue(bf.contains(Bytes.toBytes("hi"))); 20 | Assert.assertFalse(bf.contains(Bytes.toBytes("h"))); 21 | Assert.assertFalse(bf.contains(Bytes.toBytes("he"))); 22 | Assert.assertTrue(bf.contains(Bytes.toBytes("hello world"))); 23 | Assert.assertTrue(bf.contains(Bytes.toBytes("bloom"))); 24 | Assert.assertTrue(bf.contains(Bytes.toBytes("key"))); 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /src/test/java/org/apache/minibase/TestBytes.java: -------------------------------------------------------------------------------- 1 | package org.apache.minibase; 2 | 3 | import org.junit.Assert; 4 | import org.junit.Test; 5 | 6 | public class TestBytes { 7 | 8 | @Test 9 | public void 
testByte() { 10 | Assert.assertArrayEquals(new byte[]{'a'}, Bytes.toBytes((byte) 'a')); 11 | } 12 | 13 | @Test 14 | public void testToHex() { 15 | byte[] bytes = Bytes.toBytes(567890); 16 | String s = Bytes.toHex(bytes, 0, bytes.length); 17 | Assert.assertEquals("\\x00\\x08\\xAAR", s); 18 | } 19 | 20 | @Test 21 | public void testInt() { 22 | Assert.assertEquals(Bytes.toInt(Bytes.toBytes(123455)), 123455); 23 | Assert.assertEquals(Bytes.toInt(Bytes.toBytes(-1)), -1); 24 | Assert.assertEquals(Bytes.toInt(Bytes.toBytes(-0)), -0); 25 | Assert.assertEquals(Bytes.toInt(Bytes.toBytes(Integer.MAX_VALUE)), Integer.MAX_VALUE); 26 | Assert.assertEquals(Bytes.toInt(Bytes.toBytes(Integer.MIN_VALUE)), Integer.MIN_VALUE); 27 | } 28 | 29 | @Test 30 | public void testLong() { 31 | Assert.assertEquals(Bytes.toLong(Bytes.toBytes(123455L)), 123455L); 32 | Assert.assertEquals(Bytes.toLong(Bytes.toBytes(-1L)), -1L); 33 | Assert.assertEquals(Bytes.toLong(Bytes.toBytes(-0L)), -0L); 34 | Assert.assertEquals(Bytes.toLong(Bytes.toBytes(Long.MAX_VALUE)), Long.MAX_VALUE); 35 | Assert.assertEquals(Bytes.toLong(Bytes.toBytes(Long.MIN_VALUE)), Long.MIN_VALUE); 36 | } 37 | 38 | @Test 39 | public void testCompare() { 40 | Assert.assertEquals(Bytes.compare(null, null), 0); 41 | Assert.assertEquals(Bytes.compare(new byte[]{0x00}, new byte[0]), 1); 42 | Assert.assertEquals(Bytes.compare(new byte[]{0x00}, new byte[]{0x00}), 0); 43 | Assert.assertEquals(Bytes.compare(new byte[]{0x00}, null), 1); 44 | Assert.assertEquals(Bytes.compare(new byte[]{0x00}, new byte[]{0x01}), -1); 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /src/test/java/org/apache/minibase/TestDiskFile.java: -------------------------------------------------------------------------------- 1 | package org.apache.minibase; 2 | 3 | import org.apache.minibase.DiskFile.BlockMeta; 4 | import org.apache.minibase.DiskFile.BlockReader; 5 | import org.apache.minibase.DiskFile.BlockWriter; 6 | 
import org.apache.minibase.DiskFile.DiskFileWriter; 7 | import org.apache.minibase.MiniBase.Iter; 8 | import org.junit.Assert; 9 | import org.junit.Test; 10 | 11 | import java.io.File; 12 | import java.io.IOException; 13 | import java.util.Random; 14 | 15 | public class TestDiskFile { 16 | 17 | public static final Random RANDOM = new Random(); 18 | 19 | @Test 20 | public void testBlockEncoding() throws IOException { 21 | BlockWriter bw = new BlockWriter(); 22 | byte[] lastBytes = null; 23 | for (int i = 0; i < 100; i++) { 24 | lastBytes = Bytes.toBytes(i); 25 | bw.append(KeyValue.createPut(lastBytes, lastBytes, 1L)); 26 | } 27 | Assert.assertEquals(bw.getLastKV(), KeyValue.createPut(lastBytes, lastBytes, 1L)); 28 | 29 | byte[] buffer = bw.serialize(); 30 | BlockReader br = BlockReader.parseFrom(bw.serialize(), 0, buffer.length); 31 | 32 | // Assert the bloom filter. 33 | byte[][] bytes = new byte[br.getKeyValues().size()][]; 34 | for (int i = 0; i < br.getKeyValues().size(); i++) { 35 | bytes[i] = br.getKeyValues().get(i).getKey(); 36 | } 37 | BloomFilter bloom = 38 | new BloomFilter(DiskFile.BLOOM_FILTER_HASH_COUNT, DiskFile.BLOOM_FILTER_BITS_PER_KEY); 39 | Assert.assertArrayEquals(bloom.generate(bytes), bw.getBloomFilter()); 40 | } 41 | 42 | @Test 43 | public void testBlockMeta() throws IOException { 44 | KeyValue lastKV = KeyValue.createPut(Bytes.toBytes("abc"), Bytes.toBytes("abc"), 1L); 45 | long offset = 1024, size = 1024; 46 | byte[] bloomFilter = Bytes.toBytes("bloomFilter"); 47 | 48 | BlockMeta meta = new BlockMeta(lastKV, offset, size, bloomFilter); 49 | byte[] buffer = meta.toBytes(); 50 | 51 | BlockMeta meta2 = BlockMeta.parseFrom(buffer, 0); 52 | 53 | Assert.assertEquals(lastKV, meta2.getLastKV()); 54 | Assert.assertEquals(offset, meta2.getBlockOffset()); 55 | Assert.assertEquals(size, meta2.getBlockSize()); 56 | Assert.assertArrayEquals(bloomFilter, meta2.getBloomFilter()); 57 | } 58 | 59 | private byte[] generateRandomBytes() { 60 | int len = 
(RANDOM.nextInt() % 1024 + 1024) % 1024; 61 | byte[] buffer = new byte[len]; 62 | for (int i = 0; i < buffer.length; i++) { 63 | buffer[i] = (byte) (RANDOM.nextInt() & 0xFF); 64 | } 65 | return buffer; 66 | } 67 | 68 | @Test 69 | public void testDiskFile() throws IOException { 70 | String dbFile = "testDiskFileWriter.db"; 71 | try { 72 | try (DiskFileWriter diskWriter = new DiskFileWriter(dbFile)) { 73 | for (int i = 0; i < 1000; i++) { 74 | diskWriter.append(KeyValue.createPut(generateRandomBytes(), generateRandomBytes(), 1L)); 75 | } 76 | diskWriter.appendIndex(); 77 | diskWriter.appendTrailer(); 78 | } 79 | try (DiskFile df = new DiskFile()) { 80 | df.open(dbFile); 81 | } 82 | } finally { 83 | // Remove the dbFile. 84 | File f = new File(dbFile); 85 | if (f.exists()) { 86 | f.delete(); 87 | } 88 | } 89 | } 90 | 91 | @Test 92 | public void testDiskFileIO() throws IOException { 93 | String dbFile = "testDiskFileIO.db"; 94 | int rowsCount = 1000; 95 | 96 | try { 97 | DiskFileWriter diskWriter = new DiskFileWriter(dbFile); 98 | 99 | for (int i = 0; i < rowsCount; i++) { 100 | diskWriter.append(KeyValue.createPut(Bytes.toBytes(i), Bytes.toBytes(i), 1L)); 101 | } 102 | 103 | diskWriter.appendIndex(); 104 | diskWriter.appendTrailer(); 105 | diskWriter.close(); 106 | 107 | try (DiskFile df = new DiskFile()) { 108 | df.open(dbFile); 109 | Iter it = df.iterator(); 110 | int index = 0; 111 | while (it.hasNext()) { 112 | KeyValue kv = it.next(); 113 | Assert.assertEquals(KeyValue.createPut(Bytes.toBytes(index), Bytes.toBytes(index), 1L), 114 | kv); 115 | index++; 116 | } 117 | Assert.assertEquals(index, rowsCount); 118 | } 119 | } finally { 120 | // Remove the dbFile. 
121 | File f = new File(dbFile); 122 | if (f.exists()) { 123 | f.delete(); 124 | } 125 | } 126 | } 127 | } 128 | -------------------------------------------------------------------------------- /src/test/java/org/apache/minibase/TestKeyValue.java: -------------------------------------------------------------------------------- 1 | package org.apache.minibase; 2 | 3 | import org.junit.Assert; 4 | import org.junit.Test; 5 | 6 | public class TestKeyValue { 7 | 8 | @Test 9 | public void testCompare() { 10 | KeyValue kv = KeyValue.createPut(Bytes.toBytes(100), Bytes.toBytes(1000), 0L); 11 | Assert.assertFalse(kv.equals(null)); 12 | Assert.assertFalse(kv.equals(new Object())); 13 | Assert.assertTrue(kv.equals(KeyValue.createPut(Bytes.toBytes(100), Bytes.toBytes(1000), 0L 14 | ))); 15 | Assert.assertFalse(kv.equals(KeyValue.createPut(Bytes.toBytes(100L), Bytes.toBytes(1000), 0L))); 16 | Assert.assertTrue(kv.equals(KeyValue.createPut(Bytes.toBytes(100), Bytes.toBytes(1000L), 0L))); 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /src/test/java/org/apache/minibase/TestMemstoreFlush.java: -------------------------------------------------------------------------------- 1 | package org.apache.minibase; 2 | 3 | import static org.junit.Assert.assertEquals; 4 | import static org.junit.Assert.assertTrue; 5 | import static org.junit.Assert.fail; 6 | 7 | import java.io.IOException; 8 | import java.util.concurrent.ExecutorService; 9 | import java.util.concurrent.Executors; 10 | 11 | import org.apache.minibase.MiniBase.Flusher; 12 | import org.apache.minibase.MiniBase.Iter; 13 | import org.junit.Assert; 14 | import org.junit.Test; 15 | 16 | public class TestMemstoreFlush { 17 | 18 | private static class SleepAndFlusher implements Flusher { 19 | 20 | private volatile boolean sleepNow = true; 21 | 22 | @Override 23 | public void flush(Iter it) throws IOException { 24 | while (sleepNow) { 25 | try { 26 | Thread.sleep(100L); 27 | } catch 
(InterruptedException e) { 28 | e.printStackTrace(); 29 | } 30 | } 31 | } 32 | 33 | public void stopSleepNow() { 34 | sleepNow = false; 35 | } 36 | } 37 | 38 | @Test 39 | public void testBlockingPut() throws Exception { 40 | ExecutorService pool = Executors.newFixedThreadPool(1); 41 | try { 42 | Config conf = new Config().setMaxMemstoreSize(1); 43 | 44 | SleepAndFlusher flusher = new SleepAndFlusher(); 45 | MemStore memstore = new MemStore(conf, flusher, pool); 46 | memstore.add(KeyValue.createPut(Bytes.toBytes(1), Bytes.toBytes(1), 1L)); 47 | assertEquals(memstore.getDataSize(), 25); 48 | 49 | // Wait 5ms for the memstore snapshot. 50 | Thread.sleep(5L); 51 | memstore.add(KeyValue.createPut(Bytes.toBytes(2), Bytes.toBytes(2), 1L)); 52 | 53 | // Stuck in memstore flushing, will throw blocking exception. 54 | // because both of the memstore and snapshot are full now. 55 | try { 56 | memstore.add(KeyValue.createPut(Bytes.toBytes(3), Bytes.toBytes(3), 1L)); 57 | fail("Should throw IOException here, because our memstore is full now"); 58 | } catch (IOException e) { 59 | assertTrue(e.getMessage().contains("Memstore is full")); 60 | } 61 | assertEquals(memstore.isFlushing(), true); 62 | 63 | flusher.stopSleepNow(); 64 | Thread.sleep(200L); 65 | assertEquals(memstore.isFlushing(), false); 66 | assertEquals(memstore.getDataSize(), 25); 67 | 68 | memstore.add(KeyValue.createPut(Bytes.toBytes(4), Bytes.toBytes(4), 1L)); 69 | Thread.sleep(5L); 70 | assertEquals(memstore.getDataSize(), 0); 71 | } finally { 72 | pool.shutdownNow(); 73 | } 74 | } 75 | 76 | @Test 77 | public void testAddPutAndDelete() throws Exception { 78 | ExecutorService pool = Executors.newFixedThreadPool(1); 79 | try { 80 | Config conf = new Config().setMaxMemstoreSize(2 * 1024 * 1024); 81 | MemStore store = new MemStore(conf, new SleepAndFlusher(), pool); 82 | for (int i = 99; i >= 0; i--) { 83 | KeyValue kv; 84 | byte[] bytes = Bytes.toBytes(i); 85 | if ((i & 1) != 0) { 86 | kv = KeyValue.createPut(bytes, 
bytes, i); 87 | } else { 88 | kv = KeyValue.createDelete(bytes, i); 89 | } 90 | store.add(kv); 91 | } 92 | Iter it = store.createIterator(); 93 | int index = 0; 94 | while (it.hasNext()) { 95 | KeyValue kv = it.next(); 96 | byte[] bs = Bytes.toBytes(index); 97 | if ((index & 1) != 0) { 98 | Assert.assertEquals(kv, KeyValue.createPut(bs, bs, index)); 99 | } else { 100 | Assert.assertEquals(kv, KeyValue.createDelete(bs, index)); 101 | } 102 | index += 1; 103 | } 104 | } finally { 105 | pool.shutdownNow(); 106 | } 107 | } 108 | 109 | @Test 110 | public void testSeqIdAndOpOrder() throws Exception { 111 | ExecutorService pool = Executors.newFixedThreadPool(1); 112 | try { 113 | Config conf = new Config().setMaxMemstoreSize(2 * 1024 * 1024); 114 | MemStore store = new MemStore(conf, new SleepAndFlusher(), pool); 115 | byte[] bs = Bytes.toBytes(1); 116 | KeyValue kv1 = KeyValue.createPut(bs, bs, 1); 117 | KeyValue kv2 = KeyValue.createPut(bs, bs, 2); 118 | KeyValue kv3 = KeyValue.createDelete(bs, 2); 119 | 120 | store.add(kv1); 121 | store.add(kv2); 122 | store.add(kv3); 123 | 124 | Iter it = store.createIterator(); 125 | Assert.assertTrue(it.hasNext()); 126 | KeyValue kv = it.next(); 127 | Assert.assertEquals(kv, kv3); 128 | 129 | Assert.assertTrue(it.hasNext()); 130 | kv = it.next(); 131 | Assert.assertEquals(kv, kv2); 132 | 133 | Assert.assertTrue(it.hasNext()); 134 | kv = it.next(); 135 | Assert.assertEquals(kv, kv1); 136 | 137 | Assert.assertFalse(it.hasNext()); 138 | } finally { 139 | pool.shutdownNow(); 140 | } 141 | } 142 | } 143 | -------------------------------------------------------------------------------- /src/test/java/org/apache/minibase/TestMiniBase.java: -------------------------------------------------------------------------------- 1 | package org.apache.minibase; 2 | 3 | import org.apache.minibase.KeyValue.Op; 4 | import org.apache.minibase.MStore.ScanIter; 5 | import org.apache.minibase.MStore.SeekIter; 6 | import org.apache.minibase.MiniBase.Iter; 7 
| import org.junit.After; 8 | import org.junit.Assert; 9 | import org.junit.Before; 10 | import org.junit.Test; 11 | 12 | import java.io.File; 13 | import java.io.IOException; 14 | import java.util.ArrayList; 15 | import java.util.List; 16 | 17 | public class TestMiniBase { 18 | 19 | private String dataDir; 20 | 21 | @Before 22 | public void setUp() { 23 | dataDir = "target/minihbase-" + System.currentTimeMillis(); 24 | File f = new File(dataDir); 25 | Assert.assertTrue(f.mkdirs()); 26 | } 27 | 28 | @After 29 | public void tearDown() { 30 | } 31 | 32 | private static class WriterThread extends Thread { 33 | 34 | private long start, end; 35 | private MiniBase db; 36 | 37 | public WriterThread(MiniBase db, long start, long end) { 38 | this.db = db; 39 | this.start = start; 40 | this.end = end; 41 | } 42 | 43 | public void run() { 44 | for (long i = start; i < end; i++) { 45 | int retries = 0; 46 | while (retries < 50) { 47 | try { 48 | db.put(Bytes.toBytes(i), Bytes.toBytes(i)); 49 | break; 50 | } catch (IOException e) { 51 | // Memstore maybe full, so let's retry. 52 | retries++; 53 | try { 54 | Thread.sleep(100 * retries); 55 | } catch (InterruptedException e1) { 56 | } 57 | } 58 | } 59 | } 60 | } 61 | } 62 | 63 | @Test 64 | public void testPut() throws IOException, InterruptedException { 65 | // Set maxMemstoreSize to 64B, which make the memstore flush frequently. 
66 | Config conf = new Config().setDataDir(dataDir).setMaxMemstoreSize(1).setFlushMaxRetries(1) 67 | .setMaxDiskFiles(10); 68 | final MiniBase db = MStore.create(conf).open(); 69 | 70 | final long totalKVSize = 100L; 71 | final int threadSize = 5; 72 | 73 | WriterThread[] writers = new WriterThread[threadSize]; 74 | for (int i = 0; i < threadSize; i++) { 75 | long kvPerThread = totalKVSize / threadSize; 76 | writers[i] = new WriterThread(db, i * kvPerThread, (i + 1) * kvPerThread); 77 | writers[i].start(); 78 | } 79 | 80 | for (int i = 0; i < threadSize; i++) { 81 | writers[i].join(); 82 | } 83 | 84 | Iter kv = db.scan(); 85 | long current = 0; 86 | while (kv.hasNext()) { 87 | KeyValue expected = kv.next(); 88 | KeyValue currentKV = KeyValue.createPut(Bytes.toBytes(current), Bytes.toBytes(current), 0L); 89 | Assert.assertArrayEquals(expected.getKey(), currentKV.getKey()); 90 | Assert.assertArrayEquals(expected.getValue(), currentKV.getValue()); 91 | Assert.assertEquals(expected.getOp(), Op.Put); 92 | 93 | long sequenceId = expected.getSequenceId(); 94 | Assert.assertTrue("SequenceId: " + sequenceId, sequenceId > 0); 95 | current++; 96 | } 97 | Assert.assertEquals(current, totalKVSize); 98 | db.close(); 99 | } 100 | 101 | @Test 102 | public void testMixedOp() throws Exception { 103 | Config conf = new Config().setDataDir(dataDir).setMaxMemstoreSize(2 * 1024 * 1024); 104 | MiniBase db = MStore.create(conf).open(); 105 | 106 | byte[] A = Bytes.toBytes("A"); 107 | byte[] B = Bytes.toBytes("B"); 108 | byte[] C = Bytes.toBytes("C"); 109 | 110 | db.put(A, A); 111 | Assert.assertArrayEquals(db.get(A).getValue(), A); 112 | 113 | db.delete(A); 114 | Assert.assertNull(db.get(A)); 115 | 116 | db.put(A, B); 117 | Assert.assertArrayEquals(db.get(A).getValue(), B); 118 | 119 | db.put(B, A); 120 | Assert.assertArrayEquals(db.get(B).getValue(), A); 121 | 122 | db.put(B, B); 123 | Assert.assertArrayEquals(db.get(B).getValue(), B); 124 | 125 | db.put(C, C); 126 | 
Assert.assertArrayEquals(db.get(C).getValue(), C); 127 | 128 | db.delete(B); 129 | Assert.assertNull(db.get(B)); 130 | } 131 | 132 | static class MockSeekIter implements SeekIter { 133 | 134 | private int curIdx = 0; 135 | private List list; 136 | 137 | public MockSeekIter(List list) { 138 | this.list = list; 139 | } 140 | 141 | @Override 142 | public void seekTo(KeyValue kv) throws IOException { 143 | throw new IOException("Not implemented"); 144 | } 145 | 146 | @Override 147 | public boolean hasNext() throws IOException { 148 | return curIdx < list.size(); 149 | } 150 | 151 | @Override 152 | public KeyValue next() throws IOException { 153 | return list.get(curIdx++); 154 | } 155 | } 156 | 157 | 158 | @Test 159 | public void testScanIter() throws Exception { 160 | List list = new ArrayList<>(); 161 | byte[] A = Bytes.toBytes("A"); 162 | byte[] B = Bytes.toBytes("B"); 163 | byte[] C = Bytes.toBytes("C"); 164 | list.add(KeyValue.createDelete(A, 100)); 165 | list.add(KeyValue.createDelete(A, 100)); 166 | list.add(KeyValue.createPut(A, A, 100)); 167 | list.add(KeyValue.createDelete(A, 99)); 168 | list.add(KeyValue.createDelete(A, 99)); 169 | list.add(KeyValue.createPut(A, A, 99)); 170 | list.add(KeyValue.createPut(A, A, 99)); 171 | 172 | list.add(KeyValue.createPut(B, B, 100)); 173 | list.add(KeyValue.createPut(B, B, 99)); 174 | list.add(KeyValue.createPut(B, B, 99)); 175 | 176 | list.add(KeyValue.createPut(C, C, 80)); 177 | list.add(KeyValue.createDelete(C, 1)); 178 | 179 | ScanIter scan = new ScanIter(null, new MockSeekIter(list)); 180 | Assert.assertTrue(scan.hasNext()); 181 | Assert.assertEquals(scan.next(), KeyValue.createPut(B, B, 100)); 182 | Assert.assertTrue(scan.hasNext()); 183 | Assert.assertEquals(scan.next(), KeyValue.createPut(C, C, 80)); 184 | Assert.assertFalse(scan.hasNext()); 185 | 186 | scan = new ScanIter(KeyValue.createPut(B, B, 100), new MockSeekIter(list)); 187 | Assert.assertFalse(scan.hasNext()); 188 | 189 | scan = new 
ScanIter(KeyValue.createPut(C, C, 100), new MockSeekIter(list)); 190 | Assert.assertTrue(scan.hasNext()); 191 | Assert.assertEquals(scan.next(), KeyValue.createPut(B, B, 100)); 192 | Assert.assertFalse(scan.hasNext()); 193 | } 194 | } 195 | -------------------------------------------------------------------------------- /src/test/java/org/apache/minibase/TestMultiIter.java: -------------------------------------------------------------------------------- 1 | package org.apache.minibase; 2 | 3 | import org.apache.minibase.DiskFile.DiskFileWriter; 4 | import org.apache.minibase.DiskStore.MultiIter; 5 | import org.apache.minibase.MStore.SeekIter; 6 | import org.apache.minibase.MiniBase.Iter; 7 | import org.junit.Assert; 8 | import org.junit.Test; 9 | 10 | import java.io.File; 11 | import java.io.IOException; 12 | import java.util.ArrayList; 13 | import java.util.Arrays; 14 | import java.util.List; 15 | 16 | public class TestMultiIter { 17 | 18 | public static class MockIter implements SeekIter { 19 | 20 | private int cur; 21 | private KeyValue[] kvs; 22 | 23 | public MockIter(int[] array) throws IOException { 24 | assert array != null; 25 | kvs = new KeyValue[array.length]; 26 | for (int i = 0; i < array.length; i++) { 27 | String s = String.format("%05d", array[i]); 28 | kvs[i] = KeyValue.createPut(Bytes.toBytes(s), Bytes.toBytes(s), 1L); 29 | } 30 | cur = 0; 31 | } 32 | 33 | @Override 34 | public boolean hasNext() throws IOException { 35 | return cur < kvs.length; 36 | } 37 | 38 | @Override 39 | public KeyValue next() throws IOException { 40 | return kvs[cur++]; 41 | } 42 | 43 | @Override 44 | public void seekTo(KeyValue kv) throws IOException { 45 | for (cur = 0; cur < kvs.length; cur++) { 46 | if (kvs[cur].compareTo(kv) >= 0) { 47 | break; 48 | } 49 | } 50 | } 51 | } 52 | 53 | @Test 54 | public void testMergeSort() throws IOException { 55 | int[] a = new int[] { 2, 5, 8, 10, 20 }; 56 | int[] b = new int[] { 11, 12, 12 }; 57 | MockIter iter1 = new MockIter(a); 58 | 
MockIter iter2 = new MockIter(b); 59 | SeekIter[] iters = new SeekIter[] { iter1, iter2 }; 60 | MultiIter multiIter = new MultiIter(iters); 61 | 62 | String[] results = 63 | new String[] { "00002", "00005", "00008", "00010", "00011", "00012", "00012", "00020" }; 64 | int index = 0; 65 | 66 | while (multiIter.hasNext()) { 67 | KeyValue kv = multiIter.next(); 68 | Assert.assertTrue(index < results.length); 69 | Assert.assertArrayEquals(kv.getKey(), Bytes.toBytes(results[index])); 70 | Assert.assertArrayEquals(kv.getValue(), Bytes.toBytes(results[index])); 71 | index++; 72 | } 73 | 74 | Assert.assertEquals(index, results.length); 75 | } 76 | 77 | @Test 78 | public void testMergeSort2() throws IOException { 79 | int[] a = new int[] {}; 80 | int[] b = new int[] {}; 81 | MockIter iter1 = new MockIter(a); 82 | MockIter iter2 = new MockIter(b); 83 | SeekIter[] iters = new SeekIter[] { iter1, iter2 }; 84 | MultiIter multiIter = new MultiIter(iters); 85 | 86 | Assert.assertFalse(multiIter.hasNext()); 87 | } 88 | 89 | @Test 90 | public void testMergeSort3() throws IOException { 91 | int[] a = new int[]{}; 92 | int[] b = new int[]{1}; 93 | MockIter iter1 = new MockIter(a); 94 | MockIter iter2 = new MockIter(b); 95 | SeekIter[] iters = new SeekIter[]{iter1, iter2}; 96 | MultiIter multiIter = new MultiIter(iters); 97 | 98 | Assert.assertTrue(multiIter.hasNext()); 99 | Assert.assertEquals(multiIter.next(), 100 | KeyValue.createPut(Bytes.toBytes("00001"), Bytes.toBytes("00001"), 1L)); 101 | Assert.assertFalse(multiIter.hasNext()); 102 | } 103 | 104 | @Test 105 | public void testMergeSort4() throws IOException { 106 | int[] a = new int[] {}; 107 | int[] b = new int[] { 1, 1 }; 108 | int[] c = new int[] { 1, 1 }; 109 | MockIter iter1 = new MockIter(a); 110 | MockIter iter2 = new MockIter(b); 111 | MockIter iter3 = new MockIter(c); 112 | SeekIter[] iters = new SeekIter[] { iter1, iter2, iter3 }; 113 | MultiIter multiIter = new MultiIter(iters); 114 | 115 | int count = 0; 116 | while 
(multiIter.hasNext()) { 117 | Assert.assertEquals(multiIter.next(), 118 | KeyValue.createPut(Bytes.toBytes("00001"), Bytes.toBytes("00001"), 1L)); 119 | count++; 120 | } 121 | Assert.assertEquals(count, 4); 122 | } 123 | 124 | private void testDiskFileMergeSort(String[] inputs, String output, int rowCount) 125 | throws IOException { 126 | try { 127 | DiskFileWriter[] writers = new DiskFileWriter[inputs.length]; 128 | DiskFile[] readers = new DiskFile[inputs.length]; 129 | SeekIter iterArray[] = new SeekIter[inputs.length]; 130 | 131 | for (int i = 0; i < inputs.length; i++) { 132 | writers[i] = new DiskFileWriter(inputs[i]); 133 | } 134 | for (int i = 0; i < rowCount; i++) { 135 | int k = i % inputs.length; 136 | writers[k].append(KeyValue.createPut(Bytes.toBytes(i), Bytes.toBytes(i), 1L)); 137 | } 138 | for (int i = 0; i < inputs.length; i++) { 139 | writers[i].appendIndex(); 140 | writers[i].appendTrailer(); 141 | writers[i].close(); 142 | 143 | // open the file 144 | readers[i] = new DiskFile(); 145 | readers[i].open(inputs[i]); 146 | iterArray[i] = readers[i].iterator(); 147 | } 148 | 149 | DiskFileWriter writer = new DiskFileWriter(output); 150 | MultiIter iter = new MultiIter(iterArray); 151 | while (iter.hasNext()) { 152 | writer.append(iter.next()); 153 | } 154 | 155 | writer.appendIndex(); 156 | writer.appendTrailer(); 157 | writer.close(); 158 | 159 | // close the readers 160 | for (int i = 0; i < readers.length; i++) { 161 | readers[i].close(); 162 | } 163 | 164 | DiskFile reader = new DiskFile(); 165 | reader.open(output); 166 | Iter resultIter = reader.iterator(); 167 | int count = 0; 168 | while (resultIter.hasNext()) { 169 | Assert.assertEquals(resultIter.next(), 170 | KeyValue.createPut(Bytes.toBytes(count), Bytes.toBytes(count), 1L)); 171 | count++; 172 | } 173 | Assert.assertEquals(count, rowCount); 174 | reader.close(); 175 | } finally { 176 | // Remove the dbFile. 
177 | List deleteFiles = new ArrayList<>(Arrays.asList(inputs)); 178 | deleteFiles.add(output); 179 | for (String fileName : deleteFiles) { 180 | File f = new File(fileName); 181 | if (f.exists()) { 182 | f.delete(); 183 | } 184 | } 185 | } 186 | } 187 | 188 | @Test 189 | public void testDiskFileMergeSort() throws IOException { 190 | testDiskFileMergeSort(new String[] { "a.db", "b.db" }, "c.db", 10); 191 | testDiskFileMergeSort(new String[] { "a.db" }, "b.db", 1); 192 | testDiskFileMergeSort(new String[] { "a.db", "b.db", "c.db" }, "d.db", 1000); 193 | testDiskFileMergeSort(new String[] { "a.db", "b.db", "c.db" }, "d.db", 100); 194 | } 195 | } 196 | --------------------------------------------------------------------------------