├── .travis.yml ├── .gitignore ├── src ├── main │ └── java │ │ └── fi │ │ └── iki │ │ └── yak │ │ └── ts │ │ └── compression │ │ └── gorilla │ │ ├── predictors │ │ ├── LastValuePredictor.java │ │ └── DifferentialFCM.java │ │ ├── Pair.java │ │ ├── BitOutput.java │ │ ├── BitInput.java │ │ ├── Predictor.java │ │ ├── ValueDecompressor.java │ │ ├── LongArrayInput.java │ │ ├── ByteBufferBitInput.java │ │ ├── ByteBufferBitOutput.java │ │ ├── ValueCompressor.java │ │ ├── Decompressor.java │ │ ├── GorillaDecompressor.java │ │ ├── LongArrayOutput.java │ │ ├── GorillaCompressor.java │ │ ├── benchmark │ │ └── EncodingBenchmark.java │ │ └── Compressor.java └── test │ └── java │ └── fi │ └── iki │ └── yak │ └── ts │ └── compression │ └── gorilla │ ├── EncodeTest.java │ └── EncodeGorillaTest.java ├── README.adoc ├── pom.xml └── LICENSE /.travis.yml: -------------------------------------------------------------------------------- 1 | # Enable container-based infrastructure 2 | sudo: false 3 | language: java 4 | install: mvn install -DskipTests -Dgpg.skip 5 | jdk: 6 | - oraclejdk8 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.class 2 | 3 | # Mobile Tools for Java (J2ME) 4 | .mtj.tmp/ 5 | 6 | # Package Files # 7 | *.jar 8 | *.war 9 | *.ear 10 | 11 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml 12 | hs_err_pid* 13 | 14 | .idea/ 15 | *.iml 16 | target/ 17 | -------------------------------------------------------------------------------- /src/main/java/fi/iki/yak/ts/compression/gorilla/predictors/LastValuePredictor.java: -------------------------------------------------------------------------------- 1 | package fi.iki.yak.ts.compression.gorilla.predictors; 2 | 3 | import fi.iki.yak.ts.compression.gorilla.Predictor; 4 | 5 | /** 6 | * Last-Value predictor, a computational predictor using previous value as a prediction for the next one 7 | * 8 | * @author Michael Burman 9 | */ 10 | public class LastValuePredictor implements Predictor { 11 | private long storedVal = 0; 12 | 13 | public LastValuePredictor() {} 14 | 15 | public void update(long value) { 16 | this.storedVal = value; 17 | } 18 | 19 | public long predict() { 20 | return storedVal; 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /src/main/java/fi/iki/yak/ts/compression/gorilla/Pair.java: -------------------------------------------------------------------------------- 1 | package fi.iki.yak.ts.compression.gorilla; 2 | 3 | /** 4 | * Pair is an extracted timestamp,value pair from the stream 5 | * 6 | * @author Michael Burman 7 | */ 8 | public class Pair { 9 | private long timestamp; 10 | private long value; 11 | 12 | public Pair(long timestamp, long value) { 13 | this.timestamp = timestamp; 14 | this.value = value; 15 | } 16 | 17 | public long getTimestamp() { 18 | return timestamp; 19 | } 20 | 21 | public double getDoubleValue() { 22 | return Double.longBitsToDouble(value); 23 | } 24 | 25 | public long getLongValue() { 26 | return value; 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /src/main/java/fi/iki/yak/ts/compression/gorilla/BitOutput.java: -------------------------------------------------------------------------------- 1 | package fi.iki.yak.ts.compression.gorilla; 2 | 3 | /** 4 | * This interface is used to write a compressed timeseries. 
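 * Implementations in this package include ByteBufferBitOutput and LongArrayOutput; LongArrayOutput is the recommended implementation for the 2.x format.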
5 | * 6 | * @author Michael Burman 7 | */ 8 | public interface BitOutput { 9 | 10 | /** 11 | * Stores a single bit and increases the bitcount by 1 12 | */ 13 | void writeBit(); 14 | 15 | /** 16 | * Stores a 0 and increases the bitcount by 1 17 | */ 18 | void skipBit(); 19 | 20 | /** 21 | * Write the given long value using the defined amount of least significant bits. 22 | * 23 | * @param value The long value to be written 24 | * @param bits How many bits are stored to the stream 25 | */ 26 | void writeBits(long value, int bits); 27 | 28 | /** 29 | * Flushes the current byte to the underlying stream 30 | */ 31 | void flush(); 32 | } 33 | -------------------------------------------------------------------------------- /src/main/java/fi/iki/yak/ts/compression/gorilla/BitInput.java: -------------------------------------------------------------------------------- 1 | package fi.iki.yak.ts.compression.gorilla; 2 | 3 | /** 4 | * This interface is used for reading a compressed time series. 5 | * 6 | * @author Michael Burman 7 | */ 8 | public interface BitInput { 9 | 10 | /** 11 | * Reads the next bit and returns true if bit is set and false if not. 12 | * 13 | * @return true == 1, false == 0 14 | */ 15 | boolean readBit(); 16 | 17 | /** 18 | * Returns a long that was stored in the next X bits in the stream. 19 | * 20 | * @param bits Amount of least significant bits to read from the stream. 21 | * @return reads the next long in the series using bits meaningful bits 22 | */ 23 | long getLong(int bits); 24 | 25 | /** 26 | * Read until next unset bit is found, or until maxBits has been reached. 27 | * 28 | * @param maxBits How many bits at maximum until returning 29 | * @return Integer value of the read bits 30 | */ 31 | int nextClearBit(int maxBits); 32 | } 33 | -------------------------------------------------------------------------------- /src/main/java/fi/iki/yak/ts/compression/gorilla/Predictor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2017 Red Hat, Inc. and/or its affiliates 3 | * and other contributors as indicated by the @author tags. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | package fi.iki.yak.ts.compression.gorilla; 18 | 19 | /** 20 | * @author miburman 21 | */ 22 | public interface Predictor { 23 | 24 | /** 25 | * Give the real value 26 | * 27 | * @param value Long / bits of Double 28 | */ 29 | void update(long value); 30 | 31 | /** 32 | * Predicts the next value 33 | * 34 | * @return Predicted value 35 | */ 36 | long predict(); 37 | } 38 | -------------------------------------------------------------------------------- /src/main/java/fi/iki/yak/ts/compression/gorilla/predictors/DifferentialFCM.java: -------------------------------------------------------------------------------- 1 | package fi.iki.yak.ts.compression.gorilla.predictors; 2 | 3 | import fi.iki.yak.ts.compression.gorilla.Predictor; 4 | 5 | /** 6 | * Differential Finite Context Method (DFCM) is a context based predictor. 7 | * 8 | * @author Michael Burman 9 | */ 10 | public class DifferentialFCM implements Predictor { 11 | 12 | private long lastValue = 0L; 13 | private final long[] table; 14 | private int lastHash = 0; 15 | 16 | private final int mask; 17 | 18 | /** 19 | * Create a new DFCM predictor 20 | * 21 | * @param size Prediction table size, will be rounded to the next power of two and must be larger than 0 22 | */ 23 | public DifferentialFCM(int size) { 24 | if(size > 0) { 25 | size--; 26 | int leadingZeros = Long.numberOfLeadingZeros(size); 27 | int newSize = 1 << (Long.SIZE - leadingZeros); 28 | 29 | this.table = new long[newSize]; 30 | this.mask = newSize - 1; 31 | } else { 32 | throw new IllegalArgumentException("Size must be positive"); 33 | } 34 | } 35 | 36 | @Override 37 | public void update(long value) { 38 | table[lastHash] = value - lastValue; 39 | lastHash = (int) (((lastHash << 5) ^ ((value - lastValue) >> 50)) & this.mask); 40 | lastValue = value; 41 | } 42 | 43 | @Override 44 | public long predict() { 45 | return table[lastHash] + lastValue; 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /src/main/java/fi/iki/yak/ts/compression/gorilla/ValueDecompressor.java: -------------------------------------------------------------------------------- 1 | package fi.iki.yak.ts.compression.gorilla; 2 | 3 | import fi.iki.yak.ts.compression.gorilla.predictors.LastValuePredictor; 4 | 5 | /** 6 | * Value decompressor for Gorilla encoded values 7 | * 8 | * @author Michael Burman 9 | */ 10 | public class ValueDecompressor { 11 | private final BitInput in; 12 | private final Predictor predictor; 13 | 14 | private int storedLeadingZeros = Integer.MAX_VALUE; 15 | private int storedTrailingZeros = 0; 16 | 17 | public ValueDecompressor(BitInput input) { 18 | this(input, new LastValuePredictor()); 19 | } 20 | 21 | public ValueDecompressor(BitInput input, Predictor predictor) { 22 | this.in = input; 23 | this.predictor = predictor; 24 | } 25 | 26 | public long readFirst() { 27 | long value = in.getLong(Long.SIZE); 28 | predictor.update(value); 29 | return value; 30 | } 31 | 32 | public long nextValue() { 33 | int val = in.nextClearBit(2); 34 | 35 | switch(val) { 36 | case 3: 37 | // New leading and trailing zeros 38 | storedLeadingZeros = (int) in.getLong(6); 39 | 40 | byte significantBits = (byte) in.getLong(6); 41 | significantBits++; 42 | 43 | storedTrailingZeros = Long.SIZE - significantBits - storedLeadingZeros; 44 | // missing break is intentional, we want to overflow to next one 45 | case 2: 46 | long value = in.getLong(Long.SIZE - storedLeadingZeros - storedTrailingZeros); 47 | value <<= storedTrailingZeros; 48 | 49 | value = 
predictor.predict() ^ value; 50 | predictor.update(value); 51 | return value; 52 | } 53 | return predictor.predict(); 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /src/main/java/fi/iki/yak/ts/compression/gorilla/LongArrayInput.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2016 Red Hat, Inc. and/or its affiliates 3 | * and other contributors as indicated by the @author tags. 4 | * 5 | * Licensed under the Apache License, Version 2.0 (the "License"); 6 | * you may not use this file except in compliance with the License. 7 | * You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package fi.iki.yak.ts.compression.gorilla; 18 | 19 | /** 20 | * Implements on-heap long array input stream 21 | * 22 | * @author Michael Burman 23 | */ 24 | public class LongArrayInput implements BitInput { 25 | private final long[] longArray; // TODO Investigate also the ByteBuffer performance here.. or Unsafe 26 | private long lB; 27 | private int position = 0; 28 | private int bitsLeft = 0; 29 | 30 | public LongArrayInput(long[] array) { 31 | this.longArray = array; 32 | flipByte(); 33 | } 34 | 35 | @Override 36 | public boolean readBit() { 37 | boolean bit = (lB & LongArrayOutput.BIT_SET_MASK[bitsLeft - 1]) != 0; 38 | bitsLeft--; 39 | checkAndFlipByte(); 40 | return bit; 41 | } 42 | 43 | private void flipByte() { 44 | lB = longArray[position++]; 45 | bitsLeft = Long.SIZE; 46 | } 47 | 48 | private void checkAndFlipByte() { 49 | if(bitsLeft == 0) { 50 | flipByte(); 51 | } 52 | } 53 | 54 | @Override 55 | public long getLong(int bits) { 56 | long value; 57 | if(bits <= bitsLeft) { 58 | // We can read from this word only 59 | // Shift to correct position and take only n least significant bits 60 | value = (lB >>> (bitsLeft - bits)) & LongArrayOutput.MASK_ARRAY[bits - 1]; 61 | bitsLeft -= bits; // We ate n bits from it 62 | checkAndFlipByte(); 63 | } else { 64 | // This word and next one, no more (max bits is 64) 65 | value = lB & LongArrayOutput.MASK_ARRAY[bitsLeft - 1]; // Read what's left first 66 | bits -= bitsLeft; 67 | flipByte(); // We need the next one 68 | value <<= bits; // Give n bits of space to value 69 | value |= (lB >>> (bitsLeft - bits)); 70 | bitsLeft -= bits; 71 | } 72 | return value; 73 | } 74 | 75 | @Override 76 | public int nextClearBit(int maxBits) { 77 | int val = 0x00; 78 | 79 | for(int i = 0; i < maxBits; i++) { 80 | val <<= 1; 81 | // TODO This loop has too many branches and unnecessary boolean casts 82 | boolean bit = readBit(); 83 | 84 | if(bit) { 85 | val |= 0x01; 86 | } else { 87 | break; 88 | } 89 | } 90 | return val; 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /src/main/java/fi/iki/yak/ts/compression/gorilla/ByteBufferBitInput.java: -------------------------------------------------------------------------------- 1 | package fi.iki.yak.ts.compression.gorilla; 2 | 3 | import java.nio.ByteBuffer; 4 | 5 | /** 6 | * An implementation of BitInput that parses the data from byte array or existing ByteBuffer. 
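 * This reader is mainly used with the 1.x stream format; for streams written with LongArrayOutput, LongArrayInput is the recommended BitInput implementation.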
7 | * 8 | * @author Michael Burman 9 | */ 10 | public class ByteBufferBitInput implements BitInput { 11 | private ByteBuffer bb; 12 | private byte b; 13 | private int bitsLeft = 0; 14 | 15 | /** 16 | * Uses an existing ByteBuffer to read the stream. Starts at the ByteBuffer's current position. 17 | * 18 | * @param buf Use existing ByteBuffer 19 | */ 20 | public ByteBufferBitInput(ByteBuffer buf) { 21 | bb = buf; 22 | flipByte(); 23 | } 24 | 25 | public ByteBufferBitInput(byte[] input) { 26 | this(ByteBuffer.wrap(input)); 27 | } 28 | 29 | /** 30 | * Reads the next bit and returns a boolean representing it. 31 | * 32 | * @return true if the next bit is 1, otherwise 0. 33 | */ 34 | public boolean readBit() { 35 | boolean bit = ((b >> (bitsLeft - 1)) & 1) == 1; 36 | bitsLeft--; 37 | flipByte(); 38 | return bit; 39 | } 40 | 41 | /** 42 | * Reads a long from the next X bits that represent the least significant bits in the long value. 43 | * 44 | * @param bits How many next bits are read from the stream 45 | * @return long value that was read from the stream 46 | */ 47 | public long getLong(int bits) { 48 | long value = 0; 49 | while(bits > 0) { 50 | if(bits > bitsLeft || bits == Byte.SIZE) { 51 | // Take only the bitsLeft "least significant" bits 52 | byte d = (byte) (b & ((1<>> (bitsLeft - bits)) & ((1< 0) { 74 | int shift = bits - bitsLeft; 75 | if(shift >= 0) { 76 | b |= (byte) ((value >> shift) & ((1 << bitsLeft) - 1)); 77 | bits -= bitsLeft; 78 | bitsLeft = 0; 79 | } else { 80 | shift = bitsLeft - bits; 81 | b |= (byte) (value << shift); 82 | bitsLeft -= bits; 83 | bits = 0; 84 | } 85 | flipByte(); 86 | } 87 | } 88 | 89 | /** 90 | * Causes the currently handled byte to be written to the stream 91 | */ 92 | @Override 93 | public void flush() { 94 | bitsLeft = 0; 95 | flipByte(); // Causes write to the ByteBuffer 96 | } 97 | 98 | /** 99 | * Returns the underlying DirectByteBuffer 100 | * 101 | * @return ByteBuffer of type DirectByteBuffer 102 | */ 103 | public ByteBuffer getByteBuffer() { 104 | return this.bb; 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /src/main/java/fi/iki/yak/ts/compression/gorilla/ValueCompressor.java: -------------------------------------------------------------------------------- 1 | package fi.iki.yak.ts.compression.gorilla; 2 | 3 | import fi.iki.yak.ts.compression.gorilla.predictors.LastValuePredictor; 4 | 5 | /** 6 | * ValueCompressor for the Gorilla encoding format. 
Supply the long representation of the value; 7 | * in case of doubles use Double.doubleToRawLongBits(value) 8 | * 9 | * @author Michael Burman 10 | */ 11 | public class ValueCompressor { 12 |     private int storedLeadingZeros = Integer.MAX_VALUE; 13 |     private int storedTrailingZeros = 0; 14 | 15 |     private Predictor predictor; 16 |     private BitOutput out; 17 | 18 |     public ValueCompressor(BitOutput out) { 19 |         this(out, new LastValuePredictor()); 20 |     } 21 | 22 |     public ValueCompressor(BitOutput out, Predictor predictor) { 23 |         this.out = out; 24 |         this.predictor = predictor; 25 |     } 26 | 27 |     void writeFirst(long value) { 28 |         predictor.update(value); 29 |         out.writeBits(value, 64); 30 |     } 31 | 32 |     protected void compressValue(long value) { 33 |         // In original Gorilla, Last-Value predictor is used 34 |         long diff = predictor.predict() ^ value; 35 |         predictor.update(value); 36 | 37 |         if(diff == 0) { 38 |             // Write 0 39 |             out.skipBit(); 40 |         } else { 41 |             int leadingZeros = Long.numberOfLeadingZeros(diff); 42 |             int trailingZeros = Long.numberOfTrailingZeros(diff); 43 | 44 |             out.writeBit(); // Optimize to writeNewLeading / writeExistingLeading? 45 | 46 |             if(leadingZeros >= storedLeadingZeros && trailingZeros >= storedTrailingZeros) { 47 |                 writeExistingLeading(diff); 48 |             } else { 49 |                 writeNewLeading(diff, leadingZeros, trailingZeros); 50 |             } 51 |         } 52 |     } 53 | 54 |     /** 55 |      * If there are at least as many leading zeros and as many trailing zeros as in the previous value, control bit = 0 (type a), 56 |      * store the meaningful XORed value 57 |      * 58 |      * @param xor XOR between previous value and current 59 |      */ 60 |     private void writeExistingLeading(long xor) { 61 |         out.skipBit(); 62 | 63 |         int significantBits = 64 - storedLeadingZeros - storedTrailingZeros; 64 |         xor >>>= storedTrailingZeros; 65 |         out.writeBits(xor, significantBits); 66 |     } 67 | 68 |     /** 69 |      * store the number of leading zeros in the next 6 bits 70 |      * store the length of the meaningful XORed value in the next 6 bits, 71 |      * store the meaningful bits of the XORed value 72 |      * (type b) 73 |      * 74 |      * @param xor XOR between previous value and current 75 |      * @param leadingZeros New leading zeros 76 |      * @param trailingZeros New trailing zeros 77 |      */ 78 |     private void writeNewLeading(long xor, int leadingZeros, int trailingZeros) { 79 |         out.writeBit(); 80 | 81 |         // Different from version 1.x, use (significantBits - 1) in storage - avoids a branch 82 |         int significantBits = 64 - leadingZeros - trailingZeros; 83 | 84 |         // Different from original, bits 5 -> 6, avoids a branch, allows storing small longs 85 |         out.writeBits(leadingZeros, 6); // Number of leading zeros in the next 6 bits 86 |         out.writeBits(significantBits - 1, 6); // Length of meaningful bits in the next 6 bits 87 |         out.writeBits(xor >>> trailingZeros, significantBits); // Store the meaningful bits of XOR 88 | 89 |         storedLeadingZeros = leadingZeros; 90 |         storedTrailingZeros = trailingZeros; 91 |     } 92 | } 93 | -------------------------------------------------------------------------------- /src/main/java/fi/iki/yak/ts/compression/gorilla/Decompressor.java: -------------------------------------------------------------------------------- 1 | package fi.iki.yak.ts.compression.gorilla; 2 | 3 | /** 4 | * Decompresses a compressed stream created by the Compressor. Returns pairs of timestamp and floating point value. 
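 * This class reads the 1.x format written by Compressor; streams produced by GorillaCompressor should be read with GorillaDecompressor instead.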
5 | * 6 | * @author Michael Burman 7 | */ 8 | public class Decompressor { 9 | 10 | private int storedLeadingZeros = Integer.MAX_VALUE; 11 | private int storedTrailingZeros = 0; 12 | private long storedVal = 0; 13 | private long storedTimestamp = 0; 14 | private long storedDelta = 0; 15 | 16 | private long blockTimestamp = 0; 17 | 18 | private boolean endOfStream = false; 19 | 20 | private BitInput in; 21 | 22 | public Decompressor(BitInput input) { 23 | in = input; 24 | readHeader(); 25 | } 26 | 27 | private void readHeader() { 28 | blockTimestamp = in.getLong(64); 29 | } 30 | 31 | /** 32 | * Returns the next pair in the time series, if available. 33 | * 34 | * @return Pair if there's next value, null if series is done. 35 | */ 36 | public Pair readPair() { 37 | next(); 38 | if(endOfStream) { 39 | return null; 40 | } 41 | return new Pair(storedTimestamp, storedVal); 42 | } 43 | 44 | private void next() { 45 | if (storedTimestamp == 0) { 46 | // First item to read 47 | storedDelta = in.getLong(Compressor.FIRST_DELTA_BITS); 48 | if(storedDelta == (1<<27) - 1) { 49 | endOfStream = true; 50 | return; 51 | } 52 | storedVal = in.getLong(64); 53 | storedTimestamp = blockTimestamp + storedDelta; 54 | } else { 55 | nextTimestamp(); 56 | } 57 | } 58 | 59 | private int bitsToRead() { 60 | int val = in.nextClearBit(4); 61 | int toRead = 0; 62 | 63 | switch(val) { 64 | case 0x00: 65 | break; 66 | case 0x02: 67 | toRead = 7; // '10' 68 | break; 69 | case 0x06: 70 | toRead = 9; // '110' 71 | break; 72 | case 0x0e: 73 | toRead = 12; 74 | break; 75 | case 0x0F: 76 | toRead = 32; 77 | break; 78 | } 79 | 80 | return toRead; 81 | } 82 | 83 | private void nextTimestamp() { 84 | // Next, read timestamp 85 | long deltaDelta = 0; 86 | int toRead = bitsToRead(); 87 | if (toRead > 0) { 88 | deltaDelta = in.getLong(toRead); 89 | 90 | if(toRead == 32) { 91 | if ((int) deltaDelta == 0xFFFFFFFF) { 92 | // End of stream 93 | endOfStream = true; 94 | return; 95 | } 96 | } else { 97 | // Turn "unsigned" long value back to signed one 98 | if(deltaDelta > (1 << (toRead - 1))) { 99 | deltaDelta -= (1 << toRead); 100 | } 101 | } 102 | 103 | deltaDelta = (int) deltaDelta; 104 | } 105 | 106 | storedDelta = storedDelta + deltaDelta; 107 | storedTimestamp = storedDelta + storedTimestamp; 108 | nextValue(); 109 | } 110 | 111 | private void nextValue() { 112 | // Read value 113 | if (in.readBit()) { 114 | // else -> same value as before 115 | if (in.readBit()) { 116 | // New leading and trailing zeros 117 | storedLeadingZeros = (int) in.getLong(5); 118 | 119 | byte significantBits = (byte) in.getLong(6); 120 | if(significantBits == 0) { 121 | significantBits = 64; 122 | } 123 | storedTrailingZeros = 64 - significantBits - storedLeadingZeros; 124 | } 125 | long value = in.getLong(64 - storedLeadingZeros - storedTrailingZeros); 126 | value <<= storedTrailingZeros; 127 | value = storedVal ^ value; 128 | storedVal = value; 129 | } 130 | } 131 | 132 | } -------------------------------------------------------------------------------- /src/main/java/fi/iki/yak/ts/compression/gorilla/GorillaDecompressor.java: -------------------------------------------------------------------------------- 1 | package fi.iki.yak.ts.compression.gorilla; 2 | 3 | import java.util.stream.Stream; 4 | 5 | import fi.iki.yak.ts.compression.gorilla.predictors.LastValuePredictor; 6 | 7 | /** 8 | * Decompresses a compressed stream created by the GorillaCompressor. 
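 * If the stream was compressed with a predictor other than the default LastValuePredictor, the same predictor implementation must be passed to the constructor.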
9 | * 10 | * @author Michael Burman 11 | */ 12 | public class GorillaDecompressor { 13 | private long storedTimestamp = 0; 14 | private long storedDelta = 0; 15 | 16 | private long blockTimestamp = 0; 17 | private long storedVal = 0; 18 | private boolean endOfStream = false; 19 | 20 | private final BitInput in; 21 | private final ValueDecompressor decompressor; 22 | 23 | public GorillaDecompressor(BitInput input) { 24 | this(input, new LastValuePredictor()); 25 | } 26 | 27 | public GorillaDecompressor(BitInput input, Predictor predictor) { 28 | in = input; 29 | readHeader(); 30 | this.decompressor = new ValueDecompressor(input, predictor); 31 | } 32 | 33 | private void readHeader() { 34 | blockTimestamp = in.getLong(64); 35 | } 36 | 37 | /** 38 | * Returns the next pair in the time series, if available. 39 | * 40 | * @return Pair if there's next value, null if series is done. 41 | */ 42 | public Pair readPair() { 43 | next(); 44 | if(endOfStream) { 45 | return null; 46 | } 47 | Pair pair = new Pair(storedTimestamp, storedVal); 48 | return pair; 49 | } 50 | 51 | private void next() { 52 | // TODO I could implement a non-streaming solution also.. is there ever a need for streaming solution? 53 | 54 | if(storedTimestamp == 0) { 55 | first(); 56 | return; 57 | } 58 | 59 | nextTimestamp(); 60 | } 61 | 62 | private void first() { 63 | // First item to read 64 | storedDelta = in.getLong(Compressor.FIRST_DELTA_BITS); 65 | if(storedDelta == (1<<27) - 1) { 66 | endOfStream = true; 67 | return; 68 | } 69 | storedVal = decompressor.readFirst(); 70 | // storedVal = in.getLong(64); 71 | storedTimestamp = blockTimestamp + storedDelta; 72 | } 73 | 74 | private void nextTimestamp() { 75 | // Next, read timestamp 76 | int readInstruction = in.nextClearBit(4); 77 | long deltaDelta; 78 | 79 | switch(readInstruction) { 80 | case 0x00: 81 | storedTimestamp = storedDelta + storedTimestamp; 82 | storedVal = decompressor.nextValue(); 83 | return; 84 | case 0x02: 85 | deltaDelta = in.getLong(7); 86 | break; 87 | case 0x06: 88 | deltaDelta = in.getLong(9); 89 | break; 90 | case 0x0e: 91 | deltaDelta = in.getLong(12); 92 | break; 93 | case 0x0F: 94 | deltaDelta = in.getLong(32); 95 | // For storage save.. if this is the last available word, check if remaining bits are all 1 96 | if ((int) deltaDelta == 0xFFFFFFFF) { 97 | // End of stream 98 | endOfStream = true; 99 | return; 100 | } 101 | break; 102 | default: 103 | return; 104 | } 105 | 106 | deltaDelta++; 107 | deltaDelta = decodeZigZag32((int) deltaDelta); 108 | storedDelta = storedDelta + deltaDelta; 109 | 110 | storedTimestamp = storedDelta + storedTimestamp; 111 | storedVal = decompressor.nextValue(); 112 | } 113 | 114 | // START: From protobuf 115 | 116 | /** 117 | * Decode a ZigZag-encoded 32-bit value. ZigZag encodes signed integers into values that can be 118 | * efficiently encoded with varint. (Otherwise, negative values must be sign-extended to 64 bits 119 | * to be varint encoded, thus always taking 10 bytes on the wire.) 120 | * 121 | * @param n An unsigned 32-bit integer, stored in a signed int because Java has no explicit 122 | * unsigned support. 123 | * @return A signed 32-bit integer. 
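 *         For example, 1 decodes to -1, 2 to 1, 3 to -2 and 4 to 2.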
124 | */ 125 | public static int decodeZigZag32(final int n) { 126 | return (n >>> 1) ^ -(n & 1); 127 | } 128 | 129 | // END: From protobuf 130 | } -------------------------------------------------------------------------------- /src/main/java/fi/iki/yak/ts/compression/gorilla/LongArrayOutput.java: -------------------------------------------------------------------------------- 1 | package fi.iki.yak.ts.compression.gorilla; 2 | 3 | import java.util.Arrays; 4 | 5 | /** 6 | * An implementation of BitOutput interface that uses on-heap long array. 7 | * 8 | * @author Michael Burman 9 | */ 10 | public class LongArrayOutput implements BitOutput { 11 | public static final int DEFAULT_ALLOCATION = 256; 12 | 13 | private long[] longArray; 14 | private int position = 0; 15 | 16 | protected long lB; 17 | protected int bitsLeft = Long.SIZE; 18 | 19 | public final static long[] MASK_ARRAY; 20 | public final static long[] BIT_SET_MASK; 21 | 22 | // Java does not allow creating 64 bit masks with (1L << 64) - 1; (end result is 0) 23 | static { 24 | MASK_ARRAY = new long[64]; 25 | long mask = 1; 26 | long value = 0; 27 | for (int i = 0; i < MASK_ARRAY.length; i++) { 28 | value = value | mask; 29 | mask = mask << 1; 30 | 31 | MASK_ARRAY[i] = value; 32 | } 33 | 34 | BIT_SET_MASK = new long[64]; 35 | for(int i = 0; i < BIT_SET_MASK.length; i++) { 36 | BIT_SET_MASK[i] = (1L << i); 37 | } 38 | } 39 | 40 | 41 | /** 42 | * Creates a new ByteBufferBitOutput with a default allocated size of 4096 bytes. 43 | */ 44 | public LongArrayOutput() { 45 | this(DEFAULT_ALLOCATION); 46 | } 47 | 48 | /** 49 | * Give an initialSize different than DEFAULT_ALLOCATIONS. Recommended to use values which are dividable by 4096. 50 | * 51 | * @param initialSize New initialsize to use 52 | */ 53 | public LongArrayOutput(int initialSize) { 54 | longArray = new long[initialSize]; 55 | lB = longArray[position]; 56 | } 57 | 58 | protected void expandAllocation() { 59 | long[] largerArray = new long[longArray.length*2]; 60 | System.arraycopy(longArray, 0, largerArray, 0, longArray.length); 61 | longArray = largerArray; 62 | } 63 | 64 | private void checkAndFlipByte() { 65 | // Wish I could avoid this check in most cases... 66 | if(bitsLeft == 0) { 67 | flipWord(); 68 | } 69 | } 70 | 71 | protected int capacityLeft() { 72 | return longArray.length - position; 73 | } 74 | 75 | protected void flipWord() { 76 | if(capacityLeft() <= 2) { // We want to have always at least 2 longs available 77 | expandAllocation(); 78 | } 79 | flipWordWithoutExpandCheck(); 80 | } 81 | 82 | protected void flipWordWithoutExpandCheck() { 83 | longArray[position] = lB; 84 | ++position; 85 | resetInternalWord(); 86 | } 87 | 88 | private void resetInternalWord() { 89 | lB = 0; 90 | bitsLeft = Long.SIZE; 91 | } 92 | 93 | /** 94 | * Sets the next bit (or not) and moves the bit pointer. 95 | */ 96 | public void writeBit() { 97 | lB |= BIT_SET_MASK[bitsLeft - 1]; 98 | bitsLeft--; 99 | checkAndFlipByte(); 100 | } 101 | 102 | public void skipBit() { 103 | bitsLeft--; 104 | checkAndFlipByte(); 105 | } 106 | 107 | /** 108 | * Writes the given long to the stream using bits amount of meaningful bits. This command does not 109 | * check input values, so if they're larger than what can fit the bits (you should check this before writing), 110 | * expect some weird results. 
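 * For example, writeBits(0b101, 3) appends the three bits 1, 0 and 1 to the stream.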
111 | * 112 | * @param value Value to be written to the stream 113 | * @param bits How many bits are stored to the stream 114 | */ 115 | public void writeBits(long value, int bits) { 116 | if(bits <= bitsLeft) { 117 | int lastBitPosition = bitsLeft - bits; 118 | lB |= (value << lastBitPosition) & MASK_ARRAY[bitsLeft - 1]; 119 | bitsLeft -= bits; 120 | checkAndFlipByte(); // We could be at 0 bits left because of the <= condition .. would it be faster with 121 | // the other one? 122 | } else { 123 | value &= MASK_ARRAY[bits - 1]; 124 | int firstBitPosition = bits - bitsLeft; 125 | lB |= value >>> firstBitPosition; 126 | bits -= bitsLeft; 127 | flipWord(); 128 | lB |= value << (64 - bits); 129 | bitsLeft -= bits; 130 | } 131 | } 132 | 133 | /** 134 | * Causes the currently handled word to be written to the stream 135 | */ 136 | @Override 137 | public void flush() { 138 | flipWord(); 139 | } 140 | 141 | public long[] getLongArray() { 142 | long[] copy = Arrays.copyOf(longArray, position + 1); 143 | copy[copy.length - 1] = lB; 144 | return copy; 145 | } 146 | } 147 | -------------------------------------------------------------------------------- /src/main/java/fi/iki/yak/ts/compression/gorilla/GorillaCompressor.java: -------------------------------------------------------------------------------- 1 | package fi.iki.yak.ts.compression.gorilla; 2 | 3 | import fi.iki.yak.ts.compression.gorilla.predictors.LastValuePredictor; 4 | 5 | /** 6 | * Implements a slightly modified version of the time series compression as described in the Facebook's Gorilla 7 | * Paper. 8 | * 9 | * @author Michael Burman 10 | */ 11 | public class GorillaCompressor { 12 | 13 | private long storedTimestamp = 0; 14 | private int storedDelta = 0; 15 | 16 | private long blockTimestamp = 0; 17 | 18 | public final static int FIRST_DELTA_BITS = 27; 19 | 20 | private static int DELTAD_7_MASK = 0x02 << 7; 21 | private static int DELTAD_9_MASK = 0x06 << 9; 22 | private static int DELTAD_12_MASK = 0x0E << 12; 23 | 24 | private BitOutput out; 25 | 26 | private ValueCompressor valueCompressor; 27 | 28 | public GorillaCompressor(long timestamp, BitOutput output) { 29 | this(timestamp, output, new LastValuePredictor()); 30 | } 31 | 32 | public GorillaCompressor(long timestamp, BitOutput output, Predictor predictor) { 33 | blockTimestamp = timestamp; 34 | out = output; 35 | addHeader(timestamp); 36 | this.valueCompressor = new ValueCompressor(output, predictor); 37 | } 38 | 39 | private void addHeader(long timestamp) { 40 | out.writeBits(timestamp, 64); 41 | } 42 | 43 | /** 44 | * Adds a new long value to the series. Note, values must be inserted in order. 45 | * 46 | * @param timestamp Timestamp which is inside the allowed time block (default 24 hours with millisecond precision) 47 | * @param value next floating point value in the series 48 | */ 49 | public void addValue(long timestamp, long value) { 50 | if(storedTimestamp == 0) { 51 | writeFirst(timestamp, value); 52 | } else { 53 | compressTimestamp(timestamp); 54 | valueCompressor.compressValue(value); 55 | } 56 | } 57 | 58 | /** 59 | * Adds a new double value to the series. Note, values must be inserted in order. 
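 * The value is converted with Double.doubleToRawLongBits before compression; do not mix long and double values in the same series.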
60 | * 61 | * @param timestamp Timestamp which is inside the allowed time block (default 24 hours with millisecond precision) 62 | * @param value next floating point value in the series 63 | */ 64 | public void addValue(long timestamp, double value) { 65 | if(storedTimestamp == 0) { 66 | writeFirst(timestamp, Double.doubleToRawLongBits(value)); 67 | return; 68 | } 69 | compressTimestamp(timestamp); 70 | valueCompressor.compressValue(Double.doubleToRawLongBits(value)); 71 | } 72 | 73 | private void writeFirst(long timestamp, long value) { 74 | storedDelta = (int) (timestamp - blockTimestamp); 75 | storedTimestamp = timestamp; 76 | 77 | out.writeBits(storedDelta, FIRST_DELTA_BITS); 78 | valueCompressor.writeFirst(value); 79 | } 80 | 81 | /** 82 | * Closes the block and writes the remaining stuff to the BitOutput. 83 | */ 84 | public void close() { 85 | out.writeBits(0x0F, 4); 86 | out.writeBits(0xFFFFFFFF, 32); 87 | out.skipBit(); 88 | out.flush(); 89 | } 90 | 91 | /** 92 | * Difference to the original Facebook paper, we store the first delta as 27 bits to allow 93 | * millisecond accuracy for a one day block. 94 | * 95 | * Also, the timestamp delta-delta is not good for millisecond compressions.. 96 | * 97 | * @param timestamp epoch 98 | */ 99 | private void compressTimestamp(long timestamp) { 100 | 101 | // a) Calculate the delta of delta 102 | int newDelta = (int) (timestamp - storedTimestamp); 103 | int deltaD = newDelta - storedDelta; 104 | 105 | if(deltaD == 0) { 106 | out.skipBit(); 107 | } else { 108 | deltaD = encodeZigZag32(deltaD); 109 | deltaD--; // Increase by one in the decompressing phase as we have one free bit 110 | int bitsRequired = 32 - Integer.numberOfLeadingZeros(deltaD); // Faster than highestSetBit 111 | 112 | // Turns to inlineable tableswitch 113 | switch(bitsRequired) { 114 | case 1: 115 | case 2: 116 | case 3: 117 | case 4: 118 | case 5: 119 | case 6: 120 | case 7: 121 | deltaD |= DELTAD_7_MASK; 122 | out.writeBits(deltaD, 9); 123 | break; 124 | case 8: 125 | case 9: 126 | deltaD |= DELTAD_9_MASK; 127 | out.writeBits(deltaD, 12); 128 | break; 129 | case 10: 130 | case 11: 131 | case 12: 132 | out.writeBits(deltaD | DELTAD_12_MASK, 16); 133 | break; 134 | default: 135 | out.writeBits(0x0F, 4); // Store '1111' 136 | out.writeBits(deltaD, 32); // Store delta using 32 bits 137 | break; 138 | } 139 | storedDelta = newDelta; 140 | } 141 | 142 | storedTimestamp = timestamp; 143 | } 144 | 145 | // START: From protobuf 146 | 147 | /** 148 | * Encode a ZigZag-encoded 32-bit value. ZigZag encodes signed integers 149 | * into values that can be efficiently encoded with varint. (Otherwise, 150 | * negative values must be sign-extended to 64 bits to be varint encoded, 151 | * thus always taking 10 bytes on the wire.) 152 | * 153 | * @param n A signed 32-bit integer. 154 | * @return An unsigned 32-bit integer, stored in a signed int because 155 | * Java has no explicit unsigned support. 
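 *         For example, -1 encodes to 1, 1 to 2 and -64 to 127.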
156 | */ 157 | public static int encodeZigZag32(final int n) { 158 | // Note: the right-shift must be arithmetic 159 | return (n << 1) ^ (n >> 31); 160 | } 161 | 162 | // END: From protobuf 163 | } 164 | -------------------------------------------------------------------------------- /src/main/java/fi/iki/yak/ts/compression/gorilla/benchmark/EncodingBenchmark.java: -------------------------------------------------------------------------------- 1 | package fi.iki.yak.ts.compression.gorilla.benchmark; 2 | 3 | import fi.iki.yak.ts.compression.gorilla.*; 4 | import org.openjdk.jmh.annotations.*; 5 | import org.openjdk.jmh.infra.Blackhole; 6 | 7 | import java.nio.ByteBuffer; 8 | import java.time.LocalDateTime; 9 | import java.time.ZoneOffset; 10 | import java.time.temporal.ChronoUnit; 11 | import java.util.ArrayList; 12 | import java.util.List; 13 | import java.util.stream.Stream; 14 | 15 | /** 16 | * @author Michael Burman 17 | */ 18 | @BenchmarkMode(Mode.Throughput) 19 | @State(Scope.Benchmark) 20 | @Fork(1) 21 | @Warmup(iterations = 5) 22 | @Measurement(iterations = 10) // Reduce the amount of iterations if you start to see GC interference 23 | public class EncodingBenchmark { 24 | 25 | @State(Scope.Benchmark) 26 | public static class DataGenerator { 27 | public List insertList; 28 | 29 | @Param({"100000"}) 30 | public int amountOfPoints; 31 | 32 | public long blockStart; 33 | 34 | public long[] uncompressedTimestamps; 35 | public long[] uncompressedValues; 36 | public double[] uncompressedDoubles; 37 | public long[] compressedArray; 38 | 39 | public ByteBuffer uncompressedBuffer; 40 | public ByteBuffer compressedBuffer; 41 | 42 | public List pairs; 43 | 44 | @Setup(Level.Trial) 45 | public void setup() { 46 | blockStart = LocalDateTime.now().truncatedTo(ChronoUnit.HOURS) 47 | .toInstant(ZoneOffset.UTC).toEpochMilli(); 48 | 49 | long now = blockStart + 60; 50 | uncompressedTimestamps = new long[amountOfPoints]; 51 | uncompressedDoubles = new double[amountOfPoints]; 52 | uncompressedValues = new long[amountOfPoints]; 53 | 54 | insertList = new ArrayList<>(amountOfPoints); 55 | 56 | ByteBuffer bb = ByteBuffer.allocate(amountOfPoints * 2*Long.BYTES); 57 | 58 | pairs = new ArrayList<>(amountOfPoints); 59 | 60 | for(int i = 0; i < amountOfPoints; i++) { 61 | now += 60; 62 | bb.putLong(now); 63 | bb.putDouble(i); 64 | uncompressedTimestamps[i] = now; 65 | uncompressedDoubles[i] = i; 66 | uncompressedValues[i] = i; 67 | pairs.add(new Pair(now, i)); 68 | // bb.putLong(i); 69 | } 70 | 71 | if (bb.hasArray()) { 72 | uncompressedBuffer = bb.duplicate(); 73 | uncompressedBuffer.flip(); 74 | } 75 | ByteBufferBitOutput output = new ByteBufferBitOutput(); 76 | LongArrayOutput arrayOutput = new LongArrayOutput(amountOfPoints); 77 | 78 | Compressor c = new Compressor(blockStart, output); 79 | GorillaCompressor gc = new GorillaCompressor(blockStart, arrayOutput); 80 | 81 | bb.flip(); 82 | 83 | for(int j = 0; j < amountOfPoints; j++) { 84 | // c.addValue(bb.getLong(), bb.getLong()); 85 | c.addValue(bb.getLong(), bb.getDouble()); 86 | gc.addValue(uncompressedTimestamps[j], uncompressedDoubles[j]); 87 | } 88 | 89 | gc.close(); 90 | c.close(); 91 | 92 | ByteBuffer byteBuffer = output.getByteBuffer(); 93 | byteBuffer.flip(); 94 | compressedBuffer = byteBuffer; 95 | 96 | compressedArray = arrayOutput.getLongArray(); 97 | } 98 | } 99 | 100 | // @Benchmark 101 | @OperationsPerInvocation(100000) 102 | public void encodingBenchmark(DataGenerator dg) { 103 | ByteBufferBitOutput output = new ByteBufferBitOutput(); 104 | 
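        // 1.x-format encoding path: timestamps and double values are read back from the prepared uncompressedBuffer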
Compressor c = new Compressor(dg.blockStart, output); 105 | 106 | for(int j = 0; j < dg.amountOfPoints; j++) { 107 | c.addValue(dg.uncompressedBuffer.getLong(), dg.uncompressedBuffer.getDouble()); 108 | } 109 | c.close(); 110 | dg.uncompressedBuffer.rewind(); 111 | } 112 | 113 | @Benchmark 114 | @OperationsPerInvocation(100000) 115 | public void decodingBenchmark(DataGenerator dg, Blackhole bh) throws Exception { 116 | ByteBuffer duplicate = dg.compressedBuffer.duplicate(); 117 | ByteBufferBitInput input = new ByteBufferBitInput(duplicate); 118 | Decompressor d = new Decompressor(input); 119 | Pair pair; 120 | while((pair = d.readPair()) != null) { 121 | bh.consume(pair); 122 | } 123 | } 124 | 125 | @Benchmark 126 | @OperationsPerInvocation(100000) 127 | public void encodingGorillaBenchmark(DataGenerator dg) { 128 | LongArrayOutput output = new LongArrayOutput(); 129 | GorillaCompressor c = new GorillaCompressor(dg.blockStart, output); 130 | 131 | for(int j = 0; j < dg.amountOfPoints; j++) { 132 | c.addValue(dg.uncompressedTimestamps[j], dg.uncompressedDoubles[j]); 133 | } 134 | c.close(); 135 | } 136 | 137 | @Benchmark 138 | @OperationsPerInvocation(100000) 139 | public void encodingGorillaBenchmarkLong(DataGenerator dg) { 140 | LongArrayOutput output = new LongArrayOutput(); 141 | GorillaCompressor c = new GorillaCompressor(dg.blockStart, output); 142 | 143 | for(int j = 0; j < dg.amountOfPoints; j++) { 144 | c.addValue(dg.uncompressedTimestamps[j], dg.uncompressedValues[j]); 145 | } 146 | c.close(); 147 | } 148 | 149 | // @Benchmark 150 | // @OperationsPerInvocation(100000) 151 | // public void encodingGorillaStreamBenchmark(DataGenerator dg) { 152 | // LongArrayOutput output = new LongArrayOutput(); 153 | // GorillaCompressor c = new GorillaCompressor(dg.blockStart, output); 154 | // 155 | // c.compressLongStream(dg.pairs.stream()); 156 | // c.close(); 157 | // } 158 | 159 | @Benchmark 160 | @OperationsPerInvocation(100000) 161 | public void decodingGorillaBenchmark(DataGenerator dg, Blackhole bh) throws Exception { 162 | LongArrayInput input = new LongArrayInput(dg.compressedArray); 163 | GorillaDecompressor d = new GorillaDecompressor(input); 164 | Pair pair; 165 | while((pair = d.readPair()) != null) { 166 | bh.consume(pair); 167 | } 168 | } 169 | } 170 | -------------------------------------------------------------------------------- /src/main/java/fi/iki/yak/ts/compression/gorilla/Compressor.java: -------------------------------------------------------------------------------- 1 | package fi.iki.yak.ts.compression.gorilla; 2 | 3 | /** 4 | * Implements the time series compression as described in the Facebook's Gorilla Paper. Value compression 5 | * is for floating points only. 6 | * 7 | * @author Michael Burman 8 | */ 9 | public class Compressor { 10 | 11 | private int storedLeadingZeros = Integer.MAX_VALUE; 12 | private int storedTrailingZeros = 0; 13 | private long storedVal = 0; 14 | private long storedTimestamp = 0; 15 | private long storedDelta = 0; 16 | 17 | private long blockTimestamp = 0; 18 | 19 | public final static short FIRST_DELTA_BITS = 27; 20 | 21 | private BitOutput out; 22 | 23 | // We should have access to the series? 
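    // The constructor writes the block timestamp as a full 64-bit header; the first value's timestamp is later stored as a 27-bit delta against it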
24 | public Compressor(long timestamp, BitOutput output) { 25 | blockTimestamp = timestamp; 26 | out = output; 27 | addHeader(timestamp); 28 | } 29 | 30 | private void addHeader(long timestamp) { 31 | // One byte: length of the first delta 32 | // One byte: precision of timestamps 33 | out.writeBits(timestamp, 64); 34 | } 35 | 36 | /** 37 | * Adds a new long value to the series. Note, values must be inserted in order. 38 | * 39 | * @param timestamp Timestamp which is inside the allowed time block (default 24 hours with millisecond precision) 40 | * @param value next floating point value in the series 41 | */ 42 | public void addValue(long timestamp, long value) { 43 | if(storedTimestamp == 0) { 44 | writeFirst(timestamp, value); 45 | } else { 46 | compressTimestamp(timestamp); 47 | compressValue(value); 48 | } 49 | } 50 | 51 | /** 52 | * Adds a new double value to the series. Note, values must be inserted in order. 53 | * 54 | * @param timestamp Timestamp which is inside the allowed time block (default 24 hours with millisecond precision) 55 | * @param value next floating point value in the series 56 | */ 57 | public void addValue(long timestamp, double value) { 58 | if(storedTimestamp == 0) { 59 | writeFirst(timestamp, Double.doubleToRawLongBits(value)); 60 | } else { 61 | compressTimestamp(timestamp); 62 | compressValue(Double.doubleToRawLongBits(value)); 63 | } 64 | } 65 | 66 | private void writeFirst(long timestamp, long value) { 67 | storedDelta = timestamp - blockTimestamp; 68 | storedTimestamp = timestamp; 69 | storedVal = value; 70 | 71 | out.writeBits(storedDelta, FIRST_DELTA_BITS); 72 | out.writeBits(storedVal, 64); 73 | } 74 | 75 | /** 76 | * Closes the block and writes the remaining stuff to the BitOutput. 77 | */ 78 | public void close() { 79 | // These are selected to test interoperability and correctness of the solution, this can be read with go-tsz 80 | out.writeBits(0x0F, 4); 81 | out.writeBits(0xFFFFFFFF, 32); 82 | out.skipBit(); 83 | out.flush(); 84 | } 85 | 86 | /** 87 | * Difference to the original Facebook paper, we store the first delta as 27 bits to allow 88 | * millisecond accuracy for a one day block. 89 | * 90 | * Also, the timestamp delta-delta is not good for millisecond compressions.. 91 | * 92 | * @param timestamp epoch 93 | */ 94 | private void compressTimestamp(long timestamp) { 95 | // a) Calculate the delta of delta 96 | long newDelta = (timestamp - storedTimestamp); 97 | long deltaD = newDelta - storedDelta; 98 | 99 | // If delta is zero, write single 0 bit 100 | if(deltaD == 0) { 101 | out.skipBit(); 102 | } else if(deltaD >= -63 && deltaD <= 64) { 103 | out.writeBits(0x02, 2); // store '10' 104 | out.writeBits(deltaD, 7); // Using 7 bits, store the value.. 
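            // e.g. a delta-of-delta of 10 is written in this branch as the control bits '10' followed by the 7 bits 0001010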
105 | } else if(deltaD >= -255 && deltaD <= 256) { 106 | out.writeBits(0x06, 3); // store '110' 107 | out.writeBits(deltaD, 9); // Use 9 bits 108 | } else if(deltaD >= -2047 && deltaD <= 2048) { 109 | out.writeBits(0x0E, 4); // store '1110' 110 | out.writeBits(deltaD, 12); // Use 12 bits 111 | } else { 112 | out.writeBits(0x0F, 4); // Store '1111' 113 | out.writeBits(deltaD, 32); // Store delta using 32 bits 114 | } 115 | 116 | storedDelta = newDelta; 117 | storedTimestamp = timestamp; 118 | } 119 | 120 | private void compressValue(long value) { 121 | // TODO Fix already compiled into a big method 122 | long xor = storedVal ^ value; 123 | 124 | if(xor == 0) { 125 | // Write 0 126 | out.skipBit(); 127 | } else { 128 | int leadingZeros = Long.numberOfLeadingZeros(xor); 129 | int trailingZeros = Long.numberOfTrailingZeros(xor); 130 | 131 | // Check overflow of leading? Can't be 32! 132 | if(leadingZeros >= 32) { 133 | leadingZeros = 31; 134 | } 135 | 136 | // Store bit '1' 137 | out.writeBit(); 138 | 139 | if(leadingZeros >= storedLeadingZeros && trailingZeros >= storedTrailingZeros) { 140 | writeExistingLeading(xor); 141 | } else { 142 | writeNewLeading(xor, leadingZeros, trailingZeros); 143 | } 144 | } 145 | 146 | storedVal = value; 147 | } 148 | 149 | /** 150 | * If there at least as many leading zeros and as many trailing zeros as previous value, control bit = 0 (type a) 151 | * store the meaningful XORed value 152 | * 153 | * @param xor XOR between previous value and current 154 | */ 155 | private void writeExistingLeading(long xor) { 156 | out.skipBit(); 157 | int significantBits = 64 - storedLeadingZeros - storedTrailingZeros; 158 | out.writeBits(xor >>> storedTrailingZeros, significantBits); 159 | } 160 | 161 | /** 162 | * store the length of the number of leading zeros in the next 5 bits 163 | * store length of the meaningful XORed value in the next 6 bits, 164 | * store the meaningful bits of the XORed value 165 | * (type b) 166 | * 167 | * @param xor XOR between previous value and current 168 | * @param leadingZeros New leading zeros 169 | * @param trailingZeros New trailing zeros 170 | */ 171 | private void writeNewLeading(long xor, int leadingZeros, int trailingZeros) { 172 | out.writeBit(); 173 | out.writeBits(leadingZeros, 5); // Number of leading zeros in the next 5 bits 174 | 175 | int significantBits = 64 - leadingZeros - trailingZeros; 176 | out.writeBits(significantBits, 6); // Length of meaningful bits in the next 6 bits 177 | out.writeBits(xor >>> trailingZeros, significantBits); // Store the meaningful bits of XOR 178 | 179 | storedLeadingZeros = leadingZeros; 180 | storedTrailingZeros = trailingZeros; 181 | } 182 | } 183 | -------------------------------------------------------------------------------- /README.adoc: -------------------------------------------------------------------------------- 1 | = Time series compression library, based on the Facebook's Gorilla paper 2 | :source-language: java 3 | 4 | ifdef::env-github[] 5 | [link=https://travis-ci.org/burmanm/gorilla-tsc] 6 | image::https://travis-ci.org/burmanm/gorilla-tsc.svg?branch=master[Build Status,70,18] 7 | [link=https://maven-badges.herokuapp.com/maven-central/fi.iki.yak/compression-gorilla] 8 | image::https://img.shields.io/maven-central/v/fi.iki.yak/compression-gorilla.svg[Maven central] 9 | endif::[] 10 | 11 | == Introduction 12 | 13 | This is Java based implementation of the compression methods described in the paper link:http://www.vldb.org/pvldb/vol8/p1816-teller.pdf["Gorilla: A Fast, Scalable, 
In-Memory Time Series Database"]. For an explanation of how the compression methods work, read the excellent paper. 14 | 15 | In comparison to the original paper, this implementation allows using both integer values (`long`) as well as 16 | floating point values (`double`), both 64 bits in length. 17 | 18 | Versions 1.x and 2.x are not compatible with each other due to small differences in the stored array. Versions 2.x 19 | also support reading and storing the older format; see the usage section for more details. 20 | 21 | == Usage 22 | 23 | The included tests are a good source of examples. 24 | 25 | === Maven 26 | 27 | [source, xml] 28 | ---- 29 | <dependency> 30 |     <groupId>fi.iki.yak</groupId> 31 |     <artifactId>compression-gorilla</artifactId> 32 | </dependency> 33 | ---- 34 | 35 | You can find the latest version from the Maven badge link above. 36 | 37 | === Compressing 38 | 39 | To compress in the older 1.x format, use the class ``Compressor``. For 2.x, use ``GorillaCompressor`` (recommended). 40 | ``LongArrayOutput`` is also recommended over ``ByteBufferBitOutput`` because of performance. An alternative 41 | predictor can be supplied to the ``GorillaCompressor`` if required. One such implementation is included, 42 | ``DifferentialFCM``, which provides a better compression ratio for some data patterns. 43 | 44 | [source, java] 45 | ---- 46 | long now = LocalDateTime.now(ZoneOffset.UTC).truncatedTo(ChronoUnit.HOURS) 47 |                 .toInstant(ZoneOffset.UTC).toEpochMilli(); 48 | 49 | LongArrayOutput output = new LongArrayOutput(); 50 | GorillaCompressor c = new GorillaCompressor(now, output); 51 | ---- 52 | 53 | The compressor requires a block timestamp and an implementation of the `BitOutput` interface. 54 | 55 | [source, java] 56 | ---- 57 | c.addValue(long, double); 58 | ---- 59 | 60 | Adds a new floating-point value to the time series. If you wish to store only long values, use `c.addValue(long, 61 | long)`; however, do `not` mix the two in the same series. 62 | 63 | After the block is ready, remember to call: 64 | 65 | [source, java] 66 | ---- 67 | c.close(); 68 | ---- 69 | 70 | which flushes the remaining data to the stream and writes the closing information. 71 | 72 | === Decompressing 73 | 74 | To decompress from the older 1.x format, use the class ``Decompressor``. For 2.x, use ``GorillaDecompressor`` (recommended). 75 | ``LongArrayInput`` is also recommended over ``ByteBufferBitInput`` because of performance if the 2.x 76 | format was used to compress the time series. If the original compressor used a predictor other than 77 | ``LastValuePredictor``, it must be supplied in the constructor. 78 | 79 | [source, java] 80 | ---- 81 | LongArrayInput input = new LongArrayInput(longArray); 82 | GorillaDecompressor d = new GorillaDecompressor(input); 83 | ---- 84 | 85 | To decompress a stream of bytes, supply `GorillaDecompressor` with a suitable implementation of the `BitInput` interface. 86 | The `LongArrayInput` allows decompressing a long array or an existing `ByteBuffer` presentation with an 8 byte word 87 | length. 88 | 89 | [source, java] 90 | ---- 91 | Pair pair = d.readPair(); 92 | ---- 93 | 94 | Requesting the next pair with `readPair()` returns the next value in the series, or `null` once the series has been completely 95 | read. The pair is a simple placeholder object with `getTimestamp()` and `getDoubleValue()` or `getLongValue()`. 96 | 97 | == Performance 98 | 99 | The following performance is reached in a Linux VM running on VMware Player on a Windows 8.1 host, with an i7 2600K at 4GHz. 100 | The benchmark used is the ``EncodingBenchmark``. 
These results should not be directly compared to other 101 | implementations unless a similar dataset is used. 102 | 103 | Results are in millions of datapoint (timestamp + value) pairs per second. The values in this benchmark are 104 | doubles (performance with longs is slightly higher, around 2-3M/s). 105 | 106 | .Compression 107 | |=== 108 | |GorillaCompressor (2.0.0) |Compressor (1.1.0) 109 | 110 | |83.5M/s (~1.34GB/s) 111 | |31.2M/s (~499MB/s) 112 | |=== 113 | 114 | 115 | .Decompression 116 | |=== 117 | |GorillaDecompressor (2.0.0) |Decompressor (1.1.0) 118 | 119 | |77.9M/s (~1.25GB/s) 120 | |51.4M/s (~822MB/s) 121 | |=== 122 | 123 | Most of the differences in decompression / compression speed between versions come from implementation changes and 124 | not from the small changes to the output format. 125 | 126 | == Roadmap 127 | 128 | There were a few things I wanted to get into 2.0.0 but had to leave out due to lack of time. I will implement these 129 | later, potentially with some breaking API changes: 130 | 131 | * Support timestamp-only compression (2.2.x) 132 | * Include ByteBufferLongOutput/ByteBufferLongInput in the package (2.2.x) 133 | * Move bit operations inside the GorillaCompressor/GorillaDecompressor to allow easier usage with 134 | other allocators (2.2.x) 135 | 136 | == Internals 137 | 138 | === Differences to the original paper 139 | 140 | * The maximum number of leading zeros is stored with 6 bits to allow up to 63 leading zeros, which are necessary when 141 | storing long values. (>= 2.0.0) 142 | * Timestamp delta-of-delta values are stored by first turning them into positive integers with ZigZag encoding and then 143 | reducing them by one to fit in the necessary bits. In the decoding phase all values are incremented by one to recover the 144 | original value. (>= 2.0.0) 145 | * The compressed blocks are created with a 27 bit delta header (unlike in the original paper, which uses a 14 bit delta 146 | header). This allows a block size of up to one day with millisecond precision. (>= 1.0.0) 147 | 148 | === Data structure 149 | 150 | Values must be inserted in increasing time order; out-of-order insertions are not supported. 151 | 152 | The included ByteBufferBitInput and ByteBufferBitOutput classes use big-endian byte order for the data. 153 | 154 | == Contributing 155 | 156 | File an issue and/or send a pull request. 157 | 158 | === License 159 | 160 | .... 161 | Copyright 2016-2018 Michael Burman and/or other contributors. 162 | 163 | Licensed under the Apache License, Version 2.0 (the "License"); 164 | you may not use this file except in compliance with the License. 165 | You may obtain a copy of the License at 166 | 167 |     http://www.apache.org/licenses/LICENSE-2.0 168 | 169 | Unless required by applicable law or agreed to in writing, software 170 | distributed under the License is distributed on an "AS IS" BASIS, 171 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 172 | See the License for the specific language governing permissions and 173 | limitations under the License. 174 | .... 
175 | 176 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4.0.0 4 | 5 | fi.iki.yak 6 | compression-gorilla 7 | 2.1.2-SNAPSHOT 8 | Gorilla time series compression in Java 9 | Implements the time series compression methods as described in the Facebook's Gorilla 10 | paper 11 | https://github.com/burmanm/gorilla-tsc 12 | 13 | 14 | Apache License, Version 2.0 15 | http://www.apache.org/licenses/LICENSE-2.0.txt 16 | repo 17 | 18 | 19 | 20 | 21 | 1.8 22 | 5.0.0-M4 23 | 1.0.0-M4 24 | 25 | 1.18 26 | benchmark 27 | 2.5.3 28 | 29 | 30 | 31 | https://github.com/burmanm/gorilla-tsc 32 | scm:git:git://github.com/burmam/gorilla-tsc.git 33 | scm:git:git@github.com:burmanm/gorilla-tsc.git 34 | HEAD 35 | 36 | 37 | 38 | 39 | yak@iki.fi 40 | Michael Burman 41 | https://github.com/burmanm 42 | burmanm 43 | 44 | 45 | 46 | 47 | 48 | ossrh 49 | https://oss.sonatype.org/service/local/staging/deploy/maven2/ 50 | 51 | 52 | 53 | 54 | 55 | org.junit.jupiter 56 | junit-jupiter-engine 57 | ${junit.jupiter.version} 58 | test 59 | 60 | 61 | org.openjdk.jmh 62 | jmh-core 63 | ${jmh.version} 64 | 65 | 66 | org.openjdk.jmh 67 | jmh-generator-annprocess 68 | ${jmh.version} 69 | provided 70 | 71 | 72 | 73 | 74 | 75 | 76 | maven-compiler-plugin 77 | 3.1 78 | 79 | ${java.version} 80 | ${java.version} 81 | 82 | 83 | 84 | maven-surefire-plugin 85 | 2.19 86 | 87 | 88 | org.junit.platform 89 | junit-platform-surefire-provider 90 | ${junit.platform.version} 91 | 92 | 93 | 94 | 95 | org.apache.maven.plugins 96 | maven-shade-plugin 97 | 2.4.3 98 | 99 | 100 | package 101 | 102 | shade 103 | 104 | 105 | ${jar.name} 106 | 107 | 108 | org.openjdk.jmh.Main 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | org.apache.maven.plugins 117 | maven-release-plugin 118 | ${maven.release.plugin.version} 119 | 120 | true 121 | false 122 | release 123 | deploy 124 | 125 | 126 | 127 | org.sonatype.plugins 128 | nexus-staging-maven-plugin 129 | 1.6.7 130 | true 131 | 132 | ossrh 133 | https://oss.sonatype.org/ 134 | false 135 | 136 | 137 | 138 | org.apache.maven.plugins 139 | maven-javadoc-plugin 140 | 141 | 142 | 143 | jar 144 | 145 | 146 | 147 | 148 | 149 | org.apache.maven.plugins 150 | maven-source-plugin 151 | 152 | 153 | 154 | jar 155 | 156 | 157 | 158 | 159 | 160 | org.apache.maven.plugins 161 | maven-gpg-plugin 162 | 1.5 163 | 164 | 165 | sign-artifacts 166 | verify 167 | 168 | sign 169 | 170 | 171 | 172 | 173 | 174 | 175 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /src/test/java/fi/iki/yak/ts/compression/gorilla/EncodeTest.java: -------------------------------------------------------------------------------- 1 | package fi.iki.yak.ts.compression.gorilla; 2 | 3 | import static org.junit.jupiter.api.Assertions.assertEquals; 4 | import static org.junit.jupiter.api.Assertions.assertNull; 5 | 6 | import java.nio.ByteBuffer; 7 | import java.time.LocalDateTime; 8 | import java.time.Month; 9 | import java.time.ZoneOffset; 10 | import java.time.temporal.ChronoUnit; 11 | import java.util.Arrays; 12 | import java.util.concurrent.ThreadLocalRandom; 13 | 14 | import org.junit.jupiter.api.Test; 15 | 16 | /** 17 | * These are generic tests to test that input matches the output after compression + decompression cycle, using 18 | * both the timestamp and value compression. 19 | * 20 | * @author Michael Burman 21 | */ 22 | public class EncodeTest { 23 | 24 | private void comparePairsToCompression(long blockTimestamp, Pair[] pairs) { 25 | ByteBufferBitOutput output = new ByteBufferBitOutput(); 26 | Compressor c = new Compressor(blockTimestamp, output); 27 | Arrays.stream(pairs).forEach(p -> c.addValue(p.getTimestamp(), p.getDoubleValue())); 28 | c.close(); 29 | 30 | ByteBuffer byteBuffer = output.getByteBuffer(); 31 | byteBuffer.flip(); 32 | 33 | ByteBufferBitInput input = new ByteBufferBitInput(byteBuffer); 34 | Decompressor d = new Decompressor(input); 35 | 36 | // Replace with stream once decompressor supports it 37 | for(int i = 0; i < pairs.length; i++) { 38 | Pair pair = d.readPair(); 39 | assertEquals(pairs[i].getTimestamp(), pair.getTimestamp(), "Timestamp did not match"); 40 | assertEquals(pairs[i].getDoubleValue(), pair.getDoubleValue(), "Value did not match"); 41 | } 42 | 43 | assertNull(d.readPair()); 44 | } 45 | 46 | @Test 47 | void simpleEncodeAndDecodeTest() throws Exception { 48 | long now = LocalDateTime.now().truncatedTo(ChronoUnit.HOURS) 49 | .toInstant(ZoneOffset.UTC).toEpochMilli(); 50 | 51 | Pair[] pairs = { 52 | new Pair(now + 10, Double.doubleToRawLongBits(1.0)), 53 | new Pair(now + 20, Double.doubleToRawLongBits(-2.0)), 54 | new Pair(now + 28, Double.doubleToRawLongBits(-2.5)), 55 | new Pair(now + 84, Double.doubleToRawLongBits(65537)), 56 | new Pair(now + 400, Double.doubleToRawLongBits(2147483650.0)), 57 | new Pair(now + 2300, Double.doubleToRawLongBits(-16384)), 58 | new Pair(now + 16384, Double.doubleToRawLongBits(2.8)), 59 | new Pair(now + 16500, Double.doubleToRawLongBits(-38.0)) 60 | }; 61 | 62 | comparePairsToCompression(now, pairs); 63 | } 64 | 65 | @Test 66 | public void willItBlend() throws Exception { 67 | long blockTimestamp = 1500400800000L; 68 | 69 | Pair[] pairs = { 70 | new Pair(1500405481623L, 69087), 71 | new Pair(1500405488693L, 65640), 72 | new Pair(1500405495993L, 58155), 73 | new Pair(1500405503743L, 61025), 74 | new Pair(1500405511623L, 91156), 75 | new Pair(1500405519803L, 37516), 76 | new Pair(1500405528313L, 93515), 77 | new Pair(1500405537233L, 96226), 78 | new Pair(1500405546453L, 23833), 79 | 
new Pair(1500405556103L, 73186), 80 | new Pair(1500405566143L, 96947), 81 | new Pair(1500405576163L, 46927), 82 | new Pair(1500405586173L, 77954), 83 | new Pair(1500405596183L, 29302), 84 | new Pair(1500405606213L, 6700), 85 | new Pair(1500405616163L, 71971), 86 | new Pair(1500405625813L, 8528), 87 | new Pair(1500405635763L, 85321), 88 | new Pair(1500405645634L, 83229), 89 | new Pair(1500405655633L, 78298), 90 | new Pair(1500405665623L, 87122), 91 | new Pair(1500405675623L, 82055), 92 | new Pair(1500405685723L, 75067), 93 | new Pair(1500405695663L, 33680), 94 | new Pair(1500405705743L, 17576), 95 | new Pair(1500405715813L, 89701), 96 | new Pair(1500405725773L, 21427), 97 | new Pair(1500405735883L, 58255), 98 | new Pair(1500405745903L, 3768), 99 | new Pair(1500405755863L, 62086), 100 | new Pair(1500405765843L, 66965), 101 | new Pair(1500405775773L, 35801), 102 | new Pair(1500405785883L, 72169), 103 | new Pair(1500405795843L, 43089), 104 | new Pair(1500405805733L, 31418), 105 | new Pair(1500405815853L, 84781), 106 | new Pair(1500405825963L, 36103), 107 | new Pair(1500405836004L, 87431), 108 | new Pair(1500405845953L, 7379), 109 | new Pair(1500405855913L, 66919), 110 | new Pair(1500405865963L, 30906), 111 | new Pair(1500405875953L, 88630), 112 | new Pair(1500405885943L, 27546), 113 | new Pair(1500405896033L, 43813), 114 | new Pair(1500405906094L, 2124), 115 | new Pair(1500405916063L, 49399), 116 | new Pair(1500405926143L, 94577), 117 | new Pair(1500405936123L, 98459), 118 | new Pair(1500405946033L, 49457), 119 | new Pair(1500405956023L, 92838), 120 | new Pair(1500405966023L, 15628), 121 | new Pair(1500405976043L, 53916), 122 | new Pair(1500405986063L, 90387), 123 | new Pair(1500405996123L, 43176), 124 | new Pair(1500406006123L, 18838), 125 | new Pair(1500406016174L, 78847), 126 | new Pair(1500406026173L, 39591), 127 | new Pair(1500406036004L, 77070), 128 | new Pair(1500406045964L, 56788), 129 | new Pair(1500406056043L, 96706), 130 | new Pair(1500406066123L, 20756), 131 | new Pair(1500406076113L, 64433), 132 | new Pair(1500406086133L, 45791), 133 | new Pair(1500406096123L, 75028), 134 | new Pair(1500406106193L, 55403), 135 | new Pair(1500406116213L, 36991), 136 | new Pair(1500406126073L, 92929), 137 | new Pair(1500406136103L, 60416), 138 | new Pair(1500406146183L, 55485), 139 | new Pair(1500406156383L, 53525), 140 | new Pair(1500406166313L, 96021), 141 | new Pair(1500406176414L, 22705), 142 | new Pair(1500406186613L, 89801), 143 | new Pair(1500406196543L, 51975), 144 | new Pair(1500406206483L, 86741), 145 | new Pair(1500406216483L, 22440), 146 | new Pair(1500406226433L, 51818), 147 | new Pair(1500406236403L, 61965), 148 | new Pair(1500406246413L, 19074), 149 | new Pair(1500406256494L, 54521), 150 | new Pair(1500406266413L, 59315), 151 | new Pair(1500406276303L, 19171), 152 | new Pair(1500406286213L, 98800), 153 | new Pair(1500406296183L, 7086), 154 | new Pair(1500406306103L, 60578), 155 | new Pair(1500406316073L, 96828), 156 | new Pair(1500406326143L, 83746), 157 | new Pair(1500406336153L, 85481), 158 | new Pair(1500406346113L, 22346), 159 | new Pair(1500406356133L, 80976), 160 | new Pair(1500406366065L, 43586), 161 | new Pair(1500406376074L, 82500), 162 | new Pair(1500406386184L, 13576), 163 | new Pair(1500406396113L, 77871), 164 | new Pair(1500406406094L, 60978), 165 | new Pair(1500406416203L, 35264), 166 | new Pair(1500406426323L, 79733), 167 | new Pair(1500406436343L, 29140), 168 | new Pair(1500406446323L, 7237), 169 | new Pair(1500406456344L, 52866), 170 | new Pair(1500406466393L, 88456), 
171 | new Pair(1500406476493L, 33533), 172 | new Pair(1500406486524L, 96961), 173 | new Pair(1500406496453L, 16389), 174 | new Pair(1500406506453L, 31181), 175 | new Pair(1500406516433L, 63282), 176 | new Pair(1500406526433L, 92857), 177 | new Pair(1500406536413L, 4582), 178 | new Pair(1500406546383L, 46832), 179 | new Pair(1500406556473L, 6335), 180 | new Pair(1500406566413L, 44367), 181 | new Pair(1500406576513L, 84640), 182 | new Pair(1500406586523L, 36174), 183 | new Pair(1500406596553L, 40075), 184 | new Pair(1500406606603L, 80886), 185 | new Pair(1500406616623L, 43784), 186 | new Pair(1500406626623L, 25077), 187 | new Pair(1500406636723L, 18617), 188 | new Pair(1500406646723L, 72681), 189 | new Pair(1500406656723L, 84811), 190 | new Pair(1500406666783L, 90053), 191 | new Pair(1500406676685L, 25708), 192 | new Pair(1500406686713L, 57134), 193 | new Pair(1500406696673L, 87193), 194 | new Pair(1500406706743L, 66057), 195 | new Pair(1500406716724L, 51404), 196 | new Pair(1500406726753L, 90141), 197 | new Pair(1500406736813L, 10434), 198 | new Pair(1500406746803L, 29056), 199 | new Pair(1500406756833L, 48160), 200 | new Pair(1500406766924L, 96652), 201 | new Pair(1500406777113L, 64141), 202 | new Pair(1500406787113L, 22143), 203 | new Pair(1500406797093L, 20561), 204 | new Pair(1500406807113L, 66401), 205 | new Pair(1500406817283L, 76802), 206 | new Pair(1500406827284L, 37555), 207 | new Pair(1500406837323L, 63169), 208 | new Pair(1500406847463L, 45712), 209 | new Pair(1500406857513L, 44751), 210 | new Pair(1500406867523L, 98891), 211 | new Pair(1500406877523L, 38122), 212 | new Pair(1500406887623L, 46202), 213 | new Pair(1500406897703L, 5875), 214 | new Pair(1500406907663L, 17397), 215 | new Pair(1500406917603L, 39994), 216 | new Pair(1500406927633L, 82385), 217 | new Pair(1500406937623L, 15598), 218 | new Pair(1500406947693L, 36235), 219 | new Pair(1500406957703L, 97536), 220 | new Pair(1500406967673L, 28557), 221 | new Pair(1500406977723L, 13985), 222 | new Pair(1500406987663L, 64304), 223 | new Pair(1500406997573L, 83693), 224 | new Pair(1500407007494L, 6574), 225 | new Pair(1500407017493L, 25134), 226 | new Pair(1500407027503L, 50383), 227 | new Pair(1500407037523L, 55922), 228 | new Pair(1500407047603L, 73436), 229 | new Pair(1500407057473L, 68235), 230 | new Pair(1500407067553L, 1469), 231 | new Pair(1500407077463L, 44315), 232 | new Pair(1500407087463L, 95064), 233 | new Pair(1500407097443L, 1997), 234 | new Pair(1500407107473L, 17247), 235 | new Pair(1500407117453L, 42454), 236 | new Pair(1500407127413L, 73631), 237 | new Pair(1500407137363L, 96890), 238 | new Pair(1500407147343L, 43450), 239 | new Pair(1500407157363L, 42042), 240 | new Pair(1500407167403L, 83014), 241 | new Pair(1500407177473L, 32051), 242 | new Pair(1500407187523L, 69280), 243 | new Pair(1500407197495L, 21425), 244 | new Pair(1500407207453L, 93748), 245 | new Pair(1500407217413L, 64151), 246 | new Pair(1500407227443L, 38791), 247 | new Pair(1500407237463L, 5248), 248 | new Pair(1500407247523L, 92935), 249 | new Pair(1500407257513L, 18516), 250 | new Pair(1500407267584L, 98870), 251 | new Pair(1500407277573L, 82244), 252 | new Pair(1500407287723L, 65464), 253 | new Pair(1500407297723L, 33801), 254 | new Pair(1500407307673L, 18331), 255 | new Pair(1500407317613L, 89744), 256 | new Pair(1500407327553L, 98460), 257 | new Pair(1500407337503L, 24709), 258 | new Pair(1500407347423L, 8407), 259 | new Pair(1500407357383L, 69451), 260 | new Pair(1500407367333L, 51100), 261 | new Pair(1500407377373L, 25309), 262 | new 
Pair(1500407387443L, 16148), 263 | new Pair(1500407397453L, 98974), 264 | new Pair(1500407407543L, 80284), 265 | new Pair(1500407417583L, 170), 266 | new Pair(1500407427453L, 34706), 267 | new Pair(1500407437433L, 39681), 268 | new Pair(1500407447603L, 6140), 269 | new Pair(1500407457513L, 64595), 270 | new Pair(1500407467564L, 59862), 271 | new Pair(1500407477563L, 53795), 272 | new Pair(1500407487593L, 83493), 273 | new Pair(1500407497584L, 90639), 274 | new Pair(1500407507623L, 16777), 275 | new Pair(1500407517613L, 11096), 276 | new Pair(1500407527673L, 38512), 277 | new Pair(1500407537963L, 52759), 278 | new Pair(1500407548023L, 79567), 279 | new Pair(1500407558033L, 48664), 280 | new Pair(1500407568113L, 10710), 281 | new Pair(1500407578164L, 25635), 282 | new Pair(1500407588213L, 40985), 283 | new Pair(1500407598163L, 94089), 284 | new Pair(1500407608163L, 50056), 285 | new Pair(1500407618223L, 15550), 286 | new Pair(1500407628143L, 78823), 287 | new Pair(1500407638223L, 9044), 288 | new Pair(1500407648173L, 20782), 289 | new Pair(1500407658023L, 86390), 290 | new Pair(1500407667903L, 79444), 291 | new Pair(1500407677903L, 84051), 292 | new Pair(1500407687923L, 91554), 293 | new Pair(1500407697913L, 58777), 294 | new Pair(1500407708003L, 89474), 295 | new Pair(1500407718083L, 94026), 296 | new Pair(1500407728034L, 41613), 297 | new Pair(1500407738083L, 64667), 298 | new Pair(1500407748034L, 5160), 299 | new Pair(1500407758003L, 45140), 300 | new Pair(1500407768033L, 53704), 301 | new Pair(1500407778083L, 68097), 302 | new Pair(1500407788043L, 81137), 303 | new Pair(1500407798023L, 59657), 304 | new Pair(1500407808033L, 56572), 305 | new Pair(1500407817983L, 1993), 306 | new Pair(1500407828063L, 62608), 307 | new Pair(1500407838213L, 76489), 308 | new Pair(1500407848203L, 22147), 309 | new Pair(1500407858253L, 92829), 310 | new Pair(1500407868073L, 48499), 311 | new Pair(1500407878053L, 89152), 312 | new Pair(1500407888073L, 9191), 313 | new Pair(1500407898033L, 49881), 314 | new Pair(1500407908113L, 96020), 315 | new Pair(1500407918213L, 90203), 316 | new Pair(1500407928234L, 32217), 317 | new Pair(1500407938253L, 94302), 318 | new Pair(1500407948293L, 83111), 319 | new Pair(1500407958234L, 75576), 320 | new Pair(1500407968073L, 5973), 321 | new Pair(1500407978023L, 5175), 322 | new Pair(1500407987923L, 63350), 323 | new Pair(1500407997833L, 44081) 324 | }; 325 | 326 | comparePairsToCompression(blockTimestamp, pairs); 327 | } 328 | 329 | /** 330 | * Tests encoding of similar floats, see https://github.com/dgryski/go-tsz/issues/4 for more information. 
331 | */ 332 | @Test 333 | void testEncodeSimilarFloats() throws Exception { 334 | long now = LocalDateTime.of(2015, Month.MARCH, 02, 00, 00).toInstant(ZoneOffset.UTC).toEpochMilli(); 335 | 336 | ByteBufferBitOutput output = new ByteBufferBitOutput(); 337 | Compressor c = new Compressor(now, output); 338 | 339 | ByteBuffer bb = ByteBuffer.allocate(5 * 2*Long.BYTES); 340 | 341 | bb.putLong(now + 1); 342 | bb.putDouble(6.00065e+06); 343 | bb.putLong(now + 2); 344 | bb.putDouble(6.000656e+06); 345 | bb.putLong(now + 3); 346 | bb.putDouble(6.000657e+06); 347 | bb.putLong(now + 4); 348 | bb.putDouble(6.000659e+06); 349 | bb.putLong(now + 5); 350 | bb.putDouble(6.000661e+06); 351 | 352 | bb.flip(); 353 | 354 | for(int j = 0; j < 5; j++) { 355 | c.addValue(bb.getLong(), bb.getDouble()); 356 | } 357 | 358 | c.close(); 359 | 360 | bb.flip(); 361 | 362 | ByteBuffer byteBuffer = output.getByteBuffer(); 363 | byteBuffer.flip(); 364 | 365 | ByteBufferBitInput input = new ByteBufferBitInput(byteBuffer); 366 | Decompressor d = new Decompressor(input); 367 | 368 | // Replace with stream once decompressor supports it 369 | for(int i = 0; i < 5; i++) { 370 | Pair pair = d.readPair(); 371 | assertEquals(bb.getLong(), pair.getTimestamp(), "Timestamp did not match"); 372 | assertEquals(bb.getDouble(), pair.getDoubleValue(), "Value did not match"); 373 | } 374 | assertNull(d.readPair()); 375 | } 376 | 377 | /** 378 | * Tests writing enough large amount of datapoints that causes the included ByteBufferBitOutput to do 379 | * internal byte array expansion. 380 | */ 381 | @Test 382 | void testEncodeLargeAmountOfData() throws Exception { 383 | // This test should trigger ByteBuffer reallocation 384 | int amountOfPoints = 100000; 385 | long blockStart = LocalDateTime.now().truncatedTo(ChronoUnit.HOURS) 386 | .toInstant(ZoneOffset.UTC).toEpochMilli(); 387 | ByteBufferBitOutput output = new ByteBufferBitOutput(); 388 | 389 | long now = blockStart + 60; 390 | ByteBuffer bb = ByteBuffer.allocateDirect(amountOfPoints * 2*Long.BYTES); 391 | 392 | for(int i = 0; i < amountOfPoints; i++) { 393 | bb.putLong(now + i*60); 394 | bb.putDouble(i * Math.random()); 395 | } 396 | 397 | Compressor c = new Compressor(blockStart, output); 398 | 399 | bb.flip(); 400 | 401 | for(int j = 0; j < amountOfPoints; j++) { 402 | c.addValue(bb.getLong(), bb.getDouble()); 403 | } 404 | 405 | c.close(); 406 | 407 | bb.flip(); 408 | 409 | ByteBuffer byteBuffer = output.getByteBuffer(); 410 | byteBuffer.flip(); 411 | 412 | ByteBufferBitInput input = new ByteBufferBitInput(byteBuffer); 413 | Decompressor d = new Decompressor(input); 414 | 415 | for(int i = 0; i < amountOfPoints; i++) { 416 | long tStamp = bb.getLong(); 417 | double val = bb.getDouble(); 418 | Pair pair = d.readPair(); 419 | assertEquals(tStamp, pair.getTimestamp(), "Expected timestamp did not match at point " + i); 420 | assertEquals(val, pair.getDoubleValue()); 421 | } 422 | assertNull(d.readPair()); 423 | } 424 | 425 | /** 426 | * Although not intended usage, an empty block should not cause errors 427 | */ 428 | @Test 429 | void testEmptyBlock() throws Exception { 430 | long now = LocalDateTime.now().truncatedTo(ChronoUnit.HOURS) 431 | .toInstant(ZoneOffset.UTC).toEpochMilli(); 432 | 433 | ByteBufferBitOutput output = new ByteBufferBitOutput(); 434 | 435 | Compressor c = new Compressor(now, output); 436 | c.close(); 437 | 438 | ByteBuffer byteBuffer = output.getByteBuffer(); 439 | byteBuffer.flip(); 440 | 441 | ByteBufferBitInput input = new ByteBufferBitInput(byteBuffer); 442 | 
Decompressor d = new Decompressor(input); 443 | 444 | assertNull(d.readPair()); 445 | } 446 | 447 | @Test 448 | void testLongEncoding() throws Exception { 449 | // This test should trigger ByteBuffer reallocation 450 | int amountOfPoints = 10000; 451 | long blockStart = LocalDateTime.now().truncatedTo(ChronoUnit.HOURS) 452 | .toInstant(ZoneOffset.UTC).toEpochMilli(); 453 | ByteBufferBitOutput output = new ByteBufferBitOutput(); 454 | 455 | long now = blockStart + 60; 456 | ByteBuffer bb = ByteBuffer.allocateDirect(amountOfPoints * 2*Long.BYTES); 457 | 458 | for(int i = 0; i < amountOfPoints; i++) { 459 | bb.putLong(now + i*60); 460 | bb.putLong(ThreadLocalRandom.current().nextLong(Integer.MAX_VALUE)); 461 | } 462 | 463 | Compressor c = new Compressor(blockStart, output); 464 | 465 | bb.flip(); 466 | 467 | for(int j = 0; j < amountOfPoints; j++) { 468 | c.addValue(bb.getLong(), bb.getLong()); 469 | } 470 | 471 | c.close(); 472 | 473 | bb.flip(); 474 | 475 | ByteBuffer byteBuffer = output.getByteBuffer(); 476 | byteBuffer.flip(); 477 | 478 | ByteBufferBitInput input = new ByteBufferBitInput(byteBuffer); 479 | Decompressor d = new Decompressor(input); 480 | 481 | for(int i = 0; i < amountOfPoints; i++) { 482 | long tStamp = bb.getLong(); 483 | long val = bb.getLong(); 484 | Pair pair = d.readPair(); 485 | assertEquals(tStamp, pair.getTimestamp(), "Expected timestamp did not match at point " + i); 486 | assertEquals(val, pair.getLongValue()); 487 | } 488 | assertNull(d.readPair()); 489 | } 490 | } 491 | -------------------------------------------------------------------------------- /src/test/java/fi/iki/yak/ts/compression/gorilla/EncodeGorillaTest.java: -------------------------------------------------------------------------------- 1 | package fi.iki.yak.ts.compression.gorilla; 2 | 3 | import static org.junit.jupiter.api.Assertions.assertEquals; 4 | import static org.junit.jupiter.api.Assertions.assertNull; 5 | 6 | import java.nio.ByteBuffer; 7 | import java.time.LocalDateTime; 8 | import java.time.Month; 9 | import java.time.ZoneOffset; 10 | import java.time.temporal.ChronoUnit; 11 | import java.util.Arrays; 12 | import java.util.concurrent.ThreadLocalRandom; 13 | 14 | import org.junit.jupiter.api.Test; 15 | 16 | import fi.iki.yak.ts.compression.gorilla.predictors.DifferentialFCM; 17 | 18 | /** 19 | * These are generic tests to test that input matches the output after compression + decompression cycle, using 20 | * both the timestamp and value compression. 
21 | * 22 | * @author Michael Burman 23 | */ 24 | public class EncodeGorillaTest { 25 | 26 | private void comparePairsToCompression(long blockTimestamp, Pair[] pairs) { 27 | LongArrayOutput output = new LongArrayOutput(); 28 | 29 | GorillaCompressor c = new GorillaCompressor(blockTimestamp, output); 30 | 31 | Arrays.stream(pairs).forEach(p -> c.addValue(p.getTimestamp(), p.getDoubleValue())); 32 | c.close(); 33 | 34 | LongArrayInput input = new LongArrayInput(output.getLongArray()); 35 | GorillaDecompressor d = new GorillaDecompressor(input); 36 | 37 | // Replace with stream once GorillaDecompressor supports it 38 | for(int i = 0; i < pairs.length; i++) { 39 | Pair pair = d.readPair(); 40 | assertEquals(pairs[i].getTimestamp(), pair.getTimestamp(), "Timestamp did not match"); 41 | assertEquals(pairs[i].getDoubleValue(), pair.getDoubleValue(), "Value did not match"); 42 | } 43 | 44 | assertNull(d.readPair()); 45 | } 46 | 47 | @Test 48 | void simpleEncodeAndDecodeTest() throws Exception { 49 | long now = LocalDateTime.now().truncatedTo(ChronoUnit.HOURS) 50 | .toInstant(ZoneOffset.UTC).toEpochMilli(); 51 | 52 | Pair[] pairs = { 53 | new Pair(now + 10, Double.doubleToRawLongBits(1.0)), 54 | new Pair(now + 20, Double.doubleToRawLongBits(-2.0)), 55 | new Pair(now + 28, Double.doubleToRawLongBits(-2.5)), 56 | new Pair(now + 84, Double.doubleToRawLongBits(65537)), 57 | new Pair(now + 400, Double.doubleToRawLongBits(2147483650.0)), 58 | new Pair(now + 2300, Double.doubleToRawLongBits(-16384)), 59 | new Pair(now + 16384, Double.doubleToRawLongBits(2.8)), 60 | new Pair(now + 16500, Double.doubleToRawLongBits(-38.0)) 61 | }; 62 | 63 | comparePairsToCompression(now, pairs); 64 | } 65 | 66 | @Test 67 | public void willItBlend() throws Exception { 68 | long blockTimestamp = 1500400800000L; 69 | 70 | Pair[] pairs = { 71 | new Pair(1500405481623L, 69087), 72 | new Pair(1500405488693L, 65640), 73 | new Pair(1500405495993L, 58155), 74 | new Pair(1500405503743L, 61025), 75 | new Pair(1500405511623L, 91156), 76 | new Pair(1500405519803L, 37516), 77 | new Pair(1500405528313L, 93515), 78 | new Pair(1500405537233L, 96226), 79 | new Pair(1500405546453L, 23833), 80 | new Pair(1500405556103L, 73186), 81 | new Pair(1500405566143L, 96947), 82 | new Pair(1500405576163L, 46927), 83 | new Pair(1500405586173L, 77954), 84 | new Pair(1500405596183L, 29302), 85 | new Pair(1500405606213L, 6700), 86 | new Pair(1500405616163L, 71971), 87 | new Pair(1500405625813L, 8528), 88 | new Pair(1500405635763L, 85321), 89 | new Pair(1500405645634L, 83229), 90 | new Pair(1500405655633L, 78298), 91 | new Pair(1500405665623L, 87122), 92 | new Pair(1500405675623L, 82055), 93 | new Pair(1500405685723L, 75067), 94 | new Pair(1500405695663L, 33680), 95 | new Pair(1500405705743L, 17576), 96 | new Pair(1500405715813L, 89701), 97 | new Pair(1500405725773L, 21427), 98 | new Pair(1500405735883L, 58255), 99 | new Pair(1500405745903L, 3768), 100 | new Pair(1500405755863L, 62086), 101 | new Pair(1500405765843L, 66965), 102 | new Pair(1500405775773L, 35801), 103 | new Pair(1500405785883L, 72169), 104 | new Pair(1500405795843L, 43089), 105 | new Pair(1500405805733L, 31418), 106 | new Pair(1500405815853L, 84781), 107 | new Pair(1500405825963L, 36103), 108 | new Pair(1500405836004L, 87431), 109 | new Pair(1500405845953L, 7379), 110 | new Pair(1500405855913L, 66919), 111 | new Pair(1500405865963L, 30906), 112 | new Pair(1500405875953L, 88630), 113 | new Pair(1500405885943L, 27546), 114 | new Pair(1500405896033L, 43813), 115 | new Pair(1500405906094L, 2124), 
116 | new Pair(1500405916063L, 49399), 117 | new Pair(1500405926143L, 94577), 118 | new Pair(1500405936123L, 98459), 119 | new Pair(1500405946033L, 49457), 120 | new Pair(1500405956023L, 92838), 121 | new Pair(1500405966023L, 15628), 122 | new Pair(1500405976043L, 53916), 123 | new Pair(1500405986063L, 90387), 124 | new Pair(1500405996123L, 43176), 125 | new Pair(1500406006123L, 18838), 126 | new Pair(1500406016174L, 78847), 127 | new Pair(1500406026173L, 39591), 128 | new Pair(1500406036004L, 77070), 129 | new Pair(1500406045964L, 56788), 130 | new Pair(1500406056043L, 96706), 131 | new Pair(1500406066123L, 20756), 132 | new Pair(1500406076113L, 64433), 133 | new Pair(1500406086133L, 45791), 134 | new Pair(1500406096123L, 75028), 135 | new Pair(1500406106193L, 55403), 136 | new Pair(1500406116213L, 36991), 137 | new Pair(1500406126073L, 92929), 138 | new Pair(1500406136103L, 60416), 139 | new Pair(1500406146183L, 55485), 140 | new Pair(1500406156383L, 53525), 141 | new Pair(1500406166313L, 96021), 142 | new Pair(1500406176414L, 22705), 143 | new Pair(1500406186613L, 89801), 144 | new Pair(1500406196543L, 51975), 145 | new Pair(1500406206483L, 86741), 146 | new Pair(1500406216483L, 22440), 147 | new Pair(1500406226433L, 51818), 148 | new Pair(1500406236403L, 61965), 149 | new Pair(1500406246413L, 19074), 150 | new Pair(1500406256494L, 54521), 151 | new Pair(1500406266413L, 59315), 152 | new Pair(1500406276303L, 19171), 153 | new Pair(1500406286213L, 98800), 154 | new Pair(1500406296183L, 7086), 155 | new Pair(1500406306103L, 60578), 156 | new Pair(1500406316073L, 96828), 157 | new Pair(1500406326143L, 83746), 158 | new Pair(1500406336153L, 85481), 159 | new Pair(1500406346113L, 22346), 160 | new Pair(1500406356133L, 80976), 161 | new Pair(1500406366065L, 43586), 162 | new Pair(1500406376074L, 82500), 163 | new Pair(1500406386184L, 13576), 164 | new Pair(1500406396113L, 77871), 165 | new Pair(1500406406094L, 60978), 166 | new Pair(1500406416203L, 35264), 167 | new Pair(1500406426323L, 79733), 168 | new Pair(1500406436343L, 29140), 169 | new Pair(1500406446323L, 7237), 170 | new Pair(1500406456344L, 52866), 171 | new Pair(1500406466393L, 88456), 172 | new Pair(1500406476493L, 33533), 173 | new Pair(1500406486524L, 96961), 174 | new Pair(1500406496453L, 16389), 175 | new Pair(1500406506453L, 31181), 176 | new Pair(1500406516433L, 63282), 177 | new Pair(1500406526433L, 92857), 178 | new Pair(1500406536413L, 4582), 179 | new Pair(1500406546383L, 46832), 180 | new Pair(1500406556473L, 6335), 181 | new Pair(1500406566413L, 44367), 182 | new Pair(1500406576513L, 84640), 183 | new Pair(1500406586523L, 36174), 184 | new Pair(1500406596553L, 40075), 185 | new Pair(1500406606603L, 80886), 186 | new Pair(1500406616623L, 43784), 187 | new Pair(1500406626623L, 25077), 188 | new Pair(1500406636723L, 18617), 189 | new Pair(1500406646723L, 72681), 190 | new Pair(1500406656723L, 84811), 191 | new Pair(1500406666783L, 90053), 192 | new Pair(1500406676685L, 25708), 193 | new Pair(1500406686713L, 57134), 194 | new Pair(1500406696673L, 87193), 195 | new Pair(1500406706743L, 66057), 196 | new Pair(1500406716724L, 51404), 197 | new Pair(1500406726753L, 90141), 198 | new Pair(1500406736813L, 10434), 199 | new Pair(1500406746803L, 29056), 200 | new Pair(1500406756833L, 48160), 201 | new Pair(1500406766924L, 96652), 202 | new Pair(1500406777113L, 64141), 203 | new Pair(1500406787113L, 22143), 204 | new Pair(1500406797093L, 20561), 205 | new Pair(1500406807113L, 66401), 206 | new Pair(1500406817283L, 76802), 207 | new 
Pair(1500406827284L, 37555), 208 | new Pair(1500406837323L, 63169), 209 | new Pair(1500406847463L, 45712), 210 | new Pair(1500406857513L, 44751), 211 | new Pair(1500406867523L, 98891), 212 | new Pair(1500406877523L, 38122), 213 | new Pair(1500406887623L, 46202), 214 | new Pair(1500406897703L, 5875), 215 | new Pair(1500406907663L, 17397), 216 | new Pair(1500406917603L, 39994), 217 | new Pair(1500406927633L, 82385), 218 | new Pair(1500406937623L, 15598), 219 | new Pair(1500406947693L, 36235), 220 | new Pair(1500406957703L, 97536), 221 | new Pair(1500406967673L, 28557), 222 | new Pair(1500406977723L, 13985), 223 | new Pair(1500406987663L, 64304), 224 | new Pair(1500406997573L, 83693), 225 | new Pair(1500407007494L, 6574), 226 | new Pair(1500407017493L, 25134), 227 | new Pair(1500407027503L, 50383), 228 | new Pair(1500407037523L, 55922), 229 | new Pair(1500407047603L, 73436), 230 | new Pair(1500407057473L, 68235), 231 | new Pair(1500407067553L, 1469), 232 | new Pair(1500407077463L, 44315), 233 | new Pair(1500407087463L, 95064), 234 | new Pair(1500407097443L, 1997), 235 | new Pair(1500407107473L, 17247), 236 | new Pair(1500407117453L, 42454), 237 | new Pair(1500407127413L, 73631), 238 | new Pair(1500407137363L, 96890), 239 | new Pair(1500407147343L, 43450), 240 | new Pair(1500407157363L, 42042), 241 | new Pair(1500407167403L, 83014), 242 | new Pair(1500407177473L, 32051), 243 | new Pair(1500407187523L, 69280), 244 | new Pair(1500407197495L, 21425), 245 | new Pair(1500407207453L, 93748), 246 | new Pair(1500407217413L, 64151), 247 | new Pair(1500407227443L, 38791), 248 | new Pair(1500407237463L, 5248), 249 | new Pair(1500407247523L, 92935), 250 | new Pair(1500407257513L, 18516), 251 | new Pair(1500407267584L, 98870), 252 | new Pair(1500407277573L, 82244), 253 | new Pair(1500407287723L, 65464), 254 | new Pair(1500407297723L, 33801), 255 | new Pair(1500407307673L, 18331), 256 | new Pair(1500407317613L, 89744), 257 | new Pair(1500407327553L, 98460), 258 | new Pair(1500407337503L, 24709), 259 | new Pair(1500407347423L, 8407), 260 | new Pair(1500407357383L, 69451), 261 | new Pair(1500407367333L, 51100), 262 | new Pair(1500407377373L, 25309), 263 | new Pair(1500407387443L, 16148), 264 | new Pair(1500407397453L, 98974), 265 | new Pair(1500407407543L, 80284), 266 | new Pair(1500407417583L, 170), 267 | new Pair(1500407427453L, 34706), 268 | new Pair(1500407437433L, 39681), 269 | new Pair(1500407447603L, 6140), 270 | new Pair(1500407457513L, 64595), 271 | new Pair(1500407467564L, 59862), 272 | new Pair(1500407477563L, 53795), 273 | new Pair(1500407487593L, 83493), 274 | new Pair(1500407497584L, 90639), 275 | new Pair(1500407507623L, 16777), 276 | new Pair(1500407517613L, 11096), 277 | new Pair(1500407527673L, 38512), 278 | new Pair(1500407537963L, 52759), 279 | new Pair(1500407548023L, 79567), 280 | new Pair(1500407558033L, 48664), 281 | new Pair(1500407568113L, 10710), 282 | new Pair(1500407578164L, 25635), 283 | new Pair(1500407588213L, 40985), 284 | new Pair(1500407598163L, 94089), 285 | new Pair(1500407608163L, 50056), 286 | new Pair(1500407618223L, 15550), 287 | new Pair(1500407628143L, 78823), 288 | new Pair(1500407638223L, 9044), 289 | new Pair(1500407648173L, 20782), 290 | new Pair(1500407658023L, 86390), 291 | new Pair(1500407667903L, 79444), 292 | new Pair(1500407677903L, 84051), 293 | new Pair(1500407687923L, 91554), 294 | new Pair(1500407697913L, 58777), 295 | new Pair(1500407708003L, 89474), 296 | new Pair(1500407718083L, 94026), 297 | new Pair(1500407728034L, 41613), 298 | new 
Pair(1500407738083L, 64667), 299 | new Pair(1500407748034L, 5160), 300 | new Pair(1500407758003L, 45140), 301 | new Pair(1500407768033L, 53704), 302 | new Pair(1500407778083L, 68097), 303 | new Pair(1500407788043L, 81137), 304 | new Pair(1500407798023L, 59657), 305 | new Pair(1500407808033L, 56572), 306 | new Pair(1500407817983L, 1993), 307 | new Pair(1500407828063L, 62608), 308 | new Pair(1500407838213L, 76489), 309 | new Pair(1500407848203L, 22147), 310 | new Pair(1500407858253L, 92829), 311 | new Pair(1500407868073L, 48499), 312 | new Pair(1500407878053L, 89152), 313 | new Pair(1500407888073L, 9191), 314 | new Pair(1500407898033L, 49881), 315 | new Pair(1500407908113L, 96020), 316 | new Pair(1500407918213L, 90203), 317 | new Pair(1500407928234L, 32217), 318 | new Pair(1500407938253L, 94302), 319 | new Pair(1500407948293L, 83111), 320 | new Pair(1500407958234L, 75576), 321 | new Pair(1500407968073L, 5973), 322 | new Pair(1500407978023L, 5175), 323 | new Pair(1500407987923L, 63350), 324 | new Pair(1500407997833L, 44081) 325 | }; 326 | 327 | comparePairsToCompression(blockTimestamp, pairs); 328 | } 329 | 330 | /** 331 | * Tests encoding of similar floats, see https://github.com/dgryski/go-tsz/issues/4 for more information. 332 | */ 333 | @Test 334 | void testEncodeSimilarFloats() throws Exception { 335 | long now = LocalDateTime.of(2015, Month.MARCH, 02, 00, 00).toInstant(ZoneOffset.UTC).toEpochMilli(); 336 | 337 | LongArrayOutput output = new LongArrayOutput(); 338 | GorillaCompressor c = new GorillaCompressor(now, output); 339 | 340 | ByteBuffer bb = ByteBuffer.allocate(5 * 2*Long.BYTES); 341 | 342 | bb.putLong(now + 1); 343 | bb.putDouble(6.00065e+06); 344 | bb.putLong(now + 2); 345 | bb.putDouble(6.000656e+06); 346 | bb.putLong(now + 3); 347 | bb.putDouble(6.000657e+06); 348 | bb.putLong(now + 4); 349 | bb.putDouble(6.000659e+06); 350 | bb.putLong(now + 5); 351 | bb.putDouble(6.000661e+06); 352 | 353 | bb.flip(); 354 | 355 | for(int j = 0; j < 5; j++) { 356 | c.addValue(bb.getLong(), bb.getDouble()); 357 | } 358 | 359 | c.close(); 360 | 361 | bb.flip(); 362 | 363 | LongArrayInput input = new LongArrayInput(output.getLongArray()); 364 | GorillaDecompressor d = new GorillaDecompressor(input); 365 | 366 | // Replace with stream once GorillaDecompressor supports it 367 | for(int i = 0; i < 5; i++) { 368 | Pair pair = d.readPair(); 369 | assertEquals(bb.getLong(), pair.getTimestamp(), "Timestamp did not match"); 370 | assertEquals(bb.getDouble(), pair.getDoubleValue(), "Value did not match"); 371 | } 372 | assertNull(d.readPair()); 373 | } 374 | 375 | /** 376 | * Tests writing enough large amount of datapoints that causes the included LongArrayOutput to do 377 | * internal byte array expansion. 
378 | */ 379 | @Test 380 | void testEncodeLargeAmountOfData() throws Exception { 381 | // This test should trigger ByteBuffer reallocation 382 | int amountOfPoints = 100000; 383 | long blockStart = LocalDateTime.now().truncatedTo(ChronoUnit.HOURS) 384 | .toInstant(ZoneOffset.UTC).toEpochMilli(); 385 | LongArrayOutput output = new LongArrayOutput(); 386 | 387 | long now = blockStart + 60; 388 | ByteBuffer bb = ByteBuffer.allocateDirect(amountOfPoints * 2*Long.BYTES); 389 | 390 | for(int i = 0; i < amountOfPoints; i++) { 391 | bb.putLong(now + i*60); 392 | bb.putDouble(i * Math.random()); 393 | } 394 | 395 | GorillaCompressor c = new GorillaCompressor(blockStart, output); 396 | 397 | bb.flip(); 398 | 399 | for(int j = 0; j < amountOfPoints; j++) { 400 | c.addValue(bb.getLong(), bb.getDouble()); 401 | } 402 | 403 | c.close(); 404 | 405 | bb.flip(); 406 | 407 | LongArrayInput input = new LongArrayInput(output.getLongArray()); 408 | GorillaDecompressor d = new GorillaDecompressor(input); 409 | 410 | for(int i = 0; i < amountOfPoints; i++) { 411 | long tStamp = bb.getLong(); 412 | double val = bb.getDouble(); 413 | Pair pair = d.readPair(); 414 | assertEquals(tStamp, pair.getTimestamp(), "Expected timestamp did not match at point " + i); 415 | assertEquals(val, pair.getDoubleValue()); 416 | } 417 | assertNull(d.readPair()); 418 | } 419 | 420 | @Test 421 | void testEncodeLargeAmountOfDataOldBuffer() throws Exception { 422 | // This test should trigger ByteBuffer reallocation 423 | int amountOfPoints = 100000; 424 | long blockStart = LocalDateTime.now().truncatedTo(ChronoUnit.HOURS) 425 | .toInstant(ZoneOffset.UTC).toEpochMilli(); 426 | ByteBufferBitOutput output = new ByteBufferBitOutput(); 427 | 428 | long now = blockStart + 60; 429 | ByteBuffer bb = ByteBuffer.allocateDirect(amountOfPoints * 2*Long.BYTES); 430 | 431 | for(int i = 0; i < amountOfPoints; i++) { 432 | bb.putLong(now + i*60); 433 | bb.putDouble(i * Math.random()); 434 | } 435 | 436 | GorillaCompressor c = new GorillaCompressor(blockStart, output); 437 | 438 | bb.flip(); 439 | 440 | for(int j = 0; j < amountOfPoints; j++) { 441 | c.addValue(bb.getLong(), bb.getDouble()); 442 | } 443 | 444 | c.close(); 445 | 446 | bb.flip(); 447 | 448 | ByteBuffer byteBuffer = output.getByteBuffer(); 449 | byteBuffer.flip(); 450 | 451 | ByteBufferBitInput input = new ByteBufferBitInput(byteBuffer); 452 | GorillaDecompressor d = new GorillaDecompressor(input); 453 | 454 | for(int i = 0; i < amountOfPoints; i++) { 455 | long tStamp = bb.getLong(); 456 | double val = bb.getDouble(); 457 | Pair pair = d.readPair(); 458 | assertEquals(tStamp, pair.getTimestamp(), "Expected timestamp did not match at point " + i); 459 | assertEquals(val, pair.getDoubleValue()); 460 | } 461 | assertNull(d.readPair()); 462 | } 463 | 464 | /** 465 | * Although not intended usage, an empty block should not cause errors 466 | */ 467 | @Test 468 | void testEmptyBlock() throws Exception { 469 | long now = LocalDateTime.now().truncatedTo(ChronoUnit.HOURS) 470 | .toInstant(ZoneOffset.UTC).toEpochMilli(); 471 | 472 | LongArrayOutput output = new LongArrayOutput(); 473 | 474 | GorillaCompressor c = new GorillaCompressor(now, output); 475 | c.close(); 476 | 477 | LongArrayInput input = new LongArrayInput(output.getLongArray()); 478 | GorillaDecompressor d = new GorillaDecompressor(input); 479 | 480 | assertNull(d.readPair()); 481 | } 482 | 483 | @Test 484 | void testCopyFlush() { 485 | long now = LocalDateTime.now().truncatedTo(ChronoUnit.HOURS) 486 | 
.toInstant(ZoneOffset.UTC).toEpochMilli(); 487 | 488 | LongArrayOutput output = new LongArrayOutput(); 489 | 490 | GorillaCompressor c = new GorillaCompressor(now, output); 491 | 492 | c.addValue(now + 1, 1.0); 493 | c.addValue(now + 2, 1.0); 494 | 495 | LongArrayInput input = new LongArrayInput(output.getLongArray()); 496 | GorillaDecompressor d = new GorillaDecompressor(input); 497 | 498 | assertEquals(now + 1, d.readPair().getTimestamp()); 499 | assertEquals(now + 2, d.readPair().getTimestamp()); 500 | } 501 | 502 | /** 503 | * Long values should be compressable and decompressable in the stream 504 | */ 505 | @Test 506 | void testLongEncoding() throws Exception { 507 | // This test should trigger ByteBuffer reallocation 508 | int amountOfPoints = 10000; 509 | long blockStart = LocalDateTime.now().truncatedTo(ChronoUnit.HOURS) 510 | .toInstant(ZoneOffset.UTC).toEpochMilli(); 511 | LongArrayOutput output = new LongArrayOutput(); 512 | 513 | long now = blockStart + 60; 514 | ByteBuffer bb = ByteBuffer.allocateDirect(amountOfPoints * 2*Long.BYTES); 515 | 516 | for(int i = 0; i < amountOfPoints; i++) { 517 | bb.putLong(now + i*60); 518 | bb.putLong(ThreadLocalRandom.current().nextLong(Integer.MAX_VALUE)); 519 | } 520 | 521 | GorillaCompressor c = new GorillaCompressor(blockStart, output); 522 | 523 | bb.flip(); 524 | 525 | for(int j = 0; j < amountOfPoints; j++) { 526 | c.addValue(bb.getLong(), bb.getLong()); 527 | } 528 | 529 | c.close(); 530 | 531 | bb.flip(); 532 | 533 | LongArrayInput input = new LongArrayInput(output.getLongArray()); 534 | GorillaDecompressor d = new GorillaDecompressor(input); 535 | 536 | for(int i = 0; i < amountOfPoints; i++) { 537 | long tStamp = bb.getLong(); 538 | long val = bb.getLong(); 539 | Pair pair = d.readPair(); 540 | assertEquals(tStamp, pair.getTimestamp(), "Expected timestamp did not match at point " + i); 541 | assertEquals(val, pair.getLongValue()); 542 | } 543 | assertNull(d.readPair()); 544 | } 545 | 546 | /** 547 | * Tests writing enough large amount of datapoints that causes the included LongArrayOutput to do 548 | * internal byte array expansion. 
549 | */ 550 | @Test 551 | void testDifferentialFCM() throws Exception { 552 | // This test should trigger ByteBuffer reallocation 553 | int amountOfPoints = 100000; 554 | long blockStart = LocalDateTime.now().truncatedTo(ChronoUnit.HOURS) 555 | .toInstant(ZoneOffset.UTC).toEpochMilli(); 556 | LongArrayOutput output = new LongArrayOutput(); 557 | 558 | long now = blockStart + 60; 559 | ByteBuffer bb = ByteBuffer.allocateDirect(amountOfPoints * 2*Long.BYTES); 560 | 561 | for(int i = 0; i < amountOfPoints; i++) { 562 | bb.putLong(now + i*60); 563 | bb.putDouble(i * Math.random()); 564 | } 565 | 566 | GorillaCompressor c = new GorillaCompressor(blockStart, output, new DifferentialFCM(1024)); 567 | 568 | bb.flip(); 569 | 570 | for(int j = 0; j < amountOfPoints; j++) { 571 | c.addValue(bb.getLong(), bb.getDouble()); 572 | } 573 | 574 | c.close(); 575 | 576 | bb.flip(); 577 | 578 | LongArrayInput input = new LongArrayInput(output.getLongArray()); 579 | GorillaDecompressor d = new GorillaDecompressor(input, new DifferentialFCM(1024)); 580 | 581 | for(int i = 0; i < amountOfPoints; i++) { 582 | long tStamp = bb.getLong(); 583 | double val = bb.getDouble(); 584 | Pair pair = d.readPair(); 585 | assertEquals(tStamp, pair.getTimestamp(), "Expected timestamp did not match at point " + i); 586 | assertEquals(val, pair.getDoubleValue()); 587 | } 588 | assertNull(d.readPair()); 589 | } 590 | 591 | } 592 | --------------------------------------------------------------------------------