IntegerCODEC ic = 15 | * new Composition(new BinaryPacking(), new VariableByte()).16 | * 17 | * Note that this does not use differential coding: if you are working on sorted 18 | * lists, use IntegratedBinaryPacking instead. 19 | * 20 | *
21 | * For details, please see 22 | *
23 | *24 | * Daniel Lemire and Leonid Boytsov, Decoding billions of integers per second 25 | * through vectorization Software: Practice & Experience 26 | * http://onlinelibrary.wiley.com/doi/10.1002/spe.2203/abstract 27 | * http://arxiv.org/abs/1209.2137 28 | *
29 | *30 | * Daniel Lemire, Leonid Boytsov, Nathan Kurz, 31 | * SIMD Compression and the Intersection of Sorted Integers 32 | * http://arxiv.org/abs/1401.6399 33 | *
34 | * 35 | * @author Daniel Lemire 36 | */ 37 | 38 | namespace Genbox.CSharpFastPFOR 39 | { 40 | public class BinaryPacking : IntegerCODEC, SkippableIntegerCODEC 41 | { 42 | private const int BLOCK_SIZE = 32; 43 | 44 | public void compress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos) 45 | { 46 | inlength = Util.greatestMultiple(inlength, BLOCK_SIZE); 47 | if (inlength == 0) 48 | return; 49 | @out[outpos.get()] = inlength; 50 | outpos.increment(); 51 | headlessCompress(@in, inpos, inlength, @out, outpos); 52 | } 53 | 54 | public void headlessCompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos) 55 | { 56 | inlength = Util.greatestMultiple(inlength, BLOCK_SIZE); 57 | int tmpoutpos = outpos.get(); 58 | int s = inpos.get(); 59 | for (; s + BLOCK_SIZE * 4 - 1 < inpos.get() + inlength; s += BLOCK_SIZE * 4) 60 | { 61 | 62 | int mbits1 = Util.maxbits(@in, s, BLOCK_SIZE); 63 | 64 | int mbits2 = Util.maxbits(@in, s + BLOCK_SIZE, BLOCK_SIZE); 65 | 66 | int mbits3 = Util.maxbits(@in, s + 2 * BLOCK_SIZE, BLOCK_SIZE); 67 | 68 | int mbits4 = Util.maxbits(@in, s + 3 * BLOCK_SIZE, BLOCK_SIZE); 69 | @out[tmpoutpos++] = (mbits1 << 24) | (mbits2 << 16) 70 | | (mbits3 << 8) | (mbits4); 71 | BitPacking.fastpackwithoutmask(@in, s, @out, tmpoutpos, 72 | mbits1); 73 | tmpoutpos += mbits1; 74 | BitPacking.fastpackwithoutmask(@in, s + BLOCK_SIZE, @out, 75 | tmpoutpos, mbits2); 76 | tmpoutpos += mbits2; 77 | BitPacking.fastpackwithoutmask(@in, s + 2 * BLOCK_SIZE, @out, 78 | tmpoutpos, mbits3); 79 | tmpoutpos += mbits3; 80 | BitPacking.fastpackwithoutmask(@in, s + 3 * BLOCK_SIZE, @out, 81 | tmpoutpos, mbits4); 82 | tmpoutpos += mbits4; 83 | } 84 | for (; s < inpos.get() + inlength; s += BLOCK_SIZE) 85 | { 86 | 87 | int mbits = Util.maxbits(@in, s, BLOCK_SIZE); 88 | @out[tmpoutpos++] = mbits; 89 | BitPacking.fastpackwithoutmask(@in, s, @out, tmpoutpos, 90 | mbits); 91 | tmpoutpos += mbits; 92 | 93 | } 94 | inpos.add(inlength); 95 | 
outpos.set(tmpoutpos); 96 | } 97 | 98 | public void uncompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos) 99 | { 100 | if (inlength == 0) 101 | return; 102 | 103 | int outlength = @in[inpos.get()]; 104 | inpos.increment(); 105 | headlessUncompress(@in, inpos, inlength, @out, outpos, outlength); 106 | } 107 | 108 | public void headlessUncompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos, int num) 109 | { 110 | 111 | int outlength = Util.greatestMultiple(num, BLOCK_SIZE); 112 | int tmpinpos = inpos.get(); 113 | int s = outpos.get(); 114 | for (; s + BLOCK_SIZE * 4 - 1 < outpos.get() + outlength; s += BLOCK_SIZE * 4) 115 | { 116 | 117 | int mbits1 = (int)((uint)@in[tmpinpos] >> 24); 118 | 119 | int mbits2 = (int)((uint)@in[tmpinpos] >> 16) & 0xFF; 120 | 121 | int mbits3 = (int)((uint)@in[tmpinpos] >> 8) & 0xFF; 122 | 123 | int mbits4 = (int)((uint)@in[tmpinpos]) & 0xFF; 124 | ++tmpinpos; 125 | BitPacking.fastunpack(@in, tmpinpos, @out, s, mbits1); 126 | tmpinpos += mbits1; 127 | BitPacking 128 | .fastunpack(@in, tmpinpos, @out, s + BLOCK_SIZE, mbits2); 129 | tmpinpos += mbits2; 130 | BitPacking.fastunpack(@in, tmpinpos, @out, s + 2 * BLOCK_SIZE, 131 | mbits3); 132 | tmpinpos += mbits3; 133 | BitPacking.fastunpack(@in, tmpinpos, @out, s + 3 * BLOCK_SIZE, 134 | mbits4); 135 | tmpinpos += mbits4; 136 | } 137 | for (; s < outpos.get() + outlength; s += BLOCK_SIZE) 138 | { 139 | 140 | int mbits = @in[tmpinpos]; 141 | ++tmpinpos; 142 | BitPacking.fastunpack(@in, tmpinpos, @out, s, mbits); 143 | tmpinpos += mbits; 144 | } 145 | outpos.add(outlength); 146 | inpos.set(tmpinpos); 147 | } 148 | 149 | public override string ToString() 150 | { 151 | return nameof(BinaryPacking); 152 | } 153 | } 154 | } -------------------------------------------------------------------------------- /src/CSharpFastPFOR/ByteIntegerCODEC.cs: -------------------------------------------------------------------------------- 1 | /** 2 | * This 
code is released under the
 * Apache License Version 2.0 http://www.apache.org/licenses/.
 *
 * (c) Daniel Lemire, http://lemire.me/en/
 */

/**
 * Interface describing a CODEC to compress integers to bytes.
 *
 * @author Daniel Lemire
 *
 */
namespace Genbox.CSharpFastPFOR
{
    public interface ByteIntegerCODEC
    {
        /**
         * Compress data from an array of integers into an array of bytes.
         *
         * Both inpos and outpos are updated to reflect how much data was
         * read and written. For example, if 12 ints (inlength = 12) are
         * compressed to 3 bytes, inpos is incremented by 12 and outpos is
         * incremented by 3. IntWrapper is used to pass the positions by
         * reference.
         *
         * @param in
         *            input array
         * @param inpos
         *            location in the input array
         * @param inlength
         *            how many integers to compress
         * @param out
         *            output array
         * @param outpos
         *            where to write in the output array
         */
        void compress(int[] @in, IntWrapper inpos, int inlength, sbyte[] @out, IntWrapper outpos);

        /**
         * Uncompress data from an array of bytes into an array of integers.
         *
         * Both inpos and outpos are updated to indicate the new positions
         * after reading and writing.
         *
         * @param in
         *            array containing data in compressed form
         * @param inpos
         *            where to start reading in the array
         * @param inlength
         *            length of the compressed data (ignored by some
         *            schemes)
         * @param out
         *            array where to write the uncompressed output
         * @param outpos
         *            where to write the uncompressed output in out
         */
        void uncompress(sbyte[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos);
    }
}
--------------------------------------------------------------------------------
/src/CSharpFastPFOR/CSharpFastPFOR.csproj:
--------------------------------------------------------------------------------
/**
 * NOTE(review): the project-file content, the following file separator and the
 * opening of this header comment were lost in extraction; only the usage
 * example below survived.
 *
 * IntegerCODEC ic = new Composition(new DeltaZigzagBinaryPacking(),
 *                                   new DeltaZigzagVariableByte()).
 *
 * @author MURAOKA Taro http://github.com/koron
 */

namespace Genbox.CSharpFastPFOR
{
    /**
     * Binary packing applied to delta+zigzag encoded values.
     *
     * Input is processed in blocks of BLOCK_LENGTH (128) integers. Each block
     * is first transformed with DeltaZigzagEncoding and then packed as four
     * sub-blocks of 32 values, whose bit widths share a single header word.
     */
    public class DeltaZigzagBinaryPacking : IntegerCODEC
    {
        private const int BLOCK_LENGTH = 128;

        /**
         * Compress whole blocks of the input, preceded by a length word.
         *
         * The input length is truncated to a multiple of BLOCK_LENGTH; if
         * nothing remains, no output is produced and the positions are left
         * untouched.
         *
         * @param inBuf  input array
         * @param inPos  read position; advanced by the number of integers consumed
         * @param inLen  number of integers offered for compression
         * @param outBuf output array
         * @param outPos write position; set to the position after the last written word
         */
        public void compress(int[] inBuf, IntWrapper inPos, int inLen, int[] outBuf, IntWrapper outPos)
        {
            const int SUB_BLOCK = 32;
            const int SUB_BLOCKS = BLOCK_LENGTH / SUB_BLOCK;

            inLen = inLen - inLen % BLOCK_LENGTH;
            if (inLen == 0)
            {
                return;
            }

            outBuf[outPos.get()] = inLen;
            outPos.increment();

            DeltaZigzagEncoding.Encoder encoder = new DeltaZigzagEncoding.Encoder(0);
            int[] scratch = new int[BLOCK_LENGTH];
            int[] widths = new int[SUB_BLOCKS];

            int dst = outPos.get();
            int src = inPos.get();
            int srcEnd = src + inLen;
            while (src < srcEnd)
            {
                encoder.encodeArray(inBuf, src, BLOCK_LENGTH, scratch);

                // Determine every width before anything is written for this block.
                for (int k = 0; k < SUB_BLOCKS; ++k)
                {
                    widths[k] = Util.maxbits32(scratch, k * SUB_BLOCK);
                }

                outBuf[dst++] = (widths[0] << 24) | (widths[1] << 16)
                              | (widths[2] << 8) | (widths[3] << 0);

                // A sub-block packed with width w occupies w output words.
                for (int k = 0; k < SUB_BLOCKS; ++k)
                {
                    BitPacking.fastpackwithoutmask(scratch, k * SUB_BLOCK, outBuf, dst, widths[k]);
                    dst += widths[k];
                }

                src += BLOCK_LENGTH;
            }

            inPos.add(inLen);
            outPos.set(dst);
        }

        /**
         * Uncompress data produced by compress.
         *
         * Reads the length word at inBuf[inPos], then decodes one block of
         * BLOCK_LENGTH integers per iteration. Does nothing when inLen is zero.
         *
         * @param inBuf  array containing the compressed data
         * @param inPos  read position; set to the position after the last consumed word
         * @param inLen  length of the compressed data (only tested against zero here)
         * @param outBuf array receiving the decoded integers
         * @param outPos write position; advanced by the stored length
         */
        public void uncompress(int[] inBuf, IntWrapper inPos, int inLen, int[] outBuf, IntWrapper outPos)
        {
            const int SUB_BLOCK = 32;
            const int SUB_BLOCKS = BLOCK_LENGTH / SUB_BLOCK;

            if (inLen == 0)
            {
                return;
            }

            int outLen = inBuf[inPos.get()];
            inPos.increment();

            DeltaZigzagEncoding.Decoder decoder = new DeltaZigzagEncoding.Decoder(0);
            int[] scratch = new int[BLOCK_LENGTH];

            int src = inPos.get();
            int dst = outPos.get();
            int dstEnd = dst + outLen;
            while (dst < dstEnd)
            {
                int header = inBuf[src++];
                for (int k = 0; k < SUB_BLOCKS; ++k)
                {
                    // Widths sit one per byte, first sub-block in the top byte.
                    int width = (header >> (24 - 8 * k)) & 0x3F;
                    BitPacking.fastunpack(inBuf, src, scratch, k * SUB_BLOCK, width);
                    src += width;
                }
                decoder.decodeArray(scratch, 0, BLOCK_LENGTH, outBuf, dst);
                dst += BLOCK_LENGTH;
            }

            outPos.add(outLen);
            inPos.set(src);
        }

        public override string ToString()
        {
            return nameof(DeltaZigzagBinaryPacking);
        }
    }
}
--------------------------------------------------------------------------------
/src/CSharpFastPFOR/DeltaZigzagEncoding.cs:
--------------------------------------------------------------------------------
/*
 * This code is released under the
 * Apache License Version 2.0 http://www.apache.org/licenses/.
 */

/**
 * Delta+Zigzag Encoding.
8 | * 9 | * @author MURAOKA Taro http://github.com/koron 10 | */ 11 | namespace Genbox.CSharpFastPFOR 12 | { 13 | public class DeltaZigzagEncoding 14 | { 15 | public class Context 16 | { 17 | protected int ContextValue; 18 | 19 | protected Context(int contextValue) 20 | { 21 | ContextValue = contextValue; 22 | } 23 | 24 | public void setContextValue(int contextValue) 25 | { 26 | ContextValue = contextValue; 27 | } 28 | 29 | public int getContextValue() 30 | { 31 | return ContextValue; 32 | } 33 | } 34 | 35 | public class Encoder : Context 36 | { 37 | public Encoder(int contextValue) : base(contextValue) 38 | { 39 | } 40 | 41 | public int encodeInt(int value) 42 | { 43 | int n = value - ContextValue; 44 | ContextValue = value; 45 | return (n << 1) ^ (n >> 31); 46 | } 47 | 48 | public int[] encodeArray(int[] src, int srcoff, int length, int[] dst, int dstoff) 49 | { 50 | for (int i = 0; i < length; ++i) 51 | { 52 | dst[dstoff + i] = encodeInt(src[srcoff + i]); 53 | } 54 | return dst; 55 | } 56 | 57 | public int[] encodeArray(int[] src, int srcoff, int length, int[] dst) 58 | { 59 | return encodeArray(src, srcoff, length, dst, 0); 60 | } 61 | 62 | public int[] encodeArray(int[] src, int offset, int length) 63 | { 64 | return encodeArray(src, offset, length, new int[length], 0); 65 | } 66 | 67 | public int[] encodeArray(int[] src) 68 | { 69 | return encodeArray(src, 0, src.Length, new int[src.Length], 0); 70 | } 71 | } 72 | 73 | public class Decoder : Context 74 | { 75 | public Decoder(int contextValue) : base(contextValue) 76 | { 77 | } 78 | 79 | public int decodeInt(int value) 80 | { 81 | int n = (int)((uint)value >> 1) ^ ((value << 31) >> 31); 82 | n += ContextValue; 83 | ContextValue = n; 84 | return n; 85 | } 86 | 87 | public int[] decodeArray(int[] src, int srcoff, int length, 88 | int[] dst, int dstoff) 89 | { 90 | for (int i = 0; i < length; ++i) 91 | { 92 | dst[dstoff + i] = decodeInt(src[srcoff + i]); 93 | } 94 | return dst; 95 | } 96 | 97 | public int[] 
decodeArray(int[] src, int offset, int length) 98 | { 99 | return decodeArray(src, offset, length, new int[length], 0); 100 | } 101 | 102 | public int[] decodeArray(int[] src) 103 | { 104 | return decodeArray(src, 0, src.Length); 105 | } 106 | } 107 | } 108 | } -------------------------------------------------------------------------------- /src/CSharpFastPFOR/DeltaZigzagVariableByte.cs: -------------------------------------------------------------------------------- 1 | /* 2 | * This code is released under the 3 | * Apache License Version 2.0 http://www.apache.org/licenses/. 4 | */ 5 | 6 | /** 7 | * VariableByte with Delta+Zigzag Encoding. 8 | * 9 | * @author MURAOKA Taro http://github.com/koron 10 | */ 11 | 12 | using Genbox.CSharpFastPFOR.Port; 13 | 14 | namespace Genbox.CSharpFastPFOR 15 | { 16 | public class DeltaZigzagVariableByte : IntegerCODEC 17 | { 18 | public void compress(int[] inBuf, IntWrapper inPos, int inLen, int[] outBuf, IntWrapper outPos) 19 | { 20 | if (inLen == 0) 21 | { 22 | return; 23 | } 24 | 25 | ByteBuffer byteBuf = ByteBuffer.allocateDirect(inLen * 5 + 3); 26 | DeltaZigzagEncoding.Encoder ctx = new DeltaZigzagEncoding.Encoder(0); 27 | 28 | // Delta+Zigzag+VariableByte encoding. 29 | int ip = inPos.get(); 30 | 31 | int inPosLast = ip + inLen; 32 | for (; ip < inPosLast; ++ip) 33 | { 34 | // Filter with delta+zigzag encoding. 35 | int n = ctx.encodeInt(inBuf[ip]); 36 | // Variable byte encoding. 37 | 38 | //PORT NOTE: The following IF statements are ported from a switch. 
Fall through switches are not allowed in C# 39 | int zeros = Integer.numberOfLeadingZeros(n); 40 | 41 | if (zeros < 4) 42 | { 43 | byteBuf.put((sbyte)(((int)((uint)n >> 28) & 0x7F) | 0x80)); 44 | } 45 | 46 | if (zeros < 11) 47 | { 48 | byteBuf.put((sbyte)(((int)((uint)n >> 21) & 0x7F) | 0x80)); 49 | } 50 | 51 | if (zeros < 18) 52 | { 53 | byteBuf.put((sbyte)(((int)((uint)n >> 14) & 0x7F) | 0x80)); 54 | } 55 | 56 | if (zeros < 25) 57 | { 58 | byteBuf.put((sbyte)(((int)((uint)n >> 7) & 0x7F) | 0x80)); 59 | } 60 | 61 | byteBuf.put((sbyte)((uint)n & 0x7F)); 62 | } 63 | 64 | // Padding buffer to considerable as IntBuffer. 65 | for (int i = (4 - (byteBuf.position() % 4)) % 4; i > 0; --i) 66 | { 67 | unchecked 68 | { 69 | byteBuf.put((sbyte)(0x80)); 70 | } 71 | } 72 | 73 | int outLen = byteBuf.position() / 4; 74 | byteBuf.flip(); 75 | IntBuffer intBuf = byteBuf.asIntBuffer(); 76 | /* 77 | * Console.WriteLine(String.format( 78 | * "inLen=%d pos=%d limit=%d outLen=%d outBuf.len=%d", inLen, 79 | * intBuf.position(), intBuf.limit(), outLen, outBuf.Length)); 80 | */ 81 | intBuf.get(outBuf, outPos.get(), outLen); 82 | inPos.add(inLen); 83 | outPos.add(outLen); 84 | } 85 | 86 | public void uncompress(int[] inBuf, IntWrapper inPos, int inLen, int[] outBuf, IntWrapper outPos) 87 | { 88 | DeltaZigzagEncoding.Decoder ctx = new DeltaZigzagEncoding.Decoder(0); 89 | 90 | int ip = inPos.get(); 91 | int op = outPos.get(); 92 | int vbcNum = 0, vbcShift = 24; // Varialbe Byte Context. 93 | 94 | int inPosLast = ip + inLen; 95 | while (ip < inPosLast) 96 | { 97 | // Fetch a byte value. 98 | int n = (int)((uint)inBuf[ip] >> vbcShift) & 0xFF; 99 | if (vbcShift > 0) 100 | { 101 | vbcShift -= 8; 102 | } 103 | else 104 | { 105 | vbcShift = 24; 106 | ip++; 107 | } 108 | // Decode variable byte and delta+zigzag. 
109 | vbcNum = (vbcNum << 7) + (n & 0x7F); 110 | if ((n & 0x80) == 0) 111 | { 112 | outBuf[op++] = ctx.decodeInt(vbcNum); 113 | vbcNum = 0; 114 | } 115 | } 116 | 117 | outPos.set(op); 118 | inPos.set(inPosLast); 119 | } 120 | 121 | public override string ToString() 122 | { 123 | return nameof(DeltaZigzagVariableByte); 124 | } 125 | } 126 | } -------------------------------------------------------------------------------- /src/CSharpFastPFOR/Differential/Delta.cs: -------------------------------------------------------------------------------- 1 | /** 2 | * This code is released under the 3 | * Apache License Version 2.0 http://www.apache.org/licenses/. 4 | * 5 | * (c) Daniel Lemire, http://lemire.me/en/ 6 | */ 7 | 8 | /** 9 | * Generic class to compute differential coding. 10 | * 11 | * @author Daniel Lemire 12 | * 13 | */ 14 | namespace Genbox.CSharpFastPFOR.Differential 15 | { 16 | public class Delta 17 | { 18 | /** 19 | * Apply differential coding (in-place). 20 | * 21 | * @param data 22 | * data to be modified 23 | */ 24 | public static void delta(int[] data) 25 | { 26 | for (int i = data.Length - 1; i > 0; --i) 27 | { 28 | data[i] -= data[i - 1]; 29 | } 30 | } 31 | 32 | /** 33 | * Apply differential coding (in-place) given an initial value. 34 | * 35 | * @param data 36 | * data to be modified 37 | * @param start 38 | * starting index 39 | * @param length 40 | * number of integers to process 41 | * @param init 42 | * initial value 43 | * @return next initial vale 44 | */ 45 | public static int delta(int[] data, int start, int length, int init) 46 | { 47 | int nextinit = data[start + length - 1]; 48 | for (int i = length - 1; i > 0; --i) 49 | { 50 | data[start + i] -= data[start + i - 1]; 51 | } 52 | data[start] -= init; 53 | return nextinit; 54 | } 55 | 56 | /** 57 | * Compute differential coding given an initial value. Output is written 58 | * to a provided array: must have length "length" or better. 
59 | * 60 | * @param data 61 | * data to be modified 62 | * @param start 63 | * starting index 64 | * @param length 65 | * number of integers to process 66 | * @param init 67 | * initial value 68 | * @param out 69 | * output array 70 | * @return next initial vale 71 | */ 72 | public static int delta(int[] data, int start, int length, int init, int[] @out) 73 | { 74 | for (int i = length - 1; i > 0; --i) 75 | { 76 | @out[i] = data[start + i] - data[start + i - 1]; 77 | } 78 | @out[0] = data[start] - init; 79 | return data[start + length - 1]; 80 | } 81 | 82 | /** 83 | * Undo differential coding (in-place). Effectively computes a prefix 84 | * sum. 85 | * 86 | * @param data 87 | * to be modified. 88 | */ 89 | public static void inverseDelta(int[] data) 90 | { 91 | for (int i = 1; i < data.Length; ++i) 92 | { 93 | data[i] += data[i - 1]; 94 | } 95 | } 96 | 97 | /** 98 | * Undo differential coding (in-place). Effectively computes a prefix 99 | * sum. Like inverseDelta, only faster. 100 | * 101 | * @param data 102 | * to be modified 103 | */ 104 | public static void fastinverseDelta(int[] data) 105 | { 106 | int sz0 = data.Length / 4 * 4; 107 | int i = 1; 108 | if (sz0 >= 4) 109 | { 110 | int a = data[0]; 111 | for (; i < sz0 - 4; i += 4) 112 | { 113 | a = data[i] += a; 114 | a = data[i + 1] += a; 115 | a = data[i + 2] += a; 116 | a = data[i + 3] += a; 117 | } 118 | } 119 | 120 | for (; i != data.Length; ++i) 121 | { 122 | data[i] += data[i - 1]; 123 | } 124 | } 125 | 126 | /** 127 | * Undo differential coding (in-place). Effectively computes a prefix 128 | * sum. Like inverseDelta, only faster. Uses an initial value. 
129 | * 130 | * @param data 131 | * to be modified 132 | * @param start 133 | * starting index 134 | * @param length 135 | * number of integers to process 136 | * @param init 137 | * initial value 138 | * @return next initial value 139 | */ 140 | public static int fastinverseDelta(int[] data, int start, int length, int init) 141 | { 142 | data[start] += init; 143 | int sz0 = length / 4 * 4; 144 | int i = 1; 145 | if (sz0 >= 4) 146 | { 147 | int a = data[start]; 148 | for (; i < sz0 - 4; i += 4) 149 | { 150 | a = data[start + i] += a; 151 | a = data[start + i + 1] += a; 152 | a = data[start + i + 2] += a; 153 | a = data[start + i + 3] += a; 154 | } 155 | } 156 | 157 | for (; i != length; ++i) 158 | { 159 | data[start + i] += data[start + i - 1]; 160 | } 161 | return data[start + length - 1]; 162 | } 163 | } 164 | } -------------------------------------------------------------------------------- /src/CSharpFastPFOR/Differential/IntegratedBinaryPacking.cs: -------------------------------------------------------------------------------- 1 | /** 2 | * This code is released under the 3 | * Apache License Version 2.0 http://www.apache.org/licenses/. 4 | * 5 | * (c) Daniel Lemire, http://lemire.me/en/ 6 | */ 7 | 8 | /** 9 | * Scheme based on a commonly used idea: can be extremely fast. 10 | * 11 | * You should only use this scheme on sorted arrays. Use BinaryPacking if you 12 | * have unsorted arrays. 13 | * 14 | * It encodes integers in blocks of 32 integers. For arrays containing an 15 | * arbitrary number of integers, you should use it in conjunction with another 16 | * CODEC: 17 | * 18 | *
19 | * IntegratedIntegerCODEC is = 20 | * new IntegratedComposition(new IntegratedBinaryPacking(), 21 | * new IntegratedVariableByte()) 22 | *23 | * 24 | *
25 | * For details, please see 26 | *
27 | *28 | * Daniel Lemire and Leonid Boytsov, Decoding billions of integers per second 29 | * through vectorization Software: Practice & Experience http://onlinelibrary.wiley.com/doi/10.1002/spe.2203/abstract http://arxiv.org/abs/1209.2137 33 | *
34 | *35 | * Daniel Lemire, Leonid Boytsov, Nathan Kurz, SIMD Compression and the 36 | * Intersection of Sorted Integers http://arxiv.org/abs/1401.6399 38 | *
39 | * 40 | * @author Daniel Lemire 41 | * 42 | */ 43 | 44 | namespace Genbox.CSharpFastPFOR.Differential 45 | { 46 | public class IntegratedBinaryPacking : IntegratedIntegerCODEC, SkippableIntegratedIntegerCODEC 47 | { 48 | private const int BLOCK_SIZE = 32; 49 | 50 | public void compress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos) 51 | { 52 | inlength = Util.greatestMultiple(inlength, BLOCK_SIZE); 53 | if (inlength == 0) 54 | return; 55 | @out[outpos.get()] = inlength; 56 | outpos.increment(); 57 | headlessCompress(@in, inpos, inlength, @out, outpos, new IntWrapper(0)); 58 | } 59 | 60 | public void uncompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos) 61 | { 62 | if (inlength == 0) 63 | return; 64 | int outlength = @in[inpos.get()]; 65 | inpos.increment(); 66 | headlessUncompress(@in, inpos, inlength, @out, outpos, outlength, new IntWrapper(0)); 67 | } 68 | 69 | public void headlessCompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos, IntWrapper initvalue) 70 | { 71 | inlength = Util.greatestMultiple(inlength, BLOCK_SIZE); 72 | if (inlength == 0) 73 | return; 74 | int tmpoutpos = outpos.get(); 75 | int initoffset = initvalue.get(); 76 | initvalue.set(@in[inpos.get() + inlength - 1]); 77 | int s = inpos.get(); 78 | for (; s + BLOCK_SIZE * 4 - 1 < inpos.get() + inlength; s += BLOCK_SIZE * 4) 79 | { 80 | int mbits1 = Util.maxdiffbits(initoffset, @in, s, BLOCK_SIZE); 81 | int initoffset2 = @in[s + 31]; 82 | int mbits2 = Util.maxdiffbits(initoffset2, @in, s + BLOCK_SIZE, BLOCK_SIZE); 83 | int initoffset3 = @in[s + BLOCK_SIZE + 31]; 84 | int mbits3 = Util 85 | .maxdiffbits(initoffset3, @in, s + 2 * BLOCK_SIZE, BLOCK_SIZE); 86 | int initoffset4 = @in[s + 2 * BLOCK_SIZE + 31]; 87 | int mbits4 = Util 88 | .maxdiffbits(initoffset4, @in, s + 3 * BLOCK_SIZE, BLOCK_SIZE); 89 | @out[tmpoutpos++] = (mbits1 << 24) | (mbits2 << 16) | (mbits3 << 8) 90 | | (mbits4); 91 | 
IntegratedBitPacking.integratedpack(initoffset, @in, s, @out, 92 | tmpoutpos, mbits1); 93 | tmpoutpos += mbits1; 94 | IntegratedBitPacking.integratedpack(initoffset2, @in, s + BLOCK_SIZE, @out, 95 | tmpoutpos, mbits2); 96 | tmpoutpos += mbits2; 97 | IntegratedBitPacking.integratedpack(initoffset3, @in, s + 2 * BLOCK_SIZE, 98 | @out, tmpoutpos, mbits3); 99 | tmpoutpos += mbits3; 100 | IntegratedBitPacking.integratedpack(initoffset4, @in, s + 3 * BLOCK_SIZE, 101 | @out, tmpoutpos, mbits4); 102 | tmpoutpos += mbits4; 103 | initoffset = @in[s + 3 * BLOCK_SIZE + 31]; 104 | } 105 | for (; s < inpos.get() + inlength; s += BLOCK_SIZE) 106 | { 107 | int mbits = Util.maxdiffbits(initoffset, @in, s, BLOCK_SIZE); 108 | @out[tmpoutpos++] = mbits; 109 | IntegratedBitPacking.integratedpack(initoffset, @in, s, @out, 110 | tmpoutpos, mbits); 111 | tmpoutpos += mbits; 112 | initoffset = @in[s + 31]; 113 | } 114 | inpos.add(inlength); 115 | outpos.set(tmpoutpos); 116 | } 117 | 118 | public void headlessUncompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos, int num, IntWrapper initvalue) 119 | { 120 | int outlength = Util.greatestMultiple(num, BLOCK_SIZE); 121 | int tmpinpos = inpos.get(); 122 | int initoffset = initvalue.get(); 123 | int s = outpos.get(); 124 | for (; s + BLOCK_SIZE * 4 - 1 < outpos.get() + outlength; s += BLOCK_SIZE * 4) 125 | { 126 | int mbits1 = (int)((uint)@in[tmpinpos] >> 24); 127 | int mbits2 = (int)((uint)@in[tmpinpos] >> 16) & 0xFF; 128 | int mbits3 = (int)((uint)@in[tmpinpos] >> 8) & 0xFF; 129 | int mbits4 = (@in[tmpinpos]) & 0xFF; 130 | 131 | ++tmpinpos; 132 | IntegratedBitPacking.integratedunpack(initoffset, @in, tmpinpos, 133 | @out, s, mbits1); 134 | tmpinpos += mbits1; 135 | initoffset = @out[s + 31]; 136 | IntegratedBitPacking.integratedunpack(initoffset, @in, tmpinpos, 137 | @out, s + BLOCK_SIZE, mbits2); 138 | tmpinpos += mbits2; 139 | initoffset = @out[s + BLOCK_SIZE + 31]; 140 | 
IntegratedBitPacking.integratedunpack(initoffset, @in, tmpinpos, 141 | @out, s + 2 * BLOCK_SIZE, mbits3); 142 | tmpinpos += mbits3; 143 | initoffset = @out[s + 2 * BLOCK_SIZE + 31]; 144 | IntegratedBitPacking.integratedunpack(initoffset, @in, tmpinpos, 145 | @out, s + 3 * BLOCK_SIZE, mbits4); 146 | tmpinpos += mbits4; 147 | initoffset = @out[s + 3 * BLOCK_SIZE + 31]; 148 | } 149 | for (; s < outpos.get() + outlength; s += BLOCK_SIZE) 150 | { 151 | int mbits = @in[tmpinpos]; 152 | ++tmpinpos; 153 | IntegratedBitPacking.integratedunpack(initoffset, @in, tmpinpos, 154 | @out, s, mbits); 155 | initoffset = @out[s + 31]; 156 | 157 | tmpinpos += mbits; 158 | } 159 | outpos.add(outlength); 160 | initvalue.set(initoffset); 161 | inpos.set(tmpinpos); 162 | } 163 | 164 | public override string ToString() 165 | { 166 | return nameof(IntegratedBinaryPacking); 167 | } 168 | } 169 | } -------------------------------------------------------------------------------- /src/CSharpFastPFOR/Differential/IntegratedByteIntegerCODEC.cs: -------------------------------------------------------------------------------- 1 | /** 2 | * This code is released under the 3 | * Apache License Version 2.0 http://www.apache.org/licenses/. 4 | * 5 | * (c) Daniel Lemire, http://lemire.me/en/ 6 | */ 7 | 8 | /** 9 | * Interface describing a CODEC to compress integers to bytes. 10 | * 11 | * "Integrated" means that it uses differential coding. 12 | * 13 | * @author Daniel Lemire 14 | * 15 | */ 16 | namespace Genbox.CSharpFastPFOR.Differential 17 | { 18 | public interface IntegratedByteIntegerCODEC : ByteIntegerCODEC 19 | { 20 | } 21 | } -------------------------------------------------------------------------------- /src/CSharpFastPFOR/Differential/IntegratedComposition.cs: -------------------------------------------------------------------------------- 1 | /** 2 | * This code is released under the 3 | * Apache License Version 2.0 http://www.apache.org/licenses/. 
4 | * 5 | * (c) Daniel Lemire, http://lemire.me/en/ 6 | */ 7 | 8 | /** 9 | * Helper class to compose schemes. 10 | * 11 | * @author Daniel Lemire 12 | */ 13 | 14 | namespace Genbox.CSharpFastPFOR.Differential 15 | { 16 | public class IntegratedComposition : IntegratedIntegerCODEC 17 | { 18 | private IntegratedIntegerCODEC F1; 19 | private IntegratedIntegerCODEC F2; 20 | 21 | /** 22 | * Compose a scheme from a first one (f1) and a second one (f2). The 23 | * first one is called first and then the second one tries to compress 24 | * whatever remains from the first run. 25 | * 26 | * By convention, the first scheme should be such that if, during 27 | * decoding, a 32-bit zero is first encountered, then there is no 28 | * output. 29 | * 30 | * @param f1 31 | * first codec 32 | * @param f2 33 | * second codec 34 | */ 35 | public IntegratedComposition(IntegratedIntegerCODEC f1, IntegratedIntegerCODEC f2) 36 | { 37 | F1 = f1; 38 | F2 = f2; 39 | } 40 | 41 | public void compress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos) 42 | { 43 | if (inlength == 0) 44 | { 45 | return; 46 | } 47 | int inposInit = inpos.get(); 48 | int outposInit = outpos.get(); 49 | F1.compress(@in, inpos, inlength, @out, outpos); 50 | if (outpos.get() == outposInit) 51 | { 52 | @out[outposInit] = 0; 53 | outpos.increment(); 54 | } 55 | inlength -= inpos.get() - inposInit; 56 | F2.compress(@in, inpos, inlength, @out, outpos); 57 | } 58 | 59 | public void uncompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos) 60 | { 61 | if (inlength == 0) 62 | return; 63 | int init = inpos.get(); 64 | F1.uncompress(@in, inpos, inlength, @out, outpos); 65 | inlength -= inpos.get() - init; 66 | F2.uncompress(@in, inpos, inlength, @out, outpos); 67 | } 68 | 69 | public override string ToString() 70 | { 71 | return F1 + " + " + F2; 72 | } 73 | } 74 | } -------------------------------------------------------------------------------- 
/src/CSharpFastPFOR/Differential/IntegratedIntCompressor.cs:
--------------------------------------------------------------------------------






/**
 * This is a convenience class that wraps a codec to provide
 * a "friendly" API.
 *
 */

using Genbox.CSharpFastPFOR.Port;

namespace Genbox.CSharpFastPFOR.Differential
{
    public class IntegratedIntCompressor
    {
        private SkippableIntegratedIntegerCODEC codec;

        /**
         * Constructor wrapping a codec.
         *
         * @param c the underlying codec
         */
        public IntegratedIntCompressor(SkippableIntegratedIntegerCODEC c)
        {
            codec = c;
        }

        /**
         * Constructor with default codec.
         */
        public IntegratedIntCompressor()
        {
            codec = new SkippableIntegratedComposition(new IntegratedBinaryPacking(), new IntegratedVariableByte());
        }

        /**
         * Compress an array and returns the compressed result as a new array.
         *
         * The first word of the result holds the number of input integers;
         * the codec output follows, starting at index 1.
         *
         * @param input array to be compressed
         * @return compressed array
         */
        public int[] compress(int[] input)
        {
            // NOTE(review): the scratch buffer assumes the codec never needs
            // more than input.Length + 1023 words - confirm for the codec in use.
            int[] compressed = new int[input.Length + 1024];
            compressed[0] = input.Length;
            IntWrapper outpos = new IntWrapper(1);
            IntWrapper initvalue = new IntWrapper(0);
            codec.headlessCompress(input, new IntWrapper(0),
                input.Length, compressed, outpos, initvalue);
            compressed = Arrays.copyOf(compressed, outpos.intValue());
            return compressed;
        }

        /**
         * Uncompress an array and returns the uncompressed result as a new array.
         *
         * @param compressed compressed array, as returned by compress
         * @return uncompressed array
         */
        public int[] uncompress(int[] compressed)
        {
            // compressed[0] is the number of integers stored by compress.
            int[] decompressed = new int[compressed[0]];
            IntWrapper inpos = new IntWrapper(1);
            codec.headlessUncompress(compressed, inpos,
                compressed.Length - inpos.intValue(),
                decompressed, new IntWrapper(0),
                decompressed.Length, new IntWrapper(0));
            return decompressed;
        }
    }
}
--------------------------------------------------------------------------------
/src/CSharpFastPFOR/Differential/IntegratedIntegerCODEC.cs:
--------------------------------------------------------------------------------
/**
 * This code is released under the
 * Apache License Version 2.0 http://www.apache.org/licenses/.
 *
 * (c) Daniel Lemire, http://lemire.me/en/
 */

/**
 * This is just like IntegerCODEC, except that it indicates that delta coding is
 * "integrated", so that you don't need a separate step for delta coding.
 *
 * @author Daniel Lemire
 */
namespace Genbox.CSharpFastPFOR.Differential
{
    public interface IntegratedIntegerCODEC : IntegerCODEC
    {
    }
}

--------------------------------------------------------------------------------
/src/CSharpFastPFOR/Differential/SkippableIntegratedComposition.cs:
--------------------------------------------------------------------------------
/**
 * This code is released under the
 * Apache License Version 2.0 http://www.apache.org/licenses/.
 *
 * (c) Daniel Lemire, http://lemire.me/en/
 */

/**
 * Helper class to compose schemes.
 *
 * @author Daniel Lemire
 */
namespace Genbox.CSharpFastPFOR.Differential
{
    public class SkippableIntegratedComposition : SkippableIntegratedIntegerCODEC
    {
        private SkippableIntegratedIntegerCODEC F1;
        private SkippableIntegratedIntegerCODEC F2;

        /**
         * Compose a scheme from a first one (f1) and a second one (f2). The first
         * one is called first and then the second one tries to compress whatever
         * remains from the first run.
         *
         * By convention, the first scheme should be such that if, during decoding,
         * a 32-bit zero is first encountered, then there is no output.
         *
         * @param f1
         *            first codec
         * @param f2
         *            second codec
         */
        public SkippableIntegratedComposition(SkippableIntegratedIntegerCODEC f1, SkippableIntegratedIntegerCODEC f2)
        {
            F1 = f1;
            F2 = f2;
        }

        public override string ToString()
        {
            return F1 + " + " + F2;
        }

        /**
         * Compress with the first codec, then hand whatever it did not
         * consume to the second codec. The initial value is threaded through
         * both codecs.
         *
         * @param in        input array
         * @param inpos     read position; advanced by the codecs
         * @param inlength  number of integers to compress
         * @param out       output array
         * @param outpos    write position; advanced by the codecs
         * @param initvalue initial value for differential coding; updated by the codecs
         */
        public void headlessCompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos, IntWrapper initvalue)
        {
            if (inlength == 0)
                return;
            int init = inpos.get();
            F1.headlessCompress(@in, inpos, inlength, @out, outpos, initvalue);
            // NOTE(review): this tests the absolute position 0, not "F1 wrote
            // nothing" (compare IntegratedComposition.compress). Left as-is
            // because changing it could alter the stream format - confirm
            // against the decoder before touching it.
            if (outpos.get() == 0)
            {
                @out[0] = 0;
                outpos.increment();
            }
            inlength -= inpos.get() - init;
            F2.headlessCompress(@in, inpos, inlength, @out, outpos, initvalue);
        }

        /**
         * Uncompress with the first codec, then let the second codec decode
         * the integers that are still missing.
         *
         * @param in        array containing the compressed data
         * @param inpos     read position; advanced by the codecs
         * @param inlength  length of the compressed data
         * @param out       array receiving the decoded integers
         * @param outpos    write position; advanced by the codecs
         * @param num       total number of integers to decode
         * @param initvalue initial value for differential coding; updated by the codecs
         */
        public void headlessUncompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos, int num, IntWrapper initvalue)
        {
            if (inlength == 0)
                return;
            int init = inpos.get();
            int outInit = outpos.get();
            F1.headlessUncompress(@in, inpos, inlength, @out, outpos, num, initvalue);
            inlength -= inpos.get() - init;
            // Subtract only what F1 produced; outpos may have started at a
            // non-zero offset, so its absolute value is not the decoded count.
            num -= outpos.get() - outInit;
            F2.headlessUncompress(@in, inpos, inlength, @out, outpos, num, initvalue);
        }
    }
}
--------------------------------------------------------------------------------
/src/CSharpFastPFOR/Differential/SkippableIntegratedIntegerCODEC.cs:
--------------------------------------------------------------------------------
/**
 * This code is released under the
 * Apache License Version 2.0 http://www.apache.org/licenses/.
 *
 * (c) Daniel Lemire, http://lemire.me/en/
 */

/**
 * Interface describing a standard CODEC to compress integers. This is a
 * variation on the IntegerCODEC interface meant to be used for random access
 * and with integrated differential coding
 * (i.e., given a large array, you can segment it and decode just the subarray you need).
 *
 * The main differences are that the caller must specify the number of integers
 * to decode as well as the initial value (for differential coding). This
 * information might be stored elsewhere.
 *
 * @author Daniel Lemire
 */
namespace Genbox.CSharpFastPFOR.Differential
{
    public interface SkippableIntegratedIntegerCODEC
    {
        /**
         * Compress data from an array to another array.
         *
         * Both inpos and outpos are modified to represent how much data was read
         * and written: if 12 ints (inlength = 12) are compressed to 3 ints, then
         * inpos will be incremented by 12 while outpos will be incremented by 3.
         * IntWrapper is used to pass the values by reference.
         *
         * @param in
         *            input array
         * @param inpos
         *            location in the input array
         * @param inlength
         *            how many integers to compress
         * @param out
         *            output array
         * @param outpos
         *            where to write in the output array
         * @param initvalue
         *            initial value for the purpose of differential coding; the
         *            value is automatically updated
         */
        void headlessCompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos, IntWrapper initvalue);

        /**
         * Uncompress data from an array to another array.
         *
         * Both inpos and outpos parameters are modified to indicate new positions
         * after read/write.
         *
         * @param in
         *            array containing data in compressed form
         * @param inpos
         *            where to start reading in the array
         * @param inlength
         *            length of the compressed data (ignored by some schemes)
         * @param out
         *            array where to write the uncompressed output
         * @param outpos
         *            where to write the uncompressed output in out
         * @param num
         *            number of integers we want to decode; the actual number of
         *            integers decoded can be less
         * @param initvalue
         *            initial value for the purpose of differential coding; the
         *            value is automatically updated
         */
        void headlessUncompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos, int num, IntWrapper initvalue);
    }
}
--------------------------------------------------------------------------------
/src/CSharpFastPFOR/Differential/XorBinaryPacking.cs:
--------------------------------------------------------------------------------
/**
 * This code is released under the
 * Apache License Version 2.0 http://www.apache.org/licenses/.
 */

/**
 * BinaryPacking over XOR differential.
8 | * 9 | *IntegratedIntegerCODEC is = 10 | * new Composition(new XorBinaryPacking(), new VariableByte())11 | * 12 | * @author MURAOKA Taro http://github.com/koron 13 | */ 14 | 15 | using Genbox.CSharpFastPFOR.Port; 16 | 17 | namespace Genbox.CSharpFastPFOR.Differential 18 | { 19 | public class XorBinaryPacking : IntegratedIntegerCODEC 20 | { 21 | private const int BLOCK_LENGTH = 128; 22 | 23 | public void compress(int[] inBuf, IntWrapper inPos, int inLen, int[] outBuf, IntWrapper outPos) 24 | { 25 | inLen = inLen - inLen % BLOCK_LENGTH; 26 | if (inLen == 0) 27 | return; 28 | 29 | outBuf[outPos.get()] = inLen; 30 | outPos.increment(); 31 | 32 | int context = 0; 33 | int[] work = new int[32]; 34 | 35 | int op = outPos.get(); 36 | int ip = inPos.get(); 37 | int inPosLast = ip + inLen; 38 | for (; ip < inPosLast; ip += BLOCK_LENGTH) 39 | { 40 | int bits1 = xorMaxBits(inBuf, ip + 0, 32, context); 41 | int bits2 = xorMaxBits(inBuf, ip + 32, 32, 42 | inBuf[ip + 31]); 43 | int bits3 = xorMaxBits(inBuf, ip + 64, 32, 44 | inBuf[ip + 63]); 45 | int bits4 = xorMaxBits(inBuf, ip + 96, 32, 46 | inBuf[ip + 95]); 47 | outBuf[op++] = (bits1 << 24) | (bits2 << 16) 48 | | (bits3 << 8) | (bits4 << 0); 49 | op += xorPack(inBuf, ip + 0, outBuf, op, bits1, 50 | context, work); 51 | op += xorPack(inBuf, ip + 32, outBuf, op, bits2, 52 | inBuf[ip + 31], work); 53 | op += xorPack(inBuf, ip + 64, outBuf, op, bits3, 54 | inBuf[ip + 63], work); 55 | op += xorPack(inBuf, ip + 96, outBuf, op, bits4, 56 | inBuf[ip + 95], work); 57 | context = inBuf[ip + 127]; 58 | } 59 | 60 | inPos.add(inLen); 61 | outPos.set(op); 62 | } 63 | 64 | public void uncompress(int[] inBuf, IntWrapper inPos, int inLen, int[] outBuf, IntWrapper outPos) 65 | { 66 | if (inLen == 0) 67 | return; 68 | 69 | int outLen = inBuf[inPos.get()]; 70 | inPos.increment(); 71 | 72 | int context = 0; 73 | int[] work = new int[32]; 74 | 75 | int ip = inPos.get(); 76 | int op = outPos.get(); 77 | int outPosLast = op + outLen; 78 | for (; 
op < outPosLast; op += BLOCK_LENGTH) 79 | { 80 | int bits1 = (int)((uint)inBuf[ip] >> 24); 81 | int bits2 = (int)((uint)inBuf[ip] >> 16) & 0xFF; 82 | int bits3 = (int)((uint)inBuf[ip] >> 8) & 0xFF; 83 | int bits4 = (int)((uint)inBuf[ip] >> 0) & 0xFF; 84 | ++ip; 85 | ip += xorUnpack(inBuf, ip, outBuf, op + 0, bits1, 86 | context, work); 87 | ip += xorUnpack(inBuf, ip, outBuf, op + 32, bits2, 88 | outBuf[op + 31], work); 89 | ip += xorUnpack(inBuf, ip, outBuf, op + 64, bits3, 90 | outBuf[op + 63], work); 91 | ip += xorUnpack(inBuf, ip, outBuf, op + 96, bits4, 92 | outBuf[op + 95], work); 93 | context = outBuf[op + 127]; 94 | } 95 | 96 | outPos.add(outLen); 97 | inPos.set(ip); 98 | } 99 | 100 | private static int xorMaxBits(int[] buf, int offset, int length, int context) 101 | { 102 | int mask = buf[offset] ^ context; 103 | int M = offset + length; 104 | for (int i = offset + 1, prev = offset; i < M; ++i, ++prev) 105 | { 106 | mask |= buf[i] ^ buf[prev]; 107 | } 108 | 109 | return 32 - Integer.numberOfLeadingZeros(mask); 110 | } 111 | 112 | private static int xorPack(int[] inBuf, int inOff, int[] outBuf, int outOff, int validBits, int context, int[] work) 113 | { 114 | work[0] = inBuf[inOff] ^ context; 115 | for (int i = 1, p = inOff + 1; i < 32; ++i, ++p) 116 | { 117 | work[i] = inBuf[p] ^ inBuf[p - 1]; 118 | } 119 | BitPacking.fastpackwithoutmask(work, 0, outBuf, outOff, 120 | validBits); 121 | 122 | return validBits; 123 | } 124 | 125 | private static int xorUnpack(int[] inBuf, int inOff, int[] outBuf, int outOff, int validBits, int context, int[] work) 126 | { 127 | BitPacking.fastunpack(inBuf, inOff, work, 0, validBits); 128 | outBuf[outOff] = context = work[0] ^ context; 129 | for (int i = 1, p = outOff + 1; i < 32; ++i, ++p) 130 | { 131 | outBuf[p] = context = work[i] ^ context; 132 | } 133 | return validBits; 134 | } 135 | 136 | public override string ToString() 137 | { 138 | return nameof(XorBinaryPacking); 139 | } 140 | } 141 | } 
--------------------------------------------------------------------------------
/src/CSharpFastPFOR/IntCompressor.cs:
--------------------------------------------------------------------------------

/**
 * This is a convenience class that wraps a codec to provide
 * a "friendly" API: whole arrays in, whole arrays out.
 *
 * The compressed array produced by this class starts with one word holding
 * the number of integers in the original input, followed by the output of
 * the wrapped codec.
 */

using Genbox.CSharpFastPFOR.Port;

namespace Genbox.CSharpFastPFOR
{
    public class IntCompressor
    {
        private SkippableIntegerCODEC codec;

        /**
         * Constructor wrapping a codec.
         *
         * @param c the underlying codec
         */
        public IntCompressor(SkippableIntegerCODEC c)
        {
            codec = c;
        }

        /**
         * Constructor with default codec (BinaryPacking followed by VariableByte).
         */
        public IntCompressor()
        {
            codec = new SkippableComposition(new BinaryPacking(), new VariableByte());
        }

        /**
         * Compress an array and returns the compressed result as a new array.
         *
         * Slot 0 of the result holds input.Length; the codec output starts at
         * slot 1.
         *
         * @param input array to be compressed
         * @return compressed array, trimmed to the number of words actually written
         */
        public int[] compress(int[] input)
        {
            // Incompressible data can come out larger than it went in: binary
            // packing spends one extra descriptor word per 128 values. A fixed
            // 1024-word margin is therefore not enough for large inputs, so add
            // ~1% proportional headroom on top of it.
            int[] compressed = new int[input.Length + input.Length / 100 + 1024];
            compressed[0] = input.Length;

            IntWrapper outpos = new IntWrapper(1);
            codec.headlessCompress(input, new IntWrapper(0), input.Length, compressed, outpos);

            return Arrays.copyOf(compressed, outpos.intValue());
        }

        /**
         * Uncompress an array and returns the uncompressed result as a new array.
         *
         * The input must have been produced by {@link #compress}: slot 0 is read
         * as the number of integers to decode, and decoding starts at slot 1.
         *
         * @param compressed compressed array
         * @return uncompressed array
         */
        public int[] uncompress(int[] compressed)
        {
            int[] decompressed = new int[compressed[0]];
            IntWrapper inpos = new IntWrapper(1);
            codec.headlessUncompress(compressed, inpos, compressed.Length - inpos.intValue(), decompressed, new IntWrapper(0), decompressed.Length);
            return decompressed;
        }
    }
}
--------------------------------------------------------------------------------
/src/CSharpFastPFOR/IntWrapper.cs:
--------------------------------------------------------------------------------
/**
 * This code is released under the
 * Apache License Version 2.0 http://www.apache.org/licenses/.
 *
 */

/**
 * Essentially a mutable wrapper around an integer.
 *
 * @author dwu
 */
namespace Genbox.CSharpFastPFOR
{
    public class IntWrapper
    {
        private int value;

        /**
         * Constructor: value set to 0.
         */
        public IntWrapper() : this(0)
        {
        }

        /**
         * Constructor: value set to provided argument.
         *
         * @param v
         *            value to wrap
         */
        public IntWrapper(int v)
        {
            value = v;
        }

        /**
         * add the provided value to the integer
         * @param v value to add
         */
        public void add(int v)
        {
            value += v;
        }

        /**
         * @return the wrapped value converted to double
         */
        public double doubleValue() => value;

        /**
         * @return the wrapped value converted to float
         */
        public float floatValue() => value;

        /**
         * @return the integer value
         */
        public int get() => value;

        /**
         * add 1 to the integer value
         */
        public void increment()
        {
            ++value;
        }

        /**
         * @return the integer value (same as get)
         */
        public int intValue() => value;

        /**
         * @return the wrapped value widened to long
         */
        public long longValue() => value;

        /**
         * Set the value to that of the specified integer.
         *
         * @param value
         *            specified integer value
         */
        public void set(int value)
        {
            this.value = value;
        }

        /**
         * @return the decimal text of the wrapped value
         */
        public override string ToString() => value.ToString();
    }
}
--------------------------------------------------------------------------------
/src/CSharpFastPFOR/IntegerCODEC.cs:
--------------------------------------------------------------------------------
/**
 * This code is released under the
 * Apache License Version 2.0 http://www.apache.org/licenses/.
 *
 * (c) Daniel Lemire, http://lemire.me/en/
 */

/**
 * Interface describing a standard CODEC to compress integers.
 *
 * @author Daniel Lemire
 *
 */
namespace Genbox.CSharpFastPFOR
{
    public interface IntegerCODEC
    {
        /**
         * Compress data from an array to another array.
         *
         * Both inpos and outpos are modified to represent how much data was
         * read and written: if 12 ints (inlength = 12) are compressed to 3
         * ints, then inpos will be incremented by 12 while outpos will be
         * incremented by 3. IntWrapper is used to pass the values by reference.
         *
         * @param in
         *            input array
         * @param inpos
         *            location in the input array
         * @param inlength
         *            how many integers to compress
         * @param out
         *            output array
         * @param outpos
         *            where to write in the output array
         */
        void compress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos);

        /**
         * Uncompress data from an array to another array.
         *
         * Both inpos and outpos parameters are modified to indicate new
         * positions after read/write.
         *
         * @param in
         *            array containing data in compressed form
         * @param inpos
         *            where to start reading in the array
         * @param inlength
         *            length of the compressed data (ignored by some
         *            schemes)
         * @param out
         *            array where to write the uncompressed output
         * @param outpos
         *            where to write the uncompressed output in out
         */
        void uncompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos);
    }
}
--------------------------------------------------------------------------------
/src/CSharpFastPFOR/JustCopy.cs:
--------------------------------------------------------------------------------
/**
 * This code is released under the
 * Apache License Version 2.0 http://www.apache.org/licenses/.
 *
 * (c) Daniel Lemire, http://lemire.me/en/
 */

/**
 * Pass-through codec: "compression" and "decompression" are plain array copies.
 *
 * @author Daniel Lemire
 *
 */

using System;

namespace Genbox.CSharpFastPFOR
{
    public class JustCopy : IntegerCODEC, SkippableIntegerCODEC
    {
        /**
         * Copies inlength integers from in to out and advances both positions
         * by inlength.
         */
        public void headlessCompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos)
        {
            transfer(@in, inpos, @out, outpos, inlength);
        }

        /**
         * Copies inlength integers back; equivalent to headlessUncompress with
         * num equal to inlength.
         */
        public void uncompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos)
        {
            headlessUncompress(@in, inpos, inlength, @out, outpos, inlength);
        }

        /**
         * Copies num integers from in to out and advances both positions by
         * num. The inlength argument is not used.
         */
        public void headlessUncompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos, int num)
        {
            transfer(@in, inpos, @out, outpos, num);
        }

        /**
         * Same as headlessCompress: no header is written.
         */
        public void compress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos)
        {
            headlessCompress(@in, inpos, inlength, @out, outpos);
        }

        public override string ToString()
        {
            return nameof(JustCopy);
        }

        // Copies count integers between the two cursors and moves both forward.
        private static void transfer(int[] source, IntWrapper sourcePos, int[] target, IntWrapper targetPos, int count)
        {
            Array.Copy(source, sourcePos.get(), target, targetPos.get(), count);
            sourcePos.add(count);
            targetPos.add(count);
        }
    }
}
--------------------------------------------------------------------------------
/src/CSharpFastPFOR/NewPFD.cs:
--------------------------------------------------------------------------------
/**
 * This code is released under the
 * Apache License Version 2.0 http://www.apache.org/licenses/.
 *
 * (c) Daniel Lemire, http://lemire.me/en/
 */

/**
 * NewPFD/NewPFOR: fast patching scheme by Yan et al.
 *
11 | * Follows: 12 | *
13 | * H. Yan, S. Ding, T. Suel, Inverted index compression and query processing 14 | * with optimized document ordering, in: WWW 09, 2009, pp. 401-410. 15 | *
16 | * using Simple16 as the secondary coder. 17 | * 18 | * It encodes integers in blocks of 128 integers. For arrays containing 19 | * an arbitrary number of integers, you should use it in conjunction 20 | * with another CODEC: 21 | * 22 | *IntegerCODEC ic = 23 | * new Composition(new NewPDF(), new VariableByte()).24 | * 25 | * Note that this does not use differential coding: if you are working on sorted 26 | * lists, you must compute the deltas separately. (Yes, this is true even though 27 | * the "D" at the end of the name probably stands for delta.) 28 | * 29 | * For multi-threaded applications, each thread should use its own NewPFD 30 | * object. 31 | * 32 | * @author Daniel Lemire 33 | */ 34 | namespace Genbox.CSharpFastPFOR 35 | { 36 | public class NewPFD : IntegerCODEC, SkippableIntegerCODEC 37 | { 38 | private const int BLOCK_SIZE = 128; 39 | 40 | private int[] exceptbuffer = new int[2 * BLOCK_SIZE]; 41 | 42 | /** 43 | * Constructor for the NewPFD CODEC. 44 | */ 45 | public NewPFD() 46 | { 47 | } 48 | 49 | public void headlessCompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos) 50 | { 51 | inlength = Util.greatestMultiple(inlength, BLOCK_SIZE); 52 | if (inlength == 0) 53 | return; 54 | encodePage(@in, inpos, inlength, @out, outpos); 55 | } 56 | 57 | protected static int[] bits = { 0, 1, 2, 3, 4, 5, 58 | 6, 7, 8, 9, 10, 11, 59 | 12, 13, 16, 20, 32 }; 60 | 61 | protected static int[] invbits = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 62 | 10, 11, 12, 13, 14, 14, 14, 15, 63 | 15, 15, 15, 16, 16, 16, 16, 16, 64 | 16, 16, 16, 16, 16, 16, 16 }; 65 | 66 | protected static void getBestBFromData(int[] @in, int pos, IntWrapper bestb, IntWrapper bestexcept) 67 | { 68 | int mb = Util.maxbits(@in, pos, BLOCK_SIZE); 69 | int mini = 0; 70 | if (mini + 28 < bits[invbits[mb]]) 71 | mini = bits[invbits[mb]] - 28; // 28 is the max for 72 | // exceptions 73 | int besti = bits.Length - 1; 74 | int exceptcounter = 0; 75 | for (int i = mini; i < bits.Length 
- 1; ++i) 76 | { 77 | int tmpcounter = 0; 78 | for (int k = pos; k < BLOCK_SIZE + pos; ++k) 79 | if ((int)((uint)@in[k] >> bits[i]) != 0) 80 | ++tmpcounter; 81 | if (tmpcounter * 10 <= BLOCK_SIZE) 82 | { 83 | besti = i; 84 | exceptcounter = tmpcounter; 85 | break; 86 | } 87 | } 88 | bestb.set(besti); 89 | bestexcept.set(exceptcounter); 90 | } 91 | 92 | private void encodePage(int[] @in, IntWrapper inpos, int thissize, int[] @out, IntWrapper outpos) 93 | { 94 | int tmpoutpos = outpos.get(); 95 | int tmpinpos = inpos.get(); 96 | IntWrapper bestb = new IntWrapper(); 97 | IntWrapper bestexcept = new IntWrapper(); 98 | for (int finalinpos = tmpinpos + thissize; tmpinpos 99 | + BLOCK_SIZE <= finalinpos; tmpinpos += BLOCK_SIZE) 100 | { 101 | getBestBFromData(@in, tmpinpos, bestb, bestexcept); 102 | int tmpbestb = bestb.get(); 103 | int nbrexcept = bestexcept.get(); 104 | int exceptsize = 0; 105 | int remember = tmpoutpos; 106 | tmpoutpos++; 107 | if (nbrexcept > 0) 108 | { 109 | for (int i = 0, c = 0; i < BLOCK_SIZE; ++i) 110 | { 111 | if ((int)((uint)@in[tmpinpos + i] >> bits[tmpbestb]) != 0) 112 | { 113 | exceptbuffer[c + nbrexcept] = i; 114 | exceptbuffer[c] = (int)((uint)@in[tmpinpos + i] >> bits[tmpbestb]); 115 | ++c; 116 | } 117 | } 118 | exceptsize = S16.compress(exceptbuffer, 0, 119 | 2 * nbrexcept, @out, tmpoutpos); 120 | tmpoutpos += exceptsize; 121 | } 122 | @out[remember] = tmpbestb | (nbrexcept << 8) 123 | | (exceptsize << 16); 124 | for (int k = 0; k < BLOCK_SIZE; k += 32) 125 | { 126 | BitPacking.fastpack(@in, tmpinpos + k, @out, 127 | tmpoutpos, bits[tmpbestb]); 128 | tmpoutpos += bits[tmpbestb]; 129 | } 130 | } 131 | inpos.set(tmpinpos); 132 | outpos.set(tmpoutpos); 133 | } 134 | 135 | public void headlessUncompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos, int mynvalue) 136 | { 137 | if (inlength == 0) 138 | return; 139 | mynvalue = Util.greatestMultiple(mynvalue, BLOCK_SIZE); 140 | decodePage(@in, inpos, @out, outpos, 
mynvalue); 141 | } 142 | 143 | private void decodePage(int[] @in, IntWrapper inpos, int[] @out, IntWrapper outpos, int thissize) 144 | { 145 | int tmpoutpos = outpos.get(); 146 | int tmpinpos = inpos.get(); 147 | 148 | for (int run = 0; run < thissize / BLOCK_SIZE; ++run, tmpoutpos += BLOCK_SIZE) 149 | { 150 | int b = @in[tmpinpos] & 0xFF; 151 | int cexcept = (int)((uint)@in[tmpinpos] >> 8) & 0xFF; 152 | int exceptsize = (int)((uint)@in[tmpinpos] >> 16); 153 | ++tmpinpos; 154 | S16.uncompress(@in, tmpinpos, exceptsize, exceptbuffer, 155 | 0, 2 * cexcept); 156 | tmpinpos += exceptsize; 157 | for (int k = 0; k < BLOCK_SIZE; k += 32) 158 | { 159 | BitPacking.fastunpack(@in, tmpinpos, @out, 160 | tmpoutpos + k, bits[b]); 161 | tmpinpos += bits[b]; 162 | } 163 | for (int k = 0; k < cexcept; ++k) 164 | { 165 | @out[tmpoutpos + exceptbuffer[k + cexcept]] |= (exceptbuffer[k] << bits[b]); 166 | } 167 | } 168 | outpos.set(tmpoutpos); 169 | inpos.set(tmpinpos); 170 | } 171 | 172 | public void compress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos) 173 | { 174 | inlength = Util.greatestMultiple(inlength, BLOCK_SIZE); 175 | if (inlength == 0) 176 | return; 177 | @out[outpos.get()] = inlength; 178 | outpos.increment(); 179 | headlessCompress(@in, inpos, inlength, @out, outpos); 180 | } 181 | 182 | public void uncompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos) 183 | { 184 | if (inlength == 0) 185 | return; 186 | int outlength = @in[inpos.get()]; 187 | inpos.increment(); 188 | headlessUncompress(@in, inpos, inlength, @out, outpos, outlength); 189 | } 190 | 191 | public override string ToString() 192 | { 193 | return nameof(NewPFD); 194 | } 195 | } 196 | } -------------------------------------------------------------------------------- /src/CSharpFastPFOR/NewPFDS16.cs: -------------------------------------------------------------------------------- 1 | /** 2 | * This code is released under the 3 | * Apache License Version 
2.0 http://www.apache.org/licenses/. 4 | * 5 | * (c) Daniel Lemire, http://lemire.me/en/ 6 | */ 7 | 8 | /** 9 | * NewPFD/NewPFOR based on Simple16 by Yan et al. 10 | *
11 | * Follows: 12 | *
13 | * H. Yan, S. Ding, T. Suel, Inverted index compression and query processing 14 | * with optimized document ordering, in: WWW 09, 2009, pp. 401-410. 15 | *
16 | * using Simple16 as the secondary coder. 17 | * 18 | * It encodes integers in blocks of 128 integers. For arrays containing 19 | * an arbitrary number of integers, you should use it in conjunction 20 | * with another CODEC: 21 | * 22 | *
IntegerCODEC ic = 23 | * new Composition(new PDFS16(), new VariableByte()).24 | * 25 | * Note that this does not use differential coding: if you are working on sorted 26 | * lists, you must compute the deltas separately. 27 | * 28 | * For multi-threaded applications, each thread should use its own NewPFDS16 29 | * object. 30 | * 31 | * @author Daniel Lemire 32 | */ 33 | namespace Genbox.CSharpFastPFOR 34 | { 35 | public class NewPFDS16 : IntegerCODEC, SkippableIntegerCODEC 36 | { 37 | private const int BLOCK_SIZE = 128; 38 | 39 | private int[] exceptbuffer = new int[2 * BLOCK_SIZE]; 40 | 41 | /** 42 | * Constructor for the NewPFDS16 CODEC. 43 | */ 44 | public NewPFDS16() 45 | { 46 | } 47 | 48 | public void headlessCompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos) 49 | { 50 | inlength = Util.greatestMultiple(inlength, BLOCK_SIZE); 51 | if (inlength == 0) 52 | return; 53 | encodePage(@in, inpos, inlength, @out, outpos); 54 | } 55 | 56 | private static int[] bits = { 0, 1, 2, 3, 4, 5, 57 | 6, 7, 8, 9, 10, 11, 58 | 12, 13, 16, 20, 32 }; 59 | 60 | private static int[] invbits = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 61 | 10, 11, 12, 13, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 16, 62 | 16, 16, 16, 16, 16, 16, 16 }; 63 | 64 | private static void getBestBFromData(int[] @in, int pos, IntWrapper bestb, IntWrapper bestexcept) 65 | { 66 | int mb = Util.maxbits(@in, pos, BLOCK_SIZE); 67 | int mini = 0; 68 | if (mini + 28 < bits[invbits[mb]]) 69 | mini = bits[invbits[mb]] - 28; // 28 is the max for 70 | // exceptions 71 | int besti = bits.Length - 1; 72 | int exceptcounter = 0; 73 | for (int i = mini; i < bits.Length - 1; ++i) 74 | { 75 | int tmpcounter = 0; 76 | for (int k = pos; k < BLOCK_SIZE + pos; ++k) 77 | if ((int)((uint)@in[k] >> bits[i]) != 0) 78 | ++tmpcounter; 79 | if (tmpcounter * 10 <= BLOCK_SIZE) 80 | { 81 | besti = i; 82 | exceptcounter = tmpcounter; 83 | break; 84 | } 85 | } 86 | bestb.set(besti); 87 | bestexcept.set(exceptcounter); 
88 | } 89 | 90 | private void encodePage(int[] @in, IntWrapper inpos, int thissize, int[] @out, IntWrapper outpos) 91 | { 92 | int tmpoutpos = outpos.get(); 93 | int tmpinpos = inpos.get(); 94 | IntWrapper bestb = new IntWrapper(); 95 | IntWrapper bestexcept = new IntWrapper(); 96 | for (int finalinpos = tmpinpos + thissize; tmpinpos 97 | + BLOCK_SIZE <= finalinpos; tmpinpos += BLOCK_SIZE) 98 | { 99 | getBestBFromData(@in, tmpinpos, bestb, bestexcept); 100 | int tmpbestb = bestb.get(); 101 | int nbrexcept = bestexcept.get(); 102 | int exceptsize = 0; 103 | int remember = tmpoutpos; 104 | tmpoutpos++; 105 | if (nbrexcept > 0) 106 | { 107 | for (int i = 0, c = 0; i < BLOCK_SIZE; ++i) 108 | { 109 | if ((int)((uint)@in[tmpinpos + i] >> bits[tmpbestb]) != 0) 110 | { 111 | exceptbuffer[c + nbrexcept] = i; 112 | exceptbuffer[c] = (int)((uint)@in[tmpinpos + i] >> bits[tmpbestb]); 113 | ++c; 114 | } 115 | } 116 | exceptsize = S16.compress(exceptbuffer, 0, 117 | 2 * nbrexcept, @out, tmpoutpos); 118 | tmpoutpos += exceptsize; 119 | } 120 | @out[remember] = tmpbestb | (nbrexcept << 8) 121 | | (exceptsize << 16); 122 | for (int k = 0; k < BLOCK_SIZE; k += 32) 123 | { 124 | BitPacking.fastpack(@in, tmpinpos + k, @out, 125 | tmpoutpos, bits[tmpbestb]); 126 | tmpoutpos += bits[tmpbestb]; 127 | } 128 | } 129 | inpos.set(tmpinpos); 130 | outpos.set(tmpoutpos); 131 | } 132 | 133 | public void headlessUncompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos, int mynvalue) 134 | { 135 | if (inlength == 0) 136 | return; 137 | mynvalue = Util.greatestMultiple(mynvalue, BLOCK_SIZE); 138 | decodePage(@in, inpos, @out, outpos, mynvalue); 139 | } 140 | 141 | private void decodePage(int[] @in, IntWrapper inpos, int[] @out, IntWrapper outpos, int thissize) 142 | { 143 | int tmpoutpos = outpos.get(); 144 | int tmpinpos = inpos.get(); 145 | 146 | for (int run = 0; run < thissize / BLOCK_SIZE; ++run, tmpoutpos += BLOCK_SIZE) 147 | { 148 | int b = @in[tmpinpos] & 0xFF; 
149 | int cexcept = (int)((uint)@in[tmpinpos] >> 8) & 0xFF; 150 | int exceptsize = (int)((uint)@in[tmpinpos] >> 16); 151 | ++tmpinpos; 152 | S16.uncompress(@in, tmpinpos, exceptsize, exceptbuffer, 153 | 0, 2 * cexcept); 154 | tmpinpos += exceptsize; 155 | for (int k = 0; k < BLOCK_SIZE; k += 32) 156 | { 157 | BitPacking.fastunpack(@in, tmpinpos, @out, 158 | tmpoutpos + k, bits[b]); 159 | tmpinpos += bits[b]; 160 | } 161 | for (int k = 0; k < cexcept; ++k) 162 | { 163 | @out[tmpoutpos + exceptbuffer[k + cexcept]] |= (exceptbuffer[k] << bits[b]); 164 | } 165 | } 166 | outpos.set(tmpoutpos); 167 | inpos.set(tmpinpos); 168 | } 169 | 170 | public void compress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos) 171 | { 172 | inlength = Util.greatestMultiple(inlength, BLOCK_SIZE); 173 | if (inlength == 0) 174 | return; 175 | @out[outpos.get()] = inlength; 176 | outpos.increment(); 177 | headlessCompress(@in, inpos, inlength, @out, outpos); 178 | } 179 | 180 | public void uncompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos) 181 | { 182 | if (inlength == 0) 183 | return; 184 | int outlength = @in[inpos.get()]; 185 | inpos.increment(); 186 | headlessUncompress(@in, inpos, inlength, @out, outpos, outlength); 187 | } 188 | 189 | public override string ToString() 190 | { 191 | return nameof(NewPFDS16); 192 | } 193 | } 194 | } -------------------------------------------------------------------------------- /src/CSharpFastPFOR/NewPFDS9.cs: -------------------------------------------------------------------------------- 1 | /** 2 | * This code is released under the 3 | * Apache License Version 2.0 http://www.apache.org/licenses/. 4 | * 5 | * (c) Daniel Lemire, http://lemire.me/en/ 6 | */ 7 | 8 | /** 9 | * NewPFD/NewPFOR based on Simple9 by Yan et al. 10 | *
11 | * Follows: 12 | *
13 | * H. Yan, S. Ding, T. Suel, Inverted index compression and query processing 14 | * with optimized document ordering, in: WWW 09, 2009, pp. 401-410. 15 | *
16 | * using Simple9 as the secondary coder. 17 | * 18 | * It encodes integers in blocks of 128 integers. For arrays containing 19 | * an arbitrary number of integers, you should use it in conjunction 20 | * with another CODEC: 21 | * 22 | *IntegerCODEC ic = new Composition(new PDFS9(), new VariableByte()).23 | * 24 | * Note that this does not use differential coding: if you are working on sorted 25 | * lists, you must compute the deltas separately. 26 | * 27 | * For multi-threaded applications, each thread should use its own NewPFDS9 28 | * object. 29 | * 30 | * @author Daniel Lemire 31 | */ 32 | namespace Genbox.CSharpFastPFOR 33 | { 34 | public class NewPFDS9 : IntegerCODEC, SkippableIntegerCODEC 35 | { 36 | private const int BLOCK_SIZE = 128; 37 | 38 | private int[] exceptbuffer = new int[2 * BLOCK_SIZE]; 39 | 40 | /** 41 | * Constructor for the NewPFDS9 CODEC. 42 | */ 43 | public NewPFDS9() 44 | { 45 | } 46 | 47 | public void headlessCompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos) 48 | { 49 | inlength = Util.greatestMultiple(inlength, BLOCK_SIZE); 50 | if (inlength == 0) 51 | return; 52 | encodePage(@in, inpos, inlength, @out, outpos); 53 | } 54 | 55 | private static int[] bits = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 56 | 11, 12, 13, 16, 20, 32 }; 57 | private static int[] invbits = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 58 | 10, 11, 12, 13, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 16, 59 | 16, 16, 16, 16, 16, 16, 16 }; 60 | 61 | private static void getBestBFromData(int[] @in, int pos, IntWrapper bestb, IntWrapper bestexcept) 62 | { 63 | int mb = Util.maxbits(@in, pos, BLOCK_SIZE); 64 | int mini = 0; 65 | if (mini + 28 < bits[invbits[mb]]) 66 | mini = bits[invbits[mb]] - 28; // 28 is the max for 67 | // exceptions 68 | int besti = bits.Length - 1; 69 | int exceptcounter = 0; 70 | for (int i = mini; i < bits.Length - 1; ++i) 71 | { 72 | int tmpcounter = 0; 73 | for (int k = pos; k < BLOCK_SIZE + pos; ++k) 74 | if ((int)((uint)@in[k] 
>> bits[i]) != 0)
                        ++tmpcounter;
                if (tmpcounter * 10 <= BLOCK_SIZE)
                {
                    besti = i;
                    exceptcounter = tmpcounter;
                    break;
                }
            }
            bestb.set(besti);
            bestexcept.set(exceptcounter);
        }

        /**
         * Encodes every whole block of BLOCK_SIZE integers found in
         * [inpos, inpos + thissize).
         *
         * Each block is written as one header word (index into bits in the low
         * byte, exception count in bits 8-15, number of S9 words in the upper
         * 16 bits), then the S9-compressed exceptions (high parts first,
         * positions second), then the bit-packed values.
         *
         * @param in       input array
         * @param inpos    read position, advanced past the encoded blocks
         * @param thissize number of integers available for encoding
         * @param out      output array
         * @param outpos   write position, advanced past the written words
         */
        private void encodePage(int[] @in, IntWrapper inpos, int thissize, int[] @out, IntWrapper outpos)
        {
            int writeAt = outpos.get();
            int readAt = inpos.get();
            int stopAt = readAt + thissize;
            IntWrapper chosenB = new IntWrapper();
            IntWrapper chosenCount = new IntWrapper();

            while (readAt + BLOCK_SIZE <= stopAt)
            {
                getBestBFromData(@in, readAt, chosenB, chosenCount);
                int bIndex = chosenB.get();
                int exceptions = chosenCount.get();
                int width = bits[bIndex];

                // Reserve the header slot; it is filled once the exception size is known.
                int headerAt = writeAt++;
                int packedExceptions = 0;
                if (exceptions > 0)
                {
                    int filled = 0;
                    for (int offset = 0; offset < BLOCK_SIZE; ++offset)
                    {
                        int high = (int)((uint)@in[readAt + offset] >> width);
                        if (high == 0)
                            continue;
                        exceptbuffer[filled + exceptions] = offset;
                        exceptbuffer[filled] = high;
                        ++filled;
                    }
                    packedExceptions = S9.compress(exceptbuffer, 0, 2 * exceptions, @out, writeAt);
                    writeAt += packedExceptions;
                }
                @out[headerAt] = bIndex | (exceptions << 8) | (packedExceptions << 16);

                // 32 values packed at `width` bits occupy exactly `width` words.
                for (int lane = 0; lane < BLOCK_SIZE; lane += 32)
                {
                    BitPacking.fastpack(@in, readAt + lane, @out, writeAt, width);
                    writeAt += width;
                }
                readAt += BLOCK_SIZE;
            }
            inpos.set(readAt);
            outpos.set(writeAt);
        }

        /**
         * Decodes mynvalue integers (rounded down to a multiple of BLOCK_SIZE)
         * from data that carries no length header.
         *
         * @param in       compressed input
         * @param inpos    read position, advanced past the consumed words
         * @param inlength compressed length; only tested against zero here
         * @param out      output array
         * @param outpos   write position, advanced past the decoded integers
         * @param mynvalue number of integers to decode
         */
        public void headlessUncompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos, int mynvalue)
        {
            if (inlength != 0)
            {
                int wholeBlocks = Util.greatestMultiple(mynvalue, BLOCK_SIZE);
                decodePage(@in, inpos, @out, outpos, wholeBlocks);
            }
        }

        /**
         * Decodes thissize / BLOCK_SIZE blocks written by encodePage.
         *
         * @param in       compressed input
         * @param inpos    read position, advanced past the consumed words
         * @param out      output array
         * @param outpos   write position, advanced past the decoded integers
         * @param thissize number of integers to decode
         */
        private void decodePage(int[] @in, IntWrapper inpos, int[] @out, IntWrapper outpos, int thissize)
        {
            int writeAt = outpos.get();
            int readAt = inpos.get();
            int blocks = thissize / BLOCK_SIZE;

            for (int block = 0; block < blocks; ++block)
            {
                uint header = (uint)@in[readAt];
                ++readAt;
                int bIndex = (int)(header & 0xFFu);
                int exceptions = (int)(header >> 8) & 0xFF;
                int packedExceptions = (int)(header >> 16);

                S9.uncompress(@in, readAt, packedExceptions, exceptbuffer, 0, 2 * exceptions);
                readAt += packedExceptions;

                int width = bits[bIndex];
                for (int lane = 0; lane < BLOCK_SIZE; lane += 32)
                {
                    BitPacking.fastunpack(@in, readAt, @out, writeAt + lane, width);
                    readAt += width;
                }

                // Patch the high parts back onto the values that overflowed.
                for (int e = 0; e < exceptions; ++e)
                {
                    int slot = writeAt + exceptbuffer[e + exceptions];
                    @out[slot] |= exceptbuffer[e] << width;
                }
                writeAt += BLOCK_SIZE;
            }
            outpos.set(writeAt);
            inpos.set(readAt);
        }

        /**
         * Compresses the largest multiple of BLOCK_SIZE integers not exceeding
         * inlength, prefixed by one word holding that count. Does nothing when
         * fewer than BLOCK_SIZE integers are supplied.
         */
        public void compress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos)
        {
            int usable = Util.greatestMultiple(inlength, BLOCK_SIZE);
            if (usable == 0)
                return;

            @out[outpos.get()] = usable;
            outpos.increment();
            headlessCompress(@in, inpos, usable, @out, outpos);
        }

        /**
         * Reads the count word written by compress and decodes that many
         * integers. Does nothing when inlength is zero.
         */
        public void uncompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos)
        {
            if (inlength == 0)
                return;

            int expected = @in[inpos.get()];
            inpos.increment();
            headlessUncompress(@in, inpos, inlength, @out, outpos, expected);
        }

        public override string ToString()
        {
            string label = nameof(NewPFDS9);
            return label;
        }
    }
}
--------------------------------------------------------------------------------
/src/CSharpFastPFOR/OptPFD.cs:
--------------------------------------------------------------------------------
/**
 * This code is released under the
 * Apache License Version 2.0 http://www.apache.org/licenses/.
4 | * 5 | * (c) Daniel Lemire, http://lemire.me/en/ 6 | */ 7 | 8 | /** 9 | * OptPFD: fast patching scheme by Yan et al. 10 | *
11 | * Follows: 12 | *
13 | * H. Yan, S. Ding, T. Suel, Inverted index compression and query processing 14 | * with optimized document ordering, in: WWW 09, 2009, pp. 401-410. 15 | *
16 | * using Simple16 as the secondary coder. 17 | * 18 | * It encodes integers in blocks of 128 integers. For arrays containing 19 | * an arbitrary number of integers, you should use it in conjunction 20 | * with another CODEC: 21 | * 22 | *IntegerCODEC ic = new Composition(new OptPFD(), new VariableByte()).23 | * 24 | * Note that this does not use differential coding: if you are working on sorted 25 | * lists, you must compute the deltas separately. (Yes, this is true even though 26 | * the "D" at the end of the name probably stands for delta.) 27 | * 28 | * For multi-threaded applications, each thread should use its own OptPFD 29 | * object. 30 | * 31 | * @author Daniel Lemire 32 | */ 33 | namespace Genbox.CSharpFastPFOR 34 | { 35 | public class OptPFD : IntegerCODEC, SkippableIntegerCODEC 36 | { 37 | private const int BLOCK_SIZE = 128; 38 | 39 | private int[] exceptbuffer = new int[2 * BLOCK_SIZE]; 40 | 41 | /** 42 | * Constructor for the OptPFD CODEC. 43 | */ 44 | public OptPFD() 45 | { 46 | } 47 | 48 | public void headlessCompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos) 49 | { 50 | inlength = Util.greatestMultiple(inlength, BLOCK_SIZE); 51 | if (inlength == 0) 52 | return; 53 | encodePage(@in, inpos, inlength, @out, outpos); 54 | } 55 | 56 | private static int[] bits = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 57 | 11, 12, 13, 16, 20, 32 }; 58 | private static int[] invbits = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 59 | 10, 11, 12, 13, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 16, 60 | 16, 16, 16, 16, 16, 16, 16 }; 61 | 62 | private void getBestBFromData(int[] @in, int pos, IntWrapper bestb, IntWrapper bestexcept) 63 | { 64 | int mb = Util.maxbits(@in, pos, BLOCK_SIZE); 65 | int mini = 0; 66 | if (mini + 28 < bits[invbits[mb]]) 67 | mini = bits[invbits[mb]] - 28; // 28 is the max for 68 | // exceptions 69 | int besti = bits.Length - 1; 70 | int bestcost = bits[besti] * 4; 71 | int exceptcounter = 0; 72 | for (int i = mini; i < 
bits.Length - 1; ++i) 73 | { 74 | int tmpcounter = 0; 75 | for (int k = pos; k < BLOCK_SIZE + pos; ++k) 76 | if ((int)((uint)@in[k] >> bits[i]) != 0) 77 | { 78 | ++tmpcounter; 79 | } 80 | if (tmpcounter == BLOCK_SIZE) 81 | continue; // no need 82 | for (int k = pos, c = 0; k < pos + BLOCK_SIZE; ++k) 83 | if ((int)((uint)@in[k] >> bits[i]) != 0) 84 | { 85 | exceptbuffer[tmpcounter + c] = k - pos; 86 | exceptbuffer[c] = (int)((uint)@in[k] >> bits[i]); 87 | ++c; 88 | } 89 | 90 | int thiscost = bits[i] * 4 + S16.estimatecompress(exceptbuffer, 0, 2 * tmpcounter); 91 | if (thiscost <= bestcost) 92 | { 93 | bestcost = thiscost; 94 | besti = i; 95 | exceptcounter = tmpcounter; 96 | } 97 | } 98 | 99 | bestb.set(besti); 100 | bestexcept.set(exceptcounter); 101 | } 102 | 103 | private void encodePage(int[] @in, IntWrapper inpos, int thissize, int[] @out, IntWrapper outpos) 104 | { 105 | int tmpoutpos = outpos.get(); 106 | int tmpinpos = inpos.get(); 107 | IntWrapper bestb = new IntWrapper(); 108 | IntWrapper bestexcept = new IntWrapper(); 109 | for (int finalinpos = tmpinpos + thissize; tmpinpos + BLOCK_SIZE <= finalinpos; tmpinpos += BLOCK_SIZE) 110 | { 111 | getBestBFromData(@in, tmpinpos, bestb, bestexcept); 112 | int tmpbestb = bestb.get(); 113 | int nbrexcept = bestexcept.get(); 114 | int exceptsize = 0; 115 | int remember = tmpoutpos; 116 | tmpoutpos++; 117 | if (nbrexcept > 0) 118 | { 119 | int c = 0; 120 | for (int i = 0; i < BLOCK_SIZE; ++i) 121 | { 122 | if ((int)((uint)@in[tmpinpos + i] >> bits[tmpbestb]) != 0) 123 | { 124 | exceptbuffer[c + nbrexcept] = i; 125 | exceptbuffer[c] = (int)((uint)@in[tmpinpos + i] >> bits[tmpbestb]); 126 | ++c; 127 | } 128 | } 129 | exceptsize = S16.compress(exceptbuffer, 0, 130 | 2 * nbrexcept, @out, tmpoutpos); 131 | tmpoutpos += exceptsize; 132 | } 133 | @out[remember] = tmpbestb | (nbrexcept << 8) 134 | | (exceptsize << 16); 135 | for (int k = 0; k < BLOCK_SIZE; k += 32) 136 | { 137 | BitPacking.fastpack(@in, tmpinpos + k, @out, 
138 | tmpoutpos, bits[tmpbestb]); 139 | tmpoutpos += bits[tmpbestb]; 140 | } 141 | } 142 | inpos.set(tmpinpos); 143 | outpos.set(tmpoutpos); 144 | } 145 | 146 | public void headlessUncompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos, int mynvalue) 147 | { 148 | if (inlength == 0) 149 | return; 150 | mynvalue = Util.greatestMultiple(mynvalue, BLOCK_SIZE); 151 | decodePage(@in, inpos, @out, outpos, mynvalue); 152 | } 153 | 154 | private void decodePage(int[] @in, IntWrapper inpos, int[] @out, IntWrapper outpos, int thissize) 155 | { 156 | int tmpoutpos = outpos.get(); 157 | int tmpinpos = inpos.get(); 158 | 159 | for (int run = 0; run < thissize / BLOCK_SIZE; ++run, tmpoutpos += BLOCK_SIZE) 160 | { 161 | int b = @in[tmpinpos] & 0xFF; 162 | int cexcept = (int)((uint)@in[tmpinpos] >> 8) & 0xFF; 163 | int exceptsize = (int)((uint)@in[tmpinpos] >> 16); 164 | ++tmpinpos; 165 | S16.uncompress(@in, tmpinpos, exceptsize, exceptbuffer, 166 | 0, 2 * cexcept); 167 | tmpinpos += exceptsize; 168 | for (int k = 0; k < BLOCK_SIZE; k += 32) 169 | { 170 | BitPacking.fastunpack(@in, tmpinpos, @out, 171 | tmpoutpos + k, bits[b]); 172 | tmpinpos += bits[b]; 173 | } 174 | for (int k = 0; k < cexcept; ++k) 175 | { 176 | @out[tmpoutpos + exceptbuffer[k + cexcept]] |= (exceptbuffer[k] << bits[b]); 177 | } 178 | } 179 | outpos.set(tmpoutpos); 180 | inpos.set(tmpinpos); 181 | } 182 | 183 | public void compress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos) 184 | { 185 | inlength = inlength / BLOCK_SIZE * BLOCK_SIZE; 186 | if (inlength == 0) 187 | return; 188 | @out[outpos.get()] = inlength; 189 | outpos.increment(); 190 | headlessCompress(@in, inpos, inlength, @out, outpos); 191 | } 192 | 193 | public void uncompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos) 194 | { 195 | if (inlength == 0) 196 | return; 197 | int outlength = @in[inpos.get()]; 198 | inpos.increment(); 199 | headlessUncompress(@in, 
inpos, inlength, @out, outpos, outlength); 200 | } 201 | 202 | public override string ToString() 203 | { 204 | return nameof(OptPFD); 205 | } 206 | } 207 | } -------------------------------------------------------------------------------- /src/CSharpFastPFOR/OptPFDS16.cs: -------------------------------------------------------------------------------- 1 | /** 2 | * This code is released under the 3 | * Apache License Version 2.0 http://www.apache.org/licenses/. 4 | * 5 | * (c) Daniel Lemire, http://lemire.me/en/ 6 | */ 7 | 8 | /** 9 | * OptPFD based on Simple16 by Yan et al. 10 | *
11 | * Follows: 12 | *
13 | * H. Yan, S. Ding, T. Suel, Inverted index compression and query processing 14 | * with optimized document ordering, in: WWW 09, 2009, pp. 401-410. 15 | *
16 | * using Simple16 as the secondary coder. 17 | * 18 | * It encodes integers in blocks of 128 integers. For arrays containing 19 | * an arbitrary number of integers, you should use it in conjunction 20 | * with another CODEC: 21 | * 22 | *IntegerCODEC ic = new Composition(new OptPFDS16(), new VariableByte()).23 | * 24 | * Note that this does not use differential coding: if you are working on sorted 25 | * lists, you must compute the deltas separately. 26 | * 27 | * For multi-threaded applications, each thread should use its own OptPFDS16 28 | * object. 29 | * 30 | * @author Daniel Lemire 31 | */ 32 | namespace Genbox.CSharpFastPFOR 33 | { 34 | public class OptPFDS16 : IntegerCODEC, SkippableIntegerCODEC 35 | { 36 | private const int BLOCK_SIZE = 128; 37 | private int[] exceptbuffer = new int[2 * BLOCK_SIZE]; 38 | 39 | /** 40 | * Constructor for the OptPFDS16 CODEC. 41 | */ 42 | public OptPFDS16() 43 | { 44 | } 45 | 46 | public void headlessCompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos) 47 | { 48 | inlength = Util.greatestMultiple(inlength, BLOCK_SIZE); 49 | if (inlength == 0) 50 | return; 51 | 52 | encodePage(@in, inpos, inlength, @out, outpos); 53 | } 54 | 55 | private static int[] bits = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 56 | 11, 12, 13, 16, 20, 32 }; 57 | private static int[] invbits = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 58 | 10, 11, 12, 13, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 16, 59 | 16, 16, 16, 16, 16, 16, 16 }; 60 | 61 | private void getBestBFromData(int[] @in, int pos, IntWrapper bestb, IntWrapper bestexcept) 62 | { 63 | int mb = Util.maxbits(@in, pos, BLOCK_SIZE); 64 | int mini = 0; 65 | if (mini + 28 < bits[invbits[mb]]) 66 | mini = bits[invbits[mb]] - 28; // 28 is the max for 67 | // exceptions 68 | int besti = bits.Length - 1; 69 | int bestcost = bits[besti] * 4; 70 | int exceptcounter = 0; 71 | for (int i = mini; i < bits.Length - 1; ++i) 72 | { 73 | int tmpcounter = 0; 74 | for (int k = pos; k < BLOCK_SIZE 
+ pos; ++k) 75 | if ((int)((uint)@in[k] >> bits[i]) != 0) 76 | { 77 | ++tmpcounter; 78 | } 79 | if (tmpcounter == BLOCK_SIZE) 80 | continue; // no need 81 | for (int k = pos, c = 0; k < pos + BLOCK_SIZE; ++k) 82 | if ((int)((uint)@in[k] >> bits[i]) != 0) 83 | { 84 | exceptbuffer[tmpcounter + c] = k - pos; 85 | exceptbuffer[c] = (int)((uint)@in[k] >> bits[i]); 86 | ++c; 87 | } 88 | 89 | int thiscost = bits[i] * 4 + S16.estimatecompress(exceptbuffer, 0, 2 * tmpcounter); 90 | if (thiscost <= bestcost) 91 | { 92 | bestcost = thiscost; 93 | besti = i; 94 | exceptcounter = tmpcounter; 95 | } 96 | } 97 | bestb.set(besti); 98 | bestexcept.set(exceptcounter); 99 | } 100 | 101 | private void encodePage(int[] @in, IntWrapper inpos, int thissize, int[] @out, IntWrapper outpos) 102 | { 103 | int tmpoutpos = outpos.get(); 104 | int tmpinpos = inpos.get(); 105 | IntWrapper bestb = new IntWrapper(); 106 | IntWrapper bestexcept = new IntWrapper(); 107 | for (int finalinpos = tmpinpos + thissize; tmpinpos + BLOCK_SIZE <= finalinpos; tmpinpos += BLOCK_SIZE) 108 | { 109 | getBestBFromData(@in, tmpinpos, bestb, bestexcept); 110 | int tmpbestb = bestb.get(); 111 | int nbrexcept = bestexcept.get(); 112 | int exceptsize = 0; 113 | int remember = tmpoutpos; 114 | tmpoutpos++; 115 | if (nbrexcept > 0) 116 | { 117 | int c = 0; 118 | for (int i = 0; i < BLOCK_SIZE; ++i) 119 | { 120 | if ((int)((uint)@in[tmpinpos + i] >> bits[tmpbestb]) != 0) 121 | { 122 | exceptbuffer[c + nbrexcept] = i; 123 | exceptbuffer[c] = (int)((uint)@in[tmpinpos + i] >> bits[tmpbestb]); 124 | ++c; 125 | } 126 | } 127 | exceptsize = S16.compress(exceptbuffer, 0, 128 | 2 * nbrexcept, @out, tmpoutpos); 129 | tmpoutpos += exceptsize; 130 | } 131 | @out[remember] = tmpbestb | (nbrexcept << 8) 132 | | (exceptsize << 16); 133 | for (int k = 0; k < BLOCK_SIZE; k += 32) 134 | { 135 | BitPacking.fastpack(@in, tmpinpos + k, @out, 136 | tmpoutpos, bits[tmpbestb]); 137 | tmpoutpos += bits[tmpbestb]; 138 | } 139 | } 140 | 
inpos.set(tmpinpos); 141 | outpos.set(tmpoutpos); 142 | } 143 | 144 | public void headlessUncompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos, int mynvalue) 145 | { 146 | if (inlength == 0) 147 | return; 148 | mynvalue = Util.greatestMultiple(mynvalue, BLOCK_SIZE); 149 | decodePage(@in, inpos, @out, outpos, mynvalue); 150 | } 151 | 152 | private void decodePage(int[] @in, IntWrapper inpos, int[] @out, IntWrapper outpos, int thissize) 153 | { 154 | int tmpoutpos = outpos.get(); 155 | int tmpinpos = inpos.get(); 156 | 157 | for (int run = 0; run < thissize / BLOCK_SIZE; ++run, tmpoutpos += BLOCK_SIZE) 158 | { 159 | int b = @in[tmpinpos] & 0xFF; 160 | int cexcept = (int)((uint)@in[tmpinpos] >> 8) & 0xFF; 161 | int exceptsize = (int)((uint)@in[tmpinpos] >> 16); 162 | ++tmpinpos; 163 | S16.uncompress(@in, tmpinpos, exceptsize, exceptbuffer, 164 | 0, 2 * cexcept); 165 | tmpinpos += exceptsize; 166 | for (int k = 0; k < BLOCK_SIZE; k += 32) 167 | { 168 | BitPacking.fastunpack(@in, tmpinpos, @out, 169 | tmpoutpos + k, bits[b]); 170 | tmpinpos += bits[b]; 171 | } 172 | for (int k = 0; k < cexcept; ++k) 173 | { 174 | @out[tmpoutpos + exceptbuffer[k + cexcept]] |= (exceptbuffer[k] << bits[b]); 175 | } 176 | } 177 | outpos.set(tmpoutpos); 178 | inpos.set(tmpinpos); 179 | } 180 | 181 | public void compress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos) 182 | { 183 | inlength = inlength / BLOCK_SIZE * BLOCK_SIZE; 184 | if (inlength == 0) 185 | return; 186 | @out[outpos.get()] = inlength; 187 | outpos.increment(); 188 | headlessCompress(@in, inpos, inlength, @out, outpos); 189 | } 190 | 191 | public void uncompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos) 192 | { 193 | if (inlength == 0) 194 | return; 195 | int outlength = @in[inpos.get()]; 196 | inpos.increment(); 197 | headlessUncompress(@in, inpos, inlength, @out, outpos, outlength); 198 | } 199 | 200 | public override string ToString() 
201 | { 202 | return nameof(OptPFDS16); 203 | } 204 | } 205 | } -------------------------------------------------------------------------------- /src/CSharpFastPFOR/OptPFDS9.cs: -------------------------------------------------------------------------------- 1 | /** 2 | * This code is released under the 3 | * Apache License Version 2.0 http://www.apache.org/licenses/. 4 | * 5 | * (c) Daniel Lemire, http://lemire.me/en/ 6 | */ 7 | 8 | /** 9 | * OptPFD based on Simple9 by Yan et al. 10 | *
11 | * Follows: 12 | *
13 | * H. Yan, S. Ding, T. Suel, Inverted index compression and query processing 14 | * with optimized document ordering, in: WWW 09, 2009, pp. 401-410. 15 | *
16 | * using Simple9 as the secondary coder. 17 | * 18 | * It encodes integers in blocks of 128 integers. For arrays containing 19 | * an arbitrary number of integers, you should use it in conjunction 20 | * with another CODEC: 21 | * 22 | *IntegerCODEC ic = new Composition(new OptPFDS9(), new VariableByte()).23 | * 24 | * Note that this does not use differential coding: if you are working on sorted 25 | * lists, you must compute the deltas separately. 26 | * 27 | * For multi-threaded applications, each thread should use its own OptPFDS9 28 | * object. 29 | * 30 | * @author Daniel Lemire 31 | */ 32 | namespace Genbox.CSharpFastPFOR 33 | { 34 | public class OptPFDS9 : IntegerCODEC, SkippableIntegerCODEC 35 | { 36 | private const int BLOCK_SIZE = 128; 37 | private int[] exceptbuffer = new int[2 * BLOCK_SIZE]; 38 | 39 | /** 40 | * Constructor for the OptPFDS9 CODEC. 41 | */ 42 | public OptPFDS9() 43 | { 44 | } 45 | 46 | public void headlessCompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos) 47 | { 48 | inlength = Util.greatestMultiple(inlength, BLOCK_SIZE); 49 | if (inlength == 0) 50 | return; 51 | encodePage(@in, inpos, inlength, @out, outpos); 52 | } 53 | 54 | private static int[] bits = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 55 | 11, 12, 13, 16, 20, 32 }; 56 | private static int[] invbits = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 57 | 10, 11, 12, 13, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 16, 58 | 16, 16, 16, 16, 16, 16, 16 }; 59 | 60 | private void getBestBFromData(int[] @in, int pos, IntWrapper bestb, IntWrapper bestexcept) 61 | { 62 | int mb = Util.maxbits(@in, pos, BLOCK_SIZE); 63 | int mini = 0; 64 | if (mini + 28 < bits[invbits[mb]]) 65 | mini = bits[invbits[mb]] - 28; // 28 is the max for 66 | // exceptions 67 | int besti = bits.Length - 1; 68 | int bestcost = bits[besti] * 4; 69 | int exceptcounter = 0; 70 | for (int i = mini; i < bits.Length - 1; ++i) 71 | { 72 | int tmpcounter = 0; 73 | for (int k = pos; k < BLOCK_SIZE + pos; ++k) 
74 | if ((int)((uint)@in[k] >> bits[i]) != 0) 75 | { 76 | ++tmpcounter; 77 | } 78 | if (tmpcounter == BLOCK_SIZE) 79 | continue; // no need 80 | for (int k = pos, c = 0; k < pos + BLOCK_SIZE; ++k) 81 | if ((int)((uint)@in[k] >> bits[i]) != 0) 82 | { 83 | exceptbuffer[tmpcounter + c] = k - pos; 84 | exceptbuffer[c] = (int)((uint)@in[k] >> bits[i]); 85 | ++c; 86 | } 87 | 88 | int thiscost = bits[i] * 4 + S9.estimatecompress(exceptbuffer, 0, 2 * tmpcounter); 89 | if (thiscost <= bestcost) 90 | { 91 | bestcost = thiscost; 92 | besti = i; 93 | exceptcounter = tmpcounter; 94 | } 95 | } 96 | bestb.set(besti); 97 | bestexcept.set(exceptcounter); 98 | } 99 | 100 | private void encodePage(int[] @in, IntWrapper inpos, int thissize, 101 | int[] @out, IntWrapper outpos) 102 | { 103 | int tmpoutpos = outpos.get(); 104 | int tmpinpos = inpos.get(); 105 | IntWrapper bestb = new IntWrapper(); 106 | IntWrapper bestexcept = new IntWrapper(); 107 | for (int finalinpos = tmpinpos + thissize; tmpinpos + BLOCK_SIZE <= finalinpos; tmpinpos += BLOCK_SIZE) 108 | { 109 | getBestBFromData(@in, tmpinpos, bestb, bestexcept); 110 | int tmpbestb = bestb.get(); 111 | int nbrexcept = bestexcept.get(); 112 | int exceptsize = 0; 113 | int remember = tmpoutpos; 114 | tmpoutpos++; 115 | if (nbrexcept > 0) 116 | { 117 | int c = 0; 118 | for (int i = 0; i < BLOCK_SIZE; ++i) 119 | { 120 | if ((int)((uint)@in[tmpinpos + i] >> bits[tmpbestb]) != 0) 121 | { 122 | exceptbuffer[c + nbrexcept] = i; 123 | exceptbuffer[c] = (int)((uint)@in[tmpinpos + i] >> bits[tmpbestb]); 124 | ++c; 125 | } 126 | } 127 | exceptsize = S9.compress(exceptbuffer, 0, 128 | 2 * nbrexcept, @out, tmpoutpos); 129 | tmpoutpos += exceptsize; 130 | } 131 | @out[remember] = tmpbestb | (nbrexcept << 8) 132 | | (exceptsize << 16); 133 | for (int k = 0; k < BLOCK_SIZE; k += 32) 134 | { 135 | BitPacking.fastpack(@in, tmpinpos + k, @out, 136 | tmpoutpos, bits[tmpbestb]); 137 | tmpoutpos += bits[tmpbestb]; 138 | } 139 | } 140 | 
inpos.set(tmpinpos); 141 | outpos.set(tmpoutpos); 142 | } 143 | 144 | public void headlessUncompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos, int mynvalue) 145 | { 146 | if (inlength == 0) 147 | return; 148 | mynvalue = Util.greatestMultiple(mynvalue, BLOCK_SIZE); 149 | decodePage(@in, inpos, @out, outpos, mynvalue); 150 | } 151 | 152 | private void decodePage(int[] @in, IntWrapper inpos, int[] @out, IntWrapper outpos, int thissize) 153 | { 154 | int tmpoutpos = outpos.get(); 155 | int tmpinpos = inpos.get(); 156 | 157 | for (int run = 0; run < thissize / BLOCK_SIZE; ++run, tmpoutpos += BLOCK_SIZE) 158 | { 159 | int b = @in[tmpinpos] & 0xFF; 160 | int cexcept = (int)((uint)@in[tmpinpos] >> 8) & 0xFF; 161 | int exceptsize = (int)((uint)@in[tmpinpos] >> 16); 162 | ++tmpinpos; 163 | S9.uncompress(@in, tmpinpos, exceptsize, exceptbuffer, 164 | 0, 2 * cexcept); 165 | tmpinpos += exceptsize; 166 | for (int k = 0; k < BLOCK_SIZE; k += 32) 167 | { 168 | BitPacking.fastunpack(@in, tmpinpos, @out, 169 | tmpoutpos + k, bits[b]); 170 | tmpinpos += bits[b]; 171 | } 172 | for (int k = 0; k < cexcept; ++k) 173 | { 174 | @out[tmpoutpos + exceptbuffer[k + cexcept]] |= (exceptbuffer[k] << bits[b]); 175 | } 176 | } 177 | outpos.set(tmpoutpos); 178 | inpos.set(tmpinpos); 179 | } 180 | 181 | public void compress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos) 182 | { 183 | inlength = inlength / BLOCK_SIZE * BLOCK_SIZE; 184 | if (inlength == 0) 185 | return; 186 | @out[outpos.get()] = inlength; 187 | outpos.increment(); 188 | headlessCompress(@in, inpos, inlength, @out, outpos); 189 | } 190 | 191 | public void uncompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos) 192 | { 193 | if (inlength == 0) 194 | return; 195 | int outlength = @in[inpos.get()]; 196 | inpos.increment(); 197 | headlessUncompress(@in, inpos, inlength, @out, outpos, outlength); 198 | } 199 | 200 | public override string ToString() 
201 | { 202 | return nameof(OptPFDS9); 203 | } 204 | } 205 | } -------------------------------------------------------------------------------- /src/CSharpFastPFOR/Port/Arrays.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Linq; 3 | 4 | namespace Genbox.CSharpFastPFOR.Port 5 | { 6 | public class Arrays 7 | { 8 | private static void rangeCheck(int arrayLen, int fromIndex, int toIndex) 9 | { 10 | if (fromIndex > toIndex) 11 | throw new ArgumentOutOfRangeException("fromIndex(" + fromIndex + ") > toIndex(" + toIndex + ")"); 12 | 13 | if (fromIndex < 0) 14 | throw new ArgumentOutOfRangeException(nameof(fromIndex)); 15 | 16 | if (toIndex > arrayLen) 17 | throw new ArgumentOutOfRangeException(nameof(toIndex)); 18 | } 19 | 20 | public static void fill
11 | * Adapted by D. Lemire from the Apache Lucene project. 12 | *
13 | */ 14 | 15 | using System; 16 | 17 | namespace Genbox.CSharpFastPFOR 18 | { 19 | public class S16 20 | { 21 | /** 22 | * Compress an integer array using Simple16 23 | * 24 | * 25 | * @param in 26 | * array to compress 27 | * @param currentPos 28 | * where to start reading 29 | * @param inlength 30 | * how many integers to read 31 | * @param out output array 32 | * @param tmpoutpos location in the output array 33 | * @return the number of 32-bit words written (in compressed form) 34 | */ 35 | public static int compress(int[] @in, int currentPos, int inlength, int[] @out, int tmpoutpos) 36 | { 37 | int outpos = tmpoutpos; 38 | int finalin = currentPos + inlength; 39 | while (currentPos < finalin) 40 | { 41 | int inoffset = compressblock(@out, outpos++, @in, 42 | currentPos, inlength); 43 | if (inoffset == -1) 44 | throw new Exception("Too big a number"); 45 | currentPos += inoffset; 46 | inlength -= inoffset; 47 | } 48 | return outpos - tmpoutpos; 49 | } 50 | 51 | /** 52 | * Estimate size of the compressed output. 
53 | * 54 | * @param in 55 | * array to compress 56 | * @param currentPos 57 | * where to start reading 58 | * @param inlength 59 | * how many integers to read 60 | * @return estimated size of the output (in 32-bit integers) 61 | */ 62 | public static int estimatecompress(int[] @in, int currentPos, int inlength) 63 | { 64 | int finalin = currentPos + inlength; 65 | int counter = 0; 66 | while (currentPos < finalin) 67 | { 68 | int inoffset = fakecompressblock(@in, currentPos, 69 | inlength); 70 | if (inoffset == -1) 71 | throw new Exception("Too big a number"); 72 | currentPos += inoffset; 73 | inlength -= inoffset; 74 | ++counter; 75 | } 76 | return counter; 77 | } 78 | 79 | /** 80 | * Compress an integer array using Simple16 81 | * 82 | * @param out 83 | * the compressed output 84 | * @param outOffset 85 | * the offset of the output in the number of integers 86 | * @param in 87 | * the integer input array 88 | * @param inOffset 89 | * the offset of the input in the number of integers 90 | * @param n 91 | * the number of elements to be compressed 92 | * @return the size of the outputs in 32-bit integers 93 | * 94 | */ 95 | public static int compressblock(int[] @out, int outOffset, int[] @in, int inOffset, int n) 96 | { 97 | int numIdx, j, num, bits; 98 | for (numIdx = 0; numIdx < S16_NUMSIZE; numIdx++) 99 | { 100 | @out[outOffset] = numIdx << S16_BITSSIZE; 101 | num = (S16_NUM[numIdx] < n) ? S16_NUM[numIdx] : n; 102 | 103 | for (j = 0, bits = 0; (j < num) 104 | && (@in[inOffset + j] < SHIFTED_S16_BITS[numIdx][j]);) 105 | { 106 | @out[outOffset] |= (@in[inOffset + j] << bits); 107 | bits += S16_BITS[numIdx][j]; 108 | j++; 109 | } 110 | 111 | if (j == num) 112 | { 113 | return num; 114 | } 115 | } 116 | 117 | return -1; 118 | } 119 | 120 | private static int fakecompressblock(int[] @in, int inOffset, int n) 121 | { 122 | int numIdx, j, num; 123 | for (numIdx = 0; numIdx < S16_NUMSIZE; numIdx++) 124 | { 125 | num = (S16_NUM[numIdx] < n) ? 
S16_NUM[numIdx] : n; 126 | 127 | for (j = 0; (j < num) 128 | && (@in[inOffset + j] < SHIFTED_S16_BITS[numIdx][j]);) 129 | { 130 | j++; 131 | } 132 | 133 | if (j == num) 134 | { 135 | return num; 136 | } 137 | } 138 | 139 | return -1; 140 | } 141 | 142 | /** 143 | * Decompress an integer array using Simple16 144 | * 145 | * @param out 146 | * the decompressed output 147 | * @param outOffset 148 | * the offset of the output in the number of integers 149 | * @param in 150 | * the compressed input array 151 | * @param inOffset 152 | * the offset of the input in the number of integers 153 | * @param n 154 | * the number of elements to be compressed 155 | * @return the number of processed integers 156 | */ 157 | public static int decompressblock(int[] @out, int outOffset, int[] @in, int inOffset, int n) 158 | { 159 | int numIdx, j = 0, bits = 0; 160 | numIdx = (int)((uint)@in[inOffset] >> S16_BITSSIZE); 161 | int num = S16_NUM[numIdx] < n ? S16_NUM[numIdx] : n; 162 | for (j = 0, bits = 0; j < num; j++) 163 | { 164 | @out[outOffset + j] = (int)((uint)@in[inOffset] >> bits) & (int)((uint)0xffffffff >> (32 - S16_BITS[numIdx][j])); 165 | bits += S16_BITS[numIdx][j]; 166 | } 167 | return num; 168 | } 169 | 170 | /** 171 | * Uncompressed data from an input array into an output array 172 | * 173 | * @param in input array (in compressed form) 174 | * @param tmpinpos starting location in the compressed input array 175 | * @param inlength how much data we wish the read (in 32-bit words) 176 | * @param out output array (in decompressed form) 177 | * @param currentPos current position in the output array 178 | * @param outlength available data in the output array 179 | */ 180 | public static void uncompress(int[] @in, int tmpinpos, int inlength, int[] @out, int currentPos, int outlength) 181 | { 182 | int pos = tmpinpos + inlength; 183 | while (tmpinpos < pos) 184 | { 185 | int howmany = decompressblock(@out, currentPos, 186 | @in, tmpinpos, outlength); 187 | outlength -= howmany; 
188 | currentPos += howmany; 189 | tmpinpos += 1; 190 | } 191 | } 192 | 193 | private static int[][] shiftme(int[][] x) 194 | { 195 | int[][] answer = new int[x.Length][]; 196 | for (int k = 0; k < x.Length; ++k) 197 | { 198 | answer[k] = new int[x[k].Length]; 199 | for (int z = 0; z < answer[k].Length; ++z) 200 | answer[k][z] = 1 << x[k][z]; 201 | } 202 | return answer; 203 | } 204 | 205 | private const int S16_NUMSIZE = 16; 206 | private const int S16_BITSSIZE = 28; 207 | // the possible number of bits used to represent one integer 208 | private static int[] S16_NUM = { 28, 21, 21, 21, 14, 9, 8, 7, 6, 6, 5, 5, 4, 3, 2, 1 }; 209 | // the corresponding number of elements for each value of the number of 210 | // bits 211 | private static int[][] S16_BITS = { 212 | new[]{ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, 213 | new[]{ 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, 214 | new[]{ 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1 }, 215 | new[]{ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2 }, 216 | new[]{ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 }, 217 | new[]{ 4, 3, 3, 3, 3, 3, 3, 3, 3 }, new[] { 3, 4, 4, 4, 4, 3, 3, 3 }, 218 | new[]{ 4, 4, 4, 4, 4, 4, 4 }, new[]{ 5, 5, 5, 5, 4, 4 }, 219 | new[]{ 4, 4, 5, 5, 5, 5 }, new[]{ 6, 6, 6, 5, 5 }, new[]{ 5, 5, 6, 6, 6 }, 220 | new[]{ 7, 7, 7, 7 }, new[]{ 10, 9, 9, }, new[]{ 14, 14 }, new[]{ 28 } }; 221 | private static int[][] SHIFTED_S16_BITS = shiftme(S16_BITS); 222 | 223 | public override string ToString() 224 | { 225 | return nameof(S16); 226 | } 227 | } 228 | } -------------------------------------------------------------------------------- /src/CSharpFastPFOR/Simple16.cs: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | /** 7 | * This is an implementation of the popular Simple16 scheme. It is limited to 8 | * 28-bit integers (between 0 and 2^28-1). 
9 | * 10 | * Note that this does not use differential coding: if you are working on sorted 11 | * lists, you must compute the deltas separately. 12 | * 13 | *14 | * Adapted by D. Lemire from the Apache Lucene project. 15 | *
*/

using System;

namespace Genbox.CSharpFastPFOR
{
    public class Simple16 : IntegerCODEC, SkippableIntegerCODEC
    {
        /**
         * Compress inlength integers from in (starting at inpos) into out
         * (starting at outpos), without writing a length header.
         *
         * Each call to compressblock fills exactly one output word. Both inpos
         * and outpos are advanced past the data that was read and written.
         *
         * @param in
         *            input array
         * @param inpos
         *            location in the input array
         * @param inlength
         *            how many integers to compress
         * @param out
         *            output array
         * @param outpos
         *            where to write in the output array
         * @throws Exception
         *             ("Too big a number") when compressblock cannot encode a value
         */
        public void headlessCompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos)
        {
            int i_inpos = inpos.get();
            int i_outpos = outpos.get();
            int finalin = i_inpos + inlength;
            while (i_inpos < finalin)
            {
                // One output word per iteration; inlength tracks what is left to encode.
                int inoffset = compressblock(@out, i_outpos++, @in, i_inpos, inlength);
                if (inoffset == -1)
                    throw new Exception("Too big a number");
                i_inpos += inoffset;
                inlength -= inoffset;
            }
            inpos.set(i_inpos);
            outpos.set(i_outpos);
        }

        /**
         * Compress an integer array using Simple16
         *
         * The selectors are tried in order (0 to S16_NUMSIZE - 1). For each one
         * the selector index is written to the top bits of the output word
         * (shifted left by S16_BITSSIZE) and up to min(S16_NUM[selector], n)
         * values are packed below it. The first selector whose slots can hold
         * all of those values wins.
         *
         * Values must be non-negative and fit their slot, so the largest
         * encodable value is 2^28 - 1. Negative values are rejected: a plain
         * signed comparison would accept them and their sign bits would
         * overwrite the selector and the neighbouring slots.
         *
         * @param out
         *            the compressed output
         * @param outOffset
         *            the offset of the output in the number of integers
         * @param in
         *            the integer input array
         * @param inOffset
         *            the offset of the input in the number of integers
         * @param n
         *            the number of elements to be compressed
         * @return the number of integers packed into out[outOffset], or -1 when
         *         no selector can represent the next value
         */
        public static int compressblock(int[] @out, int outOffset, int[] @in, int inOffset, int n)
        {
            for (int numIdx = 0; numIdx < S16_NUMSIZE; numIdx++)
            {
                @out[outOffset] = numIdx << S16_BITSSIZE;
                int num = (S16_NUM[numIdx] < n) ? S16_NUM[numIdx] : n;

                int j;
                int bits = 0;
                for (j = 0; j < num; j++)
                {
                    int value = @in[inOffset + j];
                    // The slot holds S16_BITS[numIdx][j] bits, i.e. values in
                    // [0, SHIFTED_S16_BITS[numIdx][j]).
                    if (value < 0 || value >= SHIFTED_S16_BITS[numIdx][j])
                        break;

                    @out[outOffset] |= value << bits;
                    bits += S16_BITS[numIdx][j];
                }

                if (j == num)
                {
                    return num;
                }
            }

            return -1;
        }

        /**
         * Decompress an integer array using Simple16
         *
         * Reads the selector from the top bits of in[inOffset] and extracts
         * min(S16_NUM[selector], n) values from the remaining bits.
         *
         * @param out
         *            the decompressed output
         * @param outOffset
         *            the offset of the output in the number of integers
         * @param in
         *            the compressed input array
         * @param inOffset
         *            the offset of the input in the number of integers
         * @param n
         *            the maximum number of elements to be decompressed
         * @return the number of processed integers
         */
        public static int decompressblock(int[] @out, int outOffset, int[] @in, int inOffset, int n)
        {
            // Unsigned shift so that a set top bit does not sign-extend into the selector.
            int numIdx = (int)((uint)@in[inOffset] >> S16_BITSSIZE);
            int num = S16_NUM[numIdx] < n ? S16_NUM[numIdx] : n;
            int bits = 0;
            for (int j = 0; j < num; j++)
            {
                @out[outOffset + j] = (int)((uint)@in[inOffset] >> bits) & (int)((uint)0xffffffff >> (32 - S16_BITS[numIdx][j]));
                bits += S16_BITS[numIdx][j];
            }
            return num;
        }

        /**
         * Uncompress num integers from in (starting at inpos) into out (starting
         * at outpos), assuming no length header is present.
         *
         * One input word is consumed per iteration. Both inpos and outpos are
         * advanced. The inlength argument is not used by this scheme.
         *
         * @param in
         *            array containing data in compressed form
         * @param inpos
         *            where to start reading in the array
         * @param inlength
         *            length of the compressed data (ignored here)
         * @param out
         *            array where to write the uncompressed output
         * @param outpos
         *            where to write the uncompressed output in out
         * @param num
         *            number of integers we want to decode
         */
        public void headlessUncompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos, int num)
        {
            int i_inpos = inpos.get();
            int i_outpos = outpos.get();
            while (num > 0)
            {
                int howmany = decompressblock(@out, i_outpos, @in, i_inpos, num);
                num -= howmany;
                i_outpos += howmany;
                i_inpos++;
            }
            inpos.set(i_inpos);
            outpos.set(i_outpos);
        }

        /**
         * Uncompress data from an array to another array.
         *
         * Exactly inlength input words are processed, starting at tmpinpos. The
         * positions are passed by value, so the caller's copies are not updated.
         *
         * @param in
         *            array containing data in compressed form
         * @param tmpinpos
         *            where to start reading in the array
         * @param inlength
         *            length of the compressed data, in input words
         * @param out
         *            array where to write the uncompressed output
         * @param currentPos
         *            where to write the uncompressed output in out
         * @param outlength
         *            number of integers we want to decode
         */
        public static void uncompress(int[] @in, int tmpinpos, int inlength, int[] @out, int currentPos, int outlength)
        {
            int pos = tmpinpos + inlength;
            while (tmpinpos < pos)
            {
                int howmany = decompressblock(@out, currentPos, @in, tmpinpos,
                        outlength);
                outlength -= howmany;
                currentPos += howmany;
                tmpinpos += 1;
            }
        }

        /**
         * Build, for every slot width w in x, the exclusive upper bound 1 << w
         * of the values that fit in that slot.
         */
        private static int[][] shiftme(int[][] x)
        {
            int[][] answer = new int[x.Length][];
            for (int k = 0; k < x.Length; ++k)
            {
                answer[k] = new int[x[k].Length];
                for (int z = 0; z < answer[k].Length; ++z)
                    answer[k][z] = 1 << x[k][z];
            }
            return answer;
        }

        /**
         * Compress inlength integers, prefixing the output with one word that
         * stores inlength. Does nothing when inlength is 0.
         */
        public void compress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos)
        {
            if (inlength == 0)
                return;
            @out[outpos.get()] = inlength;
            outpos.increment();
            headlessCompress(@in, inpos, inlength, @out, outpos);
        }

        /**
         * Uncompress data written by compress: the first word read is the number
         * of integers to decode. Does nothing when inlength is 0.
         */
        public void uncompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos)
        {
            if (inlength == 0)
                return;
            int outlength = @in[inpos.get()];
            inpos.increment();
            headlessUncompress(@in, inpos, inlength, @out, outpos, outlength);
        }

        // number of selectors
        private const int S16_NUMSIZE = 16;
        // number of payload bits per word; the selector sits above them
        private const int S16_BITSSIZE = 28;
        // for each selector, the number of integers packed into one word
        private static readonly int[] S16_NUM = { 28, 21, 21, 21, 14, 9, 8, 7, 6, 6, 5, 5, 4, 3, 2, 1 };
        // for each selector, the bit width of each slot (every row sums to 28)
        private static readonly int[][] S16_BITS = {
            new[] { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
            new[] { 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
            new[] { 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1 },
            new[] { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2 },
            new[] { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 },
            new[] { 4, 3, 3, 3, 3, 3, 3, 3, 3 }, new[] { 3, 4, 4, 4, 4, 3, 3, 3 },
            new[] { 4, 4, 4, 4, 4, 4, 4 }, new[] { 5, 5, 5, 5, 4, 4 },
            new[] { 4, 4, 5, 5, 5, 5 }, new[] { 6, 6, 6, 5, 5 }, new[] { 5, 5, 6, 6, 6 },
            new[] { 7, 7, 7, 7 }, new[] { 10, 9, 9 }, new[] { 14, 14 }, new[] { 28 } };
        // for each slot, 1 << width (must be initialised after S16_BITS)
        private static readonly int[][] SHIFTED_S16_BITS = shiftme(S16_BITS);

        public override string ToString()
        {
            return nameof(Simple16);
        }
    }
}
--------------------------------------------------------------------------------
/src/CSharpFastPFOR/SkippableComposition.cs:
--------------------------------------------------------------------------------
/**
 * This code is released under the
 * Apache License Version 2.0 http://www.apache.org/licenses/.
 *
 * (c) Daniel Lemire, http://lemire.me/en/
 */

/**
 * Helper class to compose schemes.
 *
 * @author Daniel Lemire
 */

namespace Genbox.CSharpFastPFOR
{
    public class SkippableComposition : SkippableIntegerCODEC
    {
        private readonly SkippableIntegerCODEC F1;
        private readonly SkippableIntegerCODEC F2;

        /**
         * Compose a scheme from a first one (f1) and a second one (f2). The first
         * one is called first and then the second one tries to compress whatever
         * remains from the first run.
         *
         * By convention, the first scheme should be such that if, during decoding,
         * a 32-bit zero is first encountered, then there is no output.
         *
         * @param f1
         *            first codec
         * @param f2
         *            second codec
         */
        public SkippableComposition(SkippableIntegerCODEC f1, SkippableIntegerCODEC f2)
        {
            F1 = f1;
            F2 = f2;
        }

        /**
         * Compress with F1, then hand whatever input F1 did not consume to F2.
         * Both inpos and outpos are advanced by the two codecs.
         */
        public void headlessCompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos)
        {
            int init = inpos.get();
            F1.headlessCompress(@in, inpos, inlength, @out, outpos);
            // F1 may stop early; only the unread remainder goes to F2.
            inlength -= inpos.get() - init;
            F2.headlessCompress(@in, inpos, inlength, @out, outpos);
        }

        /**
         * Uncompress with F1, then let F2 decode the integers still missing.
         *
         * The count passed to F2 is num minus the number of integers F1 actually
         * produced, measured as the distance outpos moved, so decoding is also
         * correct when outpos does not start at 0.
         */
        public void headlessUncompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos, int num)
        {
            int init = inpos.get();
            int initOut = outpos.get();
            F1.headlessUncompress(@in, inpos, inlength, @out, outpos, num);
            inlength -= inpos.get() - init;
            // Subtract what F1 decoded, not the absolute output position.
            num -= outpos.get() - initOut;
            F2.headlessUncompress(@in, inpos, inlength, @out, outpos, num);
        }

        public override string ToString()
        {
            return F1 + "+" + F2;
        }
    }
}
--------------------------------------------------------------------------------
/src/CSharpFastPFOR/SkippableIntegerCODEC.cs:
--------------------------------------------------------------------------------
/**
 * This code is released under the
 * Apache License Version 2.0 http://www.apache.org/licenses/.
 *
 * (c) Daniel Lemire, http://lemire.me/en/
 */

/**
 * Interface describing a standard CODEC to compress integers. This is a
 * variation on the IntegerCODEC interface meant to be used for random access.
 *
 * The main difference is that we must specify the number of integers we wish to
 * decode. This information should be stored elsewhere.
 *
 * This interface was designed by the Terrier team for their search engine.
 *
 * @author Daniel Lemire
 *
 */
namespace Genbox.CSharpFastPFOR
{
    public interface SkippableIntegerCODEC
    {
        /**
         * Compress data from an array to another array.
         *
         * Both inpos and outpos are modified to represent how much data was read
         * and written. For example, if 12 ints (inlength = 12) are compressed to
         * 3 ints, then inpos will be incremented by 12 while outpos will be
         * incremented by 3. IntWrapper is used to pass the values by reference.
         *
         * @param in
         *            input array
         * @param inpos
         *            location in the input array
         * @param inlength
         *            how many integers to compress
         * @param out
         *            output array
         * @param outpos
         *            where to write in the output array
         */
        void headlessCompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos);

        /**
         * Uncompress data from an array to another array.
         *
         * Both inpos and outpos parameters are modified to indicate new positions
         * after read/write.
         *
         * @param in
         *            array containing data in compressed form
         * @param inpos
         *            where to start reading in the array
         * @param inlength
         *            length of the compressed data (ignored by some schemes)
         * @param out
         *            array where to write the uncompressed output
         * @param outpos
         *            where to write the uncompressed output in out
         * @param num
         *            number of integers we want to decode, the actual number of integers decoded can be less
         */
        void headlessUncompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos, int num);
    }
}
--------------------------------------------------------------------------------
/src/CSharpFastPFOR/Synth/ClusteredDataGenerator.cs:
--------------------------------------------------------------------------------
/**
 * This code is released under the
 * Apache License Version 2.0 http://www.apache.org/licenses/.
4 | * 5 | * (c) Daniel Lemire, http://lemire.me/en/ 6 | */ 7 | 8 | /** 9 | * This class will generate lists of random integers based on the clustered 10 | * model: 11 | * 12 | * Reference: Vo Ngoc Anh and Alistair Moffat. 2010. Index compression using 13 | * 64-bit words. Softw. Pract. Exper.40, 2 (February 2010), 131-147. 14 | * 15 | * @author Daniel Lemire 16 | */ 17 | 18 | namespace Genbox.CSharpFastPFOR.Synth 19 | { 20 | public class ClusteredDataGenerator 21 | { 22 | private readonly UniformDataGenerator unidg = new UniformDataGenerator(); 23 | 24 | /** 25 | * Creates a clustered random array generator. 26 | */ 27 | public ClusteredDataGenerator() 28 | { 29 | } 30 | /** Fills array[offset + k] with Min + v[k] for each k, where v is the array returned by unidg.generateUniform(length, Max - Min). */ 31 | private void fillUniform(int[] array, int offset, int length, int Min, int Max) 32 | { 33 | int[] v = this.unidg.generateUniform(length, Max - Min); 34 | for (int k = 0; k < v.Length; ++k) 35 | array[k + offset] = Min + v[k]; 36 | } 37 | /** Fills length entries of array starting at offset. When Max - Min equals length, or length is at most 10, delegates to fillUniform. Otherwise picks a cut (length / 2 plus a random amount below Max - Min - length - 1 when that bound is positive), draws p from rand.NextDouble(), and fills the first length / 2 entries over (Min, Min + cut) and the remaining entries over (Min + cut, Max): uniform then clustered when p < 0.25, clustered then uniform when p < 0.5, and clustered for both halves otherwise. */ 38 | private void fillClustered(int[] array, int offset, int length, int Min, int Max) 39 | { 40 | int range = Max - Min; 41 | if ((range == length) || (length <= 10)) 42 | { 43 | fillUniform(array, offset, length, Min, Max); 44 | return; 45 | } 46 | int cut = length / 2 + ((range - length - 1 > 0) ? this.unidg.rand.Next(range - length - 1) : 0); 47 | double p = this.unidg.rand.NextDouble(); 48 | if (p < 0.25) 49 | { 50 | fillUniform(array, offset, length / 2, Min, Min + cut); 51 | fillClustered(array, offset + length / 2, length - length / 2, Min + cut, Max); 52 | } 53 | else if (p < 0.5) 54 | { 55 | fillClustered(array, offset, length / 2, Min, Min + cut); 56 | fillUniform(array, offset + length / 2, length - length / 2, Min + cut, Max); 57 | } 58 | else 59 | { 60 | fillClustered(array, offset, length / 2, Min, Min + cut); 61 | fillClustered(array, offset + length / 2, length - length / 2, Min + cut, Max); 62 | } 63 | } 64 | 65 | /** 66 | * generates randomly N distinct integers from 0 to Max. 
67 | * 68 | * @param N 69 | * number of integers to generate 70 | * @param Max 71 | * maximal value of the integers 72 | * @return array containing the integers 73 | */ 74 | public int[] generateClustered(int N, int Max) 75 | { 76 | int[] array = new int[N]; 77 | fillClustered(array, 0, N, 0, Max); 78 | return array; 79 | } 80 | } 81 | } -------------------------------------------------------------------------------- /src/CSharpFastPFOR/Synth/UniformDataGenerator.cs: -------------------------------------------------------------------------------- 1 | /** 2 | * This code is released under the 3 | * Apache License Version 2.0 http://www.apache.org/licenses/. 4 | * 5 | * (c) Daniel Lemire, http://lemire.me/en/ 6 | */ 7 | 8 | /** 9 | * This class will generate "uniform" lists of random integers. 10 | * 11 | * @author Daniel Lemire 12 | */ 13 | 14 | using System; 15 | using System.Collections.Generic; 16 | using Genbox.CSharpFastPFOR.Port; 17 | 18 | namespace Genbox.CSharpFastPFOR.Synth 19 | { 20 | public class UniformDataGenerator 21 | { 22 | /** 23 | * construct generator of random arrays. 24 | */ 25 | public UniformDataGenerator() 26 | { 27 | this.rand = new Random(); 28 | } 29 | 30 | /** 31 | * @param seed 32 | * random seed 33 | */ 34 | public UniformDataGenerator(int seed) 35 | { 36 | this.rand = new Random(seed); 37 | } 38 | 39 | /** 40 | * generates randomly N distinct integers from 0 to Max. 41 | */ 42 | 43 | private int[] generateUniformHash(int N, int Max) 44 | { 45 | if (N > Max) 46 | throw new Exception("not possible"); 47 | 48 | int[] ans = new int[N]; 49 | HashSet