├── Library ├── TODO.md ├── Terminology.txt ├── Codecs │ └── Integers │ │ ├── ThompsonAlphaInteger.cs │ │ ├── RawInteger.cs │ │ ├── EliasDeltaInteger.cs │ │ ├── EliasGammaInteger.cs │ │ ├── EliasOmegaInteger.cs │ │ ├── RawIntegerDecoder.cs │ │ ├── IntUtil.cs │ │ ├── VlqInteger.cs │ │ ├── RawIntegerEncoder.cs │ │ ├── IntegerDecoderBase.cs │ │ ├── EliasGammaIntegerDecoder.cs │ │ ├── EliasGammaIntegerEncoder.cs │ │ ├── IntegerEncoderBase.cs │ │ ├── ZigZag.cs │ │ ├── EliasOmegaIntegerDecoder.cs │ │ ├── InvertedVlqInteger.cs │ │ ├── ThompsonAlphaIntegerDecoder.cs │ │ ├── EliasDeltaIntegerDecoder.cs │ │ ├── VlqIntegerDecoder.cs │ │ ├── VlqIntegerEncoder.cs │ │ ├── InvertedVlqIntegerDecoder.cs │ │ ├── FibonacciInteger.cs │ │ ├── EliasDeltaIntegerEncoder.cs │ │ ├── InvertedVlqIntegerEncoder.cs │ │ ├── FibonacciIntegerDecoder.cs │ │ ├── ThompsonAlphaIntegerEncoder.cs │ │ ├── EliasOmegaIntegerEncoder.cs │ │ └── FibonacciIntegerEncoder.cs ├── IBitWriter.cs ├── IBitReader.cs ├── GlobalUsings.cs ├── Extensions │ ├── ArrayExtensions.cs │ ├── StreamExtensions.cs │ └── NumericStringExtensions.cs ├── Bits.cs ├── Library.csproj ├── StreamBitReader.cs └── StreamBitWriter.cs ├── .idea ├── .idea.InvertedTomato.IntegerCompression │ └── .idea │ │ ├── .name │ │ ├── encodings.xml │ │ ├── vcs.xml │ │ ├── indexLayout.xml │ │ └── .gitignore ├── .idea.packing │ └── .idea │ │ ├── encodings.xml │ │ ├── vcs.xml │ │ ├── indexLayout.xml │ │ └── .gitignore ├── .idea.integer-compression │ └── .idea │ │ ├── encodings.xml │ │ ├── vcs.xml │ │ ├── indexLayout.xml │ │ └── .gitignore └── .idea.binary │ └── .idea │ └── workspace.xml ├── nuget-pack.cmd ├── images └── comparison-1.png ├── Test ├── GlobalUsings.cs ├── ZigZagTests.cs ├── IntUtilTests.cs ├── EliasDeltaTests.cs ├── EliasGammaTests.cs ├── EliasOmegaTests.cs ├── Test.csproj ├── RawCodecTests.cs ├── ThompsonAlphaTests.cs ├── InvertedVlqCodecTests.cs ├── VlqCodecTests.cs ├── StreamBitReaderTests.cs ├── StreamBitWriterTests.cs └── 
FibonaciCodecTests.cs ├── packing.sln.DotSettings ├── LoadTest ├── LoadTest.csproj └── Program.cs ├── Sample ├── Sample.csproj └── Program.cs ├── LICENSE ├── packing.sln ├── .gitignore └── README.md /Library/TODO.md: -------------------------------------------------------------------------------- 1 | Separate interface 2 | String encoding 3 | -------------------------------------------------------------------------------- /.idea/.idea.InvertedTomato.IntegerCompression/.idea/.name: -------------------------------------------------------------------------------- 1 | InvertedTomato.IntegerCompression -------------------------------------------------------------------------------- /nuget-pack.cmd: -------------------------------------------------------------------------------- 1 | @echo off 2 | dotnet pack Library/Library.csproj --include-symbols 3 | pause -------------------------------------------------------------------------------- /Library/Terminology.txt: -------------------------------------------------------------------------------- 1 | Value = input 2 | Symbol = compressed value 3 | Set = array of 0 or more symbols -------------------------------------------------------------------------------- /images/comparison-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/invertedtomato/packing/HEAD/images/comparison-1.png -------------------------------------------------------------------------------- /Test/GlobalUsings.cs: -------------------------------------------------------------------------------- 1 | global using System; 2 | global using System.IO; 3 | global using Xunit; 4 | global using InvertedTomato.Packing.Codecs.Integers; 5 | global using InvertedTomato.Packing.Extensions; -------------------------------------------------------------------------------- /Library/Codecs/Integers/ThompsonAlphaInteger.cs: -------------------------------------------------------------------------------- 1 | 
// ReSharper disable UnusedType.Global 2 | 3 | namespace InvertedTomato.Packing.Codecs.Integers; 4 | 5 | public static class ThompsonAlphaInteger 6 | { 7 | } -------------------------------------------------------------------------------- /Library/IBitWriter.cs: -------------------------------------------------------------------------------- 1 | namespace InvertedTomato.Packing; 2 | 3 | public interface IBitWriter 4 | { 5 | void WriteBit(Boolean value); 6 | void WriteBits(UInt64 bits, Int32 count); 7 | void Align(); 8 | } -------------------------------------------------------------------------------- /.idea/.idea.packing/.idea/encodings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /.idea/.idea.packing/.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /.idea/.idea.integer-compression/.idea/encodings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /Library/IBitReader.cs: -------------------------------------------------------------------------------- 1 | namespace InvertedTomato.Packing; 2 | 3 | public interface IBitReader 4 | { 5 | Boolean PeakBit(); 6 | Boolean ReadBit(); 7 | UInt64 ReadBits(Int32 count); 8 | void Align(); 9 | } 10 | -------------------------------------------------------------------------------- /.idea/.idea.InvertedTomato.IntegerCompression/.idea/encodings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /.idea/.idea.integer-compression/.idea/vcs.xml: 
/// <summary>
/// Convenience extension methods for arrays.
/// </summary>
public static class ArrayExtensions
{
    /// <summary>
    /// Reset every element of <paramref name="target"/> by calling
    /// <see cref="Array.Clear(Array, Int32, Int32)"/> over the array's full length.
    /// </summary>
    /// <typeparam name="T">Element type of the array.</typeparam>
    /// <param name="target">Array to clear in place.</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static void Clear<T>(this T[] target) => Array.Clear(target, 0, target.Length);
}
-------------------------------------------------------------------------------- /Library/Codecs/Integers/RawInteger.cs: -------------------------------------------------------------------------------- 1 | // ReSharper disable UnusedType.Global 2 | // ReSharper disable UnusedMember.Global 3 | 4 | namespace InvertedTomato.Packing.Codecs.Integers; 5 | 6 | public static class RawInteger 7 | { 8 | public const UInt64 MinValue = UInt64.MinValue; 9 | public const UInt64 MaxValue = UInt64.MaxValue; 10 | } -------------------------------------------------------------------------------- /Library/Codecs/Integers/EliasDeltaInteger.cs: -------------------------------------------------------------------------------- 1 | // ReSharper disable UnusedType.Global 2 | // ReSharper disable UnusedMember.Global 3 | 4 | namespace InvertedTomato.Packing.Codecs.Integers; 5 | 6 | public static class EliasDeltaInteger 7 | { 8 | public const UInt64 MinValue = UInt64.MinValue; 9 | public const UInt64 MaxValue = UInt64.MaxValue - 1; 10 | } -------------------------------------------------------------------------------- /Library/Codecs/Integers/EliasGammaInteger.cs: -------------------------------------------------------------------------------- 1 | // ReSharper disable UnusedType.Global 2 | // ReSharper disable UnusedMember.Global 3 | 4 | namespace InvertedTomato.Packing.Codecs.Integers; 5 | 6 | public static class EliasGammaInteger 7 | { 8 | public const UInt64 MinValue = UInt64.MinValue; 9 | public const UInt64 MaxValue = UInt64.MaxValue - 1; 10 | } -------------------------------------------------------------------------------- /Library/Codecs/Integers/EliasOmegaInteger.cs: -------------------------------------------------------------------------------- 1 | // ReSharper disable UnusedType.Global 2 | // ReSharper disable UnusedMember.Global 3 | 4 | namespace InvertedTomato.Packing.Codecs.Integers; 5 | 6 | public static class EliasOmegaInteger 7 | { 8 | public const UInt64 MinValue = 
/// <summary>
/// Decoder for the raw codec: every value is read as a single fixed-width
/// field of <c>Bits.LongBits</c> bits from the underlying bit reader.
/// </summary>
public class RawIntegerDecoder : IntegerDecoderBase
{
    private readonly IBitReader _reader;

    /// <param name="reader">Bit source that values are read from.</param>
    public RawIntegerDecoder(IBitReader reader) => _reader = reader;

    /// <summary>
    /// Read the next value as one full-width field.
    /// </summary>
    protected override UInt64 Decode()
    {
        return _reader.ReadBits(Bits.LongBits);
    }
}
/// <summary>
/// Integer maths helpers.
/// </summary>
public static class IntegerUtil
{
    /// <summary>
    /// Raise <paramref name="x"/> to the power <paramref name="pow"/> using
    /// square-and-multiply. Exists because Math.Pow only supports doubles.
    /// </summary>
    /// <remarks>
    /// No explicit <c>checked</c> context is used, so overflow behaviour follows
    /// the project's compiler settings. <c>Pow(0, 0)</c> returns 1.
    /// </remarks>
    /// <param name="x">Base.</param>
    /// <param name="pow">Exponent.</param>
    /// <returns><paramref name="x"/> multiplied by itself <paramref name="pow"/> times.</returns>
    public static UInt64 Pow(UInt64 x, UInt64 pow)
    {
        var result = 1UL;

        // Walk the exponent from its least significant bit upwards
        for (var remaining = pow; remaining != 0; remaining >>= 1)
        {
            // This bit of the exponent is set, so fold the current square into the result
            if ((remaining & 1) != 0)
            {
                result *= x;
            }

            // Square the base ready for the next bit
            x *= x;
        }

        return result;
    }
}
/// <summary>
/// Encoder for the raw codec: every value is written as a single fixed-width
/// field of <c>Bits.LongBits</c> bits to the underlying bit writer.
/// </summary>
public class RawIntegerEncoder : IntegerEncoderBase
{
    private readonly IBitWriter _writer;

    /// <param name="writer">Bit sink that values are written to.</param>
    public RawIntegerEncoder(IBitWriter writer) => _writer = writer;

    /// <summary>
    /// Write <paramref name="value"/> as one full-width field.
    /// </summary>
    protected override void Encode(UInt64 value)
    {
        _writer.WriteBits(value, Bits.LongBits);
    }

    /// <summary>
    /// The encoded size does not depend on <paramref name="value"/>; it is always <c>Bits.LongBits</c>.
    /// </summary>
    public override Int32? PredictEncodedBits(UInt64 value)
    {
        return Bits.LongBits;
    }
}
/// <summary>
/// Helpers for rendering bytes and integers as binary or hexadecimal text.
/// </summary>
public static class NumericStringExtensions
{
    /// <summary>
    /// Render each byte as 8 binary digits (zero padded), with a single space between bytes.
    /// </summary>
    public static String ToBinaryString(this Byte[] target) =>
        String.Join(" ", target.Select(b => Convert.ToString(b, 2).PadLeft(Bits.ByteBits, '0')));

    /// <summary>
    /// Render the whole array as binary text and return a substring of it.
    /// </summary>
    /// <param name="target">Bytes to render.</param>
    /// <param name="offset">Character offset into the rendered text (separating spaces count as characters).</param>
    /// <param name="count">Number of characters to return.</param>
    public static String ToBinaryString(this Byte[] target, Int32 offset, Int32 count) =>
        target.ToBinaryString().Substring(offset, count);

    /// <summary>
    /// Render the value as binary digits left-padded with zeros to <c>Bits.LongBits</c> characters,
    /// with a space appended after every group of 8 digits (including the last group).
    /// </summary>
    public static String ToBinaryString(this UInt64 target) =>
        Regex.Replace(Convert.ToString((Int64)target, 2).PadLeft(Bits.LongBits, '0'), ".{8}", "$0 ");

    /// <summary>
    /// Render the bytes as contiguous upper-case hexadecimal pairs with no separators.
    /// </summary>
    public static String ToHexString(this Byte[] target) =>
        // BitConverter.ToString separates bytes with '-', not ' ', so that is what must be stripped
        BitConverter.ToString(target).Replace("-", "");
}
/// <summary>
/// Encode signed values as unsigned using ProtoBuffer ZigZag bijection encoding algorithm.
/// https://developers.google.com/protocol-buffers/docs/encoding
/// </summary>
public static class ZigZagUtility
{
    /// <summary>
    /// Encode a signed long into a ZigZag unsigned long.
    /// </summary>
    /// <param name="value">Signed value to encode.</param>
    /// <returns>The ZigZag form: 0, -1, 1, -2 ... map to 0, 1, 2, 3 ...</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static UInt64 Encode(Int64 value)
    {
        // Magnitude bits move up one place, leaving bit 0 free for the sign
        var shifted = value << 1;

        // Arithmetic shift: all zeros for non-negative input, all ones for negative input
        var signMask = value >> 63;

        return (UInt64)(shifted ^ signMask);
    }

    /// <summary>
    /// Decode a ZigZag unsigned long back into a signed long.
    /// </summary>
    /// <param name="value">ZigZag encoded value.</param>
    /// <returns>The original signed value.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static Int64 Decode(UInt64 value)
    {
        // Drop the sign bit to recover the (possibly inverted) magnitude
        var magnitude = value >> 1;

        // Two's complement negation of bit 0: all ones when the sign bit is set, otherwise zero
        var signMask = ~(value & 1) + 1;

        return (Int64)(magnitude ^ signMask);
    }
}
/// <summary>
/// Bit-width constants and helpers.
/// </summary>
public static class Bits
{
    /// <summary>Number of bits in a byte.</summary>
    public const Int32 ByteBits = 8;

    /// <summary>Number of bits in a 64-bit integer.</summary>
    public const Int32 LongBits = 8 * ByteBits;

    /// <summary>
    /// Count the number of bits needed to express <paramref name="value"/>.
    /// Zero is reported as needing one bit.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static Byte CountUsed(UInt64 value)
    {
        // Every value occupies at least one bit; each further non-zero shift adds one more
        Byte used = 1;
        for (var rest = value >> 1; rest != 0; rest >>= 1)
        {
            used++;
        }

        return used;
    }

    /// <summary>
    /// Count the number of bits needed to express <paramref name="value"/>.
    /// Zero is reported as needing one bit.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static Byte CountUsed(Byte value)
    {
        // Every value occupies at least one bit; each further non-zero shift adds one more
        Byte used = 1;
        for (var rest = (Byte)(value >> 1); rest != 0; rest >>= 1)
        {
            used++;
        }

        return used;
    }
}
/// <summary>
/// Decoder for Elias-Omega symbols read from an <see cref="IBitReader"/>.
/// Decoded values are offset by one so that zero can be represented.
/// </summary>
public class EliasOmegaIntegerDecoder : IntegerDecoderBase
{
    private readonly IBitReader _reader;

    /// <param name="reader">Bit source that symbols are read from.</param>
    public EliasOmegaIntegerDecoder(IBitReader reader) => _reader = reader;

    /// <summary>
    /// Decode the next symbol.
    /// </summary>
    protected override UInt64 Decode()
    {
        // Step 1: begin with N = 1
        UInt64 n = 1;

        while (true)
        {
            // Step 2: a leading "0" terminates the symbol; N is the decoded number
            if (!_reader.PeakBit())
            {
                break;
            }

            // Step 3: a leading "1" starts a group of N + 1 bits (that bit included) which becomes the new N
            n = _reader.ReadBits((Int32)n + 1);
        }

        // Consume the terminating bit that was only peeked at
        _reader.ReadBit();

        // Undo the +1 offset applied so that zero is encodable
        return n - 1;
    }
}
19 | stream.Position = 0; 20 | using (var reader = new StreamBitReader(stream)) 21 | { 22 | var fib = new FibonacciIntegerDecoder(reader); 23 | 24 | // Decode the Fibonacci values 25 | Console.WriteLine(fib.DecodeUInt64()); // Output: 1 26 | Console.WriteLine(fib.DecodeUInt64()); // Output: 2 27 | Console.WriteLine(fib.DecodeUInt64()); // Output: 3 28 | } 29 | 30 | 31 | Console.WriteLine("Done."); 32 | Console.ReadKey(true); -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Ben Thompson 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
/// <summary>
/// Decoder for Thompson-Alpha symbols: a fixed-width length prefix followed by
/// that many payload bits, with the most significant bit of the value implied.
/// </summary>
public class ThompsonAlphaIntegerDecoder : IntegerDecoderBase
{
    private readonly IBitReader _reader;
    private readonly Int32 _lengthBits;

    /// <param name="reader">Bit source that symbols are read from.</param>
    /// <param name="lengthBits">Width of the length prefix in bits; must be between 1 and 6.</param>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="lengthBits"/> is outside 1..6.</exception>
    public ThompsonAlphaIntegerDecoder(IBitReader reader, Int32 lengthBits)
    {
        // ArgumentOutOfRangeException(String, String) takes the parameter name first, then the message
        if (lengthBits is < 1 or > 6) throw new ArgumentOutOfRangeException(nameof(lengthBits), $"Must be between 1 and 6, not {lengthBits}.");

        _reader = reader;
        _lengthBits = lengthBits;
    }

    /// <summary>
    /// Decode the next symbol.
    /// </summary>
    protected override UInt64 Decode()
    {
        // Read length
        var length = (Int32)_reader.ReadBits(_lengthBits);

        // Read the payload bits in a single operation
        var value = _reader.ReadBits(length);

        // Recover implied MSB
        value |= (UInt64)1 << length;

        // Remove offset to allow zeros
        value--;

        return value;
    }
}
/// <summary>
/// Decodes unsigned integers from byte-sized VLQ packets, using the mask, continuation flag and
/// packet size constants declared on <c>VlqInteger</c>.
/// </summary>
public class VlqIntegerDecoder : IntegerDecoderBase
{
    private readonly IBitReader _reader;

    /// <param name="reader">Bit source that packets are read from.</param>
    public VlqIntegerDecoder(IBitReader reader) => _reader = reader;

    /// <summary>
    /// Reads packets until one arrives without the continuation flag, accumulating each payload
    /// (plus one) at an increasing bit offset, then removes the zero offset.
    /// </summary>
    protected override UInt64 Decode()
    {
        UInt64 total = 0;
        var shift = 0;

        while (true)
        {
            // Pull the next packet from the input
            var packet = (Byte)_reader.ReadBits(Bits.ByteBits);

#if DEBUG
            var before = total;
#endif

            // Merge the payload (offset by one) at the current bit position
            total += ((UInt64)(packet & VlqInteger.Mask) + 1) << shift;

#if DEBUG
            // A wrapped accumulator means the symbol does not fit
            if (total < before) throw new OverflowException($"Symbol is larger than maximum supported value or is corrupt. See {nameof(VlqInteger)}.{nameof(VlqInteger.MaxValue)}.");
#endif

            // Next packet lands one packet-width higher
            shift += VlqInteger.PacketSize;

            // No continuation flag: this was the final packet
            if ((packet & VlqInteger.More) == 0) break;
        }

        // Remove zero offset
        return total - 1;
    }
}
/// <summary>
/// Decoder for the VLQ variant whose flag bit is reversed: a packet with the
/// <c>InvertedVlqInteger.Nil</c> bit set is the final packet of a symbol.
/// </summary>
public class InvertedVlqIntegerDecoder : IntegerDecoderBase
{
    private readonly IBitReader _reader;

    /// <param name="reader">Bit source that packets are read from.</param>
    public InvertedVlqIntegerDecoder(IBitReader reader) => _reader = reader;

    /// <summary>
    /// Reads packets until one carries the terminating flag, accumulating each payload (plus one)
    /// at an increasing bit offset, then removes the zero offset.
    /// </summary>
    protected override UInt64 Decode()
    {
        UInt64 total = 0;
        var shift = 0;

        while (true)
        {
            // Pull the next packet from the input
            var packet = _reader.ReadBits(Bits.ByteBits);

#if DEBUG
            var before = total;
#endif

            // Merge the payload (offset by one) at the current bit position
            total += ((packet & InvertedVlqInteger.Mask) + 1) << shift;

#if DEBUG
            // A wrapped accumulator means the symbol does not fit
            if (total < before) throw new OverflowException("Input symbol larger than the supported limit of 64 bits. Probable corrupt input.");
#endif

            // Next packet lands one packet-width higher
            shift += InvertedVlqInteger.PacketSize;

            // Flag bit set: this was the final packet
            if ((packet & InvertedVlqInteger.Nil) != 0) break;
        }

        // Remove zero offset
        return total - 1;
    }
}
/// <summary>
/// Encodes unsigned integers with Elias delta coding. Values are offset by one before encoding so
/// that zero can be represented.
/// </summary>
public class EliasDeltaIntegerEncoder : IntegerEncoderBase
{
    private readonly IBitWriter _writer;

    /// <param name="writer">Bit sink that encoded symbols are written to.</param>
    public EliasDeltaIntegerEncoder(IBitWriter writer)
    {
        _writer = writer;
    }

    /// <summary>Writes <paramref name="value"/> to the writer as one Elias delta symbol.</summary>
    /// <exception cref="OverflowException">
    /// Thrown for <see cref="UInt64.MaxValue"/>, which cannot take the +1 zero offset without wrapping.
    /// </exception>
    protected override void Encode(UInt64 value)
    {
        // The zero offset below would wrap this value to 0 and produce a corrupt symbol
        if (value == UInt64.MaxValue) throw new OverflowException("Symbol is larger than maximum supported value. Must be less than UInt64.MaxValue.");

        // Offset value to allow zeros
        value++;

        // #1 Separate X into the highest power of 2 it contains (2^N) and the remaining N binary digits.
        // Done with integer shifts: floating-point powers lose precision above 2^53 and pick the wrong N.
        var n = HighestSetBit(value);
        var r = value - ((UInt64)1 << n);

        // #2 Encode N+1 with Elias gamma coding.
        var np = (Byte)(n + 1);
        var len = Bits.CountUsed(np);
        _writer.WriteBits(0, len - 1);
        _writer.WriteBits(np, len);

        // #3 Append the remaining N binary digits to this representation of N+1.
        _writer.WriteBits(r, n);
    }

    /// <summary>Returns the number of bits <see cref="Encode"/> would write for <paramref name="value"/>.</summary>
    /// <returns>The bit count, or <c>null</c> when the value cannot be encoded.</returns>
    public override Int32? PredictEncodedBits(UInt64 value)
    {
        // Not encodable: the zero offset would wrap
        if (value == UInt64.MaxValue) return null;

        var result = 0;

        // Offset for zero
        value++;

        // #1 Separate X into the highest power of 2 it contains (2^N) and the remaining N binary digits.
        var n = HighestSetBit(value);

        // #2 Encode N+1 with Elias gamma coding.
        var np = (Byte)(n + 1);
        var len = Bits.CountUsed(np);
        result += len - 1;
        result += len;

        // #3 Append the remaining N binary digits to this representation of N+1.
        result += n;

        return result;
    }

    /// <summary>Zero-based index of the most significant set bit of a non-zero value (floor of log2).</summary>
    private static Int32 HighestSetBit(UInt64 value)
    {
        var index = 0;
        for (var rest = value >> 1; rest != 0; rest >>= 1) index++;
        return index;
    }
}
/// <summary>
/// Decodes Fibonacci-coded integers: each 1 bit adds the Fibonacci number at that position from
/// <c>FibonacciInteger.Table</c>, and two consecutive 1 bits terminate the symbol.
/// </summary>
public class FibonacciIntegerDecoder : IntegerDecoderBase
{
    private readonly IBitReader _reader;

    /// <param name="reader">Bit source that symbols are read from.</param>
    public FibonacciIntegerDecoder(IBitReader reader) => _reader = reader;

    /// <summary>Reads one symbol and returns its value with the zero offset removed.</summary>
    /// <exception cref="OverflowException">Thrown when no terminator is found within the table's range.</exception>
    protected override UInt64 Decode()
    {
        // Running sum of the Fibonacci numbers selected so far
        UInt64 total = 0;

        // Whether the bit read on the previous cycle was a 1
        var previousWasOne = false;

        var table = FibonacciInteger.Table;
        for (var index = 0; index < table.Length; index++)
        {
            // A 0 bit contributes nothing and breaks any run of 1s
            if (!_reader.ReadBit())
            {
                previousWasOne = false;
                continue;
            }

            // Second 1 in a row is the terminator - return symbol less zero offset
            if (previousWasOne) return total - 1;

#if DEBUG
            var before = total;
#endif

            // Add this position's Fibonacci number
            total += table[index];

#if DEBUG
            if (total < before)
            {
                // Input is larger than expected
                throw new OverflowException($"Symbol is larger than the max value of {FibonacciInteger.MaxValue}. Data is probably corrupt");
            }
#endif

            previousWasOne = true;
        }

        // The terminator may sit just past the table (this occurs only when decoding MaxValue)
        if (previousWasOne && _reader.ReadBit()) return total - 1;

        // Input longer than supported
        throw new OverflowException($"Termination not found within supported {FibonacciInteger.Table.Length} bit range. Data is probably corrupt.");
    }
}
/// <summary>
/// Encodes unsigned integers with Elias omega coding. Values are offset by one before encoding so
/// that zero can be represented.
/// </summary>
public class EliasOmegaIntegerEncoder : IntegerEncoderBase
{
    private readonly IBitWriter _writer;

    /// <param name="writer">Bit sink that encoded symbols are written to.</param>
    public EliasOmegaIntegerEncoder(IBitWriter writer)
    {
        _writer = writer;
    }

    /// <summary>Writes <paramref name="value"/> to the writer as one Elias omega symbol.</summary>
    /// <exception cref="OverflowException">
    /// Thrown for <see cref="UInt64.MaxValue"/>, which cannot take the +1 zero offset without wrapping.
    /// </exception>
    protected override void Encode(UInt64 value)
    {
        // The zero offset below would wrap this value to 0, which would encode the same as zero
        if (value == UInt64.MaxValue) throw new OverflowException("Symbol is larger than maximum supported value. Must be less than UInt64.MaxValue.");

        // Offset min value
        value++;

        // Groups are produced last-to-first, so collect them on a stack (key = bits, value = bit count)
        var groups = new System.Collections.Generic.Stack<System.Collections.Generic.KeyValuePair<UInt64, Int32>>();

        // #1 Place a "0" at the end of the code.
        groups.Push(new(0, 1));

        // #2 If N=1, stop; encoding is complete.
        while (value > 1)
        {
            // Calculate the length of value
            var length = Bits.CountUsed(value);

            // #3 Prepend the binary representation of N to the beginning of the code (this will be at least two bits, the first bit of which is a 1)
            groups.Push(new(value, length));

            // #4 Let N equal the number of bits just prepended, minus one.
            value = (UInt64)length - 1;
        }

        // Write buffer - a stack enumerates most-recently-pushed first, giving the prepend order
        foreach (var item in groups)
        {
            var bits = item.Value;
            var group = item.Key;

            _writer.WriteBits(group, bits);
        }
    }

    /// <summary>Returns the number of bits <see cref="Encode"/> would write for <paramref name="value"/>.</summary>
    /// <returns>The bit count, or <c>null</c> when the value cannot be encoded.</returns>
    public override Int32? PredictEncodedBits(UInt64 value)
    {
        // Not encodable: the zero offset would wrap
        if (value == UInt64.MaxValue) return null;

        var result = 1; // Termination bit

        // Offset value to allow for 0s
        value++;

        // #2 If N=1, stop; encoding is complete.
        while (value > 1)
        {
            // Calculate the length of value
            var length = Bits.CountUsed(value);

            // #3 Prepend the binary representation of N to the beginning of the code (this will be at least two bits, the first bit of which is a 1)
            result += length;

            // #4 Let N equal the number of bits just prepended, minus one.
            value = (UInt64)length - 1;
        }

        return result;
    }
}
14 | Inverted Tomato 15 | https://github.com/invertedtomato/packing 16 | https://raw.githubusercontent.com/invertedtomato/packing/master/LICENSE 17 | https://github.com/invertedtomato/packing 18 | git 19 | Compression VLQ Elias Omega Gamma Delta Fibonacci Integer Numbers 20 | disable 21 | true 22 | InvertedTomato.Packing 23 | InvertedTomato.Packing 24 | 5.0.2 25 | 5.0.2 26 | Updated readme 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | README.md 36 | 37 | 38 | 39 | InvertedTomato.Packing.xml 40 | 41 | 42 | 43 | InvertedTomato.Packing.xml 44 | 45 | 46 | 47 | 48 | 49 | 50 | -------------------------------------------------------------------------------- /packing.sln: -------------------------------------------------------------------------------- 1 | 2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 16 4 | VisualStudioVersion = 16.0.30114.105 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Sample", "Sample\Sample.csproj", "{66CD159E-51BA-4021-AE05-1540CBB53137}" 7 | EndProject 8 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Test", "Test\Test.csproj", "{40D316FC-DF50-4620-905E-88F8959FAE32}" 9 | EndProject 10 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LoadTest", "LoadTest\LoadTest.csproj", "{F63FB851-FCC5-4E9D-8DD7-6D5D156FA962}" 11 | EndProject 12 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Library", "Library\Library.csproj", "{F84E594A-76F6-409A-8210-3C332C2CA785}" 13 | EndProject 14 | Global 15 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 16 | Debug|Any CPU = Debug|Any CPU 17 | Release|Any CPU = Release|Any CPU 18 | EndGlobalSection 19 | GlobalSection(SolutionProperties) = preSolution 20 | HideSolutionNode = FALSE 21 | EndGlobalSection 22 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 23 | {66CD159E-51BA-4021-AE05-1540CBB53137}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 24 | 
/// <summary>
/// Encodes unsigned integers with Fibonacci coding. Values are offset by one before encoding so
/// that zero can be represented; every symbol ends with an extra 1 bit as terminator.
/// </summary>
public class FibonacciIntegerEncoder : IntegerEncoderBase
{
    private const UInt64 One = 1;

    private readonly IBitWriter _writer;

    /// <param name="writer">Bit sink that encoded symbols are written to.</param>
    public FibonacciIntegerEncoder(IBitWriter writer)
    {
        _writer = writer;
    }

    /// <summary>Writes <paramref name="value"/> to the writer as one Fibonacci-coded symbol.</summary>
    /// <exception cref="OverflowException">
    /// Thrown (DEBUG builds only) when the value exceeds <c>FibonacciInteger.MaxValue</c>.
    /// </exception>
    protected override void Encode(UInt64 value)
    {
#if DEBUG
        // Check for overflow
        if (value > FibonacciInteger.MaxValue) throw new OverflowException($"Exceeded FibonacciCodec maximum supported symbol value of {FibonacciInteger.MaxValue}.");
#endif

        // Fibonacci doesn't support 0s, so offset by 1 to allow for them
        value++;

        // #1 Find the largest Fibonacci number equal to or less than N; subtract this number from N, keeping track of the remainder.
        // #3 Repeat the previous steps, substituting the remainder for N, until a remainder of 0 is reached.
        UInt64[]? buffers = null;
        Int32[]? counts = null;
        Int32 a;
        // ReSharper disable once TooWideLocalVariableScope
        Int32 b;
        for (var i = FibonacciInteger.Table.Length - 1; i >= 0; i--)
        {
            // Do nothing if not a fib match
            if (value < FibonacciInteger.Table[i]) continue;

            // If this is the first fib match...
            if (buffers == null)
            {
                // Calculate the total bit count
                var totalCount = i + 2; // The current index, add one to make it a count, and add another one for the termination bit

                // Allocate buffers (the symbol can be longer than one 64-bit word)
                buffers = new UInt64[totalCount / Bits.LongBits + 1];
                counts = new Int32[totalCount / Bits.LongBits + 1];

                // Calculate the count of bits for each buffer
                for (var j = 0; j < counts.Length; j++)
                {
                    counts[j] = Math.Min(totalCount, Bits.LongBits);
                    totalCount -= counts[j];
                }

                // Calculate address for termination bit
                a = (i + 1) / Bits.LongBits;

                // Set termination bit (always the lowest bit of the buffer it falls in)
                buffers[a] |= One;
            }

            // Calculate address
            a = i / Bits.LongBits;
            b = counts![a] - i - 1; // may be negative in the second buffer; the shift uses only the low 6 bits of the count

            // Write to buffer
            buffers[a] |= One << b;

            // Deduct Fibonacci number from value
            value -= FibonacciInteger.Table[i];
        }

        // Write out buffers
        for (a = 0; a < buffers!.Length; a++) _writer.WriteBits(buffers[a], counts![a]);
    }

    /// <summary>Returns the number of bits <see cref="Encode"/> would write for <paramref name="value"/>.</summary>
    /// <returns>The bit count including the termination bit, or <c>null</c> when the value cannot be encoded.</returns>
    public override Int32? PredictEncodedBits(UInt64 value)
    {
        // Check for overflow
        if (value > FibonacciInteger.MaxValue)
        {
            return null;
        }

        // Offset for zero
        value++;

        for (var i = FibonacciInteger.Table.Length - 1; i >= 0; i--)
        {
            if (value >= FibonacciInteger.Table[i])
            {
                // Index of the largest fib, plus one to make it a count, plus one for the termination bit
                return i + 2;
            }
        }

        // Not reached for valid input: after the offset the value is at least 1, the first table entry
        return 0;
    }
}
Bits.ByteBits - load; // This is a little complex, as it must mask out any previous bits in this byte at the same time 47 | 48 | // Load the bits 49 | value |= (UInt64)chunk << count - load; 50 | _offset += load; 51 | _count -= load; 52 | 53 | // Decrement input 54 | count -= load; 55 | 56 | // If all bits have been written, end here 57 | } while (count > 0); 58 | 59 | return value; 60 | } 61 | 62 | public Boolean ReadBit() => ReadBits(1) > 0; 63 | 64 | public void Align() => ReadBits(_count % Bits.ByteBits); 65 | 66 | public bool PeakBit() 67 | { 68 | // Load more bits if needed 69 | UnderlyingRead(); 70 | 71 | // Calculate bit address 72 | var a = _offset / Bits.ByteBits; 73 | var b = _offset % Bits.ByteBits; 74 | 75 | // Get bit at that address 76 | var bit = _buffer[a] & (Byte)(1 << Bits.ByteBits - b - 1); 77 | 78 | // Test if non-zero 79 | return bit > 0; 80 | } 81 | 82 | private void UnderlyingRead() 83 | { 84 | // If there's more bits in the buffer, do nothing 85 | if (_count > 0) return; 86 | 87 | // Otherwise load more bits 88 | _offset = 0; 89 | _count = _underlying.Read(_buffer) * Bits.ByteBits; 90 | 91 | // If nothing could be loaded, throw exception 92 | if (_count == 0) throw new EndOfStreamException(); 93 | } 94 | 95 | public void Dispose() 96 | { 97 | // Don't allow running twice 98 | if (IsDisposed) return; 99 | IsDisposed = true; 100 | 101 | // If we own the underlying, dispose it too 102 | if (_ownUnderlying) _underlying.Dispose(); 103 | } 104 | 105 | public override String ToString() => _buffer.ToBinaryString(_offset, _count); 106 | } -------------------------------------------------------------------------------- /Test/RawCodecTests.cs: -------------------------------------------------------------------------------- 1 | namespace InvertedTomato.Packing; 2 | 3 | public class RawCodecTests 4 | { 5 | private static Byte[] Encode(UInt64 value) 6 | { 7 | using var stream = new MemoryStream(); 8 | using (var writer = new StreamBitWriter(stream)) 9 | 
{ 10 | var encoder = new RawIntegerEncoder(writer); 11 | encoder.EncodeUInt64(value); 12 | } 13 | 14 | return stream.ToArray(); 15 | } 16 | 17 | [Fact] 18 | public void CanEncode0() => Assert.Equal(new Byte[] { 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000 }.ToHexString(), Encode(0).ToHexString()); 19 | 20 | [Fact] 21 | public void CanEncode1() => Assert.Equal(new Byte[] { 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000001 }.ToHexString(), Encode(1).ToHexString()); 22 | 23 | [Fact] 24 | public void CanEncode2() => Assert.Equal(new Byte[] { 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000010 }.ToHexString(), Encode(2).ToHexString()); 25 | 26 | [Fact] 27 | public void CanEncode3() => Assert.Equal(new Byte[] { 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000011 }.ToHexString(), Encode(3).ToHexString()); 28 | 29 | [Fact] 30 | public void CanEncodeMax() => Assert.Equal(new Byte[] { 0b11111111, 0b11111111, 0b11111111, 0b11111111, 0b11111111, 0b11111111, 0b11111111, 0b11111111 }.ToHexString(), 31 | Encode(UInt64.MaxValue).ToHexString()); 32 | 33 | private static UInt64 Decode(Byte[] encoded) 34 | { 35 | using var stream = new MemoryStream(encoded); 36 | using var reader = new StreamBitReader(stream); 37 | var decoder = new RawIntegerDecoder(reader); 38 | return decoder.DecodeUInt64(); 39 | } 40 | 41 | [Fact] 42 | public void CanDecode0() => Assert.Equal((UInt64)0, Decode(new Byte[] { 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000 })); 43 | 44 | [Fact] 45 | public void CanDecode1() => Assert.Equal((UInt64)1, Decode(new Byte[] { 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000001 })); 46 | 47 | [Fact] 48 | public void CanDecode2() => Assert.Equal((UInt64)2, Decode(new Byte[] { 0b00000000, 0b00000000, 
0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000010 })); 49 | 50 | [Fact] 51 | public void CanDecode3() => Assert.Equal((UInt64)3, Decode(new Byte[] { 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000011 })); 52 | 53 | [Fact] 54 | public void CanDecodeMax() => Assert.Equal(RawInteger.MaxValue, Decode(new Byte[] { 0b11111111, 0b11111111, 0b11111111, 0b11111111, 0b11111111, 0b11111111, 0b11111111, 0b11111111 })); 55 | 56 | 57 | [Fact] 58 | public void CanEncodeDecodeFirst1000() 59 | { 60 | using var stream = new MemoryStream(); 61 | 62 | using (var writer = new StreamBitWriter(stream)) 63 | { 64 | var encoder = new RawIntegerEncoder(writer); 65 | for (UInt64 symbol = 0; symbol < 1000; symbol++) encoder.EncodeUInt64(symbol); 66 | } 67 | 68 | stream.Seek(0, SeekOrigin.Begin); 69 | 70 | using (var reader = new StreamBitReader(stream)) 71 | { 72 | var decoder = new RawIntegerDecoder(reader); 73 | for (UInt64 symbol = 0; symbol < 1000; symbol++) 74 | { 75 | Assert.Equal(symbol, decoder.DecodeUInt64()); 76 | } 77 | } 78 | } 79 | } -------------------------------------------------------------------------------- /Library/StreamBitWriter.cs: -------------------------------------------------------------------------------- 1 | // ReSharper disable UnusedType.Global 2 | // ReSharper disable MemberCanBePrivate.Global 3 | 4 | namespace InvertedTomato.Packing; 5 | 6 | public class StreamBitWriter : IBitWriter, IDisposable 7 | { 8 | private const UInt64 Zero = 0; 9 | private const UInt64 One = 1; 10 | private readonly Stream _underlying; 11 | private readonly Boolean _ownUnderlying; 12 | private readonly Byte[] _buffer; 13 | private Int32 _count; 14 | 15 | public Boolean IsDisposed { get; private set; } 16 | 17 | public StreamBitWriter(Stream underlying, Boolean ownUnderlying = false, Int32 bufferSize = 1024) 18 | { 19 | _underlying = underlying; 20 | _ownUnderlying = ownUnderlying; 21 | _buffer = new Byte[bufferSize]; 22 
| } 23 | // Appends the lowest 'count' bits of 'bits' to the buffer, most-significant bit first, flushing the buffer to the underlying stream each time it fills. 24 | public void WriteBits(UInt64 bits, int count) 25 | { 26 | #if DEBUG 27 | // Check the count is sane 28 | if (count is < 0 or > Bits.LongBits) throw new ArgumentOutOfRangeException(nameof(count), $"Must be between 0 and {Bits.LongBits} but was {count}"); 29 | 30 | // Check that only bits within the count range are used (yep, we could clean this automatically, but that adds operations and slows things down, so we only check when debugging) 31 | if ((bits << Bits.LongBits - count >> Bits.LongBits - count != bits) 32 | || (count == 0 && bits > 0) // C# masks the shift count of a 64-bit operand to its low 6 bits, so shifting by the full width (count == 0) is a shift by 0 and the test above cannot catch this case 33 | ) throw new ArgumentException("Bits must only have '1' bits within the 'count' range. Ie, if count=1, only the right-most bit can be used", nameof(bits)); 34 | #endif 35 | if (count == 0) return; // Nothing to write. Without this guard the loop body still runs once, and outside DEBUG a zero-count call carrying stray bits would be OR-ed into bits already written to the current byte 36 | // Cycle through buffer bytes 37 | do 38 | { 39 | // Calculate bit address (a = index of the current byte in the buffer, b = number of bits already used in that byte) 40 | var a = _count / Bits.ByteBits; 41 | var b = _count % Bits.ByteBits; 42 | 43 | // Calculate number of bits to load into this byte (whatever space is left in it, or all remaining input if that is smaller) 44 | var load = Math.Min(Bits.ByteBits - b, count); 45 | 46 | // Extract the highest-order bits that have not been written yet 47 | var chunk = (Byte)(bits >> (count - load)); 48 | 49 | // Load the bits directly after the bits already used in this byte 50 | _buffer[a] |= (Byte)(chunk << (Bits.ByteBits - load - b)); 51 | _count += load; 52 | 53 | // Decrement input 54 | count -= load; 55 | 56 | // If buffer is full.. 57 | if (_count == _buffer.Length * Bits.ByteBits) 58 | { 59 | // Flush buffer 60 | _underlying.Write(_buffer); 61 | 62 | // Clear buffer (bits are OR-ed in, so stale content must be zeroed before reuse) 63 | _buffer.Clear(); 64 | _count = 0; 65 | } 66 | 67 | // If all bits have been written, end here 68 | } while (count > 0); 69 | } 70 | 71 | public void WriteBit(Boolean value) => WriteBits(value ?
One : Zero, 1); 72 | 73 | public void Align() 74 | { 75 | if (HasPartialByte()) WriteBits(0, Bits.ByteBits - _count % Bits.ByteBits); 76 | } 77 | 78 | public void Dispose() 79 | { 80 | // Don't allow running twice 81 | if (IsDisposed) return; 82 | IsDisposed = true; 83 | 84 | // Write out any remaining bytes 85 | var count = _count / Bits.ByteBits; 86 | if (HasPartialByte()) count++; // If there's an incomplete byte, write it anyway 87 | _underlying.Write(_buffer, count); 88 | 89 | // If we own the underlying, dispose it too 90 | if (_ownUnderlying) _underlying.Dispose(); 91 | } 92 | 93 | public override String ToString() => _buffer.ToBinaryString(0, _count); 94 | 95 | private Boolean HasPartialByte() => _count % Bits.ByteBits > 0; 96 | } -------------------------------------------------------------------------------- /Test/ThompsonAlphaTests.cs: -------------------------------------------------------------------------------- 1 | using FluentAssertions; 2 | 3 | namespace InvertedTomato.Packing; 4 | 5 | public class ThompsonAlphaTests 6 | { 7 | // Max | Bin | Value 8 | // 1 | _1 | 2 9 | // 2 | _11 | 6 10 | // 3 | _111 | 14 11 | // 4 | _1111 | 30 12 | // 5 | _11111 13 | 14 | // 2^(bits + 1) 15 | 16 | [Fact] 17 | public void CanCalculateMaxValue1() => new ThompsonAlphaIntegerEncoder(null!, 1).MaxValue.Should().Be(15); 18 | [Fact] 19 | public void CanCalculateMaxValue2() => new ThompsonAlphaIntegerEncoder(null!, 2).MaxValue.Should().Be(255); 20 | [Fact] 21 | public void CanCalculateMaxValue3() => new ThompsonAlphaIntegerEncoder(null!, 3).MaxValue.Should().Be(65535); 22 | [Fact] 23 | public void CanCalculateMaxValue4() => new ThompsonAlphaIntegerEncoder(null!, 4).MaxValue.Should().Be(4294967295); 24 | [Fact] 25 | public void CanCalculateMaxValue5() => new ThompsonAlphaIntegerEncoder(null!, 5).MaxValue.Should().Be(18446744073709551615); 26 | [Fact] 27 | public void CanCalculateMaxValue6() => new ThompsonAlphaIntegerEncoder(null!, 
6).MaxValue.Should().Be(18446744073709551615); 28 | 29 | private static Byte[] Encode(UInt64 value) 30 | { 31 | using var stream = new MemoryStream(); 32 | using (var writer = new StreamBitWriter(stream)) 33 | { 34 | var encoder = new ThompsonAlphaIntegerEncoder(writer, 6); 35 | encoder.EncodeUInt64(value); 36 | } 37 | 38 | return stream.ToArray(); 39 | } 40 | 41 | [Fact] 42 | public void CanEncode0() => Assert.Equal(new Byte[] {0b000000_00}, Encode(0)); // Len=0, Val=(1) 43 | 44 | [Fact] 45 | public void CanEncode1() => Assert.Equal(new Byte[] {0b000001_0_0}, Encode(1)); // Len=1, Val=(1)1 46 | 47 | [Fact] 48 | public void CanEncode2() => Assert.Equal(new Byte[] {0b000001_1_0}, Encode(2)); // Len=10, val=(1)10 49 | 50 | [Fact] 51 | public void CanEncode3() => Assert.Equal(new Byte[] {0b000010_00}, Encode(3)); // Len=10, val=(1)11 52 | 53 | [Fact] 54 | public void CanEncode8589934590() => Assert.Equal(new Byte[] {0b100000_11, 0b11111111, 0b11111111, 0b11111111, 0b111111_00}, Encode(8589934590)); 55 | 56 | [Fact] 57 | public void CanEncode8589934591() => Assert.Equal(new Byte[] {0b100001_00, 0b00000000, 0b00000000, 0b00000000, 0b0000000_0}, Encode(8589934591)); 58 | 59 | [Fact] 60 | public void CanEncodeMax() => Assert.Equal(new Byte[] {0b111111_11, 0b11111111, 0b11111111, 0b11111111, 0b11111111, 0b11111111, 0b11111111, 0b11111111, 0b11111000}, Encode(UInt64.MaxValue - 1)); 61 | 62 | private static UInt64 Decode(Byte[] encoded) 63 | { 64 | using var stream = new MemoryStream(encoded); 65 | using var reader = new StreamBitReader(stream); 66 | var decoder = new ThompsonAlphaIntegerDecoder(reader, 6); 67 | 68 | return decoder.DecodeUInt64(); 69 | } 70 | 71 | [Fact] 72 | public void CanDecode0() => Assert.Equal((UInt64) 0, Decode(new Byte[] {0b000000_00})); 73 | 74 | [Fact] 75 | public void CanDecode1() => Assert.Equal((UInt64) 1, Decode(new Byte[] {0b000001_0_0})); // (len)_(val)_(padding) 76 | 77 | [Fact] 78 | public void CanDecode2() => Assert.Equal((UInt64) 2, 
Decode(new Byte[] {0b000001_1_0})); 79 | 80 | [Fact] 81 | public void CanDecode3() => Assert.Equal((UInt64) 3, Decode(new Byte[] {0b000010_00})); 82 | 83 | [Fact] 84 | public void CanDecode8589934590() => Assert.Equal((UInt64) 8589934590, Decode(new Byte[] {0b100000_11, 0b11111111, 0b11111111, 0b11111111, 0b111111_00})); 85 | 86 | [Fact] 87 | public void CanDecode8589934591() => Assert.Equal((UInt64) 8589934591, Decode(new Byte[] {0b100001_00, 0b00000000, 0b00000000, 0b00000000, 0b0000000_0})); 88 | 89 | [Fact] 90 | public void CanDecodeMax() => Assert.Equal(UInt64.MaxValue - 1, Decode(new Byte[] {0b111111_11, 0b11111111, 0b11111111, 0b11111111, 0b11111111, 0b11111111, 0b11111111, 0b11111111, 0b11111000})); 91 | 92 | [Fact] 93 | public void CanEncodeDecodeFirst1000() 94 | { 95 | using var stream = new MemoryStream(); 96 | 97 | using (var writer = new StreamBitWriter(stream)) 98 | { 99 | var encoder = new ThompsonAlphaIntegerEncoder(writer, 6); 100 | for (UInt64 symbol = 0; symbol < 1000; symbol++) encoder.EncodeUInt64(symbol); 101 | } 102 | 103 | stream.Seek(0, SeekOrigin.Begin); 104 | 105 | using (var reader = new StreamBitReader(stream)) 106 | { 107 | var decoder = new ThompsonAlphaIntegerDecoder(reader, 6); 108 | for (UInt64 symbol = 0; symbol < 1000; symbol++) 109 | { 110 | Assert.Equal(symbol, decoder.DecodeUInt64()); 111 | } 112 | } 113 | } 114 | } -------------------------------------------------------------------------------- /Test/InvertedVlqCodecTests.cs: -------------------------------------------------------------------------------- 1 | namespace InvertedTomato.Packing; 2 | 3 | public class InvertedVlqCodecTests 4 | { 5 | private static Byte[] Encode(UInt64 value, Int32 expectedCount) 6 | { 7 | using var stream = new MemoryStream(expectedCount); 8 | using (var writer = new StreamBitWriter(stream)) 9 | { 10 | var encoder = new InvertedVlqIntegerEncoder(writer); 11 | encoder.EncodeUInt64(value); 12 | } 13 | 14 | return stream.ToArray(); 15 | } 16 | 17 | 
[Fact] 18 | public void CanEncode0() => Assert.Equal(new Byte[] { 0b10000000 }, Encode(0, 1)); 19 | 20 | [Fact] 21 | public void CanEncode1() => Assert.Equal(new Byte[] { 0b10000001 }, Encode(1, 1)); 22 | 23 | [Fact] 24 | public void CanEncode2() => Assert.Equal(new Byte[] { 0b10000010 }, Encode(2, 1)); 25 | 26 | [Fact] 27 | public void CanEncode3() => Assert.Equal(new Byte[] { 0b10000011 }, Encode(3, 1)); 28 | 29 | [Fact] 30 | public void CanEncode127() => Assert.Equal(new Byte[] { 0b11111111 }, Encode(127, 1)); 31 | 32 | [Fact] 33 | public void CanEncode128() => Assert.Equal(new Byte[] { 0b00000000, 0b10000000 }, Encode(128, 2)); 34 | 35 | [Fact] 36 | public void CanEncode129() => Assert.Equal(new Byte[] { 0b00000001, 0b10000000 }, Encode(129, 2)); 37 | 38 | [Fact] 39 | public void CanEncode16511() => Assert.Equal(new Byte[] { 0b01111111, 0b11111111 }, Encode(16511, 2)); 40 | 41 | [Fact] 42 | public void CanEncode16512() => Assert.Equal(new Byte[] { 0b00000000, 0b00000000, 0b10000000 }, Encode(16512, 3)); 43 | 44 | [Fact] 45 | public void CanEncode2113663() => Assert.Equal(new Byte[] { 0b01111111, 0b01111111, 0b11111111 }, Encode(2113663, 3)); 46 | 47 | [Fact] 48 | public void CanEncode2113664() => Assert.Equal(new Byte[] { 0b00000000, 0b00000000, 0b00000000, 0b10000000 }, Encode(2113664, 4)); 49 | 50 | [Fact] 51 | public void CanEncodeMax() => Assert.Equal(new Byte[] { 0b01111110, 0b01111110, 0b01111110, 0b01111110, 0b01111110, 0b01111110, 0b01111110, 0b01111110, 0b01111110, 0b10000000 }, 52 | Encode(InvertedVlqInteger.MaxValue, 10)); 53 | 54 | [Fact] 55 | public void EncoderOverflowThrows() => Assert.Throws(() => { Encode(UInt64.MaxValue, 32); }); 56 | 57 | private static UInt64 Decode(Byte[] encoded, Int32 expectedUsed) 58 | { 59 | using var stream = new MemoryStream(encoded); 60 | using var reader = new StreamBitReader(stream); 61 | var decoder = new InvertedVlqIntegerDecoder(reader); 62 | 63 | return decoder.DecodeUInt64(); 64 | } 65 | 66 | [Fact] 67 | 
public void CanDecode0() => Assert.Equal((UInt64)0, Decode(new Byte[] { 0b10000000 }, 1)); 68 | 69 | [Fact] 70 | public void CanDecode1() => Assert.Equal((UInt64)1, Decode(new Byte[] { 0b10000001 }, 1)); 71 | 72 | [Fact] 73 | public void CanDecode2() => Assert.Equal((UInt64)2, Decode(new Byte[] { 0b10000010 }, 1)); 74 | 75 | [Fact] 76 | public void CanDecode3() => Assert.Equal((UInt64)3, Decode(new Byte[] { 0b10000011 }, 1)); 77 | 78 | [Fact] 79 | public void CanDecode127() => Assert.Equal((UInt64)127, Decode(new Byte[] { 0b11111111 }, 1)); 80 | 81 | [Fact] 82 | public void CanDecode128() => Assert.Equal((UInt64)128, Decode(new Byte[] { 0b00000000, 0b10000000 }, 2)); 83 | 84 | [Fact] 85 | public void CanDecode129() => Assert.Equal((UInt64)129, Decode(new Byte[] { 0b00000001, 0b10000000 }, 2)); 86 | 87 | [Fact] 88 | public void CanDecode16511() => Assert.Equal((UInt64)16511, Decode(new Byte[] { 0b01111111, 0b11111111 }, 2)); 89 | 90 | [Fact] 91 | public void CanDecode16512() => Assert.Equal((UInt64)16512, Decode(new Byte[] { 0b00000000, 0b00000000, 0b10000000 }, 3)); 92 | 93 | [Fact] 94 | public void CanDecode16513() => Assert.Equal((UInt64)16513, Decode(new Byte[] { 0b00000001, 0b00000000, 0b10000000 }, 3)); 95 | 96 | [Fact] 97 | public void CanDecode2113663() => Assert.Equal((UInt64)2113663, Decode(new Byte[] { 0b01111111, 0b01111111, 0b11111111 }, 3)); 98 | 99 | [Fact] 100 | public void CanDecode2113664() => Assert.Equal((UInt64)2113664, Decode(new Byte[] { 0b00000000, 0b00000000, 0b00000000, 0b10000000 }, 4)); 101 | 102 | [Fact] 103 | public void CanDecodeMax() => Assert.Equal(InvertedVlqInteger.MaxValue, 104 | Decode(new Byte[] { 0b01111110, 0b01111110, 0b01111110, 0b01111110, 0b01111110, 0b01111110, 0b01111110, 0b01111110, 0b01111110, 0b10000000 }, 10)); 105 | 106 | [Fact] 107 | public void DecodingOverflowThrows() => Assert.Throws(() => 108 | { 109 | Decode(new Byte[] { 0b01111111, 0b01111110, 0b01111110, 0b01111110, 0b01111110, 0b01111110, 0b01111110, 
0b01111110, 0b01111110, 0b01111110, 0b10000000 }, 11); 110 | }); 111 | 112 | 113 | [Fact] 114 | public void CanEncodeDecodeFirst1000() 115 | { 116 | using var stream = new MemoryStream(); 117 | 118 | using (var writer = new StreamBitWriter(stream)) 119 | { 120 | var encoder = new InvertedVlqIntegerEncoder(writer); 121 | for (UInt64 symbol = 0; symbol < 1000; symbol++) encoder.EncodeUInt64(symbol); 122 | } 123 | 124 | stream.Seek(0, SeekOrigin.Begin); 125 | 126 | using (var reader = new StreamBitReader(stream)) 127 | { 128 | var decoder = new InvertedVlqIntegerDecoder(reader); 129 | for (UInt64 symbol = 0; symbol < 1000; symbol++) 130 | { 131 | Assert.Equal(symbol, decoder.DecodeUInt64()); 132 | } 133 | } 134 | } 135 | } -------------------------------------------------------------------------------- /LoadTest/Program.cs: -------------------------------------------------------------------------------- 1 | // #5 2 | // FIBONACCI (Gen2) 3 | // Compress: 5986ms 12.75MB / s Total 38MB 4 | // Decompress: 3455ms 22.08MB / s 5 | 6 | // VLQ (Gen2) 7 | // Compress: 386ms 197.65MB / s Total 36MB 8 | // Decompress: 874ms 87.29MB / s 9 | 10 | // 220707 11 | // CODEC ENCODE TIME DECODE TIME RESULT SIZE 12 | // ThompsonAlpha 13 | // InvertedTomato.Compression.Integers.Gen2.ThompsonAlphaCodec 839ms 736ms 32.00MB 14 | // InvertedTomato.Compression.Integers.ThompsonAlphaCodec 897ms 738ms 32.00MB 15 | // Fibonacci 16 | // InvertedTomato.Compression.Integers.Gen2.FibonacciCodec 2,874ms 1,442ms 38.00MB 17 | // InvertedTomato.Compression.Integers.FibonacciCodec 8,399ms 6,777ms 38.00MB 18 | // VLQ 19 | // InvertedTomato.Compression.Integers.Gen2.VlqCodec 265ms 346ms 36.00MB 20 | // InvertedTomato.Compression.Integers.VlqCodec 959ms 1,112ms 36.00MB 21 | // Raw 22 | // InvertedTomato.Compression.Integers.Gen2.RawCodec 631ms 625ms 76.00MB 23 | // InvertedTomato.Compression.Integers.RawCodec 2,000ms 2,093ms 76.00MB 24 | 25 | // 220711 Added buffer to StreamBitReader&StreamBitWriter (ie, 
writes byte[] rather than byte) 26 | // CODEC ENCODE TIME DECODE TIME RESULT SIZE 27 | // ThompsonAlpha 28 | // InvertedTomato.Compression.Integers.Gen2.ThompsonAlphaCodec 860ms 745ms 32.00MB 29 | // InvertedTomato.Compression.Integers.Gen3.ThompsonAlphaCodec 758ms 625ms 32.00MB 30 | // Fibonacci 31 | // InvertedTomato.Compression.Integers.Gen2.FibonacciCodec 2,891ms 1,445ms 38.00MB 32 | // InvertedTomato.Compression.Integers.Gen3.FibonacciCodec 7,972ms 6,385ms 38.00MB 33 | // VLQ 34 | // InvertedTomato.Compression.Integers.Gen2.VlqCodec 271ms 356ms 36.00MB 35 | // InvertedTomato.Compression.Integers.Gen3.VlqCodec 525ms 683ms 36.00MB 36 | // Raw 37 | // InvertedTomato.Compression.Integers.Gen2.RawCodec 647ms 639ms 76.00MB 38 | // InvertedTomato.Compression.Integers.Gen3.RawCodec 825ms 850ms 76.00MB 39 | 40 | // 220713 Added Fib write buffering rather than pushing raw bits 41 | // Fibonacci 42 | // InvertedTomato.Compression.Integers.Gen2.FibonacciCodec 2,924ms 1,484ms 38.00MB 43 | // InvertedTomato.Compression.Integers.Gen3.FibonacciCodec 3,396ms 7,443ms 38.00MB 44 | 45 | using System.Diagnostics; 46 | using InvertedTomato.Packing; 47 | using InvertedTomato.Packing.Codecs.Integers; 48 | 49 | // ReSharper disable ForeachCanBeConvertedToQueryUsingAnotherGetEnumerator 50 | 51 | var min = 100000; 52 | var count = 10000000; 53 | 54 | // Seed 55 | var input = new List(count); 56 | for (var v = min; v < min + count; v++) 57 | { 58 | input.Add((UInt64)v); 59 | } 60 | 61 | void Gen3Test(string name, Func encoderFactory, Func decoderFactory) 62 | { 63 | // Compress 64 | using var stream = new MemoryStream(count * 5); 65 | var compressStopwatch = Stopwatch.StartNew(); 66 | using (var writer = new StreamBitWriter(stream)) 67 | { 68 | var encoder = encoderFactory(writer); 69 | foreach (var item in input) 70 | { 71 | encoder.EncodeUInt64(item); 72 | } 73 | } 74 | 75 | compressStopwatch.Stop(); 76 | 77 | // Decompress 78 | stream.Position = 0; 79 | var decompressStopwatch = 
Stopwatch.StartNew(); 80 | using (var reader = new StreamBitReader(stream)) 81 | { 82 | var decoder = decoderFactory(reader); 83 | foreach (var item in input) 84 | { 85 | if (item != decoder.DecodeUInt64()) throw new("Incorrect result."); 86 | } 87 | } 88 | 89 | decompressStopwatch.Stop(); 90 | 91 | Console.WriteLine("{0,-75} {1,15:N0}ms {2,15:N0}ms {3,15:N}MB", name, compressStopwatch.ElapsedMilliseconds, decompressStopwatch.ElapsedMilliseconds, 92 | stream.Length / 1024 / 1024); 93 | } 94 | 95 | 96 | Console.WriteLine("CODEC ENCODE TIME DECODE TIME RESULT SIZE"); 97 | Console.WriteLine("ThompsonAlpha"); 98 | Gen3Test( 99 | "ThompsonAlpha(6)", 100 | writer => new ThompsonAlphaIntegerEncoder(writer, 6), 101 | reader => new ThompsonAlphaIntegerDecoder(reader, 6) 102 | ); 103 | 104 | Console.WriteLine("Fibonacci"); 105 | Gen3Test( 106 | "Fibbonacci", 107 | writer => new FibonacciIntegerEncoder(writer), 108 | reader => new FibonacciIntegerDecoder(reader) 109 | ); 110 | 111 | Console.WriteLine("VLQ"); 112 | Gen3Test( 113 | "VLQ", 114 | writer => new VlqIntegerEncoder(writer), 115 | reader => new VlqIntegerDecoder(reader) 116 | ); 117 | 118 | Console.WriteLine("Raw"); 119 | Gen3Test("Raw", 120 | writer => new RawIntegerEncoder(writer), 121 | reader => new RawIntegerDecoder(reader) 122 | ); 123 | 124 | Console.WriteLine("\nDone."); -------------------------------------------------------------------------------- /.idea/.idea.binary/.idea/workspace.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | LoadTest/LoadTest.csproj 5 | Sample/Sample.csproj 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 18 | 19 | 20 | 21 | 22 | 24 | 25 | 26 | 29 | 46 | 47 | 48 | 65 | 66 | 83 | 84 | 85 | 86 | 87 | 88 | 1678013536669 89 | 95 | 96 | 97 | 98 | 100 | 101 | 102 | 104 | -------------------------------------------------------------------------------- /Test/VlqCodecTests.cs: 
-------------------------------------------------------------------------------- 1 | namespace InvertedTomato.Packing; 2 | 3 | public class VlqCodecTests 4 | { 5 | private static Byte[] Encode(UInt64 value) 6 | { 7 | using var stream = new MemoryStream(); 8 | using (var writer = new StreamBitWriter(stream)) 9 | { 10 | var encoder = new VlqIntegerEncoder(writer); 11 | encoder.EncodeUInt64(value); 12 | } 13 | 14 | return stream.ToArray(); 15 | } 16 | 17 | [Fact] 18 | public void CanEncode0() => Assert.Equal(new Byte[] { 0b00000000 }, Encode(0)); 19 | 20 | [Fact] 21 | public void CanEncode1() => Assert.Equal(new Byte[] { 0b00000001 }, Encode(1)); 22 | 23 | [Fact] 24 | public void CanEncode2() => Assert.Equal(new Byte[] { 0b00000010 }, Encode(2)); 25 | 26 | [Fact] 27 | public void CanEncode3() => Assert.Equal(new Byte[] { 0b00000011 }, Encode(3)); 28 | 29 | [Fact] 30 | public void CanEncode127() => Assert.Equal(new Byte[] { 0b01111111 }, Encode(127)); 31 | 32 | [Fact] 33 | public void CanEncode128() => Assert.Equal(new Byte[] { 0b10000000, 0b00000000 }, Encode(128)); 34 | 35 | [Fact] 36 | public void CanEncode129() => Assert.Equal(new Byte[] { 0b10000001, 0b00000000 }, Encode(129)); 37 | 38 | [Fact] 39 | public void CanEncode16511() => Assert.Equal(new Byte[] { 0b11111111, 0b01111111 }, Encode(16511)); 40 | 41 | [Fact] 42 | public void CanEncode16512() => Assert.Equal(new Byte[] { 0b10000000, 0b10000000, 0b00000000 }, Encode(16512)); 43 | 44 | [Fact] 45 | public void CanEncode2113663() => Assert.Equal(new Byte[] { 0b11111111, 0b11111111, 0b01111111 }, Encode(2113663)); 46 | 47 | [Fact] 48 | public void CanEncode2113664() => Assert.Equal(new Byte[] { 0b10000000, 0b10000000, 0b10000000, 0b00000000 }, Encode(2113664)); 49 | 50 | [Fact] 51 | public void EncodeMax() => Assert.Equal(new Byte[] { 0b11111110, 0b11111110, 0b11111110, 0b11111110, 0b11111110, 0b11111110, 0b11111110, 0b11111110, 0b11111110, 0b00000000 }, 52 | Encode(VlqInteger.MaxValue)); 53 | 54 | [Fact] 55 | 
public void EncodeOverflow() => Assert.Throws(() => { Encode(UInt64.MaxValue); }); 56 | 57 | private static UInt64 Decode(Byte[] encoded) 58 | { 59 | using var stream = new MemoryStream(encoded); 60 | using var reader = new StreamBitReader(stream); 61 | var decoder = new VlqIntegerDecoder(reader); 62 | return decoder.DecodeUInt64(); 63 | } 64 | 65 | [Fact] 66 | public void CanDecode0() => Assert.Equal((UInt64)0, Decode(new Byte[] { 0b00000000 })); 67 | 68 | [Fact] 69 | public void CanDecode1() => Assert.Equal((UInt64)1, Decode(new Byte[] { 0b00000001 })); 70 | 71 | [Fact] 72 | public void CanDecode2() => Assert.Equal((UInt64)2, Decode(new Byte[] { 0b00000010 })); 73 | 74 | [Fact] 75 | public void CanDecode3() => Assert.Equal((UInt64)3, Decode(new Byte[] { 0b00000011 })); 76 | 77 | [Fact] 78 | public void CanDecode127() => Assert.Equal((UInt64)127, Decode(new Byte[] { 0b01111111 })); 79 | 80 | [Fact] 81 | public void CanDecode128() => Assert.Equal((UInt64)128, Decode(new Byte[] { 0b10000000, 0b00000000 })); 82 | 83 | [Fact] 84 | public void CanDecode129() => Assert.Equal((UInt64)129, Decode(new Byte[] { 0b10000001, 0b00000000 })); 85 | 86 | [Fact] 87 | public void CanDecode16511() => Assert.Equal((UInt64)16511, Decode(new Byte[] { 0b11111111, 0b01111111 })); 88 | 89 | [Fact] 90 | public void CanDecode16512() => Assert.Equal((UInt64)16512, Decode(new Byte[] { 0b10000000, 0b10000000, 0b00000000 })); 91 | 92 | [Fact] 93 | public void CanDecode16513() => Assert.Equal((UInt64)16513, Decode(new Byte[] { 0b10000001, 0b10000000, 0b00000000 })); 94 | 95 | [Fact] 96 | public void CanDecode2113663() => Assert.Equal((UInt64)2113663, Decode(new Byte[] { 0b11111111, 0b11111111, 0b01111111 })); 97 | 98 | [Fact] 99 | public void CanDecode2113664() => Assert.Equal((UInt64)2113664, Decode(new Byte[] { 0b10000000, 0b10000000, 0b10000000, 0b00000000 })); 100 | 101 | [Fact] 102 | public void DecodeMax() => Assert.Equal(VlqInteger.MaxValue, 103 | Decode(new Byte[] { 0b11111110, 
0b11111110, 0b11111110, 0b11111110, 0b11111110, 0b11111110, 0b11111110, 0b11111110, 0b11111110, 0b00000000 })); 104 | 105 | [Fact] 106 | public void CanDecode1_1_1() 107 | { 108 | using var stream = new MemoryStream(new Byte[] { 0b00000001, 0b00000001, 0b00000001 }); 109 | using var reader = new StreamBitReader(stream); 110 | var decoder = new VlqIntegerDecoder(reader); 111 | 112 | Assert.Equal((UInt64)1, decoder.DecodeUInt64()); 113 | Assert.Equal((UInt64)1, decoder.DecodeUInt64()); 114 | Assert.Equal((UInt64)1, decoder.DecodeUInt64()); 115 | Assert.Throws(() => decoder.DecodeUInt64()); 116 | } 117 | 118 | [Fact] 119 | public void DecodeInputClipped() => Assert.Throws(() => { Decode(new Byte[] { 0b10000000 }); }); 120 | 121 | [Fact] 122 | public void DecodeOverflow() => Assert.Throws(() => 123 | { 124 | Decode(new Byte[] { 0b11111111, 0b11111110, 0b11111110, 0b11111110, 0b11111110, 0b11111110, 0b11111110, 0b11111110, 0b11111110, 0b11111110, 0b00000000 }); 125 | }); 126 | 127 | [Fact] 128 | public void CanDecode1_X() => Assert.Equal((UInt64)1, Decode(new Byte[] { 0b00000001, 0b10000011 })); 129 | 130 | 131 | [Fact] 132 | public void CanEncodeDecodeFirst1000() 133 | { 134 | using var stream = new MemoryStream(); 135 | 136 | using (var writer = new StreamBitWriter(stream)) 137 | { 138 | var encoder = new VlqIntegerEncoder(writer); 139 | for (UInt64 symbol = 0; symbol < 1000; symbol++) encoder.EncodeUInt64(symbol); 140 | } 141 | 142 | stream.Seek(0, SeekOrigin.Begin); 143 | 144 | using (var reader = new StreamBitReader(stream)) 145 | { 146 | var decoder = new VlqIntegerDecoder(reader); 147 | for (UInt64 symbol = 0; symbol < 1000; symbol++) 148 | { 149 | Assert.Equal(symbol, decoder.DecodeUInt64()); 150 | } 151 | } 152 | } 153 | } -------------------------------------------------------------------------------- /Test/StreamBitReaderTests.cs: -------------------------------------------------------------------------------- 1 | namespace InvertedTomato.Packing; 2 | 3 | 
public class StreamBitReaderTests 4 | { 5 | [Fact] 6 | public void CanReadBit1() 7 | { 8 | using var stream = new MemoryStream(new byte[] { 0b_10000000 }); 9 | using var reader = new StreamBitReader(stream); 10 | 11 | Assert.True(reader.ReadBit()); 12 | } 13 | 14 | [Fact] 15 | public void CanReadBit0() 16 | { 17 | using var stream = new MemoryStream(new byte[] { 0b_00000000 }); 18 | using var reader = new StreamBitReader(stream); 19 | 20 | Assert.False(reader.ReadBit()); 21 | } 22 | 23 | [Fact] 24 | public void CanReadBit0_1() 25 | { 26 | using var stream = new MemoryStream(new byte[] { 0b_01000000 }); 27 | using var reader = new StreamBitReader(stream); 28 | 29 | Assert.False(reader.ReadBit()); 30 | Assert.True(reader.ReadBit()); 31 | } 32 | 33 | [Fact] 34 | public void CanPeak_ReadBit8() 35 | { 36 | using var stream = new MemoryStream(new byte[] { 0b_11111111 }); 37 | using var reader = new StreamBitReader(stream); 38 | 39 | Assert.True(reader.PeakBit()); 40 | Assert.Equal((ulong)0b11111111, reader.ReadBits(8)); 41 | } 42 | 43 | [Fact] 44 | public void CanPeak_ReadBit8_Peak_ReadBit8_ReadBit0() 45 | { 46 | using var stream = new MemoryStream(new byte[] { 0b_11111111, 0b00000000 }); 47 | using var reader = new StreamBitReader(stream); 48 | 49 | Assert.True(reader.PeakBit()); 50 | Assert.Equal((ulong)0b11111111, reader.ReadBits(8)); 51 | 52 | Assert.False(reader.PeakBit()); 53 | Assert.Equal((ulong)0b00000000, reader.ReadBits(8)); 54 | Assert.Equal((ulong)0b00000000, reader.ReadBits(0)); 55 | } 56 | 57 | [Fact] 58 | public void CanReadBit1_1_1_1_1_1_1_1_0_0_0_0_0_0_0_0() 59 | { 60 | using var stream = new MemoryStream(new byte[] { 0b_11111111, 0b00000000 }); 61 | using var reader = new StreamBitReader(stream); 62 | 63 | Assert.True(reader.ReadBit()); 64 | Assert.True(reader.ReadBit()); 65 | Assert.True(reader.ReadBit()); 66 | Assert.True(reader.ReadBit()); 67 | Assert.True(reader.ReadBit()); 68 | Assert.True(reader.ReadBit()); 69 | Assert.True(reader.ReadBit()); 70 
| Assert.True(reader.ReadBit()); 71 | 72 | Assert.False(reader.ReadBit()); 73 | Assert.False(reader.ReadBit()); 74 | Assert.False(reader.ReadBit()); 75 | Assert.False(reader.ReadBit()); 76 | Assert.False(reader.ReadBit()); 77 | Assert.False(reader.ReadBit()); 78 | Assert.False(reader.ReadBit()); 79 | Assert.False(reader.ReadBit()); 80 | } 81 | 82 | [Fact] 83 | public void CanReadBits4_Peak_8_Peak_4() 84 | { 85 | using var stream = new MemoryStream(new byte[] { 0b_11111111, 0b00000000 }); 86 | using var reader = new StreamBitReader(stream); 87 | 88 | Assert.Equal((ulong)0b1111, reader.ReadBits(4)); 89 | 90 | Assert.True(reader.PeakBit()); 91 | Assert.Equal((ulong)0b11110000, reader.ReadBits(8)); 92 | 93 | Assert.False(reader.PeakBit()); 94 | Assert.Equal((ulong)0b0000, reader.ReadBits(4)); 95 | } 96 | 97 | [Fact] 98 | public void CanReadBits4_Peak_Align_Peak_4() 99 | { 100 | using var stream = new MemoryStream(new byte[] { 0b_11111111, 0b00000000 }); 101 | using var reader = new StreamBitReader(stream); 102 | 103 | Assert.Equal((ulong)0b1111, reader.ReadBits(4)); 104 | Assert.True(reader.PeakBit()); 105 | reader.Align(); 106 | 107 | Assert.False(reader.PeakBit()); 108 | Assert.Equal((ulong)0b0000, reader.ReadBits(4)); 109 | Assert.Equal((ulong)0b0000, reader.ReadBits(4)); 110 | } 111 | 112 | [Fact] 113 | public void CanReadBits32() 114 | { 115 | using var stream = new MemoryStream(new byte[] { 0b_11111111, 0b_11111111, 0b_11111111, 0b_11111111 }); 116 | using var reader = new StreamBitReader(stream); 117 | 118 | Assert.Equal(0b_11111111_11111111_11111111_11111111, reader.ReadBits(32)); 119 | } 120 | 121 | [Fact] 122 | public void CanReadBits63() 123 | { 124 | using var stream = new MemoryStream(new byte[] { 0b_11111111, 0b_11111111, 0b_11111111, 0b_11111111, 0b_11111111, 0b_11111111, 0b_11111111, 0b_11111110 }); 125 | using var reader = new StreamBitReader(stream); 126 | 127 | 
Assert.Equal((UInt64)0b_01111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, reader.ReadBits(63)); 128 | } 129 | 130 | [Fact] 131 | public void CanReadBits64() 132 | { 133 | using var stream = new MemoryStream(new byte[] { 0b_11111111, 0b_11111111, 0b_11111111, 0b_11111111, 0b_11111111, 0b_11111111, 0b_11111111, 0b_11111111 }); 134 | using var reader = new StreamBitReader(stream); 135 | 136 | Assert.Equal(0b_11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, reader.ReadBits(64)); 137 | } 138 | 139 | [Fact] 140 | public void CanReadBits1_32() 141 | { 142 | using var stream = new MemoryStream(new byte[] { 0b_01111111, 0b_11111111, 0b_11111111, 0b_11111111, 0b10000000, }); 143 | using var reader = new StreamBitReader(stream); 144 | 145 | Assert.False(reader.ReadBit()); 146 | Assert.Equal(0b_11111111_11111111_11111111_11111111, reader.ReadBits(32)); 147 | } 148 | 149 | 150 | [Fact] 151 | public void CanReadBitX_1() 152 | { 153 | using var stream = new MemoryStream(new byte[] { 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000001, }); 154 | using var reader = new StreamBitReader(stream); 155 | 156 | Assert.Equal((UInt64)1, reader.ReadBits(64)); 157 | } 158 | 159 | [Fact] 160 | public void CanDisposeNotOwned() 161 | { 162 | using var stream = new MemoryStream(new byte[] { 0b00000000 }); 163 | using var reader = new StreamBitReader(stream); 164 | 165 | Assert.False(reader.IsDisposed); 166 | reader.Dispose(); 167 | Assert.True(reader.IsDisposed); 168 | stream.ReadByte(); 169 | } 170 | 171 | [Fact] 172 | public void CanDisposeOwned() 173 | { 174 | using var stream = new MemoryStream(new byte[] { 0b00000000 }); 175 | using var reader = new StreamBitReader(stream, true); 176 | 177 | Assert.False(reader.IsDisposed); 178 | reader.Dispose(); 179 | Assert.True(reader.IsDisposed); 180 | Assert.Throws(() => stream.ReadByte()); 181 | } 182 | 183 | 184 | [Fact] 185 | public void CanReadBlank() 186 | { 
187 | using var stream = new MemoryStream(new byte[] { 0b_11111111 }); 188 | using var reader = new StreamBitReader(stream); 189 | 190 | Assert.Equal((ulong)0b00000000, reader.ReadBits(0)); 191 | } 192 | 193 | [Fact] 194 | public void ReadEndOfStreamThrows() 195 | { 196 | using var stream = new MemoryStream(new byte[] { 0b_11111111 }); 197 | using var reader = new StreamBitReader(stream); 198 | 199 | reader.ReadBits(8); 200 | Assert.Throws(() => reader.ReadBits(1)); 201 | } 202 | 203 | [Fact] 204 | public void PeakEndOfStreamThrows() 205 | { 206 | using var stream = new MemoryStream(new byte[] { 0b_11111111 }); 207 | using var reader = new StreamBitReader(stream); 208 | 209 | reader.ReadBits(8); 210 | Assert.Throws(() => reader.PeakBit()); 211 | } 212 | 213 | [Fact] 214 | public void CanReadBitsB8_8() 215 | { 216 | using var stream = new MemoryStream(new byte[] { 0b_11111111 }); 217 | using var reader = new StreamBitReader(stream, false, 1); 218 | 219 | Assert.Equal((UInt64)0b_11111111, reader.ReadBits(8)); 220 | } 221 | 222 | [Fact] 223 | public void CanReadBitsB8_9() 224 | { 225 | using var stream = new MemoryStream(new byte[] { 0b_11111111, 0b10000000, }); 226 | using var reader = new StreamBitReader(stream, false, 1); 227 | 228 | Assert.Equal((UInt64)0b_00000001_11111111, reader.ReadBits(9)); 229 | } 230 | } -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 
3 | ## 4 | ## Get latest from https://github.com/github/gitignore/blob/main/VisualStudio.gitignore 5 | 6 | # User-specific files 7 | *.rsuser 8 | *.suo 9 | *.user 10 | *.userosscache 11 | *.sln.docstates 12 | 13 | # User-specific files (MonoDevelop/Xamarin Studio) 14 | *.userprefs 15 | 16 | # Mono auto generated files 17 | mono_crash.* 18 | 19 | # Build results 20 | [Dd]ebug/ 21 | [Dd]ebugPublic/ 22 | [Rr]elease/ 23 | [Rr]eleases/ 24 | x64/ 25 | x86/ 26 | [Ww][Ii][Nn]32/ 27 | [Aa][Rr][Mm]/ 28 | [Aa][Rr][Mm]64/ 29 | bld/ 30 | [Bb]in/ 31 | [Oo]bj/ 32 | [Ll]og/ 33 | [Ll]ogs/ 34 | 35 | # Visual Studio 2015/2017 cache/options directory 36 | .vs/ 37 | # Uncomment if you have tasks that create the project's static files in wwwroot 38 | #wwwroot/ 39 | 40 | # Visual Studio 2017 auto generated files 41 | Generated\ Files/ 42 | 43 | # MSTest test Results 44 | [Tt]est[Rr]esult*/ 45 | [Bb]uild[Ll]og.* 46 | 47 | # NUnit 48 | *.VisualState.xml 49 | TestResult.xml 50 | nunit-*.xml 51 | 52 | # Build Results of an ATL Project 53 | [Dd]ebugPS/ 54 | [Rr]eleasePS/ 55 | dlldata.c 56 | 57 | # Benchmark Results 58 | BenchmarkDotNet.Artifacts/ 59 | 60 | # .NET Core 61 | project.lock.json 62 | project.fragment.lock.json 63 | artifacts/ 64 | 65 | # ASP.NET Scaffolding 66 | ScaffoldingReadMe.txt 67 | 68 | # StyleCop 69 | StyleCopReport.xml 70 | 71 | # Files built by Visual Studio 72 | *_i.c 73 | *_p.c 74 | *_h.h 75 | *.ilk 76 | *.meta 77 | *.obj 78 | *.iobj 79 | *.pch 80 | *.pdb 81 | *.ipdb 82 | *.pgc 83 | *.pgd 84 | *.rsp 85 | *.sbr 86 | *.tlb 87 | *.tli 88 | *.tlh 89 | *.tmp 90 | *.tmp_proj 91 | *_wpftmp.csproj 92 | *.log 93 | *.tlog 94 | *.vspscc 95 | *.vssscc 96 | .builds 97 | *.pidb 98 | *.svclog 99 | *.scc 100 | 101 | # Chutzpah Test files 102 | _Chutzpah* 103 | 104 | # Visual C++ cache files 105 | ipch/ 106 | *.aps 107 | *.ncb 108 | *.opendb 109 | *.opensdf 110 | *.sdf 111 | *.cachefile 112 | *.VC.db 113 | *.VC.VC.opendb 114 | 115 | # Visual Studio profiler 116 | *.psess 117 | *.vsp 
118 | *.vspx 119 | *.sap 120 | 121 | # Visual Studio Trace Files 122 | *.e2e 123 | 124 | # TFS 2012 Local Workspace 125 | $tf/ 126 | 127 | # Guidance Automation Toolkit 128 | *.gpState 129 | 130 | # ReSharper is a .NET coding add-in 131 | _ReSharper*/ 132 | *.[Rr]e[Ss]harper 133 | *.DotSettings.user 134 | 135 | # TeamCity is a build add-in 136 | _TeamCity* 137 | 138 | # DotCover is a Code Coverage Tool 139 | *.dotCover 140 | 141 | # AxoCover is a Code Coverage Tool 142 | .axoCover/* 143 | !.axoCover/settings.json 144 | 145 | # Coverlet is a free, cross platform Code Coverage Tool 146 | coverage*.json 147 | coverage*.xml 148 | coverage*.info 149 | 150 | # Visual Studio code coverage results 151 | *.coverage 152 | *.coveragexml 153 | 154 | # NCrunch 155 | _NCrunch_* 156 | .*crunch*.local.xml 157 | nCrunchTemp_* 158 | 159 | # MightyMoose 160 | *.mm.* 161 | AutoTest.Net/ 162 | 163 | # Web workbench (sass) 164 | .sass-cache/ 165 | 166 | # Installshield output folder 167 | [Ee]xpress/ 168 | 169 | # DocProject is a documentation generator add-in 170 | DocProject/buildhelp/ 171 | DocProject/Help/*.HxT 172 | DocProject/Help/*.HxC 173 | DocProject/Help/*.hhc 174 | DocProject/Help/*.hhk 175 | DocProject/Help/*.hhp 176 | DocProject/Help/Html2 177 | DocProject/Help/html 178 | 179 | # Click-Once directory 180 | publish/ 181 | 182 | # Publish Web Output 183 | *.[Pp]ublish.xml 184 | *.azurePubxml 185 | # Note: Comment the next line if you want to checkin your web deploy settings, 186 | # but database connection strings (with potential passwords) will be unencrypted 187 | *.pubxml 188 | *.publishproj 189 | 190 | # Microsoft Azure Web App publish settings. 
Comment the next line if you want to 191 | # checkin your Azure Web App publish settings, but sensitive information contained 192 | # in these scripts will be unencrypted 193 | PublishScripts/ 194 | 195 | # NuGet Packages 196 | *.nupkg 197 | # NuGet Symbol Packages 198 | *.snupkg 199 | # The packages folder can be ignored because of Package Restore 200 | **/[Pp]ackages/* 201 | # except build/, which is used as an MSBuild target. 202 | !**/[Pp]ackages/build/ 203 | # Uncomment if necessary however generally it will be regenerated when needed 204 | #!**/[Pp]ackages/repositories.config 205 | # NuGet v3's project.json files produces more ignorable files 206 | *.nuget.props 207 | *.nuget.targets 208 | 209 | # Microsoft Azure Build Output 210 | csx/ 211 | *.build.csdef 212 | 213 | # Microsoft Azure Emulator 214 | ecf/ 215 | rcf/ 216 | 217 | # Windows Store app package directories and files 218 | AppPackages/ 219 | BundleArtifacts/ 220 | Package.StoreAssociation.xml 221 | _pkginfo.txt 222 | *.appx 223 | *.appxbundle 224 | *.appxupload 225 | 226 | # Visual Studio cache files 227 | # files ending in .cache can be ignored 228 | *.[Cc]ache 229 | # but keep track of directories ending in .cache 230 | !?*.[Cc]ache/ 231 | 232 | # Others 233 | ClientBin/ 234 | ~$* 235 | *~ 236 | *.dbmdl 237 | *.dbproj.schemaview 238 | *.jfm 239 | *.pfx 240 | *.publishsettings 241 | orleans.codegen.cs 242 | 243 | # Including strong name files can present a security risk 244 | # (https://github.com/github/gitignore/pull/2483#issue-259490424) 245 | #*.snk 246 | 247 | # Since there are multiple workflows, uncomment next line to ignore bower_components 248 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 249 | #bower_components/ 250 | 251 | # RIA/Silverlight projects 252 | Generated_Code/ 253 | 254 | # Backup & report files from converting an old project file 255 | # to a newer Visual Studio version. 
Backup files are not needed, 256 | # because we have git ;-) 257 | _UpgradeReport_Files/ 258 | Backup*/ 259 | UpgradeLog*.XML 260 | UpgradeLog*.htm 261 | ServiceFabricBackup/ 262 | *.rptproj.bak 263 | 264 | # SQL Server files 265 | *.mdf 266 | *.ldf 267 | *.ndf 268 | 269 | # Business Intelligence projects 270 | *.rdl.data 271 | *.bim.layout 272 | *.bim_*.settings 273 | *.rptproj.rsuser 274 | *- [Bb]ackup.rdl 275 | *- [Bb]ackup ([0-9]).rdl 276 | *- [Bb]ackup ([0-9][0-9]).rdl 277 | 278 | # Microsoft Fakes 279 | FakesAssemblies/ 280 | 281 | # GhostDoc plugin setting file 282 | *.GhostDoc.xml 283 | 284 | # Node.js Tools for Visual Studio 285 | .ntvs_analysis.dat 286 | node_modules/ 287 | 288 | # Visual Studio 6 build log 289 | *.plg 290 | 291 | # Visual Studio 6 workspace options file 292 | *.opt 293 | 294 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 295 | *.vbw 296 | 297 | # Visual Studio 6 auto-generated project file (contains which files were open etc.) 
298 | *.vbp 299 | 300 | # Visual Studio 6 workspace and project file (working project files containing files to include in project) 301 | *.dsw 302 | *.dsp 303 | 304 | # Visual Studio 6 technical files 305 | *.ncb 306 | *.aps 307 | 308 | # Visual Studio LightSwitch build output 309 | **/*.HTMLClient/GeneratedArtifacts 310 | **/*.DesktopClient/GeneratedArtifacts 311 | **/*.DesktopClient/ModelManifest.xml 312 | **/*.Server/GeneratedArtifacts 313 | **/*.Server/ModelManifest.xml 314 | _Pvt_Extensions 315 | 316 | # Paket dependency manager 317 | .paket/paket.exe 318 | paket-files/ 319 | 320 | # FAKE - F# Make 321 | .fake/ 322 | 323 | # CodeRush personal settings 324 | .cr/personal 325 | 326 | # Python Tools for Visual Studio (PTVS) 327 | __pycache__/ 328 | *.pyc 329 | 330 | # Cake - Uncomment if you are using it 331 | # tools/** 332 | # !tools/packages.config 333 | 334 | # Tabs Studio 335 | *.tss 336 | 337 | # Telerik's JustMock configuration file 338 | *.jmconfig 339 | 340 | # BizTalk build output 341 | *.btp.cs 342 | *.btm.cs 343 | *.odx.cs 344 | *.xsd.cs 345 | 346 | # OpenCover UI analysis results 347 | OpenCover/ 348 | 349 | # Azure Stream Analytics local run output 350 | ASALocalRun/ 351 | 352 | # MSBuild Binary and Structured Log 353 | *.binlog 354 | 355 | # NVidia Nsight GPU debugger configuration file 356 | *.nvuser 357 | 358 | # MFractors (Xamarin productivity tool) working folder 359 | .mfractor/ 360 | 361 | # Local History for Visual Studio 362 | .localhistory/ 363 | 364 | # Visual Studio History (VSHistory) files 365 | .vshistory/ 366 | 367 | # BeatPulse healthcheck temp database 368 | healthchecksdb 369 | 370 | # Backup folder for Package Reference Convert tool in Visual Studio 2017 371 | MigrationBackup/ 372 | 373 | # Ionide (cross platform F# VS Code tools) working folder 374 | .ionide/ 375 | 376 | # Fody - auto-generated XML schema 377 | FodyWeavers.xsd 378 | 379 | # VS Code files for those working on multiple tools 380 | .vscode/* 381 | 
!.vscode/settings.json 382 | !.vscode/tasks.json 383 | !.vscode/launch.json 384 | !.vscode/extensions.json 385 | *.code-workspace 386 | 387 | # Local History for Visual Studio Code 388 | .history/ 389 | 390 | # Windows Installer files from build outputs 391 | *.cab 392 | *.msi 393 | *.msix 394 | *.msm 395 | *.msp 396 | 397 | # JetBrains Rider 398 | *.sln.iml 399 | 400 | InvertedTomato.Packing.xml -------------------------------------------------------------------------------- /Test/StreamBitWriterTests.cs: -------------------------------------------------------------------------------- 1 | namespace InvertedTomato.Packing; 2 | 3 | public class StreamBitWriterTests 4 | { 5 | [Fact] 6 | public void CanWriteBit_0() 7 | { 8 | using var stream = new MemoryStream(); 9 | using (var writer = new StreamBitWriter(stream)) 10 | { 11 | writer.WriteBit(false); 12 | } 13 | 14 | Assert.Equal(new Byte[] {0b00000000,}, stream.ToArray()); 15 | } 16 | 17 | [Fact] 18 | public void CanWriteBit_1() 19 | { 20 | using var stream = new MemoryStream(); 21 | using (var writer = new StreamBitWriter(stream)) 22 | { 23 | writer.WriteBit(true); 24 | } 25 | 26 | Assert.Equal(new Byte[] {0b10000000,}, stream.ToArray()); 27 | } 28 | 29 | [Fact] 30 | public void CanWriteBit_0_1() 31 | { 32 | using var stream = new MemoryStream(); 33 | using (var writer = new StreamBitWriter(stream)) 34 | { 35 | writer.WriteBit(false); 36 | writer.WriteBit(true); 37 | } 38 | 39 | Assert.Equal(new Byte[] {0b01000000,}, stream.ToArray()); 40 | } 41 | 42 | [Fact] 43 | public void CanWriteBit_1_1_1_1_1_1_1_1() 44 | { 45 | using var stream = new MemoryStream(); 46 | using (var writer = new StreamBitWriter(stream)) 47 | { 48 | writer.WriteBit(true); 49 | writer.WriteBit(true); 50 | writer.WriteBit(true); 51 | writer.WriteBit(true); 52 | writer.WriteBit(true); 53 | writer.WriteBit(true); 54 | writer.WriteBit(true); 55 | writer.WriteBit(true); 56 | } 57 | 58 | Assert.Equal(new Byte[] {0b11111111,}, stream.ToArray()); 59 | } 
60 | 61 | [Fact] 62 | public void CanWriteBit_1_1_1_1_1_1_1_1_1() 63 | { 64 | using var stream = new MemoryStream(); 65 | using (var writer = new StreamBitWriter(stream)) 66 | { 67 | writer.WriteBit(true); 68 | writer.WriteBit(true); 69 | writer.WriteBit(true); 70 | writer.WriteBit(true); 71 | writer.WriteBit(true); 72 | writer.WriteBit(true); 73 | writer.WriteBit(true); 74 | writer.WriteBit(true); 75 | writer.WriteBit(true); 76 | } 77 | 78 | Assert.Equal(new Byte[] {0b11111111, 0b10000000}, stream.ToArray()); 79 | } 80 | 81 | [Fact] 82 | public void CanWriteBits_10_1() 83 | { 84 | using var stream = new MemoryStream(); 85 | using (var writer = new StreamBitWriter(stream)) 86 | { 87 | writer.WriteBits(0b10, 2); 88 | writer.WriteBits(0b1, 1); 89 | } 90 | 91 | Assert.Equal(new Byte[] {0b10100000,}, stream.ToArray()); 92 | } 93 | 94 | [Fact] 95 | public void CanWriteBits_1_0_1_0_1_0() 96 | { 97 | using var stream = new MemoryStream(); 98 | using (var writer = new StreamBitWriter(stream)) 99 | { 100 | writer.WriteBits(0b1, 1); 101 | writer.WriteBits(0b0, 1); 102 | writer.WriteBits(0b1, 1); 103 | writer.WriteBits(0b0, 1); 104 | writer.WriteBits(0b1, 1); 105 | writer.WriteBits(0b0, 1); 106 | writer.WriteBits(0b1, 1); 107 | writer.WriteBits(0b0, 1); 108 | } 109 | 110 | Assert.Equal(new Byte[] {0b10101010,}, stream.ToArray()); 111 | } 112 | 113 | 114 | [Fact] 115 | public void CanWriteBits_10_10_10_101() 116 | { 117 | using var stream = new MemoryStream(); 118 | using (var writer = new StreamBitWriter(stream)) 119 | { 120 | writer.WriteBits(0b10, 2); 121 | writer.WriteBits(0b10, 2); 122 | writer.WriteBits(0b10, 2); 123 | writer.WriteBits(0b101, 3); 124 | } 125 | 126 | Assert.Equal(new Byte[] {0b10101010, 0b10000000,}, stream.ToArray()); 127 | } 128 | 129 | [Fact] 130 | public void CanWriteBits_10_Align_10() 131 | { 132 | using var stream = new MemoryStream(); 133 | using (var writer = new StreamBitWriter(stream)) 134 | { 135 | writer.WriteBits(0b10, 2); 136 | 
writer.Align(); 137 | writer.WriteBits(0b11, 2); 138 | } 139 | 140 | Assert.Equal(new Byte[] {0b10000000, 0b11000000}, stream.ToArray()); 141 | } 142 | 143 | [Fact] 144 | public void CanWriteBits_Align() 145 | { 146 | using var stream = new MemoryStream(); 147 | using (var writer = new StreamBitWriter(stream)) 148 | { 149 | writer.Align(); 150 | } 151 | 152 | Assert.Equal(new Byte[] { }, stream.ToArray()); 153 | } 154 | 155 | [Fact] 156 | public void CanWriteBits_8_Align() 157 | { 158 | using var stream = new MemoryStream(); 159 | using (var writer = new StreamBitWriter(stream)) 160 | { 161 | writer.WriteBits(0b11111111, 8); 162 | writer.Align(); 163 | } 164 | 165 | Assert.Equal(new Byte[] {0b11111111}, stream.ToArray()); 166 | } 167 | 168 | [Fact] 169 | public void CanWriteBits_8_Align_8() 170 | { 171 | using var stream = new MemoryStream(); 172 | using (var writer = new StreamBitWriter(stream)) 173 | { 174 | writer.WriteBits(0b11111111, 8); 175 | writer.Align(); 176 | writer.WriteBits(0b11111111, 8); 177 | } 178 | 179 | Assert.Equal(new Byte[] {0b11111111, 0b11111111}, stream.ToArray()); 180 | } 181 | 182 | [Fact] 183 | public void CanWriteBits_32() 184 | { 185 | using var stream = new MemoryStream(); 186 | using (var writer = new StreamBitWriter(stream)) 187 | { 188 | writer.WriteBits(0b_11111111_11111111_11111111_11111111, 32); 189 | } 190 | 191 | Assert.Equal(new Byte[] {0b_11111111, 0b_11111111, 0b_11111111, 0b_11111111,}, stream.ToArray()); 192 | } 193 | 194 | [Fact] 195 | public void CanWriteBits_63() 196 | { 197 | using var stream = new MemoryStream(); 198 | using (var writer = new StreamBitWriter(stream)) 199 | { 200 | writer.WriteBits(0b_01111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, 63); 201 | } 202 | 203 | Assert.Equal(new Byte[] {0b_11111111, 0b_11111111, 0b_11111111, 0b_11111111,0b_11111111, 0b_11111111, 0b_11111111, 0b_11111110,}, stream.ToArray()); 204 | } 205 | 206 | [Fact] 207 | public void CanWriteBits_64() 208 | { 209 
| using var stream = new MemoryStream(); 210 | using (var writer = new StreamBitWriter(stream)) 211 | { 212 | writer.WriteBits(0b_11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, 64); 213 | } 214 | 215 | Assert.Equal(new Byte[] {0b_11111111, 0b_11111111, 0b_11111111, 0b_11111111,0b_11111111, 0b_11111111, 0b_11111111, 0b_11111111,}, stream.ToArray()); 216 | } 217 | 218 | [Fact] 219 | public void CanWriteBits_1_32() 220 | { 221 | using var stream = new MemoryStream(); 222 | using (var writer = new StreamBitWriter(stream)) 223 | { 224 | writer.WriteBit(false); 225 | writer.WriteBits(0b_11111111_11111111_11111111_11111111, 32); 226 | } 227 | 228 | Assert.Equal(new Byte[] {0b_01111111, 0b_11111111, 0b_11111111, 0b_11111111, 0b10000000,}, stream.ToArray()); 229 | } 230 | 231 | 232 | [Fact] 233 | public void CanWriteBit_x1() 234 | { 235 | using var stream = new MemoryStream(); 236 | using (var writer = new StreamBitWriter(stream)) 237 | { 238 | writer.WriteBits(1,64); 239 | } 240 | 241 | Assert.Equal(new Byte[] {0b00000000,0b00000000,0b00000000,0b00000000,0b00000000,0b00000000,0b00000000,0b00000001,}.ToHexString(), stream.ToArray().ToHexString()); 242 | } 243 | 244 | [Fact] 245 | public void CanDisposeNotOwned() 246 | { 247 | using var stream = new MemoryStream(); 248 | using var writer = new StreamBitWriter(stream); 249 | 250 | Assert.False(writer.IsDisposed); 251 | writer.Dispose(); 252 | Assert.True(writer.IsDisposed); 253 | stream.ReadByte(); 254 | } 255 | 256 | [Fact] 257 | public void CanDisposeOwned() 258 | { 259 | using var stream = new MemoryStream(); 260 | using var writer = new StreamBitWriter(stream, true); 261 | 262 | Assert.False(writer.IsDisposed); 263 | writer.Dispose(); 264 | Assert.True(writer.IsDisposed); 265 | Assert.Throws(() => stream.ReadByte()); 266 | } 267 | 268 | 269 | [Fact] 270 | public void CanWriteBit_B1_8() 271 | { 272 | using var stream = new MemoryStream(); 273 | using (var writer = new 
StreamBitWriter(stream,false,1)) 274 | { 275 | writer.WriteBits(0b11111111,8); 276 | } 277 | 278 | Assert.Equal(new Byte[] {0b11111111,}, stream.ToArray()); 279 | } 280 | 281 | [Fact] 282 | public void CanWriteBit_B1_9() 283 | { 284 | using var stream = new MemoryStream(); 285 | using (var writer = new StreamBitWriter(stream,false,1)) 286 | { 287 | writer.WriteBits(0b111111111,9); 288 | } 289 | 290 | Assert.Equal(new Byte[] {0b11111111,0b10000000}, stream.ToArray()); 291 | } 292 | } -------------------------------------------------------------------------------- /Test/FibonaciCodecTests.cs: -------------------------------------------------------------------------------- 1 | namespace InvertedTomato.Packing; 2 | 3 | public class FibonacciCodecTests 4 | { 5 | private Byte[] Encode(UInt64 value) 6 | { 7 | using var stream = new MemoryStream(); 8 | using (var writer = new StreamBitWriter(stream)) 9 | { 10 | var encoder = new FibonacciIntegerEncoder(writer); 11 | encoder.EncodeUInt64(value); 12 | } 13 | 14 | return stream.ToArray(); 15 | } 16 | 17 | [Fact] 18 | public void Encode_0() => Assert.Equal(new Byte[] { 0b11000000 }.ToBinaryString(), Encode(0).ToBinaryString()); 19 | 20 | [Fact] 21 | public void Encode_1() => Assert.Equal(new Byte[] { 0b01100000 }.ToBinaryString(), Encode(1).ToBinaryString()); 22 | 23 | [Fact] 24 | public void Encode_2() => Assert.Equal(new Byte[] { 0b00110000 }.ToBinaryString(), Encode(2).ToBinaryString()); 25 | 26 | [Fact] 27 | public void Encode_3() => Assert.Equal(new Byte[] { 0b10110000 }.ToBinaryString(), Encode(3).ToBinaryString()); 28 | 29 | [Fact] 30 | public void Encode_4() => Assert.Equal(new Byte[] { 0b00011000 }.ToBinaryString(), Encode(4).ToBinaryString()); 31 | 32 | [Fact] 33 | public void Encode_5() => Assert.Equal(new Byte[] { 0b10011000 }.ToBinaryString(), Encode(5).ToBinaryString()); 34 | 35 | [Fact] 36 | public void Encode_6() => Assert.Equal(new Byte[] { 0b01011000 }.ToBinaryString(), Encode(6).ToBinaryString()); 37 | 38 | 
[Fact] 39 | public void Encode_7() => Assert.Equal(new Byte[] { 0b00001100 }.ToBinaryString(), Encode(7).ToBinaryString()); 40 | 41 | [Fact] 42 | public void Encode_8() => Assert.Equal(new Byte[] { 0b10001100 }.ToBinaryString(), Encode(8).ToBinaryString()); 43 | 44 | [Fact] 45 | public void Encode_9() => Assert.Equal(new Byte[] { 0b01001100 }.ToBinaryString(), Encode(9).ToBinaryString()); 46 | 47 | [Fact] 48 | public void Encode_10() => Assert.Equal(new Byte[] { 0b00101100 }.ToBinaryString(), Encode(10).ToBinaryString()); 49 | 50 | [Fact] 51 | public void Encode_11() => Assert.Equal(new Byte[] { 0b10101100 }, Encode(11)); 52 | 53 | [Fact] 54 | public void Encode_12() => Assert.Equal(new Byte[] { 0b00000110 }, Encode(12)); 55 | 56 | [Fact] 57 | public void Encode_13() => Assert.Equal(new Byte[] { 0b10000110 }, Encode(13)); 58 | 59 | [Fact] 60 | public void Encode_20() => Assert.Equal(new Byte[] { 0b00000011 }, Encode(20)); // Exactly one byte 61 | 62 | [Fact] 63 | public void Encode_33() => Assert.Equal(new Byte[] { 0b00000001, 0b10000000 }, Encode(33)); // Termination bit is on next byte 64 | 65 | [Fact] 66 | public void Encode_54() => Assert.Equal(new Byte[] { 0b00000000, 0b11000000 }, Encode(54)); // Final and termination bits on next byte 67 | 68 | [Fact] 69 | public void Encode_986() => Assert.Equal(new Byte[] { 0b00000000, 0b00000011 }, Encode(986)); // Exactly one byte 70 | 71 | [Fact] 72 | public void Encode_1596() => Assert.Equal(new Byte[] { 0b00000000, 0b00000001, 0b10000000 }, Encode(1596)); // Termination bit is on next byte 73 | 74 | [Fact] 75 | public void Encode_2583() => Assert.Equal(new Byte[] { 0b00000000, 0b00000000, 0b11000000 }, Encode(2583)); // Final and termination bits on next byte 76 | 77 | [Fact] 78 | public void Encode_6557470319841() => 79 | Assert.Equal(new Byte[] { 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000110 }.ToBinaryString(), 80 | Encode(6557470319841).ToBinaryString()); // All bits 
in first buffer 81 | 82 | [Fact] 83 | public void Encode_10610209857722() => 84 | Assert.Equal(new Byte[] { 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000011 }.ToBinaryString(), 85 | Encode(10610209857722).ToBinaryString()); // All bits in first buffer 86 | 87 | [Fact] 88 | public void Encode_17167680177564() => 89 | Assert.Equal(new Byte[] { 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000001, 0b10000000 }.ToBinaryString(), 90 | Encode(17167680177564).ToBinaryString()); // All value bits in first buffer and termination in second buffer 91 | 92 | [Fact] 93 | public void Encode_27777890035287() => 94 | Assert.Equal(new Byte[] { 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b11000000 }.ToBinaryString(), 95 | Encode(27777890035287).ToBinaryString()); // Value bits and termination bits in second buffer 96 | 97 | [Fact] 98 | public void Encode_Max() 99 | { 100 | var expected = new Byte[] { 0b01010000, 0b01010001, 0b01000001, 0b00010101, 0b00010010, 0b00100100, 0b00000010, 0b01000100, 0b10001000, 0b10100000, 0b10001010, 0b01011000 }.ToBinaryString(); 101 | var actual = Encode(FibonacciInteger.MaxValue).ToBinaryString(); 102 | Assert.Equal(expected, actual); // Not completely sure about this value 103 | // Actual: 10100010 01000100 10000000 01001000 10001010 00001010 00101000 00100010 10001000 10100000 10001010 01011000 104 | // Expected: 01010000 01010001 01000001 00010101 00010010 00100100 00000010 01000100 10001000 10100000 10001010 01011000 105 | } 106 | 107 | // Decode 108 | 109 | private UInt64 Decode(Byte[] encoded) 110 | { 111 | using var stream = new MemoryStream(encoded); 112 | using var reader = new StreamBitReader(stream); 113 | var decoder = new FibonacciIntegerDecoder(reader); 114 | 115 | return decoder.DecodeUInt64(); 116 | } 117 | 118 | [Fact] 119 | public void Decode_0() => Assert.Equal((UInt64)0, Decode(new Byte[] { 
0b11_000000 })); 120 | 121 | [Fact] 122 | public void Decode_1() => Assert.Equal((UInt64)1, Decode(new Byte[] { 0b011_00000 })); 123 | 124 | [Fact] 125 | public void Decode_2() => Assert.Equal((UInt64)2, Decode(new Byte[] { 0b0011_0000 })); 126 | 127 | [Fact] 128 | public void Decode_3() => Assert.Equal((UInt64)3, Decode(new Byte[] { 0b1011_0000 })); 129 | 130 | [Fact] 131 | public void Decode_4() => Assert.Equal((UInt64)4, Decode(new Byte[] { 0b00011_000 })); 132 | 133 | [Fact] 134 | public void Decode_5() => Assert.Equal((UInt64)5, Decode(new Byte[] { 0b10011_000 })); 135 | 136 | [Fact] 137 | public void Decode_6() => Assert.Equal((UInt64)6, Decode(new Byte[] { 0b01011_000 })); 138 | 139 | [Fact] 140 | public void Decode_7() => Assert.Equal((UInt64)7, Decode(new Byte[] { 0b000011_00 })); 141 | 142 | [Fact] 143 | public void Decode_8() => Assert.Equal((UInt64)8, Decode(new Byte[] { 0b100011_00 })); 144 | 145 | [Fact] 146 | public void Decode_9() => Assert.Equal((UInt64)9, Decode(new Byte[] { 0b010011_00 })); 147 | 148 | [Fact] 149 | public void Decode_10() => Assert.Equal((UInt64)10, Decode(new Byte[] { 0b001011_00 })); 150 | 151 | [Fact] 152 | public void Decode_11() => Assert.Equal((UInt64)11, Decode(new Byte[] { 0b101011_00 })); 153 | 154 | [Fact] 155 | public void Decode_20() => Assert.Equal((UInt64)20, Decode(new Byte[] { 0b00000011 })); // Exactly one byte 156 | 157 | [Fact] 158 | public void Decode_33() => Assert.Equal((UInt64)33, Decode(new Byte[] { 0b00000001, 0b1_0000000 })); // Termination bit is on next byte 159 | 160 | [Fact] 161 | public void Decode_54() => Assert.Equal((UInt64)54, Decode(new Byte[] { 0b00000000, 0b11_000000 })); // Final and termination bits on next byte 162 | 163 | [Fact] 164 | public void Decode_986() => Assert.Equal((UInt64)986, Decode(new Byte[] { 0b00000000, 0b00000011 })); // Exactly two bytes 165 | 166 | [Fact] 167 | public void Decode_1596() => Assert.Equal((UInt64)1596, Decode(new Byte[] { 0b00000000, 0b00000001, 
0b1_0000000 })); // Termination bit is on next byte 168 | 169 | [Fact] 170 | public void Decode_2583() => Assert.Equal((UInt64)2583, Decode(new Byte[] { 0b00000000, 0b00000000, 0b11_000000 })); // Final and termination bits on next byte 171 | 172 | [Fact] 173 | public void Decode_Max() => Assert.Equal(FibonacciInteger.MaxValue, 174 | Decode(new Byte[] { 0b01010000, 0b01010001, 0b01000001, 0b00010101, 0b00010010, 0b00100100, 0b00000010, 0b01000100, 0b10001000, 0b10100000, 0b10001010, 0b01011_000 })); 175 | 176 | [Fact] 177 | public void Decode_Overflow1() => Assert.Throws(() => 178 | { 179 | Decode(new Byte[] { 0b01010000, 0b01010001, 0b01000001, 0b00010101, 0b00010010, 0b00100100, 0b00000010, 0b01000100, 0b10001000, 0b10100000, 0b10101010, 0b01011_000 }); 180 | }); // Symbol too large 181 | 182 | [Fact] 183 | public void Decode_Overflow2() => Assert.Throws(() => 184 | { 185 | Decode(new Byte[] { 0b01010000, 0b01010001, 0b01000001, 0b00010101, 0b00010010, 0b00100100, 0b00000010, 0b01000100, 0b10001000, 0b10100000, 0b10001010, 0b010011_00 }); 186 | }); // Symbol too long 187 | 188 | [Fact] 189 | public void CanEncodeDecodeFirst1000() 190 | { 191 | using var stream = new MemoryStream(); 192 | 193 | using (var writer = new StreamBitWriter(stream)) 194 | { 195 | var encoder = new FibonacciIntegerEncoder(writer); 196 | for (UInt64 symbol = 0; symbol < 1000; symbol++) encoder.EncodeUInt64(symbol); 197 | } 198 | 199 | stream.Seek(0, SeekOrigin.Begin); 200 | 201 | using (var reader = new StreamBitReader(stream)) 202 | { 203 | var decoder = new FibonacciIntegerDecoder(reader); 204 | for (UInt64 symbol = 0; symbol < 1000; symbol++) 205 | { 206 | Assert.Equal(symbol, decoder.DecodeUInt64()); 207 | } 208 | } 209 | } 210 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Packing 2 | `InvertedTomato.Packing` is all about encoding data in the 
smallest possible way quickly. This is super useful for both storage and transmission of data when size and speed are both important. Data isn't compressed, at least not in the traditional sense; rather it is encoded in an efficient manner. 3 | 4 | ## TLDR 5 | Here's how to squash 24 bytes of data down to 2 using Fibonacci coding: 6 | ```C# 7 | using InvertedTomato.Packing; 8 | using InvertedTomato.Packing.Codecs.Integers; 9 | 10 | // Encode some values... 11 | using var stream = new MemoryStream(); // Could be a FileStream or a NetworkStream 12 | using (var writer = new StreamBitWriter(stream)) 13 | { 14 | // Pick a codec - you can use one or many - so long as you decode in the same order you encoded 15 | var fib = new FibonacciIntegerEncoder(writer); 16 | 17 | // Encode some values using the Fibonacci codec 18 | fib.EncodeUInt64(1); 19 | fib.EncodeUInt64(2); 20 | fib.EncodeUInt64(3); 21 | } 22 | 23 | Console.WriteLine("Compressed data is " + stream.Length + " bytes"); // Output: Compressed data is 2 bytes 24 | 25 | // Decode the values... 26 | stream.Position = 0; 27 | using (var reader = new StreamBitReader(stream)) 28 | { 29 | var fib = new FibonacciIntegerDecoder(reader); 30 | 31 | // Decode the Fibonacci values 32 | Console.WriteLine(fib.DecodeUInt64()); // Output: 1 33 | Console.WriteLine(fib.DecodeUInt64()); // Output: 2 34 | Console.WriteLine(fib.DecodeUInt64()); // Output: 3 35 | } 36 | ``` 37 | 38 | ## Introduction 39 | Modern PCs have stacks of RAM, so it's usually not a problem that integers take 4-8 bytes each 40 | to store in memory. There are times however when this is a problem. For example: 41 | - When you want to store a large set of numbers in memory (100 million * 8 bytes = 760MB) 42 | - When you want to store a large set of numbers on disk 43 | - When you want to transmit numbers over a network (the Internet?) quickly 44 | 45 | In almost all cases those numbers can be stored in a much lower number of bytes. 
Heck, it's 46 | **possible to store three integers in a single byte**. 47 | 48 | ## Algorithms 49 | The example in the **TLDR** section used the Fibonacci codec. Whilst this codec is excellent for small numbers, it's not so 50 | great when numbers get larger. You really need to select a codec with your domain in mind. Following is a summary of the 51 | codecs available, their strengths and weaknesses. 52 | 53 | ### Bits required to represent each number with each codec 54 | Keep in mind that there is a physical minimum possible size for each number. That is displayed in blue. 55 | ![alt text](https://raw.githubusercontent.com/invertedtomato/integer-compression/master/images/comparison-1.png "Algorithm comparison") 56 | 57 | ### Fibonacci *(best for integers <8,000)* 58 | - **Family:** [universal code](https://en.wikipedia.org/wiki/Universal_code_(data_compression)) 59 | - **Random access:** yes *(can jump ahead)* 60 | - **Lossy:** no *(doesn't approximate)* 61 | - **Universal:** yes *(can handle any number)* 62 | - **Details:** [Wikipedia](https://en.wikipedia.org/wiki/Fibonacci_coding) 63 | - **Options:** 64 | 65 | This is a very interesting algorithm - it encodes the numbers against a Fibonacci sequence. It's the best algorithm in the pack for numbers up to 8,000. It 66 | degrades after that point - but not horrendously so. This is my personal favorite algo. 67 | 68 | ### Thompson-Alpha *(best for integers >8,000)* 69 | - **Family:** none 70 | - **Random access:** no 71 | - **Universal:** no *(can only handle a predefined range of numbers)* 72 | - **Details:** N/A 73 | - **Options:** 74 | - Length bits 75 | 76 | I couldn't find an algorithm which performed well for large integers (>8,000), so this is my own. In its default configuration it has a flat 6-bits 77 | of overhead for each integer, no matter its size. That makes it excellent if your numbers have a large distribution. 
78 | 79 | ### Variable Length Quantities (VLQ) 80 | - **Random access:** no *(can't jump ahead)* 81 | - **Universal:** yes *(can handle any number)* 82 | - **Details:** [Wikipedia](https://en.wikipedia.org/wiki/Variable-length_quantity) 83 | - **Options:** 84 | 85 | It seems VLQ was originally invented by the designers of MIDI (you know, the old-school 86 | MP3). The algorithm is really retro, there's stacks of variations of its spec and 87 | it smells a little musty, but it's awesome! It produces pretty good results for all numbers 88 | with a very low CPU overhead. 89 | 90 | ### Inverted Variable Length Quantities (VLQ) 91 | - **Random access:** no *(can't jump ahead)* 92 | - **Universal:** yes *(can handle any number)* 93 | - **Details:** N/A 94 | - **Options:** 95 | 96 | Similar to VLQ, Inverted-VLQ is a slight variation which uses a final-byte flag, rather than a 97 | more-bit flag. Theoretically this has slightly better CPU performance for numbers 98 | that encode to more than three bytes. 99 | 100 | ### Elias-Omega 101 | - **Family:** [universal code](https://en.wikipedia.org/wiki/Universal_code_(data_compression)) 102 | - **Random access:** no (can't jump ahead) 103 | - **Universal:** yes (can handle any number) 104 | - **Supported values:** all 105 | - **Details:** [Wikipedia](https://en.wikipedia.org/wiki/Elias_omega_coding) 106 | 107 | Elias Omega is a sexy algorithm. It's well thought out and utterly brilliant. But I 108 | wouldn't use it. It does well for tiny integers (under 8), but just doesn't cut the 109 | mustard for larger values - all other algorithms do better. Sorry Omega :-/. 
110 | 111 | ### Elias-Gamma 112 | - **Family:** [universal code](https://en.wikipedia.org/wiki/Universal_code_(data_compression)) 113 | - **Random access:** no (can't jump ahead) 114 | - **Universal:** yes *(can handle any number)* 115 | - **Supported values:** all 116 | - **Details:** [Wikipedia](https://en.wikipedia.org/wiki/Elias_gamma_coding) 117 | 118 | Like Elias-Omega, this is a very interesting algorithm. However it's only really useful for small integers (less than 8). For bigger numbers 119 | it performs *terribly*. 120 | 121 | ### Elias-Delta 122 | - **Family:** [universal code](https://en.wikipedia.org/wiki/Universal_code_(data_compression)) 123 | - **Random access:** no (can't jump ahead) 124 | - **Universal:** yes *(can handle any number)* 125 | - **Supported values:** all 126 | - **Details:** [Wikipedia](https://en.wikipedia.org/wiki/Elias_delta_coding) 127 | 128 | I have a lot of respect for this algorithm. It's an all-rounder, doing well on small numbers and large alike. If you knew you 129 | were mostly going to have small numbers, but you'd have some larger ones as well, this would be my choice if it weren't for ThompsonAlpha. The algorithm is a little complex, so you might be cautious if you have extreme CPU limitations. 130 | 131 | ## Comparing algorithms 132 | In order to make an accurate assessment of a codec for your purpose, some 133 | algorithms have a method `CalculateEncodedBits` that allows you to know 134 | how many bits a given value would consume when encoded. I recommend getting a set 135 | of your data and running it through the `CalculateEncodedBits` methods of a few 136 | algorithms to see which one is best. 137 | 138 | ## Signed and unsigned 139 | If your numbers are unsigned (eg, no negatives), be sure to use **unsigned** calls to the Codec. That 140 | way you'll get the best size reduction. Obviously fall back to **signed** if you must. Hand-waving, it'll cost you an extra bit or so for each value if you use signed. 
141 | 142 | ## Even better reduction 143 | There are a few techniques you can use to further increase the reduction of your integers. 144 | Following is a summary of each. 145 | 146 | ### Use deltas 147 | Smaller numbers use less space. So take a moment to consider what 148 | you can do to keep your numbers small. One common technique is to store the difference 149 | between numbers instead of the numbers themselves. Consider if you wanted to store the 150 | following sequence: 151 | - 10000 152 | - 10001 153 | - 10002 154 | - 10003 155 | - 10004 156 | 157 | If you converted them to deltas you could instead store: 158 | - 10000 159 | - 1 160 | - 1 161 | - 1 162 | - 1 163 | 164 | This sequence uses a stack fewer bytes! 165 | 166 | Naturally this isn't suitable for all contexts. If the receiver has the potential to 167 | lose state (eg. UDP transport) you'll have to include a recovery mechanism (eg keyframes), 168 | otherwise those deltas become meaningless. 169 | 170 | ### Make lossy 171 | Sometimes it's okay to lose data when encoding. Let's say that you're compressing a 172 | list of distances in meters, however you only really care about the distance rounded 173 | to the nearest 100 meters. You can save a heap of data by dividing your value by 174 | 100 before compressing it, and multiplying it by 100 after. 175 | 176 | ### Use a false floor 177 | Sometimes all of your values are always going to be above zero. Let's say that you're 178 | storing the number of cars going over a busy bridge each hour. If it's safe to assume 179 | there will never be 0 cars you could save some data by subtracting one from your 180 | value before encoding and adding one after decoding. 181 | 182 | This may seem like a trivial optimization, however with most algorithms it will save 183 | you one or two bits per number. If you have several million numbers that really 184 | adds up. 
185 | 186 | ### Intermix codecs 187 | So Fibonacci is best for small numbers, and ThompsonAlpha is better for large values - 188 | so why not use both? This works so long as you read the values back in the same order you wrote them. If you use this 189 | cleverly you can get some real size wins. 190 | 191 | ### Compress it 192 | You thought we were compressing integers already? Well, it depends on how you define your terms, but I'd say I was just encoding them more cleverly. But you can compress it as well. Check out [BrotliStream](https://docs.microsoft.com/en-us/dotnet/api/system.io.compression.brotlistream). If you wrap your stream in this you can further compress your dataset. While the above encoding stores your data in the most efficient manner, Brotli will then look for patterns in your data to exploit to make it smaller again. 193 | --------------------------------------------------------------------------------