├── Library
├── TODO.md
├── Terminology.txt
├── Codecs
│ └── Integers
│ │ ├── ThompsonAlphaInteger.cs
│ │ ├── RawInteger.cs
│ │ ├── EliasDeltaInteger.cs
│ │ ├── EliasGammaInteger.cs
│ │ ├── EliasOmegaInteger.cs
│ │ ├── RawIntegerDecoder.cs
│ │ ├── IntUtil.cs
│ │ ├── VlqInteger.cs
│ │ ├── RawIntegerEncoder.cs
│ │ ├── IntegerDecoderBase.cs
│ │ ├── EliasGammaIntegerDecoder.cs
│ │ ├── EliasGammaIntegerEncoder.cs
│ │ ├── IntegerEncoderBase.cs
│ │ ├── ZigZag.cs
│ │ ├── EliasOmegaIntegerDecoder.cs
│ │ ├── InvertedVlqInteger.cs
│ │ ├── ThompsonAlphaIntegerDecoder.cs
│ │ ├── EliasDeltaIntegerDecoder.cs
│ │ ├── VlqIntegerDecoder.cs
│ │ ├── VlqIntegerEncoder.cs
│ │ ├── InvertedVlqIntegerDecoder.cs
│ │ ├── FibonacciInteger.cs
│ │ ├── EliasDeltaIntegerEncoder.cs
│ │ ├── InvertedVlqIntegerEncoder.cs
│ │ ├── FibonacciIntegerDecoder.cs
│ │ ├── ThompsonAlphaIntegerEncoder.cs
│ │ ├── EliasOmegaIntegerEncoder.cs
│ │ └── FibonacciIntegerEncoder.cs
├── IBitWriter.cs
├── IBitReader.cs
├── GlobalUsings.cs
├── Extensions
│ ├── ArrayExtensions.cs
│ ├── StreamExtensions.cs
│ └── NumericStringExtensions.cs
├── Bits.cs
├── Library.csproj
├── StreamBitReader.cs
└── StreamBitWriter.cs
├── .idea
├── .idea.InvertedTomato.IntegerCompression
│ └── .idea
│ │ ├── .name
│ │ ├── encodings.xml
│ │ ├── vcs.xml
│ │ ├── indexLayout.xml
│ │ └── .gitignore
├── .idea.packing
│ └── .idea
│ │ ├── encodings.xml
│ │ ├── vcs.xml
│ │ ├── indexLayout.xml
│ │ └── .gitignore
├── .idea.integer-compression
│ └── .idea
│ │ ├── encodings.xml
│ │ ├── vcs.xml
│ │ ├── indexLayout.xml
│ │ └── .gitignore
└── .idea.binary
│ └── .idea
│ └── workspace.xml
├── nuget-pack.cmd
├── images
└── comparison-1.png
├── Test
├── GlobalUsings.cs
├── ZigZagTests.cs
├── IntUtilTests.cs
├── EliasDeltaTests.cs
├── EliasGammaTests.cs
├── EliasOmegaTests.cs
├── Test.csproj
├── RawCodecTests.cs
├── ThompsonAlphaTests.cs
├── InvertedVlqCodecTests.cs
├── VlqCodecTests.cs
├── StreamBitReaderTests.cs
├── StreamBitWriterTests.cs
└── FibonaciCodecTests.cs
├── packing.sln.DotSettings
├── LoadTest
├── LoadTest.csproj
└── Program.cs
├── Sample
├── Sample.csproj
└── Program.cs
├── LICENSE
├── packing.sln
├── .gitignore
└── README.md
/Library/TODO.md:
--------------------------------------------------------------------------------
1 | Separate interface
2 | String encoding
3 |
--------------------------------------------------------------------------------
/.idea/.idea.InvertedTomato.IntegerCompression/.idea/.name:
--------------------------------------------------------------------------------
1 | InvertedTomato.IntegerCompression
--------------------------------------------------------------------------------
/nuget-pack.cmd:
--------------------------------------------------------------------------------
1 | @echo off
2 | dotnet pack Library/Library.csproj --include-symbols
3 | pause
--------------------------------------------------------------------------------
/Library/Terminology.txt:
--------------------------------------------------------------------------------
1 | Value = input
2 | Symbol = compressed value
3 | Set = array of 0 or more symbols
--------------------------------------------------------------------------------
/images/comparison-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/invertedtomato/packing/HEAD/images/comparison-1.png
--------------------------------------------------------------------------------
/Test/GlobalUsings.cs:
--------------------------------------------------------------------------------
1 | global using System;
2 | global using System.IO;
3 | global using Xunit;
4 | global using InvertedTomato.Packing.Codecs.Integers;
5 | global using InvertedTomato.Packing.Extensions;
--------------------------------------------------------------------------------
/Library/Codecs/Integers/ThompsonAlphaInteger.cs:
--------------------------------------------------------------------------------
1 | // ReSharper disable UnusedType.Global
2 |
3 | namespace InvertedTomato.Packing.Codecs.Integers;
4 |
5 | public static class ThompsonAlphaInteger
6 | {
7 | }
--------------------------------------------------------------------------------
/Library/IBitWriter.cs:
--------------------------------------------------------------------------------
1 | namespace InvertedTomato.Packing;
2 |
3 | public interface IBitWriter
4 | {
5 | void WriteBit(Boolean value);
6 | void WriteBits(UInt64 bits, Int32 count);
7 | void Align();
8 | }
--------------------------------------------------------------------------------
/.idea/.idea.packing/.idea/encodings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/.idea/.idea.packing/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/.idea/.idea.integer-compression/.idea/encodings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/Library/IBitReader.cs:
--------------------------------------------------------------------------------
1 | namespace InvertedTomato.Packing;
2 |
3 | public interface IBitReader
4 | {
5 | Boolean PeakBit();
6 | Boolean ReadBit();
7 | UInt64 ReadBits(Int32 count);
8 | void Align();
9 | }
10 |
--------------------------------------------------------------------------------
/.idea/.idea.InvertedTomato.IntegerCompression/.idea/encodings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/.idea/.idea.integer-compression/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/Library/GlobalUsings.cs:
--------------------------------------------------------------------------------
1 | global using System;
2 | global using System.IO;
3 | global using System.Linq;
4 | global using InvertedTomato.Packing;
5 | global using InvertedTomato.Packing.Extensions;
6 | global using System.Runtime.CompilerServices;
--------------------------------------------------------------------------------
/.idea/.idea.InvertedTomato.IntegerCompression/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/.idea/.idea.packing/.idea/indexLayout.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/.idea/.idea.integer-compression/.idea/indexLayout.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/.idea/.idea.InvertedTomato.IntegerCompression/.idea/indexLayout.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/Library/Extensions/ArrayExtensions.cs:
--------------------------------------------------------------------------------
1 | namespace InvertedTomato.Packing.Extensions;
2 |
3 | /// <summary>
4 | /// Convenience extensions for arrays.
5 | /// </summary>
6 | public static class ArrayExtensions
7 | {
8 |     /// <summary>
9 |     /// Reset every element of <paramref name="target"/> to the default value of <typeparamref name="T"/>.
10 |     /// </summary>
11 |     /// <typeparam name="T">Element type of the array.</typeparam>
12 |     /// <param name="target">Array to clear in place.</param>
13 |     [MethodImpl(MethodImplOptions.AggressiveInlining)]
14 |     public static void Clear<T>(this T[] target) => Array.Clear(target, 0, target.Length);
15 | }
--------------------------------------------------------------------------------
/Library/Codecs/Integers/RawInteger.cs:
--------------------------------------------------------------------------------
1 | // ReSharper disable UnusedType.Global
2 | // ReSharper disable UnusedMember.Global
3 |
4 | namespace InvertedTomato.Packing.Codecs.Integers;
5 |
6 | public static class RawInteger
7 | {
8 | public const UInt64 MinValue = UInt64.MinValue;
9 | public const UInt64 MaxValue = UInt64.MaxValue;
10 | }
--------------------------------------------------------------------------------
/Library/Codecs/Integers/EliasDeltaInteger.cs:
--------------------------------------------------------------------------------
1 | // ReSharper disable UnusedType.Global
2 | // ReSharper disable UnusedMember.Global
3 |
4 | namespace InvertedTomato.Packing.Codecs.Integers;
5 |
6 | public static class EliasDeltaInteger
7 | {
8 | public const UInt64 MinValue = UInt64.MinValue;
9 | public const UInt64 MaxValue = UInt64.MaxValue - 1;
10 | }
--------------------------------------------------------------------------------
/Library/Codecs/Integers/EliasGammaInteger.cs:
--------------------------------------------------------------------------------
1 | // ReSharper disable UnusedType.Global
2 | // ReSharper disable UnusedMember.Global
3 |
4 | namespace InvertedTomato.Packing.Codecs.Integers;
5 |
6 | public static class EliasGammaInteger
7 | {
8 | public const UInt64 MinValue = UInt64.MinValue;
9 | public const UInt64 MaxValue = UInt64.MaxValue - 1;
10 | }
--------------------------------------------------------------------------------
/Library/Codecs/Integers/EliasOmegaInteger.cs:
--------------------------------------------------------------------------------
1 | // ReSharper disable UnusedType.Global
2 | // ReSharper disable UnusedMember.Global
3 |
4 | namespace InvertedTomato.Packing.Codecs.Integers;
5 |
6 | public static class EliasOmegaInteger
7 | {
8 | public const UInt64 MinValue = UInt64.MinValue;
9 | public const UInt64 MaxValue = UInt64.MaxValue - 1;
10 | }
--------------------------------------------------------------------------------
/.idea/.idea.packing/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /shelf/
3 | /workspace.xml
4 | # Rider ignored files
5 | /.idea.binary.iml
6 | /projectSettingsUpdater.xml
7 | /modules.xml
8 | /contentModel.xml
9 | # Editor-based HTTP Client requests
10 | /httpRequests/
11 | # Datasource local storage ignored files
12 | /dataSources/
13 | /dataSources.local.xml
14 |
--------------------------------------------------------------------------------
/.idea/.idea.integer-compression/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /shelf/
3 | /workspace.xml
4 | # Rider ignored files
5 | /contentModel.xml
6 | /.idea.integer-compression.iml
7 | /projectSettingsUpdater.xml
8 | /modules.xml
9 | # Editor-based HTTP Client requests
10 | /httpRequests/
11 | # Datasource local storage ignored files
12 | /dataSources/
13 | /dataSources.local.xml
14 |
--------------------------------------------------------------------------------
/Library/Codecs/Integers/RawIntegerDecoder.cs:
--------------------------------------------------------------------------------
1 | namespace InvertedTomato.Packing.Codecs.Integers;
2 |
3 | public class RawIntegerDecoder : IntegerDecoderBase
4 | {
5 | private readonly IBitReader _reader;
6 |
7 | public RawIntegerDecoder(IBitReader reader)
8 | {
9 | _reader = reader;
10 | }
11 |
12 | protected override UInt64 Decode() => _reader.ReadBits(Bits.LongBits);
13 | }
--------------------------------------------------------------------------------
/packing.sln.DotSettings:
--------------------------------------------------------------------------------
1 |
2 | True
--------------------------------------------------------------------------------
/.idea/.idea.InvertedTomato.IntegerCompression/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /shelf/
3 | /workspace.xml
4 | # Rider ignored files
5 | /modules.xml
6 | /projectSettingsUpdater.xml
7 | /.idea.InvertedTomato.IntegerCompression.iml
8 | /contentModel.xml
9 | # Editor-based HTTP Client requests
10 | /httpRequests/
11 | # Datasource local storage ignored files
12 | /dataSources/
13 | /dataSources.local.xml
14 |
--------------------------------------------------------------------------------
/Library/Codecs/Integers/IntUtil.cs:
--------------------------------------------------------------------------------
1 | namespace InvertedTomato.Packing.Codecs.Integers;
2 |
3 | /// <summary>
4 | /// Integer arithmetic helpers.
5 | /// </summary>
6 | public static class IntegerUtil
7 | {
8 |     /// <summary>
9 |     /// Raise <paramref name="x"/> to the power <paramref name="pow"/> using square-and-multiply.
10 |     /// Exists because Math.Pow only supports doubles. No overflow check is made here.
11 |     /// </summary>
12 |     /// <param name="x">Base.</param>
13 |     /// <param name="pow">Exponent; an exponent of 0 yields 1.</param>
14 |     /// <returns>The product accumulated over the set bits of <paramref name="pow"/>.</returns>
15 |     public static UInt64 Pow(UInt64 x, UInt64 pow)
16 |     {
17 |         UInt64 result = 1;
18 |
19 |         // Walk the exponent from its least significant bit upwards
20 |         for (var remaining = pow; remaining > 0; remaining >>= 1)
21 |         {
22 |             if ((remaining & 1) != 0)
23 |             {
24 |                 result *= x;
25 |             }
26 |
27 |             // Square the base for the next, more significant, exponent bit
28 |             x *= x;
29 |         }
30 |
31 |         return result;
32 |     }
33 | }
--------------------------------------------------------------------------------
/LoadTest/LoadTest.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Exe
5 | net7.0
6 | enable
7 | enable
8 | 11
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
--------------------------------------------------------------------------------
/Library/Codecs/Integers/VlqInteger.cs:
--------------------------------------------------------------------------------
1 | // ReSharper disable UnusedType.Global
2 |
3 | namespace InvertedTomato.Packing.Codecs.Integers;
4 |
5 | public static class VlqInteger
6 | {
7 | public const UInt64 MinValue = 0;
8 | public const UInt64 MaxValue = UInt64.MaxValue - 1;
9 |
10 | internal const Byte More = 0b10000000;
11 | internal const Byte Mask = 0b01111111;
12 | internal const Int32 PacketSize = 7;
13 | internal const UInt64 MinPacketValue = UInt64.MaxValue >> (64 - PacketSize);
14 | }
--------------------------------------------------------------------------------
/Test/ZigZagTests.cs:
--------------------------------------------------------------------------------
1 | namespace InvertedTomato.Packing;
2 |
3 | public class ZigZagTests
4 | {
5 | [Fact]
6 | public void CanEncodeDecodeMax()
7 | {
8 | var encoded = ZigZagUtility.Encode(Int64.MaxValue);
9 | Assert.Equal(Int64.MaxValue, ZigZagUtility.Decode(encoded));
10 | }
11 |
12 | [Fact]
13 | public void CanEncodeDecodeMin()
14 | {
15 | var encoded = ZigZagUtility.Encode(Int64.MinValue + 1);
16 | Assert.Equal(Int64.MinValue + 1, ZigZagUtility.Decode(encoded));
17 | }
18 | }
--------------------------------------------------------------------------------
/Library/Codecs/Integers/RawIntegerEncoder.cs:
--------------------------------------------------------------------------------
1 | // ReSharper disable UnusedType.Global
2 |
3 | namespace InvertedTomato.Packing.Codecs.Integers;
4 |
5 | public class RawIntegerEncoder : IntegerEncoderBase
6 | {
7 | private readonly IBitWriter _writer;
8 |
9 | public RawIntegerEncoder(IBitWriter writer)
10 | {
11 | _writer = writer;
12 | }
13 |
14 | protected override void Encode(UInt64 value) => _writer.WriteBits(value, Bits.LongBits);
15 |
16 | public override Int32? PredictEncodedBits(UInt64 value) => Bits.LongBits;
17 | }
--------------------------------------------------------------------------------
/Test/IntUtilTests.cs:
--------------------------------------------------------------------------------
1 | using FluentAssertions;
2 |
3 | namespace InvertedTomato.Packing;
4 |
5 | public class IntUtilTests
6 | {
7 | [Fact]
8 | public void CanPower0() => IntegerUtil.Pow(0, 0).Should().Be(1); // See https://en.wikipedia.org/wiki/Zero_to_the_power_of_zero
9 |
10 | [Fact]
11 | public void CanPower1() => IntegerUtil.Pow(1, 1).Should().Be(1);
12 |
13 | [Fact]
14 | public void CanPower2() => IntegerUtil.Pow(2, 2).Should().Be(4);
15 |
16 | [Fact]
17 | public void CanPower10() => IntegerUtil.Pow(10, 10).Should().Be(10000000000);
18 | }
--------------------------------------------------------------------------------
/Sample/Sample.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Exe
5 | net7.0
6 | enable
7 | enable
8 | 11
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
--------------------------------------------------------------------------------
/Library/Codecs/Integers/IntegerDecoderBase.cs:
--------------------------------------------------------------------------------
1 | // ReSharper disable UnusedMember.Global
2 |
3 | namespace InvertedTomato.Packing.Codecs.Integers;
4 |
5 | public abstract class IntegerDecoderBase
6 | {
7 | protected abstract UInt64 Decode();
8 |
9 | public Boolean DecodeBit() => Decode() > 0;
10 | public Byte DecodeUInt8() => (Byte)Decode();
11 | public UInt16 DecodeUInt16() => (UInt16)Decode();
12 | public UInt32 DecodeUInt32() => (UInt32)Decode();
13 | public UInt64 DecodeUInt64() => Decode();
14 | public SByte DecodeInt8() => (SByte)ZigZagUtility.Decode(Decode());
15 | public Int16 DecodeInt16() => (Int16)ZigZagUtility.Decode(Decode());
16 | public Int32 DecodeInt32() => (Int32)ZigZagUtility.Decode(Decode());
17 | public Int64 DecodeInt64() => ZigZagUtility.Decode(Decode());
18 | }
--------------------------------------------------------------------------------
/Library/Extensions/StreamExtensions.cs:
--------------------------------------------------------------------------------
1 | namespace InvertedTomato.Packing.Extensions;
2 |
3 | /// <summary>
4 | /// Shorthand overloads for reading and writing byte arrays on a stream, always starting at index 0 of the buffer.
5 | /// </summary>
6 | public static class StreamExtensions
7 | {
8 |     /// <summary>
9 |     /// Write the whole of <paramref name="buffer"/> to <paramref name="target"/>.
10 |     /// </summary>
11 |     [MethodImpl(MethodImplOptions.AggressiveInlining)]
12 |     public static void Write(this Stream target, Byte[] buffer)
13 |     {
14 |         target.Write(buffer, 0, buffer.Length);
15 |     }
16 |
17 |     /// <summary>
18 |     /// Write the first <paramref name="count"/> bytes of <paramref name="buffer"/> to <paramref name="target"/>.
19 |     /// </summary>
20 |     [MethodImpl(MethodImplOptions.AggressiveInlining)]
21 |     public static void Write(this Stream target, Byte[] buffer, Int32 count)
22 |     {
23 |         target.Write(buffer, 0, count);
24 |     }
25 |
26 |     /// <summary>
27 |     /// Read from <paramref name="target"/> into <paramref name="buffer"/>, asking for as many bytes as the buffer holds.
28 |     /// </summary>
29 |     /// <returns>The count reported by the underlying stream read.</returns>
30 |     [MethodImpl(MethodImplOptions.AggressiveInlining)]
31 |     public static Int32 Read(this Stream target, Byte[] buffer)
32 |     {
33 |         return target.Read(buffer, 0, buffer.Length);
34 |     }
35 |
36 |     /// <summary>
37 |     /// Read from <paramref name="target"/> into the start of <paramref name="buffer"/>, asking for <paramref name="count"/> bytes.
38 |     /// </summary>
39 |     /// <returns>The count reported by the underlying stream read.</returns>
40 |     [MethodImpl(MethodImplOptions.AggressiveInlining)]
41 |     public static Int32 Read(this Stream target, Byte[] buffer, Int32 count)
42 |     {
43 |         return target.Read(buffer, 0, count);
44 |     }
45 | }
--------------------------------------------------------------------------------
/Library/Extensions/NumericStringExtensions.cs:
--------------------------------------------------------------------------------
1 | using System.Text.RegularExpressions;
2 |
3 | // ReSharper disable MemberCanBePrivate.Global
4 |
5 | namespace InvertedTomato.Packing.Extensions;
6 |
7 | /// <summary>
8 | /// Helpers for rendering bytes and integers as binary or hexadecimal text.
9 | /// </summary>
10 | public static class NumericStringExtensions
11 | {
12 |     /// <summary>
13 |     /// Render each byte as eight binary digits, separating consecutive bytes with a single space.
14 |     /// </summary>
15 |     /// <param name="target">Bytes to render.</param>
16 |     /// <returns>Text such as "00000001 11111111".</returns>
17 |     public static String ToBinaryString(this Byte[] target) =>
18 |         String.Join(" ", target.Select(b => Convert.ToString(b, 2).PadLeft(Bits.ByteBits, '0')));
19 |
20 |     /// <summary>
21 |     /// Render the whole array as binary text, then return a slice of that text.
22 |     /// </summary>
23 |     /// <param name="target">Bytes to render.</param>
24 |     /// <param name="offset">Character offset into the rendered text; the separating spaces are counted.</param>
25 |     /// <param name="count">Number of characters to return.</param>
26 |     /// <returns>The requested substring of the rendered text.</returns>
27 |     public static String ToBinaryString(this Byte[] target, Int32 offset, Int32 count) =>
28 |         target.ToBinaryString().Substring(offset, count);
29 |
30 |     /// <summary>
31 |     /// Render all 64 bits of the value, most significant first, with a space after every eighth digit.
32 |     /// </summary>
33 |     /// <param name="target">Value to render.</param>
34 |     /// <returns>Eight groups of eight binary digits, each group followed by a space.</returns>
35 |     public static String ToBinaryString(this UInt64 target) =>
36 |         Regex.Replace(Convert.ToString((Int64)target, 2).PadLeft(Bits.LongBits, '0'), ".{8}", "$0 ");
37 |
38 |     /// <summary>
39 |     /// Render the bytes as hexadecimal digits with no separators.
40 |     /// </summary>
41 |     /// <param name="target">Bytes to render.</param>
42 |     /// <returns>Text such as "0AFF".</returns>
43 |     public static String ToHexString(this Byte[] target) =>
44 |         BitConverter.ToString(target).Replace("-", ""); // BitConverter separates bytes with '-', not ' '
45 | }
--------------------------------------------------------------------------------
/Library/Codecs/Integers/EliasGammaIntegerDecoder.cs:
--------------------------------------------------------------------------------
1 | // ReSharper disable UnusedType.Global
2 |
3 | namespace InvertedTomato.Packing.Codecs.Integers;
4 |
5 | /// <summary>
6 | /// Decodes unsigned integers written as a run of zero bits followed by the value's binary digits.
7 | /// </summary>
8 | public class EliasGammaIntegerDecoder : IntegerDecoderBase
9 | {
10 |     private readonly IBitReader _reader;
11 |
12 |     /// <summary>
13 |     /// Create a decoder that pulls bits from <paramref name="reader"/>.
14 |     /// </summary>
15 |     /// <param name="reader">Source of encoded bits.</param>
16 |     public EliasGammaIntegerDecoder(IBitReader reader)
17 |     {
18 |         _reader = reader;
19 |     }
20 |
21 |     /// <summary>
22 |     /// Consume the leading zero bits, read one more bit than the number of zeros seen, and subtract one.
23 |     /// </summary>
24 |     /// <returns>The decoded value.</returns>
25 |     protected override UInt64 Decode()
26 |     {
27 |         // Each leading zero announces one extra bit in the value that follows
28 |         var leadingZeros = 0;
29 |         while (!_reader.PeakBit())
30 |         {
31 |             _reader.ReadBit();
32 |             leadingZeros++;
33 |         }
34 |
35 |         // The value is always one bit longer than the zero run; subtracting one removes the offset
36 |         return _reader.ReadBits(leadingZeros + 1) - 1;
37 |     }
38 | }
--------------------------------------------------------------------------------
/Library/Codecs/Integers/EliasGammaIntegerEncoder.cs:
--------------------------------------------------------------------------------
1 | // ReSharper disable UnusedType.Global
2 |
3 | namespace InvertedTomato.Packing.Codecs.Integers;
4 |
5 | /// <summary>
6 | /// Encodes unsigned integers as a run of zero bits followed by the binary digits of (value + 1).
7 | /// </summary>
8 | public class EliasGammaIntegerEncoder : IntegerEncoderBase
9 | {
10 |     private readonly IBitWriter _writer;
11 |
12 |     /// <summary>
13 |     /// Create an encoder that emits bits to <paramref name="writer"/>.
14 |     /// </summary>
15 |     /// <param name="writer">Destination for encoded bits.</param>
16 |     public EliasGammaIntegerEncoder(IBitWriter writer)
17 |     {
18 |         _writer = writer;
19 |     }
20 |
21 |     /// <summary>
22 |     /// Write <paramref name="value"/> to the underlying writer.
23 |     /// </summary>
24 |     /// <param name="value">Value to encode; must not exceed <see cref="EliasGammaInteger.MaxValue"/>.</param>
25 |     /// <exception cref="OverflowException"><paramref name="value"/> is <see cref="UInt64.MaxValue"/>, which cannot be offset by one.</exception>
26 |     protected override void Encode(UInt64 value)
27 |     {
28 |         // Offset value to allow zeros. Checked, so UInt64.MaxValue fails loudly instead of
29 |         // wrapping to 0 and emitting a symbol that does not decode back to the input
30 |         value = checked(value + 1);
31 |
32 |         // Calculate length
33 |         var length = Bits.CountUsed(value);
34 |
35 |         // Write unary zeros
36 |         _writer.WriteBits(0, length - 1);
37 |
38 |         // Write value
39 |         _writer.WriteBits(value, length);
40 |     }
41 |
42 |     /// <summary>
43 |     /// Calculate how many bits encoding <paramref name="value"/> would write.
44 |     /// </summary>
45 |     /// <param name="value">Value to measure.</param>
46 |     /// <returns>The bit count, or null when <paramref name="value"/> is too large to be encoded.</returns>
47 |     public override Int32? PredictEncodedBits(UInt64 value)
48 |     {
49 |         // NOTE(review): null is taken to mean "no prediction possible" - confirm against the other encoders
50 |         if (value > EliasGammaInteger.MaxValue) return null;
51 |
52 |         // Offset for zero
53 |         value++;
54 |
55 |         return Bits.CountUsed(value) * 2 - 1;
56 |     }
57 | }
--------------------------------------------------------------------------------
/Library/Codecs/Integers/IntegerEncoderBase.cs:
--------------------------------------------------------------------------------
1 | // ReSharper disable UnusedMember.Global
2 |
3 | namespace InvertedTomato.Packing.Codecs.Integers;
4 |
5 | public abstract class IntegerEncoderBase
6 | {
7 | protected abstract void Encode(UInt64 value);
8 |
9 | public void EncodeBit(Boolean value) => Encode(value ? 1UL : 0UL);
10 | public void EncodeUInt8(Byte value) => Encode(value);
11 | public void EncodeUInt16(UInt16 value) => Encode(value);
12 | public void EncodeUInt32(UInt32 value) => Encode(value);
13 | public void EncodeUInt64(UInt64 value) => Encode(value);
14 | public void EncodeInt8(SByte value) => Encode(ZigZagUtility.Encode(value));
15 | public void EncodeInt16(Int16 value) => Encode(ZigZagUtility.Encode(value));
16 | public void EncodeInt32(Int32 value) => Encode(ZigZagUtility.Encode(value));
17 | public void EncodeInt64(Int64 value) => Encode(ZigZagUtility.Encode(value));
18 |
19 | public abstract Int32? PredictEncodedBits(UInt64 value);
20 | }
--------------------------------------------------------------------------------
/Library/Codecs/Integers/ZigZag.cs:
--------------------------------------------------------------------------------
1 | namespace InvertedTomato.Packing.Codecs.Integers;
2 |
3 | /// <summary>
4 | /// Encode signed values as unsigned using the Protocol Buffers ZigZag bijection encoding algorithm.
5 | /// https://developers.google.com/protocol-buffers/docs/encoding
6 | /// </summary>
7 | public static class ZigZagUtility
8 | {
9 | /// <summary>
10 | /// Encode a signed long into a ZigZag unsigned long.
11 | /// </summary>
12 | /// <param name="value">Signed value to encode.</param>
13 | /// <returns>Unsigned value whose least significant bit is set when <paramref name="value"/> is negative.</returns>
14 | [MethodImpl(MethodImplOptions.AggressiveInlining)]
15 | public static UInt64 Encode(Int64 value) => (UInt64)((value << 1) ^ (value >> 63));
16 |
17 | /// <summary>
18 | /// Decode a ZigZag unsigned long back into a signed long.
19 | /// </summary>
20 | /// <param name="value">ZigZag-encoded value.</param>
21 | /// <returns>The signed value; a set least significant bit in <paramref name="value"/> yields a negative result.</returns>
22 | [MethodImpl(MethodImplOptions.AggressiveInlining)]
23 | public static Int64 Decode(UInt64 value) => (Int64)((value >> 1) ^ (~(value & 1) + 1));
24 | }
--------------------------------------------------------------------------------
/Test/EliasDeltaTests.cs:
--------------------------------------------------------------------------------
1 | namespace InvertedTomato.Packing;
2 |
3 | public class EliasDeltaTests
4 | {
5 | // TODO: A full set of tests is required! I haven't bothered yet as I haven't found any use for this codec beyond academic interest
6 |
7 | [Fact]
8 | public void CanEncodeDecodeFirst1000()
9 | {
10 | using var stream = new MemoryStream();
11 |
12 | using (var writer = new StreamBitWriter(stream))
13 | {
14 | var encoder = new EliasDeltaIntegerEncoder(writer);
15 | for (UInt64 symbol = 0; symbol < 1000; symbol++) encoder.EncodeUInt64(symbol);
16 | }
17 |
18 | stream.Seek(0, SeekOrigin.Begin);
19 |
20 | using (var reader = new StreamBitReader(stream))
21 | {
22 | var decoder = new EliasDeltaIntegerDecoder(reader);
23 | for (UInt64 symbol = 0; symbol < 1000; symbol++)
24 | {
25 | Assert.Equal(symbol, decoder.DecodeUInt64());
26 | }
27 | }
28 | }
29 | }
--------------------------------------------------------------------------------
/Test/EliasGammaTests.cs:
--------------------------------------------------------------------------------
1 | namespace InvertedTomato.Packing;
2 |
3 | public class EliasGammaTests
4 | {
5 | // TODO: A full set of tests is required! I haven't bothered yet as I haven't found any use for this codec beyond academic interest
6 |
7 | [Fact]
8 | public void CanEncodeDecodeFirst1000()
9 | {
10 | using var stream = new MemoryStream();
11 |
12 | using (var writer = new StreamBitWriter(stream))
13 | {
14 | var encoder = new EliasGammaIntegerEncoder(writer);
15 | for (UInt64 symbol = 0; symbol < 1000; symbol++) encoder.EncodeUInt64(symbol);
16 | }
17 |
18 | stream.Seek(0, SeekOrigin.Begin);
19 |
20 | using (var reader = new StreamBitReader(stream))
21 | {
22 | var decoder = new EliasGammaIntegerDecoder(reader);
23 | for (UInt64 symbol = 0; symbol < 1000; symbol++)
24 | {
25 | Assert.Equal(symbol, decoder.DecodeUInt64());
26 | }
27 | }
28 | }
29 | }
--------------------------------------------------------------------------------
/Library/Bits.cs:
--------------------------------------------------------------------------------
1 | namespace InvertedTomato.Packing;
2 |
3 | public static class Bits
4 | {
5 | public const Int32 ByteBits = 1 * 8; // Bits in one byte (8)
6 | public const Int32 LongBits = 8 * 8; // Bits in eight bytes (64)
7 |
8 | /// <summary>
9 | /// Count the number of bits used to express <paramref name="value"/>. Zero is reported as needing one bit.
10 | /// </summary>
11 | [MethodImpl(MethodImplOptions.AggressiveInlining)]
12 | public static Byte CountUsed(UInt64 value)
13 | {
14 | Byte bits = 0;
15 |
16 | do
17 | {
18 | bits++;
19 | value >>= 1;
20 | } while (value > 0); // do-while: the body runs once even when value is 0
21 |
22 | return bits;
23 | }
24 |
25 | /// <summary>
26 | /// Count the number of bits used to express <paramref name="value"/>. Zero is reported as needing one bit.
27 | /// </summary>
28 | [MethodImpl(MethodImplOptions.AggressiveInlining)]
29 | public static Byte CountUsed(Byte value)
30 | {
31 | Byte bits = 0;
32 |
33 | do
34 | {
35 | bits++;
36 | value >>= 1;
37 | } while (value > 0); // do-while: the body runs once even when value is 0
38 |
39 | return bits;
40 | }
41 | }
--------------------------------------------------------------------------------
/Test/EliasOmegaTests.cs:
--------------------------------------------------------------------------------
1 | namespace InvertedTomato.Packing;
2 |
3 | public class EliasOmegaTests
4 | {
5 | // TODO: A full set of tests is required! I haven't bothered yet as I haven't found any use for this codec beyond academic interest
6 |
7 | [Fact]
8 | public void CanEncodeDecodeFirst1000()
9 | {
10 | using var stream = new MemoryStream();
11 |
12 | using (var writer = new StreamBitWriter(stream))
13 | {
14 | var encoder = new EliasOmegaIntegerEncoder(writer);
15 | for (UInt64 symbol = 0; symbol < 1000; symbol++) encoder.EncodeUInt64(symbol);
16 | }
17 |
18 | stream.Seek(0, SeekOrigin.Begin);
19 |
20 | using (var reader = new StreamBitReader(stream))
21 | {
22 | var decoder = new EliasOmegaIntegerDecoder(reader);
23 | for (UInt64 symbol = 0; symbol < 1000; symbol++)
24 | {
25 | Assert.Equal(symbol, decoder.DecodeUInt64());
26 | }
27 | }
28 | }
29 | }
--------------------------------------------------------------------------------
/Library/Codecs/Integers/EliasOmegaIntegerDecoder.cs:
--------------------------------------------------------------------------------
1 | using System.Collections.Generic;
2 |
3 | // ReSharper disable UnusedType.Global
4 |
5 | namespace InvertedTomato.Packing.Codecs.Integers;
6 |
7 | /// <summary>
8 | /// Decodes unsigned integers written as a chain of length-prefixed groups terminated by a zero bit.
9 | /// </summary>
10 | public class EliasOmegaIntegerDecoder : IntegerDecoderBase
11 | {
12 |     private readonly IBitReader _reader;
13 |
14 |     /// <summary>
15 |     /// Create a decoder that pulls bits from <paramref name="reader"/>.
16 |     /// </summary>
17 |     /// <param name="reader">Source of encoded bits.</param>
18 |     public EliasOmegaIntegerDecoder(IBitReader reader)
19 |     {
20 |         _reader = reader;
21 |     }
22 |
23 |     /// <summary>
24 |     /// Follow the chain of groups until the terminating zero bit, then remove the offset of one.
25 |     /// </summary>
26 |     /// <returns>The decoded value.</returns>
27 |     protected override UInt64 Decode()
28 |     {
29 |         // N starts at 1
30 |         UInt64 n = 1;
31 |
32 |         for (;;)
33 |         {
34 |             // A "0" next means the chain is finished and N is the answer
35 |             if (!_reader.PeakBit())
36 |             {
37 |                 break;
38 |             }
39 |
40 |             // A "1" next means: read it plus N more bits, and treat that number as the new N
41 |             var groupBits = (Int32)n + 1;
42 |             n = _reader.ReadBits(groupBits);
43 |         }
44 |
45 |         // Consume the terminating bit
46 |         _reader.ReadBit();
47 |
48 |         // Offset for min value
49 |         return n - 1;
50 |     }
51 | }
--------------------------------------------------------------------------------
/Sample/Program.cs:
--------------------------------------------------------------------------------
1 | using InvertedTomato.Packing;
2 | using InvertedTomato.Packing.Codecs.Integers;
3 |
4 | // Encode some values...
5 | using var stream = new MemoryStream(); // Could be a FileStream or a NetworkStream
6 | using (var writer = new StreamBitWriter(stream))
7 | {
8 | var fib = new FibonacciIntegerEncoder(writer); // Pick a codec - you can use one or many
9 |
10 | // Encode some values using the Fibonacci codec
11 | fib.EncodeUInt64(1);
12 | fib.EncodeUInt64(2);
13 | fib.EncodeUInt64(3);
14 | }
15 |
16 | Console.WriteLine("Compressed data is " + stream.Length + " bytes"); // Output: Compressed data is 2 bytes
17 |
18 | // Decode the values...
19 | stream.Position = 0;
20 | using (var reader = new StreamBitReader(stream))
21 | {
22 | var fib = new FibonacciIntegerDecoder(reader);
23 |
24 | // Decode the Fibonacci values
25 | Console.WriteLine(fib.DecodeUInt64()); // Output: 1
26 | Console.WriteLine(fib.DecodeUInt64()); // Output: 2
27 | Console.WriteLine(fib.DecodeUInt64()); // Output: 3
28 | }
29 |
30 |
31 | Console.WriteLine("Done.");
32 | Console.ReadKey(true);
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 Ben Thompson
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/Library/Codecs/Integers/InvertedVlqInteger.cs:
--------------------------------------------------------------------------------
1 | // ReSharper disable UnusedType.Global
2 |
3 | namespace InvertedTomato.Packing.Codecs.Integers;
4 |
5 | /// <summary>
6 | /// VLQ similar to https://en.wikipedia.org/wiki/Variable-length_quantity with "Removing Redundancy", but the
7 | /// continuation bit flag is reversed. This might be more performant for datasets with consistently large values.
8 | /// </summary>
9 | public static class InvertedVlqInteger
10 | {
11 | public const UInt64 MinValue = UInt64.MinValue;
12 | public const UInt64 MaxValue = UInt64.MaxValue - 1;
13 |
14 | public static readonly Byte[] Zero = { 0x80 }; // 10000000
15 | public static readonly Byte[] One = { 0x81 }; // 10000001
16 | public static readonly Byte[] Two = { 0x82 }; // 10000010
17 | public static readonly Byte[] Four = { 0x84 }; // 10000100
18 | public static readonly Byte[] Eight = { 0x88 }; // 10001000
19 |
20 | internal const Byte Nil = 0x80; // 10000000
21 | internal const Byte Mask = 0x7f; // 01111111
22 | internal const Int32 PacketSize = 7;
23 | internal const UInt64 MinPacketValue = UInt64.MaxValue >> (64 - PacketSize); // 0x7f: the low PacketSize bits all set
24 | }
--------------------------------------------------------------------------------
/Library/Codecs/Integers/ThompsonAlphaIntegerDecoder.cs:
--------------------------------------------------------------------------------
1 | // ReSharper disable UnusedType.Global
2 |
3 | namespace InvertedTomato.Packing.Codecs.Integers;
4 |
5 | /// <summary>
6 | /// Decodes integers stored as a fixed-width length prefix followed by the value's bits with the
7 | /// leading (always-one) bit omitted.
8 | /// </summary>
9 | public class ThompsonAlphaIntegerDecoder : IntegerDecoderBase
10 | {
11 |     private readonly IBitReader _reader;
12 |     private readonly Int32 _lengthBits;
13 |
14 |     /// <summary>Creates a decoder that pulls its bits from <paramref name="reader"/>.</summary>
15 |     /// <param name="reader">Source of the encoded bits.</param>
16 |     /// <param name="lengthBits">Width in bits of the length prefix that precedes every value. Must be between 1 and 6.</param>
17 |     /// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="lengthBits"/> is outside 1..6.</exception>
18 |     public ThompsonAlphaIntegerDecoder(IBitReader reader, Int32 lengthBits)
19 |     {
20 |         // ArgumentOutOfRangeException(paramName, message): the parameter name goes first, the message second
21 |         if (lengthBits is < 1 or > 6) throw new ArgumentOutOfRangeException(nameof(lengthBits), $"Must be between 1 and 6, not {lengthBits}.");
22 |
23 |         _reader = reader;
24 |         _lengthBits = lengthBits;
25 |     }
26 |
27 |     /// <summary>Reads one value from the reader.</summary>
28 |     /// <returns>The decoded value.</returns>
29 |     protected override UInt64 Decode()
30 |     {
31 |         // Read the length prefix: how many payload bits follow
32 |         var length = (Int32)_reader.ReadBits(_lengthBits);
33 |
34 |         // Read the payload bits in a single operation
35 |         var value = _reader.ReadBits(length);
36 |
37 |         // Recover implied MSB
38 |         value |= (UInt64)1 << length;
39 |
40 |         // Remove offset to allow zeros
41 |         value--;
42 |
43 |         return value;
44 |     }
45 | }
--------------------------------------------------------------------------------
/Test/Test.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | net7.0
5 |
6 | false
7 |
8 | InvertedTomato.Packing.Tests
9 |
10 | InvertedTomato.Packing
11 |
12 | 11
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 | all
21 | runtime; build; native; contentfiles; analyzers; buildtransitive
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
--------------------------------------------------------------------------------
/Library/Codecs/Integers/EliasDeltaIntegerDecoder.cs:
--------------------------------------------------------------------------------
1 | // ReSharper disable UnusedType.Global
2 |
3 | namespace InvertedTomato.Packing.Codecs.Integers;
4 |
5 | /// <summary>
6 | /// Decodes integers written in Elias delta form, with a zero offset so that 0 can be represented.
7 | /// </summary>
8 | public class EliasDeltaIntegerDecoder : IntegerDecoderBase
9 | {
10 |     private readonly IBitReader _reader;
11 |
12 |     /// <summary>Creates a decoder that pulls its bits from <paramref name="reader"/>.</summary>
13 |     public EliasDeltaIntegerDecoder(IBitReader reader)
14 |     {
15 |         _reader = reader;
16 |     }
17 |
18 |     /// <summary>Reads one value from the reader.</summary>
19 |     /// <returns>The decoded value, with the zero offset already removed.</returns>
20 |     protected override UInt64 Decode()
21 |     {
22 |         // Step 1: swallow the run of leading zeros. The width of the length field is that count plus one,
23 |         // because the terminating 1 bit is left in the stream and read as the field's first bit.
24 |         var lengthFieldBits = 1;
25 |         for (; !_reader.PeakBit(); lengthFieldBits++)
26 |         {
27 |             _reader.ReadBit();
28 |         }
29 |
30 |         // Step 2: the length field holds N+1; subtract one to get N, the number of payload bits.
31 |         var payloadBits = (Int32)_reader.ReadBits(lengthFieldBits) - 1;
32 |
33 |         // Steps 3 and 4: the payload omits its leading 1 (worth 2^N), so add it back to the N bits read.
34 |         var withOffset = _reader.ReadBits(payloadBits) + ((UInt64)1 << payloadBits);
35 |
36 |         // Undo the zero offset
37 |         return withOffset - 1;
38 |     }
39 | }
--------------------------------------------------------------------------------
/Library/Codecs/Integers/VlqIntegerDecoder.cs:
--------------------------------------------------------------------------------
1 | // ReSharper disable UnusedType.Global
2 |
3 | namespace InvertedTomato.Packing.Codecs.Integers;
4 |
5 | /// <summary>
6 | /// Decodes VLQ integers: a sequence of bytes that each carry a payload chunk and a flag saying whether
7 | /// another byte follows.
8 | /// </summary>
9 | public class VlqIntegerDecoder : IntegerDecoderBase
10 | {
11 |     private readonly IBitReader _reader;
12 |
13 |     /// <summary>Creates a decoder that pulls its bits from <paramref name="reader"/>.</summary>
14 |     public VlqIntegerDecoder(IBitReader reader)
15 |     {
16 |         _reader = reader;
17 |     }
18 |
19 |     /// <summary>Reads one value from the reader.</summary>
20 |     /// <returns>The decoded value.</returns>
21 |     protected override UInt64 Decode()
22 |     {
23 |         UInt64 accumulator = 0;
24 |         var shift = 0;
25 |
26 |         while (true)
27 |         {
28 |             // Pull the next byte of the symbol
29 |             var packet = (Byte)_reader.ReadBits(Bits.ByteBits);
30 |
31 |             // Fold its payload (plus one) into the running total at the current bit position
32 |             var previous = accumulator;
33 |             accumulator += ((UInt64)(packet & VlqInteger.Mask) + 1) << shift;
34 |
35 | #if DEBUG
36 |             // A total that shrank has wrapped around
37 |             if (accumulator < previous) throw new OverflowException($"Symbol is larger than maximum supported value or is corrupt. See {nameof(VlqInteger)}.{nameof(VlqInteger.MaxValue)}.");
38 | #endif
39 |
40 |             // The next byte's payload sits one packet higher
41 |             shift += VlqInteger.PacketSize;
42 |
43 |             // Keep going while the byte says more follow
44 |             if ((packet & VlqInteger.More) > 0) continue;
45 |
46 |             // Final byte: drop the zero offset and hand the symbol back
47 |             return accumulator - 1;
48 |         }
49 |     }
50 | }
--------------------------------------------------------------------------------
/Library/Codecs/Integers/VlqIntegerEncoder.cs:
--------------------------------------------------------------------------------
1 | // ReSharper disable UnusedType.Global
2 |
3 | namespace InvertedTomato.Packing.Codecs.Integers;
4 |
5 | /// <summary>
6 | /// Encodes integers as VLQ: bytes that each carry a payload chunk plus a flag telling the decoder
7 | /// whether another byte follows.
8 | /// </summary>
9 | public class VlqIntegerEncoder : IntegerEncoderBase
10 | {
11 |     private readonly IBitWriter _writer;
12 |
13 |     /// <summary>Creates an encoder that pushes its bits to <paramref name="writer"/>.</summary>
14 |     public VlqIntegerEncoder(IBitWriter writer)
15 |     {
16 |         _writer = writer;
17 |     }
18 |
19 |     /// <summary>Writes one value to the writer.</summary>
20 |     /// <param name="value">Value to encode.</param>
21 |     /// <exception cref="OverflowException">DEBUG builds only: <paramref name="value"/> exceeds the codec maximum.</exception>
22 |     protected override void Encode(UInt64 value)
23 |     {
24 | #if DEBUG
25 |         if (value > VlqInteger.MaxValue) throw new OverflowException($"Symbol is larger than maximum supported value. Must be less than or equal to {nameof(VlqInteger.MaxValue)}");
26 | #endif
27 |
28 |         // Iterate through input, taking X bits of data each time, aborting when less than X bits left
29 |         while (value > VlqInteger.MinPacketValue)
30 |         {
31 |             // Write payload, flagging that more bytes follow
32 |             _writer.WriteBits((value & VlqInteger.Mask) | VlqInteger.More, 8);
33 |
34 |             // Offset value for next cycle (the decrement removes the redundancy of a zero continuation)
35 |             value >>= VlqInteger.PacketSize;
36 |             value--;
37 |         }
38 |
39 |         // Write remaining - marking it as the final byte for symbol
40 |         _writer.WriteBits(value & VlqInteger.Mask, 8);
41 |     }
42 |
43 |     /// <summary>Calculates how many bits <see cref="Encode"/> would write for a value.</summary>
44 |     /// <param name="value">Value that would be encoded.</param>
45 |     /// <returns>The number of bits, always a whole number of bytes.</returns>
46 |     public override Int32? PredictEncodedBits(UInt64 value)
47 |     {
48 |         // Walk the same reduction as Encode. Dividing the used-bit count by the packet size is not
49 |         // enough, because the per-byte decrement lets some values fit in one byte fewer than their bit
50 |         // length suggests (16384 needs 15 bits yet encodes to two bytes).
51 |         var packets = 1; // The final byte is always written
52 |         while (value > VlqInteger.MinPacketValue)
53 |         {
54 |             value >>= VlqInteger.PacketSize;
55 |             value--;
56 |             packets++;
57 |         }
58 |
59 |         return packets * (VlqInteger.PacketSize + 1);
60 |     }
61 | }
--------------------------------------------------------------------------------
/Library/Codecs/Integers/InvertedVlqIntegerDecoder.cs:
--------------------------------------------------------------------------------
1 | // ReSharper disable UnusedType.Global
2 |
3 | namespace InvertedTomato.Packing.Codecs.Integers;
4 |
5 | /// <summary>
6 | /// Decoder for a VLQ similar to https://en.wikipedia.org/wiki/Variable-length_quantity with
7 | /// "Removing Redundancy", but with the continuation bit flag reversed. This might be more performant
8 | /// for datasets with consistently large values.
9 | /// </summary>
10 | public class InvertedVlqIntegerDecoder : IntegerDecoderBase
11 | {
12 |     private readonly IBitReader _reader;
13 |
14 |     /// <summary>Creates a decoder that pulls its bits from <paramref name="reader"/>.</summary>
15 |     public InvertedVlqIntegerDecoder(IBitReader reader)
16 |     {
17 |         _reader = reader;
18 |     }
19 |
20 |     /// <summary>Reads one value from the reader.</summary>
21 |     /// <returns>The decoded value.</returns>
22 |     protected override UInt64 Decode()
23 |     {
24 |         UInt64 accumulator = 0;
25 |
26 |         // Each byte contributes its payload one packet higher than the byte before it
27 |         for (var shift = 0;; shift += InvertedVlqInteger.PacketSize)
28 |         {
29 |             // Pull the next byte of the symbol
30 |             var packet = _reader.ReadBits(Bits.ByteBits);
31 |
32 |             // Fold its payload (plus one) into the running total
33 |             var previous = accumulator;
34 |             accumulator += ((packet & InvertedVlqInteger.Mask) + 1) << shift;
35 |
36 | #if DEBUG
37 |             // A total that shrank has wrapped around
38 |             if (accumulator < previous) throw new OverflowException("Input symbol larger than the supported limit of 64 bits. Probable corrupt input.");
39 | #endif
40 |
41 |             // The flag bit marks the final byte: drop the zero offset and hand the symbol back
42 |             if ((packet & InvertedVlqInteger.Nil) != 0) return accumulator - 1;
43 |         }
44 |     }
45 | }
--------------------------------------------------------------------------------
/Library/Codecs/Integers/FibonacciInteger.cs:
--------------------------------------------------------------------------------
1 | // ReSharper disable UnusedType.Global
2 | // ReSharper disable UnusedMember.Global
3 |
4 | namespace InvertedTomato.Packing.Codecs.Integers;
5 |
6 | public static class FibonacciInteger
7 | {
8 | public const UInt64 MinValue = UInt64.MinValue; // Smallest value the Fibonacci codec accepts
9 | public const UInt64 MaxValue = UInt64.MaxValue - 1; // One below UInt64.MaxValue: the encoder adds 1 to every value before coding it
10 |
11 | /// <summary>
12 | /// Lookup table of Fibonacci numbers that can fit in a UInt64, in ascending order starting 1, 2, 3, 5 (no repeated leading 1)
13 | /// </summary>
14 | internal static readonly UInt64[] Table =
15 | {
16 | 1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 233, 377, 610, 987, 1597, 2584, 4181, 6765, 10946, 17711, 28657,
17 | 46368, 75025, 121393, 196418, 317811, 514229, 832040, 1346269, 2178309, 3524578, 5702887, 9227465, 14930352,
18 | 24157817, 39088169, 63245986, 102334155, 165580141, 267914296, 433494437, 701408733, 1134903170, 1836311903,
19 | 2971215073, 4807526976, 7778742049, 12586269025, 20365011074, 32951280099, 53316291173, 86267571272,
20 | 139583862445, 225851433717, 365435296162, 591286729879, 956722026041, 1548008755920, 2504730781961,
21 | 4052739537881, 6557470319842, 10610209857723, 17167680177565, 27777890035288, 44945570212853,
22 | 72723460248141, 117669030460994, 190392490709135, 308061521170129, 498454011879264, 806515533049393,
23 | 1304969544928657, 2111485077978050, 3416454622906707, 5527939700884757, 8944394323791464, 14472334024676221,
24 | 23416728348467685, 37889062373143906, 61305790721611591, 99194853094755497, 160500643816367088,
25 | 259695496911122585, 420196140727489673, 679891637638612258, 1100087778366101931, 1779979416004714189,
26 | 2880067194370816120, 4660046610375530309, 7540113804746346429, 12200160415121876738,
27 | };
28 | }
--------------------------------------------------------------------------------
/Library/Codecs/Integers/EliasDeltaIntegerEncoder.cs:
--------------------------------------------------------------------------------
1 | // ReSharper disable UnusedType.Global
2 |
3 | namespace InvertedTomato.Packing.Codecs.Integers;
4 |
5 | /// <summary>
6 | /// Encodes integers in Elias delta form, with a zero offset so that 0 can be represented.
7 | /// </summary>
8 | public class EliasDeltaIntegerEncoder : IntegerEncoderBase
9 | {
10 |     private readonly IBitWriter _writer;
11 |
12 |     /// <summary>Creates an encoder that pushes its bits to <paramref name="writer"/>.</summary>
13 |     public EliasDeltaIntegerEncoder(IBitWriter writer)
14 |     {
15 |         _writer = writer;
16 |     }
17 |
18 |     /// <summary>Writes one value to the writer.</summary>
19 |     /// <param name="value">Value to encode.</param>
20 |     protected override void Encode(UInt64 value)
21 |     {
22 |         // Offset value to allow zeros
23 |         value++;
24 |
25 |         // #1 Separate X into the highest power of 2 it contains (2^N) and the remaining N binary digits.
26 |         // Done with integer shifts: going through Math.Pow/double rounds values above 2^53 and could
27 |         // pick an N one too large, which made the remainder underflow.
28 |         var n = HighestPowerOfTwo(value);
29 |         var r = value - ((UInt64)1 << n);
30 |
31 |         // #2 Encode N+1 with Elias gamma coding.
32 |         var np = (Byte)(n + 1);
33 |         var len = Bits.CountUsed(np);
34 |         _writer.WriteBits(0, len - 1);
35 |         _writer.WriteBits(np, len);
36 |
37 |         // #3 Append the remaining N binary digits to this representation of N+1.
38 |         _writer.WriteBits(r, n);
39 |     }
40 |
41 |     /// <summary>Calculates how many bits <see cref="Encode"/> would write for a value.</summary>
42 |     /// <param name="value">Value that would be encoded.</param>
43 |     /// <returns>The number of bits.</returns>
44 |     public override Int32? PredictEncodedBits(UInt64 value)
45 |     {
46 |         var result = 0;
47 |
48 |         // Offset for zero
49 |         value++;
50 |
51 |         // #1 Separate X into the highest power of 2 it contains (2^N) and the remaining N binary digits.
52 |         var n = HighestPowerOfTwo(value);
53 |
54 |         // #2 Encode N+1 with Elias gamma coding.
55 |         var np = (Byte)(n + 1);
56 |         var len = Bits.CountUsed(np);
57 |         result += len - 1;
58 |         result += len;
59 |
60 |         // #3 Append the remaining N binary digits to this representation of N+1.
61 |         result += n;
62 |
63 |         return result;
64 |     }
65 |
66 |     // Returns the exponent N of the largest power of two not exceeding value (0 when value is 0 or 1).
67 |     private static Int32 HighestPowerOfTwo(UInt64 value)
68 |     {
69 |         var n = 0;
70 |
71 |         // Shift the value itself rather than computing value >> (n + 1): a shift count of 64 would be
72 |         // masked to 0 and never terminate.
73 |         for (var rest = value >> 1; rest != 0; rest >>= 1) n++;
74 |
75 |         return n;
76 |     }
77 | }
--------------------------------------------------------------------------------
/Library/Codecs/Integers/InvertedVlqIntegerEncoder.cs:
--------------------------------------------------------------------------------
1 | // ReSharper disable UnusedType.Global
2 |
3 | namespace InvertedTomato.Packing.Codecs.Integers;
4 |
5 | /// <summary>
6 | /// Encoder for a VLQ similar to https://en.wikipedia.org/wiki/Variable-length_quantity with
7 | /// "Removing Redundancy", but with the continuation bit flag reversed. This might be more performant
8 | /// for datasets with consistently large values.
9 | /// </summary>
10 | public class InvertedVlqIntegerEncoder : IntegerEncoderBase
11 | {
12 |     private readonly IBitWriter _writer;
13 |
14 |     /// <summary>Creates an encoder that pushes its bits to <paramref name="writer"/>.</summary>
15 |     public InvertedVlqIntegerEncoder(IBitWriter writer)
16 |     {
17 |         _writer = writer;
18 |     }
19 |
20 |     /// <summary>Writes one value to the writer.</summary>
21 |     /// <param name="value">Value to encode.</param>
22 |     /// <exception cref="OverflowException">DEBUG builds only: <paramref name="value"/> exceeds the codec maximum.</exception>
23 |     protected override void Encode(UInt64 value)
24 |     {
25 | #if DEBUG
26 |         if (value > InvertedVlqInteger.MaxValue) throw new OverflowException($"Symbol is larger than maximum supported value. Must be less than or equal to {InvertedVlqInteger.MaxValue}");
27 | #endif
28 |
29 |         // Iterate through input, taking X bits of data each time, aborting when less than X bits left
30 |         while (value > InvertedVlqInteger.MinPacketValue)
31 |         {
32 |             // Write payload with the MSB clear, meaning more bytes follow
33 |             _writer.WriteBits((Byte)(value & InvertedVlqInteger.Mask), Bits.ByteBits);
34 |
35 |             // Offset value for next cycle (the decrement removes the redundancy of a zero continuation)
36 |             value >>= InvertedVlqInteger.PacketSize;
37 |             value--;
38 |         }
39 |
40 |         // Write remaining - marking it as the final byte for symbol
41 |         _writer.WriteBits((Byte)(value | InvertedVlqInteger.Nil), Bits.ByteBits);
42 |     }
43 |
44 |     /// <summary>Calculates how many bits <see cref="Encode"/> would write for a value.</summary>
45 |     /// <param name="value">Value that would be encoded.</param>
46 |     /// <returns>The number of bits, always a whole number of bytes.</returns>
47 |     public override Int32? PredictEncodedBits(UInt64 value)
48 |     {
49 |         // Walk the same reduction as Encode. Dividing the used-bit count by the packet size is not
50 |         // enough, because the per-byte decrement lets some values fit in one byte fewer than their bit
51 |         // length suggests (16384 needs 15 bits yet encodes to two bytes).
52 |         var packets = 1; // The final byte is always written
53 |         while (value > InvertedVlqInteger.MinPacketValue)
54 |         {
55 |             value >>= InvertedVlqInteger.PacketSize;
56 |             value--;
57 |             packets++;
58 |         }
59 |
60 |         return packets * (InvertedVlqInteger.PacketSize + 1);
61 |     }
62 | }
--------------------------------------------------------------------------------
/Library/Codecs/Integers/FibonacciIntegerDecoder.cs:
--------------------------------------------------------------------------------
1 | // ReSharper disable UnusedType.Global
2 |
3 | namespace InvertedTomato.Packing.Codecs.Integers;
4 |
5 | /// <summary>
6 | /// Decodes Fibonacci-coded integers: each bit selects one entry of the Fibonacci table, and two
7 | /// consecutive 1 bits end the symbol.
8 | /// </summary>
9 | public class FibonacciIntegerDecoder : IntegerDecoderBase
10 | {
11 |     private readonly IBitReader _reader;
12 |
13 |     /// <summary>Creates a decoder that pulls its bits from <paramref name="reader"/>.</summary>
14 |     public FibonacciIntegerDecoder(IBitReader reader)
15 |     {
16 |         _reader = reader;
17 |     }
18 |
19 |     /// <summary>Reads one value from the reader.</summary>
20 |     /// <returns>The decoded value, with the zero offset already removed.</returns>
21 |     /// <exception cref="OverflowException">No terminator was found within the table's range (or, in DEBUG builds, the sum wrapped).</exception>
22 |     protected override UInt64 Decode()
23 |     {
24 |         UInt64 total = 0;
25 |         var previousWasOne = false;
26 |         var table = FibonacciInteger.Table;
27 |
28 |         // One input bit per table entry
29 |         for (var index = 0; index < table.Length; index++)
30 |         {
31 |             if (!_reader.ReadBit())
32 |             {
33 |                 // A zero contributes nothing and breaks any run of ones
34 |                 previousWasOne = false;
35 |                 continue;
36 |             }
37 |
38 |             // Second 1 in a row is the terminator: return the total less the zero offset
39 |             if (previousWasOne) return total - 1;
40 |
41 |             // Otherwise this bit selects the current Fibonacci number
42 |             var before = total;
43 |             total += table[index];
44 | #if DEBUG
45 |             if (total < before)
46 |             {
47 |                 // The sum wrapped, so the input is larger than expected
48 |                 throw new OverflowException($"Symbol is larger than the max value of {FibonacciInteger.MaxValue}. Data is probably corrupt");
49 |             }
50 | #endif
51 |             previousWasOne = true;
52 |         }
53 |
54 |         // The terminator may sit just past the table (this occurs only when decoding MaxValue)
55 |         if (previousWasOne && _reader.ReadBit()) return total - 1;
56 |
57 |         // Input longer than supported
58 |         throw new OverflowException($"Termination not found within supported {FibonacciInteger.Table.Length} bit range. Data is probably corrupt.");
59 |     }
60 | }
--------------------------------------------------------------------------------
/Library/Codecs/Integers/ThompsonAlphaIntegerEncoder.cs:
--------------------------------------------------------------------------------
1 | // ReSharper disable UnusedType.Global
2 | // ReSharper disable MemberCanBePrivate.Global
3 | // ReSharper disable UnusedMember.Global
4 |
5 | namespace InvertedTomato.Packing.Codecs.Integers;
6 |
7 | /// <summary>
8 | /// Encodes integers as a fixed-width length prefix followed by the value's bits with the leading
9 | /// (always-one) bit omitted.
10 | /// </summary>
11 | public class ThompsonAlphaIntegerEncoder : IntegerEncoderBase
12 | {
13 |     /// <summary>Smallest value the encoder accepts.</summary>
14 |     public UInt64 MinValue => UInt64.MinValue;
15 |
16 |     /// <summary>
17 |     /// Largest value the encoder accepts for the configured prefix width: (2^(2^lengthBits)) - 2.
18 |     /// The prefix can describe at most 2^lengthBits - 1 payload bits, the implied MSB adds one more bit,
19 |     /// and one value is lost to the zero offset.
20 |     /// </summary>
21 |     public UInt64 MaxValue => (UInt64.MaxValue >> (Bits.LongBits - (1 << _lengthBits))) - 1;
22 |
23 |     private readonly IBitWriter _writer;
24 |     private readonly Int32 _lengthBits;
25 |
26 |     /// <summary>Creates an encoder that pushes its bits to <paramref name="writer"/>.</summary>
27 |     /// <param name="writer">Destination for the encoded bits.</param>
28 |     /// <param name="lengthBits">Width in bits of the length prefix written before every value. Must be between 1 and 6.</param>
29 |     /// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="lengthBits"/> is outside 1..6.</exception>
30 |     public ThompsonAlphaIntegerEncoder(IBitWriter writer, Int32 lengthBits)
31 |     {
32 |         // ArgumentOutOfRangeException(paramName, message): the parameter name goes first, the message second
33 |         if (lengthBits is < 1 or > 6) throw new ArgumentOutOfRangeException(nameof(lengthBits), $"Must be between 1 and 6, not {lengthBits}.");
34 |
35 |         _writer = writer;
36 |         _lengthBits = lengthBits;
37 |     }
38 |
39 |     /// <summary>Writes one value to the writer.</summary>
40 |     /// <param name="value">Value to encode.</param>
41 |     /// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="value"/> exceeds <see cref="MaxValue"/>.</exception>
42 |     protected override void Encode(UInt64 value)
43 |     {
44 |         if (value > MaxValue) throw new ArgumentOutOfRangeException(nameof(value), $"Value is greater than maximum of {MaxValue}. Consider increasing length bits to support larger numbers.");
45 |
46 |         // Offset value to allow zeros
47 |         value++;
48 |
49 |         // Count length
50 |         var length = Bits.CountUsed(value);
51 |
52 |         // Clip MSB, it's redundant
53 |         length--;
54 |         value = length == 0 ? 0 : value << (Bits.LongBits - length) >> (Bits.LongBits - length);
55 |
56 |         // Write length
57 |         _writer.WriteBits(length, _lengthBits);
58 |
59 |         // Write number
60 |         _writer.WriteBits(value, length);
61 |     }
62 |
63 |     /// <summary>Calculates how many bits <see cref="Encode"/> would write for a value.</summary>
64 |     /// <param name="value">Value that would be encoded.</param>
65 |     /// <returns>The number of bits, or null when the value exceeds <see cref="MaxValue"/>.</returns>
66 |     public override Int32? PredictEncodedBits(UInt64 value)
67 |     {
68 |         // Check not too large (tested before the offset is applied so the increment cannot wrap)
69 |         if (value > MaxValue) return null;
70 |
71 |         // Offset value to allow zeros
72 |         value++;
73 |
74 |         // Count length
75 |         var length = Bits.CountUsed(value);
76 |
77 |         // Clip MSB, it's redundant
78 |         length--;
79 |
80 |         return _lengthBits + length;
81 |     }
82 | }
--------------------------------------------------------------------------------
/Library/Codecs/Integers/EliasOmegaIntegerEncoder.cs:
--------------------------------------------------------------------------------
1 | using System.Collections.Generic;
2 |
3 | // ReSharper disable UnusedType.Global
4 |
5 | namespace InvertedTomato.Packing.Codecs.Integers;
6 |
7 | /// <summary>
8 | /// Encodes integers in Elias omega form, with a zero offset so that 0 can be represented.
9 | /// </summary>
10 | public class EliasOmegaIntegerEncoder : IntegerEncoderBase
11 | {
12 |     private readonly IBitWriter _writer;
13 |
14 |     /// <summary>Creates an encoder that pushes its bits to <paramref name="writer"/>.</summary>
15 |     public EliasOmegaIntegerEncoder(IBitWriter writer)
16 |     {
17 |         _writer = writer;
18 |     }
19 |
20 |     /// <summary>Writes one value to the writer.</summary>
21 |     /// <param name="value">Value to encode.</param>
22 |     protected override void Encode(UInt64 value)
23 |     {
24 |         // Offset min value
25 |         value++;
26 |
27 |         // Prepare buffer: each entry is a group of bits (key) and how many of them to write (value).
28 |         // A stack is used because groups are produced last-to-first but must be written first-to-last.
29 |         // NOTE(review): the element type was lost from this copy of the file; UInt64/Int32 matches the
30 |         // WriteBits call below - confirm against the repository.
31 |         var groups = new Stack<KeyValuePair<UInt64, Int32>>();
32 |
33 |         // #1 Place a "0" at the end of the code.
34 |         groups.Push(new(0, 1));
35 |
36 |         // #2 If N=1, stop; encoding is complete.
37 |         while (value > 1)
38 |         {
39 |             // Calculate the length of value
40 |             var length = Bits.CountUsed(value);
41 |
42 |             // #3 Prepend the binary representation of N to the beginning of the code (this will be at least two bits, the first bit of which is a 1)
43 |             groups.Push(new(value, length));
44 |
45 |             // #4 Let N equal the number of bits just prepended, minus one.
46 |             value = (UInt64)length - 1;
47 |         }
48 |
49 |         // Write buffer (enumerating a stack yields the most recently pushed group first)
50 |         foreach (var item in groups)
51 |         {
52 |             var bits = item.Value;
53 |             var group = item.Key;
54 |
55 |             _writer.WriteBits(group, bits);
56 |         }
57 |     }
58 |
59 |     /// <summary>Calculates how many bits <see cref="Encode"/> would write for a value.</summary>
60 |     /// <param name="value">Value that would be encoded.</param>
61 |     /// <returns>The number of bits, including the terminating zero bit.</returns>
62 |     public override Int32? PredictEncodedBits(UInt64 value)
63 |     {
64 |         var result = 1; // Termination bit
65 |
66 |         // Offset value to allow for 0s
67 |         value++;
68 |
69 |         // #2 If N=1, stop; encoding is complete.
70 |         while (value > 1)
71 |         {
72 |             // Calculate the length of value
73 |             var length = Bits.CountUsed(value);
74 |
75 |             // #3 Prepend the binary representation of N to the beginning of the code (this will be at least two bits, the first bit of which is a 1)
76 |             result += length;
77 |
78 |             // #4 Let N equal the number of bits just prepended, minus one.
79 |             value = (UInt64)length - 1;
80 |         }
81 |
82 |         return result;
83 |     }
84 | }
--------------------------------------------------------------------------------
/Library/Library.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | enable
5 | enable
6 | InvertedTomato.Packing
7 | InvertedTomato.Packing
8 | net7.0;netstandard1.0;netstandard2.0
9 | 11
10 | 5.0.2
11 | InvertedTomato.Packing
12 | Inverted Tomato
13 | Library for expressing data in the least possible space without using compression. Handy storage and transmission of data when both speed and size are critical. Includes VLQ, Elias Omega, Elias Gamma, Elias Delta, Fibonacci and others.
14 | Inverted Tomato
15 | https://github.com/invertedtomato/packing
16 | https://raw.githubusercontent.com/invertedtomato/packing/master/LICENSE
17 | https://github.com/invertedtomato/packing
18 | git
19 | Compression VLQ Elias Omega Gamma Delta Fibonacci Integer Numbers
20 | disable
21 | true
22 | InvertedTomato.Packing
23 | InvertedTomato.Packing
24 | 5.0.2
25 | 5.0.2
26 | Updated readme
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 | README.md
36 |
37 |
38 |
39 | InvertedTomato.Packing.xml
40 |
41 |
42 |
43 | InvertedTomato.Packing.xml
44 |
45 |
46 |
47 |
48 |
49 |
50 |
--------------------------------------------------------------------------------
/packing.sln:
--------------------------------------------------------------------------------
1 |
2 | Microsoft Visual Studio Solution File, Format Version 12.00
3 | # Visual Studio Version 16
4 | VisualStudioVersion = 16.0.30114.105
5 | MinimumVisualStudioVersion = 10.0.40219.1
6 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Sample", "Sample\Sample.csproj", "{66CD159E-51BA-4021-AE05-1540CBB53137}"
7 | EndProject
8 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Test", "Test\Test.csproj", "{40D316FC-DF50-4620-905E-88F8959FAE32}"
9 | EndProject
10 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LoadTest", "LoadTest\LoadTest.csproj", "{F63FB851-FCC5-4E9D-8DD7-6D5D156FA962}"
11 | EndProject
12 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Library", "Library\Library.csproj", "{F84E594A-76F6-409A-8210-3C332C2CA785}"
13 | EndProject
14 | Global
15 | GlobalSection(SolutionConfigurationPlatforms) = preSolution
16 | Debug|Any CPU = Debug|Any CPU
17 | Release|Any CPU = Release|Any CPU
18 | EndGlobalSection
19 | GlobalSection(SolutionProperties) = preSolution
20 | HideSolutionNode = FALSE
21 | EndGlobalSection
22 | GlobalSection(ProjectConfigurationPlatforms) = postSolution
23 | {66CD159E-51BA-4021-AE05-1540CBB53137}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
24 | {66CD159E-51BA-4021-AE05-1540CBB53137}.Debug|Any CPU.Build.0 = Debug|Any CPU
25 | {66CD159E-51BA-4021-AE05-1540CBB53137}.Release|Any CPU.ActiveCfg = Release|Any CPU
26 | {66CD159E-51BA-4021-AE05-1540CBB53137}.Release|Any CPU.Build.0 = Release|Any CPU
27 | {40D316FC-DF50-4620-905E-88F8959FAE32}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
28 | {40D316FC-DF50-4620-905E-88F8959FAE32}.Debug|Any CPU.Build.0 = Debug|Any CPU
29 | {40D316FC-DF50-4620-905E-88F8959FAE32}.Release|Any CPU.ActiveCfg = Release|Any CPU
30 | {40D316FC-DF50-4620-905E-88F8959FAE32}.Release|Any CPU.Build.0 = Release|Any CPU
31 | {F63FB851-FCC5-4E9D-8DD7-6D5D156FA962}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
32 | {F63FB851-FCC5-4E9D-8DD7-6D5D156FA962}.Debug|Any CPU.Build.0 = Debug|Any CPU
33 | {F63FB851-FCC5-4E9D-8DD7-6D5D156FA962}.Release|Any CPU.ActiveCfg = Release|Any CPU
34 | {F63FB851-FCC5-4E9D-8DD7-6D5D156FA962}.Release|Any CPU.Build.0 = Release|Any CPU
35 | {F84E594A-76F6-409A-8210-3C332C2CA785}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
36 | {F84E594A-76F6-409A-8210-3C332C2CA785}.Debug|Any CPU.Build.0 = Debug|Any CPU
37 | {F84E594A-76F6-409A-8210-3C332C2CA785}.Release|Any CPU.ActiveCfg = Release|Any CPU
38 | {F84E594A-76F6-409A-8210-3C332C2CA785}.Release|Any CPU.Build.0 = Release|Any CPU
39 | EndGlobalSection
40 | EndGlobal
41 |
--------------------------------------------------------------------------------
/Library/Codecs/Integers/FibonacciIntegerEncoder.cs:
--------------------------------------------------------------------------------
1 | // ReSharper disable UnusedType.Global
2 |
3 | namespace InvertedTomato.Packing.Codecs.Integers;
4 |
5 | /// <summary>
6 | /// Encodes integers in Fibonacci form: one bit per Fibonacci number used to make up the value,
7 | /// smallest first, followed by a terminating 1 bit.
8 | /// </summary>
9 | public class FibonacciIntegerEncoder : IntegerEncoderBase
10 | {
11 |     private const UInt64 One = 1;
12 |
13 |     private readonly IBitWriter _writer;
14 |
15 |     /// <summary>Creates an encoder that pushes its bits to <paramref name="writer"/>.</summary>
16 |     public FibonacciIntegerEncoder(IBitWriter writer)
17 |     {
18 |         _writer = writer;
19 |     }
20 |
21 |     /// <summary>Writes one value to the writer.</summary>
22 |     /// <param name="value">Value to encode.</param>
23 |     /// <exception cref="OverflowException">DEBUG builds only: <paramref name="value"/> exceeds the codec maximum.</exception>
24 |     protected override void Encode(UInt64 value)
25 |     {
26 | #if DEBUG
27 |         // Check for overflow
28 |         if (value > FibonacciInteger.MaxValue) throw new OverflowException($"Exceeded FibonacciCodec maximum supported symbol value of {FibonacciInteger.MaxValue}.");
29 | #endif
30 |
31 |         // Fibonacci doesn't support 0s, so offset by 1 to allow for them
32 |         value++;
33 |
34 |         // #1 Find the largest Fibonacci number equal to or less than N; subtract this number from N, keeping track of the remainder.
35 |         // #3 Repeat the previous steps, substituting the remainder for N, until a remainder of 0 is reached.
36 |         UInt64[]? buffers = null;
37 |         Int32[]? counts = null;
38 |         Int32 a;
39 |         // ReSharper disable once TooWideLocalVariableScope
40 |         Int32 b;
41 |         for (var i = FibonacciInteger.Table.Length - 1; i >= 0; i--)
42 |         {
43 |             // Do nothing if not a fib match
44 |             if (value < FibonacciInteger.Table[i]) continue;
45 |
46 |             // If this is the first fib match...
47 |             if (buffers == null)
48 |             {
49 |                 // Calculate the total bit count
50 |                 var totalCount = i + 2; // The current index, add one to make it a count, and add another one for the termination bit
51 |
52 |                 // Allocate buffers
53 |                 buffers = new UInt64[totalCount / Bits.LongBits + 1];
54 |                 counts = new Int32[totalCount / Bits.LongBits + 1];
55 |
56 |                 // Calculate the count of bits for each buffer
57 |                 for (var j = 0; j < counts.Length; j++)
58 |                 {
59 |                     counts[j] = Math.Min(totalCount, Bits.LongBits);
60 |                     totalCount -= counts[j];
61 |                 }
62 |
63 |                 // Calculate address for termination bit
64 |                 a = (i + 1) / Bits.LongBits;
65 |
66 |                 // Set termination bit
67 |                 buffers[a] |= One;
68 |             }
69 |
70 |             // Calculate address: which buffer, and how far left within it. The position inside the
71 |             // buffer is taken explicitly rather than relying on the shift count being masked to six bits.
72 |             a = i / Bits.LongBits;
73 |             b = counts![a] - i % Bits.LongBits - 1;
74 |
75 |             // Write to buffer
76 |             buffers[a] |= One << b;
77 |
78 |             // Deduct Fibonacci number from value
79 |             value -= FibonacciInteger.Table[i];
80 |         }
81 |
82 |         // Write out buffers
83 |         for (a = 0; a < buffers!.Length; a++) _writer.WriteBits(buffers[a], counts![a]);
84 |     }
85 |
86 |     /// <summary>Calculates how many bits <see cref="Encode"/> would write for a value.</summary>
87 |     /// <param name="value">Value that would be encoded.</param>
88 |     /// <returns>The number of bits including the termination bit, or null when the value exceeds the codec maximum.</returns>
89 |     public override Int32? PredictEncodedBits(UInt64 value)
90 |     {
91 |         // Check for overflow
92 |         if (value > FibonacciInteger.MaxValue)
93 |         {
94 |             return null;
95 |         }
96 |
97 |         // Offset for zero
98 |         value++;
99 |
100 |         for (var i = FibonacciInteger.Table.Length - 1; i >= 0; i--)
101 |         {
102 |             if (value >= FibonacciInteger.Table[i])
103 |             {
104 |                 // The current index, add one to make it a count, and add another one for the termination bit
105 |                 return i + 2;
106 |             }
107 |         }
108 |
109 |         return 0;
110 |     }
111 | }
--------------------------------------------------------------------------------
/Library/StreamBitReader.cs:
--------------------------------------------------------------------------------
1 | // ReSharper disable UnusedType.Global
2 | // ReSharper disable MemberCanBePrivate.Global
3 |
4 | namespace InvertedTomato.Packing;
5 |
6 | /// <summary>
7 | /// Reads bits, most-significant bit of each byte first, from an underlying stream via an internal byte buffer.
8 | /// </summary>
9 | public class StreamBitReader : IBitReader, IDisposable
10 | {
11 |     private readonly Stream _underlying;
12 |     private readonly Boolean _ownUnderlying;
13 |     private readonly Byte[] _buffer;
14 |     private Int32 _offset; // Position of the next unread bit within the buffer
15 |     private Int32 _count; // Number of unread bits left in the buffer
16 |
17 |     /// <summary>True once <see cref="Dispose"/> has run.</summary>
18 |     public Boolean IsDisposed { get; private set; }
19 |
20 |     /// <summary>Creates a reader over a stream.</summary>
21 |     /// <param name="underlying">Stream to read from.</param>
22 |     /// <param name="ownUnderlying">When true, disposing this reader also disposes <paramref name="underlying"/>.</param>
23 |     /// <param name="bufferSize">Size in bytes of the internal buffer.</param>
24 |     public StreamBitReader(Stream underlying, Boolean ownUnderlying = false, Int32 bufferSize = 1024)
25 |     {
26 |         _underlying = underlying;
27 |         _ownUnderlying = ownUnderlying;
28 |         _buffer = new Byte[bufferSize];
29 |     }
30 |
31 |     /// <summary>Reads the next <paramref name="count"/> bits and returns them right-aligned.</summary>
32 |     /// <exception cref="ArgumentOutOfRangeException">DEBUG builds only: <paramref name="count"/> is outside the supported range.</exception>
33 |     /// <exception cref="EndOfStreamException">The underlying stream has no more data.</exception>
34 |     public UInt64 ReadBits(int count)
35 |     {
36 | #if DEBUG
37 |         if (count is < 0 or > Bits.LongBits) throw new ArgumentOutOfRangeException(nameof(count), $"Must be between 0 and {Bits.LongBits}");
38 | #endif
39 |
40 |         // Nothing requested: return before UnderlyingRead gets a chance to touch the stream
41 |         if (count == 0) return 0;
42 |
43 |         UInt64 result = 0;
44 |         while (true)
45 |         {
46 |             // Refill the buffer if it has run dry
47 |             UnderlyingRead();
48 |
49 |             // Locate the next unread bit
50 |             var byteIndex = _offset / Bits.ByteBits;
51 |             var bitIndex = _offset % Bits.ByteBits;
52 |
53 |             // Take as many bits as this byte can still supply, capped at what is wanted
54 |             var take = Math.Min(Bits.ByteBits - bitIndex, count);
55 |
56 |             // Shifting left then truncating to a byte discards bits already consumed; shifting right aligns the rest
57 |             var bits = (Byte)(_buffer[byteIndex] << bitIndex) >> (Bits.ByteBits - take);
58 |
59 |             // Place the bits above the space reserved for those still to come
60 |             count -= take;
61 |             result |= (UInt64)bits << count;
62 |             _offset += take;
63 |             _count -= take;
64 |
65 |             // Stop once everything requested has been read
66 |             if (count <= 0) return result;
67 |         }
68 |     }
69 |
70 |     /// <summary>Reads a single bit.</summary>
71 |     public Boolean ReadBit()
72 |     {
73 |         return ReadBits(1) > 0;
74 |     }
75 |
76 |     /// <summary>Discards the unread bits of the current byte so the next read starts on a byte boundary.</summary>
77 |     public void Align()
78 |     {
79 |         ReadBits(_count % Bits.ByteBits);
80 |     }
81 |
82 |     /// <summary>Returns the next bit without consuming it.</summary>
83 |     /// <exception cref="EndOfStreamException">The underlying stream has no more data.</exception>
84 |     public bool PeakBit()
85 |     {
86 |         // Refill the buffer if it has run dry
87 |         UnderlyingRead();
88 |
89 |         // Locate the next unread bit and build a mask that selects it
90 |         var byteIndex = _offset / Bits.ByteBits;
91 |         var bitIndex = _offset % Bits.ByteBits;
92 |         var mask = (Byte)(1 << (Bits.ByteBits - bitIndex - 1));
93 |
94 |         return (_buffer[byteIndex] & mask) > 0;
95 |     }
96 |
97 |     // Refills the buffer from the stream, but only when no unread bits remain
98 |     private void UnderlyingRead()
99 |     {
100 |         if (_count > 0) return;
101 |
102 |         _offset = 0;
103 |         var bytesRead = _underlying.Read(_buffer);
104 |         _count = bytesRead * Bits.ByteBits;
105 |
106 |         // Nothing could be loaded
107 |         if (_count == 0) throw new EndOfStreamException();
108 |     }
109 |
110 |     /// <summary>Marks the reader as disposed and, if it owns the underlying stream, disposes that too.</summary>
111 |     public void Dispose()
112 |     {
113 |         if (!IsDisposed)
114 |         {
115 |             IsDisposed = true;
116 |
117 |             if (_ownUnderlying) _underlying.Dispose();
118 |         }
119 |     }
120 |
121 |     /// <summary>Renders the unread part of the buffer as a binary string.</summary>
122 |     public override String ToString()
123 |     {
124 |         return _buffer.ToBinaryString(_offset, _count);
125 |     }
126 | }
--------------------------------------------------------------------------------
/Test/RawCodecTests.cs:
--------------------------------------------------------------------------------
1 | namespace InvertedTomato.Packing;
2 |
3 | public class RawCodecTests
4 | {
5 | private static Byte[] Encode(UInt64 value)
6 | {
7 | using var stream = new MemoryStream();
8 | using (var writer = new StreamBitWriter(stream))
9 | {
10 | var encoder = new RawIntegerEncoder(writer);
11 | encoder.EncodeUInt64(value);
12 | }
13 |
14 | return stream.ToArray();
15 | }
16 |
17 | [Fact]
18 | public void CanEncode0() => Assert.Equal(new Byte[] { 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000 }.ToHexString(), Encode(0).ToHexString());
19 |
20 | [Fact]
21 | public void CanEncode1() => Assert.Equal(new Byte[] { 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000001 }.ToHexString(), Encode(1).ToHexString());
22 |
23 | [Fact]
24 | public void CanEncode2() => Assert.Equal(new Byte[] { 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000010 }.ToHexString(), Encode(2).ToHexString());
25 |
26 | [Fact]
27 | public void CanEncode3() => Assert.Equal(new Byte[] { 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000011 }.ToHexString(), Encode(3).ToHexString());
28 |
29 | [Fact]
30 | public void CanEncodeMax() => Assert.Equal(new Byte[] { 0b11111111, 0b11111111, 0b11111111, 0b11111111, 0b11111111, 0b11111111, 0b11111111, 0b11111111 }.ToHexString(),
31 | Encode(UInt64.MaxValue).ToHexString());
32 |
33 | private static UInt64 Decode(Byte[] encoded)
34 | {
35 | using var stream = new MemoryStream(encoded);
36 | using var reader = new StreamBitReader(stream);
37 | var decoder = new RawIntegerDecoder(reader);
38 | return decoder.DecodeUInt64();
39 | }
40 |
41 | [Fact]
42 | public void CanDecode0() => Assert.Equal((UInt64)0, Decode(new Byte[] { 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000 })); // Input is always eight bytes; the value sits in the last byte for the small cases below
43 |
44 | [Fact]
45 | public void CanDecode1() => Assert.Equal((UInt64)1, Decode(new Byte[] { 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000001 }));
46 |
47 | [Fact]
48 | public void CanDecode2() => Assert.Equal((UInt64)2, Decode(new Byte[] { 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000010 }));
49 |
50 | [Fact]
51 | public void CanDecode3() => Assert.Equal((UInt64)3, Decode(new Byte[] { 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000011 }));
52 |
53 | [Fact]
54 | public void CanDecodeMax() => Assert.Equal(RawInteger.MaxValue, Decode(new Byte[] { 0b11111111, 0b11111111, 0b11111111, 0b11111111, 0b11111111, 0b11111111, 0b11111111, 0b11111111 })); // Eight all-ones bytes are expected to decode to RawInteger.MaxValue
55 |
56 |
57 | [Fact]
58 | public void CanEncodeDecodeFirst1000()
59 | {
60 |     // Round-trips the symbols 0..999 through one shared in-memory stream
61 |     const UInt64 symbolCount = 1000;
62 |     using var buffer = new MemoryStream();
63 |
64 |     using (var bitWriter = new StreamBitWriter(buffer))
65 |     {
66 |         var encoder = new RawIntegerEncoder(bitWriter);
67 |         for (UInt64 value = 0; value < symbolCount; value++) encoder.EncodeUInt64(value);
68 |     } // Leaving the using block disposes the writer before the stream is read back
69 |
70 |     buffer.Position = 0;
71 |
72 |     using (var bitReader = new StreamBitReader(buffer))
73 |     {
74 |         var decoder = new RawIntegerDecoder(bitReader);
75 |         UInt64 expected = 0;
76 |         while (expected < symbolCount) Assert.Equal(expected++, decoder.DecodeUInt64());
77 |     }
78 | }
79 | }
--------------------------------------------------------------------------------
/Library/StreamBitWriter.cs:
--------------------------------------------------------------------------------
1 | // ReSharper disable UnusedType.Global
2 | // ReSharper disable MemberCanBePrivate.Global
3 |
4 | namespace InvertedTomato.Packing;
5 |
6 | /// <summary>
7 | /// Writes bits to an underlying <see cref="Stream"/>. Bits are packed most-significant-bit first into an in-memory
8 | /// buffer, which is written to the stream each time it fills and once more when the writer is disposed.
9 | /// </summary>
10 | public class StreamBitWriter : IBitWriter, IDisposable
11 | {
12 |     private const UInt64 Zero = 0;
13 |     private const UInt64 One = 1;
14 |     private readonly Stream _underlying;
15 |     private readonly Boolean _ownUnderlying;
16 |     private readonly Byte[] _buffer;
17 |
18 |     // Number of bits currently staged in _buffer
19 |     private Int32 _count;
20 |
21 |     /// <summary>True once <see cref="Dispose"/> has been called.</summary>
22 |     public Boolean IsDisposed { get; private set; }
23 |
24 |     /// <summary>Creates a writer that stages bits in a buffer of <paramref name="bufferSize"/> bytes.</summary>
25 |     /// <param name="underlying">Stream that receives the packed bytes.</param>
26 |     /// <param name="ownUnderlying">When true, <paramref name="underlying"/> is disposed together with this writer.</param>
27 |     /// <param name="bufferSize">Size of the staging buffer in bytes; must be at least 1.</param>
28 |     /// <exception cref="ArgumentNullException"><paramref name="underlying"/> is null.</exception>
29 |     /// <exception cref="ArgumentOutOfRangeException"><paramref name="bufferSize"/> is less than 1.</exception>
30 |     public StreamBitWriter(Stream underlying, Boolean ownUnderlying = false, Int32 bufferSize = 1024)
31 |     {
32 |         // Fail here rather than on the first write: an empty buffer would otherwise only surface as an index error later
33 |         if (bufferSize < 1) throw new ArgumentOutOfRangeException(nameof(bufferSize), $"Must be at least 1 but was {bufferSize}");
34 |
35 |         _underlying = underlying ?? throw new ArgumentNullException(nameof(underlying));
36 |         _ownUnderlying = ownUnderlying;
37 |         _buffer = new Byte[bufferSize];
38 |     }
39 |
40 |     /// <summary>
41 |     /// Appends the lowest <paramref name="count"/> bits of <paramref name="bits"/>, highest of those bits first.
42 |     /// A <paramref name="count"/> of 0 writes nothing.
43 |     /// </summary>
44 |     /// <param name="bits">Value holding the bits; no bit outside the lowest <paramref name="count"/> may be set.</param>
45 |     /// <param name="count">Number of bits to write, from 0 to <c>Bits.LongBits</c>.</param>
46 |     /// <remarks>The argument checks below are compiled into DEBUG builds only.</remarks>
47 |     public void WriteBits(UInt64 bits, int count)
48 |     {
49 | #if DEBUG
50 |         // Check the count is sane
51 |         if (count is < 0 or > Bits.LongBits) throw new ArgumentOutOfRangeException(nameof(count), $"Must be between 0 and {Bits.LongBits} but was {count}");
52 |
53 |         // Check that only bits within the count range are used (yep, we could clean this automatically, but that adds operations and slows things down, so we only check when debugging)
54 |         if ((bits << Bits.LongBits - count >> Bits.LongBits - count != bits)
55 |             || (count == 0 && bits > 0) // C# uses only the low six bits of a 64-bit shift count, so a shift by 64 is a shift by 0 and the test above always passes for count == 0; catch that case here
56 |            ) throw new ArgumentException("Bits must only have '1' bits within the 'count' range. Ie, if count=1, only the right-most bit can be used", nameof(bits));
57 | #endif
58 |
59 |         // Cycle through buffer bytes until every bit is stored. Testing up front means a zero-count write
60 |         // touches nothing, instead of running one iteration that could OR stray bits into the current byte.
61 |         while (count > 0)
62 |         {
63 |             // Calculate bit address: byte index and bit offset within that byte
64 |             var a = _count / Bits.ByteBits;
65 |             var b = _count % Bits.ByteBits;
66 |
67 |             // Calculate number of bits to load into this byte
68 |             var load = Math.Min(Bits.ByteBits - b, count);
69 |
70 |             // Extract bits: the highest `load` bits that are still unwritten
71 |             var chunk = (Byte)(bits >> (count - load));
72 |
73 |             // Load the bits directly below those already stored in this byte
74 |             _buffer[a] |= (Byte)(chunk << (Bits.ByteBits - load - b));
75 |             _count += load;
76 |
77 |             // Decrement input
78 |             count -= load;
79 |
80 |             // If buffer is full..
81 |             if (_count == _buffer.Length * Bits.ByteBits)
82 |             {
83 |                 // Flush buffer
84 |                 _underlying.Write(_buffer);
85 |
86 |                 // Clear buffer (bits are OR-ed in, so stale content must be zeroed)
87 |                 _buffer.Clear();
88 |                 _count = 0;
89 |             }
90 |         }
91 |     }
92 |
93 |     /// <summary>Writes a single bit: 1 for true, 0 for false.</summary>
94 |     public void WriteBit(Boolean value) => WriteBits(value ? One : Zero, 1);
95 |
96 |     /// <summary>Pads the current byte with zero bits up to the next byte boundary; does nothing when already aligned.</summary>
97 |     public void Align()
98 |     {
99 |         if (HasPartialByte()) WriteBits(0, Bits.ByteBits - _count % Bits.ByteBits);
100 |     }
101 |
102 |     /// <summary>
103 |     /// Writes any staged bytes (including a final partial byte) to the underlying stream and, when this writer
104 |     /// owns that stream, disposes it. Calls after the first are ignored.
105 |     /// </summary>
106 |     public void Dispose()
107 |     {
108 |         // Don't allow running twice
109 |         if (IsDisposed) return;
110 |         IsDisposed = true;
111 |
112 |         try
113 |         {
114 |             // Write out any remaining bytes
115 |             var count = _count / Bits.ByteBits;
116 |             if (HasPartialByte()) count++; // If there's an incomplete byte, write it anyway
117 |             _underlying.Write(_buffer, count);
118 |         }
119 |         finally
120 |         {
121 |             // If we own the underlying, dispose it too - even when the final write failed, so it is not leaked
122 |             if (_ownUnderlying) _underlying.Dispose();
123 |         }
124 |     }
125 |
126 |     /// <summary>Passes the staged bits (offset 0, <c>_count</c> bits) to <c>ToBinaryString</c>; bytes already flushed are not included.</summary>
127 |     public override String ToString() => _buffer.ToBinaryString(0, _count);
128 |
129 |     // True when the number of staged bits is not a whole number of bytes
130 |     private Boolean HasPartialByte() => _count % Bits.ByteBits > 0;
131 | }
--------------------------------------------------------------------------------
/Test/ThompsonAlphaTests.cs:
--------------------------------------------------------------------------------
1 | using FluentAssertions;
2 |
3 | namespace InvertedTomato.Packing;
4 |
5 | public class ThompsonAlphaTests
6 | {
7 | // Max | Bin | Value   (presumably: payload length | payload bits after the implicit leading 1 | largest value, i.e. 2^(Max + 1) - 2)
8 | // 1 | _1 | 2
9 | // 2 | _11 | 6
10 | // 3 | _111 | 14
11 | // 4 | _1111 | 30
12 | // 5 | _11111 | 62
13 |
14 | // Value = 2^(Max + 1) - 2
15 |
16 | [Fact]
17 | public void CanCalculateMaxValue1() => new ThompsonAlphaIntegerEncoder(null!, 1).MaxValue.Should().Be(15); // NOTE(review): these expectations follow 2^(2^(n + 1)) - 1 capped at UInt64.MaxValue, which does not match the table above - confirm which is intended
18 | [Fact]
19 | public void CanCalculateMaxValue2() => new ThompsonAlphaIntegerEncoder(null!, 2).MaxValue.Should().Be(255);
20 | [Fact]
21 | public void CanCalculateMaxValue3() => new ThompsonAlphaIntegerEncoder(null!, 3).MaxValue.Should().Be(65535);
22 | [Fact]
23 | public void CanCalculateMaxValue4() => new ThompsonAlphaIntegerEncoder(null!, 4).MaxValue.Should().Be(4294967295);
24 | [Fact]
25 | public void CanCalculateMaxValue5() => new ThompsonAlphaIntegerEncoder(null!, 5).MaxValue.Should().Be(18446744073709551615);
26 | [Fact]
27 | public void CanCalculateMaxValue6() => new ThompsonAlphaIntegerEncoder(null!, 6).MaxValue.Should().Be(18446744073709551615);
28 |
29 | private static Byte[] Encode(UInt64 value) // Encodes one value with the encoder configured with 6 (a six-bit length prefix in the expected bytes below) and returns everything written
30 | {
31 | using var stream = new MemoryStream();
32 | using (var writer = new StreamBitWriter(stream))
33 | {
34 | var encoder = new ThompsonAlphaIntegerEncoder(writer, 6);
35 | encoder.EncodeUInt64(value);
36 | }
37 |
38 | return stream.ToArray();
39 | }
40 |
41 | [Fact]
42 | public void CanEncode0() => Assert.Equal(new Byte[] {0b000000_00}, Encode(0)); // Len=0, Val=(1) - layout is (len)_(payload)_(padding); the payload spells value + 1 without its leading 1
43 |
44 | [Fact]
45 | public void CanEncode1() => Assert.Equal(new Byte[] {0b000001_0_0}, Encode(1)); // Len=1, Val=(1)0
46 |
47 | [Fact]
48 | public void CanEncode2() => Assert.Equal(new Byte[] {0b000001_1_0}, Encode(2)); // Len=1, Val=(1)1
49 |
50 | [Fact]
51 | public void CanEncode3() => Assert.Equal(new Byte[] {0b000010_00}, Encode(3)); // Len=10, Val=(1)00
52 |
53 | [Fact]
54 | public void CanEncode8589934590() => Assert.Equal(new Byte[] {0b100000_11, 0b11111111, 0b11111111, 0b11111111, 0b111111_00}, Encode(8589934590)); // Len=100000 (32), payload is 32 ones, then 2 padding bits
55 |
56 | [Fact]
57 | public void CanEncode8589934591() => Assert.Equal(new Byte[] {0b100001_00, 0b00000000, 0b00000000, 0b00000000, 0b0000000_0}, Encode(8589934591)); // Len=100001 (33), payload is 33 zeros, then 1 padding bit
58 |
59 | [Fact]
60 | public void CanEncodeMax() => Assert.Equal(new Byte[] {0b111111_11, 0b11111111, 0b11111111, 0b11111111, 0b11111111, 0b11111111, 0b11111111, 0b11111111, 0b11111000}, Encode(UInt64.MaxValue - 1)); // Len=111111 (63), payload is 63 ones, then 3 padding bits; note the input is UInt64.MaxValue - 1
61 |
62 | private static UInt64 Decode(Byte[] encoded) // Decodes the first value from the bytes using the same setting (6) as Encode
63 | {
64 | using var stream = new MemoryStream(encoded);
65 | using var reader = new StreamBitReader(stream);
66 | var decoder = new ThompsonAlphaIntegerDecoder(reader, 6);
67 |
68 | return decoder.DecodeUInt64();
69 | }
70 |
71 | [Fact]
72 | public void CanDecode0() => Assert.Equal((UInt64) 0, Decode(new Byte[] {0b000000_00}));
73 |
74 | [Fact]
75 | public void CanDecode1() => Assert.Equal((UInt64) 1, Decode(new Byte[] {0b000001_0_0})); // (len)_(val)_(padding)
76 |
77 | [Fact]
78 | public void CanDecode2() => Assert.Equal((UInt64) 2, Decode(new Byte[] {0b000001_1_0}));
79 |
80 | [Fact]
81 | public void CanDecode3() => Assert.Equal((UInt64) 3, Decode(new Byte[] {0b000010_00}));
82 |
83 | [Fact]
84 | public void CanDecode8589934590() => Assert.Equal((UInt64) 8589934590, Decode(new Byte[] {0b100000_11, 0b11111111, 0b11111111, 0b11111111, 0b111111_00}));
85 |
86 | [Fact]
87 | public void CanDecode8589934591() => Assert.Equal((UInt64) 8589934591, Decode(new Byte[] {0b100001_00, 0b00000000, 0b00000000, 0b00000000, 0b0000000_0}));
88 |
89 | [Fact]
90 | public void CanDecodeMax() => Assert.Equal(UInt64.MaxValue - 1, Decode(new Byte[] {0b111111_11, 0b11111111, 0b11111111, 0b11111111, 0b11111111, 0b11111111, 0b11111111, 0b11111111, 0b11111000}));
91 |
92 | [Fact]
93 | public void CanEncodeDecodeFirst1000() // Round-trips 0..999 through a single stream: all values are written first, then read back in order
94 | {
95 | using var stream = new MemoryStream();
96 |
97 | using (var writer = new StreamBitWriter(stream))
98 | {
99 | var encoder = new ThompsonAlphaIntegerEncoder(writer, 6);
100 | for (UInt64 symbol = 0; symbol < 1000; symbol++) encoder.EncodeUInt64(symbol);
101 | }
102 |
103 | stream.Seek(0, SeekOrigin.Begin);
104 |
105 | using (var reader = new StreamBitReader(stream))
106 | {
107 | var decoder = new ThompsonAlphaIntegerDecoder(reader, 6);
108 | for (UInt64 symbol = 0; symbol < 1000; symbol++)
109 | {
110 | Assert.Equal(symbol, decoder.DecodeUInt64());
111 | }
112 | }
113 | }
114 | }
--------------------------------------------------------------------------------
/Test/InvertedVlqCodecTests.cs:
--------------------------------------------------------------------------------
1 | namespace InvertedTomato.Packing;
2 |
3 | public class InvertedVlqCodecTests // In every expected encoding below only the final byte of a value has its top bit set
4 | {
5 | private static Byte[] Encode(UInt64 value, Int32 expectedCount) // expectedCount only sets the MemoryStream's initial capacity; the result is whatever was actually written
6 | {
7 | using var stream = new MemoryStream(expectedCount);
8 | using (var writer = new StreamBitWriter(stream))
9 | {
10 | var encoder = new InvertedVlqIntegerEncoder(writer);
11 | encoder.EncodeUInt64(value);
12 | }
13 |
14 | return stream.ToArray();
15 | }
16 |
17 | [Fact]
18 | public void CanEncode0() => Assert.Equal(new Byte[] { 0b10000000 }, Encode(0, 1));
19 |
20 | [Fact]
21 | public void CanEncode1() => Assert.Equal(new Byte[] { 0b10000001 }, Encode(1, 1));
22 |
23 | [Fact]
24 | public void CanEncode2() => Assert.Equal(new Byte[] { 0b10000010 }, Encode(2, 1));
25 |
26 | [Fact]
27 | public void CanEncode3() => Assert.Equal(new Byte[] { 0b10000011 }, Encode(3, 1));
28 |
29 | [Fact]
30 | public void CanEncode127() => Assert.Equal(new Byte[] { 0b11111111 }, Encode(127, 1)); // Largest value that fits in a single byte
31 |
32 | [Fact]
33 | public void CanEncode128() => Assert.Equal(new Byte[] { 0b00000000, 0b10000000 }, Encode(128, 2)); // First two-byte value: both 7-bit payloads are zero, so each extra byte extends the range rather than repeating it
34 |
35 | [Fact]
36 | public void CanEncode129() => Assert.Equal(new Byte[] { 0b00000001, 0b10000000 }, Encode(129, 2));
37 |
38 | [Fact]
39 | public void CanEncode16511() => Assert.Equal(new Byte[] { 0b01111111, 0b11111111 }, Encode(16511, 2)); // Largest two-byte value
40 |
41 | [Fact]
42 | public void CanEncode16512() => Assert.Equal(new Byte[] { 0b00000000, 0b00000000, 0b10000000 }, Encode(16512, 3));
43 |
44 | [Fact]
45 | public void CanEncode2113663() => Assert.Equal(new Byte[] { 0b01111111, 0b01111111, 0b11111111 }, Encode(2113663, 3)); // Largest three-byte value
46 |
47 | [Fact]
48 | public void CanEncode2113664() => Assert.Equal(new Byte[] { 0b00000000, 0b00000000, 0b00000000, 0b10000000 }, Encode(2113664, 4));
49 |
50 | [Fact]
51 | public void CanEncodeMax() => Assert.Equal(new Byte[] { 0b01111110, 0b01111110, 0b01111110, 0b01111110, 0b01111110, 0b01111110, 0b01111110, 0b01111110, 0b01111110, 0b10000000 },
52 | Encode(InvertedVlqInteger.MaxValue, 10));
53 |
54 | [Fact]
55 | public void EncoderOverflowThrows() => Assert.Throws(() => { Encode(UInt64.MaxValue, 32); }); // NOTE(review): Assert.Throws has no exception type argument in this listing - presumably lost; confirm the expected type
56 |
57 | private static UInt64 Decode(Byte[] encoded, Int32 expectedUsed) // NOTE(review): expectedUsed is never read; the number of bytes consumed is not verified
58 | {
59 | using var stream = new MemoryStream(encoded);
60 | using var reader = new StreamBitReader(stream);
61 | var decoder = new InvertedVlqIntegerDecoder(reader);
62 |
63 | return decoder.DecodeUInt64();
64 | }
65 |
66 | [Fact]
67 | public void CanDecode0() => Assert.Equal((UInt64)0, Decode(new Byte[] { 0b10000000 }, 1));
68 |
69 | [Fact]
70 | public void CanDecode1() => Assert.Equal((UInt64)1, Decode(new Byte[] { 0b10000001 }, 1));
71 |
72 | [Fact]
73 | public void CanDecode2() => Assert.Equal((UInt64)2, Decode(new Byte[] { 0b10000010 }, 1));
74 |
75 | [Fact]
76 | public void CanDecode3() => Assert.Equal((UInt64)3, Decode(new Byte[] { 0b10000011 }, 1));
77 |
78 | [Fact]
79 | public void CanDecode127() => Assert.Equal((UInt64)127, Decode(new Byte[] { 0b11111111 }, 1));
80 |
81 | [Fact]
82 | public void CanDecode128() => Assert.Equal((UInt64)128, Decode(new Byte[] { 0b00000000, 0b10000000 }, 2));
83 |
84 | [Fact]
85 | public void CanDecode129() => Assert.Equal((UInt64)129, Decode(new Byte[] { 0b00000001, 0b10000000 }, 2));
86 |
87 | [Fact]
88 | public void CanDecode16511() => Assert.Equal((UInt64)16511, Decode(new Byte[] { 0b01111111, 0b11111111 }, 2));
89 |
90 | [Fact]
91 | public void CanDecode16512() => Assert.Equal((UInt64)16512, Decode(new Byte[] { 0b00000000, 0b00000000, 0b10000000 }, 3));
92 |
93 | [Fact]
94 | public void CanDecode16513() => Assert.Equal((UInt64)16513, Decode(new Byte[] { 0b00000001, 0b00000000, 0b10000000 }, 3)); // The first byte carries the least-significant 7 bits
95 |
96 | [Fact]
97 | public void CanDecode2113663() => Assert.Equal((UInt64)2113663, Decode(new Byte[] { 0b01111111, 0b01111111, 0b11111111 }, 3));
98 |
99 | [Fact]
100 | public void CanDecode2113664() => Assert.Equal((UInt64)2113664, Decode(new Byte[] { 0b00000000, 0b00000000, 0b00000000, 0b10000000 }, 4));
101 |
102 | [Fact]
103 | public void CanDecodeMax() => Assert.Equal(InvertedVlqInteger.MaxValue,
104 | Decode(new Byte[] { 0b01111110, 0b01111110, 0b01111110, 0b01111110, 0b01111110, 0b01111110, 0b01111110, 0b01111110, 0b01111110, 0b10000000 }, 10));
105 |
106 | [Fact]
107 | public void DecodingOverflowThrows() => Assert.Throws(() => // NOTE(review): exception type argument missing in this listing - confirm the expected type
108 | {
109 | Decode(new Byte[] { 0b01111111, 0b01111110, 0b01111110, 0b01111110, 0b01111110, 0b01111110, 0b01111110, 0b01111110, 0b01111110, 0b01111110, 0b10000000 }, 11);
110 | });
111 |
112 |
113 | [Fact]
114 | public void CanEncodeDecodeFirst1000() // Round-trips 0..999 through a single stream: all values are written first, then read back in order
115 | {
116 | using var stream = new MemoryStream();
117 |
118 | using (var writer = new StreamBitWriter(stream))
119 | {
120 | var encoder = new InvertedVlqIntegerEncoder(writer);
121 | for (UInt64 symbol = 0; symbol < 1000; symbol++) encoder.EncodeUInt64(symbol);
122 | }
123 |
124 | stream.Seek(0, SeekOrigin.Begin);
125 |
126 | using (var reader = new StreamBitReader(stream))
127 | {
128 | var decoder = new InvertedVlqIntegerDecoder(reader);
129 | for (UInt64 symbol = 0; symbol < 1000; symbol++)
130 | {
131 | Assert.Equal(symbol, decoder.DecodeUInt64());
132 | }
133 | }
134 | }
135 | }
--------------------------------------------------------------------------------
/LoadTest/Program.cs:
--------------------------------------------------------------------------------
1 | // #5
2 | // FIBONACCI (Gen2)
3 | // Compress: 5986ms 12.75MB / s Total 38MB
4 | // Decompress: 3455ms 22.08MB / s
5 |
6 | // VLQ (Gen2)
7 | // Compress: 386ms 197.65MB / s Total 36MB
8 | // Decompress: 874ms 87.29MB / s
9 |
10 | // 220707
11 | // CODEC ENCODE TIME DECODE TIME RESULT SIZE
12 | // ThompsonAlpha
13 | // InvertedTomato.Compression.Integers.Gen2.ThompsonAlphaCodec 839ms 736ms 32.00MB
14 | // InvertedTomato.Compression.Integers.ThompsonAlphaCodec 897ms 738ms 32.00MB
15 | // Fibonacci
16 | // InvertedTomato.Compression.Integers.Gen2.FibonacciCodec 2,874ms 1,442ms 38.00MB
17 | // InvertedTomato.Compression.Integers.FibonacciCodec 8,399ms 6,777ms 38.00MB
18 | // VLQ
19 | // InvertedTomato.Compression.Integers.Gen2.VlqCodec 265ms 346ms 36.00MB
20 | // InvertedTomato.Compression.Integers.VlqCodec 959ms 1,112ms 36.00MB
21 | // Raw
22 | // InvertedTomato.Compression.Integers.Gen2.RawCodec 631ms 625ms 76.00MB
23 | // InvertedTomato.Compression.Integers.RawCodec 2,000ms 2,093ms 76.00MB
24 |
25 | // 220711 Added buffer to StreamBitReader&StreamBitWriter (ie, writes byte[] rather than byte)
26 | // CODEC ENCODE TIME DECODE TIME RESULT SIZE
27 | // ThompsonAlpha
28 | // InvertedTomato.Compression.Integers.Gen2.ThompsonAlphaCodec 860ms 745ms 32.00MB
29 | // InvertedTomato.Compression.Integers.Gen3.ThompsonAlphaCodec 758ms 625ms 32.00MB
30 | // Fibonacci
31 | // InvertedTomato.Compression.Integers.Gen2.FibonacciCodec 2,891ms 1,445ms 38.00MB
32 | // InvertedTomato.Compression.Integers.Gen3.FibonacciCodec 7,972ms 6,385ms 38.00MB
33 | // VLQ
34 | // InvertedTomato.Compression.Integers.Gen2.VlqCodec 271ms 356ms 36.00MB
35 | // InvertedTomato.Compression.Integers.Gen3.VlqCodec 525ms 683ms 36.00MB
36 | // Raw
37 | // InvertedTomato.Compression.Integers.Gen2.RawCodec 647ms 639ms 76.00MB
38 | // InvertedTomato.Compression.Integers.Gen3.RawCodec 825ms 850ms 76.00MB
39 |
40 | // 220713 Added Fib write buffering rather than pushing raw bits
41 | // Fibonacci
42 | // InvertedTomato.Compression.Integers.Gen2.FibonacciCodec 2,924ms 1,484ms 38.00MB
43 | // InvertedTomato.Compression.Integers.Gen3.FibonacciCodec 3,396ms 7,443ms 38.00MB
44 |
45 | using System.Diagnostics;
46 | using InvertedTomato.Packing;
47 | using InvertedTomato.Packing.Codecs.Integers;
48 |
49 | // ReSharper disable ForeachCanBeConvertedToQueryUsingAnotherGetEnumerator
50 |
51 | var min = 100000;
52 | var count = 10000000;
53 |
54 | // Seed: `count` consecutive values starting at `min`, held as UInt64 because that is what the codecs encode and decode
55 | var input = new List<UInt64>(count);
56 | for (var v = min; v < min + count; v++)
57 | {
58 |     input.Add((UInt64)v);
59 | }
60 |
61 | void Gen3Test(string name, Func encoderFactory, Func decoderFactory) // NOTE(review): both Func parameters have no type arguments in this listing - presumably stripped; confirm the intended delegate types
62 | {
63 |     // Compress: time encoding every input value, including the work done when the writer is disposed
64 |     using var stream = new MemoryStream(count * 5);
65 |     var compressStopwatch = Stopwatch.StartNew();
66 |     using (var writer = new StreamBitWriter(stream))
67 |     {
68 |         var encoder = encoderFactory(writer);
69 |         foreach (var item in input)
70 |         {
71 |             encoder.EncodeUInt64(item);
72 |         }
73 |     }
74 |
75 |     compressStopwatch.Stop();
76 |
77 |     // Decompress: rewind, decode one value per input item and fail on the first mismatch
78 |     stream.Position = 0;
79 |     var decompressStopwatch = Stopwatch.StartNew();
80 |     using (var reader = new StreamBitReader(stream))
81 |     {
82 |         var decoder = decoderFactory(reader);
83 |         foreach (var item in input)
84 |         {
85 |             if (item != decoder.DecodeUInt64()) throw new InvalidOperationException("Incorrect result.");
86 |         }
87 |     }
88 |
89 |     decompressStopwatch.Stop();
90 |
91 |     // Report the size using floating-point division: integer division truncated it to whole megabytes, so the "N" column always read ".00"
92 |     Console.WriteLine("{0,-75} {1,15:N0}ms {2,15:N0}ms {3,15:N}MB", name, compressStopwatch.ElapsedMilliseconds, decompressStopwatch.ElapsedMilliseconds, stream.Length / 1024d / 1024d);
93 | }
94 |
95 |
96 | Console.WriteLine("CODEC ENCODE TIME DECODE TIME RESULT SIZE"); // Column header for the rows printed by Gen3Test
97 | Console.WriteLine("ThompsonAlpha");
98 | Gen3Test(
99 |     name: "ThompsonAlpha(6)",
100 |     encoderFactory: w => new ThompsonAlphaIntegerEncoder(w, 6),
101 |     decoderFactory: r => new ThompsonAlphaIntegerDecoder(r, 6)
102 | );
103 |
104 | Console.WriteLine("Fibonacci");
105 | Gen3Test(
106 |     name: "Fibbonacci",
107 |     encoderFactory: w => new FibonacciIntegerEncoder(w),
108 |     decoderFactory: r => new FibonacciIntegerDecoder(r)
109 | );
110 |
111 | Console.WriteLine("VLQ");
112 | Gen3Test(
113 |     name: "VLQ",
114 |     encoderFactory: w => new VlqIntegerEncoder(w),
115 |     decoderFactory: r => new VlqIntegerDecoder(r)
116 | );
117 |
118 | Console.WriteLine("Raw");
119 | Gen3Test(name: "Raw",
120 |     encoderFactory: w => new RawIntegerEncoder(w),
121 |     decoderFactory: r => new RawIntegerDecoder(r)
122 | );
123 |
124 | Console.WriteLine("\nDone.");
--------------------------------------------------------------------------------
/.idea/.idea.binary/.idea/workspace.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | LoadTest/LoadTest.csproj
5 | Sample/Sample.csproj
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 | 1678013536669
89 |
90 |
91 | 1678013536669
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
--------------------------------------------------------------------------------
/Test/VlqCodecTests.cs:
--------------------------------------------------------------------------------
1 | namespace InvertedTomato.Packing;
2 |
3 | public class VlqCodecTests // In every expected encoding below all bytes except the final one of a value have their top bit set
4 | {
5 | private static Byte[] Encode(UInt64 value) // Encodes one value and returns every byte written once the writer has been disposed
6 | {
7 | using var stream = new MemoryStream();
8 | using (var writer = new StreamBitWriter(stream))
9 | {
10 | var encoder = new VlqIntegerEncoder(writer);
11 | encoder.EncodeUInt64(value);
12 | }
13 |
14 | return stream.ToArray();
15 | }
16 |
17 | [Fact]
18 | public void CanEncode0() => Assert.Equal(new Byte[] { 0b00000000 }, Encode(0));
19 |
20 | [Fact]
21 | public void CanEncode1() => Assert.Equal(new Byte[] { 0b00000001 }, Encode(1));
22 |
23 | [Fact]
24 | public void CanEncode2() => Assert.Equal(new Byte[] { 0b00000010 }, Encode(2));
25 |
26 | [Fact]
27 | public void CanEncode3() => Assert.Equal(new Byte[] { 0b00000011 }, Encode(3));
28 |
29 | [Fact]
30 | public void CanEncode127() => Assert.Equal(new Byte[] { 0b01111111 }, Encode(127)); // Largest value that fits in a single byte
31 |
32 | [Fact]
33 | public void CanEncode128() => Assert.Equal(new Byte[] { 0b10000000, 0b00000000 }, Encode(128)); // First two-byte value: both 7-bit payloads are zero, so each extra byte extends the range rather than repeating it
34 |
35 | [Fact]
36 | public void CanEncode129() => Assert.Equal(new Byte[] { 0b10000001, 0b00000000 }, Encode(129));
37 |
38 | [Fact]
39 | public void CanEncode16511() => Assert.Equal(new Byte[] { 0b11111111, 0b01111111 }, Encode(16511)); // Largest two-byte value
40 |
41 | [Fact]
42 | public void CanEncode16512() => Assert.Equal(new Byte[] { 0b10000000, 0b10000000, 0b00000000 }, Encode(16512));
43 |
44 | [Fact]
45 | public void CanEncode2113663() => Assert.Equal(new Byte[] { 0b11111111, 0b11111111, 0b01111111 }, Encode(2113663)); // Largest three-byte value
46 |
47 | [Fact]
48 | public void CanEncode2113664() => Assert.Equal(new Byte[] { 0b10000000, 0b10000000, 0b10000000, 0b00000000 }, Encode(2113664));
49 |
50 | [Fact]
51 | public void EncodeMax() => Assert.Equal(new Byte[] { 0b11111110, 0b11111110, 0b11111110, 0b11111110, 0b11111110, 0b11111110, 0b11111110, 0b11111110, 0b11111110, 0b00000000 },
52 | Encode(VlqInteger.MaxValue));
53 |
54 | [Fact]
55 | public void EncodeOverflow() => Assert.Throws(() => { Encode(UInt64.MaxValue); }); // NOTE(review): Assert.Throws has no exception type argument in this listing - presumably lost; confirm the expected type
56 |
57 | private static UInt64 Decode(Byte[] encoded) // Decodes only the first value; any trailing bytes are left unread
58 | {
59 | using var stream = new MemoryStream(encoded);
60 | using var reader = new StreamBitReader(stream);
61 | var decoder = new VlqIntegerDecoder(reader);
62 | return decoder.DecodeUInt64();
63 | }
64 |
65 | [Fact]
66 | public void CanDecode0() => Assert.Equal((UInt64)0, Decode(new Byte[] { 0b00000000 }));
67 |
68 | [Fact]
69 | public void CanDecode1() => Assert.Equal((UInt64)1, Decode(new Byte[] { 0b00000001 }));
70 |
71 | [Fact]
72 | public void CanDecode2() => Assert.Equal((UInt64)2, Decode(new Byte[] { 0b00000010 }));
73 |
74 | [Fact]
75 | public void CanDecode3() => Assert.Equal((UInt64)3, Decode(new Byte[] { 0b00000011 }));
76 |
77 | [Fact]
78 | public void CanDecode127() => Assert.Equal((UInt64)127, Decode(new Byte[] { 0b01111111 }));
79 |
80 | [Fact]
81 | public void CanDecode128() => Assert.Equal((UInt64)128, Decode(new Byte[] { 0b10000000, 0b00000000 }));
82 |
83 | [Fact]
84 | public void CanDecode129() => Assert.Equal((UInt64)129, Decode(new Byte[] { 0b10000001, 0b00000000 }));
85 |
86 | [Fact]
87 | public void CanDecode16511() => Assert.Equal((UInt64)16511, Decode(new Byte[] { 0b11111111, 0b01111111 }));
88 |
89 | [Fact]
90 | public void CanDecode16512() => Assert.Equal((UInt64)16512, Decode(new Byte[] { 0b10000000, 0b10000000, 0b00000000 }));
91 |
92 | [Fact]
93 | public void CanDecode16513() => Assert.Equal((UInt64)16513, Decode(new Byte[] { 0b10000001, 0b10000000, 0b00000000 })); // The first byte carries the least-significant 7 bits
94 |
95 | [Fact]
96 | public void CanDecode2113663() => Assert.Equal((UInt64)2113663, Decode(new Byte[] { 0b11111111, 0b11111111, 0b01111111 }));
97 |
98 | [Fact]
99 | public void CanDecode2113664() => Assert.Equal((UInt64)2113664, Decode(new Byte[] { 0b10000000, 0b10000000, 0b10000000, 0b00000000 }));
100 |
101 | [Fact]
102 | public void DecodeMax() => Assert.Equal(VlqInteger.MaxValue,
103 | Decode(new Byte[] { 0b11111110, 0b11111110, 0b11111110, 0b11111110, 0b11111110, 0b11111110, 0b11111110, 0b11111110, 0b11111110, 0b00000000 }));
104 |
105 | [Fact]
106 | public void CanDecode1_1_1() // Three one-byte values in a row, then a fourth read that must fail because the input is exhausted
107 | {
108 | using var stream = new MemoryStream(new Byte[] { 0b00000001, 0b00000001, 0b00000001 });
109 | using var reader = new StreamBitReader(stream);
110 | var decoder = new VlqIntegerDecoder(reader);
111 |
112 | Assert.Equal((UInt64)1, decoder.DecodeUInt64());
113 | Assert.Equal((UInt64)1, decoder.DecodeUInt64());
114 | Assert.Equal((UInt64)1, decoder.DecodeUInt64());
115 | Assert.Throws(() => decoder.DecodeUInt64()); // NOTE(review): exception type argument missing in this listing - confirm the expected type
116 | }
117 |
118 | [Fact]
119 | public void DecodeInputClipped() => Assert.Throws(() => { Decode(new Byte[] { 0b10000000 }); }); // The only byte has its top bit set, so the value is cut off; NOTE(review): exception type argument missing in this listing
120 |
121 | [Fact]
122 | public void DecodeOverflow() => Assert.Throws(() => // NOTE(review): exception type argument missing in this listing - confirm the expected type
123 | {
124 | Decode(new Byte[] { 0b11111111, 0b11111110, 0b11111110, 0b11111110, 0b11111110, 0b11111110, 0b11111110, 0b11111110, 0b11111110, 0b11111110, 0b00000000 });
125 | });
126 |
127 | [Fact]
128 | public void CanDecode1_X() => Assert.Equal((UInt64)1, Decode(new Byte[] { 0b00000001, 0b10000011 })); // Bytes after the first complete value must not affect the result
129 |
130 |
131 | [Fact]
132 | public void CanEncodeDecodeFirst1000() // Round-trips 0..999 through a single stream: all values are written first, then read back in order
133 | {
134 | using var stream = new MemoryStream();
135 |
136 | using (var writer = new StreamBitWriter(stream))
137 | {
138 | var encoder = new VlqIntegerEncoder(writer);
139 | for (UInt64 symbol = 0; symbol < 1000; symbol++) encoder.EncodeUInt64(symbol);
140 | }
141 |
142 | stream.Seek(0, SeekOrigin.Begin);
143 |
144 | using (var reader = new StreamBitReader(stream))
145 | {
146 | var decoder = new VlqIntegerDecoder(reader);
147 | for (UInt64 symbol = 0; symbol < 1000; symbol++)
148 | {
149 | Assert.Equal(symbol, decoder.DecodeUInt64());
150 | }
151 | }
152 | }
153 | }
--------------------------------------------------------------------------------
/Test/StreamBitReaderTests.cs:
--------------------------------------------------------------------------------
1 | namespace InvertedTomato.Packing;
2 |
3 | public class StreamBitReaderTests
4 | {
5 | [Fact]
6 | public void CanReadBit1()
7 | {
8 | using var stream = new MemoryStream(new byte[] { 0b_10000000 });
9 | using var reader = new StreamBitReader(stream);
10 |
11 | Assert.True(reader.ReadBit());
12 | }
13 |
14 | [Fact]
15 | public void CanReadBit0()
16 | {
17 | using var stream = new MemoryStream(new byte[] { 0b_00000000 });
18 | using var reader = new StreamBitReader(stream);
19 |
20 | Assert.False(reader.ReadBit());
21 | }
22 |
23 | [Fact]
24 | public void CanReadBit0_1()
25 | {
26 | using var stream = new MemoryStream(new byte[] { 0b_01000000 });
27 | using var reader = new StreamBitReader(stream);
28 |
29 | Assert.False(reader.ReadBit());
30 | Assert.True(reader.ReadBit());
31 | }
32 |
33 | [Fact]
34 | public void CanPeak_ReadBit8()
35 | {
36 | using var stream = new MemoryStream(new byte[] { 0b_11111111 });
37 | using var reader = new StreamBitReader(stream);
38 |
39 | Assert.True(reader.PeakBit());
40 | Assert.Equal((ulong)0b11111111, reader.ReadBits(8));
41 | }
42 |
43 | [Fact]
44 | public void CanPeak_ReadBit8_Peak_ReadBit8_ReadBit0()
45 | {
46 | using var stream = new MemoryStream(new byte[] { 0b_11111111, 0b00000000 });
47 | using var reader = new StreamBitReader(stream);
48 |
49 | Assert.True(reader.PeakBit());
50 | Assert.Equal((ulong)0b11111111, reader.ReadBits(8));
51 |
52 | Assert.False(reader.PeakBit());
53 | Assert.Equal((ulong)0b00000000, reader.ReadBits(8));
54 | Assert.Equal((ulong)0b00000000, reader.ReadBits(0));
55 | }
56 |
57 | [Fact]
58 | public void CanReadBit1_1_1_1_1_1_1_1_0_0_0_0_0_0_0_0()
59 | {
60 | using var stream = new MemoryStream(new byte[] { 0b_11111111, 0b00000000 });
61 | using var reader = new StreamBitReader(stream);
62 |
63 | Assert.True(reader.ReadBit());
64 | Assert.True(reader.ReadBit());
65 | Assert.True(reader.ReadBit());
66 | Assert.True(reader.ReadBit());
67 | Assert.True(reader.ReadBit());
68 | Assert.True(reader.ReadBit());
69 | Assert.True(reader.ReadBit());
70 | Assert.True(reader.ReadBit());
71 |
72 | Assert.False(reader.ReadBit());
73 | Assert.False(reader.ReadBit());
74 | Assert.False(reader.ReadBit());
75 | Assert.False(reader.ReadBit());
76 | Assert.False(reader.ReadBit());
77 | Assert.False(reader.ReadBit());
78 | Assert.False(reader.ReadBit());
79 | Assert.False(reader.ReadBit());
80 | }
81 |
82 | [Fact]
83 | public void CanReadBits4_Peak_8_Peak_4()
84 | {
85 | using var stream = new MemoryStream(new byte[] { 0b_11111111, 0b00000000 });
86 | using var reader = new StreamBitReader(stream);
87 |
88 | Assert.Equal((ulong)0b1111, reader.ReadBits(4));
89 |
90 | Assert.True(reader.PeakBit());
91 | Assert.Equal((ulong)0b11110000, reader.ReadBits(8));
92 |
93 | Assert.False(reader.PeakBit());
94 | Assert.Equal((ulong)0b0000, reader.ReadBits(4));
95 | }
96 |
97 | [Fact]
98 | public void CanReadBits4_Peak_Align_Peak_4()
99 | {
100 | using var stream = new MemoryStream(new byte[] { 0b_11111111, 0b00000000 });
101 | using var reader = new StreamBitReader(stream);
102 |
103 | Assert.Equal((ulong)0b1111, reader.ReadBits(4));
104 | Assert.True(reader.PeakBit());
105 | reader.Align();
106 |
107 | Assert.False(reader.PeakBit());
108 | Assert.Equal((ulong)0b0000, reader.ReadBits(4));
109 | Assert.Equal((ulong)0b0000, reader.ReadBits(4));
110 | }
111 |
112 | [Fact]
113 | public void CanReadBits32()
114 | {
115 | using var stream = new MemoryStream(new byte[] { 0b_11111111, 0b_11111111, 0b_11111111, 0b_11111111 });
116 | using var reader = new StreamBitReader(stream);
117 |
118 | Assert.Equal(0b_11111111_11111111_11111111_11111111, reader.ReadBits(32));
119 | }
120 |
121 | [Fact]
122 | public void CanReadBits63()
123 | {
124 | using var stream = new MemoryStream(new byte[] { 0b_11111111, 0b_11111111, 0b_11111111, 0b_11111111, 0b_11111111, 0b_11111111, 0b_11111111, 0b_11111110 });
125 | using var reader = new StreamBitReader(stream);
126 |
127 | Assert.Equal((UInt64)0b_01111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, reader.ReadBits(63));
128 | }
129 |
130 | [Fact]
131 | public void CanReadBits64()
132 | {
133 | using var stream = new MemoryStream(new byte[] { 0b_11111111, 0b_11111111, 0b_11111111, 0b_11111111, 0b_11111111, 0b_11111111, 0b_11111111, 0b_11111111 });
134 | using var reader = new StreamBitReader(stream);
135 |
136 | Assert.Equal(0b_11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, reader.ReadBits(64));
137 | }
138 |
139 | [Fact]
140 | public void CanReadBits1_32()
141 | {
142 | using var stream = new MemoryStream(new byte[] { 0b_01111111, 0b_11111111, 0b_11111111, 0b_11111111, 0b10000000, });
143 | using var reader = new StreamBitReader(stream);
144 |
145 | Assert.False(reader.ReadBit());
146 | Assert.Equal(0b_11111111_11111111_11111111_11111111, reader.ReadBits(32));
147 | }
148 |
149 |
150 | [Fact]
151 | public void CanReadBitX_1()
152 | {
153 | using var stream = new MemoryStream(new byte[] { 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000001, });
154 | using var reader = new StreamBitReader(stream);
155 |
156 | Assert.Equal((UInt64)1, reader.ReadBits(64));
157 | }
158 |
159 | [Fact]
160 | public void CanDisposeNotOwned()
161 | {
162 | using var stream = new MemoryStream(new byte[] { 0b00000000 });
163 | using var reader = new StreamBitReader(stream);
164 |
165 | Assert.False(reader.IsDisposed);
166 | reader.Dispose();
167 | Assert.True(reader.IsDisposed);
168 | stream.ReadByte();
169 | }
170 |
171 | [Fact]
172 | public void CanDisposeOwned()
173 | {
174 | using var stream = new MemoryStream(new byte[] { 0b00000000 });
175 | using var reader = new StreamBitReader(stream, true);
176 |
177 | Assert.False(reader.IsDisposed);
178 | reader.Dispose();
179 | Assert.True(reader.IsDisposed);
180 | Assert.Throws(() => stream.ReadByte());
181 | }
182 |
183 |
184 | [Fact]
185 | public void CanReadBlank()
186 | {
187 | using var stream = new MemoryStream(new byte[] { 0b_11111111 });
188 | using var reader = new StreamBitReader(stream);
189 |
190 | Assert.Equal((ulong)0b00000000, reader.ReadBits(0));
191 | }
192 |
193 | [Fact]
194 | public void ReadEndOfStreamThrows()
195 | {
196 | using var stream = new MemoryStream(new byte[] { 0b_11111111 });
197 | using var reader = new StreamBitReader(stream);
198 |
199 | reader.ReadBits(8);
200 | Assert.Throws(() => reader.ReadBits(1));
201 | }
202 |
203 | [Fact]
204 | public void PeakEndOfStreamThrows()
205 | {
206 | using var stream = new MemoryStream(new byte[] { 0b_11111111 });
207 | using var reader = new StreamBitReader(stream);
208 | // Consume the only byte so there is no bit left to peek at.
209 | reader.ReadBits(8);
210 | Assert.ThrowsAny<Exception>(() => reader.PeakBit()); // TODO(review): exact exception type is not visible from this file - tighten to Assert.Throws<T> once confirmed
211 | }
212 |
213 | [Fact]
214 | public void CanReadBitsB8_8()
215 | {
216 | using var source = new MemoryStream(new byte[] { 0b_11111111 });
217 | using var bitReader = new StreamBitReader(source, false, 1);
218 | // Plain 8-bit read with the optional third constructor argument set to 1 (presumably the buffer size - confirm against StreamBitReader).
219 | Assert.Equal(0b_11111111UL, bitReader.ReadBits(8));
220 | }
221 |
222 | [Fact]
223 | public void CanReadBitsB8_9()
224 | {
225 | using var source = new MemoryStream(new byte[] { 0b_11111111, 0b10000000, });
226 | using var bitReader = new StreamBitReader(source, false, 1);
227 | // Nine bits span both source bytes: all eight bits of the first plus the top bit of the second.
228 | Assert.Equal(0b_00000001_11111111UL, bitReader.ReadBits(9));
229 | }
230 | }
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | ## Ignore Visual Studio temporary files, build results, and
2 | ## files generated by popular Visual Studio add-ons.
3 | ##
4 | ## Get latest from https://github.com/github/gitignore/blob/main/VisualStudio.gitignore
5 |
6 | # User-specific files
7 | *.rsuser
8 | *.suo
9 | *.user
10 | *.userosscache
11 | *.sln.docstates
12 |
13 | # User-specific files (MonoDevelop/Xamarin Studio)
14 | *.userprefs
15 |
16 | # Mono auto generated files
17 | mono_crash.*
18 |
19 | # Build results
20 | [Dd]ebug/
21 | [Dd]ebugPublic/
22 | [Rr]elease/
23 | [Rr]eleases/
24 | x64/
25 | x86/
26 | [Ww][Ii][Nn]32/
27 | [Aa][Rr][Mm]/
28 | [Aa][Rr][Mm]64/
29 | bld/
30 | [Bb]in/
31 | [Oo]bj/
32 | [Ll]og/
33 | [Ll]ogs/
34 |
35 | # Visual Studio 2015/2017 cache/options directory
36 | .vs/
37 | # Uncomment if you have tasks that create the project's static files in wwwroot
38 | #wwwroot/
39 |
40 | # Visual Studio 2017 auto generated files
41 | Generated\ Files/
42 |
43 | # MSTest test Results
44 | [Tt]est[Rr]esult*/
45 | [Bb]uild[Ll]og.*
46 |
47 | # NUnit
48 | *.VisualState.xml
49 | TestResult.xml
50 | nunit-*.xml
51 |
52 | # Build Results of an ATL Project
53 | [Dd]ebugPS/
54 | [Rr]eleasePS/
55 | dlldata.c
56 |
57 | # Benchmark Results
58 | BenchmarkDotNet.Artifacts/
59 |
60 | # .NET Core
61 | project.lock.json
62 | project.fragment.lock.json
63 | artifacts/
64 |
65 | # ASP.NET Scaffolding
66 | ScaffoldingReadMe.txt
67 |
68 | # StyleCop
69 | StyleCopReport.xml
70 |
71 | # Files built by Visual Studio
72 | *_i.c
73 | *_p.c
74 | *_h.h
75 | *.ilk
76 | *.meta
77 | *.obj
78 | *.iobj
79 | *.pch
80 | *.pdb
81 | *.ipdb
82 | *.pgc
83 | *.pgd
84 | *.rsp
85 | *.sbr
86 | *.tlb
87 | *.tli
88 | *.tlh
89 | *.tmp
90 | *.tmp_proj
91 | *_wpftmp.csproj
92 | *.log
93 | *.tlog
94 | *.vspscc
95 | *.vssscc
96 | .builds
97 | *.pidb
98 | *.svclog
99 | *.scc
100 |
101 | # Chutzpah Test files
102 | _Chutzpah*
103 |
104 | # Visual C++ cache files
105 | ipch/
106 | *.aps
107 | *.ncb
108 | *.opendb
109 | *.opensdf
110 | *.sdf
111 | *.cachefile
112 | *.VC.db
113 | *.VC.VC.opendb
114 |
115 | # Visual Studio profiler
116 | *.psess
117 | *.vsp
118 | *.vspx
119 | *.sap
120 |
121 | # Visual Studio Trace Files
122 | *.e2e
123 |
124 | # TFS 2012 Local Workspace
125 | $tf/
126 |
127 | # Guidance Automation Toolkit
128 | *.gpState
129 |
130 | # ReSharper is a .NET coding add-in
131 | _ReSharper*/
132 | *.[Rr]e[Ss]harper
133 | *.DotSettings.user
134 |
135 | # TeamCity is a build add-in
136 | _TeamCity*
137 |
138 | # DotCover is a Code Coverage Tool
139 | *.dotCover
140 |
141 | # AxoCover is a Code Coverage Tool
142 | .axoCover/*
143 | !.axoCover/settings.json
144 |
145 | # Coverlet is a free, cross platform Code Coverage Tool
146 | coverage*.json
147 | coverage*.xml
148 | coverage*.info
149 |
150 | # Visual Studio code coverage results
151 | *.coverage
152 | *.coveragexml
153 |
154 | # NCrunch
155 | _NCrunch_*
156 | .*crunch*.local.xml
157 | nCrunchTemp_*
158 |
159 | # MightyMoose
160 | *.mm.*
161 | AutoTest.Net/
162 |
163 | # Web workbench (sass)
164 | .sass-cache/
165 |
166 | # Installshield output folder
167 | [Ee]xpress/
168 |
169 | # DocProject is a documentation generator add-in
170 | DocProject/buildhelp/
171 | DocProject/Help/*.HxT
172 | DocProject/Help/*.HxC
173 | DocProject/Help/*.hhc
174 | DocProject/Help/*.hhk
175 | DocProject/Help/*.hhp
176 | DocProject/Help/Html2
177 | DocProject/Help/html
178 |
179 | # Click-Once directory
180 | publish/
181 |
182 | # Publish Web Output
183 | *.[Pp]ublish.xml
184 | *.azurePubxml
185 | # Note: Comment the next line if you want to checkin your web deploy settings,
186 | # but database connection strings (with potential passwords) will be unencrypted
187 | *.pubxml
188 | *.publishproj
189 |
190 | # Microsoft Azure Web App publish settings. Comment the next line if you want to
191 | # checkin your Azure Web App publish settings, but sensitive information contained
192 | # in these scripts will be unencrypted
193 | PublishScripts/
194 |
195 | # NuGet Packages
196 | *.nupkg
197 | # NuGet Symbol Packages
198 | *.snupkg
199 | # The packages folder can be ignored because of Package Restore
200 | **/[Pp]ackages/*
201 | # except build/, which is used as an MSBuild target.
202 | !**/[Pp]ackages/build/
203 | # Uncomment if necessary however generally it will be regenerated when needed
204 | #!**/[Pp]ackages/repositories.config
205 | # NuGet v3's project.json files produces more ignorable files
206 | *.nuget.props
207 | *.nuget.targets
208 |
209 | # Microsoft Azure Build Output
210 | csx/
211 | *.build.csdef
212 |
213 | # Microsoft Azure Emulator
214 | ecf/
215 | rcf/
216 |
217 | # Windows Store app package directories and files
218 | AppPackages/
219 | BundleArtifacts/
220 | Package.StoreAssociation.xml
221 | _pkginfo.txt
222 | *.appx
223 | *.appxbundle
224 | *.appxupload
225 |
226 | # Visual Studio cache files
227 | # files ending in .cache can be ignored
228 | *.[Cc]ache
229 | # but keep track of directories ending in .cache
230 | !?*.[Cc]ache/
231 |
232 | # Others
233 | ClientBin/
234 | ~$*
235 | *~
236 | *.dbmdl
237 | *.dbproj.schemaview
238 | *.jfm
239 | *.pfx
240 | *.publishsettings
241 | orleans.codegen.cs
242 |
243 | # Including strong name files can present a security risk
244 | # (https://github.com/github/gitignore/pull/2483#issue-259490424)
245 | #*.snk
246 |
247 | # Since there are multiple workflows, uncomment next line to ignore bower_components
248 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
249 | #bower_components/
250 |
251 | # RIA/Silverlight projects
252 | Generated_Code/
253 |
254 | # Backup & report files from converting an old project file
255 | # to a newer Visual Studio version. Backup files are not needed,
256 | # because we have git ;-)
257 | _UpgradeReport_Files/
258 | Backup*/
259 | UpgradeLog*.XML
260 | UpgradeLog*.htm
261 | ServiceFabricBackup/
262 | *.rptproj.bak
263 |
264 | # SQL Server files
265 | *.mdf
266 | *.ldf
267 | *.ndf
268 |
269 | # Business Intelligence projects
270 | *.rdl.data
271 | *.bim.layout
272 | *.bim_*.settings
273 | *.rptproj.rsuser
274 | *- [Bb]ackup.rdl
275 | *- [Bb]ackup ([0-9]).rdl
276 | *- [Bb]ackup ([0-9][0-9]).rdl
277 |
278 | # Microsoft Fakes
279 | FakesAssemblies/
280 |
281 | # GhostDoc plugin setting file
282 | *.GhostDoc.xml
283 |
284 | # Node.js Tools for Visual Studio
285 | .ntvs_analysis.dat
286 | node_modules/
287 |
288 | # Visual Studio 6 build log
289 | *.plg
290 |
291 | # Visual Studio 6 workspace options file
292 | *.opt
293 |
294 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
295 | *.vbw
296 |
297 | # Visual Studio 6 auto-generated project file (contains which files were open etc.)
298 | *.vbp
299 |
300 | # Visual Studio 6 workspace and project file (working project files containing files to include in project)
301 | *.dsw
302 | *.dsp
303 |
304 | # Visual Studio 6 technical files
305 | *.ncb
306 | *.aps
307 |
308 | # Visual Studio LightSwitch build output
309 | **/*.HTMLClient/GeneratedArtifacts
310 | **/*.DesktopClient/GeneratedArtifacts
311 | **/*.DesktopClient/ModelManifest.xml
312 | **/*.Server/GeneratedArtifacts
313 | **/*.Server/ModelManifest.xml
314 | _Pvt_Extensions
315 |
316 | # Paket dependency manager
317 | .paket/paket.exe
318 | paket-files/
319 |
320 | # FAKE - F# Make
321 | .fake/
322 |
323 | # CodeRush personal settings
324 | .cr/personal
325 |
326 | # Python Tools for Visual Studio (PTVS)
327 | __pycache__/
328 | *.pyc
329 |
330 | # Cake - Uncomment if you are using it
331 | # tools/**
332 | # !tools/packages.config
333 |
334 | # Tabs Studio
335 | *.tss
336 |
337 | # Telerik's JustMock configuration file
338 | *.jmconfig
339 |
340 | # BizTalk build output
341 | *.btp.cs
342 | *.btm.cs
343 | *.odx.cs
344 | *.xsd.cs
345 |
346 | # OpenCover UI analysis results
347 | OpenCover/
348 |
349 | # Azure Stream Analytics local run output
350 | ASALocalRun/
351 |
352 | # MSBuild Binary and Structured Log
353 | *.binlog
354 |
355 | # NVidia Nsight GPU debugger configuration file
356 | *.nvuser
357 |
358 | # MFractors (Xamarin productivity tool) working folder
359 | .mfractor/
360 |
361 | # Local History for Visual Studio
362 | .localhistory/
363 |
364 | # Visual Studio History (VSHistory) files
365 | .vshistory/
366 |
367 | # BeatPulse healthcheck temp database
368 | healthchecksdb
369 |
370 | # Backup folder for Package Reference Convert tool in Visual Studio 2017
371 | MigrationBackup/
372 |
373 | # Ionide (cross platform F# VS Code tools) working folder
374 | .ionide/
375 |
376 | # Fody - auto-generated XML schema
377 | FodyWeavers.xsd
378 |
379 | # VS Code files for those working on multiple tools
380 | .vscode/*
381 | !.vscode/settings.json
382 | !.vscode/tasks.json
383 | !.vscode/launch.json
384 | !.vscode/extensions.json
385 | *.code-workspace
386 |
387 | # Local History for Visual Studio Code
388 | .history/
389 |
390 | # Windows Installer files from build outputs
391 | *.cab
392 | *.msi
393 | *.msix
394 | *.msm
395 | *.msp
396 |
397 | # JetBrains Rider
398 | *.sln.iml
399 |
400 | InvertedTomato.Packing.xml
--------------------------------------------------------------------------------
/Test/StreamBitWriterTests.cs:
--------------------------------------------------------------------------------
1 | namespace InvertedTomato.Packing;
2 |
3 | public class StreamBitWriterTests // Checks StreamBitWriter output byte-for-byte: single bits, multi-bit writes, alignment and disposal
4 | {
5 | [Fact]
6 | public void CanWriteBit_0()
7 | {
8 | using var stream = new MemoryStream();
9 | using (var writer = new StreamBitWriter(stream))
10 | {
11 | writer.WriteBit(false);
12 | }
13 | // The stream is only inspected after the writer's using block has ended.
14 | Assert.Equal(new Byte[] {0b00000000,}, stream.ToArray());
15 | }
16 |
17 | [Fact]
18 | public void CanWriteBit_1()
19 | {
20 | using var stream = new MemoryStream();
21 | using (var writer = new StreamBitWriter(stream))
22 | {
23 | writer.WriteBit(true);
24 | }
25 | // The first bit written lands in the most significant bit; the rest of the byte is zero.
26 | Assert.Equal(new Byte[] {0b10000000,}, stream.ToArray());
27 | }
28 |
29 | [Fact]
30 | public void CanWriteBit_0_1()
31 | {
32 | using var stream = new MemoryStream();
33 | using (var writer = new StreamBitWriter(stream))
34 | {
35 | writer.WriteBit(false);
36 | writer.WriteBit(true);
37 | }
38 |
39 | Assert.Equal(new Byte[] {0b01000000,}, stream.ToArray());
40 | }
41 |
42 | [Fact]
43 | public void CanWriteBit_1_1_1_1_1_1_1_1()
44 | {
45 | using var stream = new MemoryStream();
46 | using (var writer = new StreamBitWriter(stream))
47 | {
48 | writer.WriteBit(true);
49 | writer.WriteBit(true);
50 | writer.WriteBit(true);
51 | writer.WriteBit(true);
52 | writer.WriteBit(true);
53 | writer.WriteBit(true);
54 | writer.WriteBit(true);
55 | writer.WriteBit(true);
56 | }
57 | // Exactly eight bits fill one byte; no extra padding byte is emitted.
58 | Assert.Equal(new Byte[] {0b11111111,}, stream.ToArray());
59 | }
60 |
61 | [Fact]
62 | public void CanWriteBit_1_1_1_1_1_1_1_1_1()
63 | {
64 | using var stream = new MemoryStream();
65 | using (var writer = new StreamBitWriter(stream))
66 | {
67 | writer.WriteBit(true);
68 | writer.WriteBit(true);
69 | writer.WriteBit(true);
70 | writer.WriteBit(true);
71 | writer.WriteBit(true);
72 | writer.WriteBit(true);
73 | writer.WriteBit(true);
74 | writer.WriteBit(true);
75 | writer.WriteBit(true);
76 | }
77 | // The ninth bit spills into a second byte, which is zero-padded.
78 | Assert.Equal(new Byte[] {0b11111111, 0b10000000}, stream.ToArray());
79 | }
80 |
81 | [Fact]
82 | public void CanWriteBits_10_1()
83 | {
84 | using var stream = new MemoryStream();
85 | using (var writer = new StreamBitWriter(stream))
86 | {
87 | writer.WriteBits(0b10, 2);
88 | writer.WriteBits(0b1, 1);
89 | }
90 |
91 | Assert.Equal(new Byte[] {0b10100000,}, stream.ToArray());
92 | }
93 |
94 | [Fact]
95 | public void CanWriteBits_1_0_1_0_1_0()
96 | {
97 | using var stream = new MemoryStream();
98 | using (var writer = new StreamBitWriter(stream))
99 | {
100 | writer.WriteBits(0b1, 1);
101 | writer.WriteBits(0b0, 1);
102 | writer.WriteBits(0b1, 1);
103 | writer.WriteBits(0b0, 1);
104 | writer.WriteBits(0b1, 1);
105 | writer.WriteBits(0b0, 1);
106 | writer.WriteBits(0b1, 1);
107 | writer.WriteBits(0b0, 1);
108 | }
109 |
110 | Assert.Equal(new Byte[] {0b10101010,}, stream.ToArray());
111 | }
112 |
113 |
114 | [Fact]
115 | public void CanWriteBits_10_10_10_101()
116 | {
117 | using var stream = new MemoryStream();
118 | using (var writer = new StreamBitWriter(stream))
119 | {
120 | writer.WriteBits(0b10, 2);
121 | writer.WriteBits(0b10, 2);
122 | writer.WriteBits(0b10, 2);
123 | writer.WriteBits(0b101, 3);
124 | }
125 | // The final 3-bit write straddles the byte boundary: two bits finish byte one, one bit starts byte two.
126 | Assert.Equal(new Byte[] {0b10101010, 0b10000000,}, stream.ToArray());
127 | }
128 |
129 | [Fact]
130 | public void CanWriteBits_10_Align_10()
131 | {
132 | using var stream = new MemoryStream();
133 | using (var writer = new StreamBitWriter(stream))
134 | {
135 | writer.WriteBits(0b10, 2);
136 | writer.Align();
137 | writer.WriteBits(0b11, 2); // Note: the second write is 0b11 even though the test name says "10"
138 | }
139 | // Align() zero-fills the rest of the current byte so the next write starts on a byte boundary.
140 | Assert.Equal(new Byte[] {0b10000000, 0b11000000}, stream.ToArray());
141 | }
142 |
143 | [Fact]
144 | public void CanWriteBits_Align()
145 | {
146 | using var stream = new MemoryStream();
147 | using (var writer = new StreamBitWriter(stream))
148 | {
149 | writer.Align();
150 | }
151 | // Aligning when nothing has been written must not emit any bytes.
152 | Assert.Equal(new Byte[] { }, stream.ToArray());
153 | }
154 |
155 | [Fact]
156 | public void CanWriteBits_8_Align()
157 | {
158 | using var stream = new MemoryStream();
159 | using (var writer = new StreamBitWriter(stream))
160 | {
161 | writer.WriteBits(0b11111111, 8);
162 | writer.Align();
163 | }
164 | // Aligning when already on a byte boundary must not add a padding byte.
165 | Assert.Equal(new Byte[] {0b11111111}, stream.ToArray());
166 | }
167 |
168 | [Fact]
169 | public void CanWriteBits_8_Align_8()
170 | {
171 | using var stream = new MemoryStream();
172 | using (var writer = new StreamBitWriter(stream))
173 | {
174 | writer.WriteBits(0b11111111, 8);
175 | writer.Align();
176 | writer.WriteBits(0b11111111, 8);
177 | }
178 |
179 | Assert.Equal(new Byte[] {0b11111111, 0b11111111}, stream.ToArray());
180 | }
181 |
182 | [Fact]
183 | public void CanWriteBits_32()
184 | {
185 | using var stream = new MemoryStream();
186 | using (var writer = new StreamBitWriter(stream))
187 | {
188 | writer.WriteBits(0b_11111111_11111111_11111111_11111111, 32);
189 | }
190 |
191 | Assert.Equal(new Byte[] {0b_11111111, 0b_11111111, 0b_11111111, 0b_11111111,}, stream.ToArray());
192 | }
193 |
194 | [Fact]
195 | public void CanWriteBits_63()
196 | {
197 | using var stream = new MemoryStream();
198 | using (var writer = new StreamBitWriter(stream))
199 | {
200 | writer.WriteBits(0b_01111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, 63);
201 | }
202 | // The 63 set bits are emitted from the start of the stream, leaving the last bit of the eighth byte zero.
203 | Assert.Equal(new Byte[] {0b_11111111, 0b_11111111, 0b_11111111, 0b_11111111,0b_11111111, 0b_11111111, 0b_11111111, 0b_11111110,}, stream.ToArray());
204 | }
205 |
206 | [Fact]
207 | public void CanWriteBits_64()
208 | {
209 | using var stream = new MemoryStream();
210 | using (var writer = new StreamBitWriter(stream))
211 | {
212 | writer.WriteBits(0b_11111111_11111111_11111111_11111111_11111111_11111111_11111111_11111111, 64);
213 | }
214 |
215 | Assert.Equal(new Byte[] {0b_11111111, 0b_11111111, 0b_11111111, 0b_11111111,0b_11111111, 0b_11111111, 0b_11111111, 0b_11111111,}, stream.ToArray());
216 | }
217 |
218 | [Fact]
219 | public void CanWriteBits_1_32()
220 | {
221 | using var stream = new MemoryStream();
222 | using (var writer = new StreamBitWriter(stream))
223 | {
224 | writer.WriteBit(false);
225 | writer.WriteBits(0b_11111111_11111111_11111111_11111111, 32);
226 | }
227 | // The leading 0 bit shifts the 32-bit value by one position, so its last bit lands in a fifth byte.
228 | Assert.Equal(new Byte[] {0b_01111111, 0b_11111111, 0b_11111111, 0b_11111111, 0b10000000,}, stream.ToArray());
229 | }
230 |
231 |
232 | [Fact]
233 | public void CanWriteBit_x1()
234 | {
235 | using var stream = new MemoryStream();
236 | using (var writer = new StreamBitWriter(stream))
237 | {
238 | writer.WriteBits(1,64);
239 | }
240 | // Both sides are compared as hex strings via ToHexString (presumably the project's array extension - confirm).
241 | Assert.Equal(new Byte[] {0b00000000,0b00000000,0b00000000,0b00000000,0b00000000,0b00000000,0b00000000,0b00000001,}.ToHexString(), stream.ToArray().ToHexString());
242 | }
243 |
244 | [Fact]
245 | public void CanDisposeNotOwned()
246 | {
247 | using var stream = new MemoryStream();
248 | using var writer = new StreamBitWriter(stream);
249 | // The writer is built without the "owned" flag, so disposing it must leave the stream usable.
250 | Assert.False(writer.IsDisposed);
251 | writer.Dispose();
252 | Assert.True(writer.IsDisposed);
253 | stream.ReadByte(); // Would throw if the underlying stream had been disposed along with the writer
254 | }
255 |
256 | [Fact]
257 | public void CanDisposeOwned()
258 | {
259 | using var stream = new MemoryStream();
260 | using var writer = new StreamBitWriter(stream, true);
261 | // Second constructor argument is true (the "owned" case): disposing the writer is expected to dispose the stream too.
262 | Assert.False(writer.IsDisposed);
263 | writer.Dispose();
264 | Assert.True(writer.IsDisposed);
265 | Assert.Throws<ObjectDisposedException>(() => stream.ReadByte()); // A disposed MemoryStream rejects reads
266 | }
267 |
268 | // The tests below pass 1 as the optional third constructor argument (presumably the buffer size - confirm against StreamBitWriter).
269 | [Fact]
270 | public void CanWriteBit_B1_8()
271 | {
272 | using var stream = new MemoryStream();
273 | using (var writer = new StreamBitWriter(stream,false,1))
274 | {
275 | writer.WriteBits(0b11111111,8);
276 | }
277 |
278 | Assert.Equal(new Byte[] {0b11111111,}, stream.ToArray());
279 | }
280 |
281 | [Fact]
282 | public void CanWriteBit_B1_9()
283 | {
284 | using var stream = new MemoryStream();
285 | using (var writer = new StreamBitWriter(stream,false,1))
286 | {
287 | writer.WriteBits(0b111111111,9);
288 | }
289 |
290 | Assert.Equal(new Byte[] {0b11111111,0b10000000}, stream.ToArray());
291 | }
292 | }
--------------------------------------------------------------------------------
/Test/FibonaciCodecTests.cs:
--------------------------------------------------------------------------------
1 | namespace InvertedTomato.Packing;
2 |
3 | public class FibonacciCodecTests // Pins the exact bit patterns produced and accepted by the Fibonacci integer encoder/decoder
4 | {
5 | private Byte[] Encode(UInt64 value)
6 | {
7 | using var stream = new MemoryStream();
8 | using (var writer = new StreamBitWriter(stream))
9 | {
10 | var encoder = new FibonacciIntegerEncoder(writer);
11 | encoder.EncodeUInt64(value);
12 | }
13 | // Read the bytes back only after the writer's using block has ended.
14 | return stream.ToArray();
15 | }
16 | // Encode - in every expected pattern below the symbol ends with two consecutive 1 bits; the remaining bits of the last byte are zero.
17 | [Fact]
18 | public void Encode_0() => Assert.Equal(new Byte[] { 0b11000000 }.ToBinaryString(), Encode(0).ToBinaryString());
19 |
20 | [Fact]
21 | public void Encode_1() => Assert.Equal(new Byte[] { 0b01100000 }.ToBinaryString(), Encode(1).ToBinaryString());
22 |
23 | [Fact]
24 | public void Encode_2() => Assert.Equal(new Byte[] { 0b00110000 }.ToBinaryString(), Encode(2).ToBinaryString());
25 |
26 | [Fact]
27 | public void Encode_3() => Assert.Equal(new Byte[] { 0b10110000 }.ToBinaryString(), Encode(3).ToBinaryString());
28 |
29 | [Fact]
30 | public void Encode_4() => Assert.Equal(new Byte[] { 0b00011000 }.ToBinaryString(), Encode(4).ToBinaryString());
31 |
32 | [Fact]
33 | public void Encode_5() => Assert.Equal(new Byte[] { 0b10011000 }.ToBinaryString(), Encode(5).ToBinaryString());
34 |
35 | [Fact]
36 | public void Encode_6() => Assert.Equal(new Byte[] { 0b01011000 }.ToBinaryString(), Encode(6).ToBinaryString());
37 |
38 | [Fact]
39 | public void Encode_7() => Assert.Equal(new Byte[] { 0b00001100 }.ToBinaryString(), Encode(7).ToBinaryString());
40 |
41 | [Fact]
42 | public void Encode_8() => Assert.Equal(new Byte[] { 0b10001100 }.ToBinaryString(), Encode(8).ToBinaryString());
43 |
44 | [Fact]
45 | public void Encode_9() => Assert.Equal(new Byte[] { 0b01001100 }.ToBinaryString(), Encode(9).ToBinaryString());
46 |
47 | [Fact]
48 | public void Encode_10() => Assert.Equal(new Byte[] { 0b00101100 }.ToBinaryString(), Encode(10).ToBinaryString());
49 |
50 | [Fact]
51 | public void Encode_11() => Assert.Equal(new Byte[] { 0b10101100 }, Encode(11));
52 |
53 | [Fact]
54 | public void Encode_12() => Assert.Equal(new Byte[] { 0b00000110 }, Encode(12));
55 |
56 | [Fact]
57 | public void Encode_13() => Assert.Equal(new Byte[] { 0b10000110 }, Encode(13));
58 |
59 | [Fact]
60 | public void Encode_20() => Assert.Equal(new Byte[] { 0b00000011 }, Encode(20)); // Exactly one byte
61 |
62 | [Fact]
63 | public void Encode_33() => Assert.Equal(new Byte[] { 0b00000001, 0b10000000 }, Encode(33)); // Termination bit is on next byte
64 |
65 | [Fact]
66 | public void Encode_54() => Assert.Equal(new Byte[] { 0b00000000, 0b11000000 }, Encode(54)); // Final and termination bits on next byte
67 |
68 | [Fact]
69 | public void Encode_986() => Assert.Equal(new Byte[] { 0b00000000, 0b00000011 }, Encode(986)); // Exactly two bytes
70 |
71 | [Fact]
72 | public void Encode_1596() => Assert.Equal(new Byte[] { 0b00000000, 0b00000001, 0b10000000 }, Encode(1596)); // Termination bit is on next byte
73 |
74 | [Fact]
75 | public void Encode_2583() => Assert.Equal(new Byte[] { 0b00000000, 0b00000000, 0b11000000 }, Encode(2583)); // Final and termination bits on next byte
76 |
77 | [Fact]
78 | public void Encode_6557470319841() =>
79 | Assert.Equal(new Byte[] { 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000110 }.ToBinaryString(),
80 | Encode(6557470319841).ToBinaryString()); // All bits in first buffer
81 |
82 | [Fact]
83 | public void Encode_10610209857722() =>
84 | Assert.Equal(new Byte[] { 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000011 }.ToBinaryString(),
85 | Encode(10610209857722).ToBinaryString()); // All bits in first buffer
86 |
87 | [Fact]
88 | public void Encode_17167680177564() =>
89 | Assert.Equal(new Byte[] { 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000001, 0b10000000 }.ToBinaryString(),
90 | Encode(17167680177564).ToBinaryString()); // All value bits in first buffer and termination in second buffer
91 |
92 | [Fact]
93 | public void Encode_27777890035287() =>
94 | Assert.Equal(new Byte[] { 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b00000000, 0b11000000 }.ToBinaryString(),
95 | Encode(27777890035287).ToBinaryString()); // Value bits and termination bits in second buffer
96 |
97 | [Fact]
98 | public void Encode_Max()
99 | {
100 | var expected = new Byte[] { 0b01010000, 0b01010001, 0b01000001, 0b00010101, 0b00010010, 0b00100100, 0b00000010, 0b01000100, 0b10001000, 0b10100000, 0b10001010, 0b01011000 }.ToBinaryString();
101 | var actual = Encode(FibonacciInteger.MaxValue).ToBinaryString();
102 | Assert.Equal(expected, actual); // Not completely sure about this value
103 | // Actual: 10100010 01000100 10000000 01001000 10001010 00001010 00101000 00100010 10001000 10100000 10001010 01011000
104 | // Expected: 01010000 01010001 01000001 00010101 00010010 00100100 00000010 01000100 10001000 10100000 10001010 01011000
105 | }
106 |
107 | // Decode
108 |
109 | private UInt64 Decode(Byte[] encoded)
110 | {
111 | using var stream = new MemoryStream(encoded);
112 | using var reader = new StreamBitReader(stream);
113 | var decoder = new FibonacciIntegerDecoder(reader);
114 | // Only the first symbol in the buffer is decoded.
115 | return decoder.DecodeUInt64();
116 | }
117 |
118 | [Fact]
119 | public void Decode_0() => Assert.Equal((UInt64)0, Decode(new Byte[] { 0b11_000000 }));
120 |
121 | [Fact]
122 | public void Decode_1() => Assert.Equal((UInt64)1, Decode(new Byte[] { 0b011_00000 }));
123 |
124 | [Fact]
125 | public void Decode_2() => Assert.Equal((UInt64)2, Decode(new Byte[] { 0b0011_0000 }));
126 |
127 | [Fact]
128 | public void Decode_3() => Assert.Equal((UInt64)3, Decode(new Byte[] { 0b1011_0000 }));
129 |
130 | [Fact]
131 | public void Decode_4() => Assert.Equal((UInt64)4, Decode(new Byte[] { 0b00011_000 }));
132 |
133 | [Fact]
134 | public void Decode_5() => Assert.Equal((UInt64)5, Decode(new Byte[] { 0b10011_000 }));
135 |
136 | [Fact]
137 | public void Decode_6() => Assert.Equal((UInt64)6, Decode(new Byte[] { 0b01011_000 }));
138 |
139 | [Fact]
140 | public void Decode_7() => Assert.Equal((UInt64)7, Decode(new Byte[] { 0b000011_00 }));
141 |
142 | [Fact]
143 | public void Decode_8() => Assert.Equal((UInt64)8, Decode(new Byte[] { 0b100011_00 }));
144 |
145 | [Fact]
146 | public void Decode_9() => Assert.Equal((UInt64)9, Decode(new Byte[] { 0b010011_00 }));
147 |
148 | [Fact]
149 | public void Decode_10() => Assert.Equal((UInt64)10, Decode(new Byte[] { 0b001011_00 }));
150 |
151 | [Fact]
152 | public void Decode_11() => Assert.Equal((UInt64)11, Decode(new Byte[] { 0b101011_00 }));
153 |
154 | [Fact]
155 | public void Decode_20() => Assert.Equal((UInt64)20, Decode(new Byte[] { 0b00000011 })); // Exactly one byte
156 |
157 | [Fact]
158 | public void Decode_33() => Assert.Equal((UInt64)33, Decode(new Byte[] { 0b00000001, 0b1_0000000 })); // Termination bit is on next byte
159 |
160 | [Fact]
161 | public void Decode_54() => Assert.Equal((UInt64)54, Decode(new Byte[] { 0b00000000, 0b11_000000 })); // Final and termination bits on next byte
162 |
163 | [Fact]
164 | public void Decode_986() => Assert.Equal((UInt64)986, Decode(new Byte[] { 0b00000000, 0b00000011 })); // Exactly two bytes
165 |
166 | [Fact]
167 | public void Decode_1596() => Assert.Equal((UInt64)1596, Decode(new Byte[] { 0b00000000, 0b00000001, 0b1_0000000 })); // Termination bit is on next byte
168 |
169 | [Fact]
170 | public void Decode_2583() => Assert.Equal((UInt64)2583, Decode(new Byte[] { 0b00000000, 0b00000000, 0b11_000000 })); // Final and termination bits on next byte
171 |
172 | [Fact]
173 | public void Decode_Max() => Assert.Equal(FibonacciInteger.MaxValue,
174 | Decode(new Byte[] { 0b01010000, 0b01010001, 0b01000001, 0b00010101, 0b00010010, 0b00100100, 0b00000010, 0b01000100, 0b10001000, 0b10100000, 0b10001010, 0b01011_000 }));
175 |
176 | [Fact]
177 | public void Decode_Overflow1() => Assert.ThrowsAny<Exception>(() =>
178 | {
179 | Decode(new Byte[] { 0b01010000, 0b01010001, 0b01000001, 0b00010101, 0b00010010, 0b00100100, 0b00000010, 0b01000100, 0b10001000, 0b10100000, 0b10101010, 0b01011_000 });
180 | }); // Symbol too large. TODO(review): exact exception type is not visible from this file - tighten to Assert.Throws<T> once confirmed
181 |
182 | [Fact]
183 | public void Decode_Overflow2() => Assert.ThrowsAny<Exception>(() =>
184 | {
185 | Decode(new Byte[] { 0b01010000, 0b01010001, 0b01000001, 0b00010101, 0b00010010, 0b00100100, 0b00000010, 0b01000100, 0b10001000, 0b10100000, 0b10001010, 0b010011_00 });
186 | }); // Symbol too long. TODO(review): exact exception type is not visible from this file - tighten to Assert.Throws<T> once confirmed
187 |
188 | [Fact]
189 | public void CanEncodeDecodeFirst1000()
190 | {
191 | using var stream = new MemoryStream();
192 | // Round trip: encode 0..999 back-to-back into one stream, then decode them in the same order.
193 | using (var writer = new StreamBitWriter(stream))
194 | {
195 | var encoder = new FibonacciIntegerEncoder(writer);
196 | for (UInt64 symbol = 0; symbol < 1000; symbol++) encoder.EncodeUInt64(symbol);
197 | }
198 | // Rewind so the reader starts from the first encoded symbol.
199 | stream.Seek(0, SeekOrigin.Begin);
200 |
201 | using (var reader = new StreamBitReader(stream))
202 | {
203 | var decoder = new FibonacciIntegerDecoder(reader);
204 | for (UInt64 symbol = 0; symbol < 1000; symbol++)
205 | {
206 | Assert.Equal(symbol, decoder.DecodeUInt64());
207 | }
208 | }
209 | }
210 | }
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Packing
2 | `InvertedTomato.Packing` is all about encoding data in the smallest possible way, quickly. This is super useful for both storage and transmission of data when size and speed are both important. Data isn't compressed, at least not in the traditional sense; rather, it is stored using efficient encodings.
3 |
4 | ## TLDR
5 | Here's how to squash 24 bytes of data down to 2 using Fibonacci coding:
6 | ```C#
7 | using InvertedTomato.Packing;
8 | using InvertedTomato.Packing.Codecs.Integers;
9 |
10 | // Encode some values...
11 | using var stream = new MemoryStream(); // Could be a FileStream or a NetworkStream
12 | using (var writer = new StreamBitWriter(stream))
13 | {
14 | // Pick a codec - you can use one or many - so long as you decode in the same order you encoded
15 | var fib = new FibonacciIntegerEncoder(writer);
16 |
17 | // Encode some values using the Fibonacci codec
18 | fib.EncodeUInt64(1);
19 | fib.EncodeUInt64(2);
20 | fib.EncodeUInt64(3);
21 | }
22 |
23 | Console.WriteLine("Compressed data is " + stream.Length + " bytes"); // Output: Compressed data is 2 bytes
24 |
25 | // Decode the values...
26 | stream.Position = 0;
27 | using (var reader = new StreamBitReader(stream))
28 | {
29 | var fib = new FibonacciIntegerDecoder(reader);
30 |
31 | // Decode the Fibonacci values
32 | Console.WriteLine(fib.DecodeUInt64()); // Output: 1
33 | Console.WriteLine(fib.DecodeUInt64()); // Output: 2
34 | Console.WriteLine(fib.DecodeUInt64()); // Output: 3
35 | }
36 | ```
37 |
38 | ## Introduction
39 | Modern PCs have stacks of RAM, so it's usually not a problem that integers take 4-8 bytes each
40 | to store in memory. There are times however when this is a problem. For example:
41 | - When you want to store a large set of numbers in memory (100 million * 8 bytes = 760MB)
42 | - When you want to store a large set of numbers on disk
43 | - When you want to transmit numbers over a network (the Internet?) quickly
44 |
45 | In almost all cases those numbers can be stored in a much lower number of bytes. Heck, it's
46 | **possible to store three integers in a single byte**.
47 |
48 | ## Algorithms
49 | The example in the **TLDR** section used the Fibonacci codec. Whilst this codec is excellent for small numbers, it's not so
50 | great when numbers get larger. You really need to select a codec with your domain in mind. Following is a summary of the
51 | codecs available, their strengths and weaknesses.
52 |
53 | ### Bits required to represent each number with each codec
54 | Keep in mind that there is a physical minimum possible size for each number. That is displayed in blue.
55 | 
56 |
57 | ### Fibonacci *(best for integers <8,000)*
58 | - **Family:** [universal code](https://en.wikipedia.org/wiki/Universal_code_(data_compression))
59 | - **Random access:** yes *(can jump ahead)*
60 | - **Lossy:** no *(doesn't approximate)*
61 | - **Universal:** yes *(can handle any number)*
62 | - **Details:** [Wikipedia](https://en.wikipedia.org/wiki/Fibonacci_coding)
63 | - **Options:**
64 |
65 | This is a very interesting algorithm - it encodes the numbers against a Fibonacci sequence. It's the best algorithm in the pack for numbers up to 8,000. It
66 | degrades after that point - but not horrendously so. This is my personal favorite algo.
67 |
68 | ### Thompson-Alpha *(best for integers >8,000)*
69 | - **Family:** none
70 | - **Random access:** no
71 | - **Universal:** no *(can only handle a predefined range of numbers)*
72 | - **Details:** N/A
73 | - **Options:**
74 | - Length bits
75 |
76 | I couldn't find an algorithm which performed well for large integers (>8,000), so this is my own. In its default configuration it has a flat 6-bits
77 | of overhead for each integer, no matter its size. That makes it excellent if your numbers have a large distribution.
78 |
79 | ### Variable Length Quantities (VLQ)
80 | - **Random access:** no *(can't jump ahead)*
81 | - **Universal:** yes *(can handle any number)*
82 | - **Details:** [Wikipedia](https://en.wikipedia.org/wiki/Variable-length_quantity)
83 | - **Options:**
84 |
85 | It seems VLQ was originally invented by the designers of MIDI (you know, the old-school
86 | MP3). The algorithm is really retro, there's stacks of variations of its spec and
87 | it smells a little musty, but it's awesome! It produces pretty good results for all numbers
88 | with a very low CPU overhead.
89 |
90 | ### Inverted Variable Length Quantities (VLQ)
91 | - **Random access:** no *(can't jump ahead)*
92 | - **Universal:** yes *(can handle any number)*
93 | - **Details:** N/A
94 | - **Options:**
95 |
96 | Similar to VLQ, Inverted-VLQ is a slight variation which uses a final-byte flag, rather than a
97 | more-bit flag. Theoretically this has slightly better CPU performance for numbers
98 | that encode to more than three bytes.
99 |
100 | ### Elias-Omega
101 | - **Family:** [universal code](https://en.wikipedia.org/wiki/Universal_code_(data_compression))
102 | - **Random access:** no (can't jump ahead)
103 | - **Universal:** yes (can handle any number)
104 | - **Supported values:** all
105 | - **Details:** [Wikipedia](https://en.wikipedia.org/wiki/Elias_omega_coding)
106 |
107 | Elias Omega is a sexy algorithm. It's well thought out and utterly brilliant. But I
108 | wouldn't use it. It does well for tiny integers (under 8), but just doesn't cut the
109 | mustard for larger values - all other algorithms do better. Sorry Omega :-/.
110 |
111 | ### Elias-Gamma
112 | - **Family:** [universal code](https://en.wikipedia.org/wiki/Universal_code_(data_compression))
113 | - **Random access:** no (can't jump ahead)
114 | - **Universal:** yes *(can handle any number)*
115 | - **Supported values:** all
116 | - **Details:** [Wikipedia](https://en.wikipedia.org/wiki/Elias_gamma_coding)
117 |
118 | Like Elias-Omega, this is a very interesting algorithm. However it's only really useful for small integers (less than 8). For bigger numbers
119 | it performs *terribly*.
120 |
121 | ### Elias-Delta
122 | - **Family:** [universal code](https://en.wikipedia.org/wiki/Universal_code_(data_compression))
123 | - **Random access:** no (can't jump ahead)
124 | - **Universal:** yes *(can handle any number)*
125 | - **Supported values:** all
126 | - **Details:** [Wikipedia](https://en.wikipedia.org/wiki/Elias_delta_coding)
127 |
128 | I have a lot of respect for this algorithm. It's an all-rounder, doing well on small numbers and large alike. If you knew you
129 | were mostly going to have small numbers, but you'd have some larger ones as well, this would be my choice if it weren't for ThompsonAlpha. The algorithm is a little complex, so you might be cautious if you have extreme CPU limitations.
130 |
131 | ## Comparing algorithms
132 | In order to make an accurate assessment of a codec for your purpose, some
133 | algorithms have a method `CalculateEncodedBits` that allows you to know
134 | how many bits a given value would consume when encoded. I recommend getting a set
135 | of your data and running it through the `CalculateEncodedBits` methods of a few
136 | algorithms to see which one is best.
137 |
138 | ## Signed and unsigned
139 | If your numbers are unsigned (eg, no negatives), be sure to use **unsigned** calls to the Codec. That
140 | way you'll get the best size reduction. Obviously fall back to **signed** if you must. Hand-waving, it'll cost you an extra bit or so for each value if you use signed.
141 |
142 | ## Even better reduction
143 | There are a few techniques you can use to further increase the reduction of your integers.
144 | Following is a summary of each
145 |
146 | ### Use deltas
147 | Smaller numbers use less space. So take a moment to consider what
148 | you can do to keep your numbers small. One common technique is to store the difference
149 | between numbers instead of the numbers themselves. Consider if you wanted to store the
150 | following sequence:
151 | - 10000
152 | - 10001
153 | - 10002
154 | - 10003
155 | - 10004
156 |
157 | If you converted them to deltas you could instead store:
158 | - 10000
159 | - 1
160 | - 1
161 | - 1
162 | - 1
163 |
164 | This sequence uses far fewer bytes!
165 |
166 | Naturally this isn't suitable for all contexts. If the receiver has the potential to
167 | lose state (eg. UDP transport) you'll have to include a recovery mechanism (eg keyframes),
168 | otherwise those deltas become meaningless.
169 |
170 | ### Make lossy
171 | Sometimes it's okay to lose data when encoding. Let's say that you're compressing a
172 | list of distances in meters, however you only really care about the distance rounded
173 | to the nearest 100 meters. You can save a heap of data by dividing your value by
174 | 100 before compressing it, and multiplying it by 100 after.
175 |
176 | ### Use a false floor
177 | Sometimes all of your values are always going to be above zero. Let's say that you're
178 | storing the number of cars going over a busy bridge each hour. If it's safe to assume
179 | there will never be 0 cars you could save some data by subtracting one from your
180 | value before encoding and adding one after decoding.
181 |
182 | This may seem like a trivial optimization, however with most algorithms it will save
183 | you one or two bits per number. If you have several million numbers that really
184 | adds up.
185 |
186 | ### Intermix codecs
187 | So Fibonacci is best for small numbers, and ThompsonAlpha is better for large values -
188 | so why not use both? You can, so long as you read the values back with the same codecs, in the same order, as you wrote them. If you use this
189 | cleverly you can get some real size wins.
190 |
191 | ### Compress it
192 | You thought we were compressing integers already? Well, it depends how you define your terms, but I'd say I was just encoding them more cleverly. But you can compress it as well. Check out [BrotliStream](https://docs.microsoft.com/en-us/dotnet/api/system.io.compression.brotlistream). If you wrap your stream in this you can further compress your dataset. While the above encoding stores your data in the most efficient manner, Brotli will then look for patterns in your data to exploit to make it smaller again.
193 |
--------------------------------------------------------------------------------