├── .github └── workflows │ ├── c.yml │ └── go.yml ├── LICENSE ├── README.md ├── c ├── Makefile ├── bench.cpp ├── flit.h └── test.c ├── flit.go ├── flit_test.go └── go.mod /.github/workflows/c.yml: -------------------------------------------------------------------------------- 1 | name: C 2 | 3 | on: 4 | push: 5 | branches: [ "master" ] 6 | pull_request: 7 | branches: [ "master" ] 8 | 9 | jobs: 10 | build: 11 | runs-on: ubuntu-latest 12 | 13 | steps: 14 | - uses: actions/checkout@v3 15 | - name: Clean 16 | run: make -C c clean 17 | - name: Test 18 | run: make -C c test 19 | -------------------------------------------------------------------------------- /.github/workflows/go.yml: -------------------------------------------------------------------------------- 1 | name: Go 2 | 3 | on: 4 | push: 5 | branches: [ "master" ] 6 | pull_request: 7 | branches: [ "master" ] 8 | 9 | jobs: 10 | 11 | build: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v3 15 | 16 | - name: Set up Go 17 | uses: actions/setup-go@v3 18 | with: 19 | go-version: 1.19 20 | 21 | - name: Test 22 | run: go test -v ./... 23 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | To the extent possible under law, Pascal S. de Kloe has waived all 2 | copyright and related or neighboring rights to FLIT. This work is 3 | published from The Netherlands. 4 | 5 | https://creativecommons.org/publicdomain/zero/1.0/legalcode 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Specification 2 | 3 | ## Abstract 4 | 5 | Fixed-Length Integer Trim (FLIT) is a byte oriented 6 | [universal code](https://en.wikipedia.org/wiki/Universal_code_%28data_compression%29). 7 | FLIT64 outperforms VLQ, a.k.a. varint, in terms of speed and size. 
8 | 9 | * Fixed serial size range by design 10 | * Size known at first byte 11 | * CPU friendly: count zeros & bit shift, no loops 12 | 13 | This is free and unencumbered software released into the 14 | [public domain](http://creativecommons.org/publicdomain/zero/1.0). 15 | 16 | [![C](https://github.com/pascaldekloe/flit/actions/workflows/c.yml/badge.svg)](https://github.com/pascaldekloe/flit/actions/workflows/c.yml) 17 | [![Go](https://github.com/pascaldekloe/flit/actions/workflows/go.yml/badge.svg)](https://github.com/pascaldekloe/flit/actions/workflows/go.yml) 18 | [![Go Reference](https://pkg.go.dev/badge/github.com/pascaldekloe/flit.svg)](https://pkg.go.dev/github.com/pascaldekloe/flit) 19 | 20 | 21 | ## FLIT64 22 | 23 | The 64-bit unsigned version encodes an integer in 1 to 9 octets. 24 | 25 | The first octet advertises the number of octets following with the trailing 26 | zero count. Any remaining bits hold the least significant data bits and the 27 | following octets, if any at all, hold the rest in little-endian order. 28 | 29 | | Total Size | First Bits | Range | 30 | |:-----------|:------------|:------------------------------------| 31 | | 1 octet | `xxxx xxx1` | 7-bit (128) | 32 | | 2 octets | `xxxx xx10` | 14-bit (16'384) | 33 | | 3 octets | `xxxx x100` | 21-bit (2'097'152) | 34 | | 4 octets | `xxxx 1000` | 28-bit (268'435'456) | 35 | | 5 octets | `xxx1 0000` | 35-bit (34'359'738'368) | 36 | | 6 octets | `xx10 0000` | 42-bit (4'398'046'511'104) | 37 | | 7 octets | `x100 0000` | 49-bit (562'949'953'421'312) | 38 | | 8 octets | `1000 0000` | 56-bit (72'057'594'037'927'936) | 39 | | 9 octets | `0000 0000` | 64-bit (18'446'744'073'709'551'616) | 40 | 41 | Encoding *should* pick the smallest range capable of holding the value. 42 | 43 | 44 | ### Decoding Example 45 | 46 | 1. The first byte read is `1010 0110`. 47 | 2. One trailing zero means we need to read one more byte, which is `0000 1111`. 48 | 3. 
The two bytes in little-endian order make `0000 1111 1010 0110`. 49 | 4. Drop the two size bits with a bit shift to get `0000 0011 1110 1001` or decimal 1001. 50 | 51 | 52 | ## FLIT64S 53 | 54 | For signed integers, the algorithm is preceded by *ZigZag encoding*. See the 55 | [Protocol Buffers Encoding](https://developers.google.com/protocol-buffers/docs/encoding#signed-integers) 56 | documentation for detailed information. 57 | 58 | ## Benchmark 59 | 60 | Encoding and decoding in C is faster than memcpy on an Apple M1. 61 | 62 | ``` 63 | -------------------------------------------------------------------------- 64 | Benchmark Time CPU Iterations UserCounters... 65 | -------------------------------------------------------------------------- 66 | BM_flit64_enc/1 0.547 ns 0.547 ns 1000000000 bytes_per_second=13.6251G/s items_per_second=1.82873G/s 67 | BM_flit64_enc/5 0.860 ns 0.860 ns 811782442 bytes_per_second=8.66524G/s items_per_second=1.16303G/s 68 | BM_flit64_enc/9 0.625 ns 0.625 ns 1000000000 bytes_per_second=11.9131G/s items_per_second=1.59894G/s 69 | BM_flit64_dec/1 0.742 ns 0.742 ns 939862243 bytes_per_second=10.0367G/s items_per_second=1.34711G/s 70 | BM_flit64_dec/5 0.742 ns 0.742 ns 936479906 bytes_per_second=10.0357G/s items_per_second=1.34697G/s 71 | BM_flit64_dec/9 0.705 ns 0.705 ns 990701558 bytes_per_second=10.5737G/s items_per_second=1.41918G/s 72 | BM_memcpy64 2.04 ns 2.04 ns 343174264 bytes_per_second=3.65752G/s items_per_second=490.905M/s 73 | ``` 74 | 75 | The speed is similar to native endian encoding in Go on Apple M1. Ditto for Intel Xeon. 
76 | 77 | ``` 78 | name time/op 79 | PutUint64-8 2.08ns ± 0% 80 | PutUint64Raw-8 2.08ns ± 0% 81 | PutUint64VQL-8 3.77ns ± 0% 82 | Uint64-8 2.80ns ± 1% 83 | Uint64Raw-8 2.10ns ± 1% 84 | Uint64VQL-8 5.73ns ± 1% 85 | 86 | name speed 87 | PutUint64-8 3.85GB/s ± 0% 88 | PutUint64Raw-8 3.84GB/s ± 0% 89 | PutUint64VQL-8 2.12GB/s ± 0% 90 | Uint64-8 2.86GB/s ± 1% 91 | Uint64Raw-8 3.80GB/s ± 1% 92 | Uint64VQL-8 1.40GB/s ± 1% 93 | ``` 94 | 95 | 96 | ## Credits 97 | 98 | * Aleksey Demakov @ademakov 99 | * Roman Gershman @romange 100 | -------------------------------------------------------------------------------- /c/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: all 2 | all: test bench 3 | 4 | .PHONY: test 5 | test: clean test.bin 6 | ./test.bin 7 | 8 | .PHONY: bench 9 | bench: clean bench.bin 10 | ./bench.bin 11 | 12 | test.bin: 13 | $(CC) -o test.bin -O0 test.c 14 | 15 | bench.bin: 16 | $(CC) -o bench.bin -O3 -lstdc++ -lbenchmark bench.cpp 17 | 18 | .PHONY: clean 19 | clean: 20 | rm -fr test.bin bench.bin 21 | -------------------------------------------------------------------------------- /c/bench.cpp: -------------------------------------------------------------------------------- 1 | #include "flit.h" 2 | 3 | #include 4 | 5 | #include 6 | #include 7 | 8 | 9 | uint8_t* buf = new uint8_t[9]; 10 | uint64_t v64; 11 | 12 | 13 | static inline void setup(benchmark::State& state) { 14 | v64 = (uint64_t)1 << ((state.range(0) - 1) * 7); 15 | size_t n = flit64_enc(buf, v64); 16 | if (n != state.range(0)) { 17 | state.SkipWithError("size miss"); 18 | } 19 | } 20 | 21 | static inline void roundup(benchmark::State& state, size_t byteCount) { 22 | state.SetItemsProcessed(state.iterations()); 23 | state.SetBytesProcessed(state.iterations() * byteCount); 24 | } 25 | 26 | static void BM_flit64_enc(benchmark::State& state) { 27 | setup(state); 28 | 29 | while (state.KeepRunning()) { 30 | benchmark::DoNotOptimize(flit64_enc(buf, v64)); 
31 | benchmark::ClobberMemory(); 32 | } 33 | 34 | roundup(state, 8); 35 | } 36 | 37 | static void BM_flit64_dec(benchmark::State& state) { 38 | setup(state); 39 | 40 | while (state.KeepRunning()) { 41 | benchmark::DoNotOptimize(flit64_dec(&v64, buf)); 42 | } 43 | 44 | roundup(state, 8); 45 | } 46 | 47 | static void BM_memcpy64(benchmark::State& state) { 48 | v64 = 123456789; 49 | 50 | while (state.KeepRunning()) { 51 | memcpy(buf, &v64, 8); 52 | benchmark::ClobberMemory(); 53 | } 54 | 55 | roundup(state, 8); 56 | } 57 | 58 | BENCHMARK(BM_flit64_enc)->DenseRange(1, 9, 4); 59 | BENCHMARK(BM_flit64_dec)->DenseRange(1, 9, 4); 60 | BENCHMARK(BM_memcpy64); 61 | 62 | BENCHMARK_MAIN(); 63 | -------------------------------------------------------------------------------- /c/flit.h: -------------------------------------------------------------------------------- 1 | // FLIT64 Implementation 2 | 3 | #include 4 | #include 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | 10 | // Decodes buf into v and returns the serial octet size. 11 | size_t flit64_dec(uint64_t* v, const void* buf) { 12 | uint64_t x = *(uint64_t*)buf; 13 | 14 | int tzc = 8; 15 | if (x) tzc = __builtin_ctzll(x); 16 | if (tzc > 7) { 17 | uint8_t* cp = (uint8_t*)buf; 18 | *v = *(uint64_t*)++cp; 19 | return 9; 20 | } 21 | 22 | static const uint64_t mask[8] = { 23 | 0xff, 24 | 0xffff, 25 | 0xffffff, 26 | 0xffffffff, 27 | 0xffffffffff, 28 | 0xffffffffffff, 29 | 0xffffffffffffff, 30 | 0xffffffffffffffff, 31 | }; 32 | x &= mask[tzc]; 33 | 34 | // const here seems to ensure that 'size' is not aliased by '*v' 35 | const size_t size = tzc + 1; 36 | 37 | *v = x >> size; 38 | 39 | return size; 40 | } 41 | 42 | // Decodes buf into v and returns the serial octet size. 43 | size_t flit64s_dec(int64_t* v, const void* buf) { 44 | uint64_t u; 45 | size_t n = flit64_dec(&u, buf); 46 | *v = (u >> 1) ^ (~(u & 1) + 1); 47 | return n; 48 | } 49 | 50 | // Encodes v into buf and returns the serial octet size. 
51 | size_t flit64_enc(void* buf, uint64_t v) { 52 | if (v < 128) { 53 | *(uint8_t*)buf = (uint8_t)v << 1 | 1; 54 | return 1; 55 | } 56 | if (v >= (uint64_t)1 << 56) { 57 | uint8_t* p = (uint8_t*)buf; 58 | *p++ = 0; 59 | *(uint64_t*)p = v; 60 | return 9; 61 | } 62 | 63 | int lzc = __builtin_clzll(v); 64 | // extra bytes = (bits - 1) / 7 = (63 - lzc) / 7 65 | size_t e = ((63 - lzc) * 2454267027) >> 34; 66 | 67 | v <<= 1; 68 | v |= 1; 69 | v <<= e; 70 | *(uint64_t*)buf = v; 71 | 72 | return e + 1; 73 | } 74 | 75 | // Encodes v into buf and returns the serial octet size. 76 | size_t flit64s_enc(void* buf, int64_t v) { 77 | return flit64_enc(buf, (v << 1) ^ (v >> 63)); 78 | } 79 | 80 | #ifdef __cplusplus 81 | } // extern "C" 82 | #endif 83 | -------------------------------------------------------------------------------- /c/test.c: -------------------------------------------------------------------------------- 1 | #include "flit.h" 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | const uint64_t tests[] = {0, 127, 128, 16383, 16384, 2097151, 2097152, 268435455, 268435456, 34359738367, 34359738368, 4398046511103, 4398046511104, 562949953421311, 562949953421312, 72057594037927935, 72057594037927936, 18446744073709551615ull}; 8 | 9 | int main(int argc, char* argv[]) { 10 | // verify test cases 11 | for (int i = 0; i < sizeof(tests) / 8; ++i) { 12 | uint64_t val = tests[i]; 13 | 14 | uint8_t* buf = malloc(9); 15 | uint64_t got; 16 | int enced = flit64_enc(buf, val); 17 | int deced = flit64_dec(&got, buf); 18 | 19 | if (got != val || enced != deced) { 20 | printf("got %" PRIu64 ", want %" PRIu64 21 | "; encoded %d octets and decoded %d octets" 22 | "; buffer: %02x%02x %02x%02x %02x%02x %02x%02x %02x\n", 23 | got, val, enced, deced, 24 | buf[0], buf[1], buf[2], buf[3], 25 | buf[4], buf[5], buf[6], buf[7], 26 | buf[8]); 27 | } 28 | } 29 | 30 | // verify preservation of each bit with signed values 31 | for (int bit = 0; bit < 64; bit++) { 32 | int64_t val = (1LL << bit) ^ 
(1LL << 63); 33 | 34 | uint8_t* buf = malloc(9); 35 | int64_t got; 36 | int enced = flit64s_enc(buf, val); 37 | int deced = flit64s_dec(&got, buf); 38 | 39 | if (got != val || enced != deced) { 40 | printf("got %" PRIi64 ", want %" PRIi64 41 | "; encoded %d octets and decoded %d octets" 42 | "; buffer: %02x%02x %02x%02x %02x%02x %02x%02x %02x\n", 43 | got, val, enced, deced, 44 | buf[0], buf[1], buf[2], buf[3], 45 | buf[4], buf[5], buf[6], buf[7], 46 | buf[8]); 47 | } 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /flit.go: -------------------------------------------------------------------------------- 1 | // Package flit implements the FLIT64 algorithm. 2 | package flit 3 | 4 | import ( 5 | "encoding/binary" 6 | "math/bits" 7 | ) 8 | 9 | var readMasks = [...]uint64{ 10 | 0xff, 11 | 0xffff, 12 | 0xffffff, 13 | 0xffffffff, 14 | 0xffffffffff, 15 | 0xffffffffffff, 16 | 0xffffffffffffff, 17 | 0xffffffffffffffff, 18 | } 19 | 20 | // Uint64 decodes an integer from buf and returns that value and the serial size. 21 | // If the buffer is smaller than 9 bytes, Uint64 may panic. 22 | func Uint64(buf []byte) (v uint64, n int) { 23 | v = binary.LittleEndian.Uint64(buf) 24 | 25 | tz := bits.TrailingZeros64(v) 26 | if tz > 7 { 27 | v = binary.LittleEndian.Uint64(buf[1:]) 28 | return v, 9 29 | } 30 | 31 | v &= readMasks[tz] 32 | 33 | size := tz + 1 34 | v >>= uint(size) 35 | return v, size 36 | } 37 | 38 | // Int64 decodes an integer from buf and returns that value and the serial size. 39 | // If the buffer is smaller than 9 bytes, Int64 may panic. 40 | func Int64(buf []byte) (v int64, n int) { 41 | u, n := Uint64(buf) 42 | return int64(u>>1) ^ -int64(u&1), n 43 | } 44 | 45 | // PutUint64 encodes an integer into buf and returns the serial size. 46 | // If the buffer is smaller than 9 bytes, PutUint64 may panic. 
47 | func PutUint64(buf []byte, v uint64) (n int) { 48 | if v >= uint64(1)<<56 { 49 | buf[0] = 0 50 | binary.LittleEndian.PutUint64(buf[1:], v) 51 | return 9 52 | } 53 | 54 | bitCount := bits.Len64(v) 55 | e := (bitCount + (bitCount >> 3)) >> 3 56 | 57 | v = v<<1 | 1 58 | v <<= uint(e) 59 | binary.LittleEndian.PutUint64(buf, v) 60 | 61 | return e + 1 62 | } 63 | 64 | // PutInt64 encodes an integer into buf and returns the serial size. 65 | // If the buffer is smaller than 9 bytes, PutInt64 may panic. 66 | func PutInt64(buf []byte, v int64) (n int) { 67 | return PutUint64(buf, uint64(v<<1)^uint64(v>>63)) 68 | } 69 | -------------------------------------------------------------------------------- /flit_test.go: -------------------------------------------------------------------------------- 1 | package flit 2 | 3 | import ( 4 | "encoding/binary" 5 | "encoding/hex" 6 | "testing" 7 | ) 8 | 9 | var tests = []struct { 10 | hex string 11 | val uint64 12 | }{ 13 | {"01", 0}, 14 | {"ff", 127}, 15 | {"0202", 128}, 16 | {"feff", 16383}, 17 | {"040002", 16384}, 18 | {"fcffff", 2097151}, 19 | {"08000002", 2097152}, 20 | {"f8ffffff", 268435455}, 21 | {"1000000002", 268435456}, 22 | {"f0ffffffff", 34359738367}, 23 | {"200000000002", 34359738368}, 24 | {"e0ffffffffff", 4398046511103}, 25 | {"40000000000002", 4398046511104}, 26 | {"c0ffffffffffff", 562949953421311}, 27 | {"8000000000000002", 562949953421312}, 28 | {"80ffffffffffffff", 72057594037927935}, 29 | {"000000000000000001", 72057594037927936}, 30 | {"00ffffffffffffffff", 18446744073709551615}, 31 | } 32 | 33 | func TestPutUint64(t *testing.T) { 34 | for _, gold := range tests { 35 | buf := [...]byte{1, 2, 3, 4, 5, 6, 7, 8, 9} 36 | n := PutUint64(buf[:], gold.val) 37 | got := hex.EncodeToString(buf[:n]) 38 | if got != gold.hex { 39 | t.Errorf("%d: got 0x%s, want 0x%s", gold.val, got, gold.hex) 40 | continue 41 | } 42 | } 43 | } 44 | 45 | func TestUint64(t *testing.T) { 46 | for _, gold := range tests { 47 | buf := 
[...]byte{1, 2, 3, 4, 5, 6, 7, 8, 9} 48 | n, err := hex.Decode(buf[:], []byte(gold.hex)) 49 | if err != nil { 50 | t.Fatal(err) 51 | } 52 | 53 | got, read := Uint64(buf[:]) 54 | if got != gold.val || read != n { 55 | t.Errorf("got %d, want %d; read %d bytes, want %d", got, gold.val, read, n) 56 | } 57 | } 58 | } 59 | 60 | func TestSigned64(t *testing.T) { 61 | buf := make([]byte, 9) 62 | 63 | // verify preservation of each bit for 64 | // both negative and positive range 65 | for bit := uint(0); bit < 63; bit++ { 66 | for _, u := range []uint64{0, 1 << 63} { 67 | u |= uint64(1) << bit 68 | 69 | want := int64(u) 70 | wrote := PutInt64(buf, want) 71 | got, read := Int64(buf) 72 | if read != wrote { 73 | t.Errorf("%016x: wrote %d bytes, read %d", u, wrote, read) 74 | } 75 | if got != want { 76 | t.Errorf("%016x: got %d for %d", u, got, want) 77 | } 78 | } 79 | } 80 | } 81 | 82 | // benchmark data 83 | var values [18]uint64 84 | var serials [18][]byte 85 | var varints [18][]byte 86 | 87 | func init() { 88 | for i, gold := range tests { 89 | values[i] = gold.val 90 | 91 | buf := make([]byte, 9) 92 | if _, err := hex.Decode(buf, []byte(gold.hex)); err != nil { 93 | panic(err) 94 | } 95 | serials[i] = buf 96 | 97 | buf = make([]byte, 10) 98 | binary.PutUvarint(buf, gold.val) 99 | varints[i] = buf 100 | } 101 | } 102 | 103 | // prevents compiler optimization 104 | var ckv uint64 105 | var ckn int 106 | var ckb byte 107 | 108 | func BenchmarkPutUint64(b *testing.B) { 109 | b.SetBytes(8) 110 | buf := make([]byte, 9) 111 | 112 | for i := 0; i < b.N; i++ { 113 | ckn += PutUint64(buf, values[i%len(values)]) 114 | } 115 | } 116 | 117 | // BenchmarkPutUint64Raw is for comparison with fixed-width values. 
118 | func BenchmarkPutUint64Raw(b *testing.B) { 119 | b.SetBytes(8) 120 | buf := make([]byte, 8) 121 | 122 | for i := 0; i < b.N; i++ { 123 | binary.LittleEndian.PutUint64(buf, values[i%len(values)]) 124 | ckb += buf[0] 125 | } 126 | } 127 | 128 | // BenchmarkPutUint64VQL is for comparison with the core library. 129 | func BenchmarkPutUint64VQL(b *testing.B) { 130 | b.SetBytes(8) 131 | buf := make([]byte, 10) 132 | 133 | for i := 0; i < b.N; i++ { 134 | ckn += binary.PutUvarint(buf, values[i%len(values)]) 135 | } 136 | } 137 | 138 | func BenchmarkUint64(b *testing.B) { 139 | b.SetBytes(8) 140 | 141 | for i := 0; i < b.N; i++ { 142 | v, n := Uint64(serials[i%len(serials)]) 143 | ckv += v 144 | ckn += n 145 | } 146 | } 147 | 148 | // BenchmarkUint64Raw is for comparison with fixed-width values. 149 | func BenchmarkUint64Raw(b *testing.B) { 150 | b.SetBytes(8) 151 | 152 | for i := 0; i < b.N; i++ { 153 | ckv += binary.LittleEndian.Uint64(serials[i%len(serials)]) 154 | } 155 | } 156 | 157 | // BenchmarkUint64VQL is for comparison with the core library. 158 | func BenchmarkUint64VQL(b *testing.B) { 159 | b.SetBytes(8) 160 | 161 | for i := 0; i < b.N; i++ { 162 | v, n := binary.Uvarint(varints[i%len(varints)]) 163 | ckv += v 164 | ckn += n 165 | } 166 | } 167 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/pascaldekloe/flit 2 | 3 | go 1.9 4 | --------------------------------------------------------------------------------