├── .travis.yml
├── LICENSE
├── README.md
├── sample.cc
├── tests.cxx
├── vle.cpp
└── vle.hpp
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: cpp
2 | sudo: required
3 |
4 | compiler:
5 | - clang
6 | - gcc
7 |
8 | install:
9 | - wget --quiet -O - https://raw.githubusercontent.com/r-lyeh/depot/master/travis.pre.sh | bash -x
10 |
11 | script:
12 | - wget --quiet -O - https://raw.githubusercontent.com/r-lyeh/depot/master/travis.build.sh | bash -x
13 | - ./sample.bin
14 |
15 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2015 r-lyeh (https://github.com/r-lyeh)
2 |
3 | This software is provided 'as-is', without any express or implied
4 | warranty. In no event will the authors be held liable for any damages
5 | arising from the use of this software.
6 |
7 | Permission is granted to anyone to use this software for any purpose,
8 | including commercial applications, and to alter it and redistribute it
9 | freely, subject to the following restrictions:
10 |
11 | 1. The origin of this software must not be misrepresented; you must not
12 | claim that you wrote the original software. If you use this software
13 | in a product, an acknowledgment in the product documentation would be
14 | appreciated but is not required.
15 | 2. Altered source versions must be plainly marked as such, and must not be
16 | misrepresented as being the original software.
17 | 3. This notice may not be removed or altered from any source distribution.
18 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | vle
2 | ===
3 |
4 | - VLE is a simple variable-length encoder/decoder (C99)(C++03)
5 | - VLE is simple. Format is 7-bit packing, MSB stream terminator.
6 | - VLE is streamable. Designed to encode and decode integers with low overhead.
7 | - VLE is embeddable. Header-only. No external deps. C and C++ APIs provided.
8 | - VLE is extendable. Signed/unsigned integers of any size (8/16/32/64/...) are supported.
9 | - VLE is zlib/libpng licensed.
10 |
11 | ## Quick tutorial
12 | - You want to serialize a `struct { uint16_t len; uint64_t buffer[6]; }` to disk, network, etc.
13 | - You could just flush the raw 50-byte stream, or you could flush a VLE stream instead.
14 | - This VLE stream will range from `7-bytes` (best-case, 43-bytes saved) up to `63-bytes` (worst-case, 13-bytes overhead).
15 |
16 | ## Features
17 | - For any 8-bit sequence, the VLE stream will range from `len` up to `2*len` bytes.
18 | - For any 16-bit sequence, the VLE stream will range from `len` up to `3*len` bytes.
19 | - For any 32-bit sequence, the VLE stream will range from `len` up to `5*len` bytes.
20 | - For any 64-bit sequence, the VLE stream will range from `len` up to `10*len` bytes.
21 | - The magnitude of the value determines the size of the encoded stream. The width of the type does not matter.
22 | - All `byte(0), short(0), int(0), int64(0)...` serialize to a `1-byte` stream.
23 | - All `byte(127), short(127), int(127), int64(127)...` serialize to a `1-byte` stream.
24 | - All `byte(128), short(128), int(128), int64(128)...` serialize to a `2-bytes` stream.
25 | - All `byte(255), short(255), int(255), int64(255)...` serialize to a `2-bytes` stream.
26 | - All `short(16383), int(16383), int64(16383)...` serialize to a `2-bytes` stream.
27 | - All `short(16384), int(16384), int64(16384)...` serialize to a `3-bytes` stream.
28 | - And so on... (see range tables below).
29 | - Rule: The closer to zero the integer you encode is, the smaller the resulting stream.
30 | - Note: Negative integers are rearranged to meet this criterion (see appendix).
31 |
32 | ## VLE stream format
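33 | - All non-significant (zero) bits on the left are discarded.
34 | - The remaining bits are repacked into 7-bit groups, least-significant group first, one group per byte. Every byte except the last has its MSB set; the first byte with a clear MSB terminates the stream.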
35 | - Additionally, signed integers are pre-encoded and post-decoded to a more efficient signed number representation.
36 |
37 | ## API, C
38 | - Basic API. Allows streaming and fine control.
39 | - Encoders do not append null character at end of string.
40 | - Decoders do not need null character at end of string.
41 | - All functions assume buffers are preallocated to worst-case scenarios.
42 | - All functions return integer of streamed bytes.
43 | ```c
44 | VLE_API uint64_t vle_encode_u( uint8_t *buffer, uint64_t value );
45 | VLE_API uint64_t vle_encode_i( uint8_t *buffer, int64_t value );
46 | VLE_API uint64_t vle_decode_u( uint64_t *value, const uint8_t *buffer );
47 | VLE_API uint64_t vle_decode_i( int64_t *value, const uint8_t *buffer );
48 | ```
49 |
50 | ## API, C++
51 | ```c++
52 | vlei:: {
53 | string encode( int64_t );
54 | int64_t decode( string );
55 | }
56 | vleu:: {
57 | string encode( uint64_t );
58 | uint64_t decode( string );
59 | }
60 | ```
61 |
62 | ## VLE unsigned, 64-bit ranges
63 | ```c++
64 | 1 byte [0..127]
65 | 2 bytes [128..16383]
66 | 3 bytes [16384..2097151]
67 | 4 bytes [2097152..268435455]
68 | 5 bytes [268435456..34359738367]
69 | 6 bytes [34359738368..4398046511103]
70 | 7 bytes [4398046511104..562949953421311]
71 | 8 bytes [562949953421312..72057594037927935]
72 | 9 bytes [72057594037927936..9223372036854775807]
73 | 10 bytes [9223372036854775808..18446744073709551615]
74 | ```
75 |
76 | ## VLE signed, 64-bit ranges
77 | ```c++
78 | 10 bytes [-9223372036854775808..-4611686018427387905]
79 | 9 bytes [-4611686018427387904..-36028797018963969]
80 | 8 bytes [-36028797018963968..-281474976710657]
81 | 7 bytes [-281474976710656..-2199023255553]
82 | 6 bytes [-2199023255552..-17179869185]
83 | 5 bytes [-17179869184..-134217729]
84 | 4 bytes [-134217728..-1048577]
85 | 3 bytes [-1048576..-8193]
86 | 2 bytes [-8192..-65]
87 | 1 byte [-64..63]
88 | 2 bytes [64..8191]
89 | 3 bytes [8192..1048575]
90 | 4 bytes [1048576..134217727]
91 | 5 bytes [134217728..17179869183]
92 | 6 bytes [17179869184..2199023255551]
93 | 7 bytes [2199023255552..281474976710655]
94 | 8 bytes [281474976710656..36028797018963967]
95 | 9 bytes [36028797018963968..4611686018427387903]
96 | 10 bytes [4611686018427387904..9223372036854775807]
97 | ```
98 |
99 | ## used in
100 | - [collage](https://github.com/r-lyeh/collage), a diff/patch library.
101 | - [bundle](https://github.com/r-lyeh/bundle), a de/compression library.
102 |
103 | ## appendix: sign theory and conversion functions
104 |
105 | | ## | Signed-Magnitude | Complement-2 | VLEi (*) |
106 | |:--:|:----------------:|:-------------:|:-----:|
107 | | +7 | 0 111 | 0 _111_ | _111_ 0 |
108 | | +6 | 0 110 | 0 _110_ | _110_ 0 |
109 | | +5 | 0 101 | 0 _101_ | _101_ 0 |
110 | | +4 | 0 100 | 0 _100_ | _100_ 0 |
111 | | +3 | 0 011 | 0 _011_ | _011_ 0 |
112 | | +2 | 0 010 | 0 _010_ | _010_ 0 |
113 | | +1 | 0 001 | 0 _001_ | _001_ 0 |
114 | | +0 | 0 000 | 0 _000_ | _000_ 0 |
115 | | -0 | 1 000 | ----- | ----- |
116 | | -1 | 1 001 | 1 **111** | **000** 1 |
117 | | -2 | 1 010 | 1 **110** | **001** 1 |
118 | | -3 | 1 011 | 1 **101** | **010** 1 |
119 | | -4 | 1 100 | 1 **100** | **011** 1 |
120 | | -5 | 1 101 | 1 **011** | **100** 1 |
121 | | -6 | 1 110 | 1 **010** | **101** 1 |
122 | | -7 | 1 111 | 1 **001** | **110** 1 |
123 | | -8 | ---- | 1 **000** | **111** 1 |
124 |
125 | (\*): compared to C2 (default signed integer), bits are _shared_ or **inverted**
126 |
127 | ```c++
128 | c2tovlei(n) {
129 | return n & MSB ? ~(n<<1) : (n<<1);
130 | }
131 | vleitoc2(n) {
132 | return n & LSB ? ~(n>>1) : (n>>1);
133 | }
134 | ```
135 |
136 |
--------------------------------------------------------------------------------
/sample.cc:
--------------------------------------------------------------------------------
1 | #include "vle.hpp"
2 |
3 | #include
4 | #include
5 | #include
6 |
7 | std::string report;
8 | uint64_t errors = 0;
9 |
10 | void quick_tests() {
11 | int64_t is[] = { (int64_t)1ull << 63, +(1ll << 63), (int64_t)-(1ull << 63), INT64_MIN, INT64_MIN + 1, INT64_MIN / 2, INT64_MIN / 4, -0, 0, INT64_MAX / 4, INT64_MAX / 2, INT64_MAX - 1, INT64_MAX };
12 | uint64_t us[] = { 1ull << 63, (uint64_t)+(1ll << 63), -(1ull << 63), -0, 0, UINT64_MAX / 4, UINT64_MAX / 2, UINT64_MAX - 1, UINT64_MAX };
13 |
14 | for( auto i : is ) {
15 | if( i != vlei::decode(vlei::encode(i)) ) {
16 | errors++;
17 | report += std::to_string(i) + "i,";
18 | }
19 | }
20 |
21 | for( auto u : us ) {
22 | if( u != vleu::decode(vleu::encode(u)) ) {
23 | errors++;
24 | report += std::to_string(u) + "u,";
25 | }
26 | }
27 | }
28 |
29 |
30 | #include
31 | #include
32 |
33 | int main( int argc, const char **argv )
34 | {
35 | // signed
36 | {
37 | // negative
38 | int64_t capacity = ~0u, i0 = -(1ll << 63);
39 | for( int32_t j = 64; j-- >= 0; ) {
40 | int64_t i = -(1ll << j);
41 | auto len = vlei::encode( i ).size();
42 | assert( i == vlei::decode( vlei::encode( i ) ) );
43 | if( len < capacity && j < 63 ) {
44 | capacity = len;
45 | printf("%2lld bytes [%lld..%lld]\n", capacity + 1, i0, j < 0 ? 0 : i - 1 );
46 | i0 = i;
47 | }
48 | }
49 | // positive
50 | capacity = 1, i0 = 0;
51 | for( uint32_t j = 0; j <= 63; ++j ) {
52 | int64_t i = 1ll << j;
53 | auto len = vlei::encode( i ).size();
54 | assert( i == vlei::decode( vlei::encode( i ) ) );
55 | if( len > capacity || j == 63 ) {
56 | printf("%2lld bytes [%llu..%llu]\n", capacity, i0, (i-1) );
57 | capacity = len;
58 | i0 = i;
59 | }
60 | }
61 | }
62 |
63 | // unsigned, positive
64 | {
65 | uint64_t capacity = 1, i0 = 0;
66 | for( uint32_t j = 0; j <= 64; ++j ) {
67 | uint64_t i = 1ull << j;
68 | auto len = vleu::encode( i ).size();
69 | assert( i == vleu::decode( vleu::encode( i ) ) );
70 | if( len > capacity || j == 64 ) {
71 | printf("%2lld bytes [%llu..%llu]\n", capacity, i0, (i-1) );
72 | capacity = len;
73 | i0 = i;
74 | }
75 | }
76 | }
77 |
78 | if( argc > 1 ) {
79 | // usage:
80 | // sample 128
81 | // sample -128
82 | // sample 128u
83 |
84 | std::string arg( argv[1] );
85 | if( arg.back() == 'u' ) {
86 | arg.pop_back();
87 | uint64_t value = std::strtoull( arg.c_str(), 0, 0 );
88 | std::string encoded = vleu::encode(value);
89 | int64_t decoded = vleu::decode(encoded);
90 | assert( value == decoded );
91 | printf("%llu encodes into %d byte(s)\n", value, encoded.size() );
92 | } else {
93 | int64_t value = std::strtoll( arg.c_str(), 0, 0 );
94 | std::string encoded = vlei::encode(value);
95 | int64_t decoded = vlei::decode(encoded);
96 | assert( value == decoded );
97 | printf("%lld encodes into %d byte(s)\n", value, encoded.size() );
98 | }
99 | }
100 |
101 | quick_tests();
102 |
103 | if( report.empty() ) {
104 | printf("%s\n", "All ok.");
105 | } else {
106 | printf("%lld errors: %s\n", errors, report.c_str());
107 | }
108 | }
109 |
--------------------------------------------------------------------------------
/tests.cxx:
--------------------------------------------------------------------------------
1 | // check all possible VLEi/VLEu combinations by brute force
2 | // r-lyeh, boost licensed
3 |
4 | #include "vle.hpp"
5 |
6 | #include
7 | #include
8 |
9 | #ifdef ONLY_32BITS
10 | #define IMIN INT32_MIN
11 | #define IMAX INT32_MAX
12 | #define UMAX UINT32_MAX
13 | #else
14 | #define IMIN INT64_MIN
15 | #define IMAX INT64_MAX
16 | #define UMAX UINT64_MAX
17 | #endif
18 |
19 | std::string report;
20 | uint64_t errors = 0;
21 |
22 | inline
23 | void testi( int64_t i ) {
24 | if( i != vlei::decode(vlei::encode(i)) ) {
25 | errors++;
26 | report += std::to_string(i) + "i,";
27 | }
28 | if( ( i & 0xffffff ) == 0 ) {
29 | printf("%c", '.');
30 | }
31 | }
32 |
33 | inline
34 | void testu( uint64_t u ) {
35 | if( u != vleu::decode(vleu::encode(u)) ) {
36 | errors++;
37 | report += std::to_string(u) + "u,";
38 | }
39 | if( ( u & 0xffffff ) == 0 ) {
40 | printf("%c", ',');
41 | }
42 | }
43 |
44 | int main() {
45 |
46 | for( int64_t i = IMIN; i < IMAX; ++i ) {
47 | testi(i);
48 | }
49 | testi(IMAX);
50 |
51 | for( uint64_t u = 0; u < UMAX; ++u ) {
52 | testu(u);
53 | }
54 | testu(UMAX);
55 |
56 | if( report.empty() ) {
57 | printf("\n%s\n", "All ok.");
58 | } else {
59 | printf("\n%d errors: %s\n", errors, report.c_str() );
60 | }
61 | }
62 |
--------------------------------------------------------------------------------
/vle.cpp:
--------------------------------------------------------------------------------
1 | #include "vle.hpp"
2 |
--------------------------------------------------------------------------------
/vle.hpp:
--------------------------------------------------------------------------------
1 | /* Simple variable-length encoder/decoder (C99)(C++03)
2 | * - rlyeh, zlib/libpng licensed.
3 | */
4 |
5 | /*
6 | * C++ API
7 | * Quick API.
8 | *
9 | * ns vlei {
10 | * string encode( int64_t );
11 | * int64_t decode( string );
12 | * }
13 | * ns vleu {
14 | * string encode( uint64_t );
15 | * uint64_t decode( string );
16 | * }
17 | */
18 |
19 | /* C API
20 | * Basic API. Allows streaming and fine control.
21 | * Encoders do not append null character at end of string.
22 | * Decoders do not need null character at end of string.
23 | * All functions assume buffers are preallocated to worst-case scenarios.
24 | * All functions return integer of streamed bytes.
25 | *
26 | * - VLE_API uint64_t vle_encode_u( uint8_t *buffer, uint64_t value );
27 | * - VLE_API uint64_t vle_encode_i( uint8_t *buffer, int64_t value );
28 | * - VLE_API uint64_t vle_decode_u( uint64_t *value, const uint8_t *buffer );
29 | * - VLE_API uint64_t vle_decode_i( int64_t *value, const uint8_t *buffer );
30 | */
31 |
32 | #ifndef __VLE_H__
33 | #define __VLE_H__
34 |
35 | #include
36 |
37 | #ifdef __cplusplus
38 | #include
39 | #define VLE_API static inline
40 | extern "C" {
41 | #endif
42 |
43 | #ifndef VLE_API
44 | #define VLE_API static
45 | #endif
46 |
/* Size bounds, in bytes, of one encoded integer. */
enum { VLE_MIN_REQ_BYTES = 1, VLE_MAX_REQ_BYTES = 10 };

/* Encodes `value` into `buffer` as 7-bit groups, least-significant group first.
 * Every byte except the last has the continuation flag (0x80) set; the last
 * byte has it clear and thereby terminates the stream.
 * The caller provides the storage (worst case: VLE_MAX_REQ_BYTES bytes);
 * no null terminator is appended.
 * Returns the number of bytes written.
 */
VLE_API uint64_t vle_encode_u( uint8_t *buffer, uint64_t value ) {
    uint8_t *cursor = buffer;
    /* groups that still have more significant bits after them */
    while( value > 0x7f ) {
        *cursor++ = (uint8_t)( 0x80 | (value & 0x7f) );
        value >>= 7;
    }
    /* final group: continuation flag stays clear */
    *cursor++ = (uint8_t)value;
    return (uint64_t)( cursor - buffer );
}
/* Decodes one unsigned integer from the 7-bit packed stream at `buffer`.
 * Groups are stored least-significant first; a byte whose continuation flag
 * (0x80) is clear terminates the stream. No null terminator is required.
 *
 * At most 10 bytes are consumed, the longest stream a 64-bit value can
 * produce. On a malformed stream whose continuation flag never clears this
 * keeps the shift count below 64 (a larger shift is undefined behavior) and
 * bounds the read.
 *
 * The result is stored in `*value`. Returns the number of bytes consumed.
 */
VLE_API uint64_t vle_decode_u( uint64_t *value, const uint8_t *buffer ) {
    const uint8_t *cursor = buffer;
    uint64_t out = 0;
    unsigned shift = 0;
    uint8_t byte;
    do {
        byte = *cursor++;
        out |= (uint64_t)( byte & 0x7f ) << shift;
        shift += 7;
        /* shift reaches 70 after the tenth byte, which ends the loop */
    } while( (byte & 0x80) && shift < 64 );
    *value = out;
    return (uint64_t)( cursor - buffer );
}
69 |
70 | VLE_API uint64_t vle_encode_i( uint8_t *buffer, int64_t value ) {
71 | /* convert sign|magnitude to magnitude|sign */
72 | uint64_t nv = (uint64_t)value;
73 | nv = nv & (1ull << 63) ? ~(nv << 1) : (nv << 1);
74 | /* encode unsigned */
75 | return vle_encode_u( buffer, nv );
76 | }
77 | VLE_API uint64_t vle_decode_i( int64_t *value, const uint8_t *buffer ) {
78 | /* decode unsigned */
79 | uint64_t nv, ret = vle_decode_u( &nv, buffer );
80 | /* convert magnitude|sign to sign|magnitude */
81 | *value = nv & (1) ? ~(nv >> 1) : (nv >> 1);
82 | return ret;
83 | }
84 |
85 | #ifdef __cplusplus
86 | }
87 |
88 | namespace vlei {
89 | static inline
90 | std::string encode( int64_t value ) {
91 | unsigned char buf[ VLE_MAX_REQ_BYTES ];
92 | return std::string( (const char *)buf, vle_encode_i( &buf[0], value ) );
93 | }
94 |
95 | static inline
96 | int64_t decode( const std::string &buf ) {
97 | int64_t value;
98 | vle_decode_i( &value, (const unsigned char *)&buf[0] );
99 | return value;
100 | }
101 | }
102 |
103 | namespace vleu {
104 | static inline
105 | std::string encode( uint64_t value ) {
106 | unsigned char buf[ VLE_MAX_REQ_BYTES ];
107 | return std::string( (const char *)buf, vle_encode_u( &buf[0], value ) );
108 | }
109 |
110 | static inline
111 | uint64_t decode( const std::string &buf ) {
112 | uint64_t value;
113 | vle_decode_u( &value, (const unsigned char *)&buf[0] );
114 | return value;
115 | }
116 | }
117 |
118 | #endif
119 |
120 | #endif
121 |
--------------------------------------------------------------------------------