├── .travis.yml ├── LICENSE ├── README.md ├── sample.cc ├── tests.cxx ├── vle.cpp └── vle.hpp /.travis.yml: -------------------------------------------------------------------------------- 1 | language: cpp 2 | sudo: required 3 | 4 | compiler: 5 | - clang 6 | - gcc 7 | 8 | install: 9 | - wget --quiet -O - https://raw.githubusercontent.com/r-lyeh/depot/master/travis.pre.sh | bash -x 10 | 11 | script: 12 | - wget --quiet -O - https://raw.githubusercontent.com/r-lyeh/depot/master/travis.build.sh | bash -x 13 | - ./sample.bin 14 | 15 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015 r-lyeh (https://github.com/r-lyeh) 2 | 3 | This software is provided 'as-is', without any express or implied 4 | warranty. In no event will the authors be held liable for any damages 5 | arising from the use of this software. 6 | 7 | Permission is granted to anyone to use this software for any purpose, 8 | including commercial applications, and to alter it and redistribute it 9 | freely, subject to the following restrictions: 10 | 11 | 1. The origin of this software must not be misrepresented; you must not 12 | claim that you wrote the original software. If you use this software 13 | in a product, an acknowledgment in the product documentation would be 14 | appreciated but is not required. 15 | 2. Altered source versions must be plainly marked as such, and must not be 16 | misrepresented as being the original software. 17 | 3. This notice may not be removed or altered from any source distribution. 18 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | vle 2 | === 3 | 4 | - VLE is a simple variable-length encoder/decoder (C99)(C++03) 5 | - VLE is simple. Format is 7-bit packing, MSB stream terminator. 
6 | - VLE is streamable. Designed to encode and decode integers with low overhead. 7 | - VLE is embeddable. Header-only. No external deps. C and C++ APIs provided. 8 | - VLE is extendable. Signed/unsigned integers of any size (8/16/32/64/...) are supported. 9 | - VLE is zlib/libpng licensed. 10 | 11 | ## Quick tutorial 12 | - You want to serialize a `struct { uint16_t len; uint64_t buffer[6]; }` to disk, network, etc... 13 | - You could just flush a 50-byte stream, or you could flush a VLE stream instead. 14 | - This VLE stream will range from `7-bytes` (best-case, 43-bytes saved) up to `63-bytes` (worst-case, 13-bytes overhead). 15 | 16 | ## Features 17 | - For any 8-bit sequence, the VLE stream will range from `len` up to `2*len` bytes. 18 | - For any 16-bit sequence, the VLE stream will range from `len` up to `3*len` bytes. 19 | - For any 32-bit sequence, the VLE stream will range from `len` up to `5*len` bytes. 20 | - For any 64-bit sequence, the VLE stream will range from `len` up to `10*len` bytes. 21 | - The magnitude of the value determines the size of the encoded stream. The width of the type does not matter. 22 | - All `byte(0), short(0), int(0), int64(0)...` serialize to a `1-byte` stream. 23 | - All `byte(127), short(127), int(127), int64(127)...` serialize to a `1-byte` stream. 24 | - All `byte(128), short(128), int(128), int64(128)...` serialize to a `2-bytes` stream. 25 | - All `byte(255), short(255), int(255), int64(255)...` serialize to a `2-bytes` stream. 26 | - All `short(16383), int(16383), int64(16383)...` serialize to a `2-bytes` stream. 27 | - All `short(16384), int(16384), int64(16384)...` serialize to a `3-bytes` stream. 28 | - And so on... (see range tables below). 29 | - Rule: The closer to zero the integer you encode, the smaller the resulting stream. 30 | - Note: Negative integers are rearranged to meet this criterion (see appendix). 31 | 32 | ## VLE stream format 33 | - All non-significant (zero) bits on the left are discarded. 
34 | - Bytes are repacked into 7-bit components and glued together until a LSB/MSB bit is found. 35 | - Additionally, signed integers are pre-encoded and post-decoded to a more efficient signed number representation. 36 | 37 | ## API, C 38 | - Basic API. Allows streaming and fine control. 39 | - Encoders do not append null character at end of string. 40 | - Decoders do not need null character at end of string. 41 | - All functions assume buffers are preallocated to worst-case scenarios. 42 | - All functions return integer of streamed bytes. 43 | ```c 44 | VLE_API uint64_t vle_encode_u( uint8_t *buffer, uint64_t value ); 45 | VLE_API uint64_t vle_encode_i( uint8_t *buffer, int64_t value ); 46 | VLE_API uint64_t vle_decode_u( uint64_t *value, const uint8_t *buffer ); 47 | VLE_API uint64_t vle_decode_i( int64_t *value, const uint8_t *buffer ); 48 | ``` 49 | 50 | ## API, C++ 51 | ```c++ 52 | vlei:: { 53 | string encode( int64_t ); 54 | int64_t decode( string ); 55 | } 56 | vleu:: { 57 | string encode( uint64_t ); 58 | uint64_t decode( string ); 59 | } 60 | ``` 61 | 62 | ## VLE unsigned, 64-bit ranges 63 | ```c++ 64 | 1 byte [0..127] 65 | 2 bytes [128..16383] 66 | 3 bytes [16384..2097151] 67 | 4 bytes [2097152..268435455] 68 | 5 bytes [268435456..34359738367] 69 | 6 bytes [34359738368..4398046511103] 70 | 7 bytes [4398046511104..562949953421311] 71 | 8 bytes [562949953421312..72057594037927935] 72 | 9 bytes [72057594037927936..9223372036854775807] 73 | 10 bytes [9223372036854775808..18446744073709551615] 74 | ``` 75 | 76 | ## VLE signed, 64-bit ranges 77 | ```c++ 78 | 10 bytes [-9223372036854775808..-4611686018427387905] 79 | 9 bytes [-4611686018427387904..-36028797018963969] 80 | 8 bytes [-36028797018963968..-281474976710657] 81 | 7 bytes [-281474976710656..-2199023255553] 82 | 6 bytes [-2199023255552..-17179869185] 83 | 5 bytes [-17179869184..-134217729] 84 | 4 bytes [-134217728..-1048577] 85 | 3 bytes [-1048576..-8193] 86 | 2 bytes [-8192..-65] 87 | 1 byte [-64..63] 
88 | 2 bytes [64..8191] 89 | 3 bytes [8192..1048575] 90 | 4 bytes [1048576..134217727] 91 | 5 bytes [134217728..17179869183] 92 | 6 bytes [17179869184..2199023255551] 93 | 7 bytes [2199023255552..281474976710655] 94 | 8 bytes [281474976710656..36028797018963967] 95 | 9 bytes [36028797018963968..4611686018427387903] 96 | 10 bytes [4611686018427387904..9223372036854775807] 97 | ``` 98 | 99 | ## used in 100 | - [collage](https://github.com/r-lyeh/collage), a diff/patch library. 101 | - [bundle](https://github.com/r-lyeh/bundle), a de/compression library. 102 | 103 | ## appendix: sign theory and conversion functions 104 | 105 | | ## | Signed-Magnitude | Complement-2 | VLEi (*) | 106 | |:--:|:----------------:|:-------------:|:-----:| 107 | | +7 | 0 111 | 0 _111_ | _111_ 0 | 108 | | +6 | 0 110 | 0 _110_ | _110_ 0 | 109 | | +5 | 0 101 | 0 _101_ | _101_ 0 | 110 | | +4 | 0 100 | 0 _100_ | _100_ 0 | 111 | | +3 | 0 011 | 0 _011_ | _011_ 0 | 112 | | +2 | 0 010 | 0 _010_ | _010_ 0 | 113 | | +1 | 0 001 | 0 _001_ | _001_ 0 | 114 | | +0 | 0 000 | 0 _000_ | _000_ 0 | 115 | | -0 | 1 000 | ----- | ----- | 116 | | -1 | 1 001 | 1 **111** | **000** 1 | 117 | | -2 | 1 010 | 1 **110** | **001** 1 | 118 | | -3 | 1 011 | 1 **101** | **010** 1 | 119 | | -4 | 1 100 | 1 **100** | **011** 1 | 120 | | -5 | 1 101 | 1 **011** | **100** 1 | 121 | | -6 | 1 110 | 1 **010** | **101** 1 | 122 | | -7 | 1 111 | 1 **001** | **110** 1 | 123 | | -8 | ---- | 1 **000** | **111** 1 | 124 | 125 | (\*): compared to C2 (default signed integer), bits are _shared_ or **inverted** 126 | 127 | ```c++ 128 | c2tovlei(n) { 129 | return n & MSB ? ~(n<<1) : (n<<1); 130 | } 131 | vleitoc2(n) { 132 | return n & LSB ? 
~(n>>1) : (n>>1); 133 | } 134 | ``` 135 | 136 | -------------------------------------------------------------------------------- /sample.cc: -------------------------------------------------------------------------------- 1 | #include "vle.hpp" 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | std::string report; 8 | uint64_t errors = 0; 9 | 10 | void quick_tests() { 11 | int64_t is[] = { (int64_t)1ull << 63, +(1ll << 63), (int64_t)-(1ull << 63), INT64_MIN, INT64_MIN + 1, INT64_MIN / 2, INT64_MIN / 4, -0, 0, INT64_MAX / 4, INT64_MAX / 2, INT64_MAX - 1, INT64_MAX }; 12 | uint64_t us[] = { 1ull << 63, (uint64_t)+(1ll << 63), -(1ull << 63), -0, 0, UINT64_MAX / 4, UINT64_MAX / 2, UINT64_MAX - 1, UINT64_MAX }; 13 | 14 | for( auto i : is ) { 15 | if( i != vlei::decode(vlei::encode(i)) ) { 16 | errors++; 17 | report += std::to_string(i) + "i,"; 18 | } 19 | } 20 | 21 | for( auto u : us ) { 22 | if( u != vleu::decode(vleu::encode(u)) ) { 23 | errors++; 24 | report += std::to_string(u) + "u,"; 25 | } 26 | } 27 | } 28 | 29 | 30 | #include 31 | #include 32 | 33 | int main( int argc, const char **argv ) 34 | { 35 | // signed 36 | { 37 | // negative 38 | int64_t capacity = ~0u, i0 = -(1ll << 63); 39 | for( int32_t j = 64; j-- >= 0; ) { 40 | int64_t i = -(1ll << j); 41 | auto len = vlei::encode( i ).size(); 42 | assert( i == vlei::decode( vlei::encode( i ) ) ); 43 | if( len < capacity && j < 63 ) { 44 | capacity = len; 45 | printf("%2lld bytes [%lld..%lld]\n", capacity + 1, i0, j < 0 ? 
0 : i - 1 ); 46 | i0 = i; 47 | } 48 | } 49 | // positive 50 | capacity = 1, i0 = 0; 51 | for( uint32_t j = 0; j <= 63; ++j ) { 52 | int64_t i = 1ll << j; 53 | auto len = vlei::encode( i ).size(); 54 | assert( i == vlei::decode( vlei::encode( i ) ) ); 55 | if( len > capacity || j == 63 ) { 56 | printf("%2lld bytes [%llu..%llu]\n", capacity, i0, (i-1) ); 57 | capacity = len; 58 | i0 = i; 59 | } 60 | } 61 | } 62 | 63 | // unsigned, positive 64 | { 65 | uint64_t capacity = 1, i0 = 0; 66 | for( uint32_t j = 0; j <= 64; ++j ) { 67 | uint64_t i = 1ull << j; 68 | auto len = vleu::encode( i ).size(); 69 | assert( i == vleu::decode( vleu::encode( i ) ) ); 70 | if( len > capacity || j == 64 ) { 71 | printf("%2lld bytes [%llu..%llu]\n", capacity, i0, (i-1) ); 72 | capacity = len; 73 | i0 = i; 74 | } 75 | } 76 | } 77 | 78 | if( argc > 1 ) { 79 | // usage: 80 | // sample 128 81 | // sample -128 82 | // sample 128u 83 | 84 | std::string arg( argv[1] ); 85 | if( arg.back() == 'u' ) { 86 | arg.pop_back(); 87 | uint64_t value = std::strtoull( arg.c_str(), 0, 0 ); 88 | std::string encoded = vleu::encode(value); 89 | int64_t decoded = vleu::decode(encoded); 90 | assert( value == decoded ); 91 | printf("%llu encodes into %d byte(s)\n", value, encoded.size() ); 92 | } else { 93 | int64_t value = std::strtoll( arg.c_str(), 0, 0 ); 94 | std::string encoded = vlei::encode(value); 95 | int64_t decoded = vlei::decode(encoded); 96 | assert( value == decoded ); 97 | printf("%lld encodes into %d byte(s)\n", value, encoded.size() ); 98 | } 99 | } 100 | 101 | quick_tests(); 102 | 103 | if( report.empty() ) { 104 | printf("%s\n", "All ok."); 105 | } else { 106 | printf("%lld errors: %s\n", errors, report.c_str()); 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /tests.cxx: -------------------------------------------------------------------------------- 1 | // check all possible VLEi/VLEu combinations by brute force 2 | // r-lyeh, boost 
licensed 3 | 4 | #include "vle.hpp" 5 | 6 | #include 7 | #include 8 | 9 | #ifdef ONLY_32BITS 10 | #define IMIN INT32_MIN 11 | #define IMAX INT32_MAX 12 | #define UMAX UINT32_MAX 13 | #else 14 | #define IMIN INT64_MIN 15 | #define IMAX INT64_MAX 16 | #define UMAX UINT64_MAX 17 | #endif 18 | 19 | std::string report; 20 | uint64_t errors = 0; 21 | 22 | inline 23 | void testi( int64_t i ) { 24 | if( i != vlei::decode(vlei::encode(i)) ) { 25 | errors++; 26 | report += std::to_string(i) + "i,"; 27 | } 28 | if( ( i & 0xffffff ) == 0 ) { 29 | printf("%c", '.'); 30 | } 31 | } 32 | 33 | inline 34 | void testu( uint64_t u ) { 35 | if( u != vleu::decode(vleu::encode(u)) ) { 36 | errors++; 37 | report += std::to_string(u) + "u,"; 38 | } 39 | if( ( u & 0xffffff ) == 0 ) { 40 | printf("%c", ','); 41 | } 42 | } 43 | 44 | int main() { 45 | 46 | for( int64_t i = IMIN; i < IMAX; ++i ) { 47 | testi(i); 48 | } 49 | testi(IMAX); 50 | 51 | for( uint64_t u = 0; u < UMAX; ++u ) { 52 | testu(u); 53 | } 54 | testu(UMAX); 55 | 56 | if( report.empty() ) { 57 | printf("\n%s\n", "All ok."); 58 | } else { 59 | printf("\n%d errors: %s\n", errors, report.c_str() ); 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /vle.cpp: -------------------------------------------------------------------------------- 1 | #include "vle.hpp" 2 | -------------------------------------------------------------------------------- /vle.hpp: -------------------------------------------------------------------------------- 1 | /* Simple variable-length encoder/decoder (C99)(C++03) 2 | * - rlyeh, zlib/libpng licensed. 3 | */ 4 | 5 | /* 6 | * C++ API 7 | * Quick API. 8 | * 9 | * ns vlei { 10 | * string encode( int64_t ); 11 | * int64_t decode( string ); 12 | * } 13 | * ns vleu { 14 | * string encode( uint64_t ); 15 | * uint64_t decode( string ); 16 | * } 17 | */ 18 | 19 | /* C API 20 | * Basic API. Allows streaming and fine control. 
/* C API notes (remainder of the vle.hpp header comment)
 * Encoders do not append null character at end of string.
 * Decoders do not need null character at end of string.
 * All functions assume buffers are preallocated to worst-case scenarios.
 * All functions return integer of streamed bytes.
 *
 * - VLE_API uint64_t vle_encode_u( uint8_t *buffer, uint64_t value );
 * - VLE_API uint64_t vle_encode_i( uint8_t *buffer,  int64_t value );
 * - VLE_API uint64_t vle_decode_u( uint64_t *value, const uint8_t *buffer );
 * - VLE_API uint64_t vle_decode_i(  int64_t *value, const uint8_t *buffer );
 */

#ifndef __VLE_H__
#define __VLE_H__

#include <stdint.h>

#ifdef __cplusplus
#include <string>
#define VLE_API static inline
extern "C" {
#endif

#ifndef VLE_API
#define VLE_API static
#endif

/* Smallest and largest number of bytes a single encoded 64-bit value takes. */
enum { VLE_MIN_REQ_BYTES = 1, VLE_MAX_REQ_BYTES = 10 };

/* Encodes `value` into `buffer` as 7-bit groups, least significant group first.
 * Every byte except the last has its MSB set; the last byte has it cleared.
 * `buffer` must have room for VLE_MAX_REQ_BYTES bytes.
 * Returns the number of bytes written (1..VLE_MAX_REQ_BYTES).
 */
VLE_API uint64_t vle_encode_u( uint8_t *buffer, uint64_t value ) {
    const uint8_t *buffer0 = buffer;
    do {
        /* emit every group with the continuation bit set ... */
        *buffer++ = (uint8_t)( 0x80 | (value & 0x7f) );
        value >>= 7;
    } while( value > 0 );
    /* ... then clear it on the final byte to terminate the stream */
    *(buffer-1) ^= 0x80;
    return (uint64_t)( buffer - buffer0 );
}

/* Decodes one value from `buffer` into `*value`.
 * Reading stops at the first byte whose MSB is clear, or after
 * VLE_MAX_REQ_BYTES bytes, whichever comes first. The cap keeps the shift
 * count below 64 and bounds the read on malformed input; streams produced by
 * vle_encode_u never reach it.
 * Returns the number of bytes consumed (1..VLE_MAX_REQ_BYTES).
 */
VLE_API uint64_t vle_decode_u( uint64_t *value, const uint8_t *buffer ) {
    const uint8_t *buffer0 = buffer;
    uint64_t out = 0;
    unsigned shift = 0;
    uint8_t byte;
    do {
        byte = *buffer++;
        out |= ( (uint64_t)( byte & 0x7f ) ) << shift;
        shift += 7;
    } while( ( byte & 0x80 ) && shift < 7u * VLE_MAX_REQ_BYTES );
    *value = out;
    return (uint64_t)( buffer - buffer0 );
}

/* Encodes a signed value. The two's complement value is first remapped so the
 * sign lives in bit 0 and small magnitudes of either sign become small
 * unsigned numbers (0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ...), then encoded with
 * vle_encode_u. Returns the number of bytes written.
 */
VLE_API uint64_t vle_encode_i( uint8_t *buffer, int64_t value ) {
    /* convert sign|magnitude to magnitude|sign */
    uint64_t nv = (uint64_t)value;
    nv = ( nv & (1ull << 63) ) ? ~(nv << 1) : (nv << 1);
    /* encode unsigned */
    return vle_encode_u( buffer, nv );
}

/* Decodes a signed value written by vle_encode_i into `*value`.
 * Returns the number of bytes consumed.
 */
VLE_API uint64_t vle_decode_i( int64_t *value, const uint8_t *buffer ) {
    /* decode unsigned */
    uint64_t nv;
    const uint64_t ret = vle_decode_u( &nv, buffer );
    /* convert magnitude|sign to sign|magnitude */
    *value = (int64_t)( ( nv & 1 ) ? ~(nv >> 1) : (nv >> 1) );
    return ret;
}

#ifdef __cplusplus
}

/* Signed convenience API: values travel as std::string byte sequences. */
namespace vlei {
    /* Returns the encoded bytes of `value` (1..VLE_MAX_REQ_BYTES chars). */
    static inline
    std::string encode( int64_t value ) {
        unsigned char buf[ VLE_MAX_REQ_BYTES ];
        const uint64_t len = vle_encode_i( &buf[0], value );
        return std::string( (const char *)buf, (std::string::size_type)len );
    }

    /* Decodes the value stored at the start of `buf`. c_str() is used so the
     * decoder always sees a terminating zero byte, even for an empty or
     * truncated string. */
    static inline
    int64_t decode( const std::string &buf ) {
        int64_t value;
        vle_decode_i( &value, (const unsigned char *)buf.c_str() );
        return value;
    }
}

/* Unsigned convenience API: values travel as std::string byte sequences. */
namespace vleu {
    /* Returns the encoded bytes of `value` (1..VLE_MAX_REQ_BYTES chars). */
    static inline
    std::string encode( uint64_t value ) {
        unsigned char buf[ VLE_MAX_REQ_BYTES ];
        const uint64_t len = vle_encode_u( &buf[0], value );
        return std::string( (const char *)buf, (std::string::size_type)len );
    }

    /* Decodes the value stored at the start of `buf`. c_str() is used so the
     * decoder always sees a terminating zero byte, even for an empty or
     * truncated string. */
    static inline
    uint64_t decode( const std::string &buf ) {
        uint64_t value;
        vle_decode_u( &value, (const unsigned char *)buf.c_str() );
        return value;
    }
}

#endif

#endif