├── testdata ├── .gitattributes ├── ends_with_truncated_dictionary ├── random_then_unicode ├── ends_with_truncated_dictionary.ir └── make_mux_test.py ├── .gitignore ├── wasm ├── favicon.ico ├── progress.gif ├── .htaccess ├── mod.py ├── README ├── brotli_wrapper.js └── brotli_iframe.html ├── AUTHORS ├── research ├── bench_revs.sh ├── brotli_bill_transform.py ├── bill_transform.py ├── brute.py ├── select_best_quandruple.py ├── select_best_triple.py ├── divansplot.py └── summary.py ├── c ├── Makefile ├── vec_u8.h ├── custom_alloc.h ├── divans │ └── ffi.h ├── arg.h └── example.c ├── examples ├── decompress.rs ├── compress.rs └── util_prior_stream_cost.rs ├── src ├── ffi │ ├── decompressor.rs │ ├── interface.rs │ └── alloc_util.rs ├── raw_to_cmd │ └── hash_match.rs ├── test_helper.rs ├── probability │ ├── mod.rs │ ├── make_div_lut.rs │ ├── frequentist_cdf.rs │ ├── numeric.rs │ ├── variant_speed_cdf.rs │ ├── external_cdf.rs │ ├── opt_frequentist_cdf.rs │ ├── common_tests.rs │ └── blend_cdf.rs ├── codec │ ├── specializations.rs │ ├── crc32.rs │ ├── weights.rs │ ├── priors.rs │ ├── io.rs │ └── block_type.rs ├── stub_parallel_decompressor.rs ├── constants.rs ├── resizable_buffer.rs ├── debug_encoder.rs ├── divans_to_raw │ └── mod.rs ├── lib.rs ├── ir_optimize │ └── cache.rs ├── cmd_to_divans │ └── mod.rs ├── billing.rs ├── slice_util.rs └── bin │ └── util.rs ├── no-stdlib └── Cargo.toml └── Cargo.toml /testdata/.gitattributes: -------------------------------------------------------------------------------- 1 | * binary -------------------------------------------------------------------------------- /testdata/ends_with_truncated_dictionary: -------------------------------------------------------------------------------- 1 | often referred to as -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | Cargo.lock 2 | target 3 | wasm/brotli.js 4 | 
wasm/brotli.wasm 5 | -------------------------------------------------------------------------------- /wasm/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dropbox/divans/HEAD/wasm/favicon.ico -------------------------------------------------------------------------------- /wasm/progress.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dropbox/divans/HEAD/wasm/progress.gif -------------------------------------------------------------------------------- /testdata/random_then_unicode: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dropbox/divans/HEAD/testdata/random_then_unicode -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | Daniel Reiter Horn 2 | Jongmin Baek 3 | Anatoly Yakovenko 4 | Patrick Reiter Horn 5 | CRC32: Andrew Gallant (BurntSushi) 6 | -------------------------------------------------------------------------------- /wasm/.htaccess: -------------------------------------------------------------------------------- 1 | 2 | 3 | Header set Access-Control-Allow-Origin "*" 4 | 5 | -------------------------------------------------------------------------------- /testdata/ends_with_truncated_dictionary.ir: -------------------------------------------------------------------------------- 1 | window 22 len 22 2 | insert 0 3 | dict 5 word 5,100 6f6674656e func 0 6f6674656e ctx 3 4 | insert 1 20 5 | dict 7 word 8,98 7265666572726572 func 12 72656665727265 ctx 3 6 | insert 9 6420746f2061732020 7 | window 22 len 22 8 | -------------------------------------------------------------------------------- /research/bench_revs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | for 
def main(sizea, sizeb, pct, minsize, maxsize):
    """Print a randomly interleaved chunk schedule for two streams.

    Output goes to stdout: one ``&rand(<size>,<seed>)[..],`` line per stream,
    followed by a ``&[ ... ]`` list of ``(index,length),`` pairs, where index
    0 refers to the first stream and 1 to the second. The chunk lengths of
    each stream add up to that stream's size.
    (The output looks like a Rust test fixture; presumably it is pasted into
    a mux test -- confirm against the consumer.)

    sizea, sizeb -- total number of bytes in stream 0 / stream 1
    pct          -- percentage chance (0..100) that a chunk comes from stream 0
    minsize      -- smallest chunk length to draw
    maxsize      -- largest chunk length to draw (inclusive)

    Raises ValueError if there is data to schedule but maxsize < 1, because
    no chunk could ever make progress in that case.
    """
    print("&rand(" + str(sizea)+",13)[..],")
    print("&rand(" + str(sizeb)+",17)[..],")
    print("&[")
    if (sizea != 0 or sizeb != 0) and maxsize < 1:
        # Every draw would be <= 0, so the loop below could never finish.
        raise ValueError("maxsize must be at least 1 when there is data to schedule")
    while sizea != 0 or sizeb != 0:
        cur_buf = random.randrange(minsize, maxsize + 1)
        is_a = random.randrange(0,100) < pct
        # If the chosen stream is already drained, take the chunk from the
        # other one. Previously such iterations produced nothing, and with
        # pct == 0 or pct == 100 the loop never terminated.
        if is_a and sizea == 0:
            is_a = False
        elif not is_a and sizeb == 0:
            is_a = True
        index = 0 if is_a else 1
        if is_a:
            cur_buf = min(cur_buf, sizea)
            sizea -= cur_buf
        else:
            cur_buf = min(cur_buf, sizeb)
            sizeb -= cur_buf
        if cur_buf:
            print(" (" + str(index) + "," + str(cur_buf) + "),")
    print("]")

if __name__ == "__main__":
    main(random.randrange(1, int(sys.argv[1])),random.randrange(1, int(sys.argv[2])),int(sys.argv[3]), int(sys.argv[4]), int(sys.argv[5]))
super::alloc_util::SubclassableAllocator; 2 | use divans_decompressor::StaticCommand; 3 | use super::interface::CAllocator; 4 | //use ::interface::DivansDecompressorFactory; 5 | pub type DecompressorFactory = ::DivansDecompressorFactoryStruct, 6 | SubclassableAllocator<::DefaultCDF16>, 7 | SubclassableAllocator>; 8 | #[repr(C)] 9 | #[no_mangle] 10 | pub struct DivansDecompressorState { 11 | pub custom_allocator: CAllocator, 12 | pub decompressor: ::DivansDecompressor<, SubclassableAllocator<::DefaultCDF16>, SubclassableAllocator>>::DefaultDecoder, 13 | SubclassableAllocator, 14 | SubclassableAllocator<::DefaultCDF16>, 15 | SubclassableAllocator>, 16 | } 17 | impl Drop for DivansDecompressorState { 18 | fn drop(&mut self) { 19 | self.decompressor.free_ref(); 20 | } 21 | } 22 | 23 | -------------------------------------------------------------------------------- /research/brotli_bill_transform.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from collections import defaultdict 3 | 4 | features = defaultdict(lambda:0) 5 | remap = { 6 | 'CopyDistance':'CopyDistance', 7 | 'DistanceHuffmanTable':'CopyDistance', 8 | 'ComplexLiterals':'ComplexLiterals', 9 | 'CopyLength':'CopyLength', 10 | 'LiteralHuffmanTable':'ComplexLiterals', 11 | 'InsertCopyHuffmanTable':'CopyLength', 12 | 'LiteralContextMode':'LiteralContextMode', 13 | 'MetablockHeader':'Misc', 14 | 'BlockTypeMetadata':'BlockTypeMetadata', 15 | 'DistancContextMode':'DistanceContextMode', 16 | 'Misc':'Misc', 17 | } 18 | 19 | for line in open(sys.argv[1]): 20 | for key,val in remap.iteritems(): 21 | if key != val: 22 | line = line.replace(key,val) 23 | vals = line.split() 24 | bytes = float(vals[1]) 25 | features[vals[2]] += bytes 26 | maxb = max(len(str(item)) for item in features.values()) 27 | maxa = max(len(str(int(item*8 + .5))) for item in features.values()) 28 | 29 | for item in sorted(features.keys()): 30 | bitval = str(int(features[item] * 8 + .5)) 31 | 
byteval = str(features[item]) 32 | print bitval + ' '*(maxa + 2 - len(bitval)) + byteval + ' '*(maxb + 2 - len(byteval)) + item 33 | -------------------------------------------------------------------------------- /src/raw_to_cmd/hash_match.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Dropbox, Inc 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | use core; 16 | pub use alloc::{AllocatedStackMemory, Allocator, SliceWrapper, SliceWrapperMut, StackAllocator}; 17 | 18 | 19 | pub struct HashMatch > { 20 | ht: AllocU32::AllocatedMemory, 21 | } 22 | impl > HashMatch { 23 | pub fn new(m32: &mut AllocU32) -> Self { 24 | HashMatch { 25 | ht:m32.alloc_cell(128), 26 | } 27 | } 28 | pub fn free(&mut self, m32: &mut AllocU32) { 29 | m32.free_cell(core::mem::replace(&mut self.ht, AllocU32::AllocatedMemory::default())); 30 | } 31 | } 32 | 33 | -------------------------------------------------------------------------------- /wasm/README: -------------------------------------------------------------------------------- 1 | Building DivANS for the browser: 2 | ================================ 3 | 4 | Make sure to add this to /etc/mime.types: 5 | ----------- 6 | application/wasm wasm 7 | ----------- 8 | 9 | For divans, you must actually build with wasm32-unknown-unknown: 10 | ----------------- 11 | cargo build --target wasm32-unknown-unknown --release 12 | 
----------------- 13 | 14 | 15 | Add a working brotli demo to the wasm page: 16 | =========================================== 17 | 18 | Check out the rust-brotli repository, then: 19 | Add an emcc wrapper as follows: 20 | 21 | Create a script 'myscript' somewhere with the following contents: 22 | ------------- 23 | #!/bin/bash 24 | emcc -s ALLOW_MEMORY_GROWTH=1 "$@" 25 | ------------- 26 | chmod +x myscript 27 | 28 | Now modify ~/.cargo/config and set 29 | -------------- 30 | [target.wasm32-unknown-emscripten] 31 | linker = "/home/user/bin/myscript" 32 | -------------- 33 | 34 | Now build with 35 | ------------------- 36 | cargo build --target wasm32-unknown-emscripten --release 37 | ------------------- 38 | And copy the binaries from target/wasm32-unknown-emscripten/release/brotli.wasm and brotli.js to the divans/wasm/ directory. 39 | 40 | Finally, in addition to adding wasm to /etc/mime.types, you must allow CORS in your webserver config: search for setting the "Access-Control-Allow-Origin" header. 41 | If you are not able to do this, you may modify brotli_wrapper.js and remove the line which sets "sandbox". 
42 | 43 | 44 | 45 | -------------------------------------------------------------------------------- /src/test_helper.rs: -------------------------------------------------------------------------------- 1 | #![cfg(test)] 2 | extern crate std; 3 | use std::vec::{ 4 | Vec, 5 | }; 6 | use std::boxed::{ 7 | Box, 8 | }; 9 | use core; 10 | use alloc; 11 | 12 | pub struct Rebox { 13 | b: Box<[T]>, 14 | } 15 | 16 | impl core::default::Default for Rebox { 17 | fn default() -> Self { 18 | let v: Vec = Vec::new(); 19 | let b = v.into_boxed_slice(); 20 | Rebox:: { b: b } 21 | } 22 | } 23 | 24 | impl core::ops::Index for Rebox { 25 | type Output = T; 26 | fn index(&self, index: usize) -> &T { 27 | &(*self.b)[index] 28 | } 29 | } 30 | 31 | impl core::ops::IndexMut for Rebox { 32 | fn index_mut(&mut self, index: usize) -> &mut T { 33 | &mut (*self.b)[index] 34 | } 35 | } 36 | 37 | impl alloc::SliceWrapper for Rebox { 38 | fn slice(&self) -> &[T] { 39 | &*self.b 40 | } 41 | } 42 | 43 | impl alloc::SliceWrapperMut for Rebox { 44 | fn slice_mut(&mut self) -> &mut [T] { 45 | &mut *self.b 46 | } 47 | } 48 | 49 | pub struct HeapAllocator { 50 | pub default_value: T, 51 | } 52 | 53 | impl alloc::Allocator for HeapAllocator { 54 | type AllocatedMemory = Rebox; 55 | fn alloc_cell(self: &mut HeapAllocator, len: usize) -> Rebox { 56 | let v: Vec = vec![self.default_value.clone();len]; 57 | let b = v.into_boxed_slice(); 58 | Rebox:: { b: b } 59 | } 60 | fn free_cell(self: &mut HeapAllocator, _data: Rebox) {} 61 | } 62 | -------------------------------------------------------------------------------- /src/probability/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Dropbox, Inc 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #![allow(unused)] 16 | use core; 17 | use core::clone::Clone; 18 | pub mod div_lut; 19 | pub mod numeric; 20 | 21 | #[macro_use] 22 | mod common_tests; 23 | pub mod interface; 24 | pub mod external_cdf; 25 | pub mod blend_cdf; 26 | pub mod frequentist_cdf; 27 | mod variant_speed_cdf; 28 | #[cfg(feature="simd")] 29 | pub mod simd_frequentist_cdf; 30 | pub mod opt_frequentist_cdf; 31 | 32 | pub use self::interface::{BaseCDF, CDF16, CDF2, Speed, SpeedPalette, Prob, LOG2_SCALE, BLEND_FIXED_POINT_PRECISION, ProbRange, SPEED_PALETTE_SIZE}; 33 | #[cfg(feature="debug_entropy")] 34 | pub use self::interface::DebugWrapperCDF16; 35 | pub use self::blend_cdf::{BlendCDF16}; 36 | pub use self::frequentist_cdf::FrequentistCDF16; 37 | pub use self::external_cdf::ExternalProbCDF16; 38 | #[cfg(feature="simd")] 39 | pub use self::simd_frequentist_cdf::SIMDFrequentistCDF16; 40 | pub use self::opt_frequentist_cdf::OptFrequentistCDF16; 41 | pub use self::variant_speed_cdf::VariantSpeedCDF; 42 | -------------------------------------------------------------------------------- /src/probability/make_div_lut.rs: -------------------------------------------------------------------------------- 1 | mod numeric; 2 | fn main() { 3 | print!("pub static RECIPROCAL8: [i32; 256] = [\n 0, "); 4 | for divisor in 1..256 { 5 | let next_str = if divisor % 16 == 15 { 6 | "\n " 7 | } else { 8 | " " 9 | }; 10 | let reciprocal = numeric::compute_divisor8(divisor as numeric::Denominator8Type); 11 | let mut fail = false; 12 | for num in 0u16..65535u16 { 
13 | let correct = num as u16 /divisor; 14 | let trial = numeric::fast_divide_16bit_by_8bit(num as u16, reciprocal) as u16; 15 | if trial != correct { 16 | print!("FAIL: {} : {} / {} = fast: {} slow: {}\n", 17 | reciprocal, 18 | num, 19 | divisor, 20 | trial, 21 | correct); 22 | fail = true; 23 | } 24 | } 25 | assert!(!fail); 26 | assert!(reciprocal <= (1<<30)); 27 | print!("{},{}", reciprocal, next_str) 28 | } 29 | print!("];\n"); 30 | print!("pub static RECIPROCAL: [(i64, u8); 65536] = [\n (0,0), "); 31 | for divisor in 1..65536 { 32 | let next_str = if divisor % 16 == 15 { 33 | "\n " 34 | } else { 35 | " " 36 | }; 37 | let reciprocal = numeric::compute_divisor(divisor as numeric::DenominatorType); 38 | for num in 0..65536 { 39 | assert_eq!((num<<15)/divisor, numeric::fast_divide_30bit_by_16bit(num << 15, reciprocal)); 40 | } 41 | print!("({},{}),{}", reciprocal.0, numeric::compute_divisor(divisor as numeric::DenominatorType).1, next_str) 42 | } 43 | print!("];\n"); 44 | } 45 | -------------------------------------------------------------------------------- /c/vec_u8.h: -------------------------------------------------------------------------------- 1 | struct VecU8 { 2 | unsigned char *data; 3 | size_t size; 4 | }; 5 | struct VecU8 new_vec_u8() { 6 | struct VecU8 ret; 7 | ret.data = NULL; 8 | ret.size = 0; 9 | return ret; 10 | } 11 | uint64_t round_up_to_power_of_two(uint64_t v) { 12 | v--; 13 | v |= v >> 1; 14 | v |= v >> 2; 15 | v |= v >> 4; 16 | v |= v >> 8; 17 | v |= v >> 16; 18 | { 19 | uint64_t tmp = v; 20 | tmp >>= 32; 21 | v |= tmp; 22 | } 23 | v++; 24 | return v; 25 | } 26 | 27 | 28 | void push_vec_u8(struct VecU8 *thus, const unsigned char*data, size_t size) { 29 | size_t new_actual_size = thus->size + size; 30 | if (size == 0 || new_actual_size < thus->size) { 31 | return; 32 | } 33 | { 34 | size_t new_alloc_size = round_up_to_power_of_two(new_actual_size); 35 | size_t old_alloc_size = round_up_to_power_of_two(thus->size); 36 | if (thus->size == 0 || 
old_alloc_size != new_alloc_size ) { 37 | unsigned char *tmp = custom_malloc_f(custom_alloc_opaque, new_alloc_size); 38 | size_t to_copy = old_alloc_size; 39 | if (new_alloc_size < old_alloc_size) { 40 | to_copy = new_alloc_size; 41 | } 42 | memcpy(tmp, thus->data, to_copy); 43 | custom_free_f(custom_alloc_opaque, thus->data); 44 | thus->data = tmp; 45 | } 46 | if (new_alloc_size < new_actual_size) { 47 | abort(); // assert 48 | } 49 | memcpy(thus->data + thus->size, data, size); 50 | thus->size = new_actual_size; 51 | } 52 | } 53 | 54 | void release_vec_u8(struct VecU8 *thus) { 55 | if (thus->size) { 56 | custom_free_f(custom_alloc_opaque, thus->data); 57 | thus->size = 0; 58 | thus->data = NULL; 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/codec/specializations.rs: -------------------------------------------------------------------------------- 1 | use ::probability::CDF16; 2 | use alloc::Allocator; 3 | pub use super::interface::{CrossCommandBookKeeping,LiteralBookKeeping}; 4 | 5 | pub trait CodecTraits { 6 | const MIXING_PRIORS: bool; 7 | } 8 | macro_rules! 
/// Marker trait carrying a single compile-time flag, `HIGH_NIBBLE`.
///
/// Implementors are empty types, so code generic over this trait can branch
/// on the constant at compile time.
pub trait NibbleHalfTrait {
    /// `true` for [`HighNibbleTrait`], `false` for [`LowNibbleTrait`].
    const HIGH_NIBBLE: bool;
}

// Generates an empty marker struct, its `NibbleHalfTrait` impl and a shared
// static instance, in the same shape as `define_codec_trait!`.
macro_rules! define_nibble_half {
    ($marker: ident, $instance: ident, high: $is_high: expr) => {
        pub struct $marker {}
        impl NibbleHalfTrait for $marker {
            const HIGH_NIBBLE: bool = $is_high;
        }
        pub static $instance: $marker = $marker {};
    };
}

define_nibble_half!(HighNibbleTrait, HIGH_NIBBLE_TRAIT, high: true);
define_nibble_half!(LowNibbleTrait, LOW_NIBBLE_TRAIT, high: false);
alloc::{AllocatedStackMemory, Allocator, SliceWrapper, SliceWrapperMut, StackAllocator}; 5 | pub use super::divans_decompressor::StaticCommand; 6 | pub use core::marker::PhantomData; 7 | 8 | pub struct ParallelDivansProcess, 9 | AllocU8:Allocator, 10 | AllocCDF16:Allocator, 11 | AllocCommand:Allocator> { 12 | p0: PhantomData, 13 | p1: PhantomData, 14 | p2: PhantomData, 15 | p3: PhantomData, 16 | } 17 | 18 | impl, 19 | AllocU8:Allocator, 20 | AllocCDF16:Allocator, 21 | AllocCommand:Allocator> 22 | ParallelDivansProcess { 23 | 24 | pub fn new(_header: &mut T, mut _window_size: usize) -> Self { 25 | unimplemented!(); 26 | } 27 | pub fn decode(&mut self, 28 | _input:&[u8], 29 | _input_offset:&mut usize, 30 | _output:&mut [u8], 31 | _output_offset: &mut usize) -> DivansResult { 32 | unimplemented!(); 33 | } 34 | pub fn free_ref(&mut self){ 35 | unimplemented!(); 36 | } 37 | pub fn free(self) -> (AllocU8, AllocCDF16, AllocCommand) { 38 | unimplemented!(); 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /examples/compress.rs: -------------------------------------------------------------------------------- 1 | extern crate divans; 2 | #[cfg(feature="no-stdlib")] 3 | fn main() { 4 | panic!("For no-stdlib examples please see the tests") 5 | } 6 | #[cfg(not(feature="no-stdlib"))] 7 | fn main() { 8 | let example_opts = divans::DivansCompressorOptions::default(); 9 | use std::io; 10 | let stdout = &mut io::stdout(); 11 | { 12 | use std::io::Write; 13 | let mut writer = divans::DivansBrotliHybridCompressorWriter::new( 14 | stdout, 15 | divans::DivansCompressorOptions{ 16 | brotli_literal_byte_score:example_opts.brotli_literal_byte_score, 17 | force_literal_context_mode:example_opts.force_literal_context_mode, 18 | literal_adaptation:example_opts.literal_adaptation, // should we override how fast the cdfs converge for literals? 
19 | window_size:example_opts.window_size, // log 2 of the window size 20 | lgblock:example_opts.lgblock, // should we override how often metablocks are created in brotli 21 | quality:example_opts.quality, // the quality of brotli commands 22 | q9_5:example_opts.q9_5, 23 | dynamic_context_mixing:example_opts.dynamic_context_mixing, // if we want to mix together the stride prediction and the context map 24 | prior_depth:example_opts.prior_depth, 25 | use_brotli:example_opts.use_brotli, // ignored 26 | use_context_map:example_opts.use_context_map, // whether we should use the brotli context map in addition to the last 8 bits of each byte as a prior 27 | force_stride_value: example_opts.force_stride_value, // if we should use brotli to decide on the stride 28 | speed_detection_quality: example_opts.speed_detection_quality, 29 | stride_detection_quality: example_opts.stride_detection_quality, 30 | prior_bitmask_detection: example_opts.prior_bitmask_detection, 31 | divans_ir_optimizer:example_opts.divans_ir_optimizer, 32 | }, 33 | 4096, // internal buffer size 34 | ); 35 | io::copy(&mut io::stdin(), &mut writer).unwrap(); 36 | writer.flush().unwrap(); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /no-stdlib/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "divans-no-stdlib" 3 | version = "0.0.1" 4 | authors = [ 5 | "Daniel Reiter Horn ", 6 | "Jongmin Baek ", 7 | "Anatoly Yakovenko " 8 | ] 9 | license = "BSD-3-Clause/MIT or Apache" 10 | documentation = "tbd" 11 | keywords = ["brotli", "decompression", "lz77", "huffman", "nostd"] 12 | readme = "README.md" 13 | build = "../build.rs" 14 | 15 | [[bin]] 16 | name="divans" 17 | path="../src/bin/divans.rs" 18 | 19 | [lib] 20 | crate-type=["rlib"] 21 | path="../src/lib.rs" 22 | 23 | [profile.release] 24 | lto=true 25 | 26 | [build-dependencies] 27 | vergen = "0.1" 28 | 29 | [dependencies] 30 | 
"alloc-no-stdlib" = "~1.3" 31 | "brotli" = { version = "~2.5"} 32 | 33 | [features] 34 | default= ["no-stdlib","no-stdlib-rust-binding", "no-stdlib-rlib"] 35 | # allow use of SIMD to update probability and compute crc32c 36 | simd = [] 37 | 38 | # use avx2-specific instructions 39 | avx2=[] 40 | 41 | # avoid inlining some key functions (for profiling). Significant perf hit 42 | no-inline=[] 43 | 44 | # do not compile any unsafe code (this disables C-FFI) 45 | safe=[] 46 | 47 | # use 65536-sized table to do 16 bit integer divides: similar speed, but strains memory subsystem 48 | avoid-divide=[] 49 | 50 | #use dynamic CDF blending depending on samples 51 | findspeed = [] 52 | 53 | # print breakdown of which parts of the file cost 54 | billing = [] 55 | 56 | # use divisionless CDF: averages with other CDFs rather than counting samples like FrequentistCDF 57 | blend = [] 58 | 59 | # print trace of probability 60 | debug_entropy = [] 61 | 62 | # only use portable SIMD instructions for probability updates 63 | portable-simd = [] 64 | 65 | no-stdlib = ["alloc-no-stdlib/no-stdlib", "brotli/no-stdlib"] 66 | 67 | # bind to rust with nostdlib 68 | no-stdlib-rust-binding = [] 69 | 70 | # turn on benchmark tests and --bench commands (requires nigtly) 71 | benchmark = ["brotli/benchmark"] 72 | 73 | no-stdlib-rlib = [] 74 | 75 | # allow specification of a external probability array, to test new prediction schemes 76 | external-literal-probability = ["brotli/external-literal-probability"] 77 | 78 | # frequentist without division tables 79 | uncached_frequentist = [] 80 | 81 | -------------------------------------------------------------------------------- /src/ffi/interface.rs: -------------------------------------------------------------------------------- 1 | #[allow(non_camel_case_types)] 2 | #[repr(u8)] 3 | pub enum c_void{ 4 | _Nothing = 0, 5 | } 6 | 7 | #[no_mangle] 8 | pub type DivansReturnCode = u8; 9 | pub const DIVANS_SUCCESS: DivansReturnCode = 0; 10 | pub const 
DIVANS_NEEDS_MORE_INPUT: DivansReturnCode = 1; 11 | pub const DIVANS_NEEDS_MORE_OUTPUT: DivansReturnCode = 2; 12 | pub const DIVANS_FAILURE: DivansReturnCode = 3; 13 | 14 | 15 | 16 | pub type DivansOptionSelect = u8; 17 | 18 | pub const DIVANS_OPTION_QUALITY:DivansOptionSelect = 1; 19 | pub const DIVANS_OPTION_WINDOW_SIZE:DivansOptionSelect = 2; 20 | pub const DIVANS_OPTION_LGBLOCK:DivansOptionSelect = 3; 21 | pub const DIVANS_OPTION_DYNAMIC_CONTEXT_MIXING:DivansOptionSelect = 4; 22 | pub const DIVANS_OPTION_USE_BROTLI_COMMAND_SELECTION:DivansOptionSelect = 5; 23 | pub const DIVANS_OPTION_USE_BROTLI_BITSTREAM:DivansOptionSelect = 6; 24 | pub const DIVANS_OPTION_USE_CONTEXT_MAP:DivansOptionSelect = 7; 25 | pub const DIVANS_OPTION_LITERAL_ADAPTATION_CM_HIGH:DivansOptionSelect = 8; 26 | pub const DIVANS_OPTION_FORCE_STRIDE_VALUE:DivansOptionSelect = 9; 27 | pub const DIVANS_OPTION_STRIDE_DETECTION_QUALITY:DivansOptionSelect = 10; 28 | pub const DIVANS_OPTION_PRIOR_DEPTH:DivansOptionSelect = 11; 29 | pub const DIVANS_OPTION_LITERAL_ADAPTATION_STRIDE_HIGH:DivansOptionSelect = 12; 30 | pub const DIVANS_OPTION_LITERAL_ADAPTATION_CM_LOW:DivansOptionSelect = 13; 31 | pub const DIVANS_OPTION_LITERAL_ADAPTATION_STRIDE_LOW:DivansOptionSelect = 14; 32 | pub const DIVANS_OPTION_BROTLI_LITERAL_BYTE_SCORE:DivansOptionSelect = 15; 33 | pub const DIVANS_OPTION_SPEED_DETECTION_QUALITY:DivansOptionSelect = 16; 34 | pub const DIVANS_OPTION_PRIOR_BITMASK_DETECTION:DivansOptionSelect = 17; 35 | pub const DIVANS_OPTION_Q9_5:DivansOptionSelect = 18; 36 | pub const DIVANS_OPTION_FORCE_LITERAL_CONTEXT_MODE:DivansOptionSelect = 19; 37 | pub const DIVANS_OPTION_IR_OPTIMIZER:DivansOptionSelect = 20; 38 | 39 | 40 | #[repr(C)] 41 | #[no_mangle] 42 | #[derive(Clone)] 43 | pub struct CAllocator { 44 | pub alloc_func: Option *mut c_void>, 45 | pub free_func: Option ()>, 46 | pub opaque: *mut c_void, 47 | } 48 | 49 | unsafe impl Send for CAllocator { 50 | } 51 | 52 | 53 | 
-------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "divans" 3 | version = "0.0.1" 4 | authors = [ 5 | "Daniel Reiter Horn ", 6 | "Jongmin Baek ", 7 | "Anatoly Yakovenko " 8 | ] 9 | license = "Apache-2.0" 10 | description = "DivANS is a new way of structuring compression programs to make them more open to innovation in the wider community, by separating compression into multiple stages that can each be improved independently" 11 | documentation = "https://blogs.dropbox.com/tech/2018/06/building-better-compression-together-with-divans/" 12 | keywords = ["brotli", "decompression", "lz77", "huffman", "nostd"] 13 | readme = "README.md" 14 | build = "build.rs" 15 | autobins = false 16 | 17 | [lib] 18 | crate-type=["cdylib", "rlib", "staticlib"] 19 | 20 | [[bin]] 21 | doc = false 22 | name = "divans" 23 | 24 | [build-dependencies] 25 | vergen = "0.1" 26 | 27 | [dependencies] 28 | "alloc-no-stdlib" = "~2.0" 29 | 30 | "brotli" = {version = "~3.1"} 31 | "packed_simd" = {"optional"=true, version="0.3"} 32 | "alloc-stdlib" = {"optional"=true, version="~0.2"} 33 | 34 | [features] 35 | default = ["std"] 36 | 37 | # allow use of SIMD to update probability and compute crc32c 38 | simd = ["brotli/simd", "packed_simd/into_bits"] 39 | 40 | # use avx2-specific instructions 41 | avx2=[] 42 | 43 | # avoid inlining some key functions (for profiling). 
Significant perf hit 44 | no-inline=[] 45 | 46 | # do not compile any unsafe code (this disables C-FFI) 47 | safe=[] 48 | 49 | # use 65536-sized table to do 16 bit integer divides: similar speed, but strains memory subsystem 50 | avoid-divide=[] 51 | 52 | #use dynamic CDF blending depending on samples 53 | findspeed = [] 54 | 55 | # print breakdown of which parts of the file cost 56 | billing = [] 57 | 58 | # use divisionless CDF: averages with other CDFs rather than counting samples like FrequentistCDF 59 | blend = [] 60 | 61 | # print trace of probability 62 | debug_entropy = [] 63 | 64 | # only use portable SIMD instructions for probability updates 65 | portable-simd = [] 66 | 67 | std = ["alloc-stdlib", "brotli/std"] 68 | 69 | no-stdlib-rust-binding = [] 70 | 71 | # turn on benchmark tests and --bench commands (requires nightly) 72 | benchmark = ["brotli/benchmark"] 73 | 74 | 75 | # allow specification of an external probability array, to test new prediction schemes 76 | external-literal-probability = ["brotli/external-literal-probability"] 77 | 78 | uncached_frequentist = [] 79 | 80 | threadlog = [] 81 | -------------------------------------------------------------------------------- /research/bill_transform.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from collections import defaultdict 3 | 4 | features = defaultdict(lambda:0) 5 | remap = { 6 | 'CountLengthFirst': 'CopyLength', 7 | 'CountMantissaNibbles': 'CopyLength', 8 | 'CountSmall': 'CopyLength', 9 | 'DistanceLengthFirst': 'CopyDistance', 10 | 'DistanceLengthGreater15Less25': 'CopyDistance', 11 | 'DistanceLengthMnemonic': 'CopyDistance', 12 | 'DistanceMantissaNibbles': 'CopyDistance', 13 | 'BlockSwitchType': 'BlockTypeMetadata', 14 | 'FullSelection': 'CopyLength', # not quite truthful 15 | 'LiteralCountFirst': 'CopyLength', # not quite truthful 16 | 'LiteralCountLengthGreater14Less25': 'CopyLength', # not quite truthful 17 | 
'LiteralCountMantissaNibbles': 'CopyLength', # not quite truthful 18 | 'LiteralCountSmall': 'CopyLength', # not quite truthful 19 | 'LiteralNibbleIndex': 'ComplexLiterals', 20 | 'LiteralNibbleIndex': 'ComplexLiterals', 21 | 'Begin': 'Misc', 22 | 'TransformHigh':'DictIndex', 23 | 'TransformLow':'DictIndex', 24 | 'WordIndexMantissa':'DictIndex', 25 | 'WordSizeFirst':'DictLength', 26 | 'ContextMapFirstNibble(0, Literal)': 'LiteralContextMode', 27 | 'ContextMapFirstNibble(0, Distance)': 'DistanceContextMode', 28 | 'ContextMapMnemonic(0, Literal)': 'LiteralContextMode', 29 | 'ContextMapMnemonic(0, Distance)': 'DistanceContextMode', 30 | 'ContextMapSecondNibble(0, Literal, 0)': 'LiteralContextMode', 31 | 'ContextMapSecondNibble(0, Distance, 0)': 'DistanceContextMode', 32 | 'DynamicContextMixing': 'Misc', 33 | 'LiteralAdaptationRate': 'Misc', 34 | 'PriorDepth': 'Misc', 35 | } 36 | for line in open(sys.argv[1]): 37 | if 'Total' in line: 38 | break 39 | pairmatch = '('.join(line.split('(')[1:]).split(')')[:-1] 40 | typ = ')'.join(pairmatch) 41 | counts = line.split('count:')[1:] 42 | byte_count_str = counts[1].strip().split(' ')[0].strip() 43 | byte_count = float(byte_count_str) 44 | if typ not in remap: 45 | typ = typ.split('(')[0] 46 | if typ not in remap: 47 | print typ, 'not found' 48 | continue 49 | features[remap[typ]] += byte_count 50 | maxb = max(len(str(item)) for item in features.values()) 51 | maxa = max(len(str(int(item*8 + .5))) for item in features.values()) 52 | for item in sorted(features.keys()): 53 | bitval = str(int(features[item] * 8 + .5)) 54 | byteval = str(features[item]) 55 | print bitval + ' '*(maxa + 2 - len(bitval)) + byteval + ' '*(maxb + 2 - len(byteval)) + item 56 | -------------------------------------------------------------------------------- /wasm/brotli_wrapper.js: -------------------------------------------------------------------------------- 1 | (function() { 2 | var brotliIframe = null; 3 | var brotliWindow = null; 4 | 5 | var 
brotliReady = false; 6 | var brotliProcessing = null; 7 | var brotliInputQueue = []; 8 | 9 | window.onmessage = function(e) { 10 | brotliWindow = brotliIframe.contentWindow; 11 | if (e.source !== brotliWindow) { 12 | throw "Invalid source " + e.source; 13 | } 14 | msgtype = e.data[0]; 15 | if (msgtype == "brotli-worker-ready") { 16 | if (brotliInputQueue.length > 0) { 17 | var queueEl = brotliInputQueue.shift(); 18 | processBrotliNow(queueEl[0], queueEl[1]); 19 | } else { 20 | brotliReady = true; 21 | } 22 | } 23 | if (msgtype == "brotli-finished") { 24 | var outputLen = e.data[1]; 25 | var intermediateRep = e.data[2]; 26 | var originalInput = brotliProcessing; 27 | var finFunc = brotliFinished; 28 | brotliProcessing = null; 29 | brotliFinished = null; 30 | setTimeout(function() { 31 | finFunc(originalInput, outputLen, intermediateRep); 32 | }, 0); 33 | } 34 | } 35 | 36 | function processBrotliNow(arrayBuf, finishedFunc) { 37 | brotliProcessing = arrayBuf; 38 | brotliFinished = finishedFunc; 39 | brotliReady = false; 40 | setTimeout(function() { 41 | 42 | brotliWindow.postMessage(max_quality, "*"); 43 | brotliWindow.postMessage(arrayBuf, "*", [arrayBuf]); 44 | }, 0); 45 | } 46 | 47 | function addToBrotliQueue(arrayBuf, finishedFunc) { 48 | if (brotliReady) { 49 | brotliReady = false; 50 | processBrotliNow(arrayBuf, finishedFunc); 51 | } else { 52 | brotliInputQueue.push([arrayBuf, finishedFunc]); 53 | } 54 | } 55 | 56 | function createBrotliIframe() { 57 | brotliIframe = document.createElement("iframe"); 58 | brotliIframe.setAttribute("id", "brotli_iframe"); 59 | brotliIframe.setAttribute("src", "brotli_iframe.html"); 60 | brotliIframe.setAttribute("sandbox", "allow-scripts"); 61 | brotliIframe.style.display = "none"; 62 | document.body.appendChild(brotliIframe); 63 | } 64 | 65 | function init() { 66 | createBrotliIframe(); 67 | } 68 | document.addEventListener("DOMContentLoaded", init); 69 | 70 | function runBrotliDestroysInput(arrayBuf, finishedFunc) { 71 | if 
(!(arrayBuf instanceof ArrayBuffer)) { 72 | throw "Invalid input"; 73 | } 74 | addToBrotliQueue(arrayBuf, finishedFunc); 75 | } 76 | 77 | window.Brotli = { 78 | init: init, 79 | runBrotliDestroysInput: runBrotliDestroysInput 80 | }; 81 | 82 | })(); 83 | -------------------------------------------------------------------------------- /src/constants.rs: -------------------------------------------------------------------------------- 1 | pub static UTF8_CONTEXT_LOOKUP: [u8; 512] = 2 | [0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3 | 8, 12, 16, 12, 12, 20, 12, 16, 24, 28, 12, 12, 32, 12, 36, 12, 44, 44, 44, 44, 44, 44, 44, 44, 4 | 44, 44, 32, 32, 24, 40, 28, 12, 12, 48, 52, 52, 52, 48, 52, 52, 52, 48, 52, 52, 52, 52, 52, 48, 5 | 52, 52, 52, 52, 52, 48, 52, 52, 52, 52, 52, 24, 12, 28, 12, 12, 12, 56, 60, 60, 60, 56, 60, 60, 6 | 60, 56, 60, 60, 60, 60, 60, 56, 60, 60, 60, 60, 60, 56, 60, 60, 60, 60, 60, 24, 12, 28, 12, 0, 7 | 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 8 | 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 9 | 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 10 | 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 11 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12 | 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 13 | 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 14 | 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0, 15 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 17 | 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]; 19 | 20 | pub static SIGNED_3_BIT_CONTEXT_LOOKUP: [u8; 256] = 21 | [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 22 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 23 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 24 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 25 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 26 | 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 27 | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 28 | 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7]; 29 | -------------------------------------------------------------------------------- /src/resizable_buffer.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Dropbox, Inc 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | use core; 16 | pub use super::alloc::{Allocator, SliceWrapper, SliceWrapperMut}; 17 | 18 | 19 | pub struct ResizableByteBuffer> { 20 | data: AllocT::AllocatedMemory, 21 | size: usize, 22 | } 23 | impl> Default for ResizableByteBuffer{ 24 | fn default() -> Self { 25 | Self::new() 26 | } 27 | } 28 | impl> ResizableByteBuffer{ 29 | pub fn new() -> Self { 30 | ResizableByteBuffer:: { 31 | data: AllocT::AllocatedMemory::default(), 32 | size: 0, 33 | } 34 | } 35 | fn ensure_free_space_in_buffer(&mut self, allocator: &mut AllocT, min_size: usize) { 36 | if self.data.slice().is_empty() { 37 | self.data = allocator.alloc_cell(66_000); // some slack room to deal with worst case compression sizes 38 | } else if self.size + min_size > self.data.slice().len() { 39 | let mut cell = allocator.alloc_cell(self.size * 2); 40 | cell.slice_mut().split_at_mut(self.size).0.clone_from_slice(self.data.slice().split_at(self.size).0); 41 | allocator.free_cell(core::mem::replace(&mut self.data, cell)); 42 | } 43 | } 44 | pub fn checkout_next_buffer(&mut self, allocator: &mut AllocT, min_size: Option) -> &mut [T] { 45 | self.ensure_free_space_in_buffer(allocator, min_size.unwrap_or(1)); 46 | self.data.slice_mut().split_at_mut(self.size).1 47 | } 48 | pub fn commit_next_buffer(&mut self, size:usize) { 49 | self.size += size; 50 | } 51 | pub fn len(&self) -> usize { 52 | self.size 53 | } 54 | pub fn is_empty(&self) -> bool { 55 | self.size == 0 56 | } 57 | pub fn slice(&self) -> &[T] { 58 | self.data.slice().split_at(self.size).0 59 | } 60 | pub fn free(&mut self, allocator: &mut AllocT) { 61 | allocator.free_cell(core::mem::replace(&mut self.data, AllocT::AllocatedMemory::default())) 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /src/debug_encoder.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Dropbox, Inc 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the 
"License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | use core; 15 | use super::arithmetic_coder::{ 16 | EntropyEncoder, 17 | ByteQueue, 18 | RegisterQueue, 19 | EntropyDecoder, 20 | }; 21 | use probability::CDF16; 22 | use super::interface::ArithmeticEncoderOrDecoder; 23 | use super::DivansResult; 24 | #[derive(Default)] 25 | pub struct DebugEncoder { 26 | buffer: RegisterQueue, 27 | } 28 | 29 | 30 | impl EntropyEncoder for DebugEncoder { 31 | type Queue = RegisterQueue; 32 | fn get_internal_buffer_mut(&mut self) -> &mut RegisterQueue { 33 | &mut self.buffer 34 | } 35 | fn get_internal_buffer(&self) -> &RegisterQueue { 36 | &self.buffer 37 | } 38 | fn put_bit(&mut self, bit: bool, prob_of_false: u8) { 39 | assert!(self.buffer.num_push_bytes_avail() > 0); 40 | let buf_to_push = [prob_of_false ^ bit as u8]; 41 | let cnt = self.buffer.push_data(&buf_to_push[..]); 42 | assert_eq!(cnt, 1); 43 | } 44 | fn flush(&mut self) { 45 | } 46 | } 47 | 48 | #[derive(Default)] 49 | pub struct DebugDecoder { 50 | buffer: RegisterQueue, 51 | } 52 | 53 | 54 | impl EntropyDecoder for DebugDecoder { 55 | type Queue = RegisterQueue; 56 | fn get_internal_buffer_mut(&mut self) -> &mut RegisterQueue { 57 | &mut self.buffer 58 | } 59 | fn get_internal_buffer(&self) -> &RegisterQueue { 60 | &self.buffer 61 | } 62 | fn get_bit(&mut self, prob_of_false: u8) -> bool { 63 | assert!(self.buffer.num_pop_bytes_avail() > 0); 64 | let mut buf_to_pop = [0u8]; 65 | let cnt = self.buffer.pop_data(&mut buf_to_pop[..]); 66 
| assert_eq!(cnt, 1); 67 | let return_value = buf_to_pop[0] ^ prob_of_false; 68 | if return_value != 0 { 69 | assert_eq!(return_value, 1); 70 | } 71 | return_value != 0 72 | } 73 | fn flush(&mut self) -> DivansResult { 74 | DivansResult::Success 75 | } 76 | } 77 | 78 | impl DebugEncoder { 79 | fn mov_internal(&mut self) -> Self { 80 | core::mem::replace(self, DebugEncoder::default()) 81 | } 82 | } 83 | impl ArithmeticEncoderOrDecoder for DebugEncoder { 84 | arithmetic_encoder_or_decoder_methods!(); 85 | } 86 | -------------------------------------------------------------------------------- /c/custom_alloc.h: -------------------------------------------------------------------------------- 1 | int use_real_malloc = 1; 2 | int use_fake_malloc = 0; 3 | void* custom_alloc_opaque = &use_real_malloc; 4 | unsigned char huge_buffer[1024*1024 * 255]; 5 | size_t huge_buffer_offset = 0; 6 | const uint32_t science = 0x5C1E11CE; 7 | 8 | void * custom_malloc_f(void* opaque, size_t user_size) { 9 | unsigned char * retval; 10 | size_t amt = user_size + 2*sizeof(opaque) + 4 + 32; 11 | if (opaque == &use_fake_malloc) { 12 | retval = &huge_buffer[huge_buffer_offset]; 13 | huge_buffer_offset += amt; 14 | } else { 15 | retval = (unsigned char*)malloc(amt); 16 | } 17 | memset(retval, 0x34, 2*sizeof(opaque) + 4 + 32); // make sure control areas are initialized to something--to help debug 18 | memcpy(retval, &science, 4); 19 | memcpy(retval + 4, &opaque, sizeof(opaque)); 20 | memcpy(retval + 4 + sizeof(opaque), &user_size, sizeof(size_t)); 21 | signed char alignment_offset = (32 - (((size_t)(retval + 4 + sizeof(opaque) + sizeof(size_t) + 1)) & 0x1f)) & 0x1f; 22 | retval[sizeof(opaque) + sizeof(size_t) + 4 + alignment_offset] = alignment_offset; 23 | void * final_return = retval + sizeof(opaque) + sizeof(size_t) + 4 + 1 + alignment_offset; 24 | assert((((size_t)final_return)&0x1f) == 0); 25 | return final_return; 26 | } 27 | void * (*custom_malloc)(void* opaque, size_t data) = 
&custom_malloc_f; 28 | void custom_free_f(void* opaque, void *mfd) { 29 | void * local_opaque; 30 | uint32_t local_science; 31 | size_t local_size = 0; 32 | char * local_mfd = (char *)mfd; 33 | if (mfd == NULL) { 34 | return; 35 | } 36 | local_mfd -= 1; 37 | local_mfd -= *local_mfd; 38 | local_mfd -= 4; 39 | local_mfd -= sizeof(opaque); 40 | local_mfd -= sizeof(size_t); 41 | memcpy(&local_science, local_mfd, 4); 42 | assert(local_science == science); 43 | memcpy(&local_opaque, local_mfd + 4, sizeof(opaque)); 44 | memcpy(&local_size, local_mfd + 4 + sizeof(opaque), sizeof(size_t)); 45 | assert(opaque == local_opaque); 46 | if (opaque == &use_fake_malloc) { 47 | void *retval = &huge_buffer[huge_buffer_offset]; 48 | if ((void*)(retval - local_size) == mfd) { 49 | huge_buffer_offset -= 4 + sizeof(opaque) + sizeof(size_t) + local_size; 50 | } 51 | } else { 52 | free(local_mfd); 53 | } 54 | } 55 | 56 | void (*custom_free)(void* opaque, void *mfd) = &custom_free_f; 57 | void custom_atoi(char * dst, size_t data) { 58 | if (!data) { 59 | memcpy(dst, "0\0", 2); 60 | return; 61 | } 62 | char *ptr = dst; 63 | while(data) { 64 | *ptr = '0' + (data % 10); 65 | ++ptr; 66 | data /= 10; 67 | } 68 | *ptr = '\0'; 69 | int del = (int)(ptr - dst); 70 | int i; 71 | for (i = 0;i < del/2;i+= 1) { 72 | char tmp = dst[i]; 73 | dst[i] = *(ptr - i - 1); 74 | *(ptr - i - 1) = tmp; 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /src/probability/frequentist_cdf.rs: -------------------------------------------------------------------------------- 1 | use core; 2 | use super::interface::{Prob, BaseCDF, Speed, CDF16, BLEND_FIXED_POINT_PRECISION}; 3 | fn to_bit_i32(val: i32, shift_val: u8) -> u32 { 4 | if val != 0 { 5 | 1 << shift_val 6 | } else { 7 | 0 8 | } 9 | } 10 | 11 | 12 | #[derive(Clone,Copy)] 13 | pub struct FrequentistCDF16 { 14 | pub cdf: [Prob; 16] 15 | } 16 | 17 | impl Default for FrequentistCDF16 { 18 | fn default() -> Self { 19 | 
FrequentistCDF16 { 20 | cdf: [4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64] 21 | } 22 | } 23 | } 24 | 25 | 26 | impl BaseCDF for FrequentistCDF16 { 27 | fn num_symbols() -> u8 { 16 } 28 | fn used(&self) -> bool { 29 | self.entropy() != Self::default().entropy() 30 | } 31 | #[inline(always)] 32 | fn max(&self) -> Prob { 33 | self.cdf[15] 34 | } 35 | #[inline(always)] 36 | fn div_by_max(&self, val:i32) -> i32 { 37 | return val / i32::from(self.max()) 38 | } 39 | fn log_max(&self) -> Option { None } 40 | #[inline(always)] 41 | fn cdf(&self, symbol: u8) -> Prob { 42 | self.cdf[symbol as usize & 0xf] 43 | } 44 | fn valid(&self) -> bool { 45 | let mut prev = 0; 46 | for item in self.cdf.split_at(15).0.iter() { 47 | if *item <= prev { 48 | return false; 49 | } 50 | prev = *item; 51 | } 52 | true 53 | } 54 | } 55 | 56 | impl CDF16 for FrequentistCDF16 { 57 | #[inline(always)] 58 | fn average(&self, other:&Self, mix_rate:i32) -> Self { 59 | let mut retval = *self; 60 | let ourmax = i32::from(self.max()); 61 | let othermax = i32::from(other.max()); 62 | let ourmax_times_othermax = ourmax * othermax; 63 | let leading_zeros_combo = core::cmp::min(ourmax_times_othermax.leading_zeros(), 17); 64 | let desired_shift = 17 - leading_zeros_combo; 65 | let inv_mix_rate = (1 << BLEND_FIXED_POINT_PRECISION) - mix_rate; 66 | for (s, o) in retval.cdf.iter_mut().zip(other.cdf.iter()) { 67 | let rescaled_self = (i32::from(*s) * othermax) >> desired_shift; 68 | let rescaled_other = (i32::from(*o) * ourmax) >> desired_shift; 69 | *s = ((rescaled_self * mix_rate + rescaled_other * inv_mix_rate + 1) >> BLEND_FIXED_POINT_PRECISION) as Prob; 70 | } 71 | retval 72 | } 73 | #[inline(always)] 74 | fn blend(&mut self, symbol: u8, speed: Speed) { 75 | const CDF_BIAS : [Prob;16] = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]; 76 | let increment : Prob = speed.inc() as Prob; 77 | for i in (symbol as usize)..16 { 78 | self.cdf[i] = self.cdf[i].wrapping_add(increment); 79 | } 80 | if 
self.cdf[15] >= speed.lim() { 81 | for i in 0..16 { 82 | self.cdf[i] = self.cdf[i].wrapping_add(CDF_BIAS[i]).wrapping_sub(self.cdf[i].wrapping_add(CDF_BIAS[i]) >> 2); 83 | } 84 | } 85 | } 86 | } 87 | 88 | #[cfg(test)] 89 | mod test { 90 | use super::FrequentistCDF16; 91 | declare_common_tests!(FrequentistCDF16); 92 | } 93 | -------------------------------------------------------------------------------- /c/divans/ffi.h: -------------------------------------------------------------------------------- 1 | #ifndef _DIVANS_H_ 2 | #define _DIVANS_H_ 3 | #include 4 | #include 5 | 6 | typedef uint8_t DivansResult; 7 | 8 | #define DIVANS_SUCCESS ((uint8_t)0) 9 | #define DIVANS_NEEDS_MORE_INPUT ((uint8_t)1) 10 | #define DIVANS_NEEDS_MORE_OUTPUT ((uint8_t)2) 11 | #define DIVANS_FAILURE ((uint8_t)3) 12 | 13 | typedef uint8_t DivansOptionSelect; 14 | 15 | #define DIVANS_OPTION_QUALITY 1 16 | #define DIVANS_OPTION_WINDOW_SIZE 2 17 | #define DIVANS_OPTION_LGBLOCK 3 18 | #define DIVANS_OPTION_DYNAMIC_CONTEXT_MIXING 4 19 | #define DIVANS_OPTION_USE_BROTLI_COMMAND_SELECTION 5 20 | #define DIVANS_OPTION_USE_BROTLI_BITSTREAM 6 21 | #define DIVANS_OPTION_USE_CONTEXT_MAP 7 22 | #define DIVANS_OPTION_LITERAL_ADAPTATION_CM_HIGH 8 23 | #define DIVANS_OPTION_FORCE_STRIDE_VALUE 9 24 | #define DIVANS_OPTION_STRIDE_DETECTION_QUALITY 10 25 | #define DIVANS_OPTION_PRIOR_DEPTH 11 26 | #define DIVANS_OPTION_LITERAL_ADAPTATION_STRIDE_HIGH 12 27 | #define DIVANS_OPTION_LITERAL_ADAPTATION_CM_LOW 13 28 | #define DIVANS_OPTION_LITERAL_ADAPTATION_STRIDE_LOW 14 29 | #define DIVANS_OPTION_BROTLI_LITERAL_BYTE_SCORE 15 30 | #define DIVANS_OPTION_SPEED_DETECTION_QUALITY 16 31 | #define DIVANS_OPTION_PRIOR_BITMASK_DETECTION 17 32 | #define DIVANS_OPTION_Q9_5 18 33 | #define DIVANS_OPTION_FORCE_LITERAL_CONTEXT_MODE 19 34 | 35 | 36 | /// a struct specifying custom allocators for divans to use instead of the builtin rust allocators. 37 | /// if all 3 values are set to NULL, the Rust allocators are used instead. 
38 | struct CAllocator { 39 | /// Allocate length bytes. The returned pointer must be 32-byte aligned unless divans was built without features=simd 40 | void* (*alloc_func)(void * opaque, size_t length); 41 | void (*free_func)(void * opaque, void * mfd); 42 | void * opaque; 43 | }; 44 | struct DivansDecompressorState; 45 | struct DivansCompressorState; 46 | 47 | struct DivansCompressorState* divans_new_compressor(); 48 | struct DivansCompressorState* divans_new_compressor_with_custom_alloc(struct CAllocator alloc); 49 | DivansResult divans_set_option(struct DivansCompressorState* state, DivansOptionSelect selector, uint32_t value); 50 | DivansResult divans_encode(struct DivansCompressorState* state, 51 | const uint8_t *input_buf_ptr, size_t input_size, size_t*input_offset, 52 | uint8_t *output_buf_ptr, size_t output_size, size_t *output_offset); 53 | 54 | DivansResult divans_encode_flush(struct DivansCompressorState* state, 55 | uint8_t *output_buf_ptr, size_t output_size, size_t *output_offset); 56 | 57 | void divans_free_compressor(struct DivansCompressorState* mfd); 58 | 59 | 60 | struct DivansDecompressorState* divans_new_decompressor(); 61 | struct DivansDecompressorState* divans_new_decompressor_with_custom_alloc(struct CAllocator alloc, uint8_t skip_crc); 62 | DivansResult divans_decode(struct DivansDecompressorState* state, 63 | const uint8_t *input_buf_ptr, size_t input_size, size_t*input_offset, 64 | uint8_t *output_buf_ptr, size_t output_size, size_t *output_offset); 65 | 66 | void divans_free_decompressor(struct DivansDecompressorState* mfd); 67 | 68 | 69 | 70 | #endif 71 | -------------------------------------------------------------------------------- /src/probability/numeric.rs: -------------------------------------------------------------------------------- 1 | #[cfg(not(feature="division_table_gen"))] 2 | use super::div_lut; 3 | 4 | #[cfg(feature="simd")] 5 | use packed_simd; 6 | #[cfg(not(feature="division_table_gen"))] 7 | pub type 
DenominatorType = i16; 8 | #[cfg(feature="division_table_gen")] 9 | pub type DenominatorType = u16; 10 | #[inline(always)] 11 | fn k16bit_length(d:DenominatorType) -> u8 { 12 | (16 - d.leading_zeros()) as u8 13 | } 14 | pub const LOG_MAX_NUMERATOR: usize = 31; 15 | #[inline(always)] 16 | pub fn compute_divisor(d: DenominatorType) -> (i64, u8) { 17 | let bit_len = k16bit_length(d); 18 | (((((( 1i64 << bit_len) - i64::from(d)) << (LOG_MAX_NUMERATOR))) / i64::from(d)) + 1, bit_len.wrapping_sub(1)) 19 | } 20 | #[cfg(not(feature="division_table_gen"))] 21 | #[inline(always)] 22 | pub fn lookup_divisor(d: i16) -> (i64, u8) { 23 | div_lut::RECIPROCAL[d as u16 as usize] 24 | } 25 | #[inline(always)] 26 | pub fn fast_divide_30bit_by_16bit(num: i32, inv_denom_and_bitlen: (i64, u8)) -> i32 { 27 | let idiv_mul_num = i64::from(inv_denom_and_bitlen.0) * i64::from(num); 28 | ((idiv_mul_num >> LOG_MAX_NUMERATOR as u32) as i32 29 | + (((i64::from(num) - (idiv_mul_num >> LOG_MAX_NUMERATOR as u32)) as i32) >> 1)) 30 | >> inv_denom_and_bitlen.1 31 | } 32 | 33 | #[cfg(feature="simd")] 34 | #[inline(always)] 35 | pub fn fast_divide_30bit_i64x2_by_16bit(num: packed_simd::i64x2, inv_denom_and_bitlen: (i64, u8)) -> packed_simd::i64x2 { 36 | let idiv_mul_num = packed_simd::i64x2::splat(inv_denom_and_bitlen.0) * num; 37 | let idiv_mul_num_shift_max_num = idiv_mul_num >> LOG_MAX_NUMERATOR as u32; 38 | (idiv_mul_num_shift_max_num 39 | + ((num - (idiv_mul_num_shift_max_num)) >> 1)) 40 | >> u32::from(inv_denom_and_bitlen.1) 41 | } 42 | 43 | 44 | 45 | 46 | pub type Denominator8Type = u8; 47 | const SHIFT_16_BY_8:usize = 24; 48 | 49 | #[inline(always)] 50 | pub fn compute_divisor8(d: Denominator8Type) -> i32 { 51 | let del = 1; 52 | del + (1 << SHIFT_16_BY_8) / i32::from(d) 53 | } 54 | #[cfg(not(feature="division_table_gen"))] 55 | #[inline(always)] 56 | pub fn lookup_divisor8(d: u8) -> i32 { 57 | div_lut::RECIPROCAL8[d as u8 as usize] 58 | } 59 | #[inline(always)] 60 | pub fn 
fast_divide_16bit_by_8bit(num: u16, inv_denom_and_bitlen: i32) -> i16 { 61 | (i64::from(inv_denom_and_bitlen) * i64::from(num) >> SHIFT_16_BY_8) as i16 62 | } 63 | 64 | 65 | #[cfg(test)] 66 | mod test { 67 | use super::{fast_divide_30bit_by_16bit, lookup_divisor}; 68 | 69 | fn divide_30bit_by_16bit(num: i32, denom: i16) -> i32 { 70 | fast_divide_30bit_by_16bit(num, lookup_divisor(denom)) 71 | } 72 | 73 | #[test] 74 | fn test_divide() { 75 | let nums: [i32; 10] = [3032127, 5049117, 16427165, 23282359, 35903174, 76 | 132971515, 163159927, 343856773, 935221996, 1829347323]; 77 | let denoms: [i16; 10] = [115, 248, 267, 764, 1337, 4005, 4965, 9846, 24693, 31604]; 78 | for n in nums.into_iter() { 79 | for d in denoms.into_iter() { 80 | let reference = n / (*d as i32); 81 | let actual = divide_30bit_by_16bit(*n, *d); 82 | assert_eq!(reference, actual); 83 | } 84 | } 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /c/arg.h: -------------------------------------------------------------------------------- 1 | char * find_first_arg(int argc, char**argv) { 2 | int i; 3 | for (i = 1; i < argc; ++i) { 4 | if (argv[i][0] != '-') { 5 | return argv[i]; 6 | } 7 | } 8 | return NULL; 9 | } 10 | void set_options(struct DivansCompressorState *state, int argc, char **argv) { 11 | int i; 12 | unsigned int ret =0 ; 13 | int used_cm = 0; 14 | for (i = 1; i < argc; ++i) { 15 | if (strstr(argv[i], "-q") == argv[i]) { 16 | ret = divans_set_option(state, DIVANS_OPTION_QUALITY, atoi(argv[i] + 2)); 17 | assert(ret == DIVANS_SUCCESS); 18 | } 19 | if (strstr(argv[i], "-p") == argv[i]) { 20 | ret = divans_set_option(state, DIVANS_OPTION_PRIOR_BITMASK_DETECTION, atoi(argv[i] + 2)); 21 | assert(ret == DIVANS_SUCCESS); 22 | } 23 | if (strstr(argv[i], "-l") == argv[i]) { 24 | ret = divans_set_option(state, DIVANS_OPTION_USE_BROTLI_COMMAND_SELECTION, 0); 25 | assert(ret == DIVANS_SUCCESS); 26 | } 27 | if (strstr(argv[i], "-w") == argv[i]) { 28 | ret = 
divans_set_option(state, DIVANS_OPTION_WINDOW_SIZE, atoi(argv[i] + 2)); 29 | assert(ret == DIVANS_SUCCESS); 30 | } 31 | if (strstr(argv[i], "-a") == argv[i]) { 32 | ret = divans_set_option(state, DIVANS_OPTION_LITERAL_ADAPTATION_CM_HIGH, atoi(argv[i] + 2)); 33 | assert(ret == DIVANS_SUCCESS); 34 | } 35 | if (strstr(argv[i], "-cm") == argv[i]) { 36 | used_cm = 1; 37 | ret = divans_set_option(state, DIVANS_OPTION_USE_CONTEXT_MAP, 1); 38 | assert(ret == DIVANS_SUCCESS); 39 | if (argv[i] + 3 !='\0') { 40 | ret = divans_set_option(state, DIVANS_OPTION_FORCE_LITERAL_CONTEXT_MODE, atoi(argv[i] + 3)); 41 | assert(ret == DIVANS_SUCCESS); 42 | } 43 | } 44 | if (strstr(argv[i], "-bs") == argv[i]) { 45 | ret = divans_set_option(state, DIVANS_OPTION_STRIDE_DETECTION_QUALITY, 1); 46 | assert(ret == DIVANS_SUCCESS); 47 | } 48 | if (strstr(argv[i], "-as") == argv[i]) { 49 | ret = divans_set_option(state, DIVANS_OPTION_STRIDE_DETECTION_QUALITY, 2); 50 | assert(ret == DIVANS_SUCCESS); 51 | } 52 | } 53 | for (i = 1; i < argc; ++i) { 54 | if (strstr(argv[i], "-s") == argv[i]) { 55 | if (used_cm) { 56 | ret = divans_set_option(state, DIVANS_OPTION_DYNAMIC_CONTEXT_MIXING, 1); 57 | assert(ret == DIVANS_SUCCESS); 58 | } 59 | if (strcmp(argv[i], "-s") != 0) { // diff 60 | ret = divans_set_option(state, DIVANS_OPTION_FORCE_STRIDE_VALUE, atoi(argv[i]+2)); 61 | assert(ret == DIVANS_SUCCESS); 62 | } 63 | } 64 | } 65 | for (i = 1; i < argc; ++i) { 66 | if (strstr(argv[i], "-m") == argv[i]) { 67 | if (strcmp(argv[i], "-m") != 0) { // diff 68 | ret = divans_set_option(state, DIVANS_OPTION_DYNAMIC_CONTEXT_MIXING, atoi(argv[i]+2)); 69 | } else { 70 | ret = divans_set_option(state, DIVANS_OPTION_DYNAMIC_CONTEXT_MIXING, 2); 71 | } 72 | assert(ret == DIVANS_SUCCESS); 73 | } 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /src/probability/variant_speed_cdf.rs: -------------------------------------------------------------------------------- 1 | use 
core; 2 | use super::interface::{Prob, BaseCDF, Speed, CDF16, BLEND_FIXED_POINT_PRECISION, SPEED_PALETTE_SIZE, SymStartFreq}; 3 | use brotli::enc::util::FastLog2u16; 4 | 5 | #[derive(Clone,Copy)] 6 | pub struct VariantSpeedCDF { 7 | cdf: [ChildCDF; SPEED_PALETTE_SIZE + 1], 8 | cost: [f32;SPEED_PALETTE_SIZE+1], 9 | } 10 | 11 | impl Default for VariantSpeedCDF { 12 | fn default() -> Self{ 13 | VariantSpeedCDF { 14 | cdf:[ChildCDF::default();SPEED_PALETTE_SIZE + 1], 15 | cost:[0.0;SPEED_PALETTE_SIZE+1], 16 | } 17 | } 18 | } 19 | 20 | impl CDF16 for VariantSpeedCDF { 21 | fn blend(&mut self, symbol: u8, dyn:Speed) { 22 | for (index, (cdf, cost)) in self.cdf.iter_mut().zip(self.cost.iter_mut()).enumerate() { 23 | let pdf = cdf.pdf(symbol); 24 | let max = cdf.max(); 25 | *cost += FastLog2u16(max as u16) - FastLog2u16(pdf as u16); 26 | cdf.blend(symbol, if index == 0 {dyn} else {Speed::ENCODER_DEFAULT_PALETTE[index - 1]}); 27 | } 28 | } 29 | fn average(&self, other: &Self, mix_rate: i32) ->Self { 30 | let mut ret = self.clone(); 31 | ret.cdf[0] = self.cdf[0].average(&other.cdf[0], mix_rate); 32 | ret 33 | } 34 | } 35 | 36 | impl BaseCDF for VariantSpeedCDF { 37 | fn num_symbols() -> u8 { 38 | ::num_symbols() 39 | } 40 | fn cdf(&self, symbol: u8) -> Prob { 41 | self.cdf[0].cdf(symbol) 42 | } 43 | fn pdf(&self, symbol: u8) -> Prob { 44 | self.cdf[0].pdf(symbol) 45 | } 46 | fn div_by_max(&self, val: i32) -> i32 { 47 | self.cdf[0].div_by_max(val) 48 | } 49 | fn max(&self) -> Prob { 50 | self.cdf[0].max() 51 | } 52 | fn log_max(&self) -> Option { 53 | self.cdf[0].log_max() 54 | } 55 | fn used(&self) -> bool { 56 | self.cdf[0].used() 57 | } 58 | 59 | // returns true if valid. 60 | fn valid(&self) -> bool { 61 | self.cdf[0].valid() 62 | } 63 | 64 | // returns the entropy of the current distribution. 
65 | fn entropy(&self) -> f64 { 66 | self.cdf[0].entropy() 67 | } 68 | #[inline(always)] 69 | fn sym_to_start_and_freq(&self, 70 | sym: u8) -> SymStartFreq { 71 | self.cdf[0].sym_to_start_and_freq(sym) 72 | } 73 | #[inline(always)] 74 | fn rescaled_cdf(&self, sym: u8) -> i32 { 75 | self.cdf[0].rescaled_cdf(sym) 76 | } 77 | #[inline(always)] 78 | fn cdf_offset_to_sym_start_and_freq(&self, 79 | cdf_offset_p: Prob) -> SymStartFreq { 80 | self.cdf[0].cdf_offset_to_sym_start_and_freq(cdf_offset_p) 81 | } 82 | 83 | // These methods are optional because implementing them requires nontrivial bookkeeping. 84 | // Only CDFs that are intended for debugging should support them. 85 | fn num_samples(&self) -> Option { 86 | self.cdf[0].num_samples() 87 | } 88 | fn true_entropy(&self) -> Option { 89 | self.cdf[0].true_entropy() 90 | } 91 | fn rolling_entropy(&self) -> Option { 92 | self.cdf[0].rolling_entropy() 93 | } 94 | fn encoding_cost(&self) -> Option { 95 | self.cdf[0].encoding_cost() 96 | } 97 | fn num_variants(&self) -> usize { 98 | SPEED_PALETTE_SIZE 99 | } 100 | fn variant_cost(&self, variant_index: usize) -> f32 { 101 | self.cost[variant_index + 1] 102 | } 103 | fn base_variant_cost(&self) -> f32 { 104 | self.cost[0] 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /src/probability/external_cdf.rs: -------------------------------------------------------------------------------- 1 | use core; 2 | use super::interface::{BaseCDF, Prob, CDF16, Speed, BLEND_FIXED_POINT_PRECISION}; 3 | 4 | #[derive(Clone,Copy)] 5 | pub struct ExternalProbCDF16 { 6 | pub cdf: [Prob; 16], 7 | pub nibble: usize, 8 | } 9 | 10 | impl Default for ExternalProbCDF16 { 11 | fn default() -> Self { 12 | ExternalProbCDF16 { 13 | cdf: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 14 | nibble: 0, 15 | } 16 | } 17 | } 18 | 19 | impl ExternalProbCDF16 { 20 | pub fn init(&mut self, _n: u8, probs: &[u8], mix: &T) { 21 | //println_stderr!("init for 
{:x}", _n); 22 | //println_stderr!("init for {:x} {:x} {:x} {:x}", probs[0], probs[1], probs[2], probs[3]); 23 | //average the two probabilities 24 | assert!(probs.len() == 4); 25 | self.nibble = _n as usize; 26 | let mut pcdf = [1f64;16]; 27 | for nibble in 0..16 { 28 | //println_stderr!("setting for {:x}", nibble); 29 | for bit in 0..4 { 30 | let p1 = f64::from(probs[bit]) / f64::from(u8::max_value()); 31 | let isone = (nibble & (1<<(3 - bit))) != 0; 32 | //println_stderr!("bit {:} is {:} {:}", bit, isone, p1); 33 | if isone { 34 | pcdf[nibble] *= p1; 35 | } else { 36 | pcdf[nibble] *= 1f64 - p1; 37 | } 38 | } 39 | } 40 | let mut mcdf = [1f64;16]; 41 | for nibble in 1..16 { 42 | let prev = nibble - 1; 43 | let c = f64::from(mix.cdf(nibble)); 44 | let p = f64::from(mix.cdf(prev)); 45 | let m = f64::from(mix.max()); 46 | let d = (c - p) / m; 47 | assert!(d < 1.0); 48 | mcdf[nibble as usize] = d; 49 | } 50 | for nibble in 0..16 { 51 | pcdf[nibble] = (pcdf[nibble] + mcdf[nibble])/2f64; 52 | } 53 | let mut sum = 0f64; 54 | for pcdf_nibble in &mut pcdf { 55 | sum += *pcdf_nibble; 56 | *pcdf_nibble = sum; 57 | } 58 | for pcdf_nibble in &mut pcdf { 59 | *pcdf_nibble /= sum; 60 | } 61 | for nibble in 0..16 { 62 | let p = pcdf[nibble]; 63 | let res = (p * f64::from(Prob::max_value())) as Prob; 64 | let least1 = core::cmp::max(res, 1); 65 | self.cdf[nibble] = core::cmp::min(least1, self.max() - 1); 66 | //println_stderr!("cdf set {:x} {:x} {:}", nibble, self.cdf[nibble], p); 67 | } 68 | } 69 | } 70 | 71 | impl BaseCDF for ExternalProbCDF16 { 72 | fn num_symbols() -> u8 { 16 } 73 | fn div_by_max(&self, val:i32) -> i32 { 74 | return val / i32::from(self.max()) 75 | } 76 | fn used(&self) -> bool { 77 | self.entropy() != Self::default().entropy() 78 | } 79 | fn max(&self) -> Prob { 80 | Prob::max_value() 81 | } 82 | fn log_max(&self) -> Option { None } 83 | fn cdf(&self, symbol: u8) -> Prob { 84 | //println_stderr!("cdf for {:x} have {:x}", symbol, self.nibble); 85 | 
self.cdf[symbol as usize] 86 | } 87 | fn valid(&self) -> bool { 88 | true 89 | } 90 | } 91 | 92 | impl CDF16 for ExternalProbCDF16 { 93 | fn average(&self, other:&Self, mix_rate:i32) -> Self { 94 | if self.max() < 64 && other.max() > 64 { 95 | //return other.clone(); 96 | } 97 | if self.max() > 64 && other.max() < 64 { 98 | //return self.clone(); 99 | } 100 | if self.entropy() > other.entropy() { 101 | //return other.clone(); 102 | } 103 | //return self.clone(); 104 | let mut retval = *self; 105 | let ourmax = i64::from(self.max()); 106 | let othermax = i64::from(other.max()); 107 | let maxmax = core::cmp::min(ourmax, othermax); 108 | let lgmax = 64 - maxmax.leading_zeros(); 109 | let inv_mix_rate = (1 << BLEND_FIXED_POINT_PRECISION) - mix_rate; 110 | for (s, o) in retval.cdf.iter_mut().zip(other.cdf.iter()) { 111 | *s = (((i64::from(*s) * i64::from(mix_rate) *othermax + i64::from(*o) * i64::from(inv_mix_rate) * ourmax + 1) >> BLEND_FIXED_POINT_PRECISION) >> lgmax) as Prob; 112 | } 113 | retval 114 | } 115 | fn blend(&mut self, symbol: u8, speed: Speed) { 116 | return; 117 | } 118 | } 119 | -------------------------------------------------------------------------------- /wasm/brotli_iframe.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Brotli 6 | 35 | 36 | 138 | 139 | 140 | 141 | -------------------------------------------------------------------------------- /src/divans_to_raw/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Dropbox, Inc 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
/// Decoder-side specialization state.
#[derive(Default)]
pub struct DecoderSpecialization {
    // Largest literal-buffer length requested so far; `alloc_literal_buffer`
    // raises it as needed and sizes every allocation to it.
    max_size: usize,
}

impl DecoderSpecialization {
    /// Creates a specialization whose recorded maximum buffer size is zero.
    pub fn new() -> Self {
        Self::default()
    }
}
/// Returns a word with only bit `shift_val` set when `val` is non-zero,
/// and 0 otherwise.
fn to_bit_i32(val: i32, shift_val: u8) -> u32 {
    if val != 0 {
        1 << shift_val
    } else {
        0
    }
}

/// Collects the top bit of every byte of `data` into a 32-bit mask.
///
/// Byte `b` (0 = least significant) of `data[lane]` contributes output bit
/// `4 * lane + b`, so `data[0]`'s low byte maps to bit 0 and `data[7]`'s
/// high byte maps to bit 31. This is a scalar stand-in for what the name
/// suggests: an `epi8` movemask over eight 32-bit lanes.
fn movemask_epi8_i32(data: [i32; 8]) -> u32 {
    // Sign-bit position of each of the four bytes inside one i32 lane,
    // ordered from the least significant byte upward.
    const BYTE_SIGN_BITS: [i32; 4] = [0x80, 0x8000, 0x800000, -0x80000000];

    let mut mask = 0u32;
    for (lane, &word) in data.iter().enumerate() {
        for (byte, &sign_bit) in BYTE_SIGN_BITS.iter().enumerate() {
            // lane < 8 and byte < 4, so the shift is always in 0..32.
            mask |= to_bit_i32(word & sign_bit, (lane * 4 + byte) as u8);
        }
    }
    mask
}
/// Number of significant bits in `d`: 16 minus the count of leading zero
/// bits in its 16-bit representation (so 0 gives 0 and any negative value
/// gives 16).
fn k16bit_length(d: i16) -> u8 {
    let unused_high_bits = d.leading_zeros();
    (16 - unused_high_bits) as u8
}
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.// See the License for the specific language governing permissions and 12 | // limitations under the License. 13 | //! This is documentation for the `divans` crate 14 | //! 15 | //! The `divans` crate is meant to be used for generic data compression 16 | 17 | #![cfg_attr(feature="benchmark", feature(test))] 18 | //#![cfg_attr(feature="simd", feature(platform_intrinsics))] 19 | #![cfg_attr(not(feature="no-stdlib-rust-binding"),cfg_attr(not(feature="std"), feature(lang_items)))] 20 | #![cfg_attr(not(feature="no-stdlib-rust-binding"),cfg_attr(not(feature="std"), feature(compiler_builtins_lib)))] 21 | #![cfg_attr(not(feature="no-stdlib-rust-binding"),cfg_attr(not(feature="std"), crate_type="cdylib"))] 22 | #![no_std] 23 | 24 | #[cfg(not(test))] 25 | #[cfg(any(feature="findspeed", feature="billing"))] 26 | #[macro_use] 27 | extern crate std; 28 | 29 | #[cfg(feature="std")] 30 | #[cfg(not(test))] 31 | #[cfg(not(any(feature="billing", feature="findspeed")))] 32 | #[macro_use] 33 | extern crate std; 34 | 35 | #[cfg(test)] 36 | #[macro_use] 37 | extern crate std; 38 | #[cfg(feature="simd")] 39 | #[macro_use(shuffle)] 40 | extern crate packed_simd; 41 | #[cfg(feature="std")] 42 | extern crate alloc_stdlib; 43 | 44 | extern crate alloc_no_stdlib as alloc; 45 | extern crate brotli; 46 | pub mod resizable_buffer; 47 | pub mod interface; 48 | pub mod slice_util; 49 | pub mod alloc_util; 50 | mod probability; 51 | #[macro_use] 52 | mod priors; 53 | #[macro_use] 54 | mod arithmetic_coder; 55 | mod debug_encoder; 56 | mod cmd_to_raw; 57 | mod raw_to_cmd; 58 | mod codec; 59 | mod cmd_to_divans; 60 | mod divans_to_raw; 61 | #[macro_use] 62 | mod 
billing; 63 | pub mod test_helper; 64 | mod test_ans; 65 | mod test_mux; 66 | mod ans; 67 | mod brotli_ir_gen; 68 | mod divans_compressor; 69 | mod divans_decompressor; 70 | mod parallel_decompressor; 71 | mod stub_parallel_decompressor; 72 | pub mod ir_optimize; 73 | pub mod mux; 74 | pub mod constants; 75 | pub mod threading; 76 | pub mod multithreading; 77 | pub use self::interface::{DivansInputResult,DivansOpResult,DivansOutputResult, DivansResult, ErrMsg, MAGIC_NUMBER}; 78 | pub use alloc::{AllocatedStackMemory, Allocator, SliceWrapper, SliceWrapperMut, StackAllocator}; 79 | pub use interface::{DivansCompressorFactory, BlockSwitch, LiteralBlockSwitch, Command, Compressor, CopyCommand, Decompressor, DictCommand, LiteralCommand, Nop, NewWithAllocator, ArithmeticEncoderOrDecoder, LiteralPredictionModeNibble, PredictionModeContextMap, free_cmd, FeatureFlagSliceType, 80 | DefaultCDF16}; 81 | 82 | pub use brotli_ir_gen::{BrotliDivansHybridCompressor,BrotliDivansHybridCompressorFactory}; 83 | pub use cmd_to_raw::DivansRecodeState; 84 | pub use codec::CMD_BUFFER_SIZE; 85 | pub use divans_to_raw::DecoderSpecialization; 86 | pub use cmd_to_divans::EncoderSpecialization; 87 | pub use codec::{EncoderOrDecoderSpecialization, DivansCodec, StrideSelection}; 88 | pub use divans_compressor::{DivansCompressor, DivansCompressorFactoryStruct}; 89 | 90 | #[cfg(not(feature="safe"))] 91 | mod ffi; 92 | #[cfg(not(feature="safe"))] 93 | pub use ffi::*; 94 | mod reader; 95 | mod writer; 96 | #[cfg(feature="std")] 97 | pub use reader::DivansBrotliHybridCompressorReader; 98 | #[cfg(feature="std")] 99 | pub use reader::DivansExperimentalCompressorReader; 100 | #[cfg(feature="std")] 101 | pub use reader::DivansDecompressorReader; 102 | 103 | #[cfg(feature="std")] 104 | pub use writer::DivansBrotliHybridCompressorWriter; 105 | #[cfg(feature="std")] 106 | pub use writer::DivansExperimentalCompressorWriter; 107 | #[cfg(feature="std")] 108 | pub use writer::DivansDecompressorWriter; 109 | 110 
| 111 | pub use probability::Speed; 112 | 113 | 114 | pub use probability::CDF2; 115 | pub use probability::CDF16; 116 | pub use probability::BaseCDF; 117 | 118 | pub use interface::BrotliCompressionSetting; 119 | pub use interface::DivansCompressorOptions; 120 | pub use divans_decompressor::{DivansDecompressor, 121 | DivansDecompressorFactory, 122 | DivansDecompressorFactoryStruct, 123 | StaticCommand}; 124 | -------------------------------------------------------------------------------- /research/brute.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import threading 3 | import subprocess 4 | import Queue 5 | ir = sys.stdin.read() 6 | found_mixing_offsets = [] 7 | original_values = [] 8 | start = 0 9 | def run(output_q, procedure, input): 10 | so, se = procedure.communicate(input) 11 | output_q.put(so) 12 | original_values = [] 13 | while True: 14 | key = "mixingvalues " 15 | where = ir.find(key, start) 16 | if where == -1: 17 | if start == 0: 18 | assert where != -1, "Must have at least one mixingvalues" 19 | break 20 | for end_index in range(where + len(key), len(ir)): 21 | if ir[end_index] not in ('0', '1', '2', '3', ' '): 22 | break 23 | found_mixing_offsets.append((where + len(key), end_index)) 24 | original_values.append(ir[where + len(key):end_index]) 25 | start = where + 1 26 | 27 | q = Queue.Queue() 28 | q_c = Queue.Queue() 29 | best_size = None 30 | last_ir = "" 31 | for (item, oarray) in zip(found_mixing_offsets, original_values): 32 | array = [x + ' ' for x in oarray.split(' ')] 33 | for sub_offset in range(0,8192): 34 | array[sub_offset] = '0 ' 35 | option_a = ''.join(array) 36 | array[sub_offset] = '1 ' 37 | option_b = ''.join(array) 38 | array[sub_offset] = '3 ' 39 | option_c = ''.join(array) 40 | ir_a = ir[:item[0]] + option_a + ir[item[1]:] 41 | ir_b = ir[:item[0]] + option_b + ir[item[1]:] 42 | ir_c = ir[:item[0]] + option_c + ir[item[1]:] 43 | proc_a = subprocess.Popen([sys.argv[1], 44 | 
'-i', '-cm', '-s', '-mixing=1'] + sys.argv[2:], 45 | stdin=subprocess.PIPE, 46 | stdout=subprocess.PIPE) 47 | proc_b = subprocess.Popen([sys.argv[1], 48 | '-i', '-cm', '-s', '-mixing=1'] + sys.argv[2:], 49 | stdin=subprocess.PIPE, 50 | stdout=subprocess.PIPE) 51 | proc_c = subprocess.Popen([sys.argv[1], 52 | '-i', '-cm', '-s', '-mixing=1'] + sys.argv[2:], 53 | stdin=subprocess.PIPE, 54 | stdout=subprocess.PIPE) 55 | threading.Thread(target=lambda: run(q, proc_a, ir_a)).start() 56 | threading.Thread(target=lambda: run(q_c, proc_c, ir_c)).start() 57 | b_stdout, _stderr = proc_b.communicate(ir_b) 58 | a_ec = proc_a.wait() 59 | b_ec = proc_b.wait() 60 | c_ec = proc_c.wait() 61 | if a_ec != 0 or b_ec != 0 or c_ec != 0: 62 | with open('/tmp/ira','w') as f: 63 | f.write(ir_a) 64 | with open('/tmp/irb','w') as f: 65 | f.write(ir_b) 66 | with open('/tmp/irc','w') as f: 67 | f.write(ir_c) 68 | assert a_ec == 0 69 | assert b_ec == 0 70 | assert c_ec == 0 71 | a_stdout = q.get() 72 | c_stdout = q_c.get() 73 | if best_size is not None: 74 | if min(len(a_stdout), len(b_stdout)) > best_size: 75 | print 'uh oh',len(a_stdout), len(b_stdout),min(len(a_stdout), len(b_stdout)),'>', best_size 76 | with open('/tmp/ira','w') as f: 77 | f.write(ir_a) 78 | with open('/tmp/irb','w') as f: 79 | f.write(ir_b) 80 | with open('/tmp/irc','w') as f: 81 | f.write(ir_c) 82 | with open('/tmp/iro','w') as f: 83 | f.write(last_ir) 84 | assert min(len(a_stdout), len(b_stdout)) > best_size, "optimization should get better" 85 | if len(c_stdout) < len(b_stdout) and len(c_stdout) < len(a_stdout): 86 | array[sub_offset] = '3 ' 87 | sys.stderr.write("index " + str(sub_offset) + "Prefer 3 for " + str(len(c_stdout)) + "\n") 88 | last_ir = ir_c 89 | ir = ir_c 90 | best_size = len(c_stdout) 91 | elif len(a_stdout) < len(b_stdout): 92 | array[sub_offset] = '0 ' 93 | sys.stderr.write("index " + str(sub_offset) + "Prefer 0 for " + str(len(a_stdout)) + "\n") 94 | last_ir = ir_a 95 | ir = ir_a 96 | best_size = 
len(a_stdout) 97 | else: 98 | sys.stderr.write("index " + str(sub_offset) + "Prefer 1 for "+ str(len(b_stdout)) + "\n") 99 | array[sub_offset] = '1 ' 100 | last_ir = ir_b 101 | ir = ir_b 102 | best_size = len(b_stdout) 103 | ir = ir[:item[0]] + ''.join(array) + ir[item[1]:] 104 | 105 | sys.stdout.write(ir) 106 | -------------------------------------------------------------------------------- /src/ir_optimize/cache.rs: -------------------------------------------------------------------------------- 1 | use core; 2 | use codec::get_distance_from_mnemonic_code; 3 | use alloc::{Allocator, SliceWrapperMut, SliceWrapper}; 4 | const CACHE_HIT_REFERENCE_SIZE: usize = 8; 5 | 6 | pub struct CacheHitReferenceMut<'a>(pub &'a mut [u8]); 7 | 8 | 9 | impl<'a> CacheHitReferenceMut<'a> { 10 | pub fn set_code_and_offset(&mut self, code: u8, mut offset: usize) { 11 | offset += 1; 12 | self.0[0] = code; 13 | self.0[1] = offset as u8; 14 | self.0[2] = (offset >> 8) as u8; 15 | self.0[3] = (offset >> 16) as u8; 16 | self.0[4] = (offset >> 24) as u8; 17 | } 18 | } 19 | 20 | 21 | pub struct CacheHitReference<'a>(pub &'a [u8]); 22 | 23 | 24 | impl<'a> CacheHitReference<'a> { 25 | pub fn offset(&self) ->usize { 26 | (self.0[1] as usize | ((self.0[2] as usize) << 8)| ((self.0[3] as usize) << 16)| ((self.0[4] as usize) << 24)).wrapping_sub(1) 27 | } 28 | pub fn entry_id(&self) -> u8 { 29 | self.0[0] 30 | } 31 | pub fn miss(&self) -> bool { 32 | (self.0[1] | self.0[2] | self.0[3] | self.0[4] | self.0[5] | self.0[6] | self.0[7]) == 0 33 | } 34 | } 35 | 36 | 37 | 38 | #[derive(Debug,Copy,Clone)] 39 | pub struct CacheEntry { 40 | dist:u32, 41 | origin_offset:usize, 42 | } 43 | pub struct Cache> { 44 | cache:[CacheEntry;4], 45 | hitlist:AllocU8::AllocatedMemory, 46 | } 47 | 48 | impl> Cache { 49 | // prepares the cache statistics tracker for operating on num_commands 50 | pub fn new(cur_cache:&[u32;4], num_commands:usize, m8:&mut AllocU8) -> Self { 51 | Cache::{ 52 | 
cache:[CacheEntry{dist:cur_cache[0], origin_offset:0}, 53 | CacheEntry{dist:cur_cache[1], origin_offset:0}, 54 | CacheEntry{dist:cur_cache[2], origin_offset:0}, 55 | CacheEntry{dist:cur_cache[3], origin_offset:0}], 56 | hitlist:m8.alloc_cell(num_commands * 8), 57 | } 58 | } 59 | pub fn free(&mut self, m8:&mut AllocU8) { 60 | m8.free_cell(core::mem::replace(&mut self.hitlist, AllocU8::AllocatedMemory::default())); 61 | } 62 | pub fn get_cache_hit_log(&mut self, cmd_offset:usize) -> CacheHitReference{ 63 | let mut index = cmd_offset * CACHE_HIT_REFERENCE_SIZE; 64 | if index + CACHE_HIT_REFERENCE_SIZE > self.hitlist.slice().len() { // if we somehow overestimated the cache size 65 | index = 0; 66 | } 67 | CacheHitReference(self.hitlist.slice_mut().split_at_mut(index).1) 68 | } 69 | fn get_cache_hit_log_mut(&mut self, cmd_offset:usize) -> CacheHitReferenceMut{ 70 | let mut index = cmd_offset * CACHE_HIT_REFERENCE_SIZE; 71 | if index + CACHE_HIT_REFERENCE_SIZE > self.hitlist.len() { // if we somehow overestimated the cache size 72 | index = 0; 73 | } 74 | CacheHitReferenceMut(self.hitlist.slice_mut().split_at_mut(index).1) 75 | } 76 | fn forward_reference_hitlist(&mut self, code: u8, cache_index: u8, cmd_offset: usize) { 77 | let origin = self.cache[usize::from(cache_index)].origin_offset; 78 | self.cache[usize::from(cache_index)].origin_offset = cmd_offset; // bump the "next use" of the cache 79 | let mut log = self.get_cache_hit_log_mut(origin); 80 | log.set_code_and_offset(code, cmd_offset); 81 | } 82 | pub fn populate(&mut self, dist:u32, copy_len:u32, cmd_offset:usize) { 83 | let cur_cache = [self.cache[0].dist, self.cache[1].dist, self.cache[2].dist, self.cache[3].dist]; 84 | for code in 0..15 { 85 | let (cache_dist, ok, cache_index) = get_distance_from_mnemonic_code(&cur_cache, code as u8, copy_len); 86 | if dist == cache_dist && ok { 87 | // we have a hit 88 | self.forward_reference_hitlist(code, cache_index, cmd_offset); 89 | break; 90 | } 91 | } 92 | let 
new_cache_entry = CacheEntry { 93 | dist:dist, 94 | origin_offset:cmd_offset, 95 | }; 96 | // note the different logic here from the codec: we need to replace the cache entry, even if it's equal to 0 to get the right command index 97 | if dist == cur_cache[0] { 98 | self.cache[0] = new_cache_entry; 99 | } else if dist == cur_cache[1] { 100 | self.cache = [new_cache_entry, self.cache[0], self.cache[2], self.cache[3]]; 101 | } else if dist == cur_cache[2] { 102 | self.cache = [new_cache_entry, self.cache[0], self.cache[1], self.cache[3]]; 103 | } else { 104 | self.cache = [new_cache_entry, self.cache[0], self.cache[1], self.cache[2]]; 105 | } 106 | } 107 | } 108 | 109 | 110 | -------------------------------------------------------------------------------- /src/cmd_to_divans/mod.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Dropbox, Inc 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
/// Encoder-side specialization state.
pub struct EncoderSpecialization {
    // Fixed 128-byte scratch area handed out by `get_recoder_output`.
    backing: [u8; 128],
    // Largest literal-buffer length requested so far; `alloc_literal_buffer`
    // raises it as needed and sizes every allocation to it.
    max_size: usize,
}

impl Default for EncoderSpecialization {
    /// Zeroed scratch buffer and a recorded maximum size of zero.
    fn default() -> Self {
        Self {
            backing: [0u8; 128],
            max_size: 0,
        }
    }
}

impl EncoderSpecialization {
    /// Same as [`Default::default`].
    pub fn new() -> Self {
        Self::default()
    }
}
Command::Dict(ref dc) => dc, 86 | _ => backing, 87 | } 88 | } 89 | fn get_literal_byte>(&self, 90 | in_cmd: &LiteralCommand, 91 | index: usize) -> u8 { 92 | in_cmd.data.slice()[index] 93 | } 94 | fn get_recoder_output<'a>(&'a mut self, 95 | _passed_in_output_bytes: &'a mut [u8]) -> &'a mut[u8] { 96 | assert_eq!(Self::DOES_CALLER_WANT_ORIGINAL_FILE_BYTES, false); 97 | &mut self.backing[..] 98 | } 99 | fn get_recoder_output_offset<'a>(&self, 100 | _passed_in_output_bytes: &'a mut usize, 101 | backing: &'a mut usize) -> &'a mut usize { 102 | assert_eq!(Self::DOES_CALLER_WANT_ORIGINAL_FILE_BYTES, false); 103 | //*backing = self.backing.len(); 104 | backing 105 | } 106 | 107 | 108 | } 109 | -------------------------------------------------------------------------------- /research/select_best_quandruple.py: -------------------------------------------------------------------------------- 1 | import json 2 | import sys 3 | from collections import defaultdict 4 | cut = True 5 | best_other_alg = 'zlib' 6 | if len(sys.argv) > 1 and 'b95' in sys.argv[1]: 7 | best_other_alg = 'b95' 8 | elif len(sys.argv) > 1 and 'b11' in sys.argv[1]: 9 | best_other_alg = 'b11' 10 | elif len(sys.argv) > 1: 11 | assert 'zlib' in sys.argv[1] 12 | sub_item = 6 13 | combo_scores = defaultdict(lambda:0) 14 | data_list = [] 15 | zlib_other_list = [] 16 | score_record = [] 17 | 18 | for line in sys.stdin: 19 | try: 20 | if cut: 21 | line = line[line.find(':')+1:] 22 | raw = json.loads(line) 23 | b11_cost = raw['b11'][0] 24 | b95_cost = raw['b95'][0] 25 | zlib_cost = raw['zlib'][0] 26 | other_cost = raw[best_other_alg][0] 27 | if raw['~raw']*.995 < zlib_cost: 28 | continue 29 | clist = raw['~'][sub_item] 30 | data_list.append(clist) 31 | zlib_other_list.append((zlib_cost, other_cost)) 32 | for k0 in range(len(clist) - 1): 33 | for k1 in range(k0 + 1, len(clist)): 34 | key = (k0, k1) 35 | score = min(clist[k0][0], clist[k1][0], other_cost, zlib_cost) 36 | combo_scores[key] += score 37 | except Exception: 
38 | continue 39 | best_combo = min([(v, k[0], k[1]) for k, v in combo_scores.iteritems()]) 40 | score_record.append(best_combo[0]) 41 | best_elements = [best_combo[1], best_combo[2]] 42 | print 'partial', best_elements,'score',score_record 43 | sys.stdout.flush() 44 | combo_scores = defaultdict(lambda:0) 45 | for (sample, other) in zip(data_list, zlib_other_list): 46 | for k in range(len(sample)): 47 | combo_scores[k] += min(sample[best_elements[0]][0], 48 | sample[best_elements[1]][0], 49 | sample[k][0], other[0], other[1]) 50 | best_val = min([(v,k) for k, v in combo_scores.iteritems()]) 51 | score_record.append(best_val[0]) 52 | best_elements.append(best_val[1]) 53 | print 'partial', best_elements,'score',score_record 54 | sys.stdout.flush() 55 | combo_scores = defaultdict(lambda:0) 56 | for (sample, other) in zip(data_list, zlib_other_list): 57 | for k in range(len(sample)): 58 | combo_scores[k] += min(sample[best_elements[0]][0], 59 | sample[best_elements[1]][0], 60 | sample[best_elements[2]][0], 61 | sample[k][0], other[0], other[1]) 62 | best_val = min([(v,k) for k, v in combo_scores.iteritems()]) 63 | score_record.append(best_val[0]) 64 | best_elements.append(best_val[1]) 65 | print 'partial', best_elements,'score',score_record 66 | sys.stdout.flush() 67 | combo_scores = defaultdict(lambda:0) 68 | 69 | for (sample, other) in zip(data_list, zlib_other_list): 70 | for k in range(len(sample)): 71 | combo_scores[k] += min(sample[best_elements[0]][0], 72 | sample[best_elements[1]][0], 73 | sample[best_elements[2]][0], 74 | sample[best_elements[3]][0], 75 | sample[k][0], other[0], other[1]) 76 | best_val = min([(v,k) for k, v in combo_scores.iteritems()]) 77 | score_record.append(best_val[0]) 78 | best_elements.append(best_val[1]) 79 | print 'partial', best_elements,'score',score_record 80 | sys.stdout.flush() 81 | combo_scores = defaultdict(lambda:0) 82 | for (sample, other) in zip(data_list, zlib_other_list): 83 | for k in range(len(sample)): 84 | 
combo_scores[k] += min(sample[best_elements[0]][0], 85 | sample[best_elements[1]][0], 86 | sample[best_elements[2]][0], 87 | sample[best_elements[3]][0], 88 | sample[best_elements[4]][0], 89 | sample[k][0], other[0], other[1]) 90 | best_val = min([(v,k) for k, v in combo_scores.iteritems()]) 91 | score_record.append(best_val[0]) 92 | best_elements.append(best_val[1]) 93 | print 'partial', best_elements,'score',score_record 94 | sys.stdout.flush() 95 | combo_scores = defaultdict(lambda:0) 96 | prescient_score = 0 97 | for (sample, other) in zip(data_list, zlib_other_list): 98 | prescient_score += min(min(x[0] for x in sample), min(other)) 99 | for k in range(len(sample)): 100 | combo_scores[k] += min(sample[best_elements[0]][0], 101 | sample[best_elements[1]][0], 102 | sample[best_elements[2]][0], 103 | sample[best_elements[3]][0], 104 | sample[best_elements[4]][0], 105 | sample[best_elements[5]][0], 106 | sample[k][0], other[0], other[1]) 107 | best_val = min([(v,k) for k, v in combo_scores.iteritems()]) 108 | score_record.append(best_val[0]) 109 | best_elements.append(best_val[1]) 110 | print best_elements,'score',score_record,'best',prescient_score 111 | -------------------------------------------------------------------------------- /research/select_best_triple.py: -------------------------------------------------------------------------------- 1 | import json 2 | import sys 3 | samples = [] 4 | othstats = [] 5 | hdrs = [] 6 | for line in sys.stdin: 7 | if line.startswith('hdr:'): 8 | hdrs = json.loads(line[line.find(':')+1:].replace("'",'"')) 9 | continue 10 | if line.startswith('stats:'): 11 | scores = [int(item.strip()) for item in line[6:].replace('baseline: ','').replace('vsIX','vs').replace('vsXI','vs').replace("vsX", "vs").replace('vsZstd','vs').replace('vsZ','vs').replace('vsU','vs').replace('vs:','vs').split('[')[0].split(' vs ')] 12 | othstats.append(scores) 13 | if not line.startswith('args:'): 14 | continue # ignore anything but the nonopt items 15 | 
def is_blacklisted(baseline, uncompressed):
    # Decide whether a sample should be excluded from the comparison because
    # the baseline compressor barely shrank it.
    #
    # baseline:     size of the sample after baseline compression
    # uncompressed: original size of the sample
    #
    # Returns True when the baseline saved fewer than 128 bytes, or when the
    # baseline output is more than 99.5% of the original size.
    #
    # The absolute-savings test runs first so that an empty sample
    # (uncompressed == 0) is rejected before the ratio is computed; dividing
    # first raised ZeroDivisionError on such input. For every non-empty
    # sample the result is the same as before, since the two tests are
    # simply OR-ed together.
    if uncompressed - baseline < 128:
        return True
    return baseline / float(uncompressed) > .995
baseline 77 | for index in range(len(sample)): 78 | sample[index] = baseline 79 | if favored_index == 0: 80 | target = min([sample[index] for index in range(len(sample)) if not not_ok(index)]+ [baseline]) 81 | perfect_prediction += target 82 | baseline_total += baseline 83 | brotli_total += brotli 84 | brotli9_total += brotli9 85 | brotli10_total += brotli10 86 | brotli11_total += brotli11 87 | zstd_total += zstd 88 | cost += max(sample) 89 | else: 90 | target = min(sample) 91 | for index in range(num_options): 92 | cur = min([baseline] + [sample[index]] + [sample[fav] for fav in favored[:favored_index]]) 93 | if not_ok(index): 94 | total_count[index] += cur * 1000 95 | else: 96 | total_count[index] += cur 97 | 98 | for index in range(num_options): 99 | if total_count[index] < cost: 100 | cost = total_count[index] 101 | favored[favored_index] = index 102 | print cost / 1000. 103 | if include_ignored: 104 | print 'using zlib for % of bytes ',ignored * 100./ignored_and_viewed 105 | else: 106 | print 'ignored % bytes ',ignored * 100./ignored_and_viewed 107 | print 'perfect', perfect_prediction / 1000., 'brotli',brotli_total/1000.,'brotli9',brotli9_total/1000.,'brotli10',brotli10_total/1000.,'brotli11',brotli11_total/1000.,'zstd',zstd_total/1000.,'baseline',baseline_total/1000. 108 | print 'pct vs brotli', cost * 100. / brotli_total 109 | print 'pct vs brotli9', cost * 100. / brotli9_total 110 | print 'pct vs brotli10', cost * 100. / brotli10_total 111 | print 'pct vs brotli11', cost * 100. / brotli11_total 112 | print 'pct vs zstd', cost * 100. / zstd_total 113 | print 'pct vs zlib', cost * 100. 
/ baseline_total 114 | #print json.dumps(display,indent=2) 115 | print favored 116 | 117 | print [hdrs[favor] for favor in favored] 118 | 119 | -------------------------------------------------------------------------------- /src/codec/crc32.rs: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2017 Andrew Gallant (BurntSushi) 3 | 4 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 5 | 6 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 7 | 8 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 9 | 10 | 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 11 | 12 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
/// Stand-in for the SSE4.2 CRC32C routine on targets other than x86_64.
///
/// The dispatching `crc32c_update` is also compiled for `target_arch = "x86"`
/// and calls this function whenever `sse4.2` is detected at run time, so it
/// must produce a real checksum rather than abort. The hardware intrinsic
/// path is only provided for x86_64; here we delegate to the table-driven
/// implementation, which computes the same value.
#[cfg(feature="simd")]
#[cfg(not(target_arch = "x86_64"))]
fn sse_crc32c_update(crc:u32, buf: &[u8]) -> u32 {
    fallback_crc32c_update(crc, buf)
}
/// Unit tests for the CRC32C entry points.
///
/// The whole module is gated on `cfg(test)` so nothing from it (including the
/// import) is compiled into non-test builds.
#[cfg(test)]
mod test {
    use super::{crc32c_init, crc32c_update};

    /// The checksum of no data is the initial value.
    #[test]
    fn test_crc32c_empty() {
        assert_eq!(crc32c_update(crc32c_init(), &[]), 0x0);
    }

    /// Standard CRC32C check value for the ASCII digits 1-9.
    #[test]
    fn test_crc32c_numeric() {
        let slice = b"123456789";
        assert_eq!(crc32c_update(crc32c_init(), slice), 0xe3069283);
    }

    /// Feeding the input in two pieces must match the one-shot result.
    #[test]
    fn test_crc32c_numeric_half() {
        let slice = b"123456789";
        let (firsthalf, secondhalf) = slice.split_at(5);
        assert_eq!(crc32c_update(crc32c_update(crc32c_init(), firsthalf), secondhalf), 0xe3069283);
    }

    /// Input longer than one 16-byte block, exercising the bulk loop and the tail.
    #[test]
    fn test_crc32c_qbf() {
        let slice = b"The quick brown fox jumps over the lazy dog";
        assert_eq!(crc32c_update(crc32c_init(), slice), 0x22620404);
    }

    /// Same long input, split at an arbitrary (non block-aligned) offset.
    #[test]
    fn test_crc32c_qbf_half() {
        let slice = b"The quick brown fox jumps over the lazy dog";
        let (firsthalf, secondhalf) = slice.split_at(18);
        assert_eq!(crc32c_update(crc32c_update(crc32c_init(), firsthalf), secondhalf), 0x22620404);
    }

    /// Same long input, split exactly on the 16-byte block boundary so the
    /// first call consumes one full block with no byte-wise tail.
    #[test]
    fn test_crc32c_qbf_block_boundary() {
        let slice = b"The quick brown fox jumps over the lazy dog";
        let (firstblock, rest) = slice.split_at(16);
        assert_eq!(crc32c_update(crc32c_update(crc32c_init(), firstblock), rest), 0x22620404);
    }
}
model_weights: [i32;2], 6 | mixing_param: u8, 7 | normalized_weight: i16, 8 | } 9 | impl Default for Weights { 10 | fn default() -> Self { 11 | Self::new() 12 | } 13 | } 14 | impl Weights { 15 | pub fn new() -> Self { 16 | Weights { 17 | model_weights:[1;2], 18 | mixing_param: 1, 19 | normalized_weight: 1 << (BLEND_FIXED_POINT_PRECISION - 1), 20 | } 21 | } 22 | #[inline(always)] 23 | pub fn update(&mut self, model_probs: [Prob; 2], weighted_prob: Prob) { 24 | debug_assert!(self.mixing_param != 0); 25 | normalize_weights(&mut self.model_weights); 26 | let w0new = compute_new_weight(model_probs, 27 | weighted_prob, 28 | self.model_weights, 29 | false, 30 | self.mixing_param - 1); 31 | let w1new = compute_new_weight(model_probs, 32 | weighted_prob, 33 | self.model_weights, 34 | true, 35 | self.mixing_param - 1); 36 | self.model_weights = [w0new, w1new]; 37 | self.normalized_weight = compute_normalized_weight(self.model_weights); 38 | } 39 | #[inline(always)] 40 | pub fn set_mixing_param(&mut self, param: u8) { 41 | self.mixing_param = param; 42 | } 43 | #[inline(always)] 44 | pub fn should_mix(&self) -> bool { 45 | self.mixing_param > 1 46 | } 47 | #[inline(always)] 48 | pub fn norm_weight(&self) -> i16 { 49 | self.normalized_weight 50 | } 51 | } 52 | 53 | #[inline(always)] 54 | fn compute_normalized_weight(model_weights: [i32;2]) -> i16 { 55 | let total = i64::from(model_weights[0]) + i64::from(model_weights[1]); 56 | let leading_zeros = total.leading_zeros(); 57 | let shift = core::cmp::max(56 - (leading_zeros as i16), 0); 58 | let total_8bit = total >> shift; 59 | ::probability::numeric::fast_divide_16bit_by_8bit( 60 | ((model_weights[0] >> shift) as u16)<< 8, 61 | ::probability::numeric::lookup_divisor8(total_8bit as u8)) << (BLEND_FIXED_POINT_PRECISION - 8) 62 | } 63 | 64 | #[cold] 65 | fn fix_weights(weights: &mut [i32;2]) { 66 | let ilog = 32 - core::cmp::min(weights[0].leading_zeros(), 67 | weights[1].leading_zeros()); 68 | let max_log = 24; 69 | if ilog >= 
max_log { 70 | weights[0] >>= ilog - max_log; 71 | weights[1] >>= ilog - max_log; 72 | } 73 | } 74 | 75 | #[inline(always)] 76 | fn normalize_weights(weights: &mut [i32;2]) { 77 | if ((weights[0]|weights[1])&0x7f000000) != 0 { 78 | fix_weights(weights); 79 | } 80 | } 81 | fn ilog2(item: i64) -> u32 { 82 | 64 - item.leading_zeros() 83 | } 84 | #[cfg(features="floating_point_context_mixing")] 85 | fn compute_new_weight(probs: [Prob; 2], 86 | weighted_prob: Prob, 87 | weights: [i32;2], 88 | index_equal_1: bool, 89 | _speed: u8) -> i32{ // speed ranges from 1 to 14 inclusive 90 | let index = index_equal_1 as usize; 91 | let n1i = probs[index] as f64 / ((1i64 << LOG2_SCALE) as f64); 92 | //let n0i = 1.0f64 - n1i; 93 | let ni = 1.0f64; 94 | let s1 = weighted_prob as f64 / ((1i64 << LOG2_SCALE) as f64); 95 | let s0 = 1.0f64 - s1; 96 | let s = 1.0f64; 97 | //let p0 = s0; 98 | let p1 = s1; 99 | let wi = weights[index] as f64 / ((1i64 << LOG2_SCALE) as f64); 100 | let mut wi_new = wi + (1.0 - p1) * (s * n1i - s1 * ni) / (s0 * s1); 101 | let eps = 0.00001f64; 102 | if !(wi_new > eps) { 103 | wi_new = eps; 104 | } 105 | (wi_new * ((1i64 << LOG2_SCALE) as f64)) as i32 106 | } 107 | 108 | #[cfg(not(features="floating_point_context_mixing"))] 109 | #[inline(always)] 110 | fn compute_new_weight(probs: [Prob; 2], 111 | weighted_prob: Prob, 112 | weights: [i32;2], 113 | index_equal_1: bool, 114 | _speed: u8) -> i32{ // speed ranges from 1 to 14 inclusive 115 | let index = index_equal_1 as usize; 116 | let full_model_sum_p1 = i64::from(weighted_prob); 117 | let full_model_total = 1i64 << LOG2_SCALE; 118 | let full_model_sum_p0 = full_model_total.wrapping_sub(i64::from(weighted_prob)); 119 | let n1i = i64::from(probs[index]); 120 | let ni = 1i64 << LOG2_SCALE; 121 | let error = full_model_total.wrapping_sub(full_model_sum_p1); 122 | let wi = i64::from(weights[index]); 123 | let efficacy = full_model_total.wrapping_mul(n1i) - full_model_sum_p1.wrapping_mul(ni); 124 | //let 
geometric_probabilities = full_model_sum_p1 * full_model_sum_p0; 125 | let log_geometric_probabilities = 64 - (full_model_sum_p1.wrapping_mul(full_model_sum_p0)).leading_zeros(); 126 | //let scaled_geometric_probabilities = geometric_probabilities * S; 127 | //let new_weight_adj = (error * efficacy) >> log_geometric_probabilities;// / geometric_probabilities; 128 | //let new_weight_adj = (error * efficacy)/(full_model_sum_p1 * full_model_sum_p0); 129 | let new_weight_adj = (error.wrapping_mul(efficacy)) >> log_geometric_probabilities; 130 | // assert!(wi + new_weight_adj < (1i64 << 31)); 131 | //print!("{} -> {} due to {:?} vs {}\n", wi as f64 / (weights[0] + weights[1]) as f64, (wi + new_weight_adj) as f64 /(weights[0] as i64 + new_weight_adj as i64 + weights[1] as i64) as f64, probs[index], weighted_prob); 132 | core::cmp::max(1,wi.wrapping_add(new_weight_adj) as i32) 133 | } 134 | -------------------------------------------------------------------------------- /src/codec/priors.rs: -------------------------------------------------------------------------------- 1 | use ::interface::{ 2 | CrossCommandBilling, 3 | }; 4 | use ::priors::{PriorCollection, PriorMultiIndex}; 5 | #[cfg(feature="billing")] 6 | #[cfg(feature="debug_entropy")] 7 | use ::priors::summarize_prior_billing; 8 | pub const NUM_BLOCK_TYPES:usize = 256; 9 | pub const NUM_STRIDES:usize = 8; 10 | use alloc::{SliceWrapper, Allocator, SliceWrapperMut}; 11 | use probability::{BaseCDF}; 12 | define_prior_struct!(CrossCommandPriors, CrossCommandBilling, 13 | (CrossCommandBilling::FullSelection, 16, 1), 14 | (CrossCommandBilling::EndIndicator, 1, NUM_BLOCK_TYPES)); 15 | 16 | 17 | 18 | #[derive(PartialEq, Debug, Clone)] 19 | pub enum LiteralCommandPriorType { 20 | CountSmall, 21 | SizeBegNib, 22 | SizeLastNib, 23 | SizeMantissaNib, 24 | } 25 | #[derive(PartialEq, Debug, Clone)] 26 | pub enum LiteralCMPriorType { 27 | FirstNibble, 28 | SecondNibble, 29 | } 30 | #[derive(PartialEq, Debug, Clone)] 31 | pub 
enum LiteralNibblePriorType { 32 | CombinedNibble, 33 | } 34 | 35 | define_prior_struct!(LiteralNibblePriors, LiteralNibblePriorType, 36 | (LiteralNibblePriorType::CombinedNibble, 3, 256, NUM_BLOCK_TYPES) 37 | ); 38 | 39 | define_prior_struct!(LiteralCommandPriors, LiteralCommandPriorType, 40 | (LiteralCommandPriorType::CountSmall, NUM_BLOCK_TYPES, 16), 41 | (LiteralCommandPriorType::SizeBegNib, NUM_BLOCK_TYPES), 42 | (LiteralCommandPriorType::SizeLastNib, NUM_BLOCK_TYPES), 43 | (LiteralCommandPriorType::SizeMantissaNib, NUM_BLOCK_TYPES)); 44 | 45 | define_prior_struct!(LiteralCommandPriorsCM, LiteralCMPriorType, 46 | (LiteralCMPriorType::FirstNibble, 1, NUM_BLOCK_TYPES), 47 | (LiteralCMPriorType::SecondNibble, 1, 16, NUM_BLOCK_TYPES)); 48 | 49 | #[derive(PartialEq, Debug, Clone)] 50 | pub enum RandLiteralNibblePriorType { 51 | CountSmall, 52 | SizeBegNib, 53 | SizeLastNib, 54 | SizeMantissaNib, 55 | } 56 | define_prior_struct!(RandLiteralCommandPriors, RandLiteralNibblePriorType, 57 | (RandLiteralNibblePriorType::CountSmall, NUM_BLOCK_TYPES, 16), 58 | (RandLiteralNibblePriorType::SizeBegNib, NUM_BLOCK_TYPES), 59 | (RandLiteralNibblePriorType::SizeLastNib, NUM_BLOCK_TYPES), 60 | (RandLiteralNibblePriorType::SizeMantissaNib, NUM_BLOCK_TYPES)); 61 | 62 | #[derive(PartialEq, Debug, Clone)] 63 | pub enum CopyCommandNibblePriorType { 64 | DistanceBegNib, 65 | DistanceLastNib, 66 | DistanceMnemonic, 67 | DistanceMnemonicTwo, 68 | DistanceMantissaNib, 69 | CountSmall, 70 | CountBegNib, 71 | CountLastNib, 72 | CountMantissaNib, 73 | } 74 | const NUM_COPY_COMMAND_ORGANIC_PRIORS: usize = 64; 75 | define_prior_struct!(CopyCommandPriors, CopyCommandNibblePriorType, 76 | (CopyCommandNibblePriorType::DistanceBegNib, NUM_BLOCK_TYPES, NUM_COPY_COMMAND_ORGANIC_PRIORS), 77 | (CopyCommandNibblePriorType::DistanceMnemonic, NUM_BLOCK_TYPES, 2), 78 | (CopyCommandNibblePriorType::DistanceLastNib, NUM_BLOCK_TYPES, 1), 79 | (CopyCommandNibblePriorType::DistanceMantissaNib, NUM_BLOCK_TYPES, 
5), 80 | (CopyCommandNibblePriorType::CountSmall, NUM_BLOCK_TYPES, NUM_COPY_COMMAND_ORGANIC_PRIORS), 81 | (CopyCommandNibblePriorType::CountBegNib, NUM_BLOCK_TYPES, NUM_COPY_COMMAND_ORGANIC_PRIORS), 82 | (CopyCommandNibblePriorType::CountLastNib, NUM_BLOCK_TYPES, NUM_COPY_COMMAND_ORGANIC_PRIORS), 83 | (CopyCommandNibblePriorType::CountMantissaNib, NUM_BLOCK_TYPES, NUM_COPY_COMMAND_ORGANIC_PRIORS)); 84 | #[derive(PartialEq, Debug, Clone)] 85 | pub enum DictCommandNibblePriorType { 86 | SizeBegNib, 87 | SizeLastNib, 88 | Index, 89 | Transform, 90 | } 91 | 92 | const NUM_ORGANIC_DICT_DISTANCE_PRIORS: usize = 5; 93 | define_prior_struct!(DictCommandPriors, DictCommandNibblePriorType, 94 | (DictCommandNibblePriorType::SizeBegNib, NUM_BLOCK_TYPES), 95 | (DictCommandNibblePriorType::SizeLastNib, NUM_BLOCK_TYPES), 96 | (DictCommandNibblePriorType::Index, NUM_BLOCK_TYPES, NUM_ORGANIC_DICT_DISTANCE_PRIORS), 97 | (DictCommandNibblePriorType::Transform, 2, 25)); 98 | 99 | #[derive(PartialEq, Debug, Clone)] 100 | pub enum BlockTypePriorType { 101 | Mnemonic, 102 | FirstNibble, 103 | SecondNibble, 104 | StrideNibble, 105 | } 106 | define_prior_struct!(BlockTypePriors, BlockTypePriorType, 107 | (BlockTypePriorType::Mnemonic, 3), // 3 for each of ltype, ctype, dtype switches. 
108 | (BlockTypePriorType::FirstNibble, 3), 109 | (BlockTypePriorType::SecondNibble, 3), 110 | (BlockTypePriorType::StrideNibble, 1)); 111 | 112 | #[derive(PartialEq, Debug, Clone)] 113 | pub enum PredictionModePriorType { 114 | Only, 115 | DynamicContextMixingSpeed, 116 | PriorDepth, 117 | PriorMixingValue, 118 | LiteralSpeed, 119 | Mnemonic, 120 | FirstNibble, 121 | SecondNibble, 122 | ContextMapSpeedPalette, 123 | } 124 | 125 | define_prior_struct!(PredictionModePriors, PredictionModePriorType, 126 | (PredictionModePriorType::Only, 1), 127 | (PredictionModePriorType::LiteralSpeed, 1), 128 | (PredictionModePriorType::FirstNibble, 2), 129 | (PredictionModePriorType::SecondNibble, 2), 130 | (PredictionModePriorType::Mnemonic, 4), 131 | (PredictionModePriorType::PriorMixingValue, 17), 132 | (PredictionModePriorType::ContextMapSpeedPalette, 4) 133 | ); 134 | -------------------------------------------------------------------------------- /examples/util_prior_stream_cost.rs: -------------------------------------------------------------------------------- 1 | extern crate divans; 2 | use ::std::io::{ErrorKind, BufReader, Result}; 3 | use std::env; 4 | use std::collections::HashMap; 5 | use divans::CDF16; 6 | use divans::BaseCDF; 7 | use std::vec; 8 | fn determine_cost(cdf: &divans::DefaultCDF16, 9 | nibble: u8) -> f64 { 10 | let pdf = cdf.pdf(nibble); 11 | let prob = (pdf as f64) / (cdf.max() as f64); 12 | return -prob.log2() 13 | } 14 | 15 | fn eval_stream( 16 | r :&mut Reader, 17 | speed: Option, 18 | is_hex: bool 19 | ) -> Result { 20 | let mut sub_streams = HashMap::>::new(); 21 | let mut buffer = String::new(); 22 | let mut cost: f64 = 0.0; 23 | loop { 24 | buffer.clear(); 25 | match r.read_line(&mut buffer) { 26 | Err(e) => { 27 | if e.kind() == ErrorKind::Interrupted { 28 | continue; 29 | } 30 | return Err(e); 31 | }, 32 | Ok(val) => { 33 | if val == 0 || val == 1{ 34 | break; 35 | } 36 | let line = buffer.trim().to_string(); 37 | let mut prior_val: Vec = if let 
Some(_) = line.find(",") { 38 | line.split(',').map(|s| s.to_string()).collect() 39 | } else { 40 | line.split(' ').map(|s| s.to_string()).collect() 41 | }; 42 | let prior = if is_hex { 43 | match u64::from_str_radix(&prior_val[0], 16) { 44 | Err(_) => return Err(std::io::Error::new(ErrorKind::InvalidData,prior_val[0].clone())), 45 | Ok(val) => val, 46 | } 47 | } else { 48 | match prior_val[0].parse::() { 49 | Err(_) => return Err(std::io::Error::new(ErrorKind::InvalidData,prior_val[0].clone())), 50 | Ok(val) => val, 51 | } 52 | }; 53 | 54 | let val = if is_hex { 55 | match u8::from_str_radix(&prior_val[1], 16) { 56 | Err(_) => return Err(std::io::Error::new(ErrorKind::InvalidData,prior_val[1].clone())), 57 | Ok(val) => val, 58 | } 59 | } else { 60 | match prior_val[1].parse::() { 61 | Err(_) => return Err(std::io::Error::new(ErrorKind::InvalidData, prior_val[1].clone())), 62 | Ok(val) => val, 63 | } 64 | }; 65 | let mut prior_stream = &mut sub_streams.entry(prior).or_insert(vec::Vec::::new()); 66 | prior_stream.push(val); 67 | } 68 | } 69 | } 70 | let specified_speed = match speed { 71 | Some(s) => [s], 72 | None => [divans::Speed::MUD], 73 | }; 74 | let trial_speeds = [divans::Speed::GEOLOGIC, divans::Speed::GLACIAL, divans::Speed::MUD, divans::Speed::SLOW, 75 | divans::Speed::MED, divans::Speed::FAST, divans::Speed::PLANE, divans::Speed::ROCKET]; 76 | let speed_choice = match speed { 77 | Some(_) => &specified_speed[..], 78 | None => &trial_speeds[..], 79 | }; 80 | for (_prior, sub_stream) in sub_streams.iter() { 81 | let mut best_cost_high: Option = None; 82 | let mut best_cost_low: Option = None; 83 | for cur_speed in speed_choice.iter() { 84 | let mut cur_cost_high: f64 = 0.0; 85 | let mut cur_cost_low: f64 = 0.0; 86 | let mut cdf0 = divans::DefaultCDF16::default(); 87 | let mut cdf1a = [ 88 | divans::DefaultCDF16::default(), divans::DefaultCDF16::default(),divans::DefaultCDF16::default(), divans::DefaultCDF16::default(), 89 | divans::DefaultCDF16::default(), 
divans::DefaultCDF16::default(),divans::DefaultCDF16::default(), divans::DefaultCDF16::default(), 90 | divans::DefaultCDF16::default(), divans::DefaultCDF16::default(),divans::DefaultCDF16::default(), divans::DefaultCDF16::default(), 91 | divans::DefaultCDF16::default(), divans::DefaultCDF16::default(),divans::DefaultCDF16::default(), divans::DefaultCDF16::default(), 92 | ]; 93 | for val in sub_stream.iter() { 94 | let val_nibbles = (val >> 4, val & 0xf); 95 | { 96 | cur_cost_high += determine_cost(&cdf0, val_nibbles.0); 97 | cdf0.blend(val_nibbles.0, *cur_speed); 98 | } 99 | { 100 | let cdf1 = &mut cdf1a[val_nibbles.0 as usize]; 101 | cur_cost_low += determine_cost(cdf1, val_nibbles.1); 102 | cdf1.blend(val_nibbles.1, *cur_speed); 103 | } 104 | } 105 | best_cost_high = match best_cost_high.clone() { 106 | None => Some(cur_cost_high), 107 | Some(bc) => Some(if bc > cur_cost_high {cur_cost_high} else {bc}), 108 | }; 109 | best_cost_low = match best_cost_low.clone() { 110 | None => Some(cur_cost_low), 111 | Some(bc) => Some(if bc > cur_cost_low {cur_cost_low} else {bc}), 112 | }; 113 | } 114 | cost += best_cost_high.unwrap(); 115 | cost += best_cost_low.unwrap(); 116 | } 117 | Ok(cost) 118 | } 119 | 120 | 121 | fn main() { 122 | let stdin = std::io::stdin(); 123 | let stdin = stdin.lock(); 124 | let mut buffered_in = BufReader::new(stdin); 125 | let mut speed: Option = None; 126 | if env::args_os().len() > 1 { 127 | for argument in env::args().skip(1) { 128 | speed = Some(argument.parse::().unwrap()); 129 | } 130 | } 131 | let cost = eval_stream(&mut buffered_in, speed, true).unwrap(); 132 | println!("{} bytes; {} bits", ((cost + 0.99) as u64) as f64 / 8.0, (cost + 0.99) as u64); 133 | } 134 | -------------------------------------------------------------------------------- /research/divansplot.py: -------------------------------------------------------------------------------- 1 | 2 | import matplotlib.pyplot as plt 3 | from matplotlib import rcParams 4 | 5 | import 
def label_reassign(key):
    """Translate a short series key into the tick label shown on the plot.

    Keys without an entry in the table are returned unchanged.
    """
    display_names = {
        'b11': 'Brotli\nq11',
        'b9': 'Brotli\nq9',
        'd0': u'DivANS .\nq11',
        'd1': u'DivANS .\nq11',
        'dX': u'DivANS\nq9',
        'd5': u'DivANS\nq9',
        'd35': u'DivANS\nq9',
        'z19': 'Zstd\nq19',
        'lzma': '7zip',
        'bz': 'bz2',
    }
    return display_names.get(key, key)
def draw(ratio_vs_raw, ratio_vs_zlib, encode_avg, decode_avg, decode_pct):
    """Render the comparison charts and write each one as PDF and PNG.

    Three figures are produced in the current working directory:
      * compression_comparison_ratio_speed_time -- savings, decode and encode speed
      * compression_comparison_speed_time       -- decode and encode speed
      * compression_comparison_ratio            -- savings only

    ratio_vs_zlib, encode_avg and decode_avg are passed straight through to
    build_figure.  ratio_vs_raw and decode_pct are accepted for interface
    compatibility but are not plotted (the 'time_pct' panel is disabled).
    """
    # Font embedding settings are global, so configure them once up front
    # (type 42 = TrueType, per the matplotlib documentation).
    rcParams['pdf.fonttype'] = 42
    rcParams['ps.fonttype'] = 42
    rcParams['pgf.rcfonts'] = False

    fig, [ax1, ax2, ax3] = plt.subplots(3, 1, sharex=True, figsize=(6, 6))
    plt.suptitle("Dropbox recent uploads")
    #build_figure('time_pct', ax1, decode_pct, last=True)
    build_figure('decode_speed', ax2, decode_avg, last=True)
    build_figure('encode_speed', ax3, encode_avg)
    build_figure('savings_vs_zlib', ax1, ratio_vs_zlib)
    plt.savefig('compression_comparison_ratio_speed_time.pdf')
    plt.savefig('compression_comparison_ratio_speed_time.png')
    # pyplot keeps every figure alive until it is closed; clearing alone
    # leaves it registered, so release it explicitly.
    plt.close(fig)

    fig, [ax1, ax2] = plt.subplots(2, 1, sharex=True, figsize=(6, 4.5))
    plt.suptitle("Dropbox recent uploads timing")
    build_figure('decode_speed', ax1, decode_avg, last=True)
    build_figure('encode_speed', ax2, encode_avg)
    plt.savefig('compression_comparison_speed_time.pdf')
    plt.savefig('compression_comparison_speed_time.png')
    plt.close(fig)

    fig, ax1 = plt.subplots(1, 1, sharex=True, figsize=(6, 2.7))
    plt.suptitle(" Dropbox uploads compression ratio for uncompressed files")
    build_figure('savings_vs_zlib', ax1, ratio_vs_zlib)
    fig.subplots_adjust(bottom=0.2, right=.99, top=.9, hspace=0.03)
    plt.savefig('compression_comparison_ratio.pdf')
    plt.savefig('compression_comparison_ratio.png')
    plt.close(fig)
9 | }; 10 | use codec::interface::CMD_CODER; 11 | use slice_util::{AllocatedMemoryRange, AllocatedMemoryPrefix}; 12 | 13 | use alloc::{Allocator}; 14 | use alloc_util::{RepurposingAlloc, UninitializedOnAlloc}; 15 | use cmd_to_raw::DivansRecodeState; 16 | 17 | use threading::{ThreadToMain,ThreadData}; 18 | 19 | 20 | pub struct DemuxerAndRingBuffer, 21 | LinearInputBytes:StreamDemuxer>{ 22 | input: LinearInputBytes, 23 | phantom: core::marker::PhantomData, 24 | err: DivansOutputResult, 25 | } 26 | 27 | impl, LinearInputBytes:StreamDemuxer+Default> Default for DemuxerAndRingBuffer { 28 | fn default() ->Self { 29 | DemuxerAndRingBuffer::::new(LinearInputBytes::default()) 30 | } 31 | } 32 | impl, LinearInputBytes:StreamDemuxer> DemuxerAndRingBuffer { 33 | fn new(demuxer: LinearInputBytes) -> Self { 34 | DemuxerAndRingBuffer::{ 35 | input:demuxer, 36 | phantom:core::marker::PhantomData::::default(), 37 | err: DivansOutputResult::Success, 38 | } 39 | } 40 | } 41 | 42 | impl, LinearInputBytes:StreamDemuxer> StreamDemuxer for DemuxerAndRingBuffer { 43 | #[inline(always)] 44 | fn write_linear(&mut self, data:&[u8], m8: &mut AllocU8) -> usize { 45 | self.input.write_linear(data, m8) 46 | } 47 | #[inline(always)] 48 | fn read_buffer(&mut self) -> [ReadableBytes; NUM_STREAMS] { 49 | self.input.read_buffer() 50 | } 51 | #[inline(always)] 52 | fn data_ready(&self, stream_id:StreamID) -> usize { 53 | self.input.data_ready(stream_id) 54 | } 55 | #[inline(always)] 56 | fn peek(&self, stream_id: StreamID) -> &[u8] { 57 | self.input.peek(stream_id) 58 | } 59 | #[inline(always)] 60 | fn edit(&mut self, stream_id: StreamID) -> &mut AllocatedMemoryRange { 61 | self.input.edit(stream_id) 62 | } 63 | #[inline(always)] 64 | fn consume(&mut self, stream_id: StreamID, count: usize) { 65 | self.input.consume(stream_id, count) 66 | } 67 | #[inline(always)] 68 | fn consumed_all_streams_until_eof(&self) -> bool { 69 | self.input.consumed_all_streams_until_eof() 70 | } 71 | #[inline(always)] 72 | 
fn encountered_eof(&self) -> bool { 73 | self.input.encountered_eof() 74 | } 75 | #[inline(always)] 76 | fn free_demux(&mut self, m8: &mut AllocU8) { 77 | self.input.free_demux(m8); 78 | } 79 | } 80 | 81 | // this is an implementation of simply printing to the ring buffer that masquerades as communicating with a 'main thread' 82 | impl, LinearInputBytes:StreamDemuxer> ThreadToMain for DemuxerAndRingBuffer { 83 | const COOPERATIVE:bool = false; 84 | const ISOLATED:bool = false; 85 | fn pull_data(&mut self) -> ThreadData { 86 | ThreadData::Data(core::mem::replace(self.input.edit(CMD_CODER as StreamID), AllocatedMemoryRange::::default())) 87 | } 88 | fn pull_context_map(&mut self, mut m8: Option<&mut RepurposingAlloc>) -> Result>, ()> { 89 | match m8 { 90 | Some(ref mut m) => { 91 | let lit = m.use_cached_allocation::().alloc_cell(MAX_LITERAL_CONTEXT_MAP_SIZE); 92 | Ok(PredictionModeContextMap::> { 93 | literal_context_map:lit, 94 | predmode_speed_and_distance_context_map:m.use_cached_allocation::().alloc_cell( 95 | MAX_PREDMODE_SPEED_AND_DISTANCE_CONTEXT_MAP_SIZE), 96 | }) 97 | }, 98 | None => { 99 | panic!("Pull context map in Demuxer+RingBuffer without an allocator"); 100 | }, 101 | } 102 | } 103 | fn push_eof(&mut self) -> DivansOutputResult { 104 | self.err 105 | } 106 | fn push_consumed_data(&mut self, 107 | data: &mut AllocatedMemoryRange, 108 | mut m8: Option<&mut RepurposingAlloc>, 109 | ) -> DivansOutputResult { 110 | m8.as_mut().unwrap().free_cell(core::mem::replace(&mut data.0, AllocU8::AllocatedMemory::default())); 111 | self.err 112 | } 113 | fn broadcast_err(&mut self, err:ErrMsg) { 114 | self.err = DivansOutputResult::Failure(err); 115 | } 116 | fn push_cmd( 117 | &mut self, 118 | cmd:&mut Command>, 119 | mut m8: Option<&mut RepurposingAlloc>, 120 | mut recoder: Option<&mut DivansRecodeState>, 121 | specialization:&mut Specialization, 122 | output:&mut [u8], 123 | output_offset: &mut usize, 124 | ) -> DivansOutputResult { 125 | let mut 
tmp_output_offset_bytes_backing: usize = 0; 126 | let tmp_output_offset_bytes = specialization.get_recoder_output_offset( 127 | output_offset, 128 | &mut tmp_output_offset_bytes_backing); 129 | let ret = recoder.as_mut().unwrap().encode_cmd(cmd, 130 | specialization.get_recoder_output(output), 131 | tmp_output_offset_bytes); 132 | match ret { 133 | DivansOutputResult::Success => { 134 | free_cmd(cmd, &mut m8.as_mut().unwrap().use_cached_allocation::< 135 | UninitializedOnAlloc>()); 136 | self.err 137 | }, 138 | DivansOutputResult::Failure(_) => { 139 | free_cmd(cmd, &mut m8.as_mut().unwrap().use_cached_allocation::< 140 | UninitializedOnAlloc>()); 141 | ret 142 | } 143 | _ => ret, 144 | } 145 | } 146 | } 147 | -------------------------------------------------------------------------------- /c/example.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #ifndef _WIN32 6 | #include 7 | #endif 8 | #include "divans/ffi.h" 9 | #include "arg.h" 10 | #include "custom_alloc.h" 11 | #include "vec_u8.h" 12 | const unsigned char example[]= 13 | "Mary had a little lamb. 
Its fleece was white as snow.\n" 14 | "And every where that Mary went, the lamb was sure to go.\n" 15 | "It followed her to school one day which was against the rule.\n" 16 | "It made the children laugh and play to see a lamb at sch00l!\n\n\n\n" 17 | "0 1 1 2 3 5 8 13 21 34 55 89 144 233 377 610 987 1597 2584 4181 6765\n" 18 | "\x11\x99\x2f\xfc\xfe\xef\xff\xd8\xfd\x9c\x43" 19 | "Additional testing characters here"; 20 | 21 | 22 | 23 | #define BUF_SIZE 65536 24 | DivansResult compress(const unsigned char *data, size_t len, struct VecU8 *ret_buffer, 25 | int argc, char** argv) { 26 | unsigned char buf[BUF_SIZE]; 27 | struct CAllocator alloc = {custom_malloc, custom_free, custom_alloc_opaque}; 28 | struct DivansCompressorState *state = divans_new_compressor_with_custom_alloc(alloc); 29 | set_options(state, argc, argv); 30 | while (len) { 31 | size_t read_offset = 0; 32 | size_t buf_offset = 0; 33 | DivansResult res = divans_encode(state, 34 | data, len, &read_offset, 35 | buf, sizeof(buf), &buf_offset); 36 | if (res == DIVANS_FAILURE) { 37 | divans_free_compressor(state); 38 | return res; 39 | } 40 | data += read_offset; 41 | len -= read_offset; 42 | push_vec_u8(ret_buffer, buf, buf_offset); 43 | } 44 | DivansResult res; 45 | do { 46 | size_t buf_offset = 0; 47 | res = divans_encode_flush(state, 48 | buf, sizeof(buf), &buf_offset); 49 | if (res == DIVANS_FAILURE) { 50 | divans_free_compressor(state); 51 | return res; 52 | } 53 | push_vec_u8(ret_buffer, buf, buf_offset); 54 | } while(res != DIVANS_SUCCESS); 55 | divans_free_compressor(state); 56 | return DIVANS_SUCCESS; 57 | } 58 | 59 | DivansResult decompress(const unsigned char *data, size_t len, struct VecU8 *ret_buffer) { 60 | unsigned char buf[BUF_SIZE]; 61 | struct CAllocator alloc = {custom_malloc, custom_free, custom_alloc_opaque}; 62 | struct DivansDecompressorState *state = divans_new_decompressor_with_custom_alloc(alloc, 0); 63 | DivansResult res; 64 | do { 65 | size_t read_offset = 0; 66 | size_t buf_offset 
= 0; 67 | res = divans_decode(state, 68 | data, len, &read_offset, 69 | buf, sizeof(buf), &buf_offset); 70 | if (res == DIVANS_FAILURE || (res == DIVANS_NEEDS_MORE_INPUT && len == 0)) { 71 | divans_free_decompressor(state); 72 | return res; 73 | } 74 | data += read_offset; 75 | len -= read_offset; 76 | push_vec_u8(ret_buffer, buf, buf_offset); 77 | } while (res != DIVANS_SUCCESS); 78 | divans_free_decompressor(state); 79 | return DIVANS_SUCCESS; 80 | } 81 | 82 | int main(int argc, char**argv) { 83 | custom_free_f(&use_fake_malloc, memset(custom_malloc_f(&use_fake_malloc, 127), 0x7e, 127)); 84 | if (getenv("NO_MALLOC")) { 85 | custom_alloc_opaque = &use_fake_malloc; 86 | } 87 | if (getenv("RUST_MALLOC")) { 88 | custom_alloc_opaque = NULL; 89 | custom_malloc = NULL; 90 | custom_free = NULL; 91 | } 92 | const unsigned char* data = example; 93 | size_t len = sizeof(example); 94 | unsigned char* to_free = NULL; 95 | if (find_first_arg(argc, argv)) { 96 | FILE * fp = fopen(find_first_arg(argc, argv), "rb"); 97 | if (fp != NULL) { 98 | size_t ret; 99 | (void)fseek(fp, 0, SEEK_END); 100 | len = ftell(fp); 101 | (void)fseek(fp, 0, SEEK_SET); 102 | to_free = malloc(len); 103 | ret = fread(to_free, 1, len, fp); 104 | if (ret == 0) { 105 | return -1; 106 | } 107 | data = to_free; 108 | (void)fclose(fp); 109 | } 110 | } 111 | { 112 | struct VecU8 divans_file = new_vec_u8(); 113 | struct VecU8 rt_file = new_vec_u8(); 114 | DivansResult res = compress(data, len, &divans_file, argc, argv); 115 | if (res != DIVANS_SUCCESS) { 116 | fprintf(stderr, "Failed to compress code:%d\n", (int) res); 117 | abort(); 118 | } 119 | res = decompress(divans_file.data, divans_file.size, &rt_file); 120 | if (res != DIVANS_SUCCESS) { 121 | fprintf(stderr, "Failed to compress code:%d\n", (int)res); 122 | abort(); 123 | } 124 | if (rt_file.size != len) { 125 | FILE * fp = fopen("/tmp/fail.rt", "wb"); 126 | fwrite(rt_file.data, 1, rt_file.size, fp); 127 | fclose(fp); 128 | fp = fopen("/tmp/fail.dv", 
"wb"); 129 | fwrite(divans_file.data, 1, divans_file.size, fp); 130 | fclose(fp); 131 | fp = fopen("/tmp/fail.or", "wb"); 132 | fwrite(data, 1, len, fp); 133 | fclose(fp); 134 | fprintf(stderr, "Decompressed file size %ld != %ld\n", (long) rt_file.size, (long)len); 135 | abort(); 136 | } 137 | if (memcmp(rt_file.data, data, len) != 0) { 138 | fprintf(stderr, "Roundtrip Contents mismatch\n"); 139 | abort(); 140 | } 141 | #ifdef _WIN32 142 | printf("File length %ld reduced to %ld, %0.2f%%\n", 143 | (long)len, (long)divans_file.size,(double)divans_file.size * 100.0 / (double)len); 144 | #else 145 | char buf[512]; 146 | int ret; 147 | ret = write(1, "File length ", strlen("File Length ")); 148 | if (ret <= 0) { 149 | return ret; 150 | } 151 | custom_atoi(buf, len); 152 | ret = write(1, buf, strlen(buf)); 153 | if (ret <= 0) { 154 | return ret; 155 | } 156 | ret = write(1, " reduced to ", strlen(" reduced to ")); 157 | if (ret <= 0) { 158 | return ret; 159 | } 160 | custom_atoi(buf, divans_file.size); 161 | ret = write(1, buf, strlen(buf)); 162 | if (ret <= 0) { 163 | return ret; 164 | } 165 | ret = write(1, ", ", strlen(", ")); 166 | if (ret <= 0) { 167 | return ret; 168 | } 169 | custom_atoi(buf, divans_file.size * 100 / len); 170 | ret = write(1, buf, strlen(buf)); 171 | if (ret <= 0) { 172 | return ret; 173 | } 174 | ret = write(1, ".", strlen(".")); 175 | if (ret <= 0) { 176 | return ret; 177 | } 178 | custom_atoi(buf, ((divans_file.size * 1000000 + len/2)/ len) % 10000 + 10000); 179 | ret = write(1, buf + 1, strlen(buf) - 1); 180 | if (ret <= 0) { 181 | return ret; 182 | } 183 | ret = write(1, "%\n", strlen("%\n")); 184 | if (ret <= 0) { 185 | return ret; 186 | } 187 | #endif 188 | release_vec_u8(&divans_file); 189 | release_vec_u8(&rt_file); 190 | } 191 | if (to_free != NULL) { 192 | free(to_free); 193 | } 194 | return 0; 195 | } 196 | -------------------------------------------------------------------------------- /research/summary.py: 
-------------------------------------------------------------------------------- 1 | import sys 2 | import json 3 | import traceback 4 | from collections import defaultdict 5 | total = {} 6 | num_rows = 0 7 | raw_size =0 8 | 9 | cut = 0 10 | uncut = 0 11 | decode_hist = defaultdict(list) 12 | dig5 = 10000000.0 13 | def prec(x, scale=100000.0): 14 | return int(x * scale +.5)/scale 15 | 16 | def summarize(show_results=True): 17 | print "Summary for",num_rows,'Processed ',(uncut * 100.)/(cut + uncut),'%', raw_size / 1000.**4 18 | ratio_vs_zlib = {} 19 | ratio_vs_raw = {} 20 | encode_avg = {} 21 | decode_avg = {} 22 | decode_st_avg = {} 23 | decode_pct = {} 24 | 25 | for key in sorted(total.keys()): 26 | temp = [total[key][0] * 100. /total['zlib'][0], 27 | total[key][3]/max(total[key][1], 1), 28 | total[key][3]/max(total[key][2], 1), 29 | total[key][3]/max(total[key][4], 1)] 30 | print str(key) + ':' + str([prec(t) for t in temp]), 'sav', str(prec((total[key][0] + cut) * 100./ (cut + uncut))) + '%' 31 | ratio_vs_zlib[key] = [100 - 100. * float(total[key][0])/total['zlib'][0]] 32 | ratio_vs_raw[key] = [100 - 100. 
* float(total[key][0])/total['~raw'][0]] 33 | encode_avg[key] = [8 * total[key][3]/max(total[key][1], .00001)] 34 | decode_avg[key] = [8 * total[key][3]/max(total[key][2], .00001)] 35 | decode_st_avg[key] = [8 * total[key][3]/max(total[key][4], .00001)] 36 | if key in decode_hist: 37 | val = decode_hist[key] 38 | val.sort() 39 | vlen = len(val) 40 | p9999 = vlen * 9999 // 10000 41 | p99 = vlen * 99 // 100 42 | p95 = vlen * 95 // 100 43 | p75 = vlen * 75//100 44 | p50 = vlen // 2 45 | print str(key) + ': ' + str(total[key][0]) + '/' + str(total['zlib'][0]) + ' vs raw ' + str(total[key][0]) + '/' + str(total['~raw'][0]) 46 | decode_pct[key] = [1000 * val[p9999], 1000 * val[p99], 1000 * val[p75], 1000 * val[p50]] 47 | if show_results: 48 | try: 49 | import divansplot 50 | 51 | except Exception: 52 | traceback.print_exc() 53 | show_results = False 54 | if show_results: 55 | divansplot.draw(ratio_vs_raw, ratio_vs_zlib, encode_avg, decode_avg, decode_pct) 56 | gopts_map = { 57 | 'd1':[['-O2', '-q11', '-w22', '-lsb', '-lgwin22', '-mixing=1', '-findprior', '-speed=2,2048'], 58 | ['-O2', '-q9.5', '-w22', '-lgwin22', '-mixing=1', '-findprior', '-bytescore=140', 59 | '-sign', '-speed=32,4096'], 60 | ['-O2', '-q10', '-w22', '-lgwin22', '-mixing=1', '-findprior', '-sign', '-speed=16,8192'], 61 | ['-O2', '-q11', '-w22', '-lgwin18', '-mixing=1', '-findprior', '-speed=16,8192'], 62 | ['-O2', '-q9.5', '-w22', '-lgwin22', '-mixing=1', '-findprior', '-bytescore=340', 63 | '-lsb', '-speed=2,1024']], 64 | 'd12':[['-O2', '-q9.5', '-w22', '-defaultprior', '-lgwin22', '-mixing=2', '-bytescore=340']], 65 | 'd13':[ ['-O2', '-q9.5', '-w22', '-lsb', '-lgwin22', '-mixing=1', '-speed=2,2048', '-bytescore=540'], 66 | ['-O2', '-q9.5', '-w22', '-lgwin22', '-mixing=1', '-bytescore=140', '-speed=32,4096']], 67 | 'd15':[['-O2', '-q9.5', '-w22', '-lgwin22', '-mixing=1', '-speed=2,2048', '-bytescore=840'], 68 | ['-O2', '-q9.5', '-w22', '-lgwin22', '-mixing=1', '-bytescore=340'], 69 | ['-O2', '-q9.5', 
'-w22', '-lgwin22', '-mixing=1', '-bytescore=140', '-speed=32,4096']], 70 | 'd20':[['-O2', '-q10', '-w22', '-lsb', '-lgwin22', '-mixing=1', '-findprior', '-speed=2,2048'], 71 | ['-O2', '-q9.5', '-w22', '-lgwin22', '-mixing=1', '-findprior', '-bytescore=140', 72 | '-sign', '-speed=32,4096'], 73 | ['-O2', '-q9.5', '-w22', '-lgwin22', '-mixing=1', '-findprior', '-bytescore=40', 74 | '-sign', '-speed=16,8192'], 75 | ['-O2', '-q10', '-w22', '-lgwin18', '-mixing=1', '-findprior', '-speed=16,8192'], 76 | ['-O2', '-q9.5', '-w22', '-lgwin22', '-mixing=1', '-findprior', '-bytescore=340', 77 | '-lsb', '-speed=2,1024']], 78 | 'd21':[['-q9', '-defaultprior', '-w22', '-lgwin22', '-mixing=2', '-bytescore=140'], 79 | ['-q9', '-defaultprior', '-w22', '-lgwin22', '-mixing=2', '-bytescore=340'], 80 | ['-q9', '-defaultprior', '-w22', '-lgwin22', '-mixing=2', '-bytescore=840', "-speed=16,8192"]], 81 | 'd29':[['-q9', '-defaultprior', '-nocm', '-w22', '-lgwin22', '-mixing=0', '-bytescore=340']], 82 | 'd35':[ ['-q7', '-defaultprior', '-w22', '-lgwin22', '-mixing=2', '-bytescore=40'], 83 | ['-q7', '-defaultprior', '-w22', '-lgwin22', '-mixing=2', '-bytescore=340'], 84 | ['-q7', '-defaultprior', '-w22', '-lgwin22', '-mixing=2', '-bytescore=540'], 85 | ['-q7', '-defaultprior', '-w22', '-lgwin22', '-mixing=2', '-bytescore=840', '-speed=1,16384']], 86 | 'd38':[['-q5', '-defaultprior', '-w22', '-lgwin22', '-mixing=2', '-bytescore=340']], 87 | 88 | } 89 | for line in sys.stdin: 90 | if sys.argv[1] == '--cut': 91 | line = line[line.find(':') + 1:] 92 | try: 93 | row = json.loads(line) 94 | except Exception: 95 | traceback.print_exc() 96 | continue 97 | zlib_ratio = row['zlib'][0] / float(row['~raw']) 98 | if sys.argv[2] == "image": 99 | if row['zlib'][0] == row['~raw']: 100 | cut += row['zlib'][0] 101 | continue 102 | elif zlib_ratio > float(sys.argv[2])/100.: 103 | cut += row['zlib'][0] 104 | continue 105 | uncut += row['zlib'][0] 106 | raw_size += row['~raw'] 107 | mb_size = 
row['~raw']/1024./1024. 108 | num_rows += 1 109 | candidate = [0,0,0] 110 | rule = ['d12', 111 | 'd13', 112 | 'd20', 113 | 'd21', 114 | 'd1', 115 | 'd1'] 116 | if zlib_ratio > .99: 117 | candidate = row[rule[5]] 118 | elif zlib_ratio > .96: 119 | candidate = row[rule[4]] 120 | elif zlib_ratio > .92: # .85 121 | candidate = row[rule[3]] 122 | elif zlib_ratio > .89: # .25 123 | candidate = row[rule[2]] 124 | elif zlib_ratio > .85: # .22 125 | candidate = row[rule[1]] 126 | else: 127 | candidate = row[rule[0]] #1 128 | row['dY'] = candidate 129 | if zlib_ratio > .97: 130 | candidate = row['d29'] # fast to encode fast to decode 131 | #elif zlib_ratio > .9: 132 | # candidate = row['d38'] # fastest to encode slow to decode 133 | elif zlib_ratio > .5: 134 | candidate = row['d35'] # fast to encode and slow to decode 135 | else: 136 | candidate = row['d15'] # slow to encode fast to decode 137 | # candidate = row['d1'] # slowest to encode fat to decoed 138 | row['dX'] = candidate 139 | for (key, value) in row.iteritems(): 140 | if key not in total: 141 | total[key] = [0,0,0,0,0] 142 | if key == '~path' or key=='~': 143 | continue 144 | if key == '~raw': 145 | total[key][0] += value 146 | continue 147 | total[key][0] += value[0] 148 | decode_hist[key].append(value[2]) 149 | if mb_size >= 1 or True: 150 | total[key][1] += value[1] 151 | total[key][2] += value[2] 152 | total[key][3] += mb_size 153 | if len(value) > 3: 154 | total[key][4] += value[3] 155 | else: 156 | total[key][4] += value[2] 157 | if num_rows % 100000 == 0: 158 | summarize(False) 159 | summarize() 160 | -------------------------------------------------------------------------------- /src/billing.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Dropbox, Inc 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #![allow(unknown_lints,unused_macros,unused_imports)] 16 | use core::mem; 17 | use core::iter::FromIterator; 18 | use core::marker::PhantomData; 19 | use alloc::{Allocator}; 20 | use interface::{ArithmeticEncoderOrDecoder, BillingDesignation, NewWithAllocator, BillingCapability}; 21 | use super::probability::{CDF16, ProbRange}; 22 | use interface::{DivansResult, ReadableBytes, WritableBytes}; 23 | 24 | #[cfg(feature="billing")] 25 | mod billing { 26 | pub use std::collections::HashMap; 27 | pub use std::string::String; 28 | pub use std::vec::Vec; 29 | } 30 | 31 | #[cfg(feature="billing")] 32 | pub use std::io::Write; 33 | 34 | macro_rules! 
println_stderr( 35 | ($($val:tt)*) => { { 36 | writeln!(&mut ::std::io::stderr(), $($val)*).unwrap(); 37 | } } 38 | ); 39 | 40 | #[cfg(feature="billing")] 41 | pub struct BillingArithmeticCoder, Coder:ArithmeticEncoderOrDecoder> { 42 | coder: Coder, 43 | counter: billing::HashMap, 44 | movd: bool, 45 | _phantom: PhantomData, 46 | } 47 | 48 | #[cfg(feature="billing")] 49 | impl, 50 | Coder:ArithmeticEncoderOrDecoder+NewWithAllocator> NewWithAllocator for BillingArithmeticCoder { 51 | fn new(m8: &mut AllocU8) -> Self { 52 | BillingArithmeticCoder::{ 53 | coder: Coder::new(m8), 54 | counter: billing::HashMap::new(), 55 | movd:false, 56 | _phantom:PhantomData::::default(), 57 | } 58 | } 59 | fn free(&mut self, m8: &mut AllocU8) { 60 | self.coder.free(m8); 61 | } 62 | } 63 | 64 | #[cfg(feature="billing")] 65 | impl, Coder:ArithmeticEncoderOrDecoder> BillingArithmeticCoder { 66 | // Return the (bits, virtual bits) pair. 67 | pub fn get_total(&self) -> (f64, f64) { 68 | let mut total_bits : f64 = 0.0; 69 | let mut total_vbits : f64 = 0.0; 70 | for (_, v) in self.counter.iter() { 71 | total_bits += v.0; 72 | total_vbits += v.1; 73 | } 74 | (total_bits, total_vbits) 75 | } 76 | pub fn print_compression_ratio(&self, original_bytes : usize) { 77 | let (total_bits, _) = self.get_total(); 78 | println_stderr!("{:.2}/{:} Ratio {:.3}%", 79 | total_bits / 8.0, original_bytes, total_bits * 100.0 / 8.0 / (original_bytes as f64)); 80 | } 81 | } 82 | 83 | #[cfg(feature="billing")] 84 | impl, Coder:ArithmeticEncoderOrDecoder> Drop for BillingArithmeticCoder { 85 | fn drop(&mut self) { 86 | if self.movd { 87 | return; 88 | } 89 | let max_key_len = self.counter.keys().map(|k| format!("{:?}", k).len()).max().unwrap_or(5); 90 | let report = |k, v: (f64, f64)| { 91 | println_stderr!("{1:0$} Bit count: {2:9.1} Byte count: {3:11.3} Virtual bits: {4:7.0}", 92 | max_key_len, k, v.0, v.0 / 8.0, v.1); 93 | }; 94 | let mut sorted_entries = billing::Vec::from_iter(self.counter.iter()); 95 | 
sorted_entries.sort_by_key(|&(k, _)| format!("{:?}", k)); 96 | 97 | let mut total_bits : f64 = 0.0; 98 | let mut total_vbits : f64 = 0.0; 99 | 100 | for (k, v) in sorted_entries { 101 | report(format!("{:?}", k), *v); 102 | total_bits += v.0; 103 | total_vbits += v.1; 104 | } 105 | report(billing::String::from("Total"), (total_bits, total_vbits)); 106 | } 107 | } 108 | 109 | #[cfg(feature="billing")] 110 | impl, Coder:ArithmeticEncoderOrDecoder> ArithmeticEncoderOrDecoder for BillingArithmeticCoder { 111 | fn mov_consume(mut self) -> Self { 112 | self.mov() 113 | } 114 | fn mov(&mut self) -> Self { 115 | self.movd = true; 116 | BillingArithmeticCoder::{ 117 | coder: self.coder.mov(), 118 | counter: mem::replace(&mut self.counter, billing::HashMap::new()), 119 | movd: false, 120 | _phantom:PhantomData::::default(), 121 | } 122 | } 123 | fn has_data_to_drain_or_fill(&self) -> bool { 124 | self.coder.has_data_to_drain_or_fill() 125 | } 126 | fn drain_or_fill_internal_buffer_unchecked(&mut self, 127 | input_buffer: &mut ReadableBytes, 128 | output_buffer: &mut WritableBytes) -> DivansResult { 129 | self.coder.drain_or_fill_internal_buffer_unchecked(input_buffer, output_buffer) 130 | } 131 | fn get_or_put_bit_without_billing(&mut self, 132 | bit: &mut bool, 133 | prob_of_false: u8) { 134 | self.get_or_put_bit(bit, prob_of_false, BillingDesignation::Unknown) 135 | } 136 | fn get_or_put_bit(&mut self, 137 | bit: &mut bool, 138 | prob_of_false: u8, 139 | billing: BillingDesignation) { 140 | self.coder.get_or_put_bit_without_billing(bit, prob_of_false); 141 | let mut actual_prob = (prob_of_false as f64 + 0.5) / 256.0; 142 | if *bit { 143 | actual_prob = 1.0 - actual_prob; 144 | } 145 | let v = self.counter.entry(billing).or_insert((0.0, 0.0)); 146 | (*v).0 += -actual_prob.log2(); 147 | (*v).1 += 1.0; 148 | } 149 | fn get_or_put_nibble_without_billing(&mut self, 150 | nibble: &mut u8, 151 | prob: &C) -> ProbRange { 152 | self.get_or_put_nibble(nibble, prob, 
BillingDesignation::Unknown) 153 | } 154 | fn get_or_put_nibble(&mut self, 155 | nibble: &mut u8, 156 | prob: &C, 157 | billing: BillingDesignation) -> ProbRange { 158 | let ret = self.coder.get_or_put_nibble_without_billing(nibble, prob); 159 | let actual_prob = prob.pdf(*nibble) as f64 / (prob.max() as f64); 160 | let v = self.counter.entry(billing).or_insert((0.0, 0.0)); 161 | (*v).0 += -actual_prob.log2(); 162 | (*v).1 += 4.0; 163 | ret 164 | } 165 | fn close(&mut self) -> DivansResult { 166 | self.coder.close() 167 | } 168 | } 169 | 170 | // only need to implement this for feature=billing, since it's defined for any T in the default case 171 | #[cfg(feature="billing")] 172 | impl, Coder:ArithmeticEncoderOrDecoder> BillingCapability for BillingArithmeticCoder { 173 | fn debug_print(&self, byte_size: usize) { 174 | self.print_compression_ratio(byte_size); 175 | } 176 | } 177 | 178 | #[cfg(not(feature="billing"))] 179 | macro_rules! DefaultEncoderType( 180 | () => {::ans::ANSEncoder} 181 | ); 182 | 183 | #[cfg(not(feature="billing"))] 184 | macro_rules! DefaultDecoderType( 185 | () => {::ans::ANSDecoder} 186 | ); 187 | 188 | 189 | #[cfg(feature="billing")] 190 | macro_rules! DefaultEncoderType( 191 | () => { ::billing::BillingArithmeticCoder> } 192 | ); 193 | 194 | #[cfg(feature="billing")] 195 | macro_rules! 
DefaultDecoderType( 196 | () => { ::billing::BillingArithmeticCoder } 197 | ); 198 | -------------------------------------------------------------------------------- /src/probability/common_tests.rs: -------------------------------------------------------------------------------- 1 | use super::{BLEND_FIXED_POINT_PRECISION, CDF16, LOG2_SCALE, Prob, ProbRange, Speed}; 2 | 3 | #[cfg(test)] 4 | pub fn test_sym_to_start_and_freq() { 5 | let mut cdf = T::default(); 6 | for i in 0..100 { 7 | cdf.blend((i & 0xf) as u8, Speed::MED); 8 | let mut last_prob_range: ProbRange = ProbRange { start:0, freq:0 }; 9 | for sym in 0..16 { 10 | let result = cdf.sym_to_start_and_freq(sym as u8); 11 | assert_eq!(sym as u8, result.sym); 12 | // NOTE: the +1 is to mirror the default implementation of sym_to_start_and_freq, 13 | // which does +1 to the interpolated Prob value. 14 | let expected_start: Prob = 1 + if sym == 0 { 0 } else { 15 | last_prob_range.start + last_prob_range.freq 16 | }; 17 | assert_eq!(result.range.start, expected_start); 18 | last_prob_range = result.range.clone(); 19 | } 20 | } 21 | } 22 | 23 | #[cfg(test)] 24 | pub fn test_cdf_offset_to_sym_start_and_freq() { 25 | let mut cdf = T::default(); 26 | for i in 0..100 { 27 | cdf.blend((i & 0xf) as u8, Speed::MED); 28 | let mut prev_sym: u8 = 0; 29 | for val in 0..(1i32 << LOG2_SCALE) { 30 | let result = cdf.cdf_offset_to_sym_start_and_freq(val as Prob); 31 | // TODO: The following comparisons should not have +1's, but 32 | // cdf_offset_to_sym_start_and_freq(...) implementation at the moment is HAX. 33 | assert!(prev_sym <= result.sym); 34 | // check that val falls in the range defined by the return value. 
35 | assert!(result.range.start as i32 <= val + 1); 36 | assert!(val <= (result.range.start as i32) + (result.range.freq as i32)); 37 | prev_sym = result.sym; 38 | } 39 | assert_eq!(prev_sym, 15); 40 | } 41 | } 42 | 43 | #[allow(unused)] 44 | fn simple_rand(state: &mut u64) -> u32 { 45 | const RAND_MAX : u32 = 32_767; 46 | *state = (*state).wrapping_mul(1_103_515_245).wrapping_add(12_345); 47 | ((*state / 65_536) as u32 % (RAND_MAX + 1)) as u32 48 | } 49 | 50 | #[cfg(test)] 51 | pub fn test_stationary_probability() { 52 | let mut cdf = T::default(); 53 | let groundtruth_pdf: [(u32, u32); 16] = [(0,1), (0,1), (1,16), (0,1), 54 | (1,32), (1,32), (0,1), (0,1), 55 | (1,8), (0,1), (0,1), (0,1), 56 | (1,5), (1,5), (1,5), (3,20)]; 57 | 58 | // compute CDF manually 59 | const CDF_MAX : u32 = 32_767; 60 | let mut cutoffs: [u32; 16] = [0; 16]; 61 | let mut sum_prob: f32 = 0.0f32; 62 | for i in 0..16 { 63 | sum_prob += (groundtruth_pdf[i].0 as f32) / (groundtruth_pdf[i].1 as f32); 64 | cutoffs[i] = (((CDF_MAX + 1) as f32) * sum_prob).round() as u32; 65 | } 66 | assert_eq!(cutoffs[15], CDF_MAX + 1); 67 | 68 | // make sure we have all probability taken care of 69 | let mut seed = 1u64; 70 | let num_trials = 1000000usize; 71 | for i in 0..num_trials { 72 | let rand_num = simple_rand(&mut seed) as u32; 73 | for j in 0..16 { 74 | if rand_num < cutoffs[j] { 75 | // we got an j as the next symbol 76 | cdf.blend(j as u8, Speed::MED); 77 | assert!(cdf.valid()); 78 | break; 79 | } 80 | assert!(j != 15); // should have broken 81 | } 82 | } 83 | for i in 0..16 { 84 | let actual = (cdf.pdf(i as u8) as f32) / (cdf.max() as f32); 85 | let expected = (groundtruth_pdf[i].0 as f32) / (groundtruth_pdf[i].1 as f32); 86 | let abs_delta = (expected - actual).abs(); 87 | let rel_delta = abs_delta / expected; // may be nan 88 | // TODO: These bounds should be tightened. 
89 | assert!(rel_delta < 0.15f32 || abs_delta < 0.014f32); 90 | } 91 | } 92 | 93 | #[cfg(test)] 94 | pub fn test_nonzero_pdf() { 95 | // This is a regression test 96 | let mut cdf = T::default(); 97 | for _ in 0..1000000 { 98 | cdf.blend(15, Speed::MED); 99 | } 100 | for i in 0..15 { 101 | assert!(cdf.pdf(i) > 0); 102 | } 103 | } 104 | 105 | macro_rules! define_common_tests_helper { 106 | ($cdf_ty: ident; $($test_name: ident),+) => { 107 | $( 108 | #[test] 109 | fn $test_name() { 110 | use super::super::common_tests; 111 | common_tests::$test_name::<$cdf_ty>(); 112 | } 113 | )+ 114 | }; 115 | } 116 | 117 | #[macro_export] 118 | macro_rules! declare_common_tests { 119 | ($cdf_ty: ident) => { 120 | define_common_tests_helper!($cdf_ty; 121 | test_sym_to_start_and_freq, 122 | test_cdf_offset_to_sym_start_and_freq, 123 | test_stationary_probability, 124 | test_nonzero_pdf); 125 | } 126 | } 127 | 128 | pub fn assert_cdf_eq(cdf0: &CDF16A, cdf1: &CDF16B) { 129 | assert_eq!(cdf0.max(), cdf1.max()); 130 | for sym in 0..16 { 131 | assert_eq!(cdf0.cdf(sym as u8), cdf1.cdf(sym as u8)); 132 | } 133 | assert!(cdf0.valid()); 134 | assert!(cdf1.valid()); 135 | } 136 | 137 | pub fn assert_cdf_similar(cdf0: &CDF16A, cdf1: &CDF16B) { 138 | let max0 = cdf0.max() as i64; 139 | let max1 = cdf1.max() as i64; 140 | for sym in 0..16 { 141 | let sym0cdf = i64::from(cdf0.cdf(sym as u8)); 142 | let sym1cdf = i64::from(cdf1.cdf(sym as u8)); 143 | let cmp0 = sym0cdf * max1; 144 | let cmp1 = sym1cdf * max0; 145 | let delta = if cmp0 < cmp1 { cmp1.wrapping_sub(cmp0) } else { cmp0.wrapping_sub(cmp1) }; 146 | assert!(delta < max1 * max0 / 160); 147 | } 148 | assert!(cdf0.valid()); 149 | assert!(cdf1.valid()); 150 | } 151 | 152 | pub fn operation_test_helper (cdf0a: &mut CDFA, cdf1a: &mut CDFA, cdf0b: &mut CDFB, cdf1b: &mut CDFB) { 153 | assert_cdf_eq(cdf0a, cdf0b); 154 | assert_cdf_eq(cdf1a, cdf1b); 155 | let symbol_buffer0 = [0u8, 0u8, 0u8, 0u8, 0u8, 1u8, 2u8, 3u8, 4u8, 5u8, 5u8, 5u8, 5u8, 5u8, 
5u8, 156 | 6u8, 7u8, 8u8, 8u8, 9u8, 9u8, 10u8, 10u8, 10u8, 10u8, 10u8, 10u8, 10u8, 157 | 10u8, 10u8, 10u8, 11u8, 12u8, 12u8, 12u8, 13u8, 13u8, 13u8, 14u8, 15u8, 158 | 15u8, 15u8, 15u8, 15u8, 15u8, 15u8]; 159 | let symbol_buffer1 = [0u8, 0u8, 0u8, 0u8, 0u8, 1u8, 2u8, 3u8, 4u8, 5u8, 5u8, 5u8, 5u8, 5u8, 5u8]; 160 | for sym in symbol_buffer0.iter() { 161 | cdf0a.blend(*sym, Speed::MED); 162 | cdf0b.blend(*sym, Speed::MED); 163 | assert_cdf_eq(cdf0a, cdf0b); 164 | } 165 | assert_cdf_similar(&cdf0a.average(cdf1a, (1<>2), &cdf0b.average(cdf1b, (1<>2)); 166 | for sym in symbol_buffer1.iter() { 167 | cdf0a.blend(*sym, Speed::MED); 168 | cdf0b.blend(*sym, Speed::MED); 169 | assert_cdf_eq(cdf0a, cdf0b); 170 | } 171 | let all = (1<>1; 173 | let quarter = (1<>2; 174 | let threequarters = half + quarter;; 175 | 176 | assert_cdf_eq(&cdf0a.average(cdf1a, quarter), &cdf0b.average(cdf1b, quarter)); 177 | assert_cdf_eq(&cdf0a.average(cdf1a, half), &cdf0b.average(cdf1b, half)); 178 | assert_cdf_eq(&cdf0a.average(cdf1a, threequarters), &cdf0b.average(cdf1b, threequarters)); 179 | assert_cdf_eq(&cdf0a.average(cdf1a, 0), &cdf0b.average(cdf1b, 0)); 180 | assert_cdf_eq(&cdf0a.average(cdf1a, all), &cdf0b.average(cdf1b, all)); 181 | assert_cdf_similar(&cdf0a.average(cdf1a, 0), cdf1a); 182 | assert_cdf_similar(&cdf0a.average(cdf1a, all), cdf0a); 183 | assert_cdf_similar(&cdf0b.average(cdf1b, 0), cdf1b); 184 | assert_cdf_similar(&cdf0b.average(cdf1b, all), cdf0b); 185 | } 186 | -------------------------------------------------------------------------------- /src/ffi/alloc_util.rs: -------------------------------------------------------------------------------- 1 | use core; 2 | use ::alloc; 3 | use super::interface::{c_void, CAllocator}; 4 | #[cfg(feature="std")] 5 | use std::vec::Vec; 6 | #[cfg(feature="std")] 7 | pub use std::boxed::Box; 8 | 9 | #[cfg(feature="std")] 10 | #[derive(Debug)] 11 | pub struct MemoryBlock(Box<[Ty]>); 12 | #[cfg(feature="std")] 13 | impl Default for MemoryBlock { 
14 | fn default() -> Self { 15 | MemoryBlock(Vec::::new().into_boxed_slice()) 16 | } 17 | } 18 | #[cfg(feature="std")] 19 | impl alloc::SliceWrapper for MemoryBlock { 20 | fn slice(&self) -> &[Ty] { 21 | &self.0[..] 22 | } 23 | } 24 | #[cfg(feature="std")] 25 | impl alloc::SliceWrapperMut for MemoryBlock { 26 | fn slice_mut(&mut self) -> &mut [Ty] { 27 | &mut self.0[..] 28 | } 29 | } 30 | #[cfg(feature="std")] 31 | impl core::ops::Index for MemoryBlock { 32 | type Output = Ty; 33 | fn index(&self, index:usize) -> &Ty { 34 | &self.0[index] 35 | } 36 | } 37 | #[cfg(feature="std")] 38 | impl core::ops::IndexMut for MemoryBlock { 39 | 40 | fn index_mut(&mut self, index:usize) -> &mut Ty { 41 | &mut self.0[index] 42 | } 43 | } 44 | #[cfg(feature="std")] 45 | impl Drop for MemoryBlock { 46 | fn drop (&mut self) { 47 | if self.0.len() != 0 { 48 | print!("leaking memory block of length {} element size: {}\n", self.0.len(), core::mem::size_of::()); 49 | 50 | let to_forget = core::mem::replace(self, MemoryBlock::default()); 51 | core::mem::forget(to_forget);// leak it -- it's the only safe way with custom allocators 52 | } 53 | } 54 | } 55 | pub struct SubclassableAllocator { 56 | _ty: core::marker::PhantomData, 57 | alloc: CAllocator 58 | // have alternative ty here 59 | } 60 | 61 | impl SubclassableAllocator { 62 | pub fn new(sub_alloc:CAllocator) -> Self { 63 | SubclassableAllocator::{ 64 | _ty:core::marker::PhantomData::::default(), 65 | alloc:sub_alloc, 66 | } 67 | } 68 | } 69 | #[cfg(feature="std")] 70 | impl alloc::Allocator for SubclassableAllocator { 71 | type AllocatedMemory = MemoryBlock; 72 | fn alloc_cell(&mut self, size:usize) ->MemoryBlock{ 73 | if let Some(alloc_fn) = self.alloc.alloc_func { 74 | let ptr = alloc_fn(self.alloc.opaque, size * core::mem::size_of::()); 75 | let typed_ptr = unsafe {core::mem::transmute::<*mut c_void, *mut Ty>(ptr)}; 76 | let slice_ref = unsafe {core::slice::from_raw_parts_mut(typed_ptr, size)}; 77 | for item in 
slice_ref.iter_mut() { 78 | unsafe{core::ptr::write(item, Ty::default())}; 79 | } 80 | return MemoryBlock(unsafe{Box::from_raw(slice_ref)}) 81 | } 82 | MemoryBlock(vec![Ty::default();size].into_boxed_slice()) 83 | } 84 | fn free_cell(&mut self, mut bv:MemoryBlock) { 85 | if (*bv.0).len() != 0 { 86 | if let Some(_) = self.alloc.alloc_func { 87 | let slice_ptr = (*bv.0).as_mut_ptr(); 88 | let _box_ptr = Box::into_raw(core::mem::replace(&mut bv.0, Vec::::new().into_boxed_slice())); 89 | if let Some(free_fn) = self.alloc.free_func { 90 | unsafe {free_fn(self.alloc.opaque, core::mem::transmute::<*mut Ty, *mut c_void>(slice_ptr))}; 91 | } 92 | } else { 93 | let _to_free = core::mem::replace(&mut bv.0, Vec::::new().into_boxed_slice()); 94 | } 95 | } 96 | } 97 | } 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | #[cfg(not(feature="std"))] 110 | static mut G_SLICE:&mut[u8] = &mut[]; 111 | #[cfg(not(feature="std"))] 112 | #[derive(Debug)] 113 | pub struct MemoryBlock(*mut[Ty]); 114 | #[cfg(not(feature="std"))] 115 | impl Default for MemoryBlock { 116 | fn default() -> Self { 117 | MemoryBlock(unsafe{core::mem::transmute::<*mut [u8], *mut[Ty]>(G_SLICE.as_mut())}) 118 | } 119 | } 120 | #[cfg(not(feature="std"))] 121 | impl alloc::SliceWrapper for MemoryBlock { 122 | fn slice(&self) -> &[Ty] { 123 | if unsafe{(*self.0).len()} == 0 { 124 | &[] 125 | } else { 126 | unsafe{core::slice::from_raw_parts(&(*self.0)[0], (*self.0).len())} 127 | } 128 | } 129 | } 130 | #[cfg(not(feature="std"))] 131 | impl alloc::SliceWrapperMut for MemoryBlock { 132 | fn slice_mut(&mut self) -> &mut [Ty] { 133 | if unsafe{(*self.0).len()} == 0 { 134 | &mut [] 135 | } else { 136 | unsafe{core::slice::from_raw_parts_mut(&mut (*self.0)[0], (*self.0).len())} 137 | } 138 | } 139 | } 140 | 141 | #[cfg(not(feature="std"))] 142 | #[cfg(not(feature="no-stdlib-rust-binding"))] 143 | //#[lang="panic_fmt"] 144 | extern fn panic_fmt(_: ::core::fmt::Arguments, _: &'static str, _: u32) -> ! 
{ 145 | loop {} 146 | } 147 | #[cfg(not(feature="std"))] 148 | #[cfg(not(feature="no-stdlib-rust-binding"))] 149 | #[lang = "eh_personality"] 150 | extern "C" fn eh_personality() { 151 | } 152 | 153 | #[cfg(not(feature="std"))] 154 | impl core::ops::Index for MemoryBlock { 155 | type Output = Ty; 156 | fn index(&self, index:usize) -> &Ty { 157 | unsafe{&(*self.0)[index]} 158 | } 159 | } 160 | #[cfg(not(feature="std"))] 161 | impl core::ops::IndexMut for MemoryBlock { 162 | 163 | fn index_mut(&mut self, index:usize) -> &mut Ty { 164 | unsafe{&mut (*self.0)[index]} 165 | } 166 | } 167 | 168 | #[cfg(not(feature="std"))] 169 | impl alloc::Allocator for SubclassableAllocator { 170 | type AllocatedMemory = MemoryBlock; 171 | fn alloc_cell(&mut self, size:usize) ->MemoryBlock{ 172 | if let Some(alloc_fn) = self.alloc.alloc_func { 173 | let ptr = alloc_fn(self.alloc.opaque, size * core::mem::size_of::()); 174 | let typed_ptr = unsafe {core::mem::transmute::<*mut c_void, *mut Ty>(ptr)}; 175 | let slice_ref = unsafe {core::slice::from_raw_parts_mut(typed_ptr, size)}; 176 | for item in slice_ref.iter_mut() { 177 | unsafe{core::ptr::write(item, Ty::default())}; 178 | } 179 | return MemoryBlock(slice_ref.as_mut()) 180 | } else { 181 | panic!("Must provide allocators in no-stdlib code"); 182 | } 183 | } 184 | fn free_cell(&mut self, mut bv:MemoryBlock) { 185 | use alloc::SliceWrapper; 186 | use alloc::SliceWrapperMut; 187 | if bv.slice().len() != 0 { 188 | if let Some(_) = self.alloc.alloc_func { 189 | if let Some(free_fn) = self.alloc.free_func { 190 | unsafe {free_fn(self.alloc.opaque, core::mem::transmute::<*mut Ty, *mut c_void>(&mut bv.slice_mut()[0]))}; 191 | } 192 | core::mem::replace(&mut bv, MemoryBlock::::default()); 193 | } else { 194 | panic!("Must provide allocators in no-stdlib code"); 195 | } 196 | } 197 | } 198 | } 199 | 200 | 201 | #[cfg(not(feature="std"))] 202 | pub fn free_stdlib(_data: *mut T, _size: usize) { 203 | panic!("Must supply allocators if calling 
divans when compiled with features=no-stdlib"); 204 | } 205 | #[cfg(not(feature="std"))] 206 | pub fn alloc_stdlib(_size: usize) -> *mut T { 207 | panic!("Must supply allocators if calling divans when compiled with features=no-stdlib"); 208 | } 209 | 210 | #[cfg(feature="std")] 211 | pub unsafe fn free_stdlib(ptr: *mut T, size: usize) { 212 | let slice_ref = core::slice::from_raw_parts_mut(ptr, size); 213 | Box::from_raw(slice_ref); // free on drop 214 | } 215 | #[cfg(feature="std")] 216 | pub fn alloc_stdlib(size: usize) -> *mut T { 217 | let mut newly_allocated = vec![T::default();size].into_boxed_slice(); 218 | let slice_ptr = newly_allocated.as_mut_ptr(); 219 | let _box_ptr = Box::into_raw(newly_allocated); 220 | slice_ptr 221 | } 222 | -------------------------------------------------------------------------------- /src/probability/blend_cdf.rs: -------------------------------------------------------------------------------- 1 | use super::interface::{Prob, BaseCDF, Speed, CDF_MAX, CDF16, BLEND_FIXED_POINT_PRECISION}; 2 | 3 | 4 | #[allow(unused)] 5 | fn gte(a:Prob, b:Prob) -> Prob { 6 | (-((a >= b) as i64)) as Prob 7 | } 8 | fn and(a:Prob, b:Prob) -> Prob { 9 | a & b 10 | } 11 | fn add(a:Prob, b:Prob) -> Prob { 12 | a.wrapping_add(b) 13 | } 14 | 15 | pub fn mul_blend(baseline: [Prob;16], to_blend: [Prob;16], blend : i32, bias : i32) -> [Prob;16] { 16 | const SCALE :i32 = 1i32 << BLEND_FIXED_POINT_PRECISION; 17 | let mut epi32:[i32;8] = [i32::from(to_blend[0]), 18 | i32::from(to_blend[1]), 19 | i32::from(to_blend[2]), 20 | i32::from(to_blend[3]), 21 | i32::from(to_blend[4]), 22 | i32::from(to_blend[5]), 23 | i32::from(to_blend[6]), 24 | i32::from(to_blend[7])]; 25 | let scale_minus_blend = SCALE - blend; 26 | for i in 0..8 { 27 | epi32[i] *= blend; 28 | epi32[i] += i32::from(baseline[i]) * scale_minus_blend + bias; 29 | epi32[i] >>= BLEND_FIXED_POINT_PRECISION; 30 | } 31 | let mut retval : [Prob;16] =[epi32[0] as Prob, 32 | epi32[1] as Prob, 33 | epi32[2] as 
Prob, 34 | epi32[3] as Prob, 35 | epi32[4] as Prob, 36 | epi32[5] as Prob, 37 | epi32[6] as Prob, 38 | epi32[7] as Prob, 39 | 0,0,0,0,0,0,0,0]; 40 | let mut epi32:[i32;8] = [i32::from(to_blend[8]), 41 | i32::from(to_blend[9]), 42 | i32::from(to_blend[10]), 43 | i32::from(to_blend[11]), 44 | i32::from(to_blend[12]), 45 | i32::from(to_blend[13]), 46 | i32::from(to_blend[14]), 47 | i32::from(to_blend[15])]; 48 | for i in 8..16 { 49 | epi32[i - 8] *= blend; 50 | epi32[i - 8] += i32::from(baseline[i]) * scale_minus_blend + bias; 51 | retval[i] = (epi32[i - 8] >> BLEND_FIXED_POINT_PRECISION) as Prob; 52 | } 53 | retval 54 | } 55 | 56 | macro_rules! each16bin { 57 | ($src0 : expr, $src1 : expr, $func: expr) => { 58 | [$func($src0[0], $src1[0]), 59 | $func($src0[1], $src1[1]), 60 | $func($src0[2], $src1[2]), 61 | $func($src0[3], $src1[3]), 62 | $func($src0[4], $src1[4]), 63 | $func($src0[5], $src1[5]), 64 | $func($src0[6], $src1[6]), 65 | $func($src0[7], $src1[7]), 66 | $func($src0[8], $src1[8]), 67 | $func($src0[9], $src1[9]), 68 | $func($src0[10], $src1[10]), 69 | $func($src0[11], $src1[11]), 70 | $func($src0[12], $src1[12]), 71 | $func($src0[13], $src1[13]), 72 | $func($src0[14], $src1[14]), 73 | $func($src0[15], $src1[15])] 74 | } 75 | } 76 | pub fn to_blend(symbol: u8) -> [Prob;16] { 77 | // The returned distribution has a max of DEL = CDF_MAX - 16, which guarantees that 78 | // by mixing only such distributions, we'll have at least 16 as the bias weight, 79 | // which is required to guarantee nonzero PDF everywhere. 
80 | const CDF_INDEX : [Prob;16] = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]; 81 | const DEL: Prob = CDF_MAX - 16; 82 | let symbol16 = [Prob::from(symbol); 16]; 83 | let delta16 = [DEL; 16]; 84 | let mask_symbol = each16bin!(CDF_INDEX, symbol16, gte); 85 | each16bin!(delta16, mask_symbol, and) 86 | } 87 | 88 | pub fn to_blend_lut(symbol: u8) -> [Prob;16] { 89 | const DEL: Prob = CDF_MAX - 16; 90 | static CDF_SELECTOR : [[Prob;16];16] = [ 91 | [DEL,DEL,DEL,DEL,DEL,DEL,DEL,DEL,DEL,DEL,DEL,DEL,DEL,DEL,DEL,DEL as Prob], 92 | [0,DEL,DEL,DEL,DEL,DEL,DEL,DEL,DEL,DEL,DEL,DEL,DEL,DEL,DEL,DEL as Prob], 93 | [0,0,DEL,DEL,DEL,DEL,DEL,DEL,DEL,DEL,DEL,DEL,DEL,DEL,DEL,DEL as Prob], 94 | [0,0,0,DEL,DEL,DEL,DEL,DEL,DEL,DEL,DEL,DEL,DEL,DEL,DEL,DEL as Prob], 95 | [0,0,0,0,DEL,DEL,DEL,DEL,DEL,DEL,DEL,DEL,DEL,DEL,DEL,DEL as Prob], 96 | [0,0,0,0,0,DEL,DEL,DEL,DEL,DEL,DEL,DEL,DEL,DEL,DEL,DEL as Prob], 97 | [0,0,0,0,0,0,DEL,DEL,DEL,DEL,DEL,DEL,DEL,DEL,DEL,DEL as Prob], 98 | [0,0,0,0,0,0,0,DEL,DEL,DEL,DEL,DEL,DEL,DEL,DEL,DEL as Prob], 99 | [0,0,0,0,0,0,0,0,DEL,DEL,DEL,DEL,DEL,DEL,DEL,DEL as Prob], 100 | [0,0,0,0,0,0,0,0,0,DEL,DEL,DEL,DEL,DEL,DEL,DEL as Prob], 101 | [0,0,0,0,0,0,0,0,0,0,DEL,DEL,DEL,DEL,DEL,DEL as Prob], 102 | [0,0,0,0,0,0,0,0,0,0,0,DEL,DEL,DEL,DEL,DEL as Prob], 103 | [0,0,0,0,0,0,0,0,0,0,0,0,DEL,DEL,DEL,DEL as Prob], 104 | [0,0,0,0,0,0,0,0,0,0,0,0,0,DEL,DEL,DEL as Prob], 105 | [0,0,0,0,0,0,0,0,0,0,0,0,0,0,DEL,DEL as Prob], 106 | [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,DEL as Prob]]; 107 | CDF_SELECTOR[symbol as usize] 108 | } 109 | #[derive(Clone,Copy)] 110 | pub struct BlendCDF16 { 111 | pub cdf: [Prob; 16], 112 | mix_rate: i32, 113 | count: i32, 114 | } 115 | 116 | impl BlendCDF16 { 117 | fn blend_internal(&mut self, to_blend: [Prob;16], mix_rate: i32) { 118 | self.cdf = mul_blend(self.cdf, to_blend, mix_rate, (self.count & 0xf) << (BLEND_FIXED_POINT_PRECISION - 4)); 119 | if self.cdf[15] < (CDF_MAX - 16) - (self.cdf[15] >> 1) { 120 | for i in 0..16 { 121 | self.cdf[i] += 
self.cdf[i] >> 1; 122 | } 123 | } 124 | debug_assert!(self.cdf[15] <= CDF_MAX - 16); 125 | 126 | } 127 | } 128 | impl Default for BlendCDF16 { 129 | fn default() -> Self { 130 | BlendCDF16 { 131 | cdf: [0; 16], 132 | mix_rate: (1 << 10) + (1 << 9), 133 | count: 0, 134 | } 135 | } 136 | } 137 | 138 | 139 | impl BaseCDF for BlendCDF16 { 140 | fn num_symbols() -> u8 { 16 } 141 | fn used(&self) -> bool { 142 | for i in 0..16 { 143 | if self.cdf[i] > 0 { 144 | return true; 145 | } 146 | } 147 | false 148 | } 149 | fn max(&self) -> Prob { 150 | CDF_MAX as Prob 151 | } 152 | fn log_max(&self) -> Option { 153 | Some(15) 154 | } 155 | fn div_by_max(&self, val:i32) -> i32 { 156 | return val>>self.log_max().unwrap() 157 | } 158 | fn cdf(&self, symbol: u8) -> Prob { 159 | match symbol { 160 | 15 => self.max(), 161 | _ => { 162 | // We want self.cdf[15] to be normalized to CDF_MAX, so take the difference to 163 | // be the latent bias term coming from a uniform distribution. 164 | let bias = CDF_MAX - self.cdf[15] as i16; 165 | debug_assert!(bias >= 16); 166 | self.cdf[symbol as usize] as Prob + ((i32::from(bias) * (i32::from(symbol + 1))) >> 4) as Prob 167 | } 168 | } 169 | } 170 | fn valid(&self) -> bool { 171 | for item in &self.cdf { 172 | if *item < 0 || !(*item <= CDF_MAX) { 173 | return false; 174 | } 175 | } 176 | true 177 | } 178 | } 179 | 180 | impl CDF16 for BlendCDF16 { 181 | fn average(&self, other: &Self, mix_rate: i32) ->Self { 182 | let mut retval = *self; 183 | retval.blend_internal(other.cdf, mix_rate); 184 | retval 185 | } 186 | fn blend(&mut self, symbol:u8, speed: Speed) { 187 | self.count = self.count.wrapping_add(1); 188 | let _mix_rate = match speed { 189 | Speed::GEOLOGIC => 32, 190 | Speed::GLACIAL => 64, 191 | Speed::MUD => 128, 192 | Speed::SLOW => 192, 193 | Speed::MED => 256, 194 | Speed::FAST => 384, 195 | Speed::PLANE => 512, 196 | Speed::ROCKET => 1100, 197 | a => a.inc(), 198 | }; 199 | let to_blend = to_blend_lut(symbol); 200 | let mr = 
self.mix_rate; 201 | self.blend_internal(to_blend, mr); 202 | // Reduce the weight of bias in the first few iterations. 203 | self.mix_rate -= self.mix_rate >> 7; 204 | // NOTE(jongmin): geometrically decay mix_rate until it dips below 1 << 7; 205 | 206 | 207 | } 208 | } 209 | 210 | #[cfg(test)] 211 | mod test { 212 | use super::{BlendCDF16, to_blend, to_blend_lut}; 213 | declare_common_tests!(BlendCDF16); 214 | 215 | #[test] 216 | fn test_blend_lut() { 217 | for i in 0..16 { 218 | let a = to_blend(i as u8); 219 | let b = to_blend_lut(i as u8); 220 | for j in 0..16 { 221 | assert_eq!(a[j], b[j]); 222 | } 223 | } 224 | } 225 | 226 | } 227 | -------------------------------------------------------------------------------- /src/slice_util.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Dropbox, Inc 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | use core; 15 | use brotli; 16 | pub use alloc::{AllocatedStackMemory, Allocator, SliceWrapper, SliceWrapperMut, StackAllocator}; 17 | 18 | #[derive(Copy,Clone,Default,Debug)] 19 | pub struct SlicePlaceholder32 { 20 | len:u32, 21 | ph: core::marker::PhantomData, 22 | } 23 | impl SlicePlaceholder32 { 24 | pub fn new(len: u32) -> Self { 25 | SlicePlaceholder32::{ 26 | len: len, 27 | ph: core::marker::PhantomData::::default(), 28 | } 29 | } 30 | } 31 | 32 | impl SliceWrapper for SlicePlaceholder32 { 33 | fn slice(&self) -> &[T]{ 34 | &[] 35 | } 36 | fn len(&self) -> usize { 37 | self.len as usize 38 | } 39 | } 40 | 41 | 42 | 43 | 44 | impl<'b> brotli::interface::Unfreezable for SliceReference<'b, u8> { 45 | fn thaw<'a>(&self, data: &'a [u8]) -> brotli::InputReference<'a> { 46 | brotli::InputReference{ 47 | data: data.split_at(self.start).1.split_at(self.len).0, 48 | orig_offset: self.start, 49 | } 50 | } 51 | fn thaw_mut<'a>(&self, data: &'a mut [u8]) -> brotli::interface::InputReferenceMut<'a> { 52 | brotli::interface::InputReferenceMut{ 53 | data: data.split_at_mut(self.start).1.split_at_mut(self.len).0, 54 | orig_offset: self.start, 55 | } 56 | } 57 | fn thaw_pair<'a>(&self, pair: &brotli::InputPair<'a>) -> Result, ()> { 58 | if self.start >= pair.1.orig_offset { 59 | return Ok(brotli::InputReference{ 60 | data: pair.1.data.split_at(self.start - pair.1.orig_offset).1.split_at(self.len).0, 61 | orig_offset: self.start, 62 | }); 63 | } 64 | let offset = self.start - pair.0.orig_offset; 65 | if offset + self.len as usize <= pair.0.data.len() { // overlap 66 | Ok(brotli::InputReference{ 67 | data: pair.0.data.split_at(offset).1.split_at(self.len).0, 68 | orig_offset: self.start, 69 | }) 70 | } else { 71 | Err(()) 72 | } 73 | } 74 | 75 | } 76 | #[derive(Copy,Clone)] 77 | pub struct SliceReference<'a, T:'a> { 78 | data: &'a[T], 79 | start: usize, 80 | len: usize, 81 | } 82 | 83 | impl<'a, T:'a> SliceReference<'a, T> { 84 | pub fn new(input: &'a[T], start: usize, len: 
usize) -> SliceReference<'a, T> { 85 | SliceReference:: { 86 | data: input.split_at(start).1.split_at(len).0, 87 | start: start, 88 | len: len, 89 | } 90 | } 91 | pub fn freeze_dry(&self) -> SliceReference<'static, T> { 92 | SliceReference:: { 93 | data: &[], 94 | start: self.start, 95 | len: self.len, 96 | } 97 | } 98 | pub fn freeze(old: brotli::SliceOffset) -> SliceReference<'static, T> { 99 | SliceReference:: { 100 | data: &[], 101 | start: old.offset(), 102 | len: old.len(), 103 | } 104 | } 105 | pub fn thaw(&self, slice:&'a [T]) -> SliceReference<'a, T> { 106 | SliceReference::<'a, T> { 107 | data: slice.split_at(self.start).1.split_at(self.len).0, 108 | start: self.start, 109 | len: self.len, 110 | } 111 | } 112 | } 113 | pub fn thaw_br<'a>(xself:&SliceReference<'a, u8>, slice:&'a [u8]) -> brotli::InputReference<'a> { 114 | brotli::InputReference::<'a> { 115 | data: slice.split_at(xself.start).1.split_at(xself.len).0, 116 | orig_offset: xself.start, 117 | } 118 | } 119 | 120 | impl<'a, T:'a> SliceWrapper for SliceReference<'a, T> { 121 | fn slice(&self) -> &[T]{ 122 | self.data 123 | } 124 | } 125 | 126 | impl<'a, T> Default for SliceReference<'a, T> { 127 | fn default() ->SliceReference<'a, T> { 128 | SliceReference:: { 129 | data:&[], 130 | start:0, 131 | len:0, 132 | } 133 | } 134 | } 135 | 136 | pub struct AllocatedMemoryPrefix>(pub AllocT::AllocatedMemory, pub u32); 137 | 138 | impl> core::ops::Index for AllocatedMemoryPrefix { 139 | type Output = T; 140 | fn index(&self, index: usize) -> &T { 141 | &self.0.slice()[index] 142 | } 143 | } 144 | 145 | impl> core::ops::IndexMut for AllocatedMemoryPrefix { 146 | fn index_mut(&mut self, index: usize) -> &mut T { 147 | &mut self.mem().slice_mut()[index] 148 | } 149 | } 150 | 151 | impl> Default for AllocatedMemoryPrefix { 152 | fn default() -> Self { 153 | AllocatedMemoryPrefix(AllocT::AllocatedMemory::default(), 0u32) 154 | } 155 | } 156 | impl> AllocatedMemoryPrefix { 157 | #[inline(always)] 158 | pub fn 
mem(&mut self) -> &mut AllocT::AllocatedMemory { 159 | &mut self.0 160 | } 161 | pub fn components(self) -> (AllocT::AllocatedMemory, usize) { 162 | (self.0, self.1 as usize) 163 | } 164 | #[inline(always)] 165 | pub fn max_len(&self) -> usize { 166 | self.0.len() 167 | } 168 | } 169 | 170 | impl> SliceWrapperMut for AllocatedMemoryPrefix { 171 | fn slice_mut(&mut self) -> &mut [T] { 172 | self.0.slice_mut().split_at_mut(self.1 as usize).0 173 | } 174 | } 175 | impl> SliceWrapper for AllocatedMemoryPrefix { 176 | fn slice(&self) -> &[T] { 177 | self.0.slice().split_at(self.1 as usize).0 178 | } 179 | fn len(&self) -> usize { 180 | self.1 as usize 181 | } 182 | } 183 | impl > AllocatedMemoryPrefix { 184 | pub fn new(m8 : &mut AllocT, len: usize) -> Self { 185 | AllocatedMemoryPrefix::(m8.alloc_cell(len), len as u32) 186 | } 187 | pub fn realloc(mem : AllocT::AllocatedMemory, len: usize) -> Self { 188 | debug_assert!(len <= mem.slice().len(), "Must realloc to a smaller size for AllocatedMemoryPrefix"); 189 | AllocatedMemoryPrefix::(mem, len as u32) 190 | } 191 | } 192 | 193 | 194 | 195 | pub struct AllocatedMemoryRange>(pub AllocT::AllocatedMemory, pub core::ops::Range); 196 | 197 | impl> core::ops::Index for AllocatedMemoryRange { 198 | type Output = T; 199 | fn index(&self, index: usize) -> &T { 200 | &self.0.slice()[self.1.start + index] 201 | } 202 | } 203 | 204 | impl> core::ops::IndexMut for AllocatedMemoryRange { 205 | fn index_mut(&mut self, index: usize) -> &mut T { 206 | let i = self.1.start + index; 207 | &mut self.mem().slice_mut()[i] 208 | } 209 | } 210 | 211 | impl> Default for AllocatedMemoryRange { 212 | fn default() -> Self { 213 | AllocatedMemoryRange(AllocT::AllocatedMemory::default(), 0..0) 214 | } 215 | } 216 | impl> AllocatedMemoryRange { 217 | pub fn mem(&mut self) -> &mut AllocT::AllocatedMemory { 218 | &mut self.0 219 | } 220 | pub fn components(self) -> (AllocT::AllocatedMemory, core::ops::Range) { 221 | (self.0, self.1.clone()) 222 | } 223 
| } 224 | 225 | impl> SliceWrapperMut for AllocatedMemoryRange { 226 | fn slice_mut(&mut self) -> &mut [T] { 227 | &mut self.0.slice_mut()[self.1.clone()] 228 | } 229 | } 230 | impl> SliceWrapper for AllocatedMemoryRange { 231 | fn slice(&self) -> &[T] { 232 | &self.0.slice()[self.1.clone()] 233 | } 234 | } 235 | impl > AllocatedMemoryRange { 236 | pub fn new(m8 : &mut AllocT, len: usize) -> Self { 237 | AllocatedMemoryRange::(m8.alloc_cell(len), 0..len) 238 | } 239 | pub fn realloc(mem : AllocT::AllocatedMemory, range: core::ops::Range) -> Self { 240 | debug_assert!(range.end <= mem.slice().len(), "Must realloc to a smaller size for AllocatedMemoryRange"); 241 | debug_assert!(range.start <= range.end); 242 | AllocatedMemoryRange::(mem, range) 243 | } 244 | } 245 | 246 | 247 | 248 | -------------------------------------------------------------------------------- /src/codec/block_type.rs: -------------------------------------------------------------------------------- 1 | use interface::{DivansResult, StreamMuxer, StreamDemuxer}; 2 | use alloc::Allocator; 3 | use super::interface::{ 4 | EncoderOrDecoderSpecialization, 5 | CrossCommandState, 6 | BLOCK_TYPE_LITERAL_SWITCH, 7 | }; 8 | use ::interface::{ 9 | ArithmeticEncoderOrDecoder, 10 | BillingDesignation, 11 | CrossCommandBilling, 12 | BlockSwitch, 13 | LiteralBlockSwitch, 14 | }; 15 | use ::probability::{Speed, CDF16}; 16 | use ::priors::PriorCollection; 17 | use super::priors::{BlockTypePriorType}; 18 | #[derive(Clone,Copy,PartialEq,Eq, Hash, Debug)] 19 | pub enum BlockTypeState { 20 | Begin, 21 | TwoNibbleType, 22 | FinalNibble(u8), 23 | FullyDecoded(u8), 24 | } 25 | 26 | 27 | impl BlockTypeState { 28 | pub fn begin() -> Self { 29 | BlockTypeState::Begin 30 | } 31 | pub fn encode_or_decode, 34 | LinearOutputBytes:StreamMuxer+Default, 35 | Cdf16:CDF16, 36 | AllocU8:Allocator, 37 | AllocCDF16:Allocator>( 38 | &mut self, 39 | superstate: &mut CrossCommandState, 46 | input_bs: BlockSwitch, 47 | 
block_type_switch_index:usize, 48 | output_bytes: &mut [u8], 49 | output_offset: &mut usize) -> DivansResult { 50 | let mut varint_nibble:u8 = 51 | if input_bs.block_type() == superstate.bk.btype_lru[block_type_switch_index][1] { 52 | 0 53 | } else if input_bs.block_type() == superstate.bk.btype_max_seen[block_type_switch_index].wrapping_add(1) { 54 | 1 55 | } else if input_bs.block_type() <= 12 { 56 | input_bs.block_type() + 2 57 | } else { 58 | 15 59 | }; 60 | let mut first_nibble:u8 = input_bs.block_type() & 0xf; 61 | let mut second_nibble:u8 = input_bs.block_type() >> 4; 62 | loop { 63 | match superstate.drain_or_fill_internal_buffer_cmd( 64 | output_bytes, 65 | output_offset) { 66 | DivansResult::Success => {}, 67 | need_something => return need_something, 68 | } 69 | let billing = BillingDesignation::CrossCommand(CrossCommandBilling::BlockSwitchType); 70 | match *self { 71 | BlockTypeState::Begin => { 72 | let mut nibble_prob = superstate.bk.btype_priors.get(BlockTypePriorType::Mnemonic, 73 | (block_type_switch_index,)); 74 | superstate.coder.get_or_put_nibble(&mut varint_nibble, nibble_prob, billing); 75 | if superstate.specialization.adapt_cdf() { 76 | nibble_prob.blend(varint_nibble, Speed::SLOW); 77 | } 78 | match varint_nibble { 79 | 0 => *self = BlockTypeState::FullyDecoded( 80 | superstate.bk.btype_lru[block_type_switch_index][1]), 81 | 1 => *self = BlockTypeState::FullyDecoded( 82 | superstate.bk.btype_max_seen[block_type_switch_index].wrapping_add(1)), 83 | 15 => *self = BlockTypeState::TwoNibbleType, 84 | val => *self = BlockTypeState::FullyDecoded(val - 2), 85 | } 86 | }, 87 | BlockTypeState::TwoNibbleType => { 88 | let mut nibble_prob = superstate.bk.btype_priors.get(BlockTypePriorType::FirstNibble, 89 | (block_type_switch_index,)); 90 | superstate.coder.get_or_put_nibble(&mut first_nibble, nibble_prob, billing); 91 | if superstate.specialization.adapt_cdf() { 92 | nibble_prob.blend(first_nibble, Speed::SLOW); 93 | } 94 | *self = 
BlockTypeState::FinalNibble(first_nibble); 95 | }, 96 | BlockTypeState::FinalNibble(first_nibble) => { 97 | let mut nibble_prob = superstate.bk.btype_priors.get(BlockTypePriorType::SecondNibble, 98 | (block_type_switch_index,)); 99 | superstate.coder.get_or_put_nibble(&mut second_nibble, nibble_prob, billing); 100 | if superstate.specialization.adapt_cdf() { 101 | nibble_prob.blend(second_nibble, Speed::SLOW); 102 | } 103 | *self = BlockTypeState::FullyDecoded((second_nibble << 4) | first_nibble); 104 | } 105 | BlockTypeState::FullyDecoded(_) => { 106 | return DivansResult::Success; 107 | } 108 | } 109 | } 110 | } 111 | } 112 | 113 | #[derive(Clone,Copy)] 114 | pub enum LiteralBlockTypeState { 115 | Begin, 116 | Intermediate(BlockTypeState), 117 | StrideNibble(u8), 118 | FullyDecoded(u8, u8), 119 | } 120 | 121 | impl LiteralBlockTypeState { 122 | pub fn begin() -> Self { 123 | LiteralBlockTypeState::Begin 124 | } 125 | pub fn encode_or_decode, 128 | LinearOutputBytes:StreamMuxer+Default, 129 | Cdf16:CDF16, 130 | AllocU8:Allocator, 131 | AllocCDF16:Allocator>( 132 | &mut self, 133 | superstate: &mut CrossCommandState, 140 | input_bs: LiteralBlockSwitch, 141 | output_bytes: &mut [u8], 142 | output_offset: &mut usize) -> DivansResult { 143 | loop { 144 | let billing = BillingDesignation::CrossCommand(CrossCommandBilling::BlockSwitchType); 145 | match *self { 146 | LiteralBlockTypeState::Begin => { 147 | *self = LiteralBlockTypeState::Intermediate(BlockTypeState::Begin); 148 | }, 149 | LiteralBlockTypeState::Intermediate(bts) => { 150 | let mut local_bts = bts; 151 | let early_ret = match local_bts.encode_or_decode(superstate, 152 | input_bs.0, 153 | BLOCK_TYPE_LITERAL_SWITCH, 154 | output_bytes, 155 | output_offset) { 156 | DivansResult::Success => None, 157 | any => Some(any), 158 | }; 159 | match local_bts { 160 | BlockTypeState::FullyDecoded(val) => { 161 | *self = LiteralBlockTypeState::StrideNibble(val); 162 | } 163 | any => { 164 | *self = 
LiteralBlockTypeState::Intermediate(any); 165 | } 166 | } 167 | if let Some(val) = early_ret { 168 | return val; 169 | } 170 | }, 171 | LiteralBlockTypeState::StrideNibble(ltype) => { 172 | match superstate.drain_or_fill_internal_buffer_cmd(output_bytes, 173 | output_offset) { 174 | DivansResult::Success => {}, 175 | need_something => return need_something, 176 | } 177 | let mut stride_nibble = match superstate.bk.desired_force_stride { 178 | super::StrideSelection::UseBrotliRec => input_bs.stride(), 179 | matched_stride => matched_stride as u8, 180 | }; 181 | let mut nibble_prob = superstate.bk.btype_priors.get(BlockTypePriorType::StrideNibble, 182 | (0,)); 183 | superstate.coder.get_or_put_nibble(&mut stride_nibble, nibble_prob, billing); 184 | if superstate.specialization.adapt_cdf() { 185 | nibble_prob.blend(stride_nibble, Speed::SLOW); 186 | } 187 | *self = LiteralBlockTypeState::FullyDecoded(ltype, stride_nibble); 188 | }, 189 | LiteralBlockTypeState::FullyDecoded(_ltype, _stride) => { 190 | return DivansResult::Success; 191 | } 192 | } 193 | } 194 | } 195 | } 196 | -------------------------------------------------------------------------------- /src/bin/util.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Dropbox, Inc 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | use core; 16 | use std::io; 17 | use super::{SliceWrapperMut,SliceWrapper}; 18 | use super::alloc; 19 | pub struct DynBuffer(Box<[u8]>); 20 | 21 | impl core::default::Default for DynBuffer { 22 | fn default() -> Self { 23 | let v: Vec = Vec::new(); 24 | let b = v.into_boxed_slice(); 25 | DynBuffer(b) 26 | } 27 | } 28 | 29 | 30 | impl DynBuffer { 31 | #[allow(unused)] 32 | pub fn new(size:usize) -> DynBuffer { 33 | DynBuffer(vec![0u8;size].into_boxed_slice()) 34 | } 35 | } 36 | 37 | impl SliceWrapper for DynBuffer { 38 | fn slice(&self) -> &[u8] { 39 | &*self.0 40 | } 41 | } 42 | 43 | impl SliceWrapperMut for DynBuffer { 44 | fn slice_mut(&mut self) -> &mut [u8] { 45 | &mut *self.0 46 | } 47 | } 48 | 49 | #[cfg(feature="inplace-new")] 50 | macro_rules! define_static_heap_buffer { 51 | ($name : ident, $size: expr) => { 52 | pub struct $name(Box<[u8;$size]>); 53 | impl core::default::Default for $name { 54 | fn default() -> Self { 55 | static DEFAULT_VALUE: [u8;$size] = [0u8;$size]; 56 | $name(Box::<[u8;$size]>::new(DEFAULT_VALUE)) 57 | } 58 | } 59 | impl SliceWrapper for $name { 60 | fn slice(&self) -> &[u8] { 61 | &*self.0 62 | } 63 | } 64 | 65 | impl SliceWrapperMut for $name { 66 | fn slice_mut(&mut self) -> &mut [u8] { 67 | &mut *self.0 68 | } 69 | } 70 | } 71 | } 72 | 73 | #[cfg(not(feature="inplace-new"))] 74 | macro_rules! 
define_static_heap_buffer { 75 | ($name : ident, $size: expr) => { 76 | pub struct $name(DynBuffer); 77 | impl core::default::Default for $name { 78 | fn default() -> Self { 79 | $name(DynBuffer((vec![0u8;$size]).into_boxed_slice())) 80 | } 81 | } 82 | impl SliceWrapper for $name { 83 | fn slice(&self) -> &[u8] { 84 | (&*(self.0).0).split_at($size).0 85 | } 86 | } 87 | 88 | impl SliceWrapperMut for $name { 89 | fn slice_mut(&mut self) -> &mut [u8] { 90 | (&mut *(self.0).0).split_at_mut($size).0 91 | } 92 | } 93 | } 94 | } 95 | 96 | define_static_heap_buffer!(StaticHeapBuffer10, 1<<10); 97 | define_static_heap_buffer!(StaticHeapBuffer11, 1<<11); 98 | define_static_heap_buffer!(StaticHeapBuffer12, 1<<12); 99 | define_static_heap_buffer!(StaticHeapBuffer13, 1<<13); 100 | define_static_heap_buffer!(StaticHeapBuffer14, 1<<14); 101 | define_static_heap_buffer!(StaticHeapBuffer15, 1<<15); 102 | define_static_heap_buffer!(StaticHeapBuffer16, 1<<16); 103 | define_static_heap_buffer!(StaticHeapBuffer17, 1<<17); 104 | define_static_heap_buffer!(StaticHeapBuffer18, 1<<18); 105 | define_static_heap_buffer!(StaticHeapBuffer19, 1<<19); 106 | define_static_heap_buffer!(StaticHeapBuffer20, 1<<20); 107 | define_static_heap_buffer!(StaticHeapBuffer21, 1<<21); 108 | define_static_heap_buffer!(StaticHeapBuffer22, 1<<22); 109 | define_static_heap_buffer!(StaticHeapBuffer23, 1<<23); 110 | define_static_heap_buffer!(StaticHeapBuffer24, 1<<24); 111 | 112 | 113 | pub struct Rebox { 114 | b: Box<[T]>, 115 | } 116 | 117 | impl core::default::Default for Rebox { 118 | fn default() -> Self { 119 | let v: Vec = Vec::new(); 120 | let b = v.into_boxed_slice(); 121 | Rebox:: { b: b } 122 | } 123 | } 124 | 125 | impl core::ops::Index for Rebox { 126 | type Output = T; 127 | fn index(&self, index: usize) -> &T { 128 | &(*self.b)[index] 129 | } 130 | } 131 | 132 | impl core::ops::IndexMut for Rebox { 133 | fn index_mut(&mut self, index: usize) -> &mut T { 134 | &mut (*self.b)[index] 135 | } 136 | } 
137 | 138 | impl alloc::SliceWrapper for Rebox { 139 | fn slice(&self) -> &[T] { 140 | &*self.b 141 | } 142 | } 143 | 144 | impl alloc::SliceWrapperMut for Rebox { 145 | fn slice_mut(&mut self) -> &mut [T] { 146 | &mut *self.b 147 | } 148 | } 149 | 150 | pub struct HeapAllocator { 151 | pub default_value: T, 152 | } 153 | 154 | impl alloc::Allocator for HeapAllocator { 155 | type AllocatedMemory = Rebox; 156 | fn alloc_cell(self: &mut HeapAllocator, len: usize) -> Rebox { 157 | let v: Vec = vec![self.default_value.clone();len]; 158 | let b = v.into_boxed_slice(); 159 | Rebox:: { b: b } 160 | } 161 | fn free_cell(self: &mut HeapAllocator, _data: Rebox) {} 162 | } 163 | 164 | 165 | 166 | fn hex_to_nibble(byte: u8) -> Result { 167 | if byte >= b'A' && byte <= b'F' { 168 | Ok(byte - b'A' + 10) 169 | } else if byte >= b'a' && byte <= b'f' { 170 | Ok(byte - b'a' + 10) 171 | } else if byte >= b'0' && byte <= b'9' { 172 | Ok(byte - b'0') 173 | } else { 174 | Err(()) 175 | } 176 | } 177 | fn quoted_slice_to_vec(s: &[u8]) -> Result, io::Error> { 178 | if s.len() < 2 { 179 | return Err(io::Error::new(io::ErrorKind::InvalidInput, core::str::from_utf8(s).unwrap())); 180 | } 181 | let mut output = Vec::::with_capacity(s.len() - 2); 182 | let mut must_end = false; 183 | let mut escaped = false; 184 | let mut hexed = false; 185 | let mut upper: Option = None; 186 | 187 | for byte_ref in s.iter().skip(1) { 188 | let byte = *byte_ref; 189 | if must_end { 190 | return Err(io::Error::new(io::ErrorKind::InvalidInput, core::str::from_utf8(s).unwrap())); 191 | } 192 | if byte == b'\"' && !escaped { 193 | must_end = true; 194 | continue; 195 | } 196 | 197 | if byte == b'\\' && !escaped { 198 | escaped = true; 199 | continue; 200 | } 201 | if escaped { 202 | if hexed { 203 | if let Ok(nib) = hex_to_nibble(byte) { 204 | if let Some(unib) = upper { 205 | output.push((unib << 4) | nib); 206 | hexed = false; 207 | escaped = false; 208 | upper = None; 209 | } else { 210 | upper = Some(nib); 211 
| } 212 | } else { 213 | return Err(io::Error::new(io::ErrorKind::InvalidInput, core::str::from_utf8(s).unwrap())); 214 | } 215 | } else if byte == b'x' { 216 | hexed = true; 217 | } else if byte == b'n' { 218 | output.push(b'\n'); 219 | escaped = false; 220 | } else if byte == b'r' { 221 | output.push(b'\r'); 222 | escaped = false; 223 | } else if byte == b't' { 224 | output.push(b'\t'); 225 | escaped = false; 226 | } else if byte == b'\\' { 227 | output.push(b'\\'); 228 | escaped = false; 229 | } else if byte == b'\'' { 230 | output.push(b'\''); 231 | escaped = false; 232 | } else if byte == b'\"' { 233 | output.push(b'\"'); 234 | escaped = false; 235 | } else if byte == b'?' { 236 | output.push(b'?'); 237 | escaped = false; 238 | } else { 239 | return Err(io::Error::new(io::ErrorKind::InvalidInput, core::str::from_utf8(s).unwrap())); 240 | } 241 | } else { 242 | output.push(byte); 243 | } 244 | } 245 | if hexed || escaped || !must_end { 246 | return Err(io::Error::new(io::ErrorKind::InvalidInput, core::str::from_utf8(s).unwrap())); 247 | } 248 | return Ok(output); 249 | } 250 | 251 | pub fn literal_slice_to_vec(s: &[u8]) -> Result, io::Error> { 252 | if s.len() == 0 { 253 | return Ok(Vec::::new()); 254 | } 255 | if *s.iter().next().unwrap() == b'\"' { 256 | quoted_slice_to_vec(s) 257 | } else { 258 | hex_slice_to_vec(s) 259 | } 260 | } 261 | pub fn hex_slice_to_vec(s: &[u8]) -> Result, io::Error> { 262 | let mut output = Vec::with_capacity(s.len() >> 1); 263 | let mut rem = 0; 264 | let mut buf : u8 = 0; 265 | for byte_ref in s.iter() { 266 | let byte = *byte_ref; 267 | if let Ok(b) = hex_to_nibble(byte) { 268 | buf <<= 4; 269 | buf |= b; 270 | } else if byte == b'\n'|| byte == b'\t'|| byte == b'\r' { 271 | continue; 272 | } else { 273 | return Err(io::Error::new(io::ErrorKind::InvalidInput, core::str::from_utf8(s).unwrap())); 274 | } 275 | rem += 1; 276 | if rem == 2 { 277 | rem = 0; 278 | output.push(buf); 279 | } 280 | } 281 | if rem != 0 { 282 | return 
Err(io::Error::new(io::ErrorKind::InvalidInput, 283 | "String must have an even number of digits")); 284 | } 285 | Ok(output) 286 | } 287 | --------------------------------------------------------------------------------