├── testdata
├── .gitattributes
├── ends_with_truncated_dictionary
├── random_then_unicode
├── ends_with_truncated_dictionary.ir
└── make_mux_test.py
├── .gitignore
├── wasm
├── favicon.ico
├── progress.gif
├── .htaccess
├── mod.py
├── README
├── brotli_wrapper.js
└── brotli_iframe.html
├── AUTHORS
├── research
├── bench_revs.sh
├── brotli_bill_transform.py
├── bill_transform.py
├── brute.py
├── select_best_quandruple.py
├── select_best_triple.py
├── divansplot.py
└── summary.py
├── c
├── Makefile
├── vec_u8.h
├── custom_alloc.h
├── divans
│ └── ffi.h
├── arg.h
└── example.c
├── examples
├── decompress.rs
├── compress.rs
└── util_prior_stream_cost.rs
├── src
├── ffi
│ ├── decompressor.rs
│ ├── interface.rs
│ └── alloc_util.rs
├── raw_to_cmd
│ └── hash_match.rs
├── test_helper.rs
├── probability
│ ├── mod.rs
│ ├── make_div_lut.rs
│ ├── frequentist_cdf.rs
│ ├── numeric.rs
│ ├── variant_speed_cdf.rs
│ ├── external_cdf.rs
│ ├── opt_frequentist_cdf.rs
│ ├── common_tests.rs
│ └── blend_cdf.rs
├── codec
│ ├── specializations.rs
│ ├── crc32.rs
│ ├── weights.rs
│ ├── priors.rs
│ ├── io.rs
│ └── block_type.rs
├── stub_parallel_decompressor.rs
├── constants.rs
├── resizable_buffer.rs
├── debug_encoder.rs
├── divans_to_raw
│ └── mod.rs
├── lib.rs
├── ir_optimize
│ └── cache.rs
├── cmd_to_divans
│ └── mod.rs
├── billing.rs
├── slice_util.rs
└── bin
│ └── util.rs
├── no-stdlib
└── Cargo.toml
└── Cargo.toml
/testdata/.gitattributes:
--------------------------------------------------------------------------------
1 | * binary
--------------------------------------------------------------------------------
/testdata/ends_with_truncated_dictionary:
--------------------------------------------------------------------------------
1 | often referred to as
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | Cargo.lock
2 | target
3 | wasm/brotli.js
4 | wasm/brotli.wasm
5 |
--------------------------------------------------------------------------------
/wasm/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dropbox/divans/HEAD/wasm/favicon.ico
--------------------------------------------------------------------------------
/wasm/progress.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dropbox/divans/HEAD/wasm/progress.gif
--------------------------------------------------------------------------------
/testdata/random_then_unicode:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dropbox/divans/HEAD/testdata/random_then_unicode
--------------------------------------------------------------------------------
/AUTHORS:
--------------------------------------------------------------------------------
1 | Daniel Reiter Horn
2 | Jongmin Baek
3 | Anatoly Yakovenko
4 | Patrick Reiter Horn
5 | CRC32: Andrew Gallant (BurntSushi)
6 |
--------------------------------------------------------------------------------
/wasm/.htaccess:
--------------------------------------------------------------------------------
1 |
2 |
3 | Header set Access-Control-Allow-Origin "*"
4 |
5 |
--------------------------------------------------------------------------------
/testdata/ends_with_truncated_dictionary.ir:
--------------------------------------------------------------------------------
1 | window 22 len 22
2 | insert 0
3 | dict 5 word 5,100 6f6674656e func 0 6f6674656e ctx 3
4 | insert 1 20
5 | dict 7 word 8,98 7265666572726572 func 12 72656665727265 ctx 3
6 | insert 9 6420746f2061732020
7 | window 22 len 22
8 |
--------------------------------------------------------------------------------
/research/bench_revs.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # Usage: bench_revs.sh <rev_a> <rev_b> -- for each commit printed by `git log --oneline $1..$2`, check it out and run the decode_context_pure_average benchmark 16 times. The working tree is left on the last commit processed.
3 | for rev in `git log --oneline $1..$2 | cut -f 1 -d ' '`; do
4 | git checkout $rev
5 | echo starting $rev
6 | for rep in 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15; do
7 | RUSTFLAGS="-C target-cpu=core-avx-i" time cargo bench --bin divans --features="benchmark simd" decode_context_pure_average
8 | done
9 | echo done $rev
10 | done
11 |
--------------------------------------------------------------------------------
/c/Makefile:
--------------------------------------------------------------------------------
1 | libdeps := $(wildcard ../target/release/*.so) $(wildcard ../target/release/*.dylib) $(wildcard ../target/release/*.dll)
2 | ffi_example: example.c arg.h divans/ffi.h vec_u8.h custom_alloc.h $(libdeps)
3 | gcc -Wall -g -O2 -o ffi_example example.c -L../target/release -ldivans -Wl,-rpath -Wl,../target/release
4 | debug: example.c arg.h divans/ffi.h vec_u8.h custom_alloc.h $(libdeps)
5 | gcc -Wall -Wno-unused-result -g -o ffi_example_d example.c -L../target/debug -ldivans -Wl,-rpath -Wl,../target/debug
6 | clean:
7 | rm -f ffi_example_d ffi_example
8 |
--------------------------------------------------------------------------------
/examples/decompress.rs:
--------------------------------------------------------------------------------
1 | extern crate divans;
2 | #[cfg(feature="no-stdlib")]
3 | fn main() { // no-stdlib builds have no std::io, so this example only panics with a pointer to the tests
4 | panic!("For no-stdlib examples please see the tests")
5 | }
6 | #[cfg(not(feature="no-stdlib"))]
7 | fn main() { // Wrap stdin in a DivansDecompressorReader and copy everything it yields to stdout.
8 | use std::io;
9 | let stdin = &mut io::stdin();
10 | {
11 | let mut reader = divans::DivansDecompressorReader::new(
12 | stdin,
13 | 4096, // buffer size
14 | false, // NOTE(review): the meaning of this flag is not visible in this file -- confirm against DivansDecompressorReader::new
15 | true, // parallel
16 | );
17 | io::copy(&mut reader, &mut io::stdout()).unwrap(); // unwrap: any error returned by the copy panics
18 | }
19 | }
20 |
--------------------------------------------------------------------------------
/testdata/make_mux_test.py:
--------------------------------------------------------------------------------
1 | import random
2 | import sys
3 |
4 | def main(sizea, sizeb, pct, minsize, maxsize): # Prints a "&rand(<size>,<n>)[..]," line for each of the two streams, then a bracketed list of (index, size) chunks that consumes both streams completely. Python 2 print statements.
5 | print "&rand(" + str(sizea)+",13)[..],"
6 | print "&rand(" + str(sizeb)+",17)[..],"
7 | print "&["
8 | while sizea != 0 or sizeb != 0: # loop until both streams are fully consumed
9 | cur_buf = random.randrange(minsize, maxsize + 1); # chunk size drawn from [minsize, maxsize]
10 | is_a = random.randrange(0,100) < pct; # pct is the percent chance that the chunk comes from stream 0
11 | index = 0 if is_a else 1
12 | if is_a:
13 | cur_buf = min(cur_buf, sizea) # never take more than what is left in the stream
14 | sizea -= cur_buf
15 | else:
16 | cur_buf = min(cur_buf, sizeb)
17 | sizeb -= cur_buf
18 | if cur_buf: # an already-exhausted stream yields a zero-length chunk, which is not printed
19 | print " (" + str(index) + "," + str(cur_buf) + "),"
20 | print "]"
21 |
22 | if __name__ == "__main__": # argv: max size of stream 0, max size of stream 1, pct, minsize, maxsize; the actual stream sizes are drawn from [1, max)
23 | main(random.randrange(1, int(sys.argv[1])),random.randrange(1, int(sys.argv[2])),int(sys.argv[3]), int(sys.argv[4]), int(sys.argv[5]))
24 |
--------------------------------------------------------------------------------
/wasm/mod.py:
--------------------------------------------------------------------------------
1 | import sys
2 | data = sys.stdin.read();
3 | magic = ['-551.62445', '413.18079']
4 | rmagic = []#'846.69629', '-95.776024']
5 | out_magic = [m for m in magic]
6 | out_rmagic = [r for r in rmagic]
7 | # Reads a document from stdin and, for i in 0..29, replaces every occurrence of each magic number with (number + inc*i) (rmagic entries would use - inc*i, but that list is currently empty), writing the result to a file named by the zero-padded 4-digit i plus ".svg". Presumably one file is written per i; the loop nesting is not recoverable from this listing. Python 2 print statements.
8 | inc = 14
9 | for i in range(0,30):
10 | for index in range(len(magic)):
11 | out_magic[index] = str(float(magic[index]) + inc * i)
12 | for index in range(len(rmagic)):
13 | out_rmagic[index] = str(float(rmagic[index]) - inc * i)
14 | fn = ''
15 | fn += str(int(i/1000))
16 | fn += str(int(i/100)%10)
17 | fn += str(int(i/10)%10)
18 | fn += str(int(i%10))
19 | temp = data
20 | for index in range(len(magic)):
21 | temp = temp.replace(magic[index], out_magic[index])
22 | for index in range(len(rmagic)):
23 | temp = temp.replace(rmagic[index], out_rmagic[index])
24 | print 'replacing', magic,'with',out_magic
25 | print 'replacing', rmagic,'with',out_rmagic
26 | with open(fn + '.svg', 'w') as out:
27 | out.write(temp);
28 |
29 |
--------------------------------------------------------------------------------
/src/ffi/decompressor.rs:
--------------------------------------------------------------------------------
1 | use super::alloc_util::SubclassableAllocator;
2 | use divans_decompressor::StaticCommand;
3 | use super::interface::CAllocator;
4 | //use ::interface::DivansDecompressorFactory;
5 | pub type DecompressorFactory = ::DivansDecompressorFactoryStruct,
6 | SubclassableAllocator<::DefaultCDF16>,
7 | SubclassableAllocator>;
8 | #[repr(C)]
9 | #[no_mangle]
10 | pub struct DivansDecompressorState {
11 | pub custom_allocator: CAllocator,
12 | pub decompressor: ::DivansDecompressor<, SubclassableAllocator<::DefaultCDF16>, SubclassableAllocator>>::DefaultDecoder,
13 | SubclassableAllocator,
14 | SubclassableAllocator<::DefaultCDF16>,
15 | SubclassableAllocator>,
16 | }
17 | impl Drop for DivansDecompressorState {
18 | fn drop(&mut self) {
19 | self.decompressor.free_ref();
20 | }
21 | }
22 |
23 |
--------------------------------------------------------------------------------
/research/brotli_bill_transform.py:
--------------------------------------------------------------------------------
1 | import sys
2 | from collections import defaultdict
3 |
4 | features = defaultdict(lambda:0) # feature name -> summed byte count
5 | remap = { # substrings of each input line that are rewritten to a coarser feature name
6 | 'CopyDistance':'CopyDistance',
7 | 'DistanceHuffmanTable':'CopyDistance',
8 | 'ComplexLiterals':'ComplexLiterals',
9 | 'CopyLength':'CopyLength',
10 | 'LiteralHuffmanTable':'ComplexLiterals',
11 | 'InsertCopyHuffmanTable':'CopyLength',
12 | 'LiteralContextMode':'LiteralContextMode',
13 | 'MetablockHeader':'Misc',
14 | 'BlockTypeMetadata':'BlockTypeMetadata',
15 | 'DistancContextMode':'DistanceContextMode', # NOTE(review): key is spelled without the "e" -- confirm it matches what the input actually contains
16 | 'Misc':'Misc',
17 | }
18 | # Reads the file named by argv[1]; after renaming, each line is split on whitespace, field 1 is taken as a byte count and field 2 as the feature name, and byte counts are summed per feature.
19 | for line in open(sys.argv[1]):
20 | for key,val in remap.iteritems():
21 | if key != val:
22 | line = line.replace(key,val)
23 | vals = line.split()
24 | bytes = float(vals[1])
25 | features[vals[2]] += bytes
26 | maxb = max(len(str(item)) for item in features.values())
27 | maxa = max(len(str(int(item*8 + .5))) for item in features.values())
28 | # Print one row per feature in name order: rounded bit count, byte count, feature name, padded into columns two spaces wider than the longest value.
29 | for item in sorted(features.keys()):
30 | bitval = str(int(features[item] * 8 + .5))
31 | byteval = str(features[item])
32 | print bitval + ' '*(maxa + 2 - len(bitval)) + byteval + ' '*(maxb + 2 - len(byteval)) + item
33 |
--------------------------------------------------------------------------------
/src/raw_to_cmd/hash_match.rs:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Dropbox, Inc
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | use core;
16 | pub use alloc::{AllocatedStackMemory, Allocator, SliceWrapper, SliceWrapperMut, StackAllocator};
17 |
18 |
19 | pub struct HashMatch > {
20 | ht: AllocU32::AllocatedMemory,
21 | }
22 | impl > HashMatch {
23 | pub fn new(m32: &mut AllocU32) -> Self {
24 | HashMatch {
25 | ht:m32.alloc_cell(128),
26 | }
27 | }
28 | pub fn free(&mut self, m32: &mut AllocU32) {
29 | m32.free_cell(core::mem::replace(&mut self.ht, AllocU32::AllocatedMemory::default()));
30 | }
31 | }
32 |
33 |
--------------------------------------------------------------------------------
/wasm/README:
--------------------------------------------------------------------------------
1 | Building DivANS for the browser:
2 | ================================
3 |
4 | Make sure to add this to /etc/mime.types:
5 | -----------
6 | application/wasm wasm
7 | -----------
8 |
9 | For divans, you must actually build with wasm32-unknown-unknown:
10 | -----------------
11 | cargo build --target wasm32-unknown-unknown --release
12 | -----------------
13 |
14 |
15 | Add a working brotli demo to the wasm page:
16 | ===========================================
17 |
18 | Check out the rust-brotli repository, then
19 | add an emcc wrapper as follows:
20 |
21 | Create a script 'myscript' somewhere with the following contents:
22 | -------------
23 | #!/bin/bash
24 | emcc -s ALLOW_MEMORY_GROWTH=1 "$@"
25 | -------------
26 | chmod +x myscript
27 |
28 | Now modify ~/.cargo/config and set
29 | --------------
30 | [target.wasm32-unknown-emscripten]
31 | linker = "/home/user/bin/myscript"
32 | --------------
33 |
34 | Now build with
35 | -------------------
36 | cargo build --target wasm32-unknown-emscripten --release
37 | -------------------
38 | And copy the binaries from target/wasm32-unknown-emscripten/release/brotli.wasm and brotli.js to the divans/wasm/ directory
39 |
40 | Finally, in addition to adding wasm to /etc/mime.types, you must allow CORS in your webserver config: search for setting the "Access-Control-Allow-Origin" header.
41 | If you are not able to do this, you may modify brotli_wrapper.html and remove the line which sets "sandbox".
42 |
43 |
44 |
45 |
--------------------------------------------------------------------------------
/src/test_helper.rs:
--------------------------------------------------------------------------------
1 | #![cfg(test)]
2 | extern crate std;
3 | use std::vec::{
4 | Vec,
5 | };
6 | use std::boxed::{
7 | Box,
8 | };
9 | use core;
10 | use alloc;
11 |
12 | pub struct Rebox {
13 | b: Box<[T]>,
14 | }
15 |
16 | impl core::default::Default for Rebox {
17 | fn default() -> Self {
18 | let v: Vec = Vec::new();
19 | let b = v.into_boxed_slice();
20 | Rebox:: { b: b }
21 | }
22 | }
23 |
24 | impl core::ops::Index for Rebox {
25 | type Output = T;
26 | fn index(&self, index: usize) -> &T {
27 | &(*self.b)[index]
28 | }
29 | }
30 |
31 | impl core::ops::IndexMut for Rebox {
32 | fn index_mut(&mut self, index: usize) -> &mut T {
33 | &mut (*self.b)[index]
34 | }
35 | }
36 |
37 | impl alloc::SliceWrapper for Rebox {
38 | fn slice(&self) -> &[T] {
39 | &*self.b
40 | }
41 | }
42 |
43 | impl alloc::SliceWrapperMut for Rebox {
44 | fn slice_mut(&mut self) -> &mut [T] {
45 | &mut *self.b
46 | }
47 | }
48 |
49 | pub struct HeapAllocator {
50 | pub default_value: T,
51 | }
52 |
53 | impl alloc::Allocator for HeapAllocator {
54 | type AllocatedMemory = Rebox;
55 | fn alloc_cell(self: &mut HeapAllocator, len: usize) -> Rebox {
56 | let v: Vec = vec![self.default_value.clone();len];
57 | let b = v.into_boxed_slice();
58 | Rebox:: { b: b }
59 | }
60 | fn free_cell(self: &mut HeapAllocator, _data: Rebox) {}
61 | }
62 |
--------------------------------------------------------------------------------
/src/probability/mod.rs:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Dropbox, Inc
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #![allow(unused)]
16 | use core;
17 | use core::clone::Clone;
18 | pub mod div_lut;
19 | pub mod numeric;
20 |
21 | #[macro_use]
22 | mod common_tests;
23 | pub mod interface;
24 | pub mod external_cdf;
25 | pub mod blend_cdf;
26 | pub mod frequentist_cdf;
27 | mod variant_speed_cdf;
28 | #[cfg(feature="simd")]
29 | pub mod simd_frequentist_cdf;
30 | pub mod opt_frequentist_cdf;
31 |
32 | pub use self::interface::{BaseCDF, CDF16, CDF2, Speed, SpeedPalette, Prob, LOG2_SCALE, BLEND_FIXED_POINT_PRECISION, ProbRange, SPEED_PALETTE_SIZE};
33 | #[cfg(feature="debug_entropy")]
34 | pub use self::interface::DebugWrapperCDF16;
35 | pub use self::blend_cdf::{BlendCDF16};
36 | pub use self::frequentist_cdf::FrequentistCDF16;
37 | pub use self::external_cdf::ExternalProbCDF16;
38 | #[cfg(feature="simd")]
39 | pub use self::simd_frequentist_cdf::SIMDFrequentistCDF16;
40 | pub use self::opt_frequentist_cdf::OptFrequentistCDF16;
41 | pub use self::variant_speed_cdf::VariantSpeedCDF;
42 |
--------------------------------------------------------------------------------
/src/probability/make_div_lut.rs:
--------------------------------------------------------------------------------
1 | mod numeric;
2 | fn main() { // Prints Rust source for the RECIPROCAL8 and RECIPROCAL lookup tables to stdout, checking every entry against true integer division while generating it.
3 | print!("pub static RECIPROCAL8: [i32; 256] = [\n 0, ");
4 | for divisor in 1..256 {
5 | let next_str = if divisor % 16 == 15 { // break the output line after every 16th entry
6 | "\n "
7 | } else {
8 | " "
9 | };
10 | let reciprocal = numeric::compute_divisor8(divisor as numeric::Denominator8Type);
11 | let mut fail = false;
12 | for num in 0u16..65535u16 { // NOTE(review): exclusive bound, so num == 65535 is never checked -- confirm this is intended
13 | let correct = num as u16 /divisor;
14 | let trial = numeric::fast_divide_16bit_by_8bit(num as u16, reciprocal) as u16;
15 | if trial != correct {
16 | print!("FAIL: {} : {} / {} = fast: {} slow: {}\n",
17 | reciprocal,
18 | num,
19 | divisor,
20 | trial,
21 | correct);
22 | fail = true; // keep going so every mismatch for this divisor is reported before the assert below
23 | }
24 | }
25 | assert!(!fail);
26 | assert!(reciprocal <= (1<<30));
27 | print!("{},{}", reciprocal, next_str)
28 | }
29 | print!("];\n");
30 | print!("pub static RECIPROCAL: [(i64, u8); 65536] = [\n (0,0), ");
31 | for divisor in 1..65536 {
32 | let next_str = if divisor % 16 == 15 {
33 | "\n "
34 | } else {
35 | " "
36 | };
37 | let reciprocal = numeric::compute_divisor(divisor as numeric::DenominatorType);
38 | for num in 0..65536 { // exhaustive check of every 16-bit numerator, pre-shifted left by 15 bits
39 | assert_eq!((num<<15)/divisor, numeric::fast_divide_30bit_by_16bit(num << 15, reciprocal));
40 | }
41 | print!("({},{}),{}", reciprocal.0, reciprocal.1, next_str) // emit the pair that was just verified instead of calling compute_divisor a second time
42 | }
43 | print!("];\n");
44 | }
45 |
--------------------------------------------------------------------------------
/c/vec_u8.h:
--------------------------------------------------------------------------------
1 | struct VecU8 { // growable byte buffer whose storage comes from custom_malloc_f / custom_free_f
2 | unsigned char *data; // NULL while the vector is empty
3 | size_t size; // number of valid bytes in data; the capacity is not stored, push_vec_u8 recomputes it as round_up_to_power_of_two(size)
4 | };
5 | struct VecU8 new_vec_u8() { // returns an empty vector: NULL data pointer, size 0, nothing allocated
6 | struct VecU8 ret;
7 | ret.data = NULL;
8 | ret.size = 0;
9 | return ret;
10 | }
11 | uint64_t round_up_to_power_of_two(uint64_t v) { // returns the smallest power of two >= v; v == 0 and v > 2^63 both return 0 because the arithmetic wraps
12 | v--; // so that an exact power of two maps to itself
13 | v |= v >> 1; // smear the highest set bit into every lower position
14 | v |= v >> 2;
15 | v |= v >> 4;
16 | v |= v >> 8;
17 | v |= v >> 16;
18 | {
19 | uint64_t tmp = v; // last smear step: fold the upper 32 bits into the lower 32
20 | tmp >>= 32;
21 | v |= tmp;
22 | }
23 | v++; // all-ones-below-the-top-bit plus one is the next power of two
24 | return v;
25 | }
26 |
27 | // Appends size bytes from data to thus, growing the buffer to the next power of two when needed. Does nothing when size is 0 or thus->size + size overflows; aborts if the capacity cannot be represented or the allocation fails.
28 | void push_vec_u8(struct VecU8 *thus, const unsigned char*data, size_t size) {
29 | size_t new_actual_size = thus->size + size;
30 | if (size == 0 || new_actual_size < thus->size) {
31 | return; // nothing to append, or the new size wrapped around
32 | }
33 | {
34 | size_t new_alloc_size = round_up_to_power_of_two(new_actual_size);
35 | size_t old_alloc_size = round_up_to_power_of_two(thus->size);
36 | if (new_alloc_size < new_actual_size) {
37 | abort(); // assert: rounding up wrapped or was truncated; checked before anything is allocated or copied
38 | }
39 | if (thus->size == 0 || old_alloc_size != new_alloc_size ) {
40 | unsigned char *tmp = custom_malloc_f(custom_alloc_opaque, new_alloc_size);
41 | if (tmp == NULL) {
42 | abort(); // allocation failed; this function has no way to report an error to the caller
43 | }
44 | if (thus->size != 0) { // an empty vector has data == NULL: nothing to copy and nothing to free
45 | memcpy(tmp, thus->data, thus->size); // only the first size bytes of the old buffer hold data
46 | custom_free_f(custom_alloc_opaque, thus->data);
47 | }
48 | thus->data = tmp;
49 | }
50 | memcpy(thus->data + thus->size, data, size);
51 | thus->size = new_actual_size;
52 | }
53 | }
54 | void release_vec_u8(struct VecU8 *thus) { // frees the buffer and resets thus to the empty state; does nothing when size is already 0
55 | if (thus->size) {
56 | custom_free_f(custom_alloc_opaque, thus->data);
57 | thus->size = 0;
58 | thus->data = NULL;
59 | }
60 | }
61 |
--------------------------------------------------------------------------------
/src/codec/specializations.rs:
--------------------------------------------------------------------------------
1 | use ::probability::CDF16;
2 | use alloc::Allocator;
3 | pub use super::interface::{CrossCommandBookKeeping,LiteralBookKeeping};
4 |
5 | pub trait CodecTraits { // compile-time configuration carried by the marker types MixingTrait and DefaultTrait
6 | const MIXING_PRIORS: bool; // true for MixingTrait, false for DefaultTrait
7 | }
8 | macro_rules! define_codec_trait { // declares an empty struct $name implementing CodecTraits with MIXING_PRIORS = $mix, plus a static instance $global
9 | ($name: ident, $global: ident, mix: $mix: expr) => {
10 | #[derive(Default)]
11 | pub struct $name {}
12 | impl CodecTraits for $name {
13 | const MIXING_PRIORS: bool = $mix;
14 | }
15 | pub static $global: $name = $name{}; // the static provides the &'static references stored in CodecTraitSelector
16 | }
17 | }
18 | define_codec_trait!(MixingTrait, MIXING_TRAIT, mix: true);
19 | define_codec_trait!(DefaultTrait, DEFAULT_TRAIT, mix: false);
20 |
21 | #[derive(Clone,Copy)]
22 | pub enum CodecTraitSelector { // runtime choice between the two CodecTraits marker types; each variant borrows the corresponding static instance
23 | DefaultTrait(&'static DefaultTrait),
24 | MixingTrait(&'static MixingTrait),
25 | }
26 |
27 | pub fn construct_codec_trait_from_bookkeeping,
29 | AllocCDF16:Allocator>(
30 | lbk:&LiteralBookKeeping,
31 | ) -> CodecTraitSelector {
32 | if lbk.model_weights[0].should_mix() || lbk.model_weights[1].should_mix() {
33 | return CodecTraitSelector::MixingTrait(&MIXING_TRAIT);
34 | }
35 | return CodecTraitSelector::DefaultTrait(&DEFAULT_TRAIT);
36 | }
37 |
38 | pub trait NibbleHalfTrait { // compile-time flag carried by a marker type; presumably selects the high vs. low 4 bits of a byte -- confirm at the use sites
39 | const HIGH_NIBBLE: bool; // true for HighNibbleTrait, false for LowNibbleTrait
40 | }
41 |
42 | pub struct HighNibbleTrait { // marker type with HIGH_NIBBLE = true
43 | }
44 | impl NibbleHalfTrait for HighNibbleTrait {
45 | const HIGH_NIBBLE:bool = true;
46 | }
47 | pub static HIGH_NIBBLE_TRAIT: HighNibbleTrait = HighNibbleTrait{};
48 |
49 | pub struct LowNibbleTrait { // marker type with HIGH_NIBBLE = false
50 | }
51 | impl NibbleHalfTrait for LowNibbleTrait {
52 | const HIGH_NIBBLE:bool = false;
53 | }
54 | pub static LOW_NIBBLE_TRAIT: LowNibbleTrait = LowNibbleTrait{};
55 |
--------------------------------------------------------------------------------
/src/stub_parallel_decompressor.rs:
--------------------------------------------------------------------------------
1 | #![cfg(not(feature="std"))]
2 | pub use interface::{DivansCompressorFactory, BlockSwitch, LiteralBlockSwitch, Command, Compressor, CopyCommand, Decompressor, DictCommand, LiteralCommand, Nop, NewWithAllocator, ArithmeticEncoderOrDecoder, LiteralPredictionModeNibble, PredictionModeContextMap, free_cmd, FeatureFlagSliceType,
3 | DefaultCDF16, DivansResult};
4 | pub use alloc::{AllocatedStackMemory, Allocator, SliceWrapper, SliceWrapperMut, StackAllocator};
5 | pub use super::divans_decompressor::StaticCommand;
6 | pub use core::marker::PhantomData;
7 |
8 | pub struct ParallelDivansProcess,
9 | AllocU8:Allocator,
10 | AllocCDF16:Allocator,
11 | AllocCommand:Allocator> {
12 | p0: PhantomData,
13 | p1: PhantomData,
14 | p2: PhantomData,
15 | p3: PhantomData,
16 | }
17 |
18 | impl,
19 | AllocU8:Allocator,
20 | AllocCDF16:Allocator,
21 | AllocCommand:Allocator>
22 | ParallelDivansProcess {
23 |
24 | pub fn new(_header: &mut T, mut _window_size: usize) -> Self {
25 | unimplemented!();
26 | }
27 | pub fn decode(&mut self,
28 | _input:&[u8],
29 | _input_offset:&mut usize,
30 | _output:&mut [u8],
31 | _output_offset: &mut usize) -> DivansResult {
32 | unimplemented!();
33 | }
34 | pub fn free_ref(&mut self){
35 | unimplemented!();
36 | }
37 | pub fn free(self) -> (AllocU8, AllocCDF16, AllocCommand) {
38 | unimplemented!();
39 | }
40 | }
41 |
--------------------------------------------------------------------------------
/examples/compress.rs:
--------------------------------------------------------------------------------
1 | extern crate divans;
2 | #[cfg(feature="no-stdlib")]
3 | fn main() { // no-stdlib builds have no std::io, so this example only panics with a pointer to the tests
4 | panic!("For no-stdlib examples please see the tests")
5 | }
6 | #[cfg(not(feature="no-stdlib"))]
7 | fn main() { // Copy stdin through a DivansBrotliHybridCompressorWriter to stdout; every option is taken from DivansCompressorOptions::default() but listed field by field.
8 | let example_opts = divans::DivansCompressorOptions::default();
9 | use std::io;
10 | let stdout = &mut io::stdout();
11 | {
12 | use std::io::Write;
13 | let mut writer = divans::DivansBrotliHybridCompressorWriter::new(
14 | stdout,
15 | divans::DivansCompressorOptions{
16 | brotli_literal_byte_score:example_opts.brotli_literal_byte_score,
17 | force_literal_context_mode:example_opts.force_literal_context_mode,
18 | literal_adaptation:example_opts.literal_adaptation, // should we override how fast the cdfs converge for literals?
19 | window_size:example_opts.window_size, // log 2 of the window size
20 | lgblock:example_opts.lgblock, // should we override how often metablocks are created in brotli
21 | quality:example_opts.quality, // the quality of brotli commands
22 | q9_5:example_opts.q9_5,
23 | dynamic_context_mixing:example_opts.dynamic_context_mixing, // if we want to mix together the stride prediction and the context map
24 | prior_depth:example_opts.prior_depth,
25 | use_brotli:example_opts.use_brotli, // ignored
26 | use_context_map:example_opts.use_context_map, // whether we should use the brotli context map in addition to the last 8 bits of each byte as a prior
27 | force_stride_value: example_opts.force_stride_value, // if we should use brotli to decide on the stride
28 | speed_detection_quality: example_opts.speed_detection_quality,
29 | stride_detection_quality: example_opts.stride_detection_quality,
30 | prior_bitmask_detection: example_opts.prior_bitmask_detection,
31 | divans_ir_optimizer:example_opts.divans_ir_optimizer,
32 | },
33 | 4096, // internal buffer size
34 | );
35 | io::copy(&mut io::stdin(), &mut writer).unwrap(); // unwrap: any error returned by the copy panics
36 | writer.flush().unwrap(); // explicit flush so a flush error panics here rather than going unnoticed
37 | }
38 | }
39 |
--------------------------------------------------------------------------------
/no-stdlib/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "divans-no-stdlib"
3 | version = "0.0.1"
4 | authors = [
5 | "Daniel Reiter Horn ",
6 | "Jongmin Baek ",
7 | "Anatoly Yakovenko "
8 | ]
9 | license = "BSD-3-Clause/MIT or Apache"
10 | documentation = "tbd"
11 | keywords = ["brotli", "decompression", "lz77", "huffman", "nostd"]
12 | readme = "README.md"
13 | build = "../build.rs"
14 |
15 | [[bin]]
16 | name="divans"
17 | path="../src/bin/divans.rs"
18 |
19 | [lib]
20 | crate-type=["rlib"]
21 | path="../src/lib.rs"
22 |
23 | [profile.release]
24 | lto=true
25 |
26 | [build-dependencies]
27 | vergen = "0.1"
28 |
29 | [dependencies]
30 | "alloc-no-stdlib" = "~1.3"
31 | "brotli" = { version = "~2.5"}
32 |
33 | [features]
34 | default= ["no-stdlib","no-stdlib-rust-binding", "no-stdlib-rlib"]
35 | # allow use of SIMD to update probability and compute crc32c
36 | simd = []
37 |
38 | # use avx2-specific instructions
39 | avx2=[]
40 |
41 | # avoid inlining some key functions (for profiling). Significant perf hit
42 | no-inline=[]
43 |
44 | # do not compile any unsafe code (this disables C-FFI)
45 | safe=[]
46 |
47 | # use 65536-sized table to do 16 bit integer divides: similar speed, but strains memory subsystem
48 | avoid-divide=[]
49 |
50 | #use dynamic CDF blending depending on samples
51 | findspeed = []
52 |
53 | # print breakdown of which parts of the file cost
54 | billing = []
55 |
56 | # use divisionless CDF: averages with other CDFs rather than counting samples like FrequentistCDF
57 | blend = []
58 |
59 | # print trace of probability
60 | debug_entropy = []
61 |
62 | # only use portable SIMD instructions for probability updates
63 | portable-simd = []
64 |
65 | no-stdlib = ["alloc-no-stdlib/no-stdlib", "brotli/no-stdlib"]
66 |
67 | # bind to rust with nostdlib
68 | no-stdlib-rust-binding = []
69 |
70 | # turn on benchmark tests and --bench commands (requires nightly)
71 | benchmark = ["brotli/benchmark"]
72 |
73 | no-stdlib-rlib = []
74 |
75 | # allow specification of a external probability array, to test new prediction schemes
76 | external-literal-probability = ["brotli/external-literal-probability"]
77 |
78 | # frequentist without division tables
79 | uncached_frequentist = []
80 |
81 |
--------------------------------------------------------------------------------
/src/ffi/interface.rs:
--------------------------------------------------------------------------------
1 | #[allow(non_camel_case_types)]
2 | #[repr(u8)]
3 | pub enum c_void{
4 | _Nothing = 0,
5 | }
6 |
7 | #[no_mangle]
8 | pub type DivansReturnCode = u8;
9 | pub const DIVANS_SUCCESS: DivansReturnCode = 0;
10 | pub const DIVANS_NEEDS_MORE_INPUT: DivansReturnCode = 1;
11 | pub const DIVANS_NEEDS_MORE_OUTPUT: DivansReturnCode = 2;
12 | pub const DIVANS_FAILURE: DivansReturnCode = 3;
13 |
14 |
15 |
16 | pub type DivansOptionSelect = u8;
17 |
18 | pub const DIVANS_OPTION_QUALITY:DivansOptionSelect = 1;
19 | pub const DIVANS_OPTION_WINDOW_SIZE:DivansOptionSelect = 2;
20 | pub const DIVANS_OPTION_LGBLOCK:DivansOptionSelect = 3;
21 | pub const DIVANS_OPTION_DYNAMIC_CONTEXT_MIXING:DivansOptionSelect = 4;
22 | pub const DIVANS_OPTION_USE_BROTLI_COMMAND_SELECTION:DivansOptionSelect = 5;
23 | pub const DIVANS_OPTION_USE_BROTLI_BITSTREAM:DivansOptionSelect = 6;
24 | pub const DIVANS_OPTION_USE_CONTEXT_MAP:DivansOptionSelect = 7;
25 | pub const DIVANS_OPTION_LITERAL_ADAPTATION_CM_HIGH:DivansOptionSelect = 8;
26 | pub const DIVANS_OPTION_FORCE_STRIDE_VALUE:DivansOptionSelect = 9;
27 | pub const DIVANS_OPTION_STRIDE_DETECTION_QUALITY:DivansOptionSelect = 10;
28 | pub const DIVANS_OPTION_PRIOR_DEPTH:DivansOptionSelect = 11;
29 | pub const DIVANS_OPTION_LITERAL_ADAPTATION_STRIDE_HIGH:DivansOptionSelect = 12;
30 | pub const DIVANS_OPTION_LITERAL_ADAPTATION_CM_LOW:DivansOptionSelect = 13;
31 | pub const DIVANS_OPTION_LITERAL_ADAPTATION_STRIDE_LOW:DivansOptionSelect = 14;
32 | pub const DIVANS_OPTION_BROTLI_LITERAL_BYTE_SCORE:DivansOptionSelect = 15;
33 | pub const DIVANS_OPTION_SPEED_DETECTION_QUALITY:DivansOptionSelect = 16;
34 | pub const DIVANS_OPTION_PRIOR_BITMASK_DETECTION:DivansOptionSelect = 17;
35 | pub const DIVANS_OPTION_Q9_5:DivansOptionSelect = 18;
36 | pub const DIVANS_OPTION_FORCE_LITERAL_CONTEXT_MODE:DivansOptionSelect = 19;
37 | pub const DIVANS_OPTION_IR_OPTIMIZER:DivansOptionSelect = 20;
38 |
39 |
40 | #[repr(C)]
41 | #[no_mangle]
42 | #[derive(Clone)]
43 | pub struct CAllocator {
44 | pub alloc_func: Option *mut c_void>,
45 | pub free_func: Option ()>,
46 | pub opaque: *mut c_void,
47 | }
48 |
49 | unsafe impl Send for CAllocator {
50 | }
51 |
52 |
53 |
--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "divans"
3 | version = "0.0.1"
4 | authors = [
5 | "Daniel Reiter Horn ",
6 | "Jongmin Baek ",
7 | "Anatoly Yakovenko "
8 | ]
9 | license = "Apache-2.0"
10 | description = "DivANS is a new way of structuring compression programs to make them more open to innovation in the wider community, by separating compression into multiple stages that can each be improved independently"
11 | documentation = "https://blogs.dropbox.com/tech/2018/06/building-better-compression-together-with-divans/"
12 | keywords = ["brotli", "decompression", "lz77", "huffman", "nostd"]
13 | readme = "README.md"
14 | build = "build.rs"
15 | autobins = false
16 |
17 | [lib]
18 | crate-type=["cdylib", "rlib", "staticlib"]
19 |
20 | [[bin]]
21 | doc = false
22 | name = "divans"
23 |
24 | [build-dependencies]
25 | vergen = "0.1"
26 |
27 | [dependencies]
28 | "alloc-no-stdlib" = "~2.0"
29 |
30 | "brotli" = {version = "~3.1"}
31 | "packed_simd" = {"optional"=true, version="0.3"}
32 | "alloc-stdlib" = {"optional"=true, version="~0.2"}
33 |
34 | [features]
35 | default = ["std"]
36 |
37 | # allow use of SIMD to update probability and compute crc32c
38 | simd = ["brotli/simd", "packed_simd/into_bits"]
39 |
40 | # use avx2-specific instructions
41 | avx2=[]
42 |
43 | # avoid inlining some key functions (for profiling). Significant perf hit
44 | no-inline=[]
45 |
46 | # do not compile any unsafe code (this disables C-FFI)
47 | safe=[]
48 |
49 | # use 65536-sized table to do 16 bit integer divides: similar speed, but strains memory subsystem
50 | avoid-divide=[]
51 |
52 | #use dynamic CDF blending depending on samples
53 | findspeed = []
54 |
55 | # print breakdown of which parts of the file cost
56 | billing = []
57 |
58 | # use divisionless CDF: averages with other CDFs rather than counting samples like FrequentistCDF
59 | blend = []
60 |
61 | # print trace of probability
62 | debug_entropy = []
63 |
64 | # only use portable SIMD instructions for probability updates
65 | portable-simd = []
66 |
67 | std = ["alloc-stdlib", "brotli/std"]
68 |
69 | no-stdlib-rust-binding = []
70 |
71 | # turn on benchmark tests and --bench commands (requires nightly)
72 | benchmark = ["brotli/benchmark"]
73 |
74 |
75 | # allow specification of a external probability array, to test new prediction schemes
76 | external-literal-probability = ["brotli/external-literal-probability"]
77 |
78 | uncached_frequentist = []
79 |
80 | threadlog = []
81 |
--------------------------------------------------------------------------------
/research/bill_transform.py:
--------------------------------------------------------------------------------
1 | import sys
2 | from collections import defaultdict
3 |
4 | features = defaultdict(lambda:0)
5 | remap = {
6 | 'CountLengthFirst': 'CopyLength',
7 | 'CountMantissaNibbles': 'CopyLength',
8 | 'CountSmall': 'CopyLength',
9 | 'DistanceLengthFirst': 'CopyDistance',
10 | 'DistanceLengthGreater15Less25': 'CopyDistance',
11 | 'DistanceLengthMnemonic': 'CopyDistance',
12 | 'DistanceMantissaNibbles': 'CopyDistance',
13 | 'BlockSwitchType': 'BlockTypeMetadata',
14 | 'FullSelection': 'CopyLength', # not quite truthful
15 | 'LiteralCountFirst': 'CopyLength', # not quite truthful
16 | 'LiteralCountLengthGreater14Less25': 'CopyLength', # not quite truthful
17 | 'LiteralCountMantissaNibbles': 'CopyLength', # not quite truthful
18 | 'LiteralCountSmall': 'CopyLength', # not quite truthful
19 | 'LiteralNibbleIndex': 'ComplexLiterals',
20 | 'LiteralNibbleIndex': 'ComplexLiterals',
21 | 'Begin': 'Misc',
22 | 'TransformHigh':'DictIndex',
23 | 'TransformLow':'DictIndex',
24 | 'WordIndexMantissa':'DictIndex',
25 | 'WordSizeFirst':'DictLength',
26 | 'ContextMapFirstNibble(0, Literal)': 'LiteralContextMode',
27 | 'ContextMapFirstNibble(0, Distance)': 'DistanceContextMode',
28 | 'ContextMapMnemonic(0, Literal)': 'LiteralContextMode',
29 | 'ContextMapMnemonic(0, Distance)': 'DistanceContextMode',
30 | 'ContextMapSecondNibble(0, Literal, 0)': 'LiteralContextMode',
31 | 'ContextMapSecondNibble(0, Distance, 0)': 'DistanceContextMode',
32 | 'DynamicContextMixing': 'Misc',
33 | 'LiteralAdaptationRate': 'Misc',
34 | 'PriorDepth': 'Misc',
35 | }
36 | for line in open(sys.argv[1]):
37 | if 'Total' in line:
38 | break
39 | pairmatch = '('.join(line.split('(')[1:]).split(')')[:-1]
40 | typ = ')'.join(pairmatch)
41 | counts = line.split('count:')[1:]
42 | byte_count_str = counts[1].strip().split(' ')[0].strip()
43 | byte_count = float(byte_count_str)
44 | if typ not in remap:
45 | typ = typ.split('(')[0]
46 | if typ not in remap:
47 | print typ, 'not found'
48 | continue
49 | features[remap[typ]] += byte_count
50 | maxb = max(len(str(item)) for item in features.values())
51 | maxa = max(len(str(int(item*8 + .5))) for item in features.values())
52 | for item in sorted(features.keys()):
53 | bitval = str(int(features[item] * 8 + .5))
54 | byteval = str(features[item])
55 | print bitval + ' '*(maxa + 2 - len(bitval)) + byteval + ' '*(maxb + 2 - len(byteval)) + item
56 |
--------------------------------------------------------------------------------
/wasm/brotli_wrapper.js:
--------------------------------------------------------------------------------
// Wrapper that runs brotli inside a hidden, sandboxed iframe and exposes
// `window.Brotli.runBrotliDestroysInput(arrayBuf, finishedFunc)`.
// Requests are queued until the iframe posts "brotli-worker-ready"; the input
// ArrayBuffer is transferred to the iframe (so the caller's copy is detached).
(function() {
    var brotliIframe = null;
    var brotliWindow = null;

    var brotliReady = false;       // true while the worker is idle and nothing is queued
    var brotliProcessing = null;   // ArrayBuffer handed to the in-flight request
    var brotliFinished = null;     // completion callback of the in-flight request
    var brotliInputQueue = [];     // pending [arrayBuf, finishedFunc] pairs

    // Handles status messages posted back by the iframe.
    // Messages from any other source are rejected by throwing.
    window.onmessage = function(e) {
        // Before the iframe exists no message can legitimately come from it.
        brotliWindow = brotliIframe ? brotliIframe.contentWindow : null;
        if (brotliWindow === null || e.source !== brotliWindow) {
            throw "Invalid source " + e.source;
        }
        var msgtype = e.data[0];
        if (msgtype == "brotli-worker-ready") {
            if (brotliInputQueue.length > 0) {
                var queueEl = brotliInputQueue.shift();
                processBrotliNow(queueEl[0], queueEl[1]);
            } else {
                brotliReady = true;
            }
        }
        if (msgtype == "brotli-finished") {
            var outputLen = e.data[1];
            var intermediateRep = e.data[2];
            var originalInput = brotliProcessing;
            var finFunc = brotliFinished;
            // Clear the in-flight state before the callback runs so the
            // callback may enqueue a new request.
            brotliProcessing = null;
            brotliFinished = null;
            setTimeout(function() {
                finFunc(originalInput, outputLen, intermediateRep);
            }, 0);
        }
    };

    // Sends one request to the iframe: first the quality, then the buffer.
    function processBrotliNow(arrayBuf, finishedFunc) {
        brotliProcessing = arrayBuf;
        brotliFinished = finishedFunc;
        brotliReady = false;
        setTimeout(function() {
            // NOTE(review): `max_quality` is not defined in this file; it is
            // presumably a global supplied by the hosting page - confirm.
            brotliWindow.postMessage(max_quality, "*");
            // Transfer (not copy) the buffer; it is unusable here afterwards.
            brotliWindow.postMessage(arrayBuf, "*", [arrayBuf]);
        }, 0);
    }

    // Runs the request immediately when the worker is idle, otherwise queues it.
    function addToBrotliQueue(arrayBuf, finishedFunc) {
        if (brotliReady) {
            brotliReady = false;
            processBrotliNow(arrayBuf, finishedFunc);
        } else {
            brotliInputQueue.push([arrayBuf, finishedFunc]);
        }
    }

    // Creates the hidden, script-only sandboxed iframe hosting brotli.
    function createBrotliIframe() {
        brotliIframe = document.createElement("iframe");
        brotliIframe.setAttribute("id", "brotli_iframe");
        brotliIframe.setAttribute("src", "brotli_iframe.html");
        brotliIframe.setAttribute("sandbox", "allow-scripts");
        brotliIframe.style.display = "none";
        document.body.appendChild(brotliIframe);
    }

    // Idempotent: `init` is both registered on DOMContentLoaded and exported,
    // so a second call must not add a second iframe with the same id.
    function init() {
        if (brotliIframe !== null) {
            return;
        }
        createBrotliIframe();
    }
    document.addEventListener("DOMContentLoaded", init);

    // Queues `arrayBuf` for processing. `finishedFunc(originalInput, outputLen,
    // intermediateRep)` is invoked asynchronously once the iframe reports
    // "brotli-finished". Throws "Invalid input" unless given an ArrayBuffer.
    function runBrotliDestroysInput(arrayBuf, finishedFunc) {
        if (!(arrayBuf instanceof ArrayBuffer)) {
            throw "Invalid input";
        }
        addToBrotliQueue(arrayBuf, finishedFunc);
    }

    window.Brotli = {
        init: init,
        runBrotliDestroysInput: runBrotliDestroysInput
    };

})();
83 |
--------------------------------------------------------------------------------
/src/constants.rs:
--------------------------------------------------------------------------------
/// 512-entry byte lookup table.
///
/// NOTE(review): the name and the two 256-entry halves look like Brotli's
/// UTF-8 literal context table (RFC 7932, section 7.1), where the first half
/// is indexed by the previous byte and the second half (offset 256) by the
/// byte before that - confirm against the call sites.
pub static UTF8_CONTEXT_LOOKUP: [u8; 512] =
    [0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    8, 12, 16, 12, 12, 20, 12, 16, 24, 28, 12, 12, 32, 12, 36, 12, 44, 44, 44, 44, 44, 44, 44, 44,
    44, 44, 32, 32, 24, 40, 28, 12, 12, 48, 52, 52, 52, 48, 52, 52, 52, 48, 52, 52, 52, 52, 52, 48,
    52, 52, 52, 52, 52, 48, 52, 52, 52, 52, 52, 24, 12, 28, 12, 12, 12, 56, 60, 60, 60, 56, 60, 60,
    60, 56, 60, 60, 60, 60, 60, 56, 60, 60, 60, 60, 60, 56, 60, 60, 60, 60, 60, 24, 12, 28, 12, 0,
    0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
    0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
    2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
    2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1,
    1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1,
    1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2];
19 |
/// 256-entry lookup table mapping a byte to a value in `0..=7`.
///
/// NOTE(review): the name suggests this is Brotli's "signed" literal context
/// bucket table (RFC 7932, section 7.1) - confirm against the call sites.
pub static SIGNED_3_BIT_CONTEXT_LOOKUP: [u8; 256] =
    [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7];
29 |
--------------------------------------------------------------------------------
/src/resizable_buffer.rs:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Dropbox, Inc
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | use core;
16 | pub use super::alloc::{Allocator, SliceWrapper, SliceWrapperMut};
17 |
18 |
19 | pub struct ResizableByteBuffer> {
20 | data: AllocT::AllocatedMemory,
21 | size: usize,
22 | }
23 | impl> Default for ResizableByteBuffer{
24 | fn default() -> Self {
25 | Self::new()
26 | }
27 | }
28 | impl> ResizableByteBuffer{
29 | pub fn new() -> Self {
30 | ResizableByteBuffer:: {
31 | data: AllocT::AllocatedMemory::default(),
32 | size: 0,
33 | }
34 | }
35 | fn ensure_free_space_in_buffer(&mut self, allocator: &mut AllocT, min_size: usize) {
36 | if self.data.slice().is_empty() {
37 | self.data = allocator.alloc_cell(66_000); // some slack room to deal with worst case compression sizes
38 | } else if self.size + min_size > self.data.slice().len() {
39 | let mut cell = allocator.alloc_cell(self.size * 2);
40 | cell.slice_mut().split_at_mut(self.size).0.clone_from_slice(self.data.slice().split_at(self.size).0);
41 | allocator.free_cell(core::mem::replace(&mut self.data, cell));
42 | }
43 | }
44 | pub fn checkout_next_buffer(&mut self, allocator: &mut AllocT, min_size: Option) -> &mut [T] {
45 | self.ensure_free_space_in_buffer(allocator, min_size.unwrap_or(1));
46 | self.data.slice_mut().split_at_mut(self.size).1
47 | }
48 | pub fn commit_next_buffer(&mut self, size:usize) {
49 | self.size += size;
50 | }
51 | pub fn len(&self) -> usize {
52 | self.size
53 | }
54 | pub fn is_empty(&self) -> bool {
55 | self.size == 0
56 | }
57 | pub fn slice(&self) -> &[T] {
58 | self.data.slice().split_at(self.size).0
59 | }
60 | pub fn free(&mut self, allocator: &mut AllocT) {
61 | allocator.free_cell(core::mem::replace(&mut self.data, AllocT::AllocatedMemory::default()))
62 | }
63 | }
64 |
--------------------------------------------------------------------------------
/src/debug_encoder.rs:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Dropbox, Inc
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | use core;
15 | use super::arithmetic_coder::{
16 | EntropyEncoder,
17 | ByteQueue,
18 | RegisterQueue,
19 | EntropyDecoder,
20 | };
21 | use probability::CDF16;
22 | use super::interface::ArithmeticEncoderOrDecoder;
23 | use super::DivansResult;
24 | #[derive(Default)]
25 | pub struct DebugEncoder {
26 | buffer: RegisterQueue,
27 | }
28 |
29 |
30 | impl EntropyEncoder for DebugEncoder {
31 | type Queue = RegisterQueue;
32 | fn get_internal_buffer_mut(&mut self) -> &mut RegisterQueue {
33 | &mut self.buffer
34 | }
35 | fn get_internal_buffer(&self) -> &RegisterQueue {
36 | &self.buffer
37 | }
38 | fn put_bit(&mut self, bit: bool, prob_of_false: u8) {
39 | assert!(self.buffer.num_push_bytes_avail() > 0);
40 | let buf_to_push = [prob_of_false ^ bit as u8];
41 | let cnt = self.buffer.push_data(&buf_to_push[..]);
42 | assert_eq!(cnt, 1);
43 | }
44 | fn flush(&mut self) {
45 | }
46 | }
47 |
48 | #[derive(Default)]
49 | pub struct DebugDecoder {
50 | buffer: RegisterQueue,
51 | }
52 |
53 |
54 | impl EntropyDecoder for DebugDecoder {
55 | type Queue = RegisterQueue;
56 | fn get_internal_buffer_mut(&mut self) -> &mut RegisterQueue {
57 | &mut self.buffer
58 | }
59 | fn get_internal_buffer(&self) -> &RegisterQueue {
60 | &self.buffer
61 | }
62 | fn get_bit(&mut self, prob_of_false: u8) -> bool {
63 | assert!(self.buffer.num_pop_bytes_avail() > 0);
64 | let mut buf_to_pop = [0u8];
65 | let cnt = self.buffer.pop_data(&mut buf_to_pop[..]);
66 | assert_eq!(cnt, 1);
67 | let return_value = buf_to_pop[0] ^ prob_of_false;
68 | if return_value != 0 {
69 | assert_eq!(return_value, 1);
70 | }
71 | return_value != 0
72 | }
73 | fn flush(&mut self) -> DivansResult {
74 | DivansResult::Success
75 | }
76 | }
77 |
78 | impl DebugEncoder {
79 | fn mov_internal(&mut self) -> Self {
80 | core::mem::replace(self, DebugEncoder::default())
81 | }
82 | }
83 | impl ArithmeticEncoderOrDecoder for DebugEncoder {
84 | arithmetic_encoder_or_decoder_methods!();
85 | }
86 |
--------------------------------------------------------------------------------
/c/custom_alloc.h:
--------------------------------------------------------------------------------
// Debugging allocator pair used to exercise the custom-allocator FFI.
//
// The `opaque` pointer selects the backend: &use_fake_malloc bump-allocates
// out of `huge_buffer`, anything else goes to malloc/free.
//
// Every allocation is laid out as
//   [4-byte magic `science`][opaque pointer][user_size][padding..][pad count][user data]
// where the single byte immediately before the user data holds the number of
// padding bytes, chosen so that the user data is 32-byte aligned.
int use_real_malloc = 1;
int use_fake_malloc = 0;
void* custom_alloc_opaque = &use_real_malloc;
unsigned char huge_buffer[1024*1024 * 255];
size_t huge_buffer_offset = 0;
const uint32_t science = 0x5C1E11CE;

// Allocates `user_size` bytes aligned to 32 bytes.
// Returns NULL when the request overflows, the arena is exhausted, or malloc fails.
void * custom_malloc_f(void* opaque, size_t user_size) {
    unsigned char * retval;
    // Worst case: magic + opaque + size + 1 pad-count byte + 31 padding bytes.
    const size_t overhead = 2*sizeof(opaque) + 4 + 32;
    size_t amt = user_size + overhead;
    if (amt < user_size) {
        return NULL; // size_t overflow
    }
    if (opaque == &use_fake_malloc) {
        if (amt > sizeof(huge_buffer) - huge_buffer_offset) {
            return NULL; // arena exhausted
        }
        retval = &huge_buffer[huge_buffer_offset];
        huge_buffer_offset += amt;
    } else {
        retval = (unsigned char*)malloc(amt);
        if (retval == NULL) {
            return NULL;
        }
    }
    memset(retval, 0x34, overhead); // make sure control areas are initialized to something--to help debug
    memcpy(retval, &science, 4);
    memcpy(retval + 4, &opaque, sizeof(opaque));
    memcpy(retval + 4 + sizeof(opaque), &user_size, sizeof(size_t));
    signed char alignment_offset = (32 - (((size_t)(retval + 4 + sizeof(opaque) + sizeof(size_t) + 1)) & 0x1f)) & 0x1f;
    retval[sizeof(opaque) + sizeof(size_t) + 4 + alignment_offset] = alignment_offset;
    void * final_return = retval + sizeof(opaque) + sizeof(size_t) + 4 + 1 + alignment_offset;
    assert((((size_t)final_return)&0x1f) == 0);
    return final_return;
}
void * (*custom_malloc)(void* opaque, size_t data) = &custom_malloc_f;

// Releases a pointer returned by custom_malloc_f. NULL is ignored.
// Asserts that the header magic is intact and that `opaque` matches the one
// used for the allocation.
void custom_free_f(void* opaque, void *mfd) {
    void * local_opaque;
    uint32_t local_science;
    size_t local_size = 0;
    char * local_mfd = (char *)mfd;
    if (mfd == NULL) {
        return;
    }
    // Walk back over the pad-count byte, the padding and the fixed header
    // to reach the start of the underlying allocation.
    local_mfd -= 1;
    local_mfd -= *local_mfd;
    local_mfd -= 4;
    local_mfd -= sizeof(opaque);
    local_mfd -= sizeof(size_t);
    memcpy(&local_science, local_mfd, 4);
    assert(local_science == science);
    memcpy(&local_opaque, local_mfd + 4, sizeof(opaque));
    memcpy(&local_size, local_mfd + 4 + sizeof(opaque), sizeof(size_t));
    assert(opaque == local_opaque);
    if (opaque == &use_fake_malloc) {
        // The arena can only give memory back in LIFO order: reclaim the
        // region when it is the most recent allocation, using the same
        // size computation as custom_malloc_f.
        unsigned char *base = (unsigned char *)local_mfd;
        size_t amt = local_size + 2*sizeof(opaque) + 4 + 32;
        if (base + amt == &huge_buffer[huge_buffer_offset]) {
            huge_buffer_offset -= amt;
        }
    } else {
        free(local_mfd);
    }
}

void (*custom_free)(void* opaque, void *mfd) = &custom_free_f;
// Writes the decimal representation of `data` into `dst` as a NUL-terminated
// string (despite the name this is an integer-to-ASCII conversion).
// `dst` must have room for every digit plus the terminator.
void custom_atoi(char * dst, size_t data) {
    size_t digits = 1;
    size_t rest;
    char *cursor;
    // Count the digits first so they can be emitted back-to-front in place.
    for (rest = data; rest >= 10; rest /= 10) {
        ++digits;
    }
    cursor = dst + digits;
    *cursor = '\0';
    do {
        --cursor;
        *cursor = (char)('0' + (data % 10));
        data /= 10;
    } while (data);
}
77 |
--------------------------------------------------------------------------------
/src/probability/frequentist_cdf.rs:
--------------------------------------------------------------------------------
1 | use core;
2 | use super::interface::{Prob, BaseCDF, Speed, CDF16, BLEND_FIXED_POINT_PRECISION};
/// Returns a word with only bit `shift_val` set when `val` is non-zero,
/// and 0 otherwise.
fn to_bit_i32(val: i32, shift_val: u8) -> u32 {
    match val {
        0 => 0,
        _ => 1 << shift_val,
    }
}
10 |
11 |
12 | #[derive(Clone,Copy)]
13 | pub struct FrequentistCDF16 {
14 | pub cdf: [Prob; 16]
15 | }
16 |
17 | impl Default for FrequentistCDF16 {
18 | fn default() -> Self {
19 | FrequentistCDF16 {
20 | cdf: [4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64]
21 | }
22 | }
23 | }
24 |
25 |
/// Read-only accessors over the cumulative counts.
impl BaseCDF for FrequentistCDF16 {
    /// This CDF always covers 16 symbols.
    fn num_symbols() -> u8 { 16 }
    /// True when the entropy differs from that of a default-constructed CDF.
    fn used(&self) -> bool {
        self.entropy() != Self::default().entropy()
    }
    /// Total count: the last cumulative entry.
    #[inline(always)]
    fn max(&self) -> Prob {
        self.cdf[15]
    }
    /// Integer division of `val` by the total count.
    #[inline(always)]
    fn div_by_max(&self, val:i32) -> i32 {
        return val / i32::from(self.max())
    }
    // NOTE(review): the type parameter of `Option` is missing as shown here;
    // it must match the `BaseCDF` trait declaration - confirm.
    /// Always `None` for this CDF.
    fn log_max(&self) -> Option { None }
    /// Cumulative count for `symbol`; only the low 4 bits of `symbol` are used.
    #[inline(always)]
    fn cdf(&self, symbol: u8) -> Prob {
        self.cdf[symbol as usize & 0xf]
    }
    /// Checks that the first 15 entries are strictly increasing and start
    /// above zero. The final entry (index 15) is not examined.
    fn valid(&self) -> bool {
        let mut prev = 0;
        for item in self.cdf.split_at(15).0.iter() {
            if *item <= prev {
                return false;
            }
            prev = *item;
        }
        true
    }
}
55 |
56 | impl CDF16 for FrequentistCDF16 {
57 | #[inline(always)]
58 | fn average(&self, other:&Self, mix_rate:i32) -> Self {
59 | let mut retval = *self;
60 | let ourmax = i32::from(self.max());
61 | let othermax = i32::from(other.max());
62 | let ourmax_times_othermax = ourmax * othermax;
63 | let leading_zeros_combo = core::cmp::min(ourmax_times_othermax.leading_zeros(), 17);
64 | let desired_shift = 17 - leading_zeros_combo;
65 | let inv_mix_rate = (1 << BLEND_FIXED_POINT_PRECISION) - mix_rate;
66 | for (s, o) in retval.cdf.iter_mut().zip(other.cdf.iter()) {
67 | let rescaled_self = (i32::from(*s) * othermax) >> desired_shift;
68 | let rescaled_other = (i32::from(*o) * ourmax) >> desired_shift;
69 | *s = ((rescaled_self * mix_rate + rescaled_other * inv_mix_rate + 1) >> BLEND_FIXED_POINT_PRECISION) as Prob;
70 | }
71 | retval
72 | }
73 | #[inline(always)]
74 | fn blend(&mut self, symbol: u8, speed: Speed) {
75 | const CDF_BIAS : [Prob;16] = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16];
76 | let increment : Prob = speed.inc() as Prob;
77 | for i in (symbol as usize)..16 {
78 | self.cdf[i] = self.cdf[i].wrapping_add(increment);
79 | }
80 | if self.cdf[15] >= speed.lim() {
81 | for i in 0..16 {
82 | self.cdf[i] = self.cdf[i].wrapping_add(CDF_BIAS[i]).wrapping_sub(self.cdf[i].wrapping_add(CDF_BIAS[i]) >> 2);
83 | }
84 | }
85 | }
86 | }
87 |
// Instantiates the shared CDF test suite for `FrequentistCDF16`.
#[cfg(test)]
mod test {
    use super::FrequentistCDF16;
    declare_common_tests!(FrequentistCDF16);
}
93 |
--------------------------------------------------------------------------------
/c/divans/ffi.h:
--------------------------------------------------------------------------------
#ifndef _DIVANS_H_
#define _DIVANS_H_
#include <stdint.h>
#include <stdlib.h>

#ifdef __cplusplus
extern "C" {
#endif

/// Status code returned by every divans_* call that can fail.
typedef uint8_t DivansResult;

#define DIVANS_SUCCESS ((uint8_t)0)
#define DIVANS_NEEDS_MORE_INPUT ((uint8_t)1)
#define DIVANS_NEEDS_MORE_OUTPUT ((uint8_t)2)
#define DIVANS_FAILURE ((uint8_t)3)

/// Selector passed to divans_set_option; one of the DIVANS_OPTION_* values.
typedef uint8_t DivansOptionSelect;

#define DIVANS_OPTION_QUALITY 1
#define DIVANS_OPTION_WINDOW_SIZE 2
#define DIVANS_OPTION_LGBLOCK 3
#define DIVANS_OPTION_DYNAMIC_CONTEXT_MIXING 4
#define DIVANS_OPTION_USE_BROTLI_COMMAND_SELECTION 5
#define DIVANS_OPTION_USE_BROTLI_BITSTREAM 6
#define DIVANS_OPTION_USE_CONTEXT_MAP 7
#define DIVANS_OPTION_LITERAL_ADAPTATION_CM_HIGH 8
#define DIVANS_OPTION_FORCE_STRIDE_VALUE 9
#define DIVANS_OPTION_STRIDE_DETECTION_QUALITY 10
#define DIVANS_OPTION_PRIOR_DEPTH 11
#define DIVANS_OPTION_LITERAL_ADAPTATION_STRIDE_HIGH 12
#define DIVANS_OPTION_LITERAL_ADAPTATION_CM_LOW 13
#define DIVANS_OPTION_LITERAL_ADAPTATION_STRIDE_LOW 14
#define DIVANS_OPTION_BROTLI_LITERAL_BYTE_SCORE 15
#define DIVANS_OPTION_SPEED_DETECTION_QUALITY 16
#define DIVANS_OPTION_PRIOR_BITMASK_DETECTION 17
#define DIVANS_OPTION_Q9_5 18
#define DIVANS_OPTION_FORCE_LITERAL_CONTEXT_MODE 19


/// a struct specifying custom allocators for divans to use instead of the builtin rust allocators.
/// if all 3 values are set to NULL, the Rust allocators are used instead.
struct CAllocator {
    /// Allocate length bytes. The returned pointer must be 32-byte aligned unless divans was built without features=simd
    void* (*alloc_func)(void * opaque, size_t length);
    /// Release a pointer previously returned by alloc_func with the same opaque.
    void (*free_func)(void * opaque, void * mfd);
    /// Passed through unchanged as the first argument of alloc_func and free_func.
    void * opaque;
};
struct DivansDecompressorState;
struct DivansCompressorState;

/// Create a compressor. Release it with divans_free_compressor.
struct DivansCompressorState* divans_new_compressor(void);
/// Create a compressor that allocates through `alloc`.
struct DivansCompressorState* divans_new_compressor_with_custom_alloc(struct CAllocator alloc);
/// Set the option named by `selector` (a DIVANS_OPTION_* value) to `value`.
DivansResult divans_set_option(struct DivansCompressorState* state, DivansOptionSelect selector, uint32_t value);
/// Compress from the input buffer into the output buffer.
/// NOTE(review): *input_offset / *output_offset appear to be in/out cursors into
/// the respective buffers - semantics inferred from the names; confirm against src/ffi.
DivansResult divans_encode(struct DivansCompressorState* state,
                           const uint8_t *input_buf_ptr, size_t input_size, size_t*input_offset,
                           uint8_t *output_buf_ptr, size_t output_size, size_t *output_offset);

/// Emit any remaining compressed output once all input has been passed to divans_encode.
DivansResult divans_encode_flush(struct DivansCompressorState* state,
                                 uint8_t *output_buf_ptr, size_t output_size, size_t *output_offset);

/// Destroy a compressor created by one of the divans_new_compressor* functions.
void divans_free_compressor(struct DivansCompressorState* mfd);


/// Create a decompressor. Release it with divans_free_decompressor.
struct DivansDecompressorState* divans_new_decompressor(void);
/// Create a decompressor that allocates through `alloc`.
/// NOTE(review): a non-zero skip_crc presumably disables checksum verification - confirm.
struct DivansDecompressorState* divans_new_decompressor_with_custom_alloc(struct CAllocator alloc, uint8_t skip_crc);
/// Decompress from the input buffer into the output buffer; parameters mirror divans_encode.
DivansResult divans_decode(struct DivansDecompressorState* state,
                           const uint8_t *input_buf_ptr, size_t input_size, size_t*input_offset,
                           uint8_t *output_buf_ptr, size_t output_size, size_t *output_offset);

/// Destroy a decompressor created by one of the divans_new_decompressor* functions.
void divans_free_decompressor(struct DivansDecompressorState* mfd);

#ifdef __cplusplus
}
#endif

#endif
71 |
--------------------------------------------------------------------------------
/src/probability/numeric.rs:
--------------------------------------------------------------------------------
1 | #[cfg(not(feature="division_table_gen"))]
2 | use super::div_lut;
3 |
4 | #[cfg(feature="simd")]
5 | use packed_simd;
6 | #[cfg(not(feature="division_table_gen"))]
7 | pub type DenominatorType = i16;
8 | #[cfg(feature="division_table_gen")]
9 | pub type DenominatorType = u16;
10 | #[inline(always)]
11 | fn k16bit_length(d:DenominatorType) -> u8 {
12 | (16 - d.leading_zeros()) as u8
13 | }
14 | pub const LOG_MAX_NUMERATOR: usize = 31;
15 | #[inline(always)]
16 | pub fn compute_divisor(d: DenominatorType) -> (i64, u8) {
17 | let bit_len = k16bit_length(d);
18 | (((((( 1i64 << bit_len) - i64::from(d)) << (LOG_MAX_NUMERATOR))) / i64::from(d)) + 1, bit_len.wrapping_sub(1))
19 | }
20 | #[cfg(not(feature="division_table_gen"))]
21 | #[inline(always)]
22 | pub fn lookup_divisor(d: i16) -> (i64, u8) {
23 | div_lut::RECIPROCAL[d as u16 as usize]
24 | }
25 | #[inline(always)]
26 | pub fn fast_divide_30bit_by_16bit(num: i32, inv_denom_and_bitlen: (i64, u8)) -> i32 {
27 | let idiv_mul_num = i64::from(inv_denom_and_bitlen.0) * i64::from(num);
28 | ((idiv_mul_num >> LOG_MAX_NUMERATOR as u32) as i32
29 | + (((i64::from(num) - (idiv_mul_num >> LOG_MAX_NUMERATOR as u32)) as i32) >> 1))
30 | >> inv_denom_and_bitlen.1
31 | }
32 |
/// Two-lane SIMD form of `fast_divide_30bit_by_16bit`: divides both lanes of
/// `num` by the denominator encoded in `inv_denom_and_bitlen`.
#[cfg(feature="simd")]
#[inline(always)]
pub fn fast_divide_30bit_i64x2_by_16bit(num: packed_simd::i64x2, inv_denom_and_bitlen: (i64, u8)) -> packed_simd::i64x2 {
    let (inv_denom, shift) = inv_denom_and_bitlen;
    let scaled = packed_simd::i64x2::splat(inv_denom) * num;
    let estimate = scaled >> LOG_MAX_NUMERATOR as u32;
    let half_remainder = (num - estimate) >> 1;
    (estimate + half_remainder) >> u32::from(shift)
}
42 |
43 |
44 |
45 |
/// Denominator type for the 8-bit divide helpers.
pub type Denominator8Type = u8;
/// Fixed-point precision (in bits) of the 8-bit reciprocals.
const SHIFT_16_BY_8:usize = 24;

/// Precomputes the fixed-point reciprocal `2^24 / d + 1` consumed by
/// `fast_divide_16bit_by_8bit`. Panics on `d == 0` (division by zero).
#[inline(always)]
pub fn compute_divisor8(d: Denominator8Type) -> i32 {
    let reciprocal = (1 << SHIFT_16_BY_8) / i32::from(d);
    reciprocal + 1
}
54 | #[cfg(not(feature="division_table_gen"))]
55 | #[inline(always)]
56 | pub fn lookup_divisor8(d: u8) -> i32 {
57 | div_lut::RECIPROCAL8[d as u8 as usize]
58 | }
59 | #[inline(always)]
60 | pub fn fast_divide_16bit_by_8bit(num: u16, inv_denom_and_bitlen: i32) -> i16 {
61 | (i64::from(inv_denom_and_bitlen) * i64::from(num) >> SHIFT_16_BY_8) as i16
62 | }
63 |
64 |
#[cfg(test)]
mod test {
    use super::{fast_divide_30bit_by_16bit, lookup_divisor};

    /// Divides via the reciprocal lookup table.
    fn divide_30bit_by_16bit(num: i32, denom: i16) -> i32 {
        fast_divide_30bit_by_16bit(num, lookup_divisor(denom))
    }

    /// The table-driven divide must agree with native integer division.
    #[test]
    fn test_divide() {
        let nums: [i32; 10] = [3032127, 5049117, 16427165, 23282359, 35903174,
                               132971515, 163159927, 343856773, 935221996, 1829347323];
        let denoms: [i16; 10] = [115, 248, 267, 764, 1337, 4005, 4965, 9846, 24693, 31604];
        // `iter()` rather than `into_iter()`: on arrays the latter yields
        // values instead of references from edition 2021 onward.
        for &n in nums.iter() {
            for &d in denoms.iter() {
                let reference = n / i32::from(d);
                let actual = divide_30bit_by_16bit(n, d);
                assert_eq!(reference, actual);
            }
        }
    }
}
87 |
--------------------------------------------------------------------------------
/c/arg.h:
--------------------------------------------------------------------------------
1 | char * find_first_arg(int argc, char**argv) {
2 | int i;
3 | for (i = 1; i < argc; ++i) {
4 | if (argv[i][0] != '-') {
5 | return argv[i];
6 | }
7 | }
8 | return NULL;
9 | }
10 | void set_options(struct DivansCompressorState *state, int argc, char **argv) {
11 | int i;
12 | unsigned int ret =0 ;
13 | int used_cm = 0;
14 | for (i = 1; i < argc; ++i) {
15 | if (strstr(argv[i], "-q") == argv[i]) {
16 | ret = divans_set_option(state, DIVANS_OPTION_QUALITY, atoi(argv[i] + 2));
17 | assert(ret == DIVANS_SUCCESS);
18 | }
19 | if (strstr(argv[i], "-p") == argv[i]) {
20 | ret = divans_set_option(state, DIVANS_OPTION_PRIOR_BITMASK_DETECTION, atoi(argv[i] + 2));
21 | assert(ret == DIVANS_SUCCESS);
22 | }
23 | if (strstr(argv[i], "-l") == argv[i]) {
24 | ret = divans_set_option(state, DIVANS_OPTION_USE_BROTLI_COMMAND_SELECTION, 0);
25 | assert(ret == DIVANS_SUCCESS);
26 | }
27 | if (strstr(argv[i], "-w") == argv[i]) {
28 | ret = divans_set_option(state, DIVANS_OPTION_WINDOW_SIZE, atoi(argv[i] + 2));
29 | assert(ret == DIVANS_SUCCESS);
30 | }
31 | if (strstr(argv[i], "-a") == argv[i]) {
32 | ret = divans_set_option(state, DIVANS_OPTION_LITERAL_ADAPTATION_CM_HIGH, atoi(argv[i] + 2));
33 | assert(ret == DIVANS_SUCCESS);
34 | }
35 | if (strstr(argv[i], "-cm") == argv[i]) {
36 | used_cm = 1;
37 | ret = divans_set_option(state, DIVANS_OPTION_USE_CONTEXT_MAP, 1);
38 | assert(ret == DIVANS_SUCCESS);
39 | if (argv[i] + 3 !='\0') {
40 | ret = divans_set_option(state, DIVANS_OPTION_FORCE_LITERAL_CONTEXT_MODE, atoi(argv[i] + 3));
41 | assert(ret == DIVANS_SUCCESS);
42 | }
43 | }
44 | if (strstr(argv[i], "-bs") == argv[i]) {
45 | ret = divans_set_option(state, DIVANS_OPTION_STRIDE_DETECTION_QUALITY, 1);
46 | assert(ret == DIVANS_SUCCESS);
47 | }
48 | if (strstr(argv[i], "-as") == argv[i]) {
49 | ret = divans_set_option(state, DIVANS_OPTION_STRIDE_DETECTION_QUALITY, 2);
50 | assert(ret == DIVANS_SUCCESS);
51 | }
52 | }
53 | for (i = 1; i < argc; ++i) {
54 | if (strstr(argv[i], "-s") == argv[i]) {
55 | if (used_cm) {
56 | ret = divans_set_option(state, DIVANS_OPTION_DYNAMIC_CONTEXT_MIXING, 1);
57 | assert(ret == DIVANS_SUCCESS);
58 | }
59 | if (strcmp(argv[i], "-s") != 0) { // diff
60 | ret = divans_set_option(state, DIVANS_OPTION_FORCE_STRIDE_VALUE, atoi(argv[i]+2));
61 | assert(ret == DIVANS_SUCCESS);
62 | }
63 | }
64 | }
65 | for (i = 1; i < argc; ++i) {
66 | if (strstr(argv[i], "-m") == argv[i]) {
67 | if (strcmp(argv[i], "-m") != 0) { // diff
68 | ret = divans_set_option(state, DIVANS_OPTION_DYNAMIC_CONTEXT_MIXING, atoi(argv[i]+2));
69 | } else {
70 | ret = divans_set_option(state, DIVANS_OPTION_DYNAMIC_CONTEXT_MIXING, 2);
71 | }
72 | assert(ret == DIVANS_SUCCESS);
73 | }
74 | }
75 | }
76 |
--------------------------------------------------------------------------------
/src/probability/variant_speed_cdf.rs:
--------------------------------------------------------------------------------
1 | use core;
2 | use super::interface::{Prob, BaseCDF, Speed, CDF16, BLEND_FIXED_POINT_PRECISION, SPEED_PALETTE_SIZE, SymStartFreq};
3 | use brotli::enc::util::FastLog2u16;
4 |
// NOTE(review): the generic parameter list (the `ChildCDF` declaration and its
// bounds) is missing from the struct and impl headers as shown here - confirm
// against the original file.
/// Holds `SPEED_PALETTE_SIZE + 1` copies of a child CDF together with a
/// running cost for each copy.
#[derive(Clone,Copy)]
pub struct VariantSpeedCDF {
    // Slot 0 is the copy that the accessors in this file read from.
    cdf: [ChildCDF; SPEED_PALETTE_SIZE + 1],
    // cost[i] is the accumulated cost of cdf[i].
    cost: [f32;SPEED_PALETTE_SIZE+1],
}

impl Default for VariantSpeedCDF {
    /// Every slot starts as a default child CDF with zero cost.
    fn default() -> Self{
        VariantSpeedCDF {
            cdf:[ChildCDF::default();SPEED_PALETTE_SIZE + 1],
            cost:[0.0;SPEED_PALETTE_SIZE+1],
        }
    }
}
19 |
20 | impl CDF16 for VariantSpeedCDF {
21 | fn blend(&mut self, symbol: u8, dyn:Speed) {
22 | for (index, (cdf, cost)) in self.cdf.iter_mut().zip(self.cost.iter_mut()).enumerate() {
23 | let pdf = cdf.pdf(symbol);
24 | let max = cdf.max();
25 | *cost += FastLog2u16(max as u16) - FastLog2u16(pdf as u16);
26 | cdf.blend(symbol, if index == 0 {dyn} else {Speed::ENCODER_DEFAULT_PALETTE[index - 1]});
27 | }
28 | }
29 | fn average(&self, other: &Self, mix_rate: i32) ->Self {
30 | let mut ret = self.clone();
31 | ret.cdf[0] = self.cdf[0].average(&other.cdf[0], mix_rate);
32 | ret
33 | }
34 | }
35 |
// NOTE(review): generic parameters appear to be missing from this impl header,
// from the qualified path in `num_symbols`, and from the `Option` return
// types below - confirm against the original file.
/// Every query is answered by slot 0; the remaining slots are only exposed
/// through the variant-cost accessors at the end.
impl BaseCDF for VariantSpeedCDF {
    fn num_symbols() -> u8 {
        ::num_symbols()
    }
    fn cdf(&self, symbol: u8) -> Prob {
        self.cdf[0].cdf(symbol)
    }
    fn pdf(&self, symbol: u8) -> Prob {
        self.cdf[0].pdf(symbol)
    }
    fn div_by_max(&self, val: i32) -> i32 {
        self.cdf[0].div_by_max(val)
    }
    fn max(&self) -> Prob {
        self.cdf[0].max()
    }
    fn log_max(&self) -> Option {
        self.cdf[0].log_max()
    }
    fn used(&self) -> bool {
        self.cdf[0].used()
    }

    // returns true if valid.
    fn valid(&self) -> bool {
        self.cdf[0].valid()
    }

    // returns the entropy of the current distribution.
    fn entropy(&self) -> f64 {
        self.cdf[0].entropy()
    }
    #[inline(always)]
    fn sym_to_start_and_freq(&self,
                             sym: u8) -> SymStartFreq {
        self.cdf[0].sym_to_start_and_freq(sym)
    }
    #[inline(always)]
    fn rescaled_cdf(&self, sym: u8) -> i32 {
        self.cdf[0].rescaled_cdf(sym)
    }
    #[inline(always)]
    fn cdf_offset_to_sym_start_and_freq(&self,
                                        cdf_offset_p: Prob) -> SymStartFreq {
        self.cdf[0].cdf_offset_to_sym_start_and_freq(cdf_offset_p)
    }

    // These methods are optional because implementing them requires nontrivial bookkeeping.
    // Only CDFs that are intended for debugging should support them.
    fn num_samples(&self) -> Option {
        self.cdf[0].num_samples()
    }
    fn true_entropy(&self) -> Option {
        self.cdf[0].true_entropy()
    }
    fn rolling_entropy(&self) -> Option {
        self.cdf[0].rolling_entropy()
    }
    fn encoding_cost(&self) -> Option {
        self.cdf[0].encoding_cost()
    }
    // Number of palette variants tracked in addition to slot 0.
    fn num_variants(&self) -> usize {
        SPEED_PALETTE_SIZE
    }
    // Accumulated cost of palette variant `variant_index`, stored at
    // cost[variant_index + 1] because cost[0] belongs to slot 0.
    fn variant_cost(&self, variant_index: usize) -> f32 {
        self.cost[variant_index + 1]
    }
    // Accumulated cost of slot 0.
    fn base_variant_cost(&self) -> f32 {
        self.cost[0]
    }
}
107 |
--------------------------------------------------------------------------------
/src/probability/external_cdf.rs:
--------------------------------------------------------------------------------
1 | use core;
2 | use super::interface::{BaseCDF, Prob, CDF16, Speed, BLEND_FIXED_POINT_PRECISION};
3 |
4 | #[derive(Clone,Copy)]
5 | pub struct ExternalProbCDF16 {
6 | pub cdf: [Prob; 16],
7 | pub nibble: usize,
8 | }
9 |
10 | impl Default for ExternalProbCDF16 {
11 | fn default() -> Self {
12 | ExternalProbCDF16 {
13 | cdf: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
14 | nibble: 0,
15 | }
16 | }
17 | }
18 |
19 | impl ExternalProbCDF16 {
20 | pub fn init(&mut self, _n: u8, probs: &[u8], mix: &T) { // Fills self.cdf from four per-bit probabilities averaged with the distribution read from `mix`.
21 | //println_stderr!("init for {:x}", _n);
22 | //println_stderr!("init for {:x} {:x} {:x} {:x}", probs[0], probs[1], probs[2], probs[3]);
23 | // Average the externally supplied probabilities with the ones read from `mix`.
24 | assert!(probs.len() == 4); // exactly one probability per bit of the nibble
25 | self.nibble = _n as usize;
26 | let mut pcdf = [1f64;16]; // starts at 1 so the per-bit factors can be multiplied in
27 | for nibble in 0..16 {
28 | //println_stderr!("setting for {:x}", nibble);
29 | for bit in 0..4 {
30 | let p1 = f64::from(probs[bit]) / f64::from(u8::max_value()); // probs[bit] / 255 is used as the chance that this bit is one
31 | let isone = (nibble & (1<<(3 - bit))) != 0; // bit 0 addresses the most significant bit of the nibble
32 | //println_stderr!("bit {:} is {:} {:}", bit, isone, p1);
33 | if isone {
34 | pcdf[nibble] *= p1;
35 | } else {
36 | pcdf[nibble] *= 1f64 - p1;
37 | }
38 | }
39 | }
40 | let mut mcdf = [1f64;16]; // per-nibble probability mass taken from `mix`
41 | for nibble in 1..16 { // NOTE(review): the loop starts at 1, so mcdf[0] keeps its initial 1.0 - confirm this is intended
42 | let prev = nibble - 1;
43 | let c = f64::from(mix.cdf(nibble));
44 | let p = f64::from(mix.cdf(prev));
45 | let m = f64::from(mix.max());
46 | let d = (c - p) / m; // difference of adjacent CDF entries, scaled by mix.max()
47 | assert!(d < 1.0);
48 | mcdf[nibble as usize] = d;
49 | }
50 | for nibble in 0..16 {
51 | pcdf[nibble] = (pcdf[nibble] + mcdf[nibble])/2f64; // equal-weight average of the two estimates
52 | }
53 | let mut sum = 0f64;
54 | for pcdf_nibble in &mut pcdf { // running sum turns the per-nibble values into cumulative ones
55 | sum += *pcdf_nibble;
56 | *pcdf_nibble = sum;
57 | }
58 | for pcdf_nibble in &mut pcdf { // normalise so the final cumulative value is 1.0
59 | *pcdf_nibble /= sum;
60 | }
61 | for nibble in 0..16 {
62 | let p = pcdf[nibble];
63 | let res = (p * f64::from(Prob::max_value())) as Prob; // scale to the Prob range
64 | let least1 = core::cmp::max(res, 1);
65 | self.cdf[nibble] = core::cmp::min(least1, self.max() - 1); // every stored entry is clamped to [1, max() - 1], including the last one
66 | //println_stderr!("cdf set {:x} {:x} {:}", nibble, self.cdf[nibble], p);
67 | }
68 | }
69 | }
70 |
71 | impl BaseCDF for ExternalProbCDF16 { // Read-only queries over the 16-entry table stored in self.cdf.
72 | fn num_symbols() -> u8 { 16 }
73 | fn div_by_max(&self, val:i32) -> i32 { // plain integer division by max()
74 | return val / i32::from(self.max())
75 | }
76 | fn used(&self) -> bool { // true when the entropy differs from that of a default (all-zero) table
77 | self.entropy() != Self::default().entropy()
78 | }
79 | fn max(&self) -> Prob { // the scale is always the largest value Prob can hold
80 | Prob::max_value()
81 | }
82 | fn log_max(&self) -> Option { None }
83 | fn cdf(&self, symbol: u8) -> Prob { // direct table lookup; indexing panics when symbol >= 16
84 | //println_stderr!("cdf for {:x} have {:x}", symbol, self.nibble);
85 | self.cdf[symbol as usize]
86 | }
87 | fn valid(&self) -> bool { // no validation is performed; always reports true
88 | true
89 | }
90 | }
91 |
92 | impl CDF16 for ExternalProbCDF16 {
93 | fn average(&self, other:&Self, mix_rate:i32) -> Self {
94 | if self.max() < 64 && other.max() > 64 {
95 | //return other.clone();
96 | }
97 | if self.max() > 64 && other.max() < 64 {
98 | //return self.clone();
99 | }
100 | if self.entropy() > other.entropy() {
101 | //return other.clone();
102 | }
103 | //return self.clone();
104 | let mut retval = *self;
105 | let ourmax = i64::from(self.max());
106 | let othermax = i64::from(other.max());
107 | let maxmax = core::cmp::min(ourmax, othermax);
108 | let lgmax = 64 - maxmax.leading_zeros();
109 | let inv_mix_rate = (1 << BLEND_FIXED_POINT_PRECISION) - mix_rate;
110 | for (s, o) in retval.cdf.iter_mut().zip(other.cdf.iter()) {
111 | *s = (((i64::from(*s) * i64::from(mix_rate) *othermax + i64::from(*o) * i64::from(inv_mix_rate) * ourmax + 1) >> BLEND_FIXED_POINT_PRECISION) >> lgmax) as Prob;
112 | }
113 | retval
114 | }
115 | fn blend(&mut self, symbol: u8, speed: Speed) {
116 | return;
117 | }
118 | }
119 |
--------------------------------------------------------------------------------
/wasm/brotli_iframe.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Brotli
6 |
35 |
36 |
138 |
139 |
140 |
141 |
--------------------------------------------------------------------------------
/src/divans_to_raw/mod.rs:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Dropbox, Inc
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | use alloc::{SliceWrapper, Allocator};
16 |
17 | use codec::EncoderOrDecoderSpecialization;
18 |
19 | use super::interface::{CopyCommand,DictCommand,LiteralCommand,Command};
20 |
21 | use slice_util::AllocatedMemoryPrefix;
22 |
/// State for the decoding side of `EncoderOrDecoderSpecialization`.
#[derive(Default)]
pub struct DecoderSpecialization {
    /// Largest literal-buffer length requested so far through
    /// `alloc_literal_buffer`.
    max_size: usize,
}

impl DecoderSpecialization {
    /// Creates a specialization that has not yet seen any literal buffer
    /// request (`max_size` is zero), the same state `Default` produces.
    pub fn new() -> Self {
        Self::default()
    }
}
35 |
36 |
37 |
38 | impl EncoderOrDecoderSpecialization for DecoderSpecialization { // Decoder side: command sources are the caller-provided `backing` values, outputs go to the caller's buffers.
39 | const DOES_CALLER_WANT_ORIGINAL_FILE_BYTES: bool = true;
40 | const IS_DECODING_FILE: bool = true;
41 | fn alloc_literal_buffer>(&mut self, // remembers the largest length requested so far and allocates that many bytes
42 | m8:&mut AllocU8,
43 | len: usize) -> AllocatedMemoryPrefix {
44 | if len > self.max_size {
45 | self.max_size = len;
46 | }
47 | AllocatedMemoryPrefix::::new(m8, self.max_size) // sized by max_size, which may exceed `len`
48 | }
49 | #[inline(always)]
50 | fn get_input_command<'a, ISlice:SliceWrapper>(&self, // ignores the input slice and returns `backing`
51 | _data:&'a [Command],
52 | _offset: usize,
53 | backing:&'a Command) -> &'a Command {
54 | backing
55 | }
56 | #[inline(always)]
57 | fn get_output_command<'a, AllocU8:Allocator>(&self, data:&'a mut [Command>], // output goes into the caller's slice at `offset`
58 | offset: usize,
59 | _backing:&'a mut Command>) -> &'a mut Command> {
60 | &mut data[offset] // indexing panics if `offset` is out of range
61 | }
62 | #[inline(always)]
63 | fn get_source_copy_command<'a, ISlice:SliceWrapper>(&self, // always `backing`; `_data` is not consulted
64 | _data: &'a Command,
65 | backing: &'a CopyCommand) -> &'a CopyCommand {
66 | backing
67 | }
68 | #[inline(always)]
69 | fn get_source_literal_command<'a, // always `backing`; `_data` is not consulted
70 | ISlice:SliceWrapper
71 | +Default>(&self,
72 | _data: &'a Command,
73 | backing: &'a LiteralCommand) -> &'a LiteralCommand {
74 | backing
75 | }
76 | #[inline(always)]
77 | fn get_source_dict_command<'a, ISlice:SliceWrapper>(&self, // always `backing`; `_data` is not consulted
78 | _data: &'a Command,
79 | backing: &'a DictCommand) -> &'a DictCommand {
80 | backing
81 | }
82 | #[inline(always)]
83 | fn get_literal_byte>(&self, // returns a constant 0 and never reads the command
84 | _in_cmd: &LiteralCommand,
85 | _index: usize) -> u8 {
86 | 0
87 | }
88 | #[inline(always)]
89 | fn get_recoder_output<'a>(&'a mut self, // hands back the caller's own output buffer
90 | passed_in_output_bytes: &'a mut [u8]) -> &'a mut[u8] {
91 | assert_eq!(Self::DOES_CALLER_WANT_ORIGINAL_FILE_BYTES, true);
92 | passed_in_output_bytes
93 | }
94 | #[inline(always)]
95 | fn get_recoder_output_offset<'a>(&self, // hands back the caller's own offset; `_backing` is unused
96 | passed_in_output_bytes: &'a mut usize,
97 | _backing: &'a mut usize) -> &'a mut usize {
98 | assert_eq!(Self::DOES_CALLER_WANT_ORIGINAL_FILE_BYTES, true);
99 | passed_in_output_bytes
100 | }
101 |
102 |
103 | }
104 |
--------------------------------------------------------------------------------
/src/probability/opt_frequentist_cdf.rs:
--------------------------------------------------------------------------------
1 | use core;
2 | use super::interface::{Prob, BaseCDF, Speed, CDF16, BLEND_FIXED_POINT_PRECISION, LOG2_SCALE, CDF_BITS};
3 | use super::frequentist_cdf::FrequentistCDF16;
4 | use super::numeric;
/// Returns a word with only bit `shift_val` set when `val` is nonzero, and
/// zero otherwise.
fn to_bit_i32(val: i32, shift_val: u8) -> u32 {
    match val {
        0 => 0,
        _ => 1 << shift_val,
    }
}

/// Gathers the top bit of every byte of `data` into one 32-bit mask.
///
/// Byte `b` of `data[w]` (with `b == 0` the least significant byte) supplies
/// bit `4 * w + b` of the result.
fn movemask_epi8_i32(data: [i32; 8]) -> u32 {
    // Masks selecting the top bit of each byte, least significant byte first.
    const BYTE_TOP_BITS: [i32; 4] = [0x80, 0x8000, 0x80_0000, -0x8000_0000];
    let mut mask = 0u32;
    for (word_index, word) in data.iter().enumerate() {
        for (byte_index, top_bit) in BYTE_TOP_BITS.iter().enumerate() {
            let out_bit = (4 * word_index + byte_index) as u8;
            mask |= to_bit_i32(*word & *top_bit, out_bit);
        }
    }
    mask
}
55 | #[derive(Clone,Copy)]
56 | pub struct OptFrequentistCDF16 {
57 | pub cdf: FrequentistCDF16,
58 | pub inv_max_and_bitlen: (i64, u8),
59 | }
60 |
61 | impl OptFrequentistCDF16 {
62 | fn new(input:FrequentistCDF16) -> Self {
63 | OptFrequentistCDF16{
64 | cdf:input,
65 | inv_max_and_bitlen: numeric::lookup_divisor(input.max()),
66 | }
67 | }
68 | }
69 |
70 | impl Default for OptFrequentistCDF16 {
71 | fn default() -> Self {
72 | Self::new(FrequentistCDF16::default())
73 | }
74 | }
75 |
76 |
77 | impl BaseCDF for OptFrequentistCDF16 { // Forwards to the wrapped FrequentistCDF16; only div_by_max uses the stored divisor lookup.
78 | fn num_symbols() -> u8 { 16 }
79 | fn used(&self) -> bool {
80 | self.cdf.used()
81 | }
82 | #[inline(always)]
83 | fn max(&self) -> Prob {
84 | self.cdf.max()
85 | }
86 | fn log_max(&self) -> Option { None }
87 | #[inline(always)]
88 | fn cdf(&self, symbol: u8) -> Prob {
89 | self.cdf.cdf(symbol)
90 | }
91 | fn valid(&self) -> bool { // valid only if the stored divisor lookup still matches max() and the wrapped CDF is valid
92 | let inv_max_and_bitlen = numeric::lookup_divisor(self.max()); // recomputed from scratch for the comparison
93 | if self.inv_max_and_bitlen != inv_max_and_bitlen {
94 | return false;
95 | }
96 | self.cdf.valid()
97 | }
98 | #[inline(always)]
99 | fn div_by_max(&self, num: i32) -> i32 { // divides via the stored lookup instead of recomputing it
100 | assert!(LOG2_SCALE as usize + CDF_BITS <= numeric::LOG_MAX_NUMERATOR); // presumably keeps the numerator within the range fast_divide_30bit_by_16bit supports - confirm in `numeric`
101 | numeric::fast_divide_30bit_by_16bit(num, self.inv_max_and_bitlen)
102 | }
103 | }
104 |
/// Returns 16 minus the number of leading zero bits of `d`: 0 for zero, the
/// position of the highest set bit for positive values, and 16 for any
/// negative value (whose sign bit is set).
fn k16bit_length(d: i16) -> u8 {
    let unused_high_bits = d.leading_zeros();
    (16 - unused_high_bits) as u8
}
108 | const LOG_MAX_NUMERATOR: usize = LOG2_SCALE as usize + CDF_BITS;
109 |
110 | impl CDF16 for OptFrequentistCDF16 {
111 | fn average(&self, other:&Self, mix_rate:i32) -> Self {
112 | let ret = self.cdf.average(&other.cdf, mix_rate);
113 | Self::new(ret)
114 | }
115 | fn blend(&mut self, symbol: u8, speed: Speed) {
116 | self.cdf.blend(symbol, speed);
117 | self.inv_max_and_bitlen = numeric::lookup_divisor(self.max());
118 | }
119 | }
120 |
#[cfg(test)]
mod test {
    use super::OptFrequentistCDF16;
    declare_common_tests!(OptFrequentistCDF16);

    // Runs the shared operation helper on a pair of baseline CDFs and a pair
    // of optimized CDFs, all starting from their default state.
    #[test]
    fn test_cdf_opt_eq_baseline() {
        use super::FrequentistCDF16;
        use super::super::common_tests;
        let mut baseline_first = FrequentistCDF16::default();
        let mut baseline_second = FrequentistCDF16::default();
        let mut opt_first = OptFrequentistCDF16::default();
        let mut opt_second = OptFrequentistCDF16::default();
        common_tests::operation_test_helper(&mut baseline_first,
                                            &mut baseline_second,
                                            &mut opt_first,
                                            &mut opt_second);
    }
}
136 |
--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Dropbox, Inc
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and
12 | // limitations under the License.
13 | //! This is documentation for the `divans` crate
14 | //!
15 | //! The `divans` crate is meant to be used for generic data compression
16 |
17 | #![cfg_attr(feature="benchmark", feature(test))]
18 | //#![cfg_attr(feature="simd", feature(platform_intrinsics))]
19 | #![cfg_attr(not(feature="no-stdlib-rust-binding"),cfg_attr(not(feature="std"), feature(lang_items)))]
20 | #![cfg_attr(not(feature="no-stdlib-rust-binding"),cfg_attr(not(feature="std"), feature(compiler_builtins_lib)))]
21 | #![cfg_attr(not(feature="no-stdlib-rust-binding"),cfg_attr(not(feature="std"), crate_type="cdylib"))]
22 | #![no_std]
23 |
24 | #[cfg(not(test))]
25 | #[cfg(any(feature="findspeed", feature="billing"))]
26 | #[macro_use]
27 | extern crate std;
28 |
29 | #[cfg(feature="std")]
30 | #[cfg(not(test))]
31 | #[cfg(not(any(feature="billing", feature="findspeed")))]
32 | #[macro_use]
33 | extern crate std;
34 |
35 | #[cfg(test)]
36 | #[macro_use]
37 | extern crate std;
38 | #[cfg(feature="simd")]
39 | #[macro_use(shuffle)]
40 | extern crate packed_simd;
41 | #[cfg(feature="std")]
42 | extern crate alloc_stdlib;
43 |
44 | extern crate alloc_no_stdlib as alloc;
45 | extern crate brotli;
46 | pub mod resizable_buffer;
47 | pub mod interface;
48 | pub mod slice_util;
49 | pub mod alloc_util;
50 | mod probability;
51 | #[macro_use]
52 | mod priors;
53 | #[macro_use]
54 | mod arithmetic_coder;
55 | mod debug_encoder;
56 | mod cmd_to_raw;
57 | mod raw_to_cmd;
58 | mod codec;
59 | mod cmd_to_divans;
60 | mod divans_to_raw;
61 | #[macro_use]
62 | mod billing;
63 | pub mod test_helper;
64 | mod test_ans;
65 | mod test_mux;
66 | mod ans;
67 | mod brotli_ir_gen;
68 | mod divans_compressor;
69 | mod divans_decompressor;
70 | mod parallel_decompressor;
71 | mod stub_parallel_decompressor;
72 | pub mod ir_optimize;
73 | pub mod mux;
74 | pub mod constants;
75 | pub mod threading;
76 | pub mod multithreading;
77 | pub use self::interface::{DivansInputResult,DivansOpResult,DivansOutputResult, DivansResult, ErrMsg, MAGIC_NUMBER};
78 | pub use alloc::{AllocatedStackMemory, Allocator, SliceWrapper, SliceWrapperMut, StackAllocator};
79 | pub use interface::{DivansCompressorFactory, BlockSwitch, LiteralBlockSwitch, Command, Compressor, CopyCommand, Decompressor, DictCommand, LiteralCommand, Nop, NewWithAllocator, ArithmeticEncoderOrDecoder, LiteralPredictionModeNibble, PredictionModeContextMap, free_cmd, FeatureFlagSliceType,
80 | DefaultCDF16};
81 |
82 | pub use brotli_ir_gen::{BrotliDivansHybridCompressor,BrotliDivansHybridCompressorFactory};
83 | pub use cmd_to_raw::DivansRecodeState;
84 | pub use codec::CMD_BUFFER_SIZE;
85 | pub use divans_to_raw::DecoderSpecialization;
86 | pub use cmd_to_divans::EncoderSpecialization;
87 | pub use codec::{EncoderOrDecoderSpecialization, DivansCodec, StrideSelection};
88 | pub use divans_compressor::{DivansCompressor, DivansCompressorFactoryStruct};
89 |
90 | #[cfg(not(feature="safe"))]
91 | mod ffi;
92 | #[cfg(not(feature="safe"))]
93 | pub use ffi::*;
94 | mod reader;
95 | mod writer;
96 | #[cfg(feature="std")]
97 | pub use reader::DivansBrotliHybridCompressorReader;
98 | #[cfg(feature="std")]
99 | pub use reader::DivansExperimentalCompressorReader;
100 | #[cfg(feature="std")]
101 | pub use reader::DivansDecompressorReader;
102 |
103 | #[cfg(feature="std")]
104 | pub use writer::DivansBrotliHybridCompressorWriter;
105 | #[cfg(feature="std")]
106 | pub use writer::DivansExperimentalCompressorWriter;
107 | #[cfg(feature="std")]
108 | pub use writer::DivansDecompressorWriter;
109 |
110 |
111 | pub use probability::Speed;
112 |
113 |
114 | pub use probability::CDF2;
115 | pub use probability::CDF16;
116 | pub use probability::BaseCDF;
117 |
118 | pub use interface::BrotliCompressionSetting;
119 | pub use interface::DivansCompressorOptions;
120 | pub use divans_decompressor::{DivansDecompressor,
121 | DivansDecompressorFactory,
122 | DivansDecompressorFactoryStruct,
123 | StaticCommand};
124 |
--------------------------------------------------------------------------------
/research/brute.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import threading
3 | import subprocess
4 | import Queue
5 | ir = sys.stdin.read() # the whole IR text is read from stdin (Python 2 script: uses the Queue module and print statements)
6 | found_mixing_offsets = [] # (start, end) character spans of each "mixingvalues" payload in `ir`
7 | original_values = [] # the payload text found at each span
8 | start = 0
9 | def run(output_q, procedure, input): # feeds `input` to the process and puts its stdout on `output_q`
10 | so, se = procedure.communicate(input)
11 | output_q.put(so)
12 | original_values = [] # re-initialises the list already created above
13 | while True: # scan `ir` for every occurrence of the key
14 | key = "mixingvalues "
15 | where = ir.find(key, start)
16 | if where == -1:
17 | if start == 0:
18 | assert where != -1, "Must have at least one mixingvalues" # fails when the key never occurs
19 | break
20 | for end_index in range(where + len(key), len(ir)): # payload ends at the first character that is not 0-3 or a space
21 | if ir[end_index] not in ('0', '1', '2', '3', ' '):
22 | break
23 | found_mixing_offsets.append((where + len(key), end_index))
24 | original_values.append(ir[where + len(key):end_index])
25 | start = where + 1
26 |
27 | q = Queue.Queue() # receives stdout of the option-a process
28 | q_c = Queue.Queue() # receives stdout of the option-c process
29 | best_size = None # smallest output length accepted so far
30 | last_ir = ""
31 | for (item, oarray) in zip(found_mixing_offsets, original_values):
32 | array = [x + ' ' for x in oarray.split(' ')]
33 | for sub_offset in range(0,8192): # presumably every payload holds at least 8192 entries; a shorter one would raise IndexError below - confirm
34 | array[sub_offset] = '0 ' # candidates tried at this position are 0, 1 and 3 (2 is never tried)
35 | option_a = ''.join(array)
36 | array[sub_offset] = '1 '
37 | option_b = ''.join(array)
38 | array[sub_offset] = '3 '
39 | option_c = ''.join(array)
40 | ir_a = ir[:item[0]] + option_a + ir[item[1]:]
41 | ir_b = ir[:item[0]] + option_b + ir[item[1]:]
42 | ir_c = ir[:item[0]] + option_c + ir[item[1]:]
43 | proc_a = subprocess.Popen([sys.argv[1], # the program to run is argv[1]; remaining argv entries are appended to its flags
44 | '-i', '-cm', '-s', '-mixing=1'] + sys.argv[2:],
45 | stdin=subprocess.PIPE,
46 | stdout=subprocess.PIPE)
47 | proc_b = subprocess.Popen([sys.argv[1],
48 | '-i', '-cm', '-s', '-mixing=1'] + sys.argv[2:],
49 | stdin=subprocess.PIPE,
50 | stdout=subprocess.PIPE)
51 | proc_c = subprocess.Popen([sys.argv[1],
52 | '-i', '-cm', '-s', '-mixing=1'] + sys.argv[2:],
53 | stdin=subprocess.PIPE,
54 | stdout=subprocess.PIPE)
55 | threading.Thread(target=lambda: run(q, proc_a, ir_a)).start() # options a and c are fed from helper threads
56 | threading.Thread(target=lambda: run(q_c, proc_c, ir_c)).start()
57 | b_stdout, _stderr = proc_b.communicate(ir_b) # option b is fed from the main thread
58 | a_ec = proc_a.wait()
59 | b_ec = proc_b.wait()
60 | c_ec = proc_c.wait()
61 | if a_ec != 0 or b_ec != 0 or c_ec != 0: # on any failure, dump the three candidate inputs to /tmp before asserting
62 | with open('/tmp/ira','w') as f:
63 | f.write(ir_a)
64 | with open('/tmp/irb','w') as f:
65 | f.write(ir_b)
66 | with open('/tmp/irc','w') as f:
67 | f.write(ir_c)
68 | assert a_ec == 0
69 | assert b_ec == 0
70 | assert c_ec == 0
71 | a_stdout = q.get()
72 | c_stdout = q_c.get()
73 | if best_size is not None:
74 | if min(len(a_stdout), len(b_stdout)) > best_size: # only options a and b are compared here; option c is not included
75 | print 'uh oh',len(a_stdout), len(b_stdout),min(len(a_stdout), len(b_stdout)),'>', best_size
76 | with open('/tmp/ira','w') as f:
77 | f.write(ir_a)
78 | with open('/tmp/irb','w') as f:
79 | f.write(ir_b)
80 | with open('/tmp/irc','w') as f:
81 | f.write(ir_c)
82 | with open('/tmp/iro','w') as f:
83 | f.write(last_ir)
84 | assert min(len(a_stdout), len(b_stdout)) > best_size, "optimization should get better" # NOTE(review): same comparison as the test on line 74; the intended direction and nesting cannot be determined from this listing - confirm
85 | if len(c_stdout) < len(b_stdout) and len(c_stdout) < len(a_stdout): # keep whichever candidate produced the shortest output; ties fall through to option 1
86 | array[sub_offset] = '3 '
87 | sys.stderr.write("index " + str(sub_offset) + "Prefer 3 for " + str(len(c_stdout)) + "\n")
88 | last_ir = ir_c
89 | ir = ir_c
90 | best_size = len(c_stdout)
91 | elif len(a_stdout) < len(b_stdout):
92 | array[sub_offset] = '0 '
93 | sys.stderr.write("index " + str(sub_offset) + "Prefer 0 for " + str(len(a_stdout)) + "\n")
94 | last_ir = ir_a
95 | ir = ir_a
96 | best_size = len(a_stdout)
97 | else:
98 | sys.stderr.write("index " + str(sub_offset) + "Prefer 1 for "+ str(len(b_stdout)) + "\n")
99 | array[sub_offset] = '1 '
100 | last_ir = ir_b
101 | ir = ir_b
102 | best_size = len(b_stdout)
103 | ir = ir[:item[0]] + ''.join(array) + ir[item[1]:] # splice the chosen values back into the IR text
104 |
105 | sys.stdout.write(ir) # the tuned IR is written to stdout
106 |
--------------------------------------------------------------------------------
/src/ir_optimize/cache.rs:
--------------------------------------------------------------------------------
1 | use core;
2 | use codec::get_distance_from_mnemonic_code;
3 | use alloc::{Allocator, SliceWrapperMut, SliceWrapper};
4 | const CACHE_HIT_REFERENCE_SIZE: usize = 8;
5 |
/// Writable view over the bytes of one cache-hit log record.
pub struct CacheHitReferenceMut<'a>(pub &'a mut [u8]);

impl<'a> CacheHitReferenceMut<'a> {
    /// Stores `code` in byte 0 and the low 32 bits of `offset + 1` in bytes
    /// 1 through 4, least significant byte first.
    ///
    /// Panics if the underlying slice is shorter than five bytes.
    pub fn set_code_and_offset(&mut self, code: u8, offset: usize) {
        let stored = offset + 1;
        self.0[0] = code;
        for byte_index in 0..4 {
            self.0[1 + byte_index] = (stored >> (8 * byte_index)) as u8;
        }
    }
}
19 |
20 |
/// Read-only view over the bytes of one cache-hit log record.
pub struct CacheHitReference<'a>(pub &'a [u8]);

impl<'a> CacheHitReference<'a> {
    /// Decodes bytes 1 through 4 as a little-endian value and subtracts one
    /// with wrap-around, so an all-zero record yields `usize::MAX`.
    pub fn offset(&self) -> usize {
        let stored = self.0[1..5]
            .iter()
            .rev()
            .fold(0usize, |acc, &byte| (acc << 8) | usize::from(byte));
        stored.wrapping_sub(1)
    }

    /// Returns the code stored in byte 0 of the record.
    pub fn entry_id(&self) -> u8 {
        self.0[0]
    }

    /// Returns true when bytes 1 through 7 of the record are all zero.
    pub fn miss(&self) -> bool {
        self.0[1..8].iter().all(|&byte| byte == 0)
    }
}
35 |
36 |
37 |
38 | #[derive(Debug,Copy,Clone)]
39 | pub struct CacheEntry { // one slot of the four-entry distance cache
40 | dist:u32, // distance value held in this slot
41 | origin_offset:usize, // command offset recorded when the slot was last written or hit; 0 for the seed entries made by `new`
42 | }
43 | pub struct Cache> { // the distance cache plus a byte log with one fixed-size record per command
44 | cache:[CacheEntry;4],
45 | hitlist:AllocU8::AllocatedMemory, // record storage allocated in `new` and released by `free`
46 | }
47 |
48 | impl> Cache {
49 | // prepares the cache statistics tracker for operating on num_commands
50 | pub fn new(cur_cache:&[u32;4], num_commands:usize, m8:&mut AllocU8) -> Self {
51 | Cache::{
52 | cache:[CacheEntry{dist:cur_cache[0], origin_offset:0},
53 | CacheEntry{dist:cur_cache[1], origin_offset:0},
54 | CacheEntry{dist:cur_cache[2], origin_offset:0},
55 | CacheEntry{dist:cur_cache[3], origin_offset:0}],
56 | hitlist:m8.alloc_cell(num_commands * 8),
57 | }
58 | }
59 | pub fn free(&mut self, m8:&mut AllocU8) {
60 | m8.free_cell(core::mem::replace(&mut self.hitlist, AllocU8::AllocatedMemory::default()));
61 | }
62 | pub fn get_cache_hit_log(&mut self, cmd_offset:usize) -> CacheHitReference{
63 | let mut index = cmd_offset * CACHE_HIT_REFERENCE_SIZE;
64 | if index + CACHE_HIT_REFERENCE_SIZE > self.hitlist.slice().len() { // if we somehow overestimated the cache size
65 | index = 0;
66 | }
67 | CacheHitReference(self.hitlist.slice_mut().split_at_mut(index).1)
68 | }
69 | fn get_cache_hit_log_mut(&mut self, cmd_offset:usize) -> CacheHitReferenceMut{
70 | let mut index = cmd_offset * CACHE_HIT_REFERENCE_SIZE;
71 | if index + CACHE_HIT_REFERENCE_SIZE > self.hitlist.len() { // if we somehow overestimated the cache size
72 | index = 0;
73 | }
74 | CacheHitReferenceMut(self.hitlist.slice_mut().split_at_mut(index).1)
75 | }
76 | fn forward_reference_hitlist(&mut self, code: u8, cache_index: u8, cmd_offset: usize) {
77 | let origin = self.cache[usize::from(cache_index)].origin_offset;
78 | self.cache[usize::from(cache_index)].origin_offset = cmd_offset; // bump the "next use" of the cache
79 | let mut log = self.get_cache_hit_log_mut(origin);
80 | log.set_code_and_offset(code, cmd_offset);
81 | }
82 | pub fn populate(&mut self, dist:u32, copy_len:u32, cmd_offset:usize) {
83 | let cur_cache = [self.cache[0].dist, self.cache[1].dist, self.cache[2].dist, self.cache[3].dist];
84 | for code in 0..15 {
85 | let (cache_dist, ok, cache_index) = get_distance_from_mnemonic_code(&cur_cache, code as u8, copy_len);
86 | if dist == cache_dist && ok {
87 | // we have a hit
88 | self.forward_reference_hitlist(code, cache_index, cmd_offset);
89 | break;
90 | }
91 | }
92 | let new_cache_entry = CacheEntry {
93 | dist:dist,
94 | origin_offset:cmd_offset,
95 | };
96 | // note the different logic here from the codec: we need to replace the cache entry, even if it's equal to 0 to get the right command index
97 | if dist == cur_cache[0] {
98 | self.cache[0] = new_cache_entry;
99 | } else if dist == cur_cache[1] {
100 | self.cache = [new_cache_entry, self.cache[0], self.cache[2], self.cache[3]];
101 | } else if dist == cur_cache[2] {
102 | self.cache = [new_cache_entry, self.cache[0], self.cache[1], self.cache[3]];
103 | } else {
104 | self.cache = [new_cache_entry, self.cache[0], self.cache[1], self.cache[2]];
105 | }
106 | }
107 | }
108 |
109 |
110 |
--------------------------------------------------------------------------------
/src/cmd_to_divans/mod.rs:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Dropbox, Inc
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | use alloc::{SliceWrapper, Allocator};
16 |
17 | use codec::EncoderOrDecoderSpecialization;
18 |
19 | use super::interface::{CopyCommand,DictCommand,LiteralCommand,Command};
20 |
21 | use slice_util::AllocatedMemoryPrefix;
/// State for the encoding side of `EncoderOrDecoderSpecialization`.
pub struct EncoderSpecialization {
    /// Scratch buffer handed out by `get_recoder_output`.
    backing: [u8; 128],
    /// Largest literal-buffer length requested so far through
    /// `alloc_literal_buffer`.
    max_size: usize,
}

impl EncoderSpecialization {
    /// Creates a specialization with a zeroed scratch buffer and no recorded
    /// literal buffer request.
    pub fn new() -> Self {
        Self::default()
    }
}

impl Default for EncoderSpecialization {
    /// Produces the same state as `new`.
    fn default() -> Self {
        EncoderSpecialization {
            backing: [0u8; 128],
            max_size: 0,
        }
    }
}
39 |
40 | impl EncoderOrDecoderSpecialization for EncoderSpecialization { // Encoder side: commands are read from the caller's input, outputs go to scratch storage.
41 | const DOES_CALLER_WANT_ORIGINAL_FILE_BYTES: bool = false;
42 | const IS_DECODING_FILE: bool = false;
43 | fn alloc_literal_buffer>(&mut self, // remembers the largest length requested so far and allocates that many bytes
44 | m8:&mut AllocU8,
45 | len: usize) -> AllocatedMemoryPrefix {
46 | if len > self.max_size {
47 | self.max_size = len;
48 | }
49 | AllocatedMemoryPrefix::::new(m8, self.max_size) // sized by max_size, which may exceed `len`
50 | }
51 | fn get_input_command<'a, ISlice:SliceWrapper>(&self, // reads the command at `offset` from the caller's input; `_backing` is unused
52 | data:&'a [Command],
53 | offset: usize,
54 | _backing:&'a Command) -> &'a Command {
55 | &data[offset] // indexing panics if `offset` is out of range
56 | }
57 | fn get_output_command<'a, AllocU8:Allocator>(&self, // output goes to `backing`; the caller's slice is not touched
58 | _data:&'a mut [Command>],
59 | _offset: usize,
60 | backing:&'a mut Command>) -> &'a mut Command> {
61 | backing
62 | }
63 | fn get_source_copy_command<'a, ISlice:SliceWrapper>(&self, // the Copy payload of `data` if it is a Copy command, otherwise `backing`
64 | data: &'a Command,
65 | backing: &'a CopyCommand) -> &'a CopyCommand {
66 | match *data {
67 | Command::Copy(ref cc) => cc,
68 | _ => backing,
69 | }
70 | }
71 | fn get_source_literal_command<'a, // the Literal payload of `data` if it is a Literal command, otherwise `backing`
72 | ISlice:SliceWrapper
73 | +Default>(&self,
74 | data: &'a Command,
75 | backing: &'a LiteralCommand) -> &'a LiteralCommand {
76 | match *data {
77 | Command::Literal(ref lc) => lc,
78 | _ => backing,
79 | }
80 | }
81 | fn get_source_dict_command<'a, ISlice:SliceWrapper>(&self, // the Dict payload of `data` if it is a Dict command, otherwise `backing`
82 | data: &'a Command,
83 | backing: &'a DictCommand) -> &'a DictCommand {
84 | match *data {
85 | Command::Dict(ref dc) => dc,
86 | _ => backing,
87 | }
88 | }
89 | fn get_literal_byte>(&self, // reads byte `index` of the literal's data; panics if `index` is out of range
90 | in_cmd: &LiteralCommand,
91 | index: usize) -> u8 {
92 | in_cmd.data.slice()[index]
93 | }
94 | fn get_recoder_output<'a>(&'a mut self, // hands out the internal 128-byte scratch buffer instead of the caller's buffer
95 | _passed_in_output_bytes: &'a mut [u8]) -> &'a mut[u8] {
96 | assert_eq!(Self::DOES_CALLER_WANT_ORIGINAL_FILE_BYTES, false);
97 | &mut self.backing[..]
98 | }
99 | fn get_recoder_output_offset<'a>(&self, // hands back `backing` unchanged; the caller's offset is not used
100 | _passed_in_output_bytes: &'a mut usize,
101 | backing: &'a mut usize) -> &'a mut usize {
102 | assert_eq!(Self::DOES_CALLER_WANT_ORIGINAL_FILE_BYTES, false);
103 | //*backing = self.backing.len();
104 | backing
105 | }
106 |
107 |
108 | }
109 |
--------------------------------------------------------------------------------
/research/select_best_quandruple.py:
--------------------------------------------------------------------------------
1 | import json
2 | import sys
3 | from collections import defaultdict
4 | cut = True # when set, everything up to and including the first ':' of each input line is dropped before JSON parsing
5 | best_other_alg = 'zlib' # name of the reference algorithm; chosen from argv[1] below
6 | if len(sys.argv) > 1 and 'b95' in sys.argv[1]:
7 | best_other_alg = 'b95'
8 | elif len(sys.argv) > 1 and 'b11' in sys.argv[1]:
9 | best_other_alg = 'b11'
10 | elif len(sys.argv) > 1:
11 | assert 'zlib' in sys.argv[1]
12 | sub_item = 6 # index into raw['~'] selecting the candidate list to analyse
13 | combo_scores = defaultdict(lambda:0)
14 | data_list = [] # one candidate list per accepted sample
15 | zlib_other_list = [] # (zlib_cost, other_cost) per accepted sample
16 | score_record = [] # best total score after each selection round
17 |
18 | for line in sys.stdin: # Python 2 script (print statements, dict.iteritems); one JSON record per stdin line
19 | try:
20 | if cut:
21 | line = line[line.find(':')+1:]
22 | raw = json.loads(line)
23 | b11_cost = raw['b11'][0] # not used later; a missing key raises and the record is skipped by the except below
24 | b95_cost = raw['b95'][0] # not used later; same effect as above
25 | zlib_cost = raw['zlib'][0]
26 | other_cost = raw[best_other_alg][0]
27 | if raw['~raw']*.995 < zlib_cost: # skip samples where zlib's cost exceeds 99.5% of raw['~raw']
28 | continue
29 | clist = raw['~'][sub_item]
30 | data_list.append(clist)
31 | zlib_other_list.append((zlib_cost, other_cost))
32 | for k0 in range(len(clist) - 1): # score every unordered pair of candidates
33 | for k1 in range(k0 + 1, len(clist)):
34 | key = (k0, k1)
35 | score = min(clist[k0][0], clist[k1][0], other_cost, zlib_cost)
36 | combo_scores[key] += score
37 | except Exception: # any malformed record is silently ignored
38 | continue
39 | best_combo = min([(v, k[0], k[1]) for k, v in combo_scores.iteritems()]) # lowest-total pair; raises ValueError if no record was accepted
40 | score_record.append(best_combo[0])
41 | best_elements = [best_combo[1], best_combo[2]]
42 | print 'partial', best_elements,'score',score_record
43 | sys.stdout.flush()
44 | combo_scores = defaultdict(lambda:0)
45 | for (sample, other) in zip(data_list, zlib_other_list): # round 3: pick the candidate that helps most alongside the chosen pair
46 | for k in range(len(sample)):
47 | combo_scores[k] += min(sample[best_elements[0]][0],
48 | sample[best_elements[1]][0],
49 | sample[k][0], other[0], other[1])
50 | best_val = min([(v,k) for k, v in combo_scores.iteritems()])
51 | score_record.append(best_val[0])
52 | best_elements.append(best_val[1])
53 | print 'partial', best_elements,'score',score_record
54 | sys.stdout.flush()
55 | combo_scores = defaultdict(lambda:0)
56 | for (sample, other) in zip(data_list, zlib_other_list): # round 4: same greedy step with three elements fixed
57 | for k in range(len(sample)):
58 | combo_scores[k] += min(sample[best_elements[0]][0],
59 | sample[best_elements[1]][0],
60 | sample[best_elements[2]][0],
61 | sample[k][0], other[0], other[1])
62 | best_val = min([(v,k) for k, v in combo_scores.iteritems()])
63 | score_record.append(best_val[0])
64 | best_elements.append(best_val[1])
65 | print 'partial', best_elements,'score',score_record
66 | sys.stdout.flush()
67 | combo_scores = defaultdict(lambda:0)
68 |
69 | for (sample, other) in zip(data_list, zlib_other_list): # round 5: four elements fixed
70 | for k in range(len(sample)):
71 | combo_scores[k] += min(sample[best_elements[0]][0],
72 | sample[best_elements[1]][0],
73 | sample[best_elements[2]][0],
74 | sample[best_elements[3]][0],
75 | sample[k][0], other[0], other[1])
76 | best_val = min([(v,k) for k, v in combo_scores.iteritems()])
77 | score_record.append(best_val[0])
78 | best_elements.append(best_val[1])
79 | print 'partial', best_elements,'score',score_record
80 | sys.stdout.flush()
81 | combo_scores = defaultdict(lambda:0)
82 | for (sample, other) in zip(data_list, zlib_other_list): # round 6: five elements fixed
83 | for k in range(len(sample)):
84 | combo_scores[k] += min(sample[best_elements[0]][0],
85 | sample[best_elements[1]][0],
86 | sample[best_elements[2]][0],
87 | sample[best_elements[3]][0],
88 | sample[best_elements[4]][0],
89 | sample[k][0], other[0], other[1])
90 | best_val = min([(v,k) for k, v in combo_scores.iteritems()])
91 | score_record.append(best_val[0])
92 | best_elements.append(best_val[1])
93 | print 'partial', best_elements,'score',score_record
94 | sys.stdout.flush()
95 | combo_scores = defaultdict(lambda:0)
96 | prescient_score = 0 # total cost if the best candidate (or reference) were picked per sample
97 | for (sample, other) in zip(data_list, zlib_other_list): # final round: six elements fixed, ending with seven selected in total
98 | prescient_score += min(min(x[0] for x in sample), min(other))
99 | for k in range(len(sample)):
100 | combo_scores[k] += min(sample[best_elements[0]][0],
101 | sample[best_elements[1]][0],
102 | sample[best_elements[2]][0],
103 | sample[best_elements[3]][0],
104 | sample[best_elements[4]][0],
105 | sample[best_elements[5]][0],
106 | sample[k][0], other[0], other[1])
107 | best_val = min([(v,k) for k, v in combo_scores.iteritems()])
108 | score_record.append(best_val[0])
109 | best_elements.append(best_val[1])
110 | print best_elements,'score',score_record,'best',prescient_score # final line: chosen indices, per-round scores and the per-sample-optimal total
--------------------------------------------------------------------------------
/research/select_best_triple.py:
--------------------------------------------------------------------------------
1 | import json
2 | import sys
# Accumulators filled while scanning the log on stdin.
samples = []   # one parsed JSON array per usable 'args:' line
othstats = []  # one list of ints per 'stats:' line
hdrs = []      # replaced by the payload of each 'hdr:' line
for line in sys.stdin:
    if line.startswith('hdr:'):
        # The payload uses single quotes; swap them so json.loads accepts it.
        hdrs = json.loads(line[line.find(':')+1:].replace("'",'"'))
        continue
    if line.startswith('stats:'):
        # Strip the 'baseline: ' tag, collapse every 'vs…' tag to a bare 'vs',
        # drop everything from the first '[' on, then parse the ' vs '-separated ints.
        scores = [int(item.strip()) for item in line[6:].replace('baseline: ','').replace('vsIX','vs').replace('vsXI','vs').replace("vsX", "vs").replace('vsZstd','vs').replace('vsZ','vs').replace('vsU','vs').replace('vs:','vs').split('[')[0].split(' vs ')]
        othstats.append(scores)
    if not line.startswith('args:'):
        continue # ignore anything but the nonopt items
    where = line.find('[')
    if where == -1:
        continue
    where2 = line.find(']')
    json_src = json.loads(line[where:where2 + 1])
    # NOTE(review): best_item / index are computed here but not read again
    # in the visible part of this script.
    best_item = min(json_src)
    for index in range(len(json_src)):
        if json_src[index] == best_item:
            break
    samples.append(json_src)
# Optional first CLI argument selects the '-bytescore=N' header that not_ok() accepts.
bytescore_required = ""
if len(sys.argv) > 1:
    bytescore_required = "-bytescore=" + sys.argv[1]
def not_ok(index):
    """Return True when option `index` must not be chosen.

    Option 50 is always rejected.  Any other option is rejected unless at
    least one of its header strings (``hdrs[index]``) contains either the
    requested ``bytescore_required`` marker or ``'-bytescore=340'``.

    The former trailing 'speedlow' check sat after an unconditional
    ``return`` and could never run, so it has been dropped; the result for
    every input is unchanged.
    """
    if index == 50:
        return True
    return not any(bytescore_required in item or '-bytescore=340' in item
                   for item in hdrs[index])
42 |
43 | #best_price = 0
def is_blacklisted(baseline, uncompressed):
    """Return True when a sample should be treated as incompressible.

    A sample is blacklisted when the baseline output is more than 99.5% of
    the uncompressed size, or when it saves fewer than 128 bytes.

    An `uncompressed` size of 0 is blacklisted outright instead of raising
    ZeroDivisionError.
    """
    if uncompressed == 0:
        # Nothing to compress; also avoids dividing by zero below.
        return True
    if baseline / float(uncompressed) > .995:
        return True
    if uncompressed - baseline < 128:
        return True
    return False
# Greedy selection of up to six option indices, run twice: first skipping
# blacklisted samples, then counting them at their baseline size.
for include_ignored in [False, True]:
    perfect_prediction = 0
    num_options = len(samples[0])
    total_count = [0] * num_options
    brotli_total = 0
    brotli9_total = 0
    brotli10_total = 0
    brotli11_total = 0
    zstd_total = 0
    baseline_total = 0
    cost = 0
    favored = [0, 0, 0, 0, 0, 0]
    display = {}
    ignored = 0
    ignored_and_viewed = 0
    for favored_index in range(0,6):
        # Per-option totals for this round, given the options already favored.
        total_count = [0] * num_options
        for xindex in range(len(samples)):
            sample = [n for n in samples[xindex]]  # copy so it can be overwritten below
            divans,brotli,brotli9, brotli10,brotli11,zstd,baseline ,uncompressed= othstats[xindex]
            blacklist = is_blacklisted(baseline, uncompressed)
            ignored_and_viewed+= baseline
            if blacklist:
                ignored += baseline
                if not include_ignored:
                    continue # ignore these samples--assume they are vids
                # Counted pass: every codec is charged the baseline size.
                divans,brotli,brotli9, brotli10,brotli11,zstd = baseline, baseline, baseline, baseline, baseline, baseline
                for index in range(len(sample)):
                    sample[index] = baseline
            if favored_index == 0:
                # Reference totals are accumulated once, on the first round only.
                target = min([sample[index] for index in range(len(sample)) if not not_ok(index)]+ [baseline])
                perfect_prediction += target
                baseline_total += baseline
                brotli_total += brotli
                brotli9_total += brotli9
                brotli10_total += brotli10
                brotli11_total += brotli11
                zstd_total += zstd
                # Seeds `cost` with an upper bound for the search below.
                cost += max(sample)
            else:
                target = min(sample)
            for index in range(num_options):
                # Best size achievable with option `index` added to the favored set.
                cur = min([baseline] + [sample[index]] + [sample[fav] for fav in favored[:favored_index]])
                if not_ok(index):
                    # Disallowed options are penalized 1000x so they are not picked.
                    total_count[index] += cur * 1000
                else:
                    total_count[index] += cur

        for index in range(num_options):
            if total_count[index] < cost:
                cost = total_count[index]
                favored[favored_index] = index
        print cost / 1000.
    if include_ignored:
        print 'using zlib for % of bytes ',ignored * 100./ignored_and_viewed
    else:
        print 'ignored % bytes ',ignored * 100./ignored_and_viewed
    print 'perfect', perfect_prediction / 1000., 'brotli',brotli_total/1000.,'brotli9',brotli9_total/1000.,'brotli10',brotli10_total/1000.,'brotli11',brotli11_total/1000.,'zstd',zstd_total/1000.,'baseline',baseline_total/1000.
    print 'pct vs brotli', cost * 100. / brotli_total
    print 'pct vs brotli9', cost * 100. / brotli9_total
    print 'pct vs brotli10', cost * 100. / brotli10_total
    print 'pct vs brotli11', cost * 100. / brotli11_total
    print 'pct vs zstd', cost * 100. / zstd_total
    print 'pct vs zlib', cost * 100. / baseline_total
    #print json.dumps(display,indent=2)
    print favored

print [hdrs[favor] for favor in favored]
118 |
119 |
--------------------------------------------------------------------------------
/src/codec/crc32.rs:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright 2017 Andrew Gallant (BurntSushi)
3 |
4 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
5 |
6 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
7 |
8 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
9 |
10 | 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
11 |
12 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
13 | */
14 | #[allow(unused_imports)]
15 | use core;
16 | use super::crc32_table::TABLE16;
/// Returns the starting value for an incremental CRC-32C computation.
pub fn crc32c_init() -> u32 {
    const INITIAL_CRC: u32 = 0;
    INITIAL_CRC
}
/// Folds `buf` into the running CRC-32C value `crc` and returns the updated value.
///
/// This variant is compiled whenever the cfg conditions of the SSE variant are
/// not all met; it always uses the table-driven implementation.
#[cfg(not(all(feature="simd", not(feature="portable-simd"), not(feature="safe"), any(target_arch="x86", target_arch="x86_64"))))]
#[inline(always)]
pub fn crc32c_update(crc:u32, buf: &[u8]) -> u32 {
    fallback_crc32c_update(crc, buf)
}
25 |
/// Folds `buf` into the running CRC-32C value `crc` and returns the updated value.
///
/// Uses the SSE4.2 routine when the CPU reports support for it at run time,
/// and the table-driven implementation otherwise.
#[cfg(all(feature="simd", not(feature="portable-simd"), not(feature="safe"), any(target_arch="x86", target_arch="x86_64")))]
#[inline(always)]
pub fn crc32c_update(crc:u32, buf: &[u8]) -> u32 {
    if !is_x86_feature_detected!("sse4.2") {
        return fallback_crc32c_update(crc, buf);
    }
    // SAFETY: the SSE4.2 routine is only reached after run-time detection
    // confirmed the instruction set is available.
    unsafe {
        sse_crc32c_update(crc, buf)
    }
}
36 |
37 | #[inline(always)]
38 | pub fn fallback_crc32c_update(mut crc:u32, mut buf: &[u8]) -> u32 {
39 | crc = !crc;
40 | while buf.len() >= 16 {
41 | crc ^= u32::from(buf[0]) | (u32::from(buf[1]) << 8) | (u32::from(buf[2]) << 16) | (u32::from(buf[3]) << 24);
42 | crc = TABLE16[0][buf[15] as usize]
43 | ^ TABLE16[1][buf[14] as usize]
44 | ^ TABLE16[2][buf[13] as usize]
45 | ^ TABLE16[3][buf[12] as usize]
46 | ^ TABLE16[4][buf[11] as usize]
47 | ^ TABLE16[5][buf[10] as usize]
48 | ^ TABLE16[6][buf[9] as usize]
49 | ^ TABLE16[7][buf[8] as usize]
50 | ^ TABLE16[8][buf[7] as usize]
51 | ^ TABLE16[9][buf[6] as usize]
52 | ^ TABLE16[10][buf[5] as usize]
53 | ^ TABLE16[11][buf[4] as usize]
54 | ^ TABLE16[12][(crc >> 24) as u8 as usize]
55 | ^ TABLE16[13][(crc >> 16) as u8 as usize]
56 | ^ TABLE16[14][(crc >> 8 ) as u8 as usize]
57 | ^ TABLE16[15][(crc ) as u8 as usize];
58 | buf = &buf.split_at(16).1;
59 | }
60 | for &b in buf {
61 | crc = TABLE16[0][((crc as u8) ^ b) as usize] ^ (crc >> 8);
62 | }
63 | !crc
64 | }
/// Stand-in for the SSE4.2 CRC-32C routine on targets other than x86_64.
///
/// The 64-bit intrinsic used by the real routine is unavailable here, so this
/// delegates to the table-driven implementation. It previously panicked via
/// `unimplemented!()`, which the run-time dispatcher could reach on 32-bit
/// x86 CPUs that report SSE4.2.
#[cfg(feature="simd")]
#[cfg(not(target_arch = "x86_64"))]
fn sse_crc32c_update(crc:u32, buf: &[u8]) -> u32 {
    fallback_crc32c_update(crc, buf)
}
/// CRC-32C update using the x86_64 `crc32` instructions.
///
/// Consumes `buf` eight bytes at a time (assembled little-endian) and then
/// byte by byte; the state is bit-inverted on entry and again on exit.
///
/// # Safety
/// The caller must ensure the CPU supports SSE4.2.
#[cfg(feature="simd")]
#[cfg(target_arch = "x86_64")]
#[inline(always)]
//#[target_feature(enable = "sse4.2")]
unsafe fn sse_crc32c_update(crc:u32, buf: &[u8]) -> u32 {
    let mut state = !crc;
    let mut remaining = buf;
    while remaining.len() >= 8 {
        let (word_bytes, rest) = remaining.split_at(8);
        // Little-endian: byte 0 ends up in the lowest 8 bits.
        let word = word_bytes
            .iter()
            .rev()
            .fold(0u64, |acc, &byte| (acc << 8) | u64::from(byte));
        state = core::arch::x86_64::_mm_crc32_u64(u64::from(state), word) as u32;
        remaining = rest;
    }
    for &byte in remaining {
        state = core::arch::x86_64::_mm_crc32_u8(state, byte);
    }
    !state
}
// Unit tests for the public CRC-32C entry points. The whole module is gated
// on `cfg(test)` so that non-test builds do not carry an empty module.
#[cfg(test)]
mod test {
    use super::{crc32c_init, crc32c_update};
    #[test]
    fn test_crc32c_empty() {
        assert_eq!(crc32c_update(crc32c_init(), &[]), 0x0);
    }
    #[test]
    fn test_crc32c_numeric() {
        let slice = b"123456789";
        assert_eq!(crc32c_update(crc32c_init(), slice), 0xe3069283);
    }
    #[test]
    fn test_crc32c_numeric_half() {
        let slice = b"123456789";
        let (firsthalf, secondhalf) = slice.split_at(5);
        assert_eq!(crc32c_update(crc32c_update(crc32c_init(), firsthalf), secondhalf), 0xe3069283);
    }
    #[test]
    fn test_crc32c_qbf() {
        let slice = b"The quick brown fox jumps over the lazy dog";
        assert_eq!(crc32c_update(crc32c_init(), slice), 0x22620404);
    }
    #[test]
    fn test_crc32c_qbf_half() {
        let slice = b"The quick brown fox jumps over the lazy dog";
        let (firsthalf, secondhalf) = slice.split_at(18);
        assert_eq!(crc32c_update(crc32c_update(crc32c_init(), firsthalf), secondhalf), 0x22620404);
    }
    // Splitting exactly on 16-byte boundaries must give the same checksum as
    // a single call over the whole input.
    #[test]
    fn test_crc32c_qbf_block_boundaries() {
        let slice = b"The quick brown fox jumps over the lazy dog";
        let (first, rest) = slice.split_at(16);
        let (second, third) = rest.split_at(16);
        let crc = crc32c_update(crc32c_init(), first);
        let crc = crc32c_update(crc, second);
        assert_eq!(crc32c_update(crc, third), 0x22620404);
    }
}
117 |
--------------------------------------------------------------------------------
/src/codec/weights.rs:
--------------------------------------------------------------------------------
1 | use core;
2 | use ::probability::{Prob, BLEND_FIXED_POINT_PRECISION, LOG2_SCALE};
3 |
4 | pub struct Weights {
5 | model_weights: [i32;2],
6 | mixing_param: u8,
7 | normalized_weight: i16,
8 | }
9 | impl Default for Weights {
10 | fn default() -> Self {
11 | Self::new()
12 | }
13 | }
14 | impl Weights {
15 | pub fn new() -> Self {
16 | Weights {
17 | model_weights:[1;2],
18 | mixing_param: 1,
19 | normalized_weight: 1 << (BLEND_FIXED_POINT_PRECISION - 1),
20 | }
21 | }
22 | #[inline(always)]
23 | pub fn update(&mut self, model_probs: [Prob; 2], weighted_prob: Prob) {
24 | debug_assert!(self.mixing_param != 0);
25 | normalize_weights(&mut self.model_weights);
26 | let w0new = compute_new_weight(model_probs,
27 | weighted_prob,
28 | self.model_weights,
29 | false,
30 | self.mixing_param - 1);
31 | let w1new = compute_new_weight(model_probs,
32 | weighted_prob,
33 | self.model_weights,
34 | true,
35 | self.mixing_param - 1);
36 | self.model_weights = [w0new, w1new];
37 | self.normalized_weight = compute_normalized_weight(self.model_weights);
38 | }
39 | #[inline(always)]
40 | pub fn set_mixing_param(&mut self, param: u8) {
41 | self.mixing_param = param;
42 | }
43 | #[inline(always)]
44 | pub fn should_mix(&self) -> bool {
45 | self.mixing_param > 1
46 | }
47 | #[inline(always)]
48 | pub fn norm_weight(&self) -> i16 {
49 | self.normalized_weight
50 | }
51 | }
52 |
53 | #[inline(always)]
54 | fn compute_normalized_weight(model_weights: [i32;2]) -> i16 {
55 | let total = i64::from(model_weights[0]) + i64::from(model_weights[1]);
56 | let leading_zeros = total.leading_zeros();
57 | let shift = core::cmp::max(56 - (leading_zeros as i16), 0);
58 | let total_8bit = total >> shift;
59 | ::probability::numeric::fast_divide_16bit_by_8bit(
60 | ((model_weights[0] >> shift) as u16)<< 8,
61 | ::probability::numeric::lookup_divisor8(total_8bit as u8)) << (BLEND_FIXED_POINT_PRECISION - 8)
62 | }
63 |
/// Shifts both weights right by the same amount so that the one with the
/// fewest leading zero bits occupies at most 24 bits.
#[cold]
fn fix_weights(weights: &mut [i32;2]) {
    const MAX_LOG: u32 = 24;
    let fewest_leading_zeros = weights[0].leading_zeros().min(weights[1].leading_zeros());
    let ilog = 32 - fewest_leading_zeros;
    if ilog < MAX_LOG {
        return;
    }
    let excess = ilog - MAX_LOG;
    for weight in weights.iter_mut() {
        *weight >>= excess;
    }
}

/// Rescales the weights via `fix_weights` when either has a bit set in
/// `0x7f000000`; otherwise leaves them untouched.
#[inline(always)]
fn normalize_weights(weights: &mut [i32;2]) {
    let combined = weights[0] | weights[1];
    if combined & 0x7f000000 != 0 {
        fix_weights(weights);
    }
}
/// Returns the number of bits needed to represent `item`: 64 minus its count
/// of leading zero bits (so 0 maps to 0 and any negative value maps to 64).
fn ilog2(item: i64) -> u32 {
    const I64_BITS: u32 = 64;
    let unused_high_bits = item.leading_zeros();
    I64_BITS - unused_high_bits
}
84 | #[cfg(features="floating_point_context_mixing")]
85 | fn compute_new_weight(probs: [Prob; 2],
86 | weighted_prob: Prob,
87 | weights: [i32;2],
88 | index_equal_1: bool,
89 | _speed: u8) -> i32{ // speed ranges from 1 to 14 inclusive
90 | let index = index_equal_1 as usize;
91 | let n1i = probs[index] as f64 / ((1i64 << LOG2_SCALE) as f64);
92 | //let n0i = 1.0f64 - n1i;
93 | let ni = 1.0f64;
94 | let s1 = weighted_prob as f64 / ((1i64 << LOG2_SCALE) as f64);
95 | let s0 = 1.0f64 - s1;
96 | let s = 1.0f64;
97 | //let p0 = s0;
98 | let p1 = s1;
99 | let wi = weights[index] as f64 / ((1i64 << LOG2_SCALE) as f64);
100 | let mut wi_new = wi + (1.0 - p1) * (s * n1i - s1 * ni) / (s0 * s1);
101 | let eps = 0.00001f64;
102 | if !(wi_new > eps) {
103 | wi_new = eps;
104 | }
105 | (wi_new * ((1i64 << LOG2_SCALE) as f64)) as i32
106 | }
107 |
108 | #[cfg(not(features="floating_point_context_mixing"))]
109 | #[inline(always)]
110 | fn compute_new_weight(probs: [Prob; 2],
111 | weighted_prob: Prob,
112 | weights: [i32;2],
113 | index_equal_1: bool,
114 | _speed: u8) -> i32{ // speed ranges from 1 to 14 inclusive
115 | let index = index_equal_1 as usize;
116 | let full_model_sum_p1 = i64::from(weighted_prob);
117 | let full_model_total = 1i64 << LOG2_SCALE;
118 | let full_model_sum_p0 = full_model_total.wrapping_sub(i64::from(weighted_prob));
119 | let n1i = i64::from(probs[index]);
120 | let ni = 1i64 << LOG2_SCALE;
121 | let error = full_model_total.wrapping_sub(full_model_sum_p1);
122 | let wi = i64::from(weights[index]);
123 | let efficacy = full_model_total.wrapping_mul(n1i) - full_model_sum_p1.wrapping_mul(ni);
124 | //let geometric_probabilities = full_model_sum_p1 * full_model_sum_p0;
125 | let log_geometric_probabilities = 64 - (full_model_sum_p1.wrapping_mul(full_model_sum_p0)).leading_zeros();
126 | //let scaled_geometric_probabilities = geometric_probabilities * S;
127 | //let new_weight_adj = (error * efficacy) >> log_geometric_probabilities;// / geometric_probabilities;
128 | //let new_weight_adj = (error * efficacy)/(full_model_sum_p1 * full_model_sum_p0);
129 | let new_weight_adj = (error.wrapping_mul(efficacy)) >> log_geometric_probabilities;
130 | // assert!(wi + new_weight_adj < (1i64 << 31));
131 | //print!("{} -> {} due to {:?} vs {}\n", wi as f64 / (weights[0] + weights[1]) as f64, (wi + new_weight_adj) as f64 /(weights[0] as i64 + new_weight_adj as i64 + weights[1] as i64) as f64, probs[index], weighted_prob);
132 | core::cmp::max(1,wi.wrapping_add(new_weight_adj) as i32)
133 | }
134 |
--------------------------------------------------------------------------------
/src/codec/priors.rs:
--------------------------------------------------------------------------------
1 | use ::interface::{
2 | CrossCommandBilling,
3 | };
4 | use ::priors::{PriorCollection, PriorMultiIndex};
5 | #[cfg(feature="billing")]
6 | #[cfg(feature="debug_entropy")]
7 | use ::priors::summarize_prior_billing;
// Size constants used as dimension arguments in the prior tables below.
pub const NUM_BLOCK_TYPES:usize = 256;
pub const NUM_STRIDES:usize = 8;
use alloc::{SliceWrapper, Allocator, SliceWrapperMut};
use probability::{BaseCDF};
// Prior table keyed by `CrossCommandBilling`. The numeric arguments after each
// key are dimensions interpreted by `define_prior_struct!` (defined elsewhere).
define_prior_struct!(CrossCommandPriors, CrossCommandBilling,
                     (CrossCommandBilling::FullSelection, 16, 1),
                     (CrossCommandBilling::EndIndicator, 1, NUM_BLOCK_TYPES));
15 |
16 |
17 |
/// Keys for the entries of `LiteralCommandPriors`.
#[derive(PartialEq, Debug, Clone)]
pub enum LiteralCommandPriorType {
    CountSmall,
    SizeBegNib,
    SizeLastNib,
    SizeMantissaNib,
}
/// Keys for the entries of `LiteralCommandPriorsCM`.
#[derive(PartialEq, Debug, Clone)]
pub enum LiteralCMPriorType {
    FirstNibble,
    SecondNibble,
}
/// Key for the single entry of `LiteralNibblePriors`.
#[derive(PartialEq, Debug, Clone)]
pub enum LiteralNibblePriorType {
    CombinedNibble,
}

// In each invocation below, the arguments after the key are dimensions
// interpreted by `define_prior_struct!` (defined elsewhere).
define_prior_struct!(LiteralNibblePriors, LiteralNibblePriorType,
                     (LiteralNibblePriorType::CombinedNibble, 3, 256, NUM_BLOCK_TYPES)
                     );

define_prior_struct!(LiteralCommandPriors, LiteralCommandPriorType,
                     (LiteralCommandPriorType::CountSmall, NUM_BLOCK_TYPES, 16),
                     (LiteralCommandPriorType::SizeBegNib, NUM_BLOCK_TYPES),
                     (LiteralCommandPriorType::SizeLastNib, NUM_BLOCK_TYPES),
                     (LiteralCommandPriorType::SizeMantissaNib, NUM_BLOCK_TYPES));

define_prior_struct!(LiteralCommandPriorsCM, LiteralCMPriorType,
                     (LiteralCMPriorType::FirstNibble, 1, NUM_BLOCK_TYPES),
                     (LiteralCMPriorType::SecondNibble, 1, 16, NUM_BLOCK_TYPES));
48 |
/// Keys for the entries of `RandLiteralCommandPriors`; the variants mirror
/// those of `LiteralCommandPriorType`.
#[derive(PartialEq, Debug, Clone)]
pub enum RandLiteralNibblePriorType {
    CountSmall,
    SizeBegNib,
    SizeLastNib,
    SizeMantissaNib,
}
// Same dimension arguments as `LiteralCommandPriors`.
define_prior_struct!(RandLiteralCommandPriors, RandLiteralNibblePriorType,
                     (RandLiteralNibblePriorType::CountSmall, NUM_BLOCK_TYPES, 16),
                     (RandLiteralNibblePriorType::SizeBegNib, NUM_BLOCK_TYPES),
                     (RandLiteralNibblePriorType::SizeLastNib, NUM_BLOCK_TYPES),
                     (RandLiteralNibblePriorType::SizeMantissaNib, NUM_BLOCK_TYPES));
61 |
/// Keys for the entries of `CopyCommandPriors`.
#[derive(PartialEq, Debug, Clone)]
pub enum CopyCommandNibblePriorType {
    DistanceBegNib,
    DistanceLastNib,
    DistanceMnemonic,
    DistanceMnemonicTwo,
    DistanceMantissaNib,
    CountSmall,
    CountBegNib,
    CountLastNib,
    CountMantissaNib,
}
// Second dimension shared by most copy-command entries below.
const NUM_COPY_COMMAND_ORGANIC_PRIORS: usize = 64;
// NOTE(review): `DistanceMnemonicTwo` has no entry in this table, and the
// entries are not listed in enum declaration order -- confirm both are intended.
define_prior_struct!(CopyCommandPriors, CopyCommandNibblePriorType,
                     (CopyCommandNibblePriorType::DistanceBegNib, NUM_BLOCK_TYPES, NUM_COPY_COMMAND_ORGANIC_PRIORS),
                     (CopyCommandNibblePriorType::DistanceMnemonic, NUM_BLOCK_TYPES, 2),
                     (CopyCommandNibblePriorType::DistanceLastNib, NUM_BLOCK_TYPES, 1),
                     (CopyCommandNibblePriorType::DistanceMantissaNib, NUM_BLOCK_TYPES, 5),
                     (CopyCommandNibblePriorType::CountSmall, NUM_BLOCK_TYPES, NUM_COPY_COMMAND_ORGANIC_PRIORS),
                     (CopyCommandNibblePriorType::CountBegNib, NUM_BLOCK_TYPES, NUM_COPY_COMMAND_ORGANIC_PRIORS),
                     (CopyCommandNibblePriorType::CountLastNib, NUM_BLOCK_TYPES, NUM_COPY_COMMAND_ORGANIC_PRIORS),
                     (CopyCommandNibblePriorType::CountMantissaNib, NUM_BLOCK_TYPES, NUM_COPY_COMMAND_ORGANIC_PRIORS));
/// Keys for the entries of `DictCommandPriors`.
#[derive(PartialEq, Debug, Clone)]
pub enum DictCommandNibblePriorType {
    SizeBegNib,
    SizeLastNib,
    Index,
    Transform,
}

// Second dimension of the `Index` entry below.
const NUM_ORGANIC_DICT_DISTANCE_PRIORS: usize = 5;
// The arguments after each key are dimensions interpreted by
// `define_prior_struct!` (defined elsewhere).
define_prior_struct!(DictCommandPriors, DictCommandNibblePriorType,
                     (DictCommandNibblePriorType::SizeBegNib, NUM_BLOCK_TYPES),
                     (DictCommandNibblePriorType::SizeLastNib, NUM_BLOCK_TYPES),
                     (DictCommandNibblePriorType::Index, NUM_BLOCK_TYPES, NUM_ORGANIC_DICT_DISTANCE_PRIORS),
                     (DictCommandNibblePriorType::Transform, 2, 25));
98 |
/// Keys for the entries of `BlockTypePriors`.
#[derive(PartialEq, Debug, Clone)]
pub enum BlockTypePriorType {
    Mnemonic,
    FirstNibble,
    SecondNibble,
    StrideNibble,
}
// The arguments after each key are dimensions interpreted by
// `define_prior_struct!` (defined elsewhere).
define_prior_struct!(BlockTypePriors, BlockTypePriorType,
                     (BlockTypePriorType::Mnemonic, 3), // 3 for each of ltype, ctype, dtype switches.
                     (BlockTypePriorType::FirstNibble, 3),
                     (BlockTypePriorType::SecondNibble, 3),
                     (BlockTypePriorType::StrideNibble, 1));
111 |
/// Keys for the entries of `PredictionModePriors`.
#[derive(PartialEq, Debug, Clone)]
pub enum PredictionModePriorType {
    Only,
    DynamicContextMixingSpeed,
    PriorDepth,
    PriorMixingValue,
    LiteralSpeed,
    Mnemonic,
    FirstNibble,
    SecondNibble,
    ContextMapSpeedPalette,
}

// NOTE(review): `DynamicContextMixingSpeed` and `PriorDepth` have no entry in
// this table -- confirm that is intended.
define_prior_struct!(PredictionModePriors, PredictionModePriorType,
                     (PredictionModePriorType::Only, 1),
                     (PredictionModePriorType::LiteralSpeed, 1),
                     (PredictionModePriorType::FirstNibble, 2),
                     (PredictionModePriorType::SecondNibble, 2),
                     (PredictionModePriorType::Mnemonic, 4),
                     (PredictionModePriorType::PriorMixingValue, 17),
                     (PredictionModePriorType::ContextMapSpeedPalette, 4)
                     );
134 |
--------------------------------------------------------------------------------
/examples/util_prior_stream_cost.rs:
--------------------------------------------------------------------------------
1 | extern crate divans;
2 | use ::std::io::{ErrorKind, BufReader, Result};
3 | use std::env;
4 | use std::collections::HashMap;
5 | use divans::CDF16;
6 | use divans::BaseCDF;
7 | use std::vec;
8 | fn determine_cost(cdf: &divans::DefaultCDF16,
9 | nibble: u8) -> f64 {
10 | let pdf = cdf.pdf(nibble);
11 | let prob = (pdf as f64) / (cdf.max() as f64);
12 | return -prob.log2()
13 | }
14 |
15 | fn eval_stream(
16 | r :&mut Reader,
17 | speed: Option,
18 | is_hex: bool
19 | ) -> Result {
20 | let mut sub_streams = HashMap::>::new();
21 | let mut buffer = String::new();
22 | let mut cost: f64 = 0.0;
23 | loop {
24 | buffer.clear();
25 | match r.read_line(&mut buffer) {
26 | Err(e) => {
27 | if e.kind() == ErrorKind::Interrupted {
28 | continue;
29 | }
30 | return Err(e);
31 | },
32 | Ok(val) => {
33 | if val == 0 || val == 1{
34 | break;
35 | }
36 | let line = buffer.trim().to_string();
37 | let mut prior_val: Vec = if let Some(_) = line.find(",") {
38 | line.split(',').map(|s| s.to_string()).collect()
39 | } else {
40 | line.split(' ').map(|s| s.to_string()).collect()
41 | };
42 | let prior = if is_hex {
43 | match u64::from_str_radix(&prior_val[0], 16) {
44 | Err(_) => return Err(std::io::Error::new(ErrorKind::InvalidData,prior_val[0].clone())),
45 | Ok(val) => val,
46 | }
47 | } else {
48 | match prior_val[0].parse::() {
49 | Err(_) => return Err(std::io::Error::new(ErrorKind::InvalidData,prior_val[0].clone())),
50 | Ok(val) => val,
51 | }
52 | };
53 |
54 | let val = if is_hex {
55 | match u8::from_str_radix(&prior_val[1], 16) {
56 | Err(_) => return Err(std::io::Error::new(ErrorKind::InvalidData,prior_val[1].clone())),
57 | Ok(val) => val,
58 | }
59 | } else {
60 | match prior_val[1].parse::() {
61 | Err(_) => return Err(std::io::Error::new(ErrorKind::InvalidData, prior_val[1].clone())),
62 | Ok(val) => val,
63 | }
64 | };
65 | let mut prior_stream = &mut sub_streams.entry(prior).or_insert(vec::Vec::::new());
66 | prior_stream.push(val);
67 | }
68 | }
69 | }
70 | let specified_speed = match speed {
71 | Some(s) => [s],
72 | None => [divans::Speed::MUD],
73 | };
74 | let trial_speeds = [divans::Speed::GEOLOGIC, divans::Speed::GLACIAL, divans::Speed::MUD, divans::Speed::SLOW,
75 | divans::Speed::MED, divans::Speed::FAST, divans::Speed::PLANE, divans::Speed::ROCKET];
76 | let speed_choice = match speed {
77 | Some(_) => &specified_speed[..],
78 | None => &trial_speeds[..],
79 | };
80 | for (_prior, sub_stream) in sub_streams.iter() {
81 | let mut best_cost_high: Option = None;
82 | let mut best_cost_low: Option = None;
83 | for cur_speed in speed_choice.iter() {
84 | let mut cur_cost_high: f64 = 0.0;
85 | let mut cur_cost_low: f64 = 0.0;
86 | let mut cdf0 = divans::DefaultCDF16::default();
87 | let mut cdf1a = [
88 | divans::DefaultCDF16::default(), divans::DefaultCDF16::default(),divans::DefaultCDF16::default(), divans::DefaultCDF16::default(),
89 | divans::DefaultCDF16::default(), divans::DefaultCDF16::default(),divans::DefaultCDF16::default(), divans::DefaultCDF16::default(),
90 | divans::DefaultCDF16::default(), divans::DefaultCDF16::default(),divans::DefaultCDF16::default(), divans::DefaultCDF16::default(),
91 | divans::DefaultCDF16::default(), divans::DefaultCDF16::default(),divans::DefaultCDF16::default(), divans::DefaultCDF16::default(),
92 | ];
93 | for val in sub_stream.iter() {
94 | let val_nibbles = (val >> 4, val & 0xf);
95 | {
96 | cur_cost_high += determine_cost(&cdf0, val_nibbles.0);
97 | cdf0.blend(val_nibbles.0, *cur_speed);
98 | }
99 | {
100 | let cdf1 = &mut cdf1a[val_nibbles.0 as usize];
101 | cur_cost_low += determine_cost(cdf1, val_nibbles.1);
102 | cdf1.blend(val_nibbles.1, *cur_speed);
103 | }
104 | }
105 | best_cost_high = match best_cost_high.clone() {
106 | None => Some(cur_cost_high),
107 | Some(bc) => Some(if bc > cur_cost_high {cur_cost_high} else {bc}),
108 | };
109 | best_cost_low = match best_cost_low.clone() {
110 | None => Some(cur_cost_low),
111 | Some(bc) => Some(if bc > cur_cost_low {cur_cost_low} else {bc}),
112 | };
113 | }
114 | cost += best_cost_high.unwrap();
115 | cost += best_cost_low.unwrap();
116 | }
117 | Ok(cost)
118 | }
119 |
120 |
121 | fn main() {
122 | let stdin = std::io::stdin();
123 | let stdin = stdin.lock();
124 | let mut buffered_in = BufReader::new(stdin);
125 | let mut speed: Option = None;
126 | if env::args_os().len() > 1 {
127 | for argument in env::args().skip(1) {
128 | speed = Some(argument.parse::().unwrap());
129 | }
130 | }
131 | let cost = eval_stream(&mut buffered_in, speed, true).unwrap();
132 | println!("{} bytes; {} bits", ((cost + 0.99) as u64) as f64 / 8.0, (cost + 0.99) as u64);
133 | }
134 |
--------------------------------------------------------------------------------
/research/divansplot.py:
--------------------------------------------------------------------------------
1 |
2 | import matplotlib.pyplot as plt
3 | from matplotlib import rcParams
4 |
5 | import matplotlib.patches as patches
6 | import matplotlib.transforms as transforms
7 | import numpy as np
8 | from matplotlib.ticker import ScalarFormatter
def on_whitelist(key, label):
    """Return True when the series `label` should be plotted.

    `key` (the metric name) is accepted but does not affect the result.
    """
    allowed_labels = ('b11', 'b9', 'd1', 'dX', 'zlib', 'z19', 'lzma', 'bz')
    for allowed in allowed_labels:
        if label == allowed:
            return True
    return False
def label_reassign(key):
    """Map a short series key to its tick label; unknown keys pass through unchanged."""
    display_names = dict([
        ('b11', 'Brotli\nq11'),
        ('b9', 'Brotli\nq9'),
        ('d0', u'DivANS .\nq11'),
        ('d1', u'DivANS .\nq11'),
        ('dX', u'DivANS\nq9'),
        ('d5', u'DivANS\nq9'),
        ('d35', u'DivANS\nq9'),
        ('z19', 'Zstd\nq19'),
        ('lzma', '7zip'),
        ('bz', 'bz2'),
    ])
    return display_names.get(key, key)
# Five palettes of four hex colors each; every inner list is stored in the
# reverse of the order written here.
colors = [[r for r in reversed(['#aaaaff','#9999dd','#4444aa','#000088',])],
          [r for r in reversed(['#ffffaa','#cccc88','#aaaa44','#999900',])],
          [r for r in reversed(['#ffaaaa','#cc8888','#aa4444','#880000',])],
          [r for r in reversed(['#aaffaa','#88cc88','#44aa44','#008800',])],
          [r for r in reversed(['#666666','#666666','#666666','#666666',])],
]
# Bar color for each series key.
map_color = {
    'd0':colors[0][3],
    'd1':colors[0][3],
    'd2':colors[0][3],
    'd3':colors[0][2],
    'd4':colors[0][2],
    'dX':colors[0][2],
    'd5':colors[0][2],
    'd35':colors[0][2],
    'b9':colors[1][0],
    'b11':colors[1][1],
    'z19':colors[2][1],
    'zlib':colors[4][1],
    'bz':colors[3][1],
    'lzma':colors[3][1],
}
# Y-axis label for each metric key.
ylabel = {
    'savings_vs_zlib':'% saving vs zlib\n',
    'encode_speed': 'Encode (Mbps)',
    'decode_speed': 'Decode (Mbps)',
    'time_pct':'Decode Time (ms)',
}

# Fixed [low, high] y-axis limits; metrics absent here keep automatic limits.
y_limits= {
    'savings_vs_zlib':[0, 14],
    'encode_speed': [1,400],
    'decode_speed': [10,5000],
#    'time_pct':
}
# Metrics drawn on a logarithmic y-axis.
do_log = set(['decode_speed', 'encode_speed'])
def build_figure(key, ax, data, last=False):
    """Draw one bar chart for metric `key` onto the axes `ax`.

    `data` maps a series key to a sequence of values; only whitelisted series
    are drawn, in a custom sort order, and every bar is annotated with its
    height. `last` is accepted but not used.
    """
    if key in do_log:
        ax.set_yscale('log')
    else:
        ax.set_yscale('linear')
    labels = []
    # NOTE(review): `trans` is only referenced from commented-out code below.
    trans = transforms.blended_transform_factory(
        ax.transData, ax.transAxes)
    offset = .5
    # The sort key rewrites some prefixes so the series appear in the desired order.
    for (index, sub_items_key) in enumerate([x for x in sorted(data.keys(), key=lambda v: v.replace('d','a').replace('z1','c1').replace('z2','c2').replace('bz','mz')) if on_whitelist(key, x)]):
        labels.append(sub_items_key)
        bar_width = 0.35
        sub_items = data[sub_items_key]
        axen = []
        for (sub_index, sub_item) in enumerate(sub_items):
            kwargs = {}
            if key in do_log:
                kwargs['log'] = True
            #if key not in y_limits:
            #    kwargs['transform'] = trans
            #if sub_index == 0:
            #    kwargs['label'] = key.replace('_', ' ')
            kwargs['color'] = map_color[sub_items_key]
            axen.append(ax.bar(index + offset, sub_item, bar_width, **kwargs))
            rect = axen[-1][-1]
            height = rect.get_height()
            # Fewer decimals for larger values.
            if height > 100:
                dat = '%.0f' %height
            elif height > 5:
                dat = '%.1f' % height
            else:
                dat = '%.2f' % height
            ax.text(rect.get_x() + rect.get_width()/2.0, height, dat, ha='center', va='bottom')
        if index == 0 and len(sub_items) != 1:
            ax.legend(axen, ['p99.99', 'p99', 'p75', 'p50'], ncol=2)

    # NOTE(review): `bar_width` is only bound inside the loop above, so this
    # line presumably fails when no series passes the whitelist -- confirm.
    ax.set_xticks(np.arange(len(labels)) + offset + bar_width * .5)
    ax.set_xticklabels([label_reassign(l) for l in labels])
    ax.set_ylabel(ylabel[key])
    if key in y_limits:
        ax.set_ylim(y_limits[key][0], y_limits[key][1]) #
    ax.set_xlim(0,len(labels))
    ax.yaxis.set_major_formatter(ScalarFormatter())
    #ax.set_xticks([offset + x for (x,_) in enumerate(labels)])
109 |
def draw(ratio_vs_raw, ratio_vs_zlib, encode_avg, decode_avg, decode_pct):
    """Render the three comparison figures, each saved as both PDF and PNG.

    Produces a combined ratio/decode/encode figure, a decode/encode timing
    figure and a ratio-only figure. `ratio_vs_raw` and `decode_pct` are
    accepted but not used.
    """
    def configure_fonts():
        # Applied before each figure is created.
        for setting, value in (('pdf.fonttype', 42),
                               ('ps.fonttype', 42),
                               ('pgf.rcfonts', False)):
            rcParams[setting] = value

    def save_and_clear(figure, stem):
        for extension in ('.pdf', '.png'):
            plt.savefig(stem + extension)
        figure.clear()

    # Figure 1: savings on top, then decode speed, then encode speed.
    configure_fonts()
    fig, [ax1, ax2, ax3] = plt.subplots(3, 1, sharex=True, figsize=(6, 6))
    plt.suptitle("Dropbox recent uploads")
    build_figure('decode_speed', ax2, decode_avg, last=True)
    build_figure('encode_speed', ax3, encode_avg)
    build_figure('savings_vs_zlib', ax1, ratio_vs_zlib)
    save_and_clear(fig, 'compression_comparison_ratio_speed_time')

    # Figure 2: decode and encode speed only.
    configure_fonts()
    fig, [ax1, ax2] = plt.subplots(2, 1, sharex=True, figsize=(6, 4.5))
    plt.suptitle("Dropbox recent uploads timing")
    build_figure('decode_speed', ax1, decode_avg, last=True)
    build_figure('encode_speed', ax2, encode_avg)
    save_and_clear(fig, 'compression_comparison_speed_time')

    # Figure 3: savings versus zlib only.
    configure_fonts()
    fig, ax1 = plt.subplots(1, 1, sharex=True, figsize=(6, 2.7))
    plt.suptitle("  Dropbox uploads compression ratio for uncompressed files")
    build_figure('savings_vs_zlib', ax1, ratio_vs_zlib)
    fig.subplots_adjust(bottom=0.2, right=.99, top=.9, hspace=0.03)
    save_and_clear(fig, 'compression_comparison_ratio')
148 |
149 |
--------------------------------------------------------------------------------
/src/codec/io.rs:
--------------------------------------------------------------------------------
1 | use core;
2 | use interface::{Command, PredictionModeContextMap, free_cmd, StreamDemuxer, ReadableBytes, StreamID, NUM_STREAMS};
3 | use ::interface::{
4 | DivansOutputResult,
5 | MAX_PREDMODE_SPEED_AND_DISTANCE_CONTEXT_MAP_SIZE,
6 | MAX_LITERAL_CONTEXT_MAP_SIZE,
7 | EncoderOrDecoderRecoderSpecialization,
8 | ErrMsg,
9 | };
10 | use codec::interface::CMD_CODER;
11 | use slice_util::{AllocatedMemoryRange, AllocatedMemoryPrefix};
12 |
13 | use alloc::{Allocator};
14 | use alloc_util::{RepurposingAlloc, UninitializedOnAlloc};
15 | use cmd_to_raw::DivansRecodeState;
16 |
17 | use threading::{ThreadToMain,ThreadData};
18 |
19 |
/// Wraps a demuxer (`input`) together with a stored `DivansOutputResult` (`err`).
/// NOTE(review): the generic parameter list looks truncated in this listing
/// (angle-bracket contents are missing) -- confirm against the real source.
20 | pub struct DemuxerAndRingBuffer,
21 | LinearInputBytes:StreamDemuxer>{
// The wrapped demuxer.
22 | input: LinearInputBytes,
// Zero-sized marker; carries no data.
23 | phantom: core::marker::PhantomData,
// Result handed back by the push_* methods; starts as Success and is
// overwritten by `broadcast_err`.
24 | err: DivansOutputResult,
25 | }
26 |
/// `Default` builds the wrapper around a default-constructed demuxer by
/// delegating to `new`.
27 | impl, LinearInputBytes:StreamDemuxer+Default> Default for DemuxerAndRingBuffer {
28 | fn default() ->Self {
29 | DemuxerAndRingBuffer::::new(LinearInputBytes::default())
30 | }
31 | }
32 | impl, LinearInputBytes:StreamDemuxer> DemuxerAndRingBuffer {
/// Takes ownership of `demuxer` and starts with `err` set to
/// `DivansOutputResult::Success`.
33 | fn new(demuxer: LinearInputBytes) -> Self {
34 | DemuxerAndRingBuffer::{
35 | input:demuxer,
36 | phantom:core::marker::PhantomData::::default(),
37 | err: DivansOutputResult::Success,
38 | }
39 | }
40 | }
41 |
/// `StreamDemuxer` implementation that forwards every method, unchanged, to
/// the wrapped `input` demuxer. No method here touches `err`.
42 | impl, LinearInputBytes:StreamDemuxer> StreamDemuxer for DemuxerAndRingBuffer {
43 | #[inline(always)]
// Forwards to `input.write_linear` and returns its result.
44 | fn write_linear(&mut self, data:&[u8], m8: &mut AllocU8) -> usize {
45 | self.input.write_linear(data, m8)
46 | }
47 | #[inline(always)]
// Forwards to `input.read_buffer`.
48 | fn read_buffer(&mut self) -> [ReadableBytes; NUM_STREAMS] {
49 | self.input.read_buffer()
50 | }
51 | #[inline(always)]
// Forwards to `input.data_ready` for the given stream.
52 | fn data_ready(&self, stream_id:StreamID) -> usize {
53 | self.input.data_ready(stream_id)
54 | }
55 | #[inline(always)]
// Forwards to `input.peek` for the given stream.
56 | fn peek(&self, stream_id: StreamID) -> &[u8] {
57 | self.input.peek(stream_id)
58 | }
59 | #[inline(always)]
// Forwards to `input.edit`, exposing the stream's memory range mutably.
60 | fn edit(&mut self, stream_id: StreamID) -> &mut AllocatedMemoryRange {
61 | self.input.edit(stream_id)
62 | }
63 | #[inline(always)]
// Forwards to `input.consume` with the same stream and count.
64 | fn consume(&mut self, stream_id: StreamID, count: usize) {
65 | self.input.consume(stream_id, count)
66 | }
67 | #[inline(always)]
// Forwards to `input.consumed_all_streams_until_eof`.
68 | fn consumed_all_streams_until_eof(&self) -> bool {
69 | self.input.consumed_all_streams_until_eof()
70 | }
71 | #[inline(always)]
// Forwards to `input.encountered_eof`.
72 | fn encountered_eof(&self) -> bool {
73 | self.input.encountered_eof()
74 | }
75 | #[inline(always)]
// Forwards to `input.free_demux` with the supplied allocator.
76 | fn free_demux(&mut self, m8: &mut AllocU8) {
77 | self.input.free_demux(m8);
78 | }
79 | }
80 |
81 | // This implementation simply writes to the ring buffer while masquerading as communication with a 'main thread'.
82 | impl, LinearInputBytes:StreamDemuxer> ThreadToMain for DemuxerAndRingBuffer {
83 | const COOPERATIVE:bool = false;
84 | const ISOLATED:bool = false;
// Swaps the CMD_CODER stream's memory range for a default value and returns
// the previous range wrapped in `ThreadData::Data`.
85 | fn pull_data(&mut self) -> ThreadData {
86 | ThreadData::Data(core::mem::replace(self.input.edit(CMD_CODER as StreamID), AllocatedMemoryRange::::default()))
87 | }
// Allocates both context maps (literal, and predmode/speed/distance) from the
// allocator's cached allocation. Panics when `m8` is `None`; the `Err` arm of
// the return type is never produced here.
88 | fn pull_context_map(&mut self, mut m8: Option<&mut RepurposingAlloc>) -> Result>, ()> {
89 | match m8 {
90 | Some(ref mut m) => {
91 | let lit = m.use_cached_allocation::().alloc_cell(MAX_LITERAL_CONTEXT_MAP_SIZE);
92 | Ok(PredictionModeContextMap::> {
93 | literal_context_map:lit,
94 | predmode_speed_and_distance_context_map:m.use_cached_allocation::().alloc_cell(
95 | MAX_PREDMODE_SPEED_AND_DISTANCE_CONTEXT_MAP_SIZE),
96 | })
97 | },
98 | None => {
99 | panic!("Pull context map in Demuxer+RingBuffer without an allocator");
100 | },
101 | }
102 | }
// Returns the currently stored result without doing anything else.
103 | fn push_eof(&mut self) -> DivansOutputResult {
104 | self.err
105 | }
// Frees the memory held in `data.0` through `m8`, leaving a default value in
// its place, then returns the stored result. Panics (unwrap) if `m8` is `None`.
106 | fn push_consumed_data(&mut self,
107 | data: &mut AllocatedMemoryRange,
108 | mut m8: Option<&mut RepurposingAlloc>,
109 | ) -> DivansOutputResult {
110 | m8.as_mut().unwrap().free_cell(core::mem::replace(&mut data.0, AllocU8::AllocatedMemory::default()));
111 | self.err
112 | }
// Records `err` as a failure; subsequent push_* calls return it.
113 | fn broadcast_err(&mut self, err:ErrMsg) {
114 | self.err = DivansOutputResult::Failure(err);
115 | }
// Encodes `cmd` through the recoder into `output`. Panics (unwrap) if
// `recoder` is `None`, and also if `m8` is `None` on the paths that free `cmd`.
116 | fn push_cmd(
117 | &mut self,
118 | cmd:&mut Command>,
119 | mut m8: Option<&mut RepurposingAlloc>,
120 | mut recoder: Option<&mut DivansRecodeState>,
121 | specialization:&mut Specialization,
122 | output:&mut [u8],
123 | output_offset: &mut usize,
124 | ) -> DivansOutputResult {
// The specialization chooses which offset the recoder advances: the caller's
// `output_offset` or this local backing value.
125 | let mut tmp_output_offset_bytes_backing: usize = 0;
126 | let tmp_output_offset_bytes = specialization.get_recoder_output_offset(
127 | output_offset,
128 | &mut tmp_output_offset_bytes_backing);
129 | let ret = recoder.as_mut().unwrap().encode_cmd(cmd,
130 | specialization.get_recoder_output(output),
131 | tmp_output_offset_bytes);
132 | match ret {
// Command fully encoded: release it and report the stored result (which may
// be a previously broadcast failure).
133 | DivansOutputResult::Success => {
134 | free_cmd(cmd, &mut m8.as_mut().unwrap().use_cached_allocation::<
135 | UninitializedOnAlloc>());
136 | self.err
137 | },
// Encoding failed: release the command and propagate the failure itself.
138 | DivansOutputResult::Failure(_) => {
139 | free_cmd(cmd, &mut m8.as_mut().unwrap().use_cached_allocation::<
140 | UninitializedOnAlloc>());
141 | ret
142 | }
// Any other result is returned as-is and `cmd` is NOT freed.
143 | _ => ret,
144 | }
145 | }
146 | }
147 |
--------------------------------------------------------------------------------
/c/example.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 | #ifndef _WIN32
6 | #include
7 | #endif
8 | #include "divans/ffi.h"
9 | #include "arg.h"
10 | #include "custom_alloc.h"
11 | #include "vec_u8.h"
12 | const unsigned char example[]=
13 | "Mary had a little lamb. Its fleece was white as snow.\n"
14 | "And every where that Mary went, the lamb was sure to go.\n"
15 | "It followed her to school one day which was against the rule.\n"
16 | "It made the children laugh and play to see a lamb at sch00l!\n\n\n\n"
17 | "0 1 1 2 3 5 8 13 21 34 55 89 144 233 377 610 987 1597 2584 4181 6765\n"
18 | "\x11\x99\x2f\xfc\xfe\xef\xff\xd8\xfd\x9c\x43"
19 | "Additional testing characters here";
20 |
21 |
22 |
23 | #define BUF_SIZE 65536
24 | DivansResult compress(const unsigned char *data, size_t len, struct VecU8 *ret_buffer,
25 | int argc, char** argv) {
26 | unsigned char buf[BUF_SIZE];
27 | struct CAllocator alloc = {custom_malloc, custom_free, custom_alloc_opaque};
28 | struct DivansCompressorState *state = divans_new_compressor_with_custom_alloc(alloc);
29 | set_options(state, argc, argv);
30 | while (len) {
31 | size_t read_offset = 0;
32 | size_t buf_offset = 0;
33 | DivansResult res = divans_encode(state,
34 | data, len, &read_offset,
35 | buf, sizeof(buf), &buf_offset);
36 | if (res == DIVANS_FAILURE) {
37 | divans_free_compressor(state);
38 | return res;
39 | }
40 | data += read_offset;
41 | len -= read_offset;
42 | push_vec_u8(ret_buffer, buf, buf_offset);
43 | }
44 | DivansResult res;
45 | do {
46 | size_t buf_offset = 0;
47 | res = divans_encode_flush(state,
48 | buf, sizeof(buf), &buf_offset);
49 | if (res == DIVANS_FAILURE) {
50 | divans_free_compressor(state);
51 | return res;
52 | }
53 | push_vec_u8(ret_buffer, buf, buf_offset);
54 | } while(res != DIVANS_SUCCESS);
55 | divans_free_compressor(state);
56 | return DIVANS_SUCCESS;
57 | }
58 |
59 | DivansResult decompress(const unsigned char *data, size_t len, struct VecU8 *ret_buffer) {
60 | unsigned char buf[BUF_SIZE];
61 | struct CAllocator alloc = {custom_malloc, custom_free, custom_alloc_opaque};
62 | struct DivansDecompressorState *state = divans_new_decompressor_with_custom_alloc(alloc, 0);
63 | DivansResult res;
64 | do {
65 | size_t read_offset = 0;
66 | size_t buf_offset = 0;
67 | res = divans_decode(state,
68 | data, len, &read_offset,
69 | buf, sizeof(buf), &buf_offset);
70 | if (res == DIVANS_FAILURE || (res == DIVANS_NEEDS_MORE_INPUT && len == 0)) {
71 | divans_free_decompressor(state);
72 | return res;
73 | }
74 | data += read_offset;
75 | len -= read_offset;
76 | push_vec_u8(ret_buffer, buf, buf_offset);
77 | } while (res != DIVANS_SUCCESS);
78 | divans_free_decompressor(state);
79 | return DIVANS_SUCCESS;
80 | }
81 |
82 | int main(int argc, char**argv) {
83 | custom_free_f(&use_fake_malloc, memset(custom_malloc_f(&use_fake_malloc, 127), 0x7e, 127));
84 | if (getenv("NO_MALLOC")) {
85 | custom_alloc_opaque = &use_fake_malloc;
86 | }
87 | if (getenv("RUST_MALLOC")) {
88 | custom_alloc_opaque = NULL;
89 | custom_malloc = NULL;
90 | custom_free = NULL;
91 | }
92 | const unsigned char* data = example;
93 | size_t len = sizeof(example);
94 | unsigned char* to_free = NULL;
95 | if (find_first_arg(argc, argv)) {
96 | FILE * fp = fopen(find_first_arg(argc, argv), "rb");
97 | if (fp != NULL) {
98 | size_t ret;
99 | (void)fseek(fp, 0, SEEK_END);
100 | len = ftell(fp);
101 | (void)fseek(fp, 0, SEEK_SET);
102 | to_free = malloc(len);
103 | ret = fread(to_free, 1, len, fp);
104 | if (ret == 0) {
105 | return -1;
106 | }
107 | data = to_free;
108 | (void)fclose(fp);
109 | }
110 | }
111 | {
112 | struct VecU8 divans_file = new_vec_u8();
113 | struct VecU8 rt_file = new_vec_u8();
114 | DivansResult res = compress(data, len, &divans_file, argc, argv);
115 | if (res != DIVANS_SUCCESS) {
116 | fprintf(stderr, "Failed to compress code:%d\n", (int) res);
117 | abort();
118 | }
119 | res = decompress(divans_file.data, divans_file.size, &rt_file);
120 | if (res != DIVANS_SUCCESS) {
121 | fprintf(stderr, "Failed to compress code:%d\n", (int)res);
122 | abort();
123 | }
124 | if (rt_file.size != len) {
125 | FILE * fp = fopen("/tmp/fail.rt", "wb");
126 | fwrite(rt_file.data, 1, rt_file.size, fp);
127 | fclose(fp);
128 | fp = fopen("/tmp/fail.dv", "wb");
129 | fwrite(divans_file.data, 1, divans_file.size, fp);
130 | fclose(fp);
131 | fp = fopen("/tmp/fail.or", "wb");
132 | fwrite(data, 1, len, fp);
133 | fclose(fp);
134 | fprintf(stderr, "Decompressed file size %ld != %ld\n", (long) rt_file.size, (long)len);
135 | abort();
136 | }
137 | if (memcmp(rt_file.data, data, len) != 0) {
138 | fprintf(stderr, "Roundtrip Contents mismatch\n");
139 | abort();
140 | }
141 | #ifdef _WIN32
142 | printf("File length %ld reduced to %ld, %0.2f%%\n",
143 | (long)len, (long)divans_file.size,(double)divans_file.size * 100.0 / (double)len);
144 | #else
145 | char buf[512];
146 | int ret;
147 | ret = write(1, "File length ", strlen("File Length "));
148 | if (ret <= 0) {
149 | return ret;
150 | }
151 | custom_atoi(buf, len);
152 | ret = write(1, buf, strlen(buf));
153 | if (ret <= 0) {
154 | return ret;
155 | }
156 | ret = write(1, " reduced to ", strlen(" reduced to "));
157 | if (ret <= 0) {
158 | return ret;
159 | }
160 | custom_atoi(buf, divans_file.size);
161 | ret = write(1, buf, strlen(buf));
162 | if (ret <= 0) {
163 | return ret;
164 | }
165 | ret = write(1, ", ", strlen(", "));
166 | if (ret <= 0) {
167 | return ret;
168 | }
169 | custom_atoi(buf, divans_file.size * 100 / len);
170 | ret = write(1, buf, strlen(buf));
171 | if (ret <= 0) {
172 | return ret;
173 | }
174 | ret = write(1, ".", strlen("."));
175 | if (ret <= 0) {
176 | return ret;
177 | }
178 | custom_atoi(buf, ((divans_file.size * 1000000 + len/2)/ len) % 10000 + 10000);
179 | ret = write(1, buf + 1, strlen(buf) - 1);
180 | if (ret <= 0) {
181 | return ret;
182 | }
183 | ret = write(1, "%\n", strlen("%\n"));
184 | if (ret <= 0) {
185 | return ret;
186 | }
187 | #endif
188 | release_vec_u8(&divans_file);
189 | release_vec_u8(&rt_file);
190 | }
191 | if (to_free != NULL) {
192 | free(to_free);
193 | }
194 | return 0;
195 | }
196 |
--------------------------------------------------------------------------------
/research/summary.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import json
3 | import traceback
4 | from collections import defaultdict
5 | total = {}
6 | num_rows = 0
7 | raw_size =0
8 |
9 | cut = 0
10 | uncut = 0
11 | decode_hist = defaultdict(list)
12 | dig5 = 10000000.0
def prec(x, scale=100000.0):
    """Round ``x`` to the nearest multiple of ``1/scale`` (ties round up).

    With the default scale this keeps five decimal places.  ``floor`` is used
    rather than ``int`` because ``int`` truncates toward zero, which rounds
    negative values in the wrong direction.
    """
    import math  # local import: the file header does not import math
    return math.floor(x * scale + .5) / scale
15 |
def summarize(show_results=True):
    """Print aggregate statistics for every key accumulated in ``total``.

    Reads the module-level accumulators (``total``, ``decode_hist``, ``cut``,
    ``uncut``, ``num_rows``, ``raw_size``).  For each key it prints the size
    relative to zlib and the throughput figures, and builds the per-key series
    that are handed to ``divansplot.draw`` when ``show_results`` is true and
    ``divansplot`` can be imported.

    Each line is assembled into one string and printed with ``print(...)``,
    which yields the same text as the former print statements and also parses
    on Python 3.
    """
    processed = cut + uncut
    # No rows processed yet means nothing to divide by; report 0% instead of
    # raising ZeroDivisionError.
    pct_processed = (uncut * 100.) / processed if processed else 0.0
    header = ("Summary for", num_rows, 'Processed ', pct_processed, '%',
              raw_size / 1000.**4)
    print(' '.join(str(item) for item in header))

    ratio_vs_zlib = {}
    ratio_vs_raw = {}
    encode_avg = {}
    decode_avg = {}
    decode_st_avg = {}
    decode_pct = {}

    for key in sorted(total.keys()):
        entry = total[key]
        zlib_bytes = total['zlib'][0]
        temp = [entry[0] * 100. / zlib_bytes,
                entry[3] / max(entry[1], 1),
                entry[3] / max(entry[2], 1),
                entry[3] / max(entry[4], 1)]
        savings = prec((entry[0] + cut) * 100. / processed) if processed else 0.0
        print(' '.join([str(key) + ':' + str([prec(t) for t in temp]),
                        'sav',
                        str(savings) + '%']))
        ratio_vs_zlib[key] = [100 - 100. * float(entry[0]) / zlib_bytes]
        ratio_vs_raw[key] = [100 - 100. * float(entry[0]) / total['~raw'][0]]
        # The factor 8 and the tiny floor on the denominator are kept as-is.
        encode_avg[key] = [8 * entry[3] / max(entry[1], .00001)]
        decode_avg[key] = [8 * entry[3] / max(entry[2], .00001)]
        decode_st_avg[key] = [8 * entry[3] / max(entry[4], .00001)]
        if key in decode_hist:
            val = decode_hist[key]
            val.sort()  # sorts the shared list in place
            vlen = len(val)
            # Percentile indices; each is strictly less than vlen.
            p9999 = vlen * 9999 // 10000
            p99 = vlen * 99 // 100
            p75 = vlen * 75 // 100
            p50 = vlen // 2
            print(str(key) + ': ' + str(entry[0]) + '/' + str(zlib_bytes)
                  + ' vs raw ' + str(entry[0]) + '/' + str(total['~raw'][0]))
            decode_pct[key] = [1000 * val[p9999], 1000 * val[p99],
                               1000 * val[p75], 1000 * val[p50]]

    if show_results:
        try:
            import divansplot
        except Exception:
            # Plotting is optional: report the import problem and skip it.
            traceback.print_exc()
            show_results = False
    if show_results:
        divansplot.draw(ratio_vs_raw, ratio_vs_zlib, encode_avg, decode_avg, decode_pct)
# Maps a configuration label ('d1', 'd12', ...) to a list of command-line
# option lists, one inner list per option set.
# NOTE(review): nothing in the visible part of this script reads gopts_map;
# it looks like a record of which flags each label stands for -- confirm.
56 | gopts_map = {
57 | 'd1':[['-O2', '-q11', '-w22', '-lsb', '-lgwin22', '-mixing=1', '-findprior', '-speed=2,2048'],
58 | ['-O2', '-q9.5', '-w22', '-lgwin22', '-mixing=1', '-findprior', '-bytescore=140',
59 | '-sign', '-speed=32,4096'],
60 | ['-O2', '-q10', '-w22', '-lgwin22', '-mixing=1', '-findprior', '-sign', '-speed=16,8192'],
61 | ['-O2', '-q11', '-w22', '-lgwin18', '-mixing=1', '-findprior', '-speed=16,8192'],
62 | ['-O2', '-q9.5', '-w22', '-lgwin22', '-mixing=1', '-findprior', '-bytescore=340',
63 | '-lsb', '-speed=2,1024']],
64 | 'd12':[['-O2', '-q9.5', '-w22', '-defaultprior', '-lgwin22', '-mixing=2', '-bytescore=340']],
65 | 'd13':[ ['-O2', '-q9.5', '-w22', '-lsb', '-lgwin22', '-mixing=1', '-speed=2,2048', '-bytescore=540'],
66 | ['-O2', '-q9.5', '-w22', '-lgwin22', '-mixing=1', '-bytescore=140', '-speed=32,4096']],
67 | 'd15':[['-O2', '-q9.5', '-w22', '-lgwin22', '-mixing=1', '-speed=2,2048', '-bytescore=840'],
68 | ['-O2', '-q9.5', '-w22', '-lgwin22', '-mixing=1', '-bytescore=340'],
69 | ['-O2', '-q9.5', '-w22', '-lgwin22', '-mixing=1', '-bytescore=140', '-speed=32,4096']],
70 | 'd20':[['-O2', '-q10', '-w22', '-lsb', '-lgwin22', '-mixing=1', '-findprior', '-speed=2,2048'],
71 | ['-O2', '-q9.5', '-w22', '-lgwin22', '-mixing=1', '-findprior', '-bytescore=140',
72 | '-sign', '-speed=32,4096'],
73 | ['-O2', '-q9.5', '-w22', '-lgwin22', '-mixing=1', '-findprior', '-bytescore=40',
74 | '-sign', '-speed=16,8192'],
75 | ['-O2', '-q10', '-w22', '-lgwin18', '-mixing=1', '-findprior', '-speed=16,8192'],
76 | ['-O2', '-q9.5', '-w22', '-lgwin22', '-mixing=1', '-findprior', '-bytescore=340',
77 | '-lsb', '-speed=2,1024']],
78 | 'd21':[['-q9', '-defaultprior', '-w22', '-lgwin22', '-mixing=2', '-bytescore=140'],
79 | ['-q9', '-defaultprior', '-w22', '-lgwin22', '-mixing=2', '-bytescore=340'],
80 | ['-q9', '-defaultprior', '-w22', '-lgwin22', '-mixing=2', '-bytescore=840', "-speed=16,8192"]],
81 | 'd29':[['-q9', '-defaultprior', '-nocm', '-w22', '-lgwin22', '-mixing=0', '-bytescore=340']],
82 | 'd35':[ ['-q7', '-defaultprior', '-w22', '-lgwin22', '-mixing=2', '-bytescore=40'],
83 | ['-q7', '-defaultprior', '-w22', '-lgwin22', '-mixing=2', '-bytescore=340'],
84 | ['-q7', '-defaultprior', '-w22', '-lgwin22', '-mixing=2', '-bytescore=540'],
85 | ['-q7', '-defaultprior', '-w22', '-lgwin22', '-mixing=2', '-bytescore=840', '-speed=1,16384']],
86 | 'd38':[['-q5', '-defaultprior', '-w22', '-lgwin22', '-mixing=2', '-bytescore=340']],
87 |
88 | }
# Driver: read one JSON record per line from stdin, filter it by its zlib
# ratio, derive the synthetic 'dY' and 'dX' entries, and accumulate every
# entry into `total` / `decode_hist`.
# Both sys.argv[1] and sys.argv[2] are read unconditionally, so the script
# raises IndexError when started with fewer than two arguments.
# NOTE(review): this listing has lost its indentation; the nesting described
# in the comments below is inferred from the statements -- confirm.
89 | for line in sys.stdin:
# With '--cut', drop everything up to and including the first ':' of the line.
90 | if sys.argv[1] == '--cut':
91 | line = line[line.find(':') + 1:]
92 | try:
93 | row = json.loads(line)
94 | except Exception:
# Unparseable lines are reported and skipped.
95 | traceback.print_exc()
96 | continue
# Compressed-to-raw size ratio under zlib (raises ZeroDivisionError if '~raw' is 0).
97 | zlib_ratio = row['zlib'][0] / float(row['~raw'])
# Filtering: in "image" mode rows where zlib saved nothing are cut; otherwise
# sys.argv[2] is a percentage threshold and rows compressing worse are cut.
98 | if sys.argv[2] == "image":
99 | if row['zlib'][0] == row['~raw']:
100 | cut += row['zlib'][0]
101 | continue
102 | elif zlib_ratio > float(sys.argv[2])/100.:
103 | cut += row['zlib'][0]
104 | continue
105 | uncut += row['zlib'][0]
106 | raw_size += row['~raw']
107 | mb_size = row['~raw']/1024./1024.
108 | num_rows += 1
# Placeholder; always overwritten by the if/elif chain below.
109 | candidate = [0,0,0]
# Configuration label chosen per zlib-ratio band (indexed 0..5 below).
110 | rule = ['d12',
111 | 'd13',
112 | 'd20',
113 | 'd21',
114 | 'd1',
115 | 'd1']
116 | if zlib_ratio > .99:
117 | candidate = row[rule[5]]
118 | elif zlib_ratio > .96:
119 | candidate = row[rule[4]]
120 | elif zlib_ratio > .92: # .85
121 | candidate = row[rule[3]]
122 | elif zlib_ratio > .89: # .25
123 | candidate = row[rule[2]]
124 | elif zlib_ratio > .85: # .22
125 | candidate = row[rule[1]]
126 | else:
127 | candidate = row[rule[0]] #1
# 'dY' is a synthetic entry: the banded pick made above.
128 | row['dY'] = candidate
129 | if zlib_ratio > .97:
130 | candidate = row['d29'] # fast to encode fast to decode
131 | #elif zlib_ratio > .9:
132 | # candidate = row['d38'] # fastest to encode slow to decode
133 | elif zlib_ratio > .5:
134 | candidate = row['d35'] # fast to encode and slow to decode
135 | else:
136 | candidate = row['d15'] # slow to encode fast to decode
137 | # candidate = row['d1'] # slowest to encode fast to decode
# 'dX' is a second synthetic entry, picked with the coarser bands above.
138 | row['dX'] = candidate
# Fold every entry of the row (including the synthetic ones) into the totals.
139 | for (key, value) in row.iteritems():
140 | if key not in total:
# Slot 0 accumulates value[0] (or the raw value for '~raw'), slots 1 and 2
# accumulate value[1] and value[2], slot 3 accumulates mb_size, and slot 4
# accumulates value[3] when present, else value[2].
141 | total[key] = [0,0,0,0,0]
142 | if key == '~path' or key=='~':
143 | continue
144 | if key == '~raw':
145 | total[key][0] += value
146 | continue
147 | total[key][0] += value[0]
148 | decode_hist[key].append(value[2])
# `or True` makes this condition always true, so the size filter is disabled.
149 | if mb_size >= 1 or True:
150 | total[key][1] += value[1]
151 | total[key][2] += value[2]
152 | total[key][3] += mb_size
153 | if len(value) > 3:
154 | total[key][4] += value[3]
155 | else:
156 | total[key][4] += value[2]
# Periodic progress report without plotting.
157 | if num_rows % 100000 == 0:
158 | summarize(False)
# Final report (plots when divansplot is importable).
159 | summarize()
160 |
--------------------------------------------------------------------------------
/src/billing.rs:
--------------------------------------------------------------------------------
1 | // Copyright 2017 Dropbox, Inc
2 | //
3 | // Licensed under the Apache License, Version 2.0 (the "License");
4 | // you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at
6 | //
7 | // http://www.apache.org/licenses/LICENSE-2.0
8 | //
9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 |
15 | #![allow(unknown_lints,unused_macros,unused_imports)]
16 | use core::mem;
17 | use core::iter::FromIterator;
18 | use core::marker::PhantomData;
19 | use alloc::{Allocator};
20 | use interface::{ArithmeticEncoderOrDecoder, BillingDesignation, NewWithAllocator, BillingCapability};
21 | use super::probability::{CDF16, ProbRange};
22 | use interface::{DivansResult, ReadableBytes, WritableBytes};
23 |
/// Re-exports the `std` collections used by the billing code, compiled only
/// when the `billing` feature is enabled.
24 | #[cfg(feature="billing")]
25 | mod billing {
26 | pub use std::collections::HashMap;
27 | pub use std::string::String;
28 | pub use std::vec::Vec;
29 | }
30 |
31 | #[cfg(feature="billing")]
32 | pub use std::io::Write;
33 |
/// Formats its arguments exactly like `writeln!` and emits the resulting line
/// on standard error, panicking if the write fails.
macro_rules! println_stderr {
    ($($args:tt)*) => {{
        writeln!(&mut ::std::io::stderr(), $($args)*).unwrap();
    }};
}
39 |
/// Wraps an arithmetic coder and tallies, per billing designation, the cost
/// of everything coded through it.
/// NOTE(review): generic parameter lists are truncated in this listing; the
/// counter's values are used as `(f64, f64)` pairs elsewhere in this file.
40 | #[cfg(feature="billing")]
41 | pub struct BillingArithmeticCoder, Coder:ArithmeticEncoderOrDecoder> {
// The wrapped coder that performs the actual coding.
42 | coder: Coder,
// Per-designation running totals: (bits, virtual bits).
43 | counter: billing::HashMap,
// Set by `mov()` on the moved-from value so its `Drop` prints no report.
44 | movd: bool,
45 | _phantom: PhantomData,
46 | }
47 |
48 | #[cfg(feature="billing")]
49 | impl,
50 | Coder:ArithmeticEncoderOrDecoder+NewWithAllocator> NewWithAllocator for BillingArithmeticCoder {
// Builds the inner coder from `m8` and starts with an empty tally.
51 | fn new(m8: &mut AllocU8) -> Self {
52 | BillingArithmeticCoder::{
53 | coder: Coder::new(m8),
54 | counter: billing::HashMap::new(),
55 | movd:false,
56 | _phantom:PhantomData::::default(),
57 | }
58 | }
// Releases the inner coder's allocations; the tally map is left untouched.
59 | fn free(&mut self, m8: &mut AllocU8) {
60 | self.coder.free(m8);
61 | }
62 | }
63 |
64 | #[cfg(feature="billing")]
65 | impl, Coder:ArithmeticEncoderOrDecoder> BillingArithmeticCoder {
66 | // Return the (bits, virtual bits) pair, summed over every billing designation.
67 | pub fn get_total(&self) -> (f64, f64) {
68 | let mut total_bits : f64 = 0.0;
69 | let mut total_vbits : f64 = 0.0;
70 | for (_, v) in self.counter.iter() {
71 | total_bits += v.0;
72 | total_vbits += v.1;
73 | }
74 | (total_bits, total_vbits)
75 | }
// Prints "<coded bytes>/<original bytes> Ratio <percent>%" to stderr.
// When `original_bytes` is 0 the float division yields inf/NaN, not a panic.
76 | pub fn print_compression_ratio(&self, original_bytes : usize) {
77 | let (total_bits, _) = self.get_total();
78 | println_stderr!("{:.2}/{:} Ratio {:.3}%",
79 | total_bits / 8.0, original_bytes, total_bits * 100.0 / 8.0 / (original_bytes as f64));
80 | }
81 | }
82 |
/// On drop, prints a per-designation billing report to stderr, followed by a
/// "Total" line -- unless this value was moved-from via `mov()`.
83 | #[cfg(feature="billing")]
84 | impl, Coder:ArithmeticEncoderOrDecoder> Drop for BillingArithmeticCoder {
85 | fn drop(&mut self) {
// The tally was transferred to another instance; that one reports instead.
86 | if self.movd {
87 | return;
88 | }
// Column width: the longest Debug-formatted key, or 5 when the map is empty.
89 | let max_key_len = self.counter.keys().map(|k| format!("{:?}", k).len()).max().unwrap_or(5);
90 | let report = |k, v: (f64, f64)| {
91 | println_stderr!("{1:0$} Bit count: {2:9.1} Byte count: {3:11.3} Virtual bits: {4:7.0}",
92 | max_key_len, k, v.0, v.0 / 8.0, v.1);
93 | };
// Sort by the Debug representation of each key for a stable report order.
94 | let mut sorted_entries = billing::Vec::from_iter(self.counter.iter());
95 | sorted_entries.sort_by_key(|&(k, _)| format!("{:?}", k));
96 |
97 | let mut total_bits : f64 = 0.0;
98 | let mut total_vbits : f64 = 0.0;
99 |
100 | for (k, v) in sorted_entries {
101 | report(format!("{:?}", k), *v);
102 | total_bits += v.0;
103 | total_vbits += v.1;
104 | }
105 | report(billing::String::from("Total"), (total_bits, total_vbits));
106 | }
107 | }
108 |
/// Coder interface that delegates the actual coding to the wrapped `coder`
/// and records the cost of each coded bit/nibble under its billing designation.
109 | #[cfg(feature="billing")]
110 | impl, Coder:ArithmeticEncoderOrDecoder> ArithmeticEncoderOrDecoder for BillingArithmeticCoder {
// Consuming variant of `mov`; `self` is dropped afterwards with `movd` set.
111 | fn mov_consume(mut self) -> Self {
112 | self.mov()
113 | }
// Transfers the inner coder and the tally into a fresh instance. `self` is
// flagged as moved-from so its Drop prints nothing, and keeps an empty map.
114 | fn mov(&mut self) -> Self {
115 | self.movd = true;
116 | BillingArithmeticCoder::{
117 | coder: self.coder.mov(),
118 | counter: mem::replace(&mut self.counter, billing::HashMap::new()),
119 | movd: false,
120 | _phantom:PhantomData::::default(),
121 | }
122 | }
// Forwarded to the inner coder.
123 | fn has_data_to_drain_or_fill(&self) -> bool {
124 | self.coder.has_data_to_drain_or_fill()
125 | }
// Forwarded to the inner coder.
126 | fn drain_or_fill_internal_buffer_unchecked(&mut self,
127 | input_buffer: &mut ReadableBytes,
128 | output_buffer: &mut WritableBytes) -> DivansResult {
129 | self.coder.drain_or_fill_internal_buffer_unchecked(input_buffer, output_buffer)
130 | }
// Even the "without billing" entry point is billed here, under `Unknown`.
131 | fn get_or_put_bit_without_billing(&mut self,
132 | bit: &mut bool,
133 | prob_of_false: u8) {
134 | self.get_or_put_bit(bit, prob_of_false, BillingDesignation::Unknown)
135 | }
// Codes one bit through the inner coder, then charges -log2(p) bits and one
// virtual bit to `billing`, where p is the probability of the value in `*bit`.
136 | fn get_or_put_bit(&mut self,
137 | bit: &mut bool,
138 | prob_of_false: u8,
139 | billing: BillingDesignation) {
140 | self.coder.get_or_put_bit_without_billing(bit, prob_of_false);
// Probability of `false` as a fraction of 256, offset by half a step.
141 | let mut actual_prob = (prob_of_false as f64 + 0.5) / 256.0;
142 | if *bit {
143 | actual_prob = 1.0 - actual_prob;
144 | }
145 | let v = self.counter.entry(billing).or_insert((0.0, 0.0));
146 | (*v).0 += -actual_prob.log2();
147 | (*v).1 += 1.0;
148 | }
// As above: the unbilled nibble entry point is charged to `Unknown`.
149 | fn get_or_put_nibble_without_billing(&mut self,
150 | nibble: &mut u8,
151 | prob: &C) -> ProbRange {
152 | self.get_or_put_nibble(nibble, prob, BillingDesignation::Unknown)
153 | }
// Codes one nibble through the inner coder, then charges -log2(pdf/max) bits
// and four virtual bits to `billing`. Returns the inner coder's ProbRange.
154 | fn get_or_put_nibble(&mut self,
155 | nibble: &mut u8,
156 | prob: &C,
157 | billing: BillingDesignation) -> ProbRange {
158 | let ret = self.coder.get_or_put_nibble_without_billing(nibble, prob);
159 | let actual_prob = prob.pdf(*nibble) as f64 / (prob.max() as f64);
160 | let v = self.counter.entry(billing).or_insert((0.0, 0.0));
161 | (*v).0 += -actual_prob.log2();
162 | (*v).1 += 4.0;
163 | ret
164 | }
// Forwarded to the inner coder.
165 | fn close(&mut self) -> DivansResult {
166 | self.coder.close()
167 | }
168 | }
169 |
170 | // Only needs to be implemented for feature=billing, since it's defined for any T in the default case.
171 | #[cfg(feature="billing")]
172 | impl, Coder:ArithmeticEncoderOrDecoder> BillingCapability for BillingArithmeticCoder {
// Prints the compression ratio relative to `byte_size` original bytes.
173 | fn debug_print(&self, byte_size: usize) {
174 | self.print_compression_ratio(byte_size);
175 | }
176 | }
177 |
// Type-alias macros selecting the default encoder/decoder types.
// Without the `billing` feature they expand to the plain ANS coders.
178 | #[cfg(not(feature="billing"))]
179 | macro_rules! DefaultEncoderType(
180 | () => {::ans::ANSEncoder}
181 | );
182 |
183 | #[cfg(not(feature="billing"))]
184 | macro_rules! DefaultDecoderType(
185 | () => {::ans::ANSDecoder}
186 | );
187 |
188 |
// With the `billing` feature they expand to BillingArithmeticCoder instead.
// NOTE(review): the generic arguments of the two expansions below look
// truncated in this listing -- confirm against the real source.
189 | #[cfg(feature="billing")]
190 | macro_rules! DefaultEncoderType(
191 | () => { ::billing::BillingArithmeticCoder> }
192 | );
193 |
194 | #[cfg(feature="billing")]
195 | macro_rules! DefaultDecoderType(
196 | () => { ::billing::BillingArithmeticCoder }
197 | );
198 |
--------------------------------------------------------------------------------
/src/probability/common_tests.rs:
--------------------------------------------------------------------------------
1 | use super::{BLEND_FIXED_POINT_PRECISION, CDF16, LOG2_SCALE, Prob, ProbRange, Speed};
2 |
/// Checks `sym_to_start_and_freq` after each of 100 blends: every symbol
/// 0..16 must map back to itself, and each range must start right after the
/// previous symbol's range (plus the offset of one explained below).
/// NOTE(review): the generic parameter declaring `T` is missing from this
/// listing -- confirm against the real source.
3 | #[cfg(test)]
4 | pub fn test_sym_to_start_and_freq() {
5 | let mut cdf = T::default();
6 | for i in 0..100 {
// Feed symbols 0..15 cyclically.
7 | cdf.blend((i & 0xf) as u8, Speed::MED);
8 | let mut last_prob_range: ProbRange = ProbRange { start:0, freq:0 };
9 | for sym in 0..16 {
10 | let result = cdf.sym_to_start_and_freq(sym as u8);
11 | assert_eq!(sym as u8, result.sym);
12 | // NOTE: the +1 is to mirror the default implementation of sym_to_start_and_freq,
13 | // which does +1 to the interpolated Prob value.
14 | let expected_start: Prob = 1 + if sym == 0 { 0 } else {
15 | last_prob_range.start + last_prob_range.freq
16 | };
17 | assert_eq!(result.range.start, expected_start);
18 | last_prob_range = result.range.clone();
19 | }
20 | }
21 | }
22 |
/// Checks `cdf_offset_to_sym_start_and_freq` after each of 100 blends: as the
/// offset sweeps 0..(1 << LOG2_SCALE) the returned symbol must never decrease,
/// the offset must fall inside the returned range (with the slack noted
/// below), and the sweep must end on symbol 15.
/// NOTE(review): the generic parameter declaring `T` is missing from this
/// listing -- confirm against the real source.
23 | #[cfg(test)]
24 | pub fn test_cdf_offset_to_sym_start_and_freq() {
25 | let mut cdf = T::default();
26 | for i in 0..100 {
27 | cdf.blend((i & 0xf) as u8, Speed::MED);
28 | let mut prev_sym: u8 = 0;
29 | for val in 0..(1i32 << LOG2_SCALE) {
30 | let result = cdf.cdf_offset_to_sym_start_and_freq(val as Prob);
31 | // TODO: The following comparisons should not have +1's, but
32 | // cdf_offset_to_sym_start_and_freq(...) implementation at the moment is HAX.
33 | assert!(prev_sym <= result.sym);
34 | // check that val falls in the range defined by the return value.
35 | assert!(result.range.start as i32 <= val + 1);
36 | assert!(val <= (result.range.start as i32) + (result.range.freq as i32));
37 | prev_sym = result.sym;
38 | }
39 | assert_eq!(prev_sym, 15);
40 | }
41 | }
42 |
/// Small deterministic pseudo-random generator for tests.
///
/// Advances `state` with a linear congruential step (multiplier
/// 1_103_515_245, increment 12_345, wrapping in 64 bits) and returns 15 bits
/// of the new state, i.e. a value in `0..=32_767`. These are the constants of
/// the sample `rand()` in the C standard, so seeding with 1 yields
/// 16838, 5758, ...
#[allow(unused)]
fn simple_rand(state: &mut u64) -> u32 {
    const RAND_MAX: u32 = 32_767;
    *state = (*state).wrapping_mul(1_103_515_245).wrapping_add(12_345);
    // Drop the low 16 bits, truncate to u32, then keep the low 15 bits.
    (*state / 65_536) as u32 % (RAND_MAX + 1)
}
49 |
/// Draws one million symbols from a fixed ground-truth distribution, blends
/// each into the CDF, and then checks that the learned pdf is close to the
/// ground truth for all 16 symbols.
/// NOTE(review): the generic parameter declaring `T` is missing from this
/// listing -- confirm against the real source.
50 | #[cfg(test)]
51 | pub fn test_stationary_probability() {
52 | let mut cdf = T::default();
// Ground-truth probability of each symbol as a (numerator, denominator) pair.
53 | let groundtruth_pdf: [(u32, u32); 16] = [(0,1), (0,1), (1,16), (0,1),
54 | (1,32), (1,32), (0,1), (0,1),
55 | (1,8), (0,1), (0,1), (0,1),
56 | (1,5), (1,5), (1,5), (3,20)];
57 |
58 | // compute CDF manually
59 | const CDF_MAX : u32 = 32_767;
// cutoffs[i] is the cumulative probability through symbol i, scaled to CDF_MAX + 1.
60 | let mut cutoffs: [u32; 16] = [0; 16];
61 | let mut sum_prob: f32 = 0.0f32;
62 | for i in 0..16 {
63 | sum_prob += (groundtruth_pdf[i].0 as f32) / (groundtruth_pdf[i].1 as f32);
64 | cutoffs[i] = (((CDF_MAX + 1) as f32) * sum_prob).round() as u32;
65 | }
66 | assert_eq!(cutoffs[15], CDF_MAX + 1);
67 |
68 | // make sure we have all probability taken care of
69 | let mut seed = 1u64;
70 | let num_trials = 1000000usize;
71 | for i in 0..num_trials {
72 | let rand_num = simple_rand(&mut seed) as u32;
// The first cutoff above the random number selects the symbol.
73 | for j in 0..16 {
74 | if rand_num < cutoffs[j] {
75 | // we got a j as the next symbol
76 | cdf.blend(j as u8, Speed::MED);
77 | assert!(cdf.valid());
78 | break;
79 | }
80 | assert!(j != 15); // should have broken
81 | }
82 | }
// Accept either a small relative error or a small absolute error.
83 | for i in 0..16 {
84 | let actual = (cdf.pdf(i as u8) as f32) / (cdf.max() as f32);
85 | let expected = (groundtruth_pdf[i].0 as f32) / (groundtruth_pdf[i].1 as f32);
86 | let abs_delta = (expected - actual).abs();
87 | let rel_delta = abs_delta / expected; // may be nan
88 | // TODO: These bounds should be tightened.
89 | assert!(rel_delta < 0.15f32 || abs_delta < 0.014f32);
90 | }
91 | }
92 |
/// After blending symbol 15 one million times, every other symbol (0..15)
/// must still have a strictly positive pdf.
/// NOTE(review): the generic parameter declaring `T` is missing from this
/// listing -- confirm against the real source.
93 | #[cfg(test)]
94 | pub fn test_nonzero_pdf() {
95 | // This is a regression test
96 | let mut cdf = T::default();
97 | for _ in 0..1000000 {
98 | cdf.blend(15, Speed::MED);
99 | }
100 | for i in 0..15 {
101 | assert!(cdf.pdf(i) > 0);
102 | }
103 | }
104 |
/// For each listed test name, emits a `#[test]` function of the same name
/// that runs the generic test `common_tests::<name>` with the given CDF type.
macro_rules! define_common_tests_helper {
    ($cdf: ident; $($case: ident),+) => {
        $(
            #[test]
            fn $case() {
                // Resolved relative to the invoking module, two levels up.
                use super::super::common_tests as shared;
                shared::$case::<$cdf>();
            }
        )+
    };
}
116 |
/// Instantiates the whole shared CDF test suite for the given CDF type by
/// handing the fixed list of test names to `define_common_tests_helper!`.
#[macro_export]
macro_rules! declare_common_tests {
    ($cdf: ident) => {
        define_common_tests_helper!(
            $cdf;
            test_sym_to_start_and_freq,
            test_cdf_offset_to_sym_start_and_freq,
            test_stationary_probability,
            test_nonzero_pdf
        );
    }
}
127 |
/// Asserts that two CDFs have the same `max()` and identical `cdf()` values
/// for all 16 symbols, and that both report `valid()`.
/// NOTE(review): the generic parameters declaring `CDF16A`/`CDF16B` are
/// missing from this listing -- confirm against the real source.
128 | pub fn assert_cdf_eq(cdf0: &CDF16A, cdf1: &CDF16B) {
129 | assert_eq!(cdf0.max(), cdf1.max());
130 | for sym in 0..16 {
131 | assert_eq!(cdf0.cdf(sym as u8), cdf1.cdf(sym as u8));
132 | }
133 | assert!(cdf0.valid());
134 | assert!(cdf1.valid());
135 | }
136 |
/// Asserts that two CDFs agree approximately: for every symbol the normalized
/// cumulative values must differ by less than 1/160, and both must be `valid()`.
/// NOTE(review): the generic parameters declaring `CDF16A`/`CDF16B` are
/// missing from this listing -- confirm against the real source.
137 | pub fn assert_cdf_similar(cdf0: &CDF16A, cdf1: &CDF16B) {
138 | let max0 = cdf0.max() as i64;
139 | let max1 = cdf1.max() as i64;
140 | for sym in 0..16 {
141 | let sym0cdf = i64::from(cdf0.cdf(sym as u8));
142 | let sym1cdf = i64::from(cdf1.cdf(sym as u8));
// Cross-multiply by the other CDF's max to compare the two fractions
// cdf/max without dividing.
143 | let cmp0 = sym0cdf * max1;
144 | let cmp1 = sym1cdf * max0;
145 | let delta = if cmp0 < cmp1 { cmp1.wrapping_sub(cmp0) } else { cmp0.wrapping_sub(cmp1) };
146 | assert!(delta < max1 * max0 / 160);
147 | }
148 | assert!(cdf0.valid());
149 | assert!(cdf1.valid());
150 | }
151 |
/// Cross-checks two CDF implementations (A and B): each pair must start out
/// equal, stay equal after every identical `blend`, and produce matching
/// results from `average` at several mixing weights.
/// NOTE(review): the generic parameter list and several shift expressions
/// (e.g. `(1<>2)`, and the merged `let all = ...` line) are truncated in this
/// listing -- confirm against the real source.
152 | pub fn operation_test_helper (cdf0a: &mut CDFA, cdf1a: &mut CDFA, cdf0b: &mut CDFB, cdf1b: &mut CDFB) {
153 | assert_cdf_eq(cdf0a, cdf0b);
154 | assert_cdf_eq(cdf1a, cdf1b);
155 | let symbol_buffer0 = [0u8, 0u8, 0u8, 0u8, 0u8, 1u8, 2u8, 3u8, 4u8, 5u8, 5u8, 5u8, 5u8, 5u8, 5u8,
156 | 6u8, 7u8, 8u8, 8u8, 9u8, 9u8, 10u8, 10u8, 10u8, 10u8, 10u8, 10u8, 10u8,
157 | 10u8, 10u8, 10u8, 11u8, 12u8, 12u8, 12u8, 13u8, 13u8, 13u8, 14u8, 15u8,
158 | 15u8, 15u8, 15u8, 15u8, 15u8, 15u8];
159 | let symbol_buffer1 = [0u8, 0u8, 0u8, 0u8, 0u8, 1u8, 2u8, 3u8, 4u8, 5u8, 5u8, 5u8, 5u8, 5u8, 5u8];
// Blend the first symbol stream into both "0" CDFs; they must stay identical.
160 | for sym in symbol_buffer0.iter() {
161 | cdf0a.blend(*sym, Speed::MED);
162 | cdf0b.blend(*sym, Speed::MED);
163 | assert_cdf_eq(cdf0a, cdf0b);
164 | }
165 | assert_cdf_similar(&cdf0a.average(cdf1a, (1<>2), &cdf0b.average(cdf1b, (1<>2));
// Same again with the second, shorter stream.
166 | for sym in symbol_buffer1.iter() {
167 | cdf0a.blend(*sym, Speed::MED);
168 | cdf0b.blend(*sym, Speed::MED);
169 | assert_cdf_eq(cdf0a, cdf0b);
170 | }
171 | let all = (1<>1;
173 | let quarter = (1<>2;
174 | let threequarters = half + quarter;;
175 |
// Both implementations must agree exactly at every mixing weight.
176 | assert_cdf_eq(&cdf0a.average(cdf1a, quarter), &cdf0b.average(cdf1b, quarter));
177 | assert_cdf_eq(&cdf0a.average(cdf1a, half), &cdf0b.average(cdf1b, half));
178 | assert_cdf_eq(&cdf0a.average(cdf1a, threequarters), &cdf0b.average(cdf1b, threequarters));
179 | assert_cdf_eq(&cdf0a.average(cdf1a, 0), &cdf0b.average(cdf1b, 0));
180 | assert_cdf_eq(&cdf0a.average(cdf1a, all), &cdf0b.average(cdf1b, all));
// Weight 0 must approximately reproduce the argument CDF, and weight `all`
// the receiver.
181 | assert_cdf_similar(&cdf0a.average(cdf1a, 0), cdf1a);
182 | assert_cdf_similar(&cdf0a.average(cdf1a, all), cdf0a);
183 | assert_cdf_similar(&cdf0b.average(cdf1b, 0), cdf1b);
184 | assert_cdf_similar(&cdf0b.average(cdf1b, all), cdf0b);
185 | }
186 |
--------------------------------------------------------------------------------
/src/ffi/alloc_util.rs:
--------------------------------------------------------------------------------
1 | use core;
2 | use ::alloc;
3 | use super::interface::{c_void, CAllocator};
4 | #[cfg(feature="std")]
5 | use std::vec::Vec;
6 | #[cfg(feature="std")]
7 | pub use std::boxed::Box;
8 |
/// Newtype around a boxed slice of `Ty`; only compiled when the `std` feature is enabled.
// NOTE(review): `Ty` is not declared on this line as it appears here; the generic parameter
// list was presumably lost in this copy of the file -- confirm against the repository.
9 | #[cfg(feature="std")]
10 | #[derive(Debug)]
11 | pub struct MemoryBlock(Box<[Ty]>);
// The default `MemoryBlock` wraps an empty boxed slice, built from an empty `Vec`.
// NOTE(review): `Vec::::new()` and the bare `impl ... for MemoryBlock` are missing their
// generic arguments in this copy (presumably `Ty`) -- confirm against the repository.
12 | #[cfg(feature="std")]
13 | impl Default for MemoryBlock {
14 | fn default() -> Self {
15 | MemoryBlock(Vec::::new().into_boxed_slice())
16 | }
17 | }
// `alloc::SliceWrapper` implementation: exposes the whole boxed slice as `&[Ty]`.
// NOTE(review): generic arguments on the impl header appear to be missing in this copy -- confirm.
18 | #[cfg(feature="std")]
19 | impl alloc::SliceWrapper for MemoryBlock {
20 | fn slice(&self) -> &[Ty] {
21 | &self.0[..]
22 | }
23 | }
// `alloc::SliceWrapperMut` implementation: exposes the whole boxed slice as `&mut [Ty]`.
// NOTE(review): generic arguments on the impl header appear to be missing in this copy -- confirm.
24 | #[cfg(feature="std")]
25 | impl alloc::SliceWrapperMut for MemoryBlock {
26 | fn slice_mut(&mut self) -> &mut [Ty] {
27 | &mut self.0[..]
28 | }
29 | }
// Read-only `block[i]` access; forwards to the boxed slice's own indexing, so an
// out-of-range `index` panics like any slice index.
// NOTE(review): generic arguments on the impl header appear to be missing in this copy -- confirm.
30 | #[cfg(feature="std")]
31 | impl core::ops::Index for MemoryBlock {
32 | type Output = Ty;
33 | fn index(&self, index:usize) -> &Ty {
34 | &self.0[index]
35 | }
36 | }
// Mutable `block[i]` access; forwards to the boxed slice's own indexing, so an
// out-of-range `index` panics like any slice index.
// NOTE(review): generic arguments on the impl header appear to be missing in this copy -- confirm.
37 | #[cfg(feature="std")]
38 | impl core::ops::IndexMut for MemoryBlock {
39 |
40 | fn index_mut(&mut self, index:usize) -> &mut Ty {
41 | &mut self.0[index]
42 | }
43 | }
// Drop handler for blocks that were not emptied before going out of scope.
// A non-empty block is reported on stdout and then intentionally leaked instead of freed.
// NOTE(review): `size_of::()` and the impl header are missing their generic arguments in
// this copy (presumably `Ty`) -- confirm against the repository.
44 | #[cfg(feature="std")]
45 | impl Drop for MemoryBlock {
46 | fn drop (&mut self) {
// An empty block needs no special handling.
47 | if self.0.len() != 0 {
48 | print!("leaking memory block of length {} element size: {}\n", self.0.len(), core::mem::size_of::());
49 |
// Swap an empty block into `self` and forget the old contents so its storage is
// never handed to Rust's deallocation path.
50 | let to_forget = core::mem::replace(self, MemoryBlock::default());
51 | core::mem::forget(to_forget);// leak it -- it's the only safe way with custom allocators
52 | }
53 | }
54 | }
/// Allocator state wrapping a `CAllocator` (imported from `super::interface`).
///
/// `_ty` is a zero-sized `PhantomData` marker; `alloc` holds the wrapped `CAllocator`.
// NOTE(review): the generic parameter on the struct and on `PhantomData` appears to be
// missing in this copy (presumably the element type `Ty`) -- confirm against the repository.
55 | pub struct SubclassableAllocator {
56 | _ty: core::marker::PhantomData,
57 | alloc: CAllocator
58 | // have alternative ty here
59 | }
60 |
// Constructor for `SubclassableAllocator`.
// NOTE(review): the `::` / `::::` sequences below are missing their generic arguments in this
// copy (presumably `Ty`) -- confirm against the repository.
61 | impl SubclassableAllocator {
/// Builds an allocator that stores `sub_alloc` as its `CAllocator`.
62 | pub fn new(sub_alloc:CAllocator) -> Self {
63 | SubclassableAllocator::{
64 | _ty:core::marker::PhantomData::::default(),
65 | alloc:sub_alloc,
66 | }
67 | }
68 | }
// `alloc::Allocator` implementation (std builds only) that routes allocation through the
// `CAllocator` callbacks when `alloc_func` is set, and through Rust's own allocation otherwise.
// NOTE(review): generic arguments (`size_of::()`, `Vec::::new()`, the impl header) are missing
// in this copy, presumably `Ty` -- confirm against the repository.
69 | #[cfg(feature="std")]
70 | impl alloc::Allocator for SubclassableAllocator {
71 | type AllocatedMemory = MemoryBlock;
/// Returns a block of `size` elements, each initialised to `Ty::default()`.
72 | fn alloc_cell(&mut self, size:usize) ->MemoryBlock{
// Custom path: request `size * size_of` bytes from the callback, passing its opaque pointer.
// NOTE(review): no null check on the returned pointer and no overflow check on the
// multiplication are visible here -- confirm the callback contract covers both.
73 | if let Some(alloc_fn) = self.alloc.alloc_func {
74 | let ptr = alloc_fn(self.alloc.opaque, size * core::mem::size_of::());
75 | let typed_ptr = unsafe {core::mem::transmute::<*mut c_void, *mut Ty>(ptr)};
76 | let slice_ref = unsafe {core::slice::from_raw_parts_mut(typed_ptr, size)};
// `ptr::write` stores each default value without reading or dropping the previous contents.
77 | for item in slice_ref.iter_mut() {
78 | unsafe{core::ptr::write(item, Ty::default())};
79 | }
// The callback-provided memory is wrapped in a `Box`; `free_cell` below unwraps it again
// with `Box::into_raw` before calling `free_func`.
80 | return MemoryBlock(unsafe{Box::from_raw(slice_ref)})
81 | }
// Fallback path: ordinary Rust allocation via `vec!`.
82 | MemoryBlock(vec![Ty::default();size].into_boxed_slice())
83 | }
/// Releases a block obtained from `alloc_cell`; an empty block is a no-op.
84 | fn free_cell(&mut self, mut bv:MemoryBlock) {
85 | if (*bv.0).len() != 0 {
86 | if let Some(_) = self.alloc.alloc_func {
// Capture the data pointer, then swap an empty slice into `bv` and turn the old box into a
// raw pointer so Rust gives up ownership without deallocating it.
87 | let slice_ptr = (*bv.0).as_mut_ptr();
88 | let _box_ptr = Box::into_raw(core::mem::replace(&mut bv.0, Vec::::new().into_boxed_slice()));
// NOTE(review): when `alloc_func` is set but `free_func` is `None`, nothing releases the
// memory on this path -- confirm that is intended.
89 | if let Some(free_fn) = self.alloc.free_func {
90 | unsafe {free_fn(self.alloc.opaque, core::mem::transmute::<*mut Ty, *mut c_void>(slice_ptr))};
91 | }
92 | } else {
// No custom allocator: move the box out of `bv`; `_to_free` is dropped at the end of this
// scope, which frees it through the normal `Box` path.
93 | let _to_free = core::mem::replace(&mut bv.0, Vec::::new().into_boxed_slice());
94 | }
95 | }
96 | }
97 | }
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
/// Mutable static holding an empty `u8` slice; only compiled when the `std` feature is disabled.
// NOTE(review): its users are not visible in this part of the file -- check how it is accessed
// before changing it, since every access to a `static mut` requires `unsafe`.
109 | #[cfg(not(feature="std"))]
110 | static mut G_SLICE:&mut[u8] = &mut[];
111 | #[cfg(not(feature="std"))]
112 | #[derive(Debug)]
113 | pub struct MemoryBlock