├── tests
├── testdata
│ ├── empty
│ ├── empty.compressed
│ ├── ukkonooa.compressed
│ └── ukkonooa
├── Makefile
├── compatibility_test.sh
├── roundtrip_test.sh
├── run-compatibility-test.cmake
└── run-roundtrip-test.cmake
├── python
├── tests
│ ├── __init__.py
│ ├── decompress_test.py
│ ├── compress_test.py
│ ├── decompressor_test.py
│ ├── compressor_test.py
│ ├── bro_test.py
│ └── _test_utils.py
├── Makefile
├── README.md
├── brotli.py
└── bro.py
├── setup.cfg
├── docs
├── brotli.svg
├── constants.h.3
├── types.h.3
└── brotli.1
├── scripts
├── libbrotlicommon.pc.in
├── libbrotlidec.pc.in
├── libbrotlienc.pc.in
└── sources.lst
├── configure.ac
├── WORKSPACE
├── configure
├── MANIFEST.in
├── c
├── common
│ ├── platform.c
│ ├── constants.c
│ ├── version.h
│ ├── dictionary.h
│ ├── shared_dictionary_internal.h
│ ├── transform.h
│ └── context.h
├── enc
│ ├── dictionary_hash.h
│ ├── bit_cost.c
│ ├── utf8_util.h
│ ├── literal_cost.h
│ ├── command.c
│ ├── cluster.h
│ ├── block_encoder_inc.h
│ ├── params.h
│ ├── static_dict.h
│ ├── backward_references.h
│ ├── histogram_inc.h
│ ├── cluster.c
│ ├── block_splitter.h
│ ├── fast_log.h
│ ├── bit_cost.h
│ ├── histogram.h
│ ├── prefix.h
│ ├── find_match_length.h
│ ├── utf8_util.c
│ ├── compound_dictionary.h
│ ├── compress_fragment_two_pass.h
│ ├── write_bits.h
│ ├── state.h
│ ├── histogram.c
│ ├── brotli_bit_stream.h
│ ├── compress_fragment.h
│ ├── backward_references_hq.h
│ ├── metablock.h
│ ├── memory.h
│ ├── entropy_encode.h
│ ├── bit_cost_inc.h
│ ├── encoder_dict.h
│ ├── hash_composite_inc.h
│ ├── memory.c
│ ├── fast_log.c
│ ├── backward_references.c
│ └── literal_cost.c
├── dec
│ ├── bit_reader.c
│ ├── huffman.h
│ └── state.c
├── include
│ └── brotli
│ │ ├── types.h
│ │ └── shared_dictionary.h
└── tools
│ └── brotli.md
├── README
├── LICENSE
├── compiler_config_setting.bzl
├── Makefile.am
├── CONTRIBUTING.md
├── Makefile
├── bootstrap
├── BUILD
└── README.md
/tests/testdata/empty:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/python/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/tests/testdata/empty.compressed:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [build]
2 | build_base=bin
3 |
4 | [yapf]
5 | based_on_style=google
6 |
--------------------------------------------------------------------------------
/tests/testdata/ukkonooa.compressed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Unity-Technologies/brotli/master/tests/testdata/ukkonooa.compressed
--------------------------------------------------------------------------------
/tests/testdata/ukkonooa:
--------------------------------------------------------------------------------
1 | ukko nooa, ukko nooa oli kunnon mies, kun han meni saunaan, pisti laukun naulaan, ukko nooa, ukko nooa oli kunnon mies.
--------------------------------------------------------------------------------
/docs/brotli.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/scripts/libbrotlicommon.pc.in:
--------------------------------------------------------------------------------
1 | prefix=@prefix@
2 | exec_prefix=@exec_prefix@
3 | libdir=@libdir@
4 | includedir=@includedir@
5 |
6 | Name: libbrotlicommon
7 | URL: https://github.com/google/brotli
8 | Description: Brotli common dictionary library
9 | Version: @PACKAGE_VERSION@
10 | Libs: -L${libdir} -lbrotlicommon
11 | Cflags: -I${includedir}
12 |
--------------------------------------------------------------------------------
/tests/Makefile:
--------------------------------------------------------------------------------
1 | #brotli/tests
2 |
3 | BROTLI = ..
4 |
5 | all: test
6 |
7 | test: deps  # The test scripts use repo-root-relative paths (bin/brotli, tests/testdata), so they are run from the root.
8 | 	cd $(BROTLI) && ./tests/compatibility_test.sh
9 | 	cd $(BROTLI) && ./tests/roundtrip_test.sh
10 |
11 | deps :
12 | 	$(MAKE) -C $(BROTLI) brotli
13 |
14 | clean :  # Globs are spelled out: recipes run under /bin/sh, which need not support brace expansion.
15 | 	rm -f testdata/*.br testdata/*.unbr testdata/*.uncompressed
16 | 	rm -f $(BROTLI)/enc/*.unbr $(BROTLI)/enc/*.br $(BROTLI)/dec/*.unbr $(BROTLI)/dec/*.br $(BROTLI)/tools/*.unbr $(BROTLI)/tools/*.br
17 | 	$(MAKE) -C $(BROTLI)/tools clean
18 |
--------------------------------------------------------------------------------
/scripts/libbrotlidec.pc.in:
--------------------------------------------------------------------------------
1 | prefix=@prefix@
2 | exec_prefix=@exec_prefix@
3 | libdir=@libdir@
4 | includedir=@includedir@
5 |
6 | Name: libbrotlidec
7 | URL: https://github.com/google/brotli
8 | Description: Brotli decoder library
9 | Version: @PACKAGE_VERSION@
10 | Libs: -L${libdir} -lbrotlidec
11 | Requires.private: libbrotlicommon >= 1.0.2
12 | Cflags: -I${includedir}
13 |
--------------------------------------------------------------------------------
/scripts/libbrotlienc.pc.in:
--------------------------------------------------------------------------------
1 | prefix=@prefix@
2 | exec_prefix=@exec_prefix@
3 | libdir=@libdir@
4 | includedir=@includedir@
5 |
6 | Name: libbrotlienc
7 | URL: https://github.com/google/brotli
8 | Description: Brotli encoder library
9 | Version: @PACKAGE_VERSION@
10 | Libs: -L${libdir} -lbrotlienc
11 | Requires.private: libbrotlicommon >= 1.0.2
12 | Cflags: -I${includedir}
13 |
--------------------------------------------------------------------------------
/configure.ac:
--------------------------------------------------------------------------------
1 | AC_PREREQ(2.57)
2 |
3 | dnl Actual version is substituted by bootstrap
4 | AC_INIT([brotli], [0.0.0], [https://groups.google.com/forum/#!forum/brotli])
5 |
6 | AM_INIT_AUTOMAKE()
7 | AC_CONFIG_MACRO_DIR([m4])
8 |
9 | AC_PROG_CC
10 | LT_INIT
11 |
12 | AC_CONFIG_FILES([Makefile scripts/libbrotlicommon.pc scripts/libbrotlidec.pc scripts/libbrotlienc.pc])
13 |
14 | AC_OUTPUT
15 |
--------------------------------------------------------------------------------
/WORKSPACE:
--------------------------------------------------------------------------------
1 | workspace(name = "org_brotli")
2 |
3 | local_repository(
4 | name = "ignore_org_brotli_go",
5 | path = "go",
6 | )
7 |
8 | local_repository(
9 | name = "ignore_org_brotli_java",
10 | path = "java",
11 | )
12 |
13 | local_repository(
14 | name = "ignore_org_brotli_js",
15 | path = "js",
16 | )
17 |
18 | local_repository(
19 | name = "ignore_org_brotli_research",
20 | path = "research",
21 | )
22 |
--------------------------------------------------------------------------------
/configure:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | echo "Use Autotools, Bazel, CMake or Premake5 to generate projects / build files."
3 | echo " Bazel: http://www.bazel.build/"
4 | echo " CMake: https://cmake.org/"
5 | echo " Premake5: https://premake.github.io/"
6 | echo "To generate Autotools 'configure' file run './bootstrap'."
7 | echo "Run './configure-cmake' for Autotools-like CMake configuration."
8 | echo "Or simply run 'make' to build and test command line tool."
9 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include CONTRIBUTING.md
2 | include c/common/*.c
3 | include c/common/*.h
4 | include c/dec/*.c
5 | include c/dec/*.h
6 | include c/enc/*.c
7 | include c/enc/*.h
8 | include c/include/brotli/*.h
9 | include LICENSE
10 | include MANIFEST.in
11 | include python/_brotli.cc
12 | include python/bro.py
13 | include python/brotli.py
14 | include python/README.md
15 | include python/tests/*
16 | include README.md
17 | include setup.py
18 | include tests/testdata/*
19 | include c/tools/brotli.c
20 |
--------------------------------------------------------------------------------
/c/common/platform.c:
--------------------------------------------------------------------------------
1 | /* Copyright 2016 Google Inc. All Rights Reserved.
2 |
3 | Distributed under MIT license.
4 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 | */
6 |
7 | #include <stdlib.h>  /* malloc, free */
8 |
9 | #include <brotli/types.h>  /* NOTE(review): header name was lost in extraction; reconstructed - confirm */
10 |
11 | #include "platform.h"
12 |
13 | /* Default brotli_alloc_func: ignores |opaque| and returns malloc(size). */
14 | void* BrotliDefaultAllocFunc(void* opaque, size_t size) {
15 |   BROTLI_UNUSED(opaque);
16 |   return malloc(size);
17 | }
18 |
19 | /* Default brotli_free_func: ignores |opaque| and hands |address| to free(). */
20 | void BrotliDefaultFreeFunc(void* opaque, void* address) {
21 |   BROTLI_UNUSED(opaque);
22 |   free(address);
23 | }
24 |
--------------------------------------------------------------------------------
/c/common/constants.c:
--------------------------------------------------------------------------------
1 | /* Copyright 2013 Google Inc. All Rights Reserved.
2 |
3 | Distributed under MIT license.
4 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 | */
6 |
7 | #include "constants.h"
8 |
9 | const BrotliPrefixCodeRange
10 | _kBrotliPrefixCodeRanges[BROTLI_NUM_BLOCK_LEN_SYMBOLS] = {
11 | {1, 2}, {5, 2}, {9, 2}, {13, 2}, {17, 3}, {25, 3},
12 | {33, 3}, {41, 3}, {49, 4}, {65, 4}, {81, 4}, {97, 4},
13 | {113, 5}, {145, 5}, {177, 5}, {209, 5}, {241, 6}, {305, 6},
14 | {369, 7}, {497, 8}, {753, 9}, {1265, 10}, {2289, 11}, {4337, 12},
15 | {8433, 13}, {16625, 24}};
16 |
--------------------------------------------------------------------------------
/README:
--------------------------------------------------------------------------------
1 | BROTLI DATA COMPRESSION LIBRARY
2 |
3 | Brotli is a general-purpose lossless compression algorithm that compresses data
4 | using a combination of a modern variant of the LZ77 algorithm, Huffman coding
5 | and 2nd-order context modeling, with a compression ratio comparable to the best
6 | currently available general-purpose compression methods. It is similar in speed
7 | to deflate but offers denser compression.
8 |
9 | The specification of the Brotli Compressed Data Format is defined in RFC 7932
10 | https://tools.ietf.org/html/rfc7932
11 |
12 | Brotli is open-sourced under the MIT License, see the LICENSE file.
13 |
14 | Brotli mailing list:
15 | https://groups.google.com/forum/#!forum/brotli
16 |
--------------------------------------------------------------------------------
/c/enc/dictionary_hash.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2015 Google Inc. All Rights Reserved.
2 |
3 | Distributed under MIT license.
4 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 | */
6 |
7 | /* Hash table on the 4-byte prefixes of static dictionary words. */
8 |
9 | #ifndef BROTLI_ENC_DICTIONARY_HASH_H_
10 | #define BROTLI_ENC_DICTIONARY_HASH_H_
11 |
12 | #include
13 |
14 | #if defined(__cplusplus) || defined(c_plusplus)
15 | extern "C" {
16 | #endif
17 |
18 | extern const uint16_t kStaticDictionaryHashWords[32768];
19 | extern const uint8_t kStaticDictionaryHashLengths[32768];
20 |
21 | #if defined(__cplusplus) || defined(c_plusplus)
22 | } /* extern "C" */
23 | #endif
24 |
25 | #endif /* BROTLI_ENC_DICTIONARY_HASH_H_ */
26 |
--------------------------------------------------------------------------------
/tests/compatibility_test.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #
3 | # Test that the brotli command-line tool can decompress old brotli-compressed
4 | # files.
5 | #
6 | # The first argument may be a wrapper for brotli, such as 'qemu-arm'.
7 |
8 | set -o errexit
9 |
10 | BROTLI_WRAPPER=$1
11 | BROTLI="${BROTLI_WRAPPER} bin/brotli"  # expanded unquoted below so wrapper and binary become separate words
12 | TMP_DIR=bin/tmp
13 |
14 | for file in tests/testdata/*.compressed*; do
15 |   echo "Testing decompression of file $file"
16 |   expected=${file%.compressed*}  # reference file: same path without the .compressed* suffix
17 |   uncompressed="${TMP_DIR}/${expected##*/}.uncompressed"
18 |   echo "$uncompressed"
19 |   $BROTLI "$file" -fdo "$uncompressed"
20 |   diff -q "$uncompressed" "$expected"
21 |   # Test the streaming version
22 |   $BROTLI -dc < "$file" > "$uncompressed"
23 |   diff -q "$uncompressed" "$expected"
24 |   rm -f "$uncompressed"
25 | done
26 |
--------------------------------------------------------------------------------
/c/enc/bit_cost.c:
--------------------------------------------------------------------------------
1 | /* Copyright 2013 Google Inc. All Rights Reserved.
2 |
3 | Distributed under MIT license.
4 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 | */
6 |
7 | /* Functions to estimate the bit cost of Huffman trees. */
8 |
9 | #include "bit_cost.h"
10 |
11 | #include
12 |
13 | #include "../common/constants.h"
14 | #include "../common/platform.h"
15 | #include "fast_log.h"
16 | #include "histogram.h"
17 |
18 | #if defined(__cplusplus) || defined(c_plusplus)
19 | extern "C" {
20 | #endif
21 |
22 | #define FN(X) X ## Literal
23 | #include "bit_cost_inc.h" /* NOLINT(build/include) */
24 | #undef FN
25 |
26 | #define FN(X) X ## Command
27 | #include "bit_cost_inc.h" /* NOLINT(build/include) */
28 | #undef FN
29 |
30 | #define FN(X) X ## Distance
31 | #include "bit_cost_inc.h" /* NOLINT(build/include) */
32 | #undef FN
33 |
34 | #if defined(__cplusplus) || defined(c_plusplus)
35 | } /* extern "C" */
36 | #endif
37 |
--------------------------------------------------------------------------------
/c/common/version.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2016 Google Inc. All Rights Reserved.
2 |
3 | Distributed under MIT license.
4 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 | */
6 |
7 | /* Version definition. */
8 |
9 | #ifndef BROTLI_COMMON_VERSION_H_
10 | #define BROTLI_COMMON_VERSION_H_
11 |
12 | /* This macro should only be used when library is compiled together with client.
13 | If library is dynamically linked, use BrotliDecoderVersion and
14 | BrotliEncoderVersion methods. */
15 |
16 | /* Semantic version, calculated as (MAJOR << 24) | (MINOR << 12) | PATCH */
17 | #define BROTLI_VERSION 0x1000009
18 |
19 | /* This macro is used by build system to produce Libtool-friendly soname. See
20 | https://www.gnu.org/software/libtool/manual/html_node/Libtool-versioning.html
21 | */
22 |
23 | /* ABI version, calculated as (CURRENT << 24) | (REVISION << 12) | AGE */
24 | #define BROTLI_ABI_VERSION 0x1009000
25 |
26 | #endif /* BROTLI_COMMON_VERSION_H_ */
27 |
--------------------------------------------------------------------------------
/c/enc/utf8_util.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2013 Google Inc. All Rights Reserved.
2 |
3 | Distributed under MIT license.
4 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 | */
6 |
7 | /* Heuristics for deciding about the UTF8-ness of strings. */
8 |
9 | #ifndef BROTLI_ENC_UTF8_UTIL_H_
10 | #define BROTLI_ENC_UTF8_UTIL_H_
11 |
12 | #include
13 |
14 | #include "../common/platform.h"
15 |
16 | #if defined(__cplusplus) || defined(c_plusplus)
17 | extern "C" {
18 | #endif
19 |
20 | static const double kMinUTF8Ratio = 0.75;
21 |
22 | /* Returns 1 if at least min_fraction of the bytes between pos and
23 | pos + length in the (data, mask) ring-buffer is UTF8-encoded, otherwise
24 | returns 0. */
25 | BROTLI_INTERNAL BROTLI_BOOL BrotliIsMostlyUTF8(
26 | const uint8_t* data, const size_t pos, const size_t mask,
27 | const size_t length, const double min_fraction);
28 |
29 | #if defined(__cplusplus) || defined(c_plusplus)
30 | } /* extern "C" */
31 | #endif
32 |
33 | #endif /* BROTLI_ENC_UTF8_UTIL_H_ */
34 |
--------------------------------------------------------------------------------
/c/enc/literal_cost.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2013 Google Inc. All Rights Reserved.
2 |
3 | Distributed under MIT license.
4 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 | */
6 |
7 | /* Literal cost model to allow backward reference replacement to be efficient.
8 | */
9 |
10 | #ifndef BROTLI_ENC_LITERAL_COST_H_
11 | #define BROTLI_ENC_LITERAL_COST_H_
12 |
13 | #include
14 |
15 | #include "../common/platform.h"
16 |
17 | #if defined(__cplusplus) || defined(c_plusplus)
18 | extern "C" {
19 | #endif
20 |
21 | /* Estimates how many bits the literals in the interval [pos, pos + len) in the
22 | ring-buffer (data, mask) will take entropy coded and writes these estimates
23 | to the cost[0..len) array. */
24 | BROTLI_INTERNAL void BrotliEstimateBitCostsForLiterals(
25 | size_t pos, size_t len, size_t mask, const uint8_t* data, size_t* histogram,
26 | float* cost);
27 |
28 | #if defined(__cplusplus) || defined(c_plusplus)
29 | } /* extern "C" */
30 | #endif
31 |
32 | #endif /* BROTLI_ENC_LITERAL_COST_H_ */
33 |
--------------------------------------------------------------------------------
/tests/roundtrip_test.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #
3 | # Roundtrip test for the brotli command-line tool.
4 | #
5 | # The first argument may be a wrapper for brotli, such as 'qemu-arm'.
6 |
7 | set -o errexit
8 |
9 | BROTLI_WRAPPER=$1
10 | BROTLI="${BROTLI_WRAPPER} bin/brotli"  # expanded unquoted below so wrapper and binary become separate words
11 | TMP_DIR=bin/tmp
12 | INPUTS="
13 | tests/testdata/alice29.txt
14 | tests/testdata/asyoulik.txt
15 | tests/testdata/lcet10.txt
16 | tests/testdata/plrabn12.txt
17 | c/enc/encode.c
18 | c/common/dictionary.h
19 | c/dec/decode.c
20 | "
21 |
22 | for file in $INPUTS; do  # unquoted on purpose: word splitting yields one path per iteration
23 |   if [ -f "$file" ]; then  # inputs that are not present are skipped
24 |     for quality in 1 6 9 11; do
25 |       echo "Roundtrip testing $file at quality $quality"
26 |       compressed="${TMP_DIR}/${file##*/}.br"
27 |       uncompressed="${TMP_DIR}/${file##*/}.unbr"
28 |       $BROTLI -fq "$quality" "$file" -o "$compressed"
29 |       $BROTLI "$compressed" -fdo "$uncompressed"
30 |       diff -q "$file" "$uncompressed"
31 |       # Test the streaming version
32 |       $BROTLI -cq "$quality" < "$file" | $BROTLI -cd > "$uncompressed"
33 |       diff -q "$file" "$uncompressed"
34 |     done
35 |   fi
36 | done
37 |
--------------------------------------------------------------------------------
/c/enc/command.c:
--------------------------------------------------------------------------------
1 | /* Copyright 2013 Google Inc. All Rights Reserved.
2 |
3 | Distributed under MIT license.
4 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 | */
6 |
7 | #include "command.h"
8 |
9 | #include
10 |
11 | #if defined(__cplusplus) || defined(c_plusplus)
12 | extern "C" {
13 | #endif
14 |
15 | const uint32_t kBrotliInsBase[BROTLI_NUM_INS_COPY_CODES] = {
16 | 0, 1, 2, 3, 4, 5, 6, 8, 10, 14, 18, 26,
17 | 34, 50, 66, 98, 130, 194, 322, 578, 1090, 2114, 6210, 22594};
18 | const uint32_t kBrotliInsExtra[BROTLI_NUM_INS_COPY_CODES] = {
19 | 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10, 12, 14, 24};
20 | const uint32_t kBrotliCopyBase[BROTLI_NUM_INS_COPY_CODES] = {
21 | 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 18,
22 | 22, 30, 38, 54, 70, 102, 134, 198, 326, 582, 1094, 2118};
23 | const uint32_t kBrotliCopyExtra[BROTLI_NUM_INS_COPY_CODES] = {
24 | 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10, 24};
25 |
26 | #if defined(__cplusplus) || defined(c_plusplus)
27 | } /* extern "C" */
28 | #endif
29 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2009, 2010, 2013-2016 by the Brotli Authors.
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining a copy
4 | of this software and associated documentation files (the "Software"), to deal
5 | in the Software without restriction, including without limitation the rights
6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 | copies of the Software, and to permit persons to whom the Software is
8 | furnished to do so, subject to the following conditions:
9 |
10 | The above copyright notice and this permission notice shall be included in
11 | all copies or substantial portions of the Software.
12 |
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 | THE SOFTWARE.
20 |
--------------------------------------------------------------------------------
/compiler_config_setting.bzl:
--------------------------------------------------------------------------------
1 | # Copyright 2018 Google Inc. All Rights Reserved.
2 | #
3 | # Distributed under MIT license.
4 | # See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 |
6 | """Creates config_setting that allows selecting based on 'compiler' value."""
7 |
8 | def create_msvc_config():
9 |     """Declares the public "msvc" config_setting that matches the msvc-cl compiler."""
10 |
11 |     # Bazel versions whose cc_common carries the marker attribute
12 |     # "do_not_use_tools_cpp_compiler_present" understand the
13 |     # "@bazel_tools//tools/cpp:compiler" flag_value; older versions only
14 |     # accept the "compiler" entry in values. Once flag_values is the only
15 |     # way to select on the compiler, the fallback branch can be removed.
16 |     if hasattr(cc_common, "do_not_use_tools_cpp_compiler_present"):
17 |         selector = {
18 |             "flag_values": {"@bazel_tools//tools/cpp:compiler": "msvc-cl"},
19 |         }
20 |     else:
21 |         selector = {
22 |             "values": {"compiler": "msvc-cl"},
23 |         }
24 |     native.config_setting(
25 |         name = "msvc",
26 |         visibility = ["//visibility:public"],
27 |         **selector
28 |     )
29 |
--------------------------------------------------------------------------------
/c/enc/cluster.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2013 Google Inc. All Rights Reserved.
2 |
3 | Distributed under MIT license.
4 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 | */
6 |
7 | /* Functions for clustering similar histograms together. */
8 |
9 | #ifndef BROTLI_ENC_CLUSTER_H_
10 | #define BROTLI_ENC_CLUSTER_H_
11 |
12 | #include
13 |
14 | #include "../common/platform.h"
15 | #include "histogram.h"
16 | #include "memory.h"
17 |
18 | #if defined(__cplusplus) || defined(c_plusplus)
19 | extern "C" {
20 | #endif
21 |
22 | typedef struct HistogramPair {
23 | uint32_t idx1;
24 | uint32_t idx2;
25 | double cost_combo;
26 | double cost_diff;
27 | } HistogramPair;
28 |
29 | #define CODE(X) /* Declaration */;
30 |
31 | #define FN(X) X ## Literal
32 | #include "cluster_inc.h" /* NOLINT(build/include) */
33 | #undef FN
34 |
35 | #define FN(X) X ## Command
36 | #include "cluster_inc.h" /* NOLINT(build/include) */
37 | #undef FN
38 |
39 | #define FN(X) X ## Distance
40 | #include "cluster_inc.h" /* NOLINT(build/include) */
41 | #undef FN
42 |
43 | #undef CODE
44 |
45 | #if defined(__cplusplus) || defined(c_plusplus)
46 | } /* extern "C" */
47 | #endif
48 |
49 | #endif /* BROTLI_ENC_CLUSTER_H_ */
50 |
--------------------------------------------------------------------------------
/tests/run-compatibility-test.cmake:
--------------------------------------------------------------------------------
1 | string(REGEX REPLACE "([a-zA-Z0-9\\.]+)\\.compressed(\\.[0-9]+)?$" "\\1" REFERENCE_DATA "${INPUT}")
2 | string(REGEX REPLACE "\\.compressed" "" OUTPUT_FILE "${INPUT}")
3 | get_filename_component(OUTPUT_NAME "${OUTPUT_FILE}" NAME)
4 |
5 | set(ENV{QEMU_LD_PREFIX} "${BROTLI_WRAPPER_LD_PREFIX}")
6 |
7 | execute_process(
8 | WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
9 | COMMAND ${BROTLI_WRAPPER} ${BROTLI_CLI} --force --decompress ${INPUT} --output=${CMAKE_CURRENT_BINARY_DIR}/${OUTPUT_NAME}.unbr
10 | RESULT_VARIABLE result)
11 | if(result)
12 | message(FATAL_ERROR "Decompression failed")
13 | endif()
14 |
15 | function(test_file_equality f1 f2)
16 |   # Pick what to compare: SHA512 digests on CMake 2.8.7 and newer, otherwise
17 |   # the raw contents of both files.
18 |   if(CMAKE_VERSION VERSION_LESS 2.8.7)
19 |     file(READ "${f1}" cmp_a)
20 |     file(READ "${f2}" cmp_b)
21 |   else()
22 |     file(SHA512 "${f1}" cmp_a)
23 |     file(SHA512 "${f2}" cmp_b)
24 |   endif()
25 |   # Any difference aborts the script with a fatal error.
26 |   if(NOT "${cmp_a}" STREQUAL "${cmp_b}")
27 |     message(FATAL_ERROR "Files do not match")
28 |   endif()
29 | endfunction()
30 |
31 | test_file_equality("${REFERENCE_DATA}" "${CMAKE_CURRENT_BINARY_DIR}/${OUTPUT_NAME}.unbr")
32 |
--------------------------------------------------------------------------------
/c/enc/block_encoder_inc.h:
--------------------------------------------------------------------------------
1 | /* NOLINT(build/header_guard) */
2 | /* Copyright 2014 Google Inc. All Rights Reserved.
3 |
4 | Distributed under MIT license.
5 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
6 | */
7 |
8 | /* template parameters: FN */
9 |
10 | #define HistogramType FN(Histogram)
11 |
12 | /* Builds one entropy code per histogram (i.e. per block type) and stores each
13 |    of them to the bit stream; depth and bit tables are allocated on |self|. */
14 | static void FN(BuildAndStoreEntropyCodes)(MemoryManager* m, BlockEncoder* self,
15 |     const HistogramType* histograms, const size_t histograms_size,
16 |     const size_t alphabet_size, HuffmanTree* tree,
17 |     size_t* storage_ix, uint8_t* storage) {
18 |   const size_t stride = self->histogram_length_;
19 |   const size_t total = histograms_size * stride;
20 |   size_t k;
21 |   self->depths_ = BROTLI_ALLOC(m, uint8_t, total);
22 |   self->bits_ = BROTLI_ALLOC(m, uint16_t, total);
23 |   if (BROTLI_IS_OOM(m)) return;
24 |
25 |   for (k = 0; k < histograms_size; ++k) {
26 |     /* Histogram k owns the slice [offset, offset + stride) of both tables. */
27 |     const size_t offset = k * stride;
28 |     BuildAndStoreHuffmanTree(histograms[k].data_, stride, alphabet_size, tree,
29 |         self->depths_ + offset, self->bits_ + offset,
30 |         storage_ix, storage);
31 |   }
32 | }
33 |
34 | #undef HistogramType
35 |
--------------------------------------------------------------------------------
/tests/run-roundtrip-test.cmake:
--------------------------------------------------------------------------------
1 | set(ENV{QEMU_LD_PREFIX} "${BROTLI_WRAPPER_LD_PREFIX}")
2 |
3 | execute_process(
4 | WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
5 | COMMAND ${BROTLI_WRAPPER} ${BROTLI_CLI} --force --quality=${QUALITY} ${INPUT} --output=${OUTPUT}.br
6 | RESULT_VARIABLE result
7 | ERROR_VARIABLE result_stderr)
8 | if(result)
9 | message(FATAL_ERROR "Compression failed: ${result_stderr}")
10 | endif()
11 |
12 | execute_process(
13 | WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
14 | COMMAND ${BROTLI_WRAPPER} ${BROTLI_CLI} --force --decompress ${OUTPUT}.br --output=${OUTPUT}.unbr
15 | RESULT_VARIABLE result)
16 | if(result)
17 | message(FATAL_ERROR "Decompression failed")
18 | endif()
19 |
20 | function(test_file_equality f1 f2)
21 |   # Pick what to compare: SHA512 digests on CMake 2.8.7 and newer, otherwise
22 |   # the raw contents of both files.
23 |   if(CMAKE_VERSION VERSION_LESS 2.8.7)
24 |     file(READ "${f1}" cmp_a)
25 |     file(READ "${f2}" cmp_b)
26 |   else()
27 |     file(SHA512 "${f1}" cmp_a)
28 |     file(SHA512 "${f2}" cmp_b)
29 |   endif()
30 |   # Any difference aborts the script with a fatal error.
31 |   if(NOT "${cmp_a}" STREQUAL "${cmp_b}")
32 |     message(FATAL_ERROR "Files do not match")
33 |   endif()
34 | endfunction()
35 |
36 | test_file_equality("${INPUT}" "${OUTPUT}.unbr")
37 |
--------------------------------------------------------------------------------
/c/enc/params.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2017 Google Inc. All Rights Reserved.
2 |
3 | Distributed under MIT license.
4 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 | */
6 |
7 | /* Parameters for the Brotli encoder with chosen quality levels. */
8 |
9 | #ifndef BROTLI_ENC_PARAMS_H_
10 | #define BROTLI_ENC_PARAMS_H_
11 |
12 | #include
13 |
14 | #include "encoder_dict.h"
15 |
16 | typedef struct BrotliHasherParams {
17 | int type;
18 | int bucket_bits;
19 | int block_bits;
20 | int hash_len;
21 | int num_last_distances_to_check;
22 | } BrotliHasherParams;
23 |
24 | typedef struct BrotliDistanceParams {
25 | uint32_t distance_postfix_bits;
26 | uint32_t num_direct_distance_codes;
27 | uint32_t alphabet_size_max;
28 | uint32_t alphabet_size_limit;
29 | size_t max_distance;
30 | } BrotliDistanceParams;
31 |
32 | /* Encoding parameters */
33 | typedef struct BrotliEncoderParams {
34 | BrotliEncoderMode mode;
35 | int quality;
36 | int lgwin;
37 | int lgblock;
38 | size_t stream_offset;
39 | size_t size_hint;
40 | BROTLI_BOOL disable_literal_context_modeling;
41 | BROTLI_BOOL large_window;
42 | BrotliHasherParams hasher;
43 | BrotliDistanceParams dist;
44 | /* TODO(eustas): rename to BrotliShared... */
45 | SharedEncoderDictionary dictionary;
46 | } BrotliEncoderParams;
47 |
48 | #endif /* BROTLI_ENC_PARAMS_H_ */
49 |
--------------------------------------------------------------------------------
/Makefile.am:
--------------------------------------------------------------------------------
1 | AUTOMAKE_OPTIONS = foreign nostdinc subdir-objects
2 |
3 | ACLOCAL_AMFLAGS = -I m4
4 |
5 | # Actual ABI version is substituted by bootstrap
6 | LIBBROTLI_VERSION_INFO = -version-info 0:0:0
7 |
8 | bin_PROGRAMS = brotli
9 | lib_LTLIBRARIES = libbrotlicommon.la libbrotlidec.la libbrotlienc.la
10 |
11 | include scripts/sources.lst
12 |
13 | brotliincludedir = $(includedir)/brotli
14 | brotliinclude_HEADERS = $(BROTLI_INCLUDE)
15 |
16 | AM_CFLAGS = -I$(top_srcdir)/c/include
17 |
18 | brotli_SOURCES = $(BROTLI_CLI_C)
19 | brotli_LDADD = libbrotlidec.la libbrotlienc.la libbrotlicommon.la -lm
20 | #brotli_LDFLAGS = -static
21 |
22 | libbrotlicommon_la_SOURCES = $(BROTLI_COMMON_C) $(BROTLI_COMMON_H)
23 | libbrotlicommon_la_LDFLAGS = $(AM_LDFLAGS) $(LIBBROTLI_VERSION_INFO) $(LDFLAGS)
24 | libbrotlidec_la_SOURCES = $(BROTLI_DEC_C) $(BROTLI_DEC_H)
25 | libbrotlidec_la_LDFLAGS = $(AM_LDFLAGS) $(LIBBROTLI_VERSION_INFO) $(LDFLAGS)
26 | libbrotlidec_la_LIBADD = libbrotlicommon.la -lm
27 | libbrotlienc_la_SOURCES = $(BROTLI_ENC_C) $(BROTLI_ENC_H)
28 | libbrotlienc_la_LDFLAGS = $(AM_LDFLAGS) $(LIBBROTLI_VERSION_INFO) $(LDFLAGS)
29 | libbrotlienc_la_LIBADD = libbrotlicommon.la -lm
30 |
31 | pkgconfigdir = $(libdir)/pkgconfig
32 | pkgconfig_DATA = \
33 | scripts/libbrotlicommon.pc \
34 | scripts/libbrotlidec.pc \
35 | scripts/libbrotlienc.pc
36 | pkgincludedir= $(brotliincludedir)
37 |
38 | dist_doc_DATA = README
39 |
--------------------------------------------------------------------------------
/python/Makefile:
--------------------------------------------------------------------------------
1 | # Copyright 2016 The Brotli Authors. All rights reserved.
2 | #
3 | # Distributed under MIT license.
4 | # See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 |
6 |
7 | # Default (Build)
8 | .PHONY: all
9 | # Build
10 | .PHONY: build
11 | # Develop
12 | .PHONY: develop
13 | # Install
14 | .PHONY: install
15 | # Test
16 | .PHONY: test
17 | # Clean
18 | .PHONY: clean
19 | # Format
20 | .PHONY: fix
21 |
22 |
23 | PYTHON ?= python
24 | YAPF ?= yapf
25 |
26 | EXT_SUFFIX=$(shell $(PYTHON) -c 'import sysconfig; print(sysconfig.get_config_var("EXT_SUFFIX"))')
27 | EXT_SOURCES=$(shell find . -name '*.cc')
28 | EXTENSIONS=$(EXT_SOURCES:%.cc=%$(EXT_SUFFIX))
29 |
30 |
31 | all: build
32 |
33 | build: $(EXTENSIONS)
34 |
35 | $(EXTENSIONS): $(EXT_SOURCES)
36 | @cd .. && $(PYTHON) setup.py build_ext --inplace
37 |
38 | develop:
39 | @cd .. && $(PYTHON) setup.py develop
40 |
41 | install:
42 | @cd .. && $(PYTHON) setup.py install
43 |
44 | test: build
45 | @echo 'running tests'
46 | @$(PYTHON) -m unittest discover -v -p '*_test.py'
47 |
# Remove build output, compiled extensions and Python caches.
# `find -exec ... {} +` is used instead of piping into `xargs rm`: with GNU
# xargs an empty match list still runs `rm` with no operands, which fails and
# aborts `make clean`; it also mishandles paths containing whitespace.
# `-prune` keeps find from descending into directories that are being removed.
clean:
	@cd .. && $(PYTHON) setup.py clean
	@find .. -name '*.pyc' -exec rm -v {} +
	@find .. -name '*.so' -exec rm -v {} +
	@find .. -type d -name '__pycache__' -prune -exec rm -v -r {} +
	@find .. -type d -name '*.egg-info' -prune -exec rm -v -r {} +
54 |
55 | fix:
56 | @echo 'formatting code'
57 | -@$(YAPF) --in-place --recursive --verify .
58 |
--------------------------------------------------------------------------------
/c/enc/static_dict.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2013 Google Inc. All Rights Reserved.
2 |
3 | Distributed under MIT license.
4 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 | */
6 |
7 | /* Class to model the static dictionary. */
8 |
9 | #ifndef BROTLI_ENC_STATIC_DICT_H_
10 | #define BROTLI_ENC_STATIC_DICT_H_
11 |
12 | #include
13 |
14 | #include "../common/dictionary.h"
15 | #include "../common/platform.h"
16 | #include "encoder_dict.h"
17 |
18 | #if defined(__cplusplus) || defined(c_plusplus)
19 | extern "C" {
20 | #endif
21 |
22 | #define BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN 37
23 | static const uint32_t kInvalidMatch = 0xFFFFFFF;
24 |
25 | /* Matches data against static dictionary words, and for each length l,
26 | for which a match is found, updates matches[l] to be the minimum possible
27 | (distance << 5) + len_code.
28 | Returns 1 if matches have been found, otherwise 0.
29 | Prerequisites:
30 | matches array is at least BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN + 1 long
31 | all elements are initialized to kInvalidMatch */
32 | BROTLI_INTERNAL BROTLI_BOOL BrotliFindAllStaticDictionaryMatches(
33 | const BrotliEncoderDictionary* dictionary,
34 | const uint8_t* data, size_t min_length, size_t max_length,
35 | uint32_t* matches);
36 |
37 | #if defined(__cplusplus) || defined(c_plusplus)
38 | } /* extern "C" */
39 | #endif
40 |
41 | #endif /* BROTLI_ENC_STATIC_DICT_H_ */
42 |
--------------------------------------------------------------------------------
/c/enc/backward_references.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2013 Google Inc. All Rights Reserved.
2 |
3 | Distributed under MIT license.
4 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 | */
6 |
7 | /* Function to find backward reference copies. */
8 |
9 | #ifndef BROTLI_ENC_BACKWARD_REFERENCES_H_
10 | #define BROTLI_ENC_BACKWARD_REFERENCES_H_
11 |
12 | #include
13 |
14 | #include "../common/constants.h"
15 | #include "../common/context.h"
16 | #include "../common/dictionary.h"
17 | #include "../common/platform.h"
18 | #include "command.h"
19 | #include "hash.h"
20 | #include "quality.h"
21 |
22 | #if defined(__cplusplus) || defined(c_plusplus)
23 | extern "C" {
24 | #endif
25 |
26 | /* "commands" points to the next output command to write to, "*num_commands" is
27 | initially the total amount of commands output by previous
28 | CreateBackwardReferences calls, and must be incremented by the amount written
29 | by this call. */
30 | BROTLI_INTERNAL void BrotliCreateBackwardReferences(size_t num_bytes,
31 | size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
32 | ContextLut literal_context_lut, const BrotliEncoderParams* params,
33 | Hasher* hasher, int* dist_cache, size_t* last_insert_len,
34 | Command* commands, size_t* num_commands, size_t* num_literals);
35 |
36 | #if defined(__cplusplus) || defined(c_plusplus)
37 | } /* extern "C" */
38 | #endif
39 |
40 | #endif /* BROTLI_ENC_BACKWARD_REFERENCES_H_ */
41 |
--------------------------------------------------------------------------------
/python/tests/decompress_test.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016 The Brotli Authors. All rights reserved.
2 | #
3 | # Distributed under MIT license.
4 | # See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 |
6 | import unittest
7 |
8 | from . import _test_utils
9 | import brotli
10 |
11 |
12 | def _get_original_name(test_data):
13 | return test_data.split('.compressed')[0]
14 |
15 |
class TestDecompress(_test_utils.TestCase):
  """Tests for the one-shot `brotli.decompress` function."""

  def _check_decompression(self, test_data):
    """Asserts the temp output for `test_data` matches the original file."""
    actual_path = _test_utils.get_temp_uncompressed_name(test_data)
    expected_path = _get_original_name(test_data)
    self.assertFilesMatch(actual_path, expected_path)

  def _decompress(self, test_data):
    """Decompresses the file `test_data` into its temp output file."""
    output_path = _test_utils.get_temp_uncompressed_name(test_data)
    # The output file is opened first, then the input, as in nested `with`s.
    with open(output_path, 'wb') as out_file, open(test_data, 'rb') as in_file:
      compressed = in_file.read()
      out_file.write(brotli.decompress(compressed))

  def _test_decompress(self, test_data):
    """Decompresses `test_data`, then verifies the result.

    NOTE(review): presumably called by the methods that
    `_test_utils.generate_test_methods` attaches to this class -- confirm.
    """
    self._decompress(test_data)
    self._check_decompression(test_data)

  def test_garbage_appended(self):
    """Trailing bytes after a complete stream must raise `brotli.error`."""
    with self.assertRaises(brotli.error):
      brotli.decompress(brotli.compress(b'a') + b'a')
37 |
38 |
39 | _test_utils.generate_test_methods(TestDecompress, for_decompression=True)
40 |
41 | if __name__ == '__main__':
42 | unittest.main()
43 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | Want to contribute? Great! First, read this page (including the small print at
2 | the end).
3 |
4 | ### Before you contribute
5 | Before we can use your code, you must sign the
6 | [Google Individual Contributor License
7 | Agreement](https://cla.developers.google.com/about/google-individual)
8 | (CLA), which you can do online. The CLA is necessary mainly because you own the
9 | copyright to your changes, even after your contribution becomes part of our
10 | codebase, so we need your permission to use and distribute your code. We also
11 | need to be sure of various other things—for instance that you'll tell us if you
12 | know that your code infringes on other people's patents. You don't have to sign
13 | the CLA until after you've submitted your code for review and a member has
14 | approved it, but you must do it before we can put your code into our codebase.
15 | Before you start working on a larger contribution, you should get in touch with
16 | us first through the issue tracker with your idea so that we can help out and
17 | possibly guide you. Coordinating up front makes it much easier to avoid
18 | frustration later on.
19 |
20 | ### Code reviews
21 | All submissions, including submissions by project members, require review. We
22 | use GitHub pull requests for this purpose.
23 |
24 | ### The small print
25 | Contributions made by corporations are covered by a different agreement than
26 | the one above, the [Software Grant and Corporate Contributor License
27 | Agreement](https://cla.developers.google.com/about/google-corporate).
28 |
--------------------------------------------------------------------------------
/c/enc/histogram_inc.h:
--------------------------------------------------------------------------------
1 | /* NOLINT(build/header_guard) */
2 | /* Copyright 2013 Google Inc. All Rights Reserved.
3 |
4 | Distributed under MIT license.
5 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
6 | */
7 |
8 | /* template parameters: Histogram, DATA_SIZE, DataType */
9 |
10 | /* A simple container for histograms of data in blocks. */
11 |
12 | typedef struct FN(Histogram) {
13 | uint32_t data_[DATA_SIZE];
14 | size_t total_count_;
15 | double bit_cost_;
16 | } FN(Histogram);
17 |
18 | static BROTLI_INLINE void FN(HistogramClear)(FN(Histogram)* self) {
19 | memset(self->data_, 0, sizeof(self->data_));
20 | self->total_count_ = 0;
21 | self->bit_cost_ = HUGE_VAL;
22 | }
23 |
24 | static BROTLI_INLINE void FN(ClearHistograms)(
25 | FN(Histogram)* array, size_t length) {
26 | size_t i;
27 | for (i = 0; i < length; ++i) FN(HistogramClear)(array + i);
28 | }
29 |
30 | static BROTLI_INLINE void FN(HistogramAdd)(FN(Histogram)* self, size_t val) {
31 | ++self->data_[val];
32 | ++self->total_count_;
33 | }
34 |
35 | static BROTLI_INLINE void FN(HistogramAddVector)(FN(Histogram)* self,
36 | const DataType* p, size_t n) {
37 | self->total_count_ += n;
38 | n += 1;
39 | while (--n) ++self->data_[*p++];
40 | }
41 |
42 | static BROTLI_INLINE void FN(HistogramAddHistogram)(FN(Histogram)* self,
43 | const FN(Histogram)* v) {
44 | size_t i;
45 | self->total_count_ += v->total_count_;
46 | for (i = 0; i < DATA_SIZE; ++i) {
47 | self->data_[i] += v->data_[i];
48 | }
49 | }
50 |
51 | static BROTLI_INLINE size_t FN(HistogramDataSize)(void) { return DATA_SIZE; }
52 |
--------------------------------------------------------------------------------
/python/tests/compress_test.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016 The Brotli Authors. All rights reserved.
2 | #
3 | # Distributed under MIT license.
4 | # See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 |
6 | import unittest
7 |
8 | from . import _test_utils
9 | import brotli
10 |
11 |
class TestCompress(_test_utils.TestCase):
  """Round-trip tests for the one-shot `brotli.compress` function."""

  # Option values handed to _test_utils.generate_test_methods below the class.
  VARIANTS = {'quality': (1, 6, 9, 11), 'lgwin': (10, 15, 20, 24)}

  def _check_decompression(self, test_data, **kwargs):
    """Decompresses the temp compressed file and compares it to `test_data`.

    `kwargs` is accepted but never forwarded: `brotli.decompress` is called
    with the compressed bytes only.
    """
    uncompressed_path = _test_utils.get_temp_uncompressed_name(test_data)
    compressed_path = _test_utils.get_temp_compressed_name(test_data)
    with open(uncompressed_path, 'wb') as out_file, \
        open(compressed_path, 'rb') as in_file:
      out_file.write(brotli.decompress(in_file.read()))
    self.assertFilesMatch(uncompressed_path, test_data)

  def _compress(self, test_data, **kwargs):
    """Compresses `test_data` into its temp compressed file using `kwargs`."""
    compressed_path = _test_utils.get_temp_compressed_name(test_data)
    with open(compressed_path, 'wb') as out_file, \
        open(test_data, 'rb') as in_file:
      payload = in_file.read()
      out_file.write(brotli.compress(payload, **kwargs))

  def _test_compress(self, test_data, **kwargs):
    """Compresses `test_data`, then checks that it round-trips."""
    self._compress(test_data, **kwargs)
    self._check_decompression(test_data, **kwargs)
36 |
37 |
38 | _test_utils.generate_test_methods(TestCompress, variants=TestCompress.VARIANTS)
39 |
40 | if __name__ == '__main__':
41 | unittest.main()
42 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | OS := $(shell uname)
2 | LIBSOURCES = $(wildcard c/common/*.c) $(wildcard c/dec/*.c) \
3 | $(wildcard c/enc/*.c)
4 | SOURCES = $(LIBSOURCES) c/tools/brotli.c
5 | BINDIR = bin
6 | OBJDIR = $(BINDIR)/obj
7 | LIBOBJECTS = $(addprefix $(OBJDIR)/, $(LIBSOURCES:.c=.o))
8 | OBJECTS = $(addprefix $(OBJDIR)/, $(SOURCES:.c=.o))
9 | LIB_A = libbrotli.a
10 | EXECUTABLE = brotli
11 | DIRS = $(OBJDIR)/c/common $(OBJDIR)/c/dec $(OBJDIR)/c/enc \
12 | $(OBJDIR)/c/tools $(BINDIR)/tmp
13 | CFLAGS += -O2
# Make variable names are case-sensitive. The platform name is stored in OS
# (assigned from `uname` at the top of this file), so $(os) always expanded to
# an empty string and OS_MACOSX was never defined on macOS.
ifeq ($(OS), Darwin)
  CPPFLAGS += -DOS_MACOSX
endif
17 |
18 | ifneq ($(strip $(CROSS_COMPILE)), )
19 | CC=$(CROSS_COMPILE)-gcc
20 | ARCH=$(firstword $(subst -, ,$(CROSS_COMPILE)))
21 | BROTLI_WRAPPER="qemu-$(ARCH) -L /usr/$(CROSS_COMPILE)"
22 | endif
23 |
24 | # The arm-linux-gnueabi compiler defaults to Armv5. Since we only support Armv7
25 | # and beyond, we need to select Armv7 explicitly with march.
26 | ifeq ($(ARCH), arm)
27 | CFLAGS += -march=armv7-a -mfloat-abi=hard -mfpu=neon
28 | endif
29 |
30 | all: test
31 | @:
32 |
33 | .PHONY: all clean test
34 |
35 | $(DIRS):
36 | mkdir -p $@
37 |
38 | $(EXECUTABLE): $(OBJECTS)
39 | $(CC) $(LDFLAGS) $(OBJECTS) -lm -o $(BINDIR)/$(EXECUTABLE)
40 |
41 | lib: $(LIBOBJECTS)
42 | rm -f $(LIB_A)
43 | ar -crs $(LIB_A) $(LIBOBJECTS)
44 |
45 | test: $(EXECUTABLE)
46 | tests/compatibility_test.sh $(BROTLI_WRAPPER)
47 | tests/roundtrip_test.sh $(BROTLI_WRAPPER)
48 |
49 | clean:
50 | rm -rf $(BINDIR) $(LIB_A)
51 |
52 | .SECONDEXPANSION:
53 | $(OBJECTS): $$(patsubst %.o,%.c,$$(patsubst $$(OBJDIR)/%,%,$$@)) | $(DIRS)
54 | $(CC) $(CFLAGS) $(CPPFLAGS) -Ic/include \
55 | -c $(patsubst %.o,%.c,$(patsubst $(OBJDIR)/%,%,$@)) -o $@
56 |
--------------------------------------------------------------------------------
/c/enc/cluster.c:
--------------------------------------------------------------------------------
1 | /* Copyright 2013 Google Inc. All Rights Reserved.
2 |
3 | Distributed under MIT license.
4 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 | */
6 |
7 | /* Functions for clustering similar histograms together. */
8 |
9 | #include "cluster.h"
10 |
11 | #include
12 |
13 | #include "../common/platform.h"
14 | #include "bit_cost.h" /* BrotliPopulationCost */
15 | #include "fast_log.h"
16 | #include "histogram.h"
17 | #include "memory.h"
18 |
19 | #if defined(__cplusplus) || defined(c_plusplus)
20 | extern "C" {
21 | #endif
22 |
23 | static BROTLI_INLINE BROTLI_BOOL HistogramPairIsLess(
24 | const HistogramPair* p1, const HistogramPair* p2) {
25 | if (p1->cost_diff != p2->cost_diff) {
26 | return TO_BROTLI_BOOL(p1->cost_diff > p2->cost_diff);
27 | }
28 | return TO_BROTLI_BOOL((p1->idx2 - p1->idx1) > (p2->idx2 - p2->idx1));
29 | }
30 |
31 | /* Returns entropy reduction of the context map when we combine two clusters. */
32 | static BROTLI_INLINE double ClusterCostDiff(size_t size_a, size_t size_b) {
33 | size_t size_c = size_a + size_b;
34 | return (double)size_a * FastLog2(size_a) +
35 | (double)size_b * FastLog2(size_b) -
36 | (double)size_c * FastLog2(size_c);
37 | }
38 |
39 | #define CODE(X) X
40 |
41 | #define FN(X) X ## Literal
42 | #include "cluster_inc.h" /* NOLINT(build/include) */
43 | #undef FN
44 |
45 | #define FN(X) X ## Command
46 | #include "cluster_inc.h" /* NOLINT(build/include) */
47 | #undef FN
48 |
49 | #define FN(X) X ## Distance
50 | #include "cluster_inc.h" /* NOLINT(build/include) */
51 | #undef FN
52 |
53 | #undef CODE
54 |
55 | #if defined(__cplusplus) || defined(c_plusplus)
56 | } /* extern "C" */
57 | #endif
58 |
--------------------------------------------------------------------------------
/python/README.md:
--------------------------------------------------------------------------------
1 | This directory contains the code for the Python `brotli` module,
2 | `bro.py` tool, and roundtrip tests.
3 |
4 | Only Python 2.7+ is supported.
5 |
6 | We provide a `Makefile` to simplify common development commands.
7 |
8 | ### Installation
9 |
10 | If you just want to install the latest release of the Python `brotli`
11 | module, we recommend installing from [PyPI][]:
12 |
13 | $ pip install brotli
14 |
15 | Alternatively, you may install directly from source by running the
16 | following command from this directory:
17 |
18 | $ make install
19 |
20 | ### Development
21 |
22 | You may run the following commands from this directory:
23 |
24 | $ make # Build the module in-place
25 |
26 | $ make test # Test the module
27 |
28 | $ make clean # Remove all temporary files and build output
29 |
30 | If you wish to make the module available while still being
31 | able to edit the source files, you can use the `setuptools`
32 | "[development mode][]":
33 |
34 | $ make develop # Install the module in "development mode"
35 |
36 | ### Code Style
37 |
38 | Brotli's code follows the [Google Python Style Guide][]. To
39 | automatically format your code, first install [YAPF][]:
40 |
41 | $ pip install yapf
42 |
43 | Then, to format all files in the project, you can run:
44 |
45 | $ make fix # Automatically format code
46 |
47 | See the [YAPF usage][] documentation for more information.
48 |
49 |
50 | [PyPI]: https://pypi.org/project/Brotli/
51 | [development mode]: https://setuptools.readthedocs.io/en/latest/setuptools.html#development-mode
52 | [Google Python Style Guide]: https://google.github.io/styleguide/pyguide.html
53 | [YAPF]: https://github.com/google/yapf
54 | [YAPF usage]: https://github.com/google/yapf#usage
55 |
--------------------------------------------------------------------------------
/bootstrap:
--------------------------------------------------------------------------------
1 | #!/bin/sh -e
2 |
3 | REQUIRED='is required, but not installed.'
4 | bc -v >/dev/null 2>&1 || { echo >&2 "'bc' $REQUIRED"; exit 1; }
5 | [ "x`echo hello | sed s/hello/world/ 2>/dev/null`" = "xworld" ] || { echo >&2 "'sed' $REQUIRED"; exit 1; }
6 | autoreconf --version >/dev/null 2>&1 || { echo >&2 "'autoconf' $REQUIRED"; exit 1; }
7 |
8 | # Determine which flag sed uses for extended regular expressions.
9 | # -E is POSIX. -r is for GNU sed older than 4.2.
10 | echo hello | sed -E s/hello/world/ >/dev/null 2>&1 && SED_ERE=-E || SED_ERE=-r
11 |
12 | # If libtool is not installed ->
13 | # "error: Libtool library used but 'LIBTOOL' is undefined"
14 |
15 | if [ ! -e "./m4" ]; then
16 | mkdir m4 2>/dev/null
17 | fi
18 |
19 | BROTLI_ABI_HEX=`sed -n 's/#define BROTLI_ABI_VERSION 0x//p' c/common/version.h`
20 | BROTLI_ABI_INT=`echo "ibase=16;$BROTLI_ABI_HEX" | bc`
21 | BROTLI_ABI_CURRENT=`echo "scale=0;$BROTLI_ABI_INT / 16777216" | bc`
22 | BROTLI_ABI_REVISION=`echo "scale=0;$BROTLI_ABI_INT / 4096 % 4096" | bc`
23 | BROTLI_ABI_AGE=`echo "scale=0;$BROTLI_ABI_INT % 4096" | bc`
24 | BROTLI_ABI_INFO="$BROTLI_ABI_CURRENT:$BROTLI_ABI_REVISION:$BROTLI_ABI_AGE"
25 |
26 | BROTLI_VERSION_HEX=`sed -n 's/#define BROTLI_VERSION 0x//p' c/common/version.h`
27 | BROTLI_VERSION_INT=`echo "ibase=16;$BROTLI_VERSION_HEX" | bc`
28 | BROTLI_VERSION_MAJOR=`echo "scale=0;$BROTLI_VERSION_INT / 16777216" | bc`
29 | BROTLI_VERSION_MINOR=`echo "scale=0;$BROTLI_VERSION_INT / 4096 % 4096" | bc`
30 | BROTLI_VERSION_PATCH=`echo "scale=0;$BROTLI_VERSION_INT % 4096" | bc`
31 | BROTLI_VERSION="$BROTLI_VERSION_MAJOR.$BROTLI_VERSION_MINOR.$BROTLI_VERSION_PATCH"
32 |
33 | sed -i.bak "$SED_ERE" "s/[0-9]+:[0-9]+:[0-9]+/$BROTLI_ABI_INFO/" Makefile.am
34 | sed -i.bak "$SED_ERE" "s/\[[0-9]+\.[0-9]+\.[0-9]+\]/[$BROTLI_VERSION]/" configure.ac
35 |
36 | autoreconf --install --force --symlink || exit $?
37 |
--------------------------------------------------------------------------------
/c/enc/block_splitter.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2013 Google Inc. All Rights Reserved.
2 |
3 | Distributed under MIT license.
4 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 | */
6 |
7 | /* Block split point selection utilities. */
8 |
9 | #ifndef BROTLI_ENC_BLOCK_SPLITTER_H_
10 | #define BROTLI_ENC_BLOCK_SPLITTER_H_
11 |
12 | #include
13 |
14 | #include "../common/platform.h"
15 | #include "command.h"
16 | #include "memory.h"
17 | #include "quality.h"
18 |
19 | #if defined(__cplusplus) || defined(c_plusplus)
20 | extern "C" {
21 | #endif
22 |
/* Describes how a sequence is partitioned into typed blocks. */
typedef struct BlockSplit {
  /* Number of distinct block types. */
  size_t num_types;
  /* Number of entries used in both |types| and |lengths|. */
  size_t num_blocks;
  /* Block type of each block. */
  uint8_t* types;
  /* Length of each block. */
  uint32_t* lengths;

  /* NOTE(review): presumably the allocated capacities of |types| and
     |lengths| respectively -- confirm against the implementation. */
  size_t types_alloc_size;
  size_t lengths_alloc_size;
} BlockSplit;
32 |
33 | BROTLI_INTERNAL void BrotliInitBlockSplit(BlockSplit* self);
34 | BROTLI_INTERNAL void BrotliDestroyBlockSplit(MemoryManager* m,
35 | BlockSplit* self);
36 |
37 | BROTLI_INTERNAL void BrotliSplitBlock(MemoryManager* m,
38 | const Command* cmds,
39 | const size_t num_commands,
40 | const uint8_t* data,
41 | const size_t offset,
42 | const size_t mask,
43 | const BrotliEncoderParams* params,
44 | BlockSplit* literal_split,
45 | BlockSplit* insert_and_copy_split,
46 | BlockSplit* dist_split);
47 |
48 | #if defined(__cplusplus) || defined(c_plusplus)
49 | } /* extern "C" */
50 | #endif
51 |
52 | #endif /* BROTLI_ENC_BLOCK_SPLITTER_H_ */
53 |
--------------------------------------------------------------------------------
/c/enc/fast_log.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2013 Google Inc. All Rights Reserved.
2 |
3 | Distributed under MIT license.
4 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 | */
6 |
7 | /* Utilities for fast computation of logarithms. */
8 |
9 | #ifndef BROTLI_ENC_FAST_LOG_H_
10 | #define BROTLI_ENC_FAST_LOG_H_
11 |
12 | #include
13 |
14 | #include
15 |
16 | #include "../common/platform.h"
17 |
18 | #if defined(__cplusplus) || defined(c_plusplus)
19 | extern "C" {
20 | #endif
21 |
22 | static BROTLI_INLINE uint32_t Log2FloorNonZero(size_t n) {
23 | #if defined(BROTLI_BSR32)
24 | return BROTLI_BSR32((uint32_t)n);
25 | #else
26 | uint32_t result = 0;
27 | while (n >>= 1) result++;
28 | return result;
29 | #endif
30 | }
31 |
32 | #define BROTLI_LOG2_TABLE_SIZE 256
33 |
34 | /* A lookup table for small values of log2(int) to be used in entropy
35 | computation. */
36 | BROTLI_INTERNAL extern const double kBrotliLog2Table[BROTLI_LOG2_TABLE_SIZE];
37 |
38 | /* Visual Studio 2012 and Android API levels < 18 do not have the log2()
39 | * function defined, so we use log() and a multiplication instead. */
40 | #if !defined(BROTLI_HAVE_LOG2)
41 | #if ((defined(_MSC_VER) && _MSC_VER <= 1700) || \
42 | (defined(__ANDROID_API__) && __ANDROID_API__ < 18))
43 | #define BROTLI_HAVE_LOG2 0
44 | #else
45 | #define BROTLI_HAVE_LOG2 1
46 | #endif
47 | #endif
48 |
49 | #define LOG_2_INV 1.4426950408889634
50 |
51 | /* Faster logarithm for small integers, with the property of log2(0) == 0. */
52 | static BROTLI_INLINE double FastLog2(size_t v) {
53 | if (v < BROTLI_LOG2_TABLE_SIZE) {
54 | return kBrotliLog2Table[v];
55 | }
56 | #if !(BROTLI_HAVE_LOG2)
57 | return log((double)v) * LOG_2_INV;
58 | #else
59 | return log2((double)v);
60 | #endif
61 | }
62 |
63 | #if defined(__cplusplus) || defined(c_plusplus)
64 | } /* extern "C" */
65 | #endif
66 |
67 | #endif /* BROTLI_ENC_FAST_LOG_H_ */
68 |
--------------------------------------------------------------------------------
/c/enc/bit_cost.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2013 Google Inc. All Rights Reserved.
2 |
3 | Distributed under MIT license.
4 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 | */
6 |
7 | /* Functions to estimate the bit cost of Huffman trees. */
8 |
9 | #ifndef BROTLI_ENC_BIT_COST_H_
10 | #define BROTLI_ENC_BIT_COST_H_
11 |
12 | #include
13 |
14 | #include "../common/platform.h"
15 | #include "fast_log.h"
16 | #include "histogram.h"
17 |
18 | #if defined(__cplusplus) || defined(c_plusplus)
19 | extern "C" {
20 | #endif
21 |
22 | static BROTLI_INLINE double ShannonEntropy(
23 | const uint32_t* population, size_t size, size_t* total) {
24 | size_t sum = 0;
25 | double retval = 0;
26 | const uint32_t* population_end = population + size;
27 | size_t p;
28 | if (size & 1) {
29 | goto odd_number_of_elements_left;
30 | }
31 | while (population < population_end) {
32 | p = *population++;
33 | sum += p;
34 | retval -= (double)p * FastLog2(p);
35 | odd_number_of_elements_left:
36 | p = *population++;
37 | sum += p;
38 | retval -= (double)p * FastLog2(p);
39 | }
40 | if (sum) retval += (double)sum * FastLog2(sum);
41 | *total = sum;
42 | return retval;
43 | }
44 |
45 | static BROTLI_INLINE double BitsEntropy(
46 | const uint32_t* population, size_t size) {
47 | size_t sum;
48 | double retval = ShannonEntropy(population, size, &sum);
49 | if (retval < (double)sum) {
50 | /* At least one bit per literal is needed. */
51 | retval = (double)sum;
52 | }
53 | return retval;
54 | }
55 |
56 | BROTLI_INTERNAL double BrotliPopulationCostLiteral(const HistogramLiteral*);
57 | BROTLI_INTERNAL double BrotliPopulationCostCommand(const HistogramCommand*);
58 | BROTLI_INTERNAL double BrotliPopulationCostDistance(const HistogramDistance*);
59 |
60 | #if defined(__cplusplus) || defined(c_plusplus)
61 | } /* extern "C" */
62 | #endif
63 |
64 | #endif /* BROTLI_ENC_BIT_COST_H_ */
65 |
--------------------------------------------------------------------------------
/docs/constants.h.3:
--------------------------------------------------------------------------------
1 | .TH "constants.h" 3 "August 2021" "Brotli" \" -*- nroff -*-
2 | .ad l
3 | .nh
4 | .SH NAME
5 | constants.h \- Common constants used in decoder and encoder API\&.
6 |
7 | .SH SYNOPSIS
8 | .br
9 | .PP
10 | .SS "Macros"
11 |
12 | .in +1c
13 | .ti -1c
14 | .RI "#define \fBBROTLI_LARGE_MAX_DISTANCE_BITS\fP 62U"
15 | .br
16 | .RI "\fIThe theoretical maximum number of distance bits specified for large window brotli, for 64-bit encoders and decoders\&. \fP"
17 | .ti -1c
18 | .RI "#define \fBBROTLI_LARGE_MAX_WBITS\fP 30"
19 | .br
20 | .RI "\fIThe maximum supported large brotli window bits by the encoder and decoder\&. \fP"
21 | .ti -1c
22 | .RI "#define \fBBROTLI_MAX_NPOSTFIX\fP 3"
23 | .br
24 | .RI "\fIMaximal number of 'postfix' bits\&. \fP"
25 | .in -1c
26 | .SH "Detailed Description"
27 | .PP
28 | Common constants used in decoder and encoder API\&.
29 |
30 |
31 | .SH "Macro Definition Documentation"
32 | .PP
33 | .SS "#define BROTLI_LARGE_MAX_DISTANCE_BITS 62U"
34 |
35 | .PP
36 | The theoretical maximum number of distance bits specified for large window brotli, for 64-bit encoders and decoders\&. Even when in practice 32-bit encoders and decoders only support up to 30 max distance bits, the value is set to 62 because it affects the large window brotli file format\&. Specifically, it affects the encoding of simple huffman tree for distances, see Specification RFC 7932 chapter 3\&.4\&.
37 | .SS "#define BROTLI_LARGE_MAX_WBITS 30"
38 |
39 | .PP
40 | The maximum supported large brotli window bits by the encoder and decoder\&. Large window brotli allows up to 62 bits, however the current encoder and decoder, designed for 32-bit integers, only support up to 30 bits maximum\&.
41 | .SS "#define BROTLI_MAX_NPOSTFIX 3"
42 |
43 | .PP
44 | Maximal number of 'postfix' bits\&. Number of 'postfix' bits is stored as 2 bits in meta-block header\&.
45 | .SH "Author"
46 | .PP
47 | Generated automatically by Doxygen for Brotli from the source code\&.
48 |
--------------------------------------------------------------------------------
/python/brotli.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016 The Brotli Authors. All rights reserved.
2 | #
3 | # Distributed under MIT license.
4 | # See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 |
6 | """Functions to compress and decompress data using the Brotli library."""
7 |
8 | import _brotli
9 |
10 | # The library version.
11 | version = __version__ = _brotli.__version__
12 |
13 | # The compression mode.
14 | MODE_GENERIC = _brotli.MODE_GENERIC
15 | MODE_TEXT = _brotli.MODE_TEXT
16 | MODE_FONT = _brotli.MODE_FONT
17 |
18 | # The Compressor object.
19 | Compressor = _brotli.Compressor
20 |
21 | # The Decompressor object.
22 | Decompressor = _brotli.Decompressor
23 |
# Compress a byte string.
def compress(string, mode=MODE_GENERIC, quality=11, lgwin=22, lgblock=0):
  """Compress a byte string in one call.

  A fresh `Compressor` is created with the given settings, fed the whole
  input, and then finished; the two outputs are concatenated.

  Args:
    string (bytes): The input data.
    mode (int, optional): The compression mode; one of MODE_GENERIC (default),
      MODE_TEXT (for UTF-8 format text input) or MODE_FONT (for WOFF 2.0).
    quality (int, optional): Controls the compression-speed vs compression-
      density tradeoff. The higher the quality, the slower the compression.
      Range is 0 to 11. Defaults to 11.
    lgwin (int, optional): Base 2 logarithm of the sliding window size. Range
      is 10 to 24. Defaults to 22.
    lgblock (int, optional): Base 2 logarithm of the maximum input block size.
      Range is 16 to 24. If set to 0, the value will be set based on the
      quality. Defaults to 0.

  Returns:
    The compressed byte string.

  Raises:
    brotli.error: If arguments are invalid, or compressor fails.
  """
  encoder = Compressor(
      mode=mode, quality=quality, lgwin=lgwin, lgblock=lgblock)
  body = encoder.process(string)
  tail = encoder.finish()
  return body + tail
50 |
51 | # Decompress a compressed byte string.
52 | decompress = _brotli.decompress
53 |
54 | # Raised if compression or decompression fails.
55 | error = _brotli.error
56 |
--------------------------------------------------------------------------------
/python/tests/decompressor_test.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016 The Brotli Authors. All rights reserved.
2 | #
3 | # Distributed under MIT license.
4 | # See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 |
6 | import functools
7 | import unittest
8 |
9 | from . import _test_utils
10 | import brotli
11 |
12 |
13 | def _get_original_name(test_data):
14 | return test_data.split('.compressed')[0]
15 |
16 |
class TestDecompressor(_test_utils.TestCase):
  """Tests for the streaming `brotli.Decompressor` object."""

  # Number of input bytes passed to the decompressor per process() call.
  CHUNK_SIZE = 1

  def setUp(self):
    """Creates a fresh decompressor for every test."""
    self.decompressor = brotli.Decompressor()

  def tearDown(self):
    """Drops the reference to the decompressor."""
    self.decompressor = None

  def _check_decompression(self, test_data):
    """Asserts the temp output for `test_data` matches the original file."""
    actual_path = _test_utils.get_temp_uncompressed_name(test_data)
    expected_path = _get_original_name(test_data)
    self.assertFilesMatch(actual_path, expected_path)

  def _decompress(self, test_data):
    """Streams `test_data` through the decompressor into the temp output."""
    output_path = _test_utils.get_temp_uncompressed_name(test_data)
    with open(output_path, 'wb') as out_file, open(test_data, 'rb') as in_file:
      chunk_size = self.CHUNK_SIZE
      while True:
        chunk = in_file.read(chunk_size)
        if chunk == b'':
          # End of input.
          break
        out_file.write(self.decompressor.process(chunk))
    self.assertTrue(self.decompressor.is_finished())

  def _test_decompress(self, test_data):
    """Decompresses `test_data`, then verifies the result."""
    self._decompress(test_data)
    self._check_decompression(test_data)

  def test_garbage_appended(self):
    """Trailing bytes after a complete stream must raise `brotli.error`."""
    with self.assertRaises(brotli.error):
      self.decompressor.process(brotli.compress(b'a') + b'a')

  def test_already_finished(self):
    """Feeding more data after the stream has ended must raise."""
    self.decompressor.process(brotli.compress(b'a'))
    with self.assertRaises(brotli.error):
      self.decompressor.process(b'a')
55 |
56 | _test_utils.generate_test_methods(TestDecompressor, for_decompression=True)
57 |
58 | if __name__ == '__main__':
59 | unittest.main()
60 |
--------------------------------------------------------------------------------
/c/enc/histogram.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2013 Google Inc. All Rights Reserved.
2 |
3 | Distributed under MIT license.
4 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 | */
6 |
7 | /* Models the histograms of literals, commands and distance codes. */
8 |
9 | #ifndef BROTLI_ENC_HISTOGRAM_H_
10 | #define BROTLI_ENC_HISTOGRAM_H_
11 |
12 | #include /* memset */
13 |
14 | #include
15 |
16 | #include "../common/constants.h"
17 | #include "../common/context.h"
18 | #include "../common/platform.h"
19 | #include "block_splitter.h"
20 | #include "command.h"
21 |
22 | #if defined(__cplusplus) || defined(c_plusplus)
23 | extern "C" {
24 | #endif
25 |
26 | /* The distance symbols effectively used by "Large Window Brotli" (32-bit). */
27 | #define BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS 544
28 |
29 | #define FN(X) X ## Literal /* histogram_inc.h is included three times; FN appends the suffix to each name it declares */
30 | #define DATA_SIZE BROTLI_NUM_LITERAL_SYMBOLS
31 | #define DataType uint8_t
32 | #include "histogram_inc.h" /* NOLINT(build/include) */
33 | #undef DataType
34 | #undef DATA_SIZE
35 | #undef FN
36 |
37 | #define FN(X) X ## Command
38 | #define DataType uint16_t /* not #undef'ed after this include: the Distance instantiation below reuses it */
39 | #define DATA_SIZE BROTLI_NUM_COMMAND_SYMBOLS
40 | #include "histogram_inc.h" /* NOLINT(build/include) */
41 | #undef DATA_SIZE
42 | #undef FN
43 |
44 | #define FN(X) X ## Distance /* DataType is still uint16_t here, carried over from the Command section */
45 | #define DATA_SIZE BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS
46 | #include "histogram_inc.h" /* NOLINT(build/include) */
47 | #undef DataType
48 | #undef DATA_SIZE
49 | #undef FN
50 |
51 | BROTLI_INTERNAL void BrotliBuildHistogramsWithContext(
52 | const Command* cmds, const size_t num_commands,
53 | const BlockSplit* literal_split, const BlockSplit* insert_and_copy_split,
54 | const BlockSplit* dist_split, const uint8_t* ringbuffer, size_t pos,
55 | size_t mask, uint8_t prev_byte, uint8_t prev_byte2,
56 | const ContextType* context_modes, HistogramLiteral* literal_histograms,
57 | HistogramCommand* insert_and_copy_histograms,
58 | HistogramDistance* copy_dist_histograms);
59 |
60 | #if defined(__cplusplus) || defined(c_plusplus)
61 | } /* extern "C" */
62 | #endif
63 |
64 | #endif /* BROTLI_ENC_HISTOGRAM_H_ */
65 |
--------------------------------------------------------------------------------
/c/enc/prefix.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2013 Google Inc. All Rights Reserved.
2 |
3 | Distributed under MIT license.
4 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 | */
6 |
7 | /* Functions for encoding of integers into prefix codes, the amount of extra
8 | bits, and the actual values of the extra bits. */
9 |
10 | #ifndef BROTLI_ENC_PREFIX_H_
11 | #define BROTLI_ENC_PREFIX_H_
12 |
13 | #include <brotli/types.h>
14 |
15 | #include "../common/constants.h"
16 | #include "../common/platform.h"
17 | #include "fast_log.h"
18 |
19 | #if defined(__cplusplus) || defined(c_plusplus)
20 | extern "C" {
21 | #endif
22 |
23 | /* Here distance_code is an intermediate code, i.e. one of the special codes or
24 | the actual distance increased by BROTLI_NUM_DISTANCE_SHORT_CODES - 1. The resulting symbol is written to *code (with the extra-bit count OR-ed in as nbits << 10) and the extra-bit value to *extra_bits. */
25 | static BROTLI_INLINE void PrefixEncodeCopyDistance(size_t distance_code,
26 | size_t num_direct_codes,
27 | size_t postfix_bits,
28 | uint16_t* code,
29 | uint32_t* extra_bits) {
30 | if (distance_code < BROTLI_NUM_DISTANCE_SHORT_CODES + num_direct_codes) { /* short or direct code: emitted as is, no extra bits */
31 | *code = (uint16_t)distance_code;
32 | *extra_bits = 0;
33 | return;
34 | } else {
35 | size_t dist = ((size_t)1 << (postfix_bits + 2u)) + /* bias guarantees dist >= 4, so the log below is at least 2 */
36 | (distance_code - BROTLI_NUM_DISTANCE_SHORT_CODES - num_direct_codes);
37 | size_t bucket = Log2FloorNonZero(dist) - 1; /* position of the bit just below the top set bit of dist */
38 | size_t postfix_mask = (1u << postfix_bits) - 1;
39 | size_t postfix = dist & postfix_mask; /* lowest postfix_bits bits of dist */
40 | size_t prefix = (dist >> bucket) & 1; /* value of the second-highest bit of dist */
41 | size_t offset = (2 + prefix) << bucket; /* the top two bits of dist with everything below cleared */
42 | size_t nbits = bucket - postfix_bits; /* number of extra bits to emit */
43 | *code = (uint16_t)((nbits << 10) |
44 | (BROTLI_NUM_DISTANCE_SHORT_CODES + num_direct_codes +
45 | ((2 * (nbits - 1) + prefix) << postfix_bits) + postfix));
46 | *extra_bits = (uint32_t)((dist - offset) >> postfix_bits); /* bits below the top two, with the postfix bits dropped */
47 | }
48 | }
49 |
50 | #if defined(__cplusplus) || defined(c_plusplus)
51 | } /* extern "C" */
52 | #endif
53 |
54 | #endif /* BROTLI_ENC_PREFIX_H_ */
55 |
--------------------------------------------------------------------------------
/c/common/dictionary.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2013 Google Inc. All Rights Reserved.
2 |
3 | Distributed under MIT license.
4 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 | */
6 |
7 | /* Collection of static dictionary words. */
8 |
9 | #ifndef BROTLI_COMMON_DICTIONARY_H_
10 | #define BROTLI_COMMON_DICTIONARY_H_
11 |
12 | #include <brotli/port.h>
13 | #include <brotli/types.h>
14 |
15 | #if defined(__cplusplus) || defined(c_plusplus)
16 | extern "C" {
17 | #endif
18 |
19 | typedef struct BrotliDictionary {
20 | /**
21 | * Number of bits to encode index of dictionary word in a bucket.
22 | *
23 | * Specification: Appendix A. Static Dictionary Data
24 | *
25 | * Words in a dictionary are bucketed by length.
26 | * @c 0 means that there are no words of a given length.
27 | * Dictionary consists of words with length of [4..24] bytes.
28 | * Values at [0..3] and [25..31] indices should not be addressed.
29 | */
30 | uint8_t size_bits_by_length[32];
31 |
32 | /* assert(offset[i + 1] == offset[i] + (bits[i] ? (i << bits[i]) : 0)) */
33 | uint32_t offsets_by_length[32];
34 |
35 | /* assert(data_size == offsets_by_length[31]) */
36 | size_t data_size;
37 |
38 | /* The data array has no fixed bound; its layout must agree with the
39 | size_bits_by_length values. The specified size matches the default
40 | (RFC 7932) dictionary. Its size is defined by data_size. */
41 | const uint8_t* data;
42 | } BrotliDictionary;
43 |
44 | BROTLI_COMMON_API const BrotliDictionary* BrotliGetDictionary(void);
45 |
46 | /**
47 | * Sets dictionary data.
48 | *
49 | * When dictionary data is already set / present, this method is no-op.
50 | *
51 | * Dictionary data MUST be provided before BrotliGetDictionary is invoked.
52 | * This method is used ONLY in multi-client environment (e.g. C + Java),
53 | * to reduce storage by sharing single dictionary between implementations.
54 | */
55 | BROTLI_COMMON_API void BrotliSetDictionaryData(const uint8_t* data);
56 |
57 | #define BROTLI_MIN_DICTIONARY_WORD_LENGTH 4
58 | #define BROTLI_MAX_DICTIONARY_WORD_LENGTH 24
59 |
60 | #if defined(__cplusplus) || defined(c_plusplus)
61 | } /* extern "C" */
62 | #endif
63 |
64 | #endif /* BROTLI_COMMON_DICTIONARY_H_ */
65 |
--------------------------------------------------------------------------------
/c/enc/find_match_length.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2010 Google Inc. All Rights Reserved.
2 |
3 | Distributed under MIT license.
4 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 | */
6 |
7 | /* Function to find maximal matching prefixes of strings. */
8 |
9 | #ifndef BROTLI_ENC_FIND_MATCH_LENGTH_H_
10 | #define BROTLI_ENC_FIND_MATCH_LENGTH_H_
11 |
12 | #include <brotli/types.h>
13 |
14 | #include "../common/platform.h"
15 |
16 | #if defined(__cplusplus) || defined(c_plusplus)
17 | extern "C" {
18 | #endif
19 |
20 | /* Separate implementation for little-endian 64-bit targets, for speed. */
21 | #if defined(BROTLI_TZCNT64) && BROTLI_64_BITS && BROTLI_LITTLE_ENDIAN
22 | static BROTLI_INLINE size_t FindMatchLengthWithLimit(const uint8_t* s1, /* returns the number of leading bytes (at most limit) on which s1 and s2 agree */
23 | const uint8_t* s2,
24 | size_t limit) {
25 | const uint8_t *s1_orig = s1;
26 | for (; limit >= 8; limit -= 8) { /* compare 8 bytes per iteration while at least 8 remain */
27 | uint64_t x = BROTLI_UNALIGNED_LOAD64LE(s2) ^
28 | BROTLI_UNALIGNED_LOAD64LE(s1); /* x is zero exactly when the two 8-byte words are equal */
29 | s2 += 8;
30 | if (x != 0) {
31 | size_t matching_bits = (size_t)BROTLI_TZCNT64(x); /* index of the lowest differing bit */
32 | return (size_t)(s1 - s1_orig) + (matching_bits >> 3); /* little-endian load: bit index / 8 is the count of equal leading bytes */
33 | }
34 | s1 += 8;
35 | }
36 | while (limit && *s1 == *s2) { /* fewer than 8 bytes left: finish byte by byte */
37 | limit--;
38 | ++s2;
39 | ++s1;
40 | }
41 | return (size_t)(s1 - s1_orig);
42 | }
43 | #else
44 | static BROTLI_INLINE size_t FindMatchLengthWithLimit(const uint8_t* s1, /* returns the number of leading bytes (at most limit) on which s1 and s2 agree */
45 | const uint8_t* s2,
46 | size_t limit) {
47 | size_t matched = 0;
48 | const uint8_t* s2_limit = s2 + limit;
49 | const uint8_t* s2_ptr = s2;
50 | /* Find out how long the match is. We loop over the data 32 bits at a
51 | time until we find a 32-bit block that doesn't match; then we compare
52 | the remaining bytes one at a time. The remaining length is compared as
53 | a size, so no pointer before s2 is ever formed when limit < 4. */
54 | while ((size_t)(s2_limit - s2_ptr) >= 4 &&
55 | BrotliUnalignedRead32(s2_ptr) ==
56 | BrotliUnalignedRead32(s1 + matched)) {
57 | s2_ptr += 4;
58 | matched += 4;
59 | }
60 | while ((s2_ptr < s2_limit) && (s1[matched] == *s2_ptr)) { /* tail, and the bytes of the first unequal 32-bit block */
61 | ++s2_ptr;
62 | ++matched;
63 | }
64 | return matched;
65 | }
66 | #endif
67 |
68 | #if defined(__cplusplus) || defined(c_plusplus)
69 | } /* extern "C" */
70 | #endif
71 |
72 | #endif /* BROTLI_ENC_FIND_MATCH_LENGTH_H_ */
73 |
--------------------------------------------------------------------------------
/c/dec/bit_reader.c:
--------------------------------------------------------------------------------
1 | /* Copyright 2013 Google Inc. All Rights Reserved.
2 |
3 | Distributed under MIT license.
4 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 | */
6 |
7 | /* Bit reading helpers */
8 |
9 | #include "bit_reader.h"
10 |
11 | #include <brotli/types.h>
12 |
13 | #include "../common/platform.h"
14 |
15 | #if defined(__cplusplus) || defined(c_plusplus)
16 | extern "C" {
17 | #endif
18 |
19 | const uint32_t kBrotliBitMask[33] = { 0x00000000, /* entry n has the n lowest bits set, for n in [0..32] */
20 | 0x00000001, 0x00000003, 0x00000007, 0x0000000F,
21 | 0x0000001F, 0x0000003F, 0x0000007F, 0x000000FF,
22 | 0x000001FF, 0x000003FF, 0x000007FF, 0x00000FFF,
23 | 0x00001FFF, 0x00003FFF, 0x00007FFF, 0x0000FFFF,
24 | 0x0001FFFF, 0x0003FFFF, 0x0007FFFF, 0x000FFFFF,
25 | 0x001FFFFF, 0x003FFFFF, 0x007FFFFF, 0x00FFFFFF,
26 | 0x01FFFFFF, 0x03FFFFFF, 0x07FFFFFF, 0x0FFFFFFF,
27 | 0x1FFFFFFF, 0x3FFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF
28 | };
29 |
30 | void BrotliInitBitReader(BrotliBitReader* const br) { /* resets the bit accumulator fields of br */
31 | br->val_ = 0;
32 | br->bit_pos_ = sizeof(br->val_) << 3; /* width of val_ in bits; presumably marks the accumulator as empty -- confirm in bit_reader.h */
33 | }
34 |
35 | BROTLI_BOOL BrotliWarmupBitReader(BrotliBitReader* const br) { /* returns BROTLI_FALSE only when no bits are available and no byte can be pulled */
36 | size_t aligned_read_mask = (sizeof(br->val_) >> 1) - 1; /* mask for alignment to half the width of val_ */
37 | /* Fixing alignment after an unaligned BrotliFillWindow would result in
38 | accumulator overflow. If unalignment is caused by BrotliSafeReadBits, then
39 | there is enough space in accumulator to fix alignment. */
40 | if (BROTLI_UNALIGNED_READ_FAST) {
41 | aligned_read_mask = 0; /* with a zero mask the alignment loop below never runs */
42 | }
43 | if (BrotliGetAvailableBits(br) == 0) {
44 | if (!BrotliPullByte(br)) {
45 | return BROTLI_FALSE;
46 | }
47 | }
48 |
49 | while ((((size_t)br->next_in) & aligned_read_mask) != 0) { /* pull single bytes until next_in is aligned */
50 | if (!BrotliPullByte(br)) {
51 | /* If we consumed all the input, we don't care about the alignment. */
52 | return BROTLI_TRUE;
53 | }
54 | }
55 | return BROTLI_TRUE;
56 | }
57 |
58 | BROTLI_BOOL BrotliSafeReadBits32Slow(BrotliBitReader* const br, /* reads n_bits (DCHECKed to be 25..32) into *val as two smaller reads */
59 | uint32_t n_bits, uint32_t* val) {
60 | uint32_t low_val;
61 | uint32_t high_val;
62 | BrotliBitReaderState memento;
63 | BROTLI_DCHECK(n_bits <= 32);
64 | BROTLI_DCHECK(n_bits > 24);
65 | BrotliBitReaderSaveState(br, &memento); /* snapshot so that a partial read can be undone */
66 | if (!BrotliSafeReadBits(br, 16, &low_val) ||
67 | !BrotliSafeReadBits(br, n_bits - 16, &high_val)) {
68 | BrotliBitReaderRestoreState(br, &memento); /* on failure put the reader back to the saved state */
69 | return BROTLI_FALSE;
70 | }
71 | *val = low_val | (high_val << 16); /* the 16 bits read first form the low half of the result */
72 | return BROTLI_TRUE;
73 | }
74 |
75 | #if defined(__cplusplus) || defined(c_plusplus)
76 | } /* extern "C" */
77 | #endif
78 |
--------------------------------------------------------------------------------
/c/enc/utf8_util.c:
--------------------------------------------------------------------------------
1 | /* Copyright 2013 Google Inc. All Rights Reserved.
2 |
3 | Distributed under MIT license.
4 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 | */
6 |
7 | /* Heuristics for deciding about the UTF8-ness of strings. */
8 |
9 | #include "utf8_util.h"
10 |
11 | #include <brotli/types.h>
12 |
13 | #if defined(__cplusplus) || defined(c_plusplus)
14 | extern "C" {
15 | #endif
16 |
17 | static size_t BrotliParseAsUTF8( /* decodes one symbol from input into *symbol; returns the number of bytes consumed (1..4) */
18 | int* symbol, const uint8_t* input, size_t size) {
19 | /* ASCII */
20 | if ((input[0] & 0x80) == 0) {
21 | *symbol = input[0];
22 | if (*symbol > 0) { /* a zero byte is not accepted here and ends up in the fallback at the bottom */
23 | return 1;
24 | }
25 | }
26 | /* 2-byte UTF8 */
27 | if (size > 1u &&
28 | (input[0] & 0xE0) == 0xC0 &&
29 | (input[1] & 0xC0) == 0x80) {
30 | *symbol = (((input[0] & 0x1F) << 6) |
31 | (input[1] & 0x3F));
32 | if (*symbol > 0x7F) { /* rejects overlong 2-byte forms */
33 | return 2;
34 | }
35 | }
36 | /* 3-byte UTF8 */
37 | if (size > 2u &&
38 | (input[0] & 0xF0) == 0xE0 &&
39 | (input[1] & 0xC0) == 0x80 &&
40 | (input[2] & 0xC0) == 0x80) {
41 | *symbol = (((input[0] & 0x0F) << 12) |
42 | ((input[1] & 0x3F) << 6) |
43 | (input[2] & 0x3F));
44 | if (*symbol > 0x7FF) { /* rejects overlong 3-byte forms */
45 | return 3;
46 | }
47 | }
48 | /* 4-byte UTF8 */
49 | if (size > 3u &&
50 | (input[0] & 0xF8) == 0xF0 &&
51 | (input[1] & 0xC0) == 0x80 &&
52 | (input[2] & 0xC0) == 0x80 &&
53 | (input[3] & 0xC0) == 0x80) {
54 | *symbol = (((input[0] & 0x07) << 18) |
55 | ((input[1] & 0x3F) << 12) |
56 | ((input[2] & 0x3F) << 6) |
57 | (input[3] & 0x3F));
58 | if (*symbol > 0xFFFF && *symbol <= 0x10FFFF) { /* rejects overlong forms and values above 0x10FFFF */
59 | return 4;
60 | }
61 | }
62 | /* Not UTF8, emit a special symbol above the UTF8-code space */
63 | *symbol = 0x110000 | input[0]; /* always >= 0x110000, which the caller uses to tell it apart from valid symbols */
64 | return 1;
65 | }
66 |
67 | /* Returns BROTLI_TRUE if more than min_fraction of the length bytes starting at pos (index masked with mask) parse as UTF8. */
68 | BROTLI_BOOL BrotliIsMostlyUTF8(
69 | const uint8_t* data, const size_t pos, const size_t mask,
70 | const size_t length, const double min_fraction) {
71 | size_t size_utf8 = 0; /* number of bytes that belonged to successfully parsed symbols */
72 | size_t i = 0;
73 | while (i < length) {
74 | int symbol;
75 | size_t bytes_read =
76 | BrotliParseAsUTF8(&symbol, &data[(pos + i) & mask], length - i); /* NOTE(review): only the start index is masked; the parser may read up to 3 bytes past it -- confirm the buffer has that slack */
77 | i += bytes_read;
78 | if (symbol < 0x110000) size_utf8 += bytes_read; /* symbols >= 0x110000 are the parser's "not UTF8" marker */
79 | }
80 | return TO_BROTLI_BOOL((double)size_utf8 > min_fraction * (double)length); /* strict comparison: exactly min_fraction is not enough */
81 | }
82 |
83 | #if defined(__cplusplus) || defined(c_plusplus)
84 | } /* extern "C" */
85 | #endif
86 |
--------------------------------------------------------------------------------
/c/common/shared_dictionary_internal.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2017 Google Inc. All Rights Reserved.
2 |
3 | Distributed under MIT license.
4 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 | */
6 |
7 | /* (Transparent) Shared Dictionary definition. */
8 |
9 | #ifndef BROTLI_COMMON_SHARED_DICTIONARY_INTERNAL_H_
10 | #define BROTLI_COMMON_SHARED_DICTIONARY_INTERNAL_H_
11 |
12 | #include <brotli/shared_dictionary.h>
13 | #include <brotli/types.h>
14 |
15 | #include "dictionary.h"
16 | #include "transform.h"
17 |
18 | #if defined(__cplusplus) || defined(c_plusplus)
19 | extern "C" {
20 | #endif
21 |
22 | struct BrotliSharedDictionaryStruct {
23 | /* LZ77 prefixes (compound dictionary). */
24 | uint32_t num_prefix; /* max SHARED_BROTLI_MAX_COMPOUND_DICTS */
25 | size_t prefix_size[SHARED_BROTLI_MAX_COMPOUND_DICTS];
26 | const uint8_t* prefix[SHARED_BROTLI_MAX_COMPOUND_DICTS];
27 |
28 | /* If set, the context map is used to select word and transform list from 64
29 | contexts, if not set, the context map is not used and only words[0] and
30 | transforms[0] are to be used. */
31 | BROTLI_BOOL context_based;
32 |
33 | uint8_t context_map[SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS];
34 |
35 | /* Number of word_list+transform_list combinations. */
36 | uint8_t num_dictionaries;
37 |
38 | /* Must use num_dictionaries values. */
39 | const BrotliDictionary* words[SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS];
40 |
41 | /* Must use num_dictionaries values. */
42 | const BrotliTransforms* transforms[SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS];
43 |
44 | /* Number of custom word lists. May be 0 if only Brotli's built-in is used. */
45 | uint8_t num_word_lists;
46 |
47 | /* Contents of the custom words lists. Must be NULL if num_word_lists is 0. */
48 | BrotliDictionary* words_instances;
49 |
50 | /* Number of custom transform lists. May be 0 if only Brotli's built-in is
51 | used. */
52 | uint8_t num_transform_lists;
53 |
54 | /* Contents of the custom transform lists. Must be NULL if num_transform_lists
55 | is 0. */
56 | BrotliTransforms* transforms_instances;
57 |
58 | /* Concatenated prefix_suffix_maps of the custom transform lists. Must be NULL
59 | if num_transform_lists is 0. */
60 | uint16_t* prefix_suffix_maps;
61 |
62 | /* Memory management: client-provided allocation callbacks and their handle. */
63 | brotli_alloc_func alloc_func;
64 | brotli_free_func free_func;
65 | void* memory_manager_opaque; /* presumably passed as the "opaque" argument of the two callbacks -- confirm in shared_dictionary.c */
66 | };
67 |
68 | typedef struct BrotliSharedDictionaryStruct BrotliSharedDictionaryInternal;
69 | #define BrotliSharedDictionary BrotliSharedDictionaryInternal
70 |
71 | #if defined(__cplusplus) || defined(c_plusplus)
72 | } /* extern "C" */
73 | #endif
74 |
75 | #endif /* BROTLI_COMMON_SHARED_DICTIONARY_INTERNAL_H_ */
76 |
--------------------------------------------------------------------------------
/c/enc/compound_dictionary.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2017 Google Inc. All Rights Reserved.
2 |
3 | Distributed under MIT license.
4 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 | */
6 |
7 | #ifndef BROTLI_ENC_PREPARED_DICTIONARY_H_
8 | #define BROTLI_ENC_PREPARED_DICTIONARY_H_
9 |
10 | #include <brotli/shared_dictionary.h>
11 | #include <brotli/types.h>
12 |
13 | #include "../common/platform.h"
14 | #include "../common/constants.h"
15 | #include "memory.h"
16 |
17 | /* "Fat" prepared dictionary, could be cooked outside of C implementation,
18 | * e.g. on Java side. LZ77 data is copied inside PreparedDictionary struct. */
19 | static const uint32_t kPreparedDictionaryMagic = 0xDEBCEDE0;
20 |
21 | static const uint32_t kSharedDictionaryMagic = 0xDEBCEDE1;
22 |
23 | static const uint32_t kManagedDictionaryMagic = 0xDEBCEDE2;
24 |
25 | /* "Lean" prepared dictionary. LZ77 data is referenced. It is the responsibility
26 | * of caller of "prepare dictionary" to keep the LZ77 data while prepared
27 | * dictionary is in use. */
28 | static const uint32_t kLeanPreparedDictionaryMagic = 0xDEBCEDE3;
29 |
30 | static const uint64_t kPreparedDictionaryHashMul64Long =
31 | BROTLI_MAKE_UINT64_T(0x1FE35A7Bu, 0xD3579BD3u);
32 |
33 | typedef struct PreparedDictionary { /* fixed-size header; the commented-out members below follow it in memory */
34 | uint32_t magic; /* presumably one of the k*DictionaryMagic constants; selects which trailing source member is present */
35 | uint32_t num_items;
36 | uint32_t source_size; /* length of the trailing source[] array when it is present */
37 | uint32_t hash_bits;
38 | uint32_t bucket_bits; /* heads[] holds 1 << bucket_bits entries */
39 | uint32_t slot_bits; /* slot_offsets[] holds 1 << slot_bits entries */
40 |
41 | /* --- Dynamic size members --- */
42 |
43 | /* uint32_t slot_offsets[1 << slot_bits]; */
44 | /* uint16_t heads[1 << bucket_bits]; */
45 | /* uint32_t items[variable]; */
46 |
47 | /* [maybe] uint8_t* source_ref, depending on magic. */
48 | /* [maybe] uint8_t source[source_size], depending on magic. */
49 | } PreparedDictionary;
50 |
51 | BROTLI_INTERNAL PreparedDictionary* CreatePreparedDictionary(MemoryManager* m,
52 | const uint8_t* source, size_t source_size);
53 |
54 | BROTLI_INTERNAL void DestroyPreparedDictionary(MemoryManager* m,
55 | PreparedDictionary* dictionary);
56 |
57 | typedef struct CompoundDictionary {
58 | /* LZ77 prefix, compound dictionary */
59 | size_t num_chunks;
60 | size_t total_size;
61 | /* Client instances. */
62 | const PreparedDictionary* chunks[SHARED_BROTLI_MAX_COMPOUND_DICTS + 1]; /* each array here has one slot more than SHARED_BROTLI_MAX_COMPOUND_DICTS */
63 | const uint8_t* chunk_source[SHARED_BROTLI_MAX_COMPOUND_DICTS + 1];
64 | size_t chunk_offsets[SHARED_BROTLI_MAX_COMPOUND_DICTS + 1];
65 |
66 | size_t num_prepared_instances_;
67 | /* Owned instances. */
68 | PreparedDictionary* prepared_instances_[SHARED_BROTLI_MAX_COMPOUND_DICTS + 1]; /* non-const, unlike chunks[] above */
69 | } CompoundDictionary;
70 |
71 | BROTLI_INTERNAL BROTLI_BOOL AttachPreparedDictionary(
72 | CompoundDictionary* compound, const PreparedDictionary* dictionary);
73 |
74 | #endif /* BROTLI_ENC_PREPARED_DICTIONARY_H_ */
75 |
--------------------------------------------------------------------------------
/c/include/brotli/types.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2013 Google Inc. All Rights Reserved.
2 |
3 | Distributed under MIT license.
4 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 | */
6 |
7 | /**
8 | * @file
9 | * Common types used in decoder and encoder API.
10 | */
11 |
12 | #ifndef BROTLI_COMMON_TYPES_H_
13 | #define BROTLI_COMMON_TYPES_H_
14 |
15 | #include <stddef.h> /* for size_t */
16 |
17 | #if defined(_MSC_VER) && (_MSC_VER < 1600)
18 | typedef __int8 int8_t;
19 | typedef unsigned __int8 uint8_t;
20 | typedef __int16 int16_t;
21 | typedef unsigned __int16 uint16_t;
22 | typedef __int32 int32_t;
23 | typedef unsigned __int32 uint32_t;
24 | typedef unsigned __int64 uint64_t;
25 | typedef __int64 int64_t;
26 | #else
27 | #include <stdint.h>
28 | #endif /* defined(_MSC_VER) && (_MSC_VER < 1600) */
29 |
30 | /**
31 | * A portable @c bool replacement.
32 | *
33 | * ::BROTLI_BOOL is a "documentation" type: actually it is @c int, but in API it
34 | * denotes a type, whose only values are ::BROTLI_TRUE and ::BROTLI_FALSE.
35 | *
36 | * ::BROTLI_BOOL values passed to Brotli should either be ::BROTLI_TRUE or
37 | * ::BROTLI_FALSE, or be a result of ::TO_BROTLI_BOOL macros.
38 | *
39 | * ::BROTLI_BOOL values returned by Brotli should not be tested for equality
40 | * with @c true, @c false, ::BROTLI_TRUE, ::BROTLI_FALSE, but rather should be
41 | * evaluated, for example: @code{.cpp}
42 | * if (SomeBrotliFunction(encoder, BROTLI_TRUE) &&
43 | * !OtherBrotliFunction(decoder, BROTLI_FALSE)) {
44 | * bool x = !!YetAnotherBrotliFunction(encoder, TO_BROTLI_BOOL(2 * 2 == 4));
45 | * DoSomething(x);
46 | * }
47 | * @endcode
48 | */
49 | #define BROTLI_BOOL int
50 | /** Portable @c true replacement. */
51 | #define BROTLI_TRUE 1
52 | /** Portable @c false replacement. */
53 | #define BROTLI_FALSE 0
54 | /** @c bool to ::BROTLI_BOOL conversion macros. */
55 | #define TO_BROTLI_BOOL(X) (!!(X) ? BROTLI_TRUE : BROTLI_FALSE)
56 |
57 | #define BROTLI_MAKE_UINT64_T(high, low) ((((uint64_t)(high)) << 32) | (low)) /* combines two 32-bit halves; both arguments are parenthesized so expression arguments bind correctly */
58 |
59 | #define BROTLI_UINT32_MAX (~((uint32_t)0))
60 | #define BROTLI_SIZE_MAX (~((size_t)0))
61 |
62 | /**
63 | * Allocating function pointer type.
64 | *
65 | * @param opaque custom memory manager handle provided by client
66 | * @param size requested memory region size; can not be @c 0
67 | * @returns @c 0 in the case of failure
68 | * @returns a valid pointer to a memory region of at least @p size bytes
69 | * long otherwise
70 | */
71 | typedef void* (*brotli_alloc_func)(void* opaque, size_t size);
72 |
73 | /**
74 | * Deallocating function pointer type.
75 | *
76 | * This function @b SHOULD do nothing if @p address is @c 0.
77 | *
78 | * @param opaque custom memory manager handle provided by client
79 | * @param address memory region pointer returned by ::brotli_alloc_func, or @c 0
80 | */
81 | typedef void (*brotli_free_func)(void* opaque, void* address);
82 |
83 | #endif /* BROTLI_COMMON_TYPES_H_ */
84 |
--------------------------------------------------------------------------------
/c/enc/compress_fragment_two_pass.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2015 Google Inc. All Rights Reserved.
2 |
3 | Distributed under MIT license.
4 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 | */
6 |
7 | /* Function for fast encoding of an input fragment, independently from the input
8 | history. This function uses two-pass processing: in the first pass we save
9 | the found backward matches and literal bytes into a buffer, and in the
10 | second pass we emit them into the bit stream using prefix codes built based
11 | on the actual command and literal byte histograms. */
12 |
13 | #ifndef BROTLI_ENC_COMPRESS_FRAGMENT_TWO_PASS_H_
14 | #define BROTLI_ENC_COMPRESS_FRAGMENT_TWO_PASS_H_
15 |
16 | #include <brotli/types.h>
17 |
18 | #include "../common/constants.h"
19 | #include "../common/platform.h"
20 | #include "entropy_encode.h"
21 |
22 | #if defined(__cplusplus) || defined(c_plusplus)
23 | extern "C" {
24 | #endif
25 |
26 | /* TODO(eustas): turn to macro. */
27 | static const size_t kCompressFragmentTwoPassBlockSize = 1 << 17;
28 |
29 | typedef struct BrotliTwoPassArena { /* working buffers handed to BrotliCompressFragmentTwoPass as its first argument */
30 | uint32_t lit_histo[256]; /* lit_*: one entry per byte value */
31 | uint8_t lit_depth[256];
32 | uint16_t lit_bits[256];
33 |
34 | uint32_t cmd_histo[128]; /* cmd_*: 128 entries each */
35 | uint8_t cmd_depth[128];
36 | uint16_t cmd_bits[128];
37 |
38 | /* Presumably scratch space for BuildAndStoreCommandPrefixCode -- confirm in the .c file */
39 | HuffmanTree tmp_tree[2 * BROTLI_NUM_LITERAL_SYMBOLS + 1];
40 | uint8_t tmp_depth[BROTLI_NUM_COMMAND_SYMBOLS];
41 | uint16_t tmp_bits[64];
42 | } BrotliTwoPassArena;
43 |
44 | /* Compresses "input" string to the "*storage" buffer as one or more complete
45 | meta-blocks, and updates the "*storage_ix" bit position.
46 |
47 | If "is_last" is 1, emits an additional empty last meta-block.
48 |
49 | REQUIRES: "input_size" is greater than zero, or "is_last" is 1.
50 | REQUIRES: "input_size" is less or equal to maximal metablock size (1 << 24).
51 | REQUIRES: "command_buf" and "literal_buf" point to at least
52 | kCompressFragmentTwoPassBlockSize long arrays.
53 | REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
54 | REQUIRES: "table_size" is a power of two
55 | OUTPUT: maximal copy distance <= |input_size|
56 | OUTPUT: maximal copy distance <= BROTLI_MAX_BACKWARD_LIMIT(18) */
57 | BROTLI_INTERNAL void BrotliCompressFragmentTwoPass(BrotliTwoPassArena* s,
58 | const uint8_t* input,
59 | size_t input_size,
60 | BROTLI_BOOL is_last,
61 | uint32_t* command_buf,
62 | uint8_t* literal_buf,
63 | int* table,
64 | size_t table_size,
65 | size_t* storage_ix,
66 | uint8_t* storage);
67 |
68 | #if defined(__cplusplus) || defined(c_plusplus)
69 | } /* extern "C" */
70 | #endif
71 |
72 | #endif /* BROTLI_ENC_COMPRESS_FRAGMENT_TWO_PASS_H_ */
73 |
--------------------------------------------------------------------------------
/c/enc/write_bits.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2010 Google Inc. All Rights Reserved.
2 |
3 | Distributed under MIT license.
4 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 | */
6 |
7 | /* Write bits into a byte array. */
8 |
9 | #ifndef BROTLI_ENC_WRITE_BITS_H_
10 | #define BROTLI_ENC_WRITE_BITS_H_
11 |
12 | #include <brotli/types.h>
13 |
14 | #include "../common/platform.h"
15 |
16 | #if defined(__cplusplus) || defined(c_plusplus)
17 | extern "C" {
18 | #endif
19 |
20 | /* This function writes bits into bytes in increasing addresses, and within
21 | a byte least-significant-bit first.
22 |
23 | The function can write up to 56 bits in one go with WriteBits
24 | Example: let's assume that 3 bits (Rs below) have been written already:
25 |
26 | BYTE-0 BYTE+1 BYTE+2
27 |
28 | 0000 0RRR 0000 0000 0000 0000
29 |
30 | Now, we could write 5 or less bits in MSB by just shifting by 3
31 | and OR'ing to BYTE-0.
32 |
33 | For n bits, we take the last 5 bits, OR that with high bits in BYTE-0,
34 | and locate the rest in BYTE+1, BYTE+2, etc. */
35 | static BROTLI_INLINE void BrotliWriteBits(size_t n_bits, /* number of low bits of "bits" to append; DCHECKed to be <= 56 */
36 | uint64_t bits, /* value to append; DCHECKed to have no bits set at or above n_bits */
37 | size_t* BROTLI_RESTRICT pos, /* in/out: bit position within array, advanced by n_bits */
38 | uint8_t* BROTLI_RESTRICT array) {
39 | BROTLI_LOG(("WriteBits %2d 0x%08x%08x %10d\n", (int)n_bits,
40 | (uint32_t)(bits >> 32), (uint32_t)(bits & 0xFFFFFFFF),
41 | (int)*pos));
42 | BROTLI_DCHECK((bits >> n_bits) == 0);
43 | BROTLI_DCHECK(n_bits <= 56);
44 | #if defined(BROTLI_LITTLE_ENDIAN) /* NOTE(review): tests defined(), while find_match_length.h tests the macro's value -- confirm platform.h never defines it as 0 */
45 | /* This branch of the code can write up to 56 bits at a time,
46 | 7 bits are lost by being perhaps already in *p and at least
47 | 1 bit is needed to initialize the bit-stream ahead (i.e. if 7
48 | bits are in *p and we write 57 bits, then the next write will
49 | access a byte that was never initialized). */
50 | {
51 | uint8_t* p = &array[*pos >> 3]; /* byte that contains the current bit position */
52 | uint64_t v = (uint64_t)(*p); /* Zero-extend 8 to 64 bits. */
53 | v |= bits << (*pos & 7); /* place the new bits above those already used in *p */
54 | BROTLI_UNALIGNED_STORE64LE(p, v); /* Set some bits. */
55 | *pos += n_bits;
56 | }
57 | #else
58 | /* implicit & 0xFF is assumed for uint8_t arithmetics */
59 | {
60 | uint8_t* array_pos = &array[*pos >> 3];
61 | const size_t bits_reserved_in_first_byte = (*pos & 7);
62 | size_t bits_left_to_write;
63 | bits <<= bits_reserved_in_first_byte;
64 | *array_pos++ |= (uint8_t)bits; /* merge into the partially filled first byte */
65 | for (bits_left_to_write = n_bits + bits_reserved_in_first_byte;
66 | bits_left_to_write >= 9;
67 | bits_left_to_write -= 8) {
68 | bits >>= 8;
69 | *array_pos++ = (uint8_t)bits; /* following bytes are overwritten, not merged */
70 | }
71 | *array_pos = 0; /* clear the next byte so that a later |= starts from zero */
72 | *pos += n_bits;
73 | }
74 | #endif
75 | }
76 |
77 | static BROTLI_INLINE void BrotliWriteBitsPrepareStorage( /* zeroes the byte at a byte-aligned bit position */
78 | size_t pos, uint8_t* array) {
79 | BROTLI_LOG(("WriteBitsPrepareStorage %10d\n", (int)pos));
80 | BROTLI_DCHECK((pos & 7) == 0); /* pos must be a multiple of 8 bits */
81 | array[pos >> 3] = 0;
82 | }
83 |
84 | #if defined(__cplusplus) || defined(c_plusplus)
85 | } /* extern "C" */
86 | #endif
87 |
88 | #endif /* BROTLI_ENC_WRITE_BITS_H_ */
89 |
--------------------------------------------------------------------------------
/scripts/sources.lst:
--------------------------------------------------------------------------------
1 | # IT WOULD BE FOOLISH TO USE COMPUTERS TO AUTOMATE REPETITIVE TASKS:
2 | # neither CMake nor Automake support "glob" expressions,
3 | # so every header and source file have to be listed manually.
4 |
5 | BROTLI_CLI_C = \
6 | c/tools/brotli.c
7 |
8 | BROTLI_COMMON_C = \
9 | c/common/constants.c \
10 | c/common/context.c \
11 | c/common/dictionary.c \
12 | c/common/platform.c \
13 | c/common/shared_dictionary.c \
14 | c/common/transform.c
15 |
16 | BROTLI_COMMON_H = \
17 | c/common/constants.h \
18 | c/common/context.h \
19 | c/common/dictionary.h \
20 | c/common/platform.h \
21 | c/common/shared_dictionary_internal.h \
22 | c/common/transform.h \
23 | c/common/version.h
24 |
25 | BROTLI_DEC_C = \
26 | c/dec/bit_reader.c \
27 | c/dec/decode.c \
28 | c/dec/huffman.c \
29 | c/dec/state.c
30 |
31 | BROTLI_DEC_H = \
32 | c/dec/bit_reader.h \
33 | c/dec/huffman.h \
34 | c/dec/prefix.h \
35 | c/dec/state.h
36 |
37 | BROTLI_ENC_C = \
38 | c/enc/backward_references.c \
39 | c/enc/backward_references_hq.c \
40 | c/enc/bit_cost.c \
41 | c/enc/block_splitter.c \
42 | c/enc/brotli_bit_stream.c \
43 | c/enc/cluster.c \
44 | c/enc/command.c \
45 | c/enc/compound_dictionary.c \
46 | c/enc/compress_fragment.c \
47 | c/enc/compress_fragment_two_pass.c \
48 | c/enc/dictionary_hash.c \
49 | c/enc/encode.c \
50 | c/enc/encoder_dict.c \
51 | c/enc/entropy_encode.c \
52 | c/enc/fast_log.c \
53 | c/enc/histogram.c \
54 | c/enc/literal_cost.c \
55 | c/enc/memory.c \
56 | c/enc/metablock.c \
57 | c/enc/static_dict.c \
58 | c/enc/utf8_util.c
59 |
60 | BROTLI_ENC_H = \
61 | c/enc/backward_references.h \
62 | c/enc/backward_references_hq.h \
63 | c/enc/backward_references_inc.h \
64 | c/enc/bit_cost.h \
65 | c/enc/bit_cost_inc.h \
66 | c/enc/block_encoder_inc.h \
67 | c/enc/block_splitter.h \
68 | c/enc/block_splitter_inc.h \
69 | c/enc/brotli_bit_stream.h \
70 | c/enc/cluster.h \
71 | c/enc/cluster_inc.h \
72 | c/enc/command.h \
73 | c/enc/compound_dictionary.h \
74 | c/enc/compress_fragment.h \
75 | c/enc/compress_fragment_two_pass.h \
76 | c/enc/dictionary_hash.h \
77 | c/enc/encoder_dict.h \
78 | c/enc/entropy_encode.h \
79 | c/enc/entropy_encode_static.h \
80 | c/enc/fast_log.h \
81 | c/enc/find_match_length.h \
82 | c/enc/hash.h \
83 | c/enc/hash_composite_inc.h \
84 | c/enc/hash_forgetful_chain_inc.h \
85 | c/enc/hash_longest_match64_inc.h \
86 | c/enc/hash_longest_match_inc.h \
87 | c/enc/hash_longest_match_quickly_inc.h \
88 | c/enc/hash_rolling_inc.h \
89 | c/enc/hash_to_binary_tree_inc.h \
90 | c/enc/histogram.h \
91 | c/enc/histogram_inc.h \
92 | c/enc/literal_cost.h \
93 | c/enc/memory.h \
94 | c/enc/metablock.h \
95 | c/enc/metablock_inc.h \
96 | c/enc/params.h \
97 | c/enc/prefix.h \
98 | c/enc/quality.h \
99 | c/enc/ringbuffer.h \
100 | c/enc/state.h \
101 | c/enc/static_dict.h \
102 | c/enc/static_dict_lut.h \
103 | c/enc/utf8_util.h \
104 | c/enc/write_bits.h
105 |
106 | BROTLI_INCLUDE = \
107 | c/include/brotli/decode.h \
108 | c/include/brotli/encode.h \
109 | c/include/brotli/port.h \
110 | c/include/brotli/shared_dictionary.h \
111 | c/include/brotli/types.h
112 |
--------------------------------------------------------------------------------
/BUILD:
--------------------------------------------------------------------------------
1 | # Description:
2 | # Brotli is a generic-purpose lossless compression algorithm.
3 |
4 | load(":compiler_config_setting.bzl", "create_msvc_config")
5 |
6 | package(
7 | default_visibility = ["//visibility:public"],
8 | )
9 |
10 | licenses(["notice"]) # MIT
11 |
12 | exports_files(["LICENSE"])
13 |
14 | config_setting(
15 | name = "darwin",
16 | values = {"cpu": "darwin"},
17 | visibility = ["//visibility:public"],
18 | )
19 |
20 | config_setting(
21 | name = "darwin_x86_64",
22 | values = {"cpu": "darwin_x86_64"},
23 | visibility = ["//visibility:public"],
24 | )
25 |
26 | config_setting(
27 | name = "windows",
28 | values = {"cpu": "x64_windows"},
29 | visibility = ["//visibility:public"],
30 | )
31 |
32 | config_setting(
33 | name = "windows_msvc",
34 | values = {"cpu": "x64_windows_msvc"},
35 | visibility = ["//visibility:public"],
36 | )
37 |
38 | config_setting(
39 | name = "windows_msys",
40 | values = {"cpu": "x64_windows_msys"},
41 | visibility = ["//visibility:public"],
42 | )
43 |
44 | create_msvc_config()
45 |
46 | STRICT_C_OPTIONS = select({
47 | ":msvc": [],
48 | "//conditions:default": [
49 | "--pedantic-errors",
50 | "-Wall",
51 | "-Wconversion",
52 | "-Werror",
53 | "-Wextra",
54 | "-Wlong-long",
55 | "-Wmissing-declarations",
56 | "-Wmissing-prototypes",
57 | "-Wno-strict-aliasing",
58 | "-Wshadow",
59 | "-Wsign-compare",
60 | ],
61 | })
62 |
63 | filegroup(
64 | name = "public_headers",
65 | srcs = glob(["c/include/brotli/*.h"]),
66 | )
67 |
68 | filegroup(
69 | name = "common_headers",
70 | srcs = glob(["c/common/*.h"]),
71 | )
72 |
73 | filegroup(
74 | name = "common_sources",
75 | srcs = glob(["c/common/*.c"]),
76 | )
77 |
78 | filegroup(
79 | name = "dec_headers",
80 | srcs = glob(["c/dec/*.h"]),
81 | )
82 |
83 | filegroup(
84 | name = "dec_sources",
85 | srcs = glob(["c/dec/*.c"]),
86 | )
87 |
88 | filegroup(
89 | name = "enc_headers",
90 | srcs = glob(["c/enc/*.h"]),
91 | )
92 |
93 | filegroup(
94 | name = "enc_sources",
95 | srcs = glob(["c/enc/*.c"]),
96 | )
97 |
98 | cc_library(
99 | name = "brotli_inc",
100 | hdrs = [":public_headers"],
101 | copts = STRICT_C_OPTIONS,
102 | strip_include_prefix = "c/include",
103 | )
104 |
105 | cc_library(
106 | name = "brotlicommon",
107 | srcs = [":common_sources"],
108 | hdrs = [":common_headers"],
109 | copts = STRICT_C_OPTIONS,
110 | deps = [":brotli_inc"],
111 | )
112 |
113 | cc_library(
114 | name = "brotlidec",
115 | srcs = [":dec_sources"],
116 | hdrs = [":dec_headers"],
117 | copts = STRICT_C_OPTIONS,
118 | deps = [":brotlicommon"],
119 | )
120 |
121 | cc_library(
122 | name = "brotlienc",
123 | srcs = [":enc_sources"],
124 | hdrs = [":enc_headers"],
125 | copts = STRICT_C_OPTIONS,
126 | linkopts = select({
127 | ":msvc": [],
128 | "//conditions:default": ["-lm"],
129 | }),
130 | deps = [":brotlicommon"],
131 | )
132 |
133 | cc_binary(
134 | name = "brotli",
135 | srcs = ["c/tools/brotli.c"],
136 | copts = STRICT_C_OPTIONS,
137 | linkstatic = 1,
138 | deps = [
139 | ":brotlidec",
140 | ":brotlienc",
141 | ],
142 | )
143 |
144 | filegroup(
145 | name = "dictionary",
146 | srcs = ["c/common/dictionary.bin"],
147 | )
148 |
--------------------------------------------------------------------------------
/c/common/transform.h:
--------------------------------------------------------------------------------
1 | /* transforms is a part of ABI, but not API.
2 |
3 | It means that there are some functions that are supposed to be in "common"
4 | library, but header itself is not placed into include/brotli. This way,
5 | aforementioned functions will be available only to brotli internals.
6 | */
7 |
8 | #ifndef BROTLI_COMMON_TRANSFORM_H_
9 | #define BROTLI_COMMON_TRANSFORM_H_
10 |
11 | #include <brotli/port.h>
12 | #include <brotli/types.h>
13 |
14 | #if defined(__cplusplus) || defined(c_plusplus)
15 | extern "C" {
16 | #endif
17 |
18 | enum BrotliWordTransformType {
19 | BROTLI_TRANSFORM_IDENTITY = 0,
20 | BROTLI_TRANSFORM_OMIT_LAST_1 = 1,
21 | BROTLI_TRANSFORM_OMIT_LAST_2 = 2,
22 | BROTLI_TRANSFORM_OMIT_LAST_3 = 3,
23 | BROTLI_TRANSFORM_OMIT_LAST_4 = 4,
24 | BROTLI_TRANSFORM_OMIT_LAST_5 = 5,
25 | BROTLI_TRANSFORM_OMIT_LAST_6 = 6,
26 | BROTLI_TRANSFORM_OMIT_LAST_7 = 7,
27 | BROTLI_TRANSFORM_OMIT_LAST_8 = 8,
28 | BROTLI_TRANSFORM_OMIT_LAST_9 = 9,
29 | BROTLI_TRANSFORM_UPPERCASE_FIRST = 10,
30 | BROTLI_TRANSFORM_UPPERCASE_ALL = 11,
31 | BROTLI_TRANSFORM_OMIT_FIRST_1 = 12,
32 | BROTLI_TRANSFORM_OMIT_FIRST_2 = 13,
33 | BROTLI_TRANSFORM_OMIT_FIRST_3 = 14,
34 | BROTLI_TRANSFORM_OMIT_FIRST_4 = 15,
35 | BROTLI_TRANSFORM_OMIT_FIRST_5 = 16,
36 | BROTLI_TRANSFORM_OMIT_FIRST_6 = 17,
37 | BROTLI_TRANSFORM_OMIT_FIRST_7 = 18,
38 | BROTLI_TRANSFORM_OMIT_FIRST_8 = 19,
39 | BROTLI_TRANSFORM_OMIT_FIRST_9 = 20,
40 | BROTLI_TRANSFORM_SHIFT_FIRST = 21,
41 | BROTLI_TRANSFORM_SHIFT_ALL = 22,
42 | BROTLI_NUM_TRANSFORM_TYPES /* Counts transforms, not a transform itself. */
43 | };
44 |
45 | #define BROTLI_TRANSFORMS_MAX_CUT_OFF BROTLI_TRANSFORM_OMIT_LAST_9
46 |
47 | typedef struct BrotliTransforms {
48 | uint16_t prefix_suffix_size;
49 | /* Last character must be null, so prefix_suffix_size must be at least 1. */
50 | const uint8_t* prefix_suffix;
51 | const uint16_t* prefix_suffix_map;
52 | uint32_t num_transforms;
53 | /* Each entry is a [prefix_id, transform, suffix_id] triplet. */
54 | const uint8_t* transforms;
55 | /* Shift for BROTLI_TRANSFORM_SHIFT_FIRST and BROTLI_TRANSFORM_SHIFT_ALL,
56 | must be NULL if and only if no such transforms are present. */
57 | const uint8_t* params;
58 | /* Indices of transforms like ["", BROTLI_TRANSFORM_OMIT_LAST_#, ""].
59 | 0-th element corresponds to ["", BROTLI_TRANSFORM_IDENTITY, ""].
60 | -1, if cut-off transform does not exist. */
61 | int16_t cutOffTransforms[BROTLI_TRANSFORMS_MAX_CUT_OFF + 1];
62 | } BrotliTransforms;
63 |
64 | /* T is BrotliTransforms*; result is uint8_t. */
65 | #define BROTLI_TRANSFORM_PREFIX_ID(T, I) ((T)->transforms[((I) * 3) + 0])
66 | #define BROTLI_TRANSFORM_TYPE(T, I) ((T)->transforms[((I) * 3) + 1])
67 | #define BROTLI_TRANSFORM_SUFFIX_ID(T, I) ((T)->transforms[((I) * 3) + 2])
68 |
69 | /* T is BrotliTransforms*; result is const uint8_t*. */
70 | #define BROTLI_TRANSFORM_PREFIX(T, I) (&(T)->prefix_suffix[ \
71 | (T)->prefix_suffix_map[BROTLI_TRANSFORM_PREFIX_ID(T, I)]])
72 | #define BROTLI_TRANSFORM_SUFFIX(T, I) (&(T)->prefix_suffix[ \
73 | (T)->prefix_suffix_map[BROTLI_TRANSFORM_SUFFIX_ID(T, I)]])
74 |
75 | BROTLI_COMMON_API const BrotliTransforms* BrotliGetTransforms(void);
76 |
77 | BROTLI_COMMON_API int BrotliTransformDictionaryWord(
78 | uint8_t* dst, const uint8_t* word, int len,
79 | const BrotliTransforms* transforms, int transform_idx);
80 |
81 | #if defined(__cplusplus) || defined(c_plusplus)
82 | } /* extern "C" */
83 | #endif
84 |
85 | #endif /* BROTLI_COMMON_TRANSFORM_H_ */
86 |
--------------------------------------------------------------------------------
/c/enc/state.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2022 Google Inc. All Rights Reserved.
2 |
3 | Distributed under MIT license.
4 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 | */
6 |
7 | /* Encoder state. */
8 |
9 | #ifndef BROTLI_ENC_STATE_H_
10 | #define BROTLI_ENC_STATE_H_
11 |
12 | #include <brotli/types.h>
13 |
14 | #include "command.h"
15 | #include "compress_fragment.h"
16 | #include "compress_fragment_two_pass.h"
17 | #include "hash.h"
18 | #include "memory.h"
19 | #include "params.h"
20 | #include "ringbuffer.h"
21 |
22 | typedef enum BrotliEncoderStreamState {
23 | /* Default state. */
24 | BROTLI_STREAM_PROCESSING = 0,
25 | /* Intermediate state; after next block is emitted, byte-padding should be
26 | performed before getting back to default state. */
27 | BROTLI_STREAM_FLUSH_REQUESTED = 1,
28 | /* Last metablock was produced; no more input is acceptable. */
29 | BROTLI_STREAM_FINISHED = 2,
30 | /* Flushing compressed block and writing meta-data block header. */
31 | BROTLI_STREAM_METADATA_HEAD = 3,
32 | /* Writing metadata block body. */
33 | BROTLI_STREAM_METADATA_BODY = 4
34 | } BrotliEncoderStreamState;
35 |
36 | typedef enum BrotliEncoderFlintState {
37 | BROTLI_FLINT_NEEDS_2_BYTES = 2,
38 | BROTLI_FLINT_NEEDS_1_BYTE = 1,
39 | BROTLI_FLINT_WAITING_FOR_PROCESSING = 0,
40 | BROTLI_FLINT_WAITING_FOR_FLUSHING = -1,
41 | BROTLI_FLINT_DONE = -2
42 | } BrotliEncoderFlintState;
43 |
44 | typedef struct BrotliEncoderStateStruct {
45 | BrotliEncoderParams params;
46 |
47 | MemoryManager memory_manager_;
48 |
49 | uint64_t input_pos_;
50 | RingBuffer ringbuffer_;
51 | size_t cmd_alloc_size_;
52 | Command* commands_;
53 | size_t num_commands_;
54 | size_t num_literals_;
55 | size_t last_insert_len_;
56 | uint64_t last_flush_pos_;
57 | uint64_t last_processed_pos_;
58 | int dist_cache_[BROTLI_NUM_DISTANCE_SHORT_CODES];
59 | int saved_dist_cache_[4];
60 | uint16_t last_bytes_;
61 | uint8_t last_bytes_bits_;
62 | /* "Flint" is a tiny uncompressed block emitted before the continuation
63 | block to unwire literal context from previous data. Despite being int8_t,
64 | field is actually BrotliEncoderFlintState enum. */
65 | int8_t flint_;
66 | uint8_t prev_byte_;
67 | uint8_t prev_byte2_;
68 | size_t storage_size_;
69 | uint8_t* storage_;
70 |
71 | Hasher hasher_;
72 |
73 | /* Hash table for FAST_ONE_PASS_COMPRESSION_QUALITY mode. */
74 | int small_table_[1 << 10]; /* 4KiB */
75 | int* large_table_; /* Allocated only when needed */
76 | size_t large_table_size_;
77 |
78 | BrotliOnePassArena* one_pass_arena_;
79 | BrotliTwoPassArena* two_pass_arena_;
80 |
81 | /* Command and literal buffers for FAST_TWO_PASS_COMPRESSION_QUALITY. */
82 | uint32_t* command_buf_;
83 | uint8_t* literal_buf_;
84 |
85 | uint64_t total_in_;
86 | uint8_t* next_out_;
87 | size_t available_out_;
88 | uint64_t total_out_;
89 | /* Temporary buffer for padding flush bits or metadata block header / body. */
90 | union {
91 | uint64_t u64[2];
92 | uint8_t u8[16];
93 | } tiny_buf_;
94 | uint32_t remaining_metadata_bytes_;
95 | BrotliEncoderStreamState stream_state_;
96 |
97 | BROTLI_BOOL is_last_block_emitted_;
98 | BROTLI_BOOL is_initialized_;
99 | } BrotliEncoderStateStruct;
100 |
101 | typedef struct BrotliEncoderStateStruct BrotliEncoderStateInternal;
102 | #define BrotliEncoderState BrotliEncoderStateInternal
103 |
104 | #endif /* BROTLI_ENC_STATE_H_ */
105 |
--------------------------------------------------------------------------------
/docs/types.h.3:
--------------------------------------------------------------------------------
1 | .TH "types.h" 3 "August 2021" "Brotli" \" -*- nroff -*-
2 | .ad l
3 | .nh
4 | .SH NAME
5 | types.h \- Common types used in decoder and encoder API\&.
6 |
7 | .SH SYNOPSIS
8 | .br
9 | .PP
10 | .SS "Macros"
11 |
12 | .in +1c
13 | .ti -1c
14 | .RI "#define \fBBROTLI_BOOL\fP int"
15 | .br
16 | .RI "\fIA portable \fCbool\fP replacement\&. \fP"
17 | .ti -1c
18 | .RI "#define \fBBROTLI_FALSE\fP 0"
19 | .br
20 | .RI "\fIPortable \fCfalse\fP replacement\&. \fP"
21 | .ti -1c
22 | .RI "#define \fBBROTLI_TRUE\fP 1"
23 | .br
24 | .RI "\fIPortable \fCtrue\fP replacement\&. \fP"
25 | .ti -1c
26 | .RI "#define \fBTO_BROTLI_BOOL\fP(X) (!!(X) ? \fBBROTLI_TRUE\fP : \fBBROTLI_FALSE\fP)"
27 | .br
28 | .RI "\fI\fCbool\fP to \fBBROTLI_BOOL\fP conversion macros\&. \fP"
29 | .in -1c
30 | .SS "Typedefs"
31 |
32 | .in +1c
33 | .ti -1c
34 | .RI "typedef void *(* \fBbrotli_alloc_func\fP) (void *opaque, size_t size)"
35 | .br
36 | .RI "\fIAllocating function pointer type\&. \fP"
37 | .ti -1c
38 | .RI "typedef void(* \fBbrotli_free_func\fP) (void *opaque, void *address)"
39 | .br
40 | .RI "\fIDeallocating function pointer type\&. \fP"
41 | .in -1c
42 | .SH "Detailed Description"
43 | .PP
44 | Common types used in decoder and encoder API\&.
45 |
46 |
47 | .SH "Macro Definition Documentation"
48 | .PP
49 | .SS "#define BROTLI_BOOL int"
50 |
51 | .PP
52 | A portable \fCbool\fP replacement\&. \fBBROTLI_BOOL\fP is a 'documentation' type: actually it is \fCint\fP, but in API it denotes a type, whose only values are \fBBROTLI_TRUE\fP and \fBBROTLI_FALSE\fP\&.
53 | .PP
54 | \fBBROTLI_BOOL\fP values passed to Brotli should either be \fBBROTLI_TRUE\fP or \fBBROTLI_FALSE\fP, or be a result of \fBTO_BROTLI_BOOL\fP macros\&.
55 | .PP
56 | \fBBROTLI_BOOL\fP values returned by Brotli should not be tested for equality with \fCtrue\fP, \fCfalse\fP, \fBBROTLI_TRUE\fP, \fBBROTLI_FALSE\fP, but rather should be evaluated, for example:
57 | .PP
58 | .nf
59 | if (SomeBrotliFunction(encoder, BROTLI_TRUE) &&
60 | !OtherBrotliFunction(decoder, BROTLI_FALSE)) {
61 | bool x = !!YetAnotherBrotliFunction(encoder, TO_BROTLI_BOOL(2 * 2 == 4));
62 | DoSomething(x);
63 | }
64 |
65 | .fi
66 | .PP
67 |
68 | .SS "#define BROTLI_FALSE 0"
69 |
70 | .PP
71 | Portable \fCfalse\fP replacement\&.
72 | .SS "#define BROTLI_TRUE 1"
73 |
74 | .PP
75 | Portable \fCtrue\fP replacement\&.
76 | .SS "#define TO_BROTLI_BOOL(X) (!!(X) ? \fBBROTLI_TRUE\fP : \fBBROTLI_FALSE\fP)"
77 |
78 | .PP
79 | \fCbool\fP to \fBBROTLI_BOOL\fP conversion macros\&.
80 | .SH "Typedef Documentation"
81 | .PP
82 | .SS "typedef void*(* brotli_alloc_func) (void *opaque, size_t size)"
83 |
84 | .PP
85 | Allocating function pointer type\&.
86 | .PP
87 | \fBParameters:\fP
88 | .RS 4
89 | \fIopaque\fP custom memory manager handle provided by client
90 | .br
91 | \fIsize\fP requested memory region size; can not be \fC0\fP
92 | .RE
93 | .PP
94 | \fBReturns:\fP
95 | .RS 4
96 | \fC0\fP in the case of failure
97 | .PP
98 | a valid pointer to a memory region of at least \fCsize\fP bytes long otherwise
99 | .RE
100 | .PP
101 |
102 | .SS "typedef void(* brotli_free_func) (void *opaque, void *address)"
103 |
104 | .PP
105 | Deallocating function pointer type\&. This function \fBSHOULD\fP do nothing if \fCaddress\fP is \fC0\fP\&.
106 | .PP
107 | \fBParameters:\fP
108 | .RS 4
109 | \fIopaque\fP custom memory manager handle provided by client
110 | .br
111 | \fIaddress\fP memory region pointer returned by \fBbrotli_alloc_func\fP, or \fC0\fP
112 | .RE
113 | .PP
114 |
115 | .SH "Author"
116 | .PP
117 | Generated automatically by Doxygen for Brotli from the source code\&.
118 |
--------------------------------------------------------------------------------
/c/enc/histogram.c:
--------------------------------------------------------------------------------
1 | /* Copyright 2013 Google Inc. All Rights Reserved.
2 |
3 | Distributed under MIT license.
4 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 | */
6 |
7 | /* Build per-context histograms of literals, commands and distance codes. */
8 |
9 | #include "histogram.h"
10 |
11 | #include "../common/context.h"
12 | #include "block_splitter.h"
13 | #include "command.h"
14 |
15 | #if defined(__cplusplus) || defined(c_plusplus)
16 | extern "C" {
17 | #endif
18 |
19 | typedef struct BlockSplitIterator { /* Cursor that walks a BlockSplit one symbol at a time. */
20 | const BlockSplit* split_; /* Not owned. */
21 | size_t idx_; /* Index of the current block in split_->types / split_->lengths. */
22 | size_t type_; /* Type of the current block; 0 for the first block. */
23 | size_t length_; /* Symbols left in the current block before moving to the next one. */
24 | } BlockSplitIterator;
25 |
26 | static void InitBlockSplitIterator(BlockSplitIterator* self, /* Positions self at the first block of split. */
27 | const BlockSplit* split) {
28 | self->split_ = split;
29 | self->idx_ = 0;
30 | self->type_ = 0; /* The first block is taken to be of type 0; types[0] is not read. */
31 | self->length_ = split->lengths ? split->lengths[0] : 0; /* A split without a lengths array starts with 0 symbols left. */
32 | }
33 |
34 | static void BlockSplitIteratorNext(BlockSplitIterator* self) { /* Consumes one symbol, stepping to the next block first if the current one is used up. */
35 | if (self->length_ == 0) {
36 | ++self->idx_;
37 | self->type_ = self->split_->types[self->idx_]; /* No bounds check here: callers must not advance past the last block. */
38 | self->length_ = self->split_->lengths[self->idx_];
39 | }
40 | --self->length_; /* NOTE(review): size_t wraps around if the freshly loaded length is 0 -- presumably never the case; confirm. */
41 | }
42 |
43 | void BrotliBuildHistogramsWithContext( /* Fills the literal, command and distance histograms for a run of commands. */
44 | const Command* cmds, const size_t num_commands,
45 | const BlockSplit* literal_split, const BlockSplit* insert_and_copy_split,
46 | const BlockSplit* dist_split, const uint8_t* ringbuffer, size_t start_pos,
47 | size_t mask, uint8_t prev_byte, uint8_t prev_byte2,
48 | const ContextType* context_modes, HistogramLiteral* literal_histograms,
49 | HistogramCommand* insert_and_copy_histograms,
50 | HistogramDistance* copy_dist_histograms) {
51 | size_t pos = start_pos; /* Input position; masked with mask on every ringbuffer access. */
52 | BlockSplitIterator literal_it;
53 | BlockSplitIterator insert_and_copy_it;
54 | BlockSplitIterator dist_it;
55 | size_t i;
56 |
57 | InitBlockSplitIterator(&literal_it, literal_split);
58 | InitBlockSplitIterator(&insert_and_copy_it, insert_and_copy_split);
59 | InitBlockSplitIterator(&dist_it, dist_split);
60 | for (i = 0; i < num_commands; ++i) {
61 | const Command* cmd = &cmds[i];
62 | size_t j;
63 | BlockSplitIteratorNext(&insert_and_copy_it); /* Every command consumes one insert-and-copy symbol. */
64 | HistogramAddCommand(&insert_and_copy_histograms[insert_and_copy_it.type_],
65 | cmd->cmd_prefix_);
66 | /* TODO(eustas): unwrap iterator blocks. */
67 | for (j = cmd->insert_len_; j != 0; --j) { /* One literal symbol per inserted byte. */
68 | size_t context;
69 | BlockSplitIteratorNext(&literal_it);
70 | context = literal_it.type_;
71 | if (context_modes) { /* With NULL context_modes the block type alone selects the histogram. */
72 | ContextLut lut = BROTLI_CONTEXT_LUT(context_modes[context]);
73 | context = (context << BROTLI_LITERAL_CONTEXT_BITS) +
74 | BROTLI_CONTEXT(prev_byte, prev_byte2, lut);
75 | }
76 | HistogramAddLiteral(&literal_histograms[context],
77 | ringbuffer[pos & mask]);
78 | prev_byte2 = prev_byte; /* Slide the two-byte context window forward. */
79 | prev_byte = ringbuffer[pos & mask];
80 | ++pos;
81 | }
82 | pos += CommandCopyLen(cmd);
83 | if (CommandCopyLen(cmd)) {
84 | prev_byte2 = ringbuffer[(pos - 2) & mask]; /* After a copy, reload the context from the last two bytes. */
85 | prev_byte = ringbuffer[(pos - 1) & mask];
86 | if (cmd->cmd_prefix_ >= 128) { /* Only command prefixes >= 128 contribute a distance symbol. */
87 | size_t context;
88 | BlockSplitIteratorNext(&dist_it);
89 | context = (dist_it.type_ << BROTLI_DISTANCE_CONTEXT_BITS) +
90 | CommandDistanceContext(cmd);
91 | HistogramAddDistance(&copy_dist_histograms[context],
92 | cmd->dist_prefix_ & 0x3FF); /* Only the low 10 bits of dist_prefix_ form the symbol. */
93 | }
94 | }
95 | }
96 | }
97 |
98 | #if defined(__cplusplus) || defined(c_plusplus)
99 | } /* extern "C" */
100 | #endif
101 |
--------------------------------------------------------------------------------
/python/tests/compressor_test.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016 The Brotli Authors. All rights reserved.
2 | #
3 | # Distributed under MIT license.
4 | # See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 |
6 | import functools
7 | import unittest
8 |
9 | from . import _test_utils
10 | import brotli
11 |
12 |
13 | # Do not inherit from TestCase here to ensure that test methods
14 | # are not run automatically and instead are run as part of a specific
15 | # configuration below.
16 | class _TestCompressor(object):
17 |
18 | CHUNK_SIZE = 2048  # Bytes passed to process() per call in the chunked tests.
19 |
20 | def tearDown(self):
21 | self.compressor = None  # Release the reference to the compressor.
22 |
23 | def _check_decompression(self, test_data):
24 | # Write decompression to temp file and verify it matches the original.
25 | temp_uncompressed = _test_utils.get_temp_uncompressed_name(test_data)
26 | temp_compressed = _test_utils.get_temp_compressed_name(test_data)
27 | original = test_data  # test_data is the path of the original input file.
28 | with open(temp_uncompressed, 'wb') as out_file:
29 | with open(temp_compressed, 'rb') as in_file:
30 | out_file.write(brotli.decompress(in_file.read()))
31 | self.assertFilesMatch(temp_uncompressed, original)  # Not defined in this class; presumably supplied by _test_utils.TestCase -- confirm.
32 |
33 | def _test_single_process(self, test_data):
34 | # Write single-shot compression to temp file.
35 | temp_compressed = _test_utils.get_temp_compressed_name(test_data)
36 | with open(temp_compressed, 'wb') as out_file:
37 | with open(test_data, 'rb') as in_file:
38 | out_file.write(self.compressor.process(in_file.read()))  # Whole input in one process() call.
39 | out_file.write(self.compressor.finish())
40 | self._check_decompression(test_data)
41 |
42 | def _test_multiple_process(self, test_data):
43 | # Write chunked compression to temp file.
44 | temp_compressed = _test_utils.get_temp_compressed_name(test_data)
45 | with open(temp_compressed, 'wb') as out_file:
46 | with open(test_data, 'rb') as in_file:
47 | read_chunk = functools.partial(in_file.read, self.CHUNK_SIZE)
48 | for data in iter(read_chunk, b''):  # Sentinel form of iter(): stops once read() returns b''.
49 | out_file.write(self.compressor.process(data))
50 | out_file.write(self.compressor.finish())
51 | self._check_decompression(test_data)
52 |
53 | def _test_multiple_process_and_flush(self, test_data):
54 | # Write chunked and flushed compression to temp file.
55 | temp_compressed = _test_utils.get_temp_compressed_name(test_data)
56 | with open(temp_compressed, 'wb') as out_file:
57 | with open(test_data, 'rb') as in_file:
58 | read_chunk = functools.partial(in_file.read, self.CHUNK_SIZE)
59 | for data in iter(read_chunk, b''):  # Sentinel form of iter(): stops once read() returns b''.
60 | out_file.write(self.compressor.process(data))
61 | out_file.write(self.compressor.flush())  # Flush after every chunk, unlike _test_multiple_process.
62 | out_file.write(self.compressor.finish())
63 | self._check_decompression(test_data)
64 |
65 |
66 | _test_utils.generate_test_methods(_TestCompressor)
67 |
68 |
69 | class TestCompressorQuality1(_TestCompressor, _test_utils.TestCase):
70 |
71 | def setUp(self):
72 | self.compressor = brotli.Compressor(quality=1)
73 |
74 |
75 | class TestCompressorQuality6(_TestCompressor, _test_utils.TestCase):
76 |
77 | def setUp(self):
78 | self.compressor = brotli.Compressor(quality=6)
79 |
80 |
81 | class TestCompressorQuality9(_TestCompressor, _test_utils.TestCase):
82 |
83 | def setUp(self):
84 | self.compressor = brotli.Compressor(quality=9)
85 |
86 |
87 | class TestCompressorQuality11(_TestCompressor, _test_utils.TestCase):
88 |
89 | def setUp(self):
90 | self.compressor = brotli.Compressor(quality=11)
91 |
92 |
93 | if __name__ == '__main__':
94 | unittest.main()
95 |
--------------------------------------------------------------------------------
/c/enc/brotli_bit_stream.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2014 Google Inc. All Rights Reserved.
2 |
3 | Distributed under MIT license.
4 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 | */
6 |
7 | /* Functions to convert brotli-related data structures into the
8 | brotli bit stream. The functions here operate under
9 | assumption that there is enough space in the storage, i.e., there are
10 | no out-of-range checks anywhere.
11 |
12 | These functions do bit addressing into a byte array. The byte array
13 | is called "storage" and the index to the bit is called storage_ix
14 | in function arguments. */
15 |
16 | #ifndef BROTLI_ENC_BROTLI_BIT_STREAM_H_
17 | #define BROTLI_ENC_BROTLI_BIT_STREAM_H_
18 |
19 | #include <brotli/types.h>
20 |
21 | #include "../common/context.h"
22 | #include "../common/platform.h"
23 | #include "command.h"
24 | #include "entropy_encode.h"
25 | #include "memory.h"
26 | #include "metablock.h"
27 |
28 | #if defined(__cplusplus) || defined(c_plusplus)
29 | extern "C" {
30 | #endif
31 |
32 | /* All Store functions here will use a storage_ix, which is always the bit
33 | position for the current storage. */
34 |
35 | BROTLI_INTERNAL void BrotliStoreHuffmanTree(const uint8_t* depths, size_t num,
36 | HuffmanTree* tree, size_t* storage_ix, uint8_t* storage);
37 |
38 | BROTLI_INTERNAL void BrotliBuildAndStoreHuffmanTreeFast(
39 | HuffmanTree* tree, const uint32_t* histogram, const size_t histogram_total,
40 | const size_t max_bits, uint8_t* depth, uint16_t* bits, size_t* storage_ix,
41 | uint8_t* storage);
42 |
43 | /* REQUIRES: length > 0 */
44 | /* REQUIRES: length <= (1 << 24) */
45 | BROTLI_INTERNAL void BrotliStoreMetaBlock(MemoryManager* m,
46 | const uint8_t* input, size_t start_pos, size_t length, size_t mask,
47 | uint8_t prev_byte, uint8_t prev_byte2, BROTLI_BOOL is_last,
48 | const BrotliEncoderParams* params, ContextType literal_context_mode,
49 | const Command* commands, size_t n_commands, const MetaBlockSplit* mb,
50 | size_t* storage_ix, uint8_t* storage);
51 |
52 | /* Stores the meta-block without doing any block splitting, just collects
53 | one histogram per block category and uses that for entropy coding.
54 | REQUIRES: length > 0
55 | REQUIRES: length <= (1 << 24) */
56 | BROTLI_INTERNAL void BrotliStoreMetaBlockTrivial(MemoryManager* m,
57 | const uint8_t* input, size_t start_pos, size_t length, size_t mask,
58 | BROTLI_BOOL is_last, const BrotliEncoderParams* params,
59 | const Command* commands, size_t n_commands,
60 | size_t* storage_ix, uint8_t* storage);
61 |
62 | /* Same as above, but uses static prefix codes for histograms with only a few
63 | symbols, and uses static code length prefix codes for all other histograms.
64 | REQUIRES: length > 0
65 | REQUIRES: length <= (1 << 24) */
66 | BROTLI_INTERNAL void BrotliStoreMetaBlockFast(MemoryManager* m,
67 | const uint8_t* input, size_t start_pos, size_t length, size_t mask,
68 | BROTLI_BOOL is_last, const BrotliEncoderParams* params,
69 | const Command* commands, size_t n_commands,
70 | size_t* storage_ix, uint8_t* storage);
71 |
72 | /* This is for storing uncompressed blocks (simple raw storage of
73 | bytes-as-bytes).
74 | REQUIRES: length > 0
75 | REQUIRES: length <= (1 << 24) */
76 | BROTLI_INTERNAL void BrotliStoreUncompressedMetaBlock(
77 | BROTLI_BOOL is_final_block, const uint8_t* BROTLI_RESTRICT input,
78 | size_t position, size_t mask, size_t len,
79 | size_t* BROTLI_RESTRICT storage_ix, uint8_t* BROTLI_RESTRICT storage);
80 |
81 | #if defined(BROTLI_TEST)
82 | void GetBlockLengthPrefixCodeForTest(uint32_t, size_t*, uint32_t*, uint32_t*);
83 | #endif
84 |
85 | #if defined(__cplusplus) || defined(c_plusplus)
86 | } /* extern "C" */
87 | #endif
88 |
89 | #endif /* BROTLI_ENC_BROTLI_BIT_STREAM_H_ */
90 |
--------------------------------------------------------------------------------
/c/include/brotli/shared_dictionary.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2017 Google Inc. All Rights Reserved.
2 |
3 | Distributed under MIT license.
4 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 | */
6 |
7 | /* (Opaque) Shared Dictionary definition and utilities. */
8 |
9 | #ifndef BROTLI_COMMON_SHARED_DICTIONARY_H_
10 | #define BROTLI_COMMON_SHARED_DICTIONARY_H_
11 |
12 | #include <brotli/port.h>
13 | #include <brotli/types.h>
14 |
15 | #if defined(__cplusplus) || defined(c_plusplus)
16 | extern "C" {
17 | #endif
18 |
19 | #define SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH 4
20 | #define SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH 31
21 | #define SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS 64
22 | #define SHARED_BROTLI_MAX_COMPOUND_DICTS 15
23 |
24 | /**
25 | * Opaque structure that holds shared dictionary data.
26 | *
27 | * Allocated and initialized with ::BrotliSharedDictionaryCreateInstance.
28 | * Cleaned up and deallocated with ::BrotliSharedDictionaryDestroyInstance.
29 | */
30 | typedef struct BrotliSharedDictionaryStruct BrotliSharedDictionary;
31 |
32 | /**
33 | * Input data type for ::BrotliSharedDictionaryAttach.
34 | */
35 | typedef enum BrotliSharedDictionaryType {
36 | /** Raw LZ77 prefix dictionary. */
37 | BROTLI_SHARED_DICTIONARY_RAW = 0,
38 | /** Serialized shared dictionary. */
39 | BROTLI_SHARED_DICTIONARY_SERIALIZED = 1
40 | } BrotliSharedDictionaryType;
41 |
42 | /**
43 | * Creates an instance of ::BrotliSharedDictionary.
44 | *
45 | * Fresh instance has default word dictionary and transforms
46 | * and no LZ77 prefix dictionary.
47 | *
48 | * @p alloc_func and @p free_func @b MUST be both zero or both non-zero. In the
49 | * case they are both zero, default memory allocators are used. @p opaque is
50 | * passed to @p alloc_func and @p free_func when they are called. @p free_func
51 | * has to return without doing anything when asked to free a NULL pointer.
52 | *
53 | * @param alloc_func custom memory allocation function
54 | * @param free_func custom memory free function
55 | * @param opaque custom memory manager handle
56 | * @returns @c 0 if instance can not be allocated or initialized
57 | * @returns pointer to initialized ::BrotliSharedDictionary otherwise
58 | */
59 | BROTLI_COMMON_API BrotliSharedDictionary* BrotliSharedDictionaryCreateInstance(
60 | brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque);
61 |
62 | /**
63 | * Deinitializes and frees ::BrotliSharedDictionary instance.
64 | *
65 | * @param dict shared dictionary instance to be cleaned up and deallocated
66 | */
67 | BROTLI_COMMON_API void BrotliSharedDictionaryDestroyInstance(
68 | BrotliSharedDictionary* dict);
69 |
70 | /**
71 | * Attaches dictionary to a given instance of ::BrotliSharedDictionary.
72 | *
73 | * Dictionary to be attached is represented in a serialized format as a region
74 | * of memory.
75 | *
76 | * Provided data is partially referenced by a resulting (compound) dictionary,
77 | * and should be kept untouched, while at least one compound dictionary uses it.
78 | * This way memory overhead is kept minimal by the cost of additional resource
79 | * management.
80 | *
81 | * @param dict dictionary to extend
82 | * @param type type of dictionary to attach
83 | * @param data_size size of @p data
84 | * @param data serialized dictionary of type @p type, with at least @p data_size
85 | * addressable bytes
86 | * @returns ::BROTLI_TRUE if provided dictionary is successfully attached
87 | * @returns ::BROTLI_FALSE otherwise
88 | */
89 | BROTLI_COMMON_API BROTLI_BOOL BrotliSharedDictionaryAttach(
90 | BrotliSharedDictionary* dict, BrotliSharedDictionaryType type,
91 | size_t data_size, const uint8_t data[BROTLI_ARRAY_PARAM(data_size)]);
92 |
93 | #if defined(__cplusplus) || defined(c_plusplus)
94 | } /* extern "C" */
95 | #endif
96 |
97 | #endif /* BROTLI_COMMON_SHARED_DICTIONARY_H_ */
98 |
--------------------------------------------------------------------------------
/c/enc/compress_fragment.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2015 Google Inc. All Rights Reserved.
2 |
3 | Distributed under MIT license.
4 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 | */
6 |
7 | /* Function for fast encoding of an input fragment, independently from the input
8 | history. This function uses one-pass processing: when we find a backward
9 | match, we immediately emit the corresponding command and literal codes to
10 | the bit stream. */
11 |
12 | #ifndef BROTLI_ENC_COMPRESS_FRAGMENT_H_
13 | #define BROTLI_ENC_COMPRESS_FRAGMENT_H_
14 |
15 | #include <brotli/types.h>
16 |
17 | #include "../common/constants.h"
18 | #include "../common/platform.h"
19 | #include "entropy_encode.h"
20 |
21 | #if defined(__cplusplus) || defined(c_plusplus)
22 | extern "C" {
23 | #endif
24 |
25 | typedef struct BrotliOnePassArena {
26 | uint8_t lit_depth[256];
27 | uint16_t lit_bits[256];
28 |
29 | /* Command and distance prefix codes (each 64 symbols, stored back-to-back)
30 | used for the next block. The command prefix code is over a smaller alphabet
31 | with the following 64 symbols:
32 | 0 - 15: insert length code 0, copy length code 0 - 15, same distance
33 | 16 - 39: insert length code 0, copy length code 0 - 23
34 | 40 - 63: insert length code 0 - 23, copy length code 0
35 | Note that symbols 16 and 40 represent the same code in the full alphabet,
36 | but we do not use either of them. */
37 | uint8_t cmd_depth[128];
38 | uint16_t cmd_bits[128];
39 | uint32_t cmd_histo[128];
40 |
41 | /* The compressed form of the command and distance prefix codes for the next
42 | block. */
43 | uint8_t cmd_code[512];
44 | size_t cmd_code_numbits;
45 |
46 | HuffmanTree tree[2 * BROTLI_NUM_LITERAL_SYMBOLS + 1];
47 | uint32_t histogram[256];
48 | uint8_t tmp_depth[BROTLI_NUM_COMMAND_SYMBOLS];
49 | uint16_t tmp_bits[64];
50 | } BrotliOnePassArena;
51 |
52 | /* Compresses "input" string to the "*storage" buffer as one or more complete
53 | meta-blocks, and updates the "*storage_ix" bit position.
54 |
55 | If "is_last" is 1, emits an additional empty last meta-block.
56 |
57 | "cmd_depth" and "cmd_bits" contain the command and distance prefix codes
58 | (see comment in encode.h) used for the encoding of this input fragment.
59 | If "is_last" is 0, they are updated to reflect the statistics
60 | of this input fragment, to be used for the encoding of the next fragment.
61 |
62 | "*cmd_code_numbits" is the number of bits of the compressed representation
63 | of the command and distance prefix codes, and "cmd_code" is an array of
64 | at least "(*cmd_code_numbits + 7) >> 3" size that contains the compressed
65 | command and distance prefix codes. If "is_last" is 0, these are also
66 | updated to represent the updated "cmd_depth" and "cmd_bits".
67 |
68 | REQUIRES: "input_size" is greater than zero, or "is_last" is 1.
69 | REQUIRES: "input_size" is less or equal to maximal metablock size (1 << 24).
70 | REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
71 | REQUIRES: "table_size" is an odd (9, 11, 13, 15) power of two
72 | OUTPUT: maximal copy distance <= |input_size|
73 | OUTPUT: maximal copy distance <= BROTLI_MAX_BACKWARD_LIMIT(18) */
74 | BROTLI_INTERNAL void BrotliCompressFragmentFast(BrotliOnePassArena* s,
75 | const uint8_t* input,
76 | size_t input_size,
77 | BROTLI_BOOL is_last,
78 | int* table, size_t table_size,
79 | size_t* storage_ix,
80 | uint8_t* storage);
81 |
82 | #if defined(__cplusplus) || defined(c_plusplus)
83 | } /* extern "C" */
84 | #endif
85 |
86 | #endif /* BROTLI_ENC_COMPRESS_FRAGMENT_H_ */
87 |
--------------------------------------------------------------------------------
/c/tools/brotli.md:
--------------------------------------------------------------------------------
1 | # NAME
2 |
3 | brotli(1) -- brotli, unbrotli - compress or decompress files
4 |
5 | # SYNOPSIS
6 |
7 | `brotli` [*OPTION|FILE*]...
8 |
9 | `unbrotli` is equivalent to `brotli --decompress`
10 |
11 | # DESCRIPTION
12 |
13 | `brotli` is a generic-purpose lossless compression algorithm that compresses
14 | data using a combination of a modern variant of the **LZ77** algorithm, Huffman
15 | coding and 2-nd order context modeling, with a compression ratio comparable to
16 | the best currently available general-purpose compression methods. It is similar
17 | in speed with deflate but offers more dense compression.
18 |
19 | `brotli` command line syntax is similar to `gzip (1)` and `zstd (1)`.
20 | Unlike `gzip (1)`, source files are preserved by default. It is possible to
21 | remove them after processing by using the `--rm` _option_.
22 |
23 | Arguments that look like "`--name`" or "`--name=value`" are _options_. Every
24 | _option_ has a short form "`-x`" or "`-x value`". Multiple short form _options_
25 | could be coalesced:
26 |
27 | * "`--decompress --stdout --suffix=.b`" works the same as
28 | * "`-d -c -S .b`" and
29 | * "`-dcS .b`"
30 |
31 | `brotli` has 3 operation modes:
32 |
33 | * default mode is compression;
34 | * `--decompress` option activates decompression mode;
35 | * `--test` option switches to integrity test mode; this option is equivalent to
36 | "`--decompress --stdout`" except that the decompressed data is discarded
37 | instead of being written to standard output.
38 |
39 | Every non-option argument is a _file_ entry. If no _files_ are given or _file_
40 | is "`-`", `brotli` reads from standard input. All arguments after "`--`" are
41 | _file_ entries.
42 |
43 | Unless `--stdout` or `--output` is specified, _files_ are written to a new file
44 | whose name is derived from the source _file_ name:
45 |
46 | * when compressing, a suffix is appended to the source filename to
47 | get the target filename
48 | * when decompressing, a suffix is removed from the source filename to
49 | get the target filename
50 |
51 | Default suffix is `.br`, but it could be specified with `--suffix` option.
52 |
53 | Conflicting or duplicate _options_ are not allowed.
54 |
55 | # OPTIONS
56 |
57 | * `-#`:
58 | compression level (0-9); bigger values cause denser, but slower compression
59 | * `-c`, `--stdout`:
60 | write on standard output
61 | * `-d`, `--decompress`:
62 | decompress mode
63 | * `-f`, `--force`:
64 | force output file overwrite
65 | * `-h`, `--help`:
66 | display this help and exit
67 | * `-j`, `--rm`:
68 | remove source file(s); `gzip (1)`-like behaviour
69 | * `-k`, `--keep`:
70 | keep source file(s); `zstd (1)`-like behaviour
71 | * `-n`, `--no-copy-stat`:
72 | do not copy source file(s) attributes
73 | * `-o FILE`, `--output=FILE`:
74 | output file; valid only if there is a single input entry
75 | * `-q NUM`, `--quality=NUM`:
76 | compression level (0-11); bigger values cause denser, but slower compression
77 | * `-t`, `--test`:
78 | test file integrity mode
79 | * `-v`, `--verbose`:
80 | increase output verbosity
81 | * `-w NUM`, `--lgwin=NUM`:
82 | set LZ77 window size (0, 10-24) (default: 24); window size is
83 | `(pow(2, NUM) - 16)`; 0 lets the compressor choose the optimal value;
84 | bigger window sizes improve density; decoder might require up to window size
85 | memory to operate
86 | * `-D FILE`, `--dictionary=FILE`:
87 | use FILE as raw (LZ77) dictionary; same dictionary MUST be used both for
88 | compression and decompression
89 | * `-S SUF`, `--suffix=SUF`:
90 | output file suffix (default: `.br`)
91 | * `-V`, `--version`:
92 | display version and exit
93 | * `-Z`, `--best`:
94 | use best compression level (default); same as "`-q 11`"
95 |
96 | # SEE ALSO
97 |
98 | `brotli` file format is defined in
99 | [RFC 7932](https://www.ietf.org/rfc/rfc7932.txt).
100 |
101 | `brotli` is open-sourced under the
102 | [MIT License](https://opensource.org/licenses/MIT).
103 |
104 | Mailing list: https://groups.google.com/forum/#!forum/brotli
105 |
106 | # BUGS
107 |
108 | Report bugs at: https://github.com/google/brotli/issues
109 |
--------------------------------------------------------------------------------
/c/enc/backward_references_hq.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2013 Google Inc. All Rights Reserved.
2 |
3 | Distributed under MIT license.
4 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 | */
6 |
7 | /* Function to find backward reference copies. */
8 |
9 | #ifndef BROTLI_ENC_BACKWARD_REFERENCES_HQ_H_
10 | #define BROTLI_ENC_BACKWARD_REFERENCES_HQ_H_
11 |
12 | #include
13 |
14 | #include "../common/constants.h"
15 | #include "../common/context.h"
16 | #include "../common/dictionary.h"
17 | #include "../common/platform.h"
18 | #include "command.h"
19 | #include "hash.h"
20 | #include "memory.h"
21 | #include "quality.h"
22 |
23 | #if defined(__cplusplus) || defined(c_plusplus)
24 | extern "C" {
25 | #endif
26 |
27 | BROTLI_INTERNAL void BrotliCreateZopfliBackwardReferences(MemoryManager* m,
28 | size_t num_bytes,
29 | size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
30 | ContextLut literal_context_lut, const BrotliEncoderParams* params,
31 | Hasher* hasher, int* dist_cache, size_t* last_insert_len,
32 | Command* commands, size_t* num_commands, size_t* num_literals);
33 |
34 | BROTLI_INTERNAL void BrotliCreateHqZopfliBackwardReferences(MemoryManager* m,
35 | size_t num_bytes,
36 | size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
37 | ContextLut literal_context_lut, const BrotliEncoderParams* params,
38 | Hasher* hasher, int* dist_cache, size_t* last_insert_len,
39 | Command* commands, size_t* num_commands, size_t* num_literals);
40 |
/* Per-position record of the shortest-path (Zopfli) search; the functions
   declared in this header operate on arrays of these nodes. */
typedef struct ZopfliNode {
  /* Best length to get up to this byte (not including this byte itself);
     the highest 7 bits are used to reconstruct the length code. */
  uint32_t length;
  /* Distance associated with the length. */
  uint32_t distance;
  /* Number of literal inserts before this copy; highest 5 bits contain
     distance short code + 1 (or zero if no short code). */
  uint32_t dcode_insert_length;

  /* This union holds information used by dynamic programming. During the
     forward pass |cost| is used to store the goal function. When a node is
     processed its |cost| is invalidated in favor of |shortcut|. On the path
     back-tracing pass |next| is assigned the offset to the next node on the
     path. Only one member is meaningful at any given stage. */
  union {
    /* Smallest cost to get to this byte from the beginning, as found so far. */
    float cost;
    /* Offset to the next node on the path. Equals to command_length() of the
       next node on the path. For last node equals to BROTLI_UINT32_MAX */
    uint32_t next;
    /* Node position that provides next distance for distance cache. */
    uint32_t shortcut;
  } u;
} ZopfliNode;
65 |
66 | BROTLI_INTERNAL void BrotliInitZopfliNodes(ZopfliNode* array, size_t length);
67 |
68 | /* Computes the shortest path of commands from position to at most
69 | position + num_bytes.
70 |
71 | On return, path->size() is the number of commands found and path[i] is the
72 | length of the i-th command (copy length plus insert length).
73 | Note that the sum of the lengths of all commands can be less than num_bytes.
74 |
75 | On return, the nodes[0..num_bytes] array will have the following
76 | "ZopfliNode array invariant":
77 | For each i in [1..num_bytes], if nodes[i].cost < kInfinity, then
78 | (1) nodes[i].copy_length() >= 2
79 | (2) nodes[i].command_length() <= i and
80 | (3) nodes[i - nodes[i].command_length()].cost < kInfinity */
81 | BROTLI_INTERNAL size_t BrotliZopfliComputeShortestPath(
82 | MemoryManager* m, size_t num_bytes,
83 | size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
84 | ContextLut literal_context_lut, const BrotliEncoderParams* params,
85 | const int* dist_cache, Hasher* hasher, ZopfliNode* nodes);
86 |
87 | BROTLI_INTERNAL void BrotliZopfliCreateCommands(
88 | const size_t num_bytes, const size_t block_start, const ZopfliNode* nodes,
89 | int* dist_cache, size_t* last_insert_len, const BrotliEncoderParams* params,
90 | Command* commands, size_t* num_literals);
91 |
92 | #if defined(__cplusplus) || defined(c_plusplus)
93 | } /* extern "C" */
94 | #endif
95 |
96 | #endif /* BROTLI_ENC_BACKWARD_REFERENCES_HQ_H_ */
97 |
--------------------------------------------------------------------------------
/python/tests/bro_test.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016 The Brotli Authors. All rights reserved.
2 | #
3 | # Distributed under MIT license.
4 | # See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 |
6 | import subprocess
7 | import unittest
8 |
9 | from . import _test_utils
10 |
11 | BRO_ARGS = _test_utils.BRO_ARGS
12 | TEST_ENV = _test_utils.TEST_ENV
13 |
14 |
15 | def _get_original_name(test_data):
16 | return test_data.split('.compressed')[0]
17 |
18 |
class TestBroDecompress(_test_utils.TestCase):
  """Runs the bro command line in decompression mode on compressed fixtures.

  The `_test_*` methods are templates; concrete test methods are attached to
  the class by `_test_utils.generate_test_methods`.
  """

  def _check_decompression(self, test_data):
    # The decompressed temp file must be identical to the original fixture.
    expected = _get_original_name(test_data)
    actual = _test_utils.get_temp_uncompressed_name(test_data)
    self.assertFilesMatch(actual, expected)

  def _decompress_file(self, test_data):
    # File mode: input and output paths are passed via -i / -o.
    output_path = _test_utils.get_temp_uncompressed_name(test_data)
    command = BRO_ARGS + ['-f', '-d', '-i', test_data, '-o', output_path]
    subprocess.check_call(command, env=TEST_ENV)

  def _decompress_pipe(self, test_data):
    # Pipe mode: the tool reads stdin and writes stdout.
    output_path = _test_utils.get_temp_uncompressed_name(test_data)
    command = BRO_ARGS + ['-d']
    with open(output_path, 'wb') as out_file, open(test_data, 'rb') as in_file:
      subprocess.check_call(
          command, stdin=in_file, stdout=out_file, env=TEST_ENV)

  def _test_decompress_file(self, test_data):
    self._decompress_file(test_data)
    self._check_decompression(test_data)

  def _test_decompress_pipe(self, test_data):
    self._decompress_pipe(test_data)
    self._check_decompression(test_data)
47 |
48 |
49 | _test_utils.generate_test_methods(TestBroDecompress, for_decompression=True)
50 |
51 |
class TestBroCompress(_test_utils.TestCase):
  """Round-trips test data files through the bro command line.

  Each `_test_*` template compresses a file (optionally with the 'quality' and
  'lgwin' values from VARIANTS), decompresses the result and compares it with
  the original. Concrete test methods are attached to the class by
  `_test_utils.generate_test_methods`.
  """

  VARIANTS = {'quality': (1, 6, 9, 11), 'lgwin': (10, 15, 20, 24)}

  def _check_decompression(self, test_data, **kwargs):
    # Write decompression to temp file and verify it matches the original.
    temp_uncompressed = _test_utils.get_temp_uncompressed_name(test_data)
    temp_compressed = _test_utils.get_temp_compressed_name(test_data)
    original = test_data
    args = BRO_ARGS + ['-f', '-d']
    args.extend(['-i', temp_compressed, '-o', temp_uncompressed])
    subprocess.check_call(args, env=TEST_ENV)
    self.assertFilesMatch(temp_uncompressed, original)

  def _compress_file(self, test_data, **kwargs):
    # File mode: input and output paths are passed via -i / -o.
    temp_compressed = _test_utils.get_temp_compressed_name(test_data)
    args = BRO_ARGS + ['-f']
    if 'quality' in kwargs:
      args.extend(['-q', str(kwargs['quality'])])
    if 'lgwin' in kwargs:
      args.extend(['--lgwin', str(kwargs['lgwin'])])
    args.extend(['-i', test_data, '-o', temp_compressed])
    subprocess.check_call(args, env=TEST_ENV)

  def _compress_pipe(self, test_data, **kwargs):
    # Pipe mode: the tool reads stdin and writes stdout.
    temp_compressed = _test_utils.get_temp_compressed_name(test_data)
    # Copy the shared base command. Extending BRO_ARGS itself would append the
    # flags to the module-level list and leak them into every later test.
    args = list(BRO_ARGS)
    if 'quality' in kwargs:
      args.extend(['-q', str(kwargs['quality'])])
    if 'lgwin' in kwargs:
      args.extend(['--lgwin', str(kwargs['lgwin'])])
    with open(temp_compressed, 'wb') as out_file:
      with open(test_data, 'rb') as in_file:
        subprocess.check_call(
            args, stdin=in_file, stdout=out_file, env=TEST_ENV)

  def _test_compress_file(self, test_data, **kwargs):
    self._compress_file(test_data, **kwargs)
    self._check_decompression(test_data)

  def _test_compress_pipe(self, test_data, **kwargs):
    self._compress_pipe(test_data, **kwargs)
    self._check_decompression(test_data)
95 |
96 |
97 | _test_utils.generate_test_methods(
98 | TestBroCompress, variants=TestBroCompress.VARIANTS)
99 |
100 | if __name__ == '__main__':
101 | unittest.main()
102 |
--------------------------------------------------------------------------------
/c/enc/metablock.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2015 Google Inc. All Rights Reserved.
2 |
3 | Distributed under MIT license.
4 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 | */
6 |
7 | /* Algorithms for distributing the literals and commands of a metablock between
8 | block types and contexts. */
9 |
10 | #ifndef BROTLI_ENC_METABLOCK_H_
11 | #define BROTLI_ENC_METABLOCK_H_
12 |
13 | #include
14 |
15 | #include "../common/context.h"
16 | #include "../common/platform.h"
17 | #include "block_splitter.h"
18 | #include "command.h"
19 | #include "histogram.h"
20 | #include "memory.h"
21 | #include "quality.h"
22 |
23 | #if defined(__cplusplus) || defined(c_plusplus)
24 | extern "C" {
25 | #endif
26 |
/* Aggregates the three block splits of a meta-block together with the context
   maps and histogram arrays attached to them. InitMetaBlockSplit resets every
   member; DestroyMetaBlockSplit releases the splits and all five arrays. */
typedef struct MetaBlockSplit {
  BlockSplit literal_split;
  BlockSplit command_split;
  BlockSplit distance_split;
  /* Each array below is paired with a |*_size| field; presumably the size is
     the element count of the array -- TODO confirm against metablock.c. */
  uint32_t* literal_context_map;
  size_t literal_context_map_size;
  uint32_t* distance_context_map;
  size_t distance_context_map_size;
  HistogramLiteral* literal_histograms;
  size_t literal_histograms_size;
  HistogramCommand* command_histograms;
  size_t command_histograms_size;
  HistogramDistance* distance_histograms;
  size_t distance_histograms_size;
} MetaBlockSplit;
42 |
43 | static BROTLI_INLINE void InitMetaBlockSplit(MetaBlockSplit* mb) {
44 | BrotliInitBlockSplit(&mb->literal_split);
45 | BrotliInitBlockSplit(&mb->command_split);
46 | BrotliInitBlockSplit(&mb->distance_split);
47 | mb->literal_context_map = 0;
48 | mb->literal_context_map_size = 0;
49 | mb->distance_context_map = 0;
50 | mb->distance_context_map_size = 0;
51 | mb->literal_histograms = 0;
52 | mb->literal_histograms_size = 0;
53 | mb->command_histograms = 0;
54 | mb->command_histograms_size = 0;
55 | mb->distance_histograms = 0;
56 | mb->distance_histograms_size = 0;
57 | }
58 |
/* Releases everything attached to |mb| through memory manager |m|: the three
   block splits first, then the context maps and histogram arrays. */
static BROTLI_INLINE void DestroyMetaBlockSplit(
    MemoryManager* m, MetaBlockSplit* mb) {
  BrotliDestroyBlockSplit(m, &mb->literal_split);
  BrotliDestroyBlockSplit(m, &mb->command_split);
  BrotliDestroyBlockSplit(m, &mb->distance_split);
  /* BROTLI_FREE also resets each pointer to NULL; the |*_size| fields are
     left untouched. */
  BROTLI_FREE(m, mb->literal_context_map);
  BROTLI_FREE(m, mb->distance_context_map);
  BROTLI_FREE(m, mb->literal_histograms);
  BROTLI_FREE(m, mb->command_histograms);
  BROTLI_FREE(m, mb->distance_histograms);
}
70 |
71 | /* Uses the slow shortest-path block splitter and does context clustering.
72 | The distance parameters are dynamically selected based on the commands
73 | which get recomputed under the new distance parameters. The new distance
74 | parameters are stored into *params. */
75 | BROTLI_INTERNAL void BrotliBuildMetaBlock(MemoryManager* m,
76 | const uint8_t* ringbuffer,
77 | const size_t pos,
78 | const size_t mask,
79 | BrotliEncoderParams* params,
80 | uint8_t prev_byte,
81 | uint8_t prev_byte2,
82 | Command* cmds,
83 | size_t num_commands,
84 | ContextType literal_context_mode,
85 | MetaBlockSplit* mb);
86 |
87 | /* Uses a fast greedy block splitter that tries to merge current block with the
88 | last or the second last block and uses a static context clustering which
89 | is the same for all block types. */
90 | BROTLI_INTERNAL void BrotliBuildMetaBlockGreedy(
91 | MemoryManager* m, const uint8_t* ringbuffer, size_t pos, size_t mask,
92 | uint8_t prev_byte, uint8_t prev_byte2, ContextLut literal_context_lut,
93 | size_t num_contexts, const uint32_t* static_context_map,
94 | const Command* commands, size_t n_commands, MetaBlockSplit* mb);
95 |
96 | BROTLI_INTERNAL void BrotliOptimizeHistograms(uint32_t num_distance_codes,
97 | MetaBlockSplit* mb);
98 |
99 | BROTLI_INTERNAL void BrotliInitDistanceParams(BrotliDistanceParams* params,
100 | uint32_t npostfix, uint32_t ndirect, BROTLI_BOOL large_window);
101 |
102 | #if defined(__cplusplus) || defined(c_plusplus)
103 | } /* extern "C" */
104 | #endif
105 |
106 | #endif /* BROTLI_ENC_METABLOCK_H_ */
107 |
--------------------------------------------------------------------------------
/c/enc/memory.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2016 Google Inc. All Rights Reserved.
2 |
3 | Distributed under MIT license.
4 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 | */
6 |
7 | /* Macros for memory management. */
8 |
9 | #ifndef BROTLI_ENC_MEMORY_H_
10 | #define BROTLI_ENC_MEMORY_H_
11 |
12 | #include /* memcpy */
13 |
14 | #include
15 |
16 | #include "../common/platform.h"
17 |
18 | #if defined(__cplusplus) || defined(c_plusplus)
19 | extern "C" {
20 | #endif
21 |
22 | #if !defined(BROTLI_ENCODER_CLEANUP_ON_OOM) && \
23 | !defined(BROTLI_ENCODER_EXIT_ON_OOM)
24 | #define BROTLI_ENCODER_EXIT_ON_OOM
25 | #endif
26 |
/* Allocation context handed to every BROTLI_ALLOC / BROTLI_FREE call. */
typedef struct MemoryManager {
  /* Allocation / deallocation callbacks and the user pointer supplied to
     BrotliInitMemoryManager. */
  brotli_alloc_func alloc_func;
  brotli_free_func free_func;
  void* opaque;
#if !defined(BROTLI_ENCODER_EXIT_ON_OOM)
  /* Out-of-memory flag; this is what BROTLI_IS_OOM reads. */
  BROTLI_BOOL is_oom;
  /* NOTE(review): the fields below look like bookkeeping for live allocations
     so they can be released after an OOM -- confirm against memory.c. */
  size_t perm_allocated;
  size_t new_allocated;
  size_t new_freed;
  void* pointers[256];
#endif  /* BROTLI_ENCODER_EXIT_ON_OOM */
} MemoryManager;
39 |
40 | BROTLI_INTERNAL void BrotliInitMemoryManager(
41 | MemoryManager* m, brotli_alloc_func alloc_func, brotli_free_func free_func,
42 | void* opaque);
43 |
44 | BROTLI_INTERNAL void* BrotliAllocate(MemoryManager* m, size_t n);
45 | #define BROTLI_ALLOC(M, T, N) \
46 | ((N) > 0 ? ((T*)BrotliAllocate((M), (N) * sizeof(T))) : NULL)
47 |
48 | BROTLI_INTERNAL void BrotliFree(MemoryManager* m, void* p);
49 | #define BROTLI_FREE(M, P) { \
50 | BrotliFree((M), (P)); \
51 | P = NULL; \
52 | }
53 |
54 | #if defined(BROTLI_ENCODER_EXIT_ON_OOM)
55 | #define BROTLI_IS_OOM(M) (!!0)
56 | #else /* BROTLI_ENCODER_EXIT_ON_OOM */
57 | #define BROTLI_IS_OOM(M) (!!(M)->is_oom)
58 | #endif /* BROTLI_ENCODER_EXIT_ON_OOM */
59 |
/*
BROTLI_IS_NULL is a fake check, BROTLI_IS_OOM does the heavy lifting.
The only purpose of it is to explain static analyzers the state of things.
NB: use ONLY together with BROTLI_IS_OOM
    AND ONLY for allocations in the current scope.

The analyzer branch compares against NULL rather than nullptr: this header is
compiled as C as well as C++, and nullptr is not a C keyword before C23.
Outside of the clang static analyzer the macro is the constant 0 and does not
evaluate its argument.
 */
#if defined(__clang_analyzer__) && !defined(BROTLI_ENCODER_EXIT_ON_OOM)
#define BROTLI_IS_NULL(A) ((A) == NULL)
#else  /* defined(__clang_analyzer__) */
#define BROTLI_IS_NULL(A) (!!0)
#endif  /* defined(__clang_analyzer__) */
71 |
72 | BROTLI_INTERNAL void BrotliWipeOutMemoryManager(MemoryManager* m);
73 |
/*
Dynamically grows array capacity to at least the requested size.
M: MemoryManager
T: data type
A: array
C: capacity
R: requested size

Does nothing when C >= R. Otherwise the new capacity starts at R (if C is 0)
or at C and is doubled until it reaches R; a new array is allocated, the first
C elements are copied over (skipped on OOM), the old array is released with
BROTLI_FREE, and A / C are overwritten with the new array and capacity.
A and C must be assignable lvalues; A, C and R are evaluated more than once,
so they must be free of side effects.
*/
#define BROTLI_ENSURE_CAPACITY(M, T, A, C, R) {                    \
  if (C < (R)) {                                                   \
    size_t _new_size = (C == 0) ? (R) : C;                         \
    T* new_array;                                                  \
    while (_new_size < (R)) _new_size *= 2;                        \
    new_array = BROTLI_ALLOC((M), T, _new_size);                   \
    if (!BROTLI_IS_OOM(M) && !BROTLI_IS_NULL(new_array) && C != 0) \
      memcpy(new_array, A, C * sizeof(T));                         \
    BROTLI_FREE((M), A);                                           \
    A = new_array;                                                 \
    C = _new_size;                                                 \
  }                                                                \
}
95 |
96 | /*
97 | Appends value and dynamically grows array capacity when needed
98 | M: MemoryManager
99 | T: data type
100 | A: array
101 | C: array capacity
102 | S: array size
103 | V: value to append
104 | */
105 | #define BROTLI_ENSURE_CAPACITY_APPEND(M, T, A, C, S, V) { \
106 | (S)++; \
107 | BROTLI_ENSURE_CAPACITY(M, T, A, C, S); \
108 | A[(S) - 1] = (V); \
109 | }
110 |
111 | /* "Bootstrap" allocations are not tracked by memory manager; should be used
112 | only to allocate MemoryManager itself (or structure containing it). */
113 | BROTLI_INTERNAL void* BrotliBootstrapAlloc(size_t size,
114 | brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque);
115 | BROTLI_INTERNAL void BrotliBootstrapFree(void* address, MemoryManager* m);
116 |
117 | #if defined(__cplusplus) || defined(c_plusplus)
118 | } /* extern "C" */
119 | #endif
120 |
121 | #endif /* BROTLI_ENC_MEMORY_H_ */
122 |
--------------------------------------------------------------------------------
/python/tests/_test_utils.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import filecmp
3 | import glob
4 | import itertools
5 | import os
6 | import sys
7 | import sysconfig
8 | import tempfile
9 | import unittest
10 |
11 |
12 | project_dir = os.path.abspath(os.path.join(__file__, '..', '..', '..'))
13 | test_dir = os.getenv("BROTLI_TESTS_PATH")
14 | BRO_ARGS = [os.getenv("BROTLI_WRAPPER")]
15 |
16 | # Fallbacks
17 | if test_dir is None:
18 | test_dir = os.path.join(project_dir, 'tests')
19 | if BRO_ARGS[0] is None:
20 | python_exe = sys.executable or 'python'
21 | bro_path = os.path.join(project_dir, 'python', 'bro.py')
22 | BRO_ARGS = [python_exe, bro_path]
23 |
24 | # Get the platform/version-specific build folder.
25 | # By default, the distutils build base is in the same location as setup.py.
26 | platform_lib_name = 'lib.{platform}-{version[0]}.{version[1]}'.format(
27 | platform=sysconfig.get_platform(), version=sys.version_info)
28 | build_dir = os.path.join(project_dir, 'bin', platform_lib_name)
29 |
30 | # Prepend the build folder to sys.path and the PYTHONPATH environment variable.
31 | if build_dir not in sys.path:
32 | sys.path.insert(0, build_dir)
33 | TEST_ENV = os.environ.copy()
34 | if 'PYTHONPATH' not in TEST_ENV:
35 | TEST_ENV['PYTHONPATH'] = build_dir
36 | else:
37 | TEST_ENV['PYTHONPATH'] = build_dir + os.pathsep + TEST_ENV['PYTHONPATH']
38 |
39 | TESTDATA_DIR = os.path.join(test_dir, 'testdata')
40 |
41 | TESTDATA_FILES = [
42 | 'empty', # Empty file
43 | '10x10y', # Small text
44 | 'alice29.txt', # Large text
45 | 'random_org_10k.bin', # Small data
46 | 'mapsdatazrh', # Large data
47 | 'ukkonooa', # Poem
48 | ]
49 |
50 | # Some files might be missing in a lightweight sources pack.
51 | TESTDATA_PATH_CANDIDATES = [
52 | os.path.join(TESTDATA_DIR, f) for f in TESTDATA_FILES
53 | ]
54 |
55 | TESTDATA_PATHS = [
56 | path for path in TESTDATA_PATH_CANDIDATES if os.path.isfile(path)
57 | ]
58 |
59 | TESTDATA_PATHS_FOR_DECOMPRESSION = glob.glob(
60 | os.path.join(TESTDATA_DIR, '*.compressed'))
61 |
62 | TEMP_DIR = tempfile.mkdtemp()
63 |
64 |
def get_temp_compressed_name(filename):
  """Returns the path in TEMP_DIR used for the compressed form of `filename`."""
  temp_name = os.path.basename(filename + '.bro')
  return os.path.join(TEMP_DIR, temp_name)
67 |
68 |
def get_temp_uncompressed_name(filename):
  """Returns the path in TEMP_DIR used for the decompressed form of `filename`."""
  temp_name = os.path.basename(filename + '.unbro')
  return os.path.join(TEMP_DIR, temp_name)
71 |
72 |
def bind_method_args(method, *args, **kwargs):
  """Returns a one-argument callable that invokes `method(self, *args, **kwargs)`.

  The result takes only `self`, so it can be installed on a class as a
  zero-argument test method.
  """

  def bound(self):
    return method(self, *args, **kwargs)

  return bound
75 |
76 |
def generate_test_methods(test_case_class,
                          for_decompression=False,
                          variants=None):
  """Attaches one concrete test method per (template, data file, option set).

  Every attribute of `test_case_class` whose name starts with '_test' is used
  as a template. Having a separate method per data file makes identifying
  problems with specific compression scenarios easier.

  Args:
    test_case_class: class to scan for templates and to extend.
    for_decompression: when true, iterate over the '*.compressed' fixtures
      instead of the plain test data files.
    variants: optional dict mapping a keyword name to an iterable of values;
      the cartesian product of all values is generated and each combination
      is passed to the template as keyword arguments.
  """
  paths = TESTDATA_PATHS_FOR_DECOMPRESSION if for_decompression else TESTDATA_PATHS

  if variants:
    # One list of (key, value) pairs per key, then every cross combination.
    pairs_per_key = [
        [(key, value) for value in values] for key, values in variants.items()
    ]
    opts = []
    for combo in itertools.product(*pairs_per_key):
      label = '_'.join(str(part) for pair in combo for part in pair)
      opts.append([label, dict(combo)])
  else:
    opts = [['', {}]]

  template_names = [m for m in dir(test_case_class) if m.startswith('_test')]
  for method in template_names:
    template = getattr(test_case_class, method)
    for testdata in paths:
      stem = os.path.splitext(os.path.basename(testdata))[0]
      for opts_name, opts_dict in opts:
        name = 'test_{method}_{options}_{file}'.format(
            method=method, options=opts_name, file=stem)
        setattr(test_case_class, name,
                bind_method_args(template, testdata, **opts_dict))
106 |
107 |
class TestCase(unittest.TestCase):
  """Base class for the brotli tests: temp-file cleanup and file comparison."""

  def tearDown(self):
    # Remove the temp files a test may have produced. Decompression tests name
    # their outputs after the '*.compressed' fixtures, so those paths have to
    # be visited as well; otherwise their outputs are never deleted.
    for f in TESTDATA_PATHS + TESTDATA_PATHS_FOR_DECOMPRESSION:
      for temp_name in (get_temp_compressed_name(f),
                        get_temp_uncompressed_name(f)):
        try:
          os.unlink(temp_name)
        except OSError:
          # Best effort: the file may never have been created by this test.
          pass

  def assertFilesMatch(self, first, second):
    """Fails unless the two files have identical contents (full comparison)."""
    self.assertTrue(
        filecmp.cmp(first, second, shallow=False),
        'File {} differs from {}'.format(first, second))
125 |
--------------------------------------------------------------------------------
/c/enc/entropy_encode.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2010 Google Inc. All Rights Reserved.
2 |
3 | Distributed under MIT license.
4 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 | */
6 |
7 | /* Entropy encoding (Huffman) utilities. */
8 |
9 | #ifndef BROTLI_ENC_ENTROPY_ENCODE_H_
10 | #define BROTLI_ENC_ENTROPY_ENCODE_H_
11 |
12 | #include
13 |
14 | #include "../common/platform.h"
15 |
16 | #if defined(__cplusplus) || defined(c_plusplus)
17 | extern "C" {
18 | #endif
19 |
/* A node of a Huffman tree. Nodes refer to each other by 16-bit indices
   rather than by pointers. */
typedef struct HuffmanTree {
  /* NOTE(review): presumably the summed population count of the subtree --
     confirm against entropy_encode.c. */
  uint32_t total_count_;
  int16_t index_left_;
  /* NOTE(review): going by the name, a right-child index for inner nodes or a
     symbol value for leaves -- confirm against entropy_encode.c. */
  int16_t index_right_or_value_;
} HuffmanTree;
26 |
27 | static BROTLI_INLINE void InitHuffmanTree(HuffmanTree* self, uint32_t count,
28 | int16_t left, int16_t right) {
29 | self->total_count_ = count;
30 | self->index_left_ = left;
31 | self->index_right_or_value_ = right;
32 | }
33 |
34 | /* Returns 1 if assignment of depths succeeded, otherwise 0. */
35 | BROTLI_INTERNAL BROTLI_BOOL BrotliSetDepth(
36 | int p, HuffmanTree* pool, uint8_t* depth, int max_depth);
37 |
38 | /* This function will create a Huffman tree.
39 |
40 | The (data,length) contains the population counts.
41 | The tree_limit is the maximum bit depth of the Huffman codes.
42 |
43 | The depth contains the tree, i.e., how many bits are used for
44 | the symbol.
45 |
46 | The actual Huffman tree is constructed in the tree[] array, which has to
47 | be at least 2 * length + 1 long.
48 |
49 | See http://en.wikipedia.org/wiki/Huffman_coding */
50 | BROTLI_INTERNAL void BrotliCreateHuffmanTree(const uint32_t* data,
51 | const size_t length,
52 | const int tree_limit,
53 | HuffmanTree* tree,
54 | uint8_t* depth);
55 |
56 | /* Change the population counts in a way that the consequent
57 | Huffman tree compression, especially its RLE-part will be more
58 | likely to compress this data more efficiently.
59 |
60 | length contains the size of the histogram.
61 | counts contains the population counts.
62 | good_for_rle is a buffer of at least length size */
63 | BROTLI_INTERNAL void BrotliOptimizeHuffmanCountsForRle(
64 | size_t length, uint32_t* counts, uint8_t* good_for_rle);
65 |
66 | /* Write a Huffman tree from bit depths into the bit-stream representation
67 | of a Huffman tree. The generated Huffman tree is to be compressed once
68 | more using a Huffman tree */
69 | BROTLI_INTERNAL void BrotliWriteHuffmanTree(const uint8_t* depth,
70 | size_t num,
71 | size_t* tree_size,
72 | uint8_t* tree,
73 | uint8_t* extra_bits_data);
74 |
75 | /* Get the actual bit values for a tree of bit depths. */
76 | BROTLI_INTERNAL void BrotliConvertBitDepthsToSymbols(const uint8_t* depth,
77 | size_t len,
78 | uint16_t* bits);
79 |
80 | BROTLI_INTERNAL extern const size_t kBrotliShellGaps[6];
81 | /* Input size optimized Shell sort. */
82 | typedef BROTLI_BOOL (*HuffmanTreeComparator)(
83 | const HuffmanTree*, const HuffmanTree*);
84 | static BROTLI_INLINE void SortHuffmanTreeItems(HuffmanTree* items,
85 | const size_t n, HuffmanTreeComparator comparator) {
86 | if (n < 13) {
87 | /* Insertion sort. */
88 | size_t i;
89 | for (i = 1; i < n; ++i) {
90 | HuffmanTree tmp = items[i];
91 | size_t k = i;
92 | size_t j = i - 1;
93 | while (comparator(&tmp, &items[j])) {
94 | items[k] = items[j];
95 | k = j;
96 | if (!j--) break;
97 | }
98 | items[k] = tmp;
99 | }
100 | return;
101 | } else {
102 | /* Shell sort. */
103 | int g = n < 57 ? 2 : 0;
104 | for (; g < 6; ++g) {
105 | size_t gap = kBrotliShellGaps[g];
106 | size_t i;
107 | for (i = gap; i < n; ++i) {
108 | size_t j = i;
109 | HuffmanTree tmp = items[i];
110 | for (; j >= gap && comparator(&tmp, &items[j - gap]); j -= gap) {
111 | items[j] = items[j - gap];
112 | }
113 | items[j] = tmp;
114 | }
115 | }
116 | }
117 | }
118 |
119 | #if defined(__cplusplus) || defined(c_plusplus)
120 | } /* extern "C" */
121 | #endif
122 |
123 | #endif /* BROTLI_ENC_ENTROPY_ENCODE_H_ */
124 |
--------------------------------------------------------------------------------
/c/dec/huffman.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2013 Google Inc. All Rights Reserved.
2 |
3 | Distributed under MIT license.
4 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 | */
6 |
7 | /* Utilities for building Huffman decoding tables. */
8 |
9 | #ifndef BROTLI_DEC_HUFFMAN_H_
10 | #define BROTLI_DEC_HUFFMAN_H_
11 |
12 | #include
13 |
14 | #include "../common/platform.h"
15 |
16 | #if defined(__cplusplus) || defined(c_plusplus)
17 | extern "C" {
18 | #endif
19 |
20 | #define BROTLI_HUFFMAN_MAX_CODE_LENGTH 15
21 |
22 | /* BROTLI_NUM_BLOCK_LEN_SYMBOLS == 26 */
23 | #define BROTLI_HUFFMAN_MAX_SIZE_26 396
24 | /* BROTLI_MAX_BLOCK_TYPE_SYMBOLS == 258 */
25 | #define BROTLI_HUFFMAN_MAX_SIZE_258 632
26 | /* BROTLI_MAX_CONTEXT_MAP_SYMBOLS == 272 */
27 | #define BROTLI_HUFFMAN_MAX_SIZE_272 646
28 |
29 | #define BROTLI_HUFFMAN_MAX_CODE_LENGTH_CODE_LENGTH 5
30 |
31 | #if ((defined(BROTLI_TARGET_ARMV7) || defined(BROTLI_TARGET_ARMV8_32)) && \
32 | BROTLI_GNUC_HAS_ATTRIBUTE(aligned, 2, 7, 0))
33 | #define BROTLI_HUFFMAN_CODE_FAST_LOAD
34 | #endif
35 |
36 | #if !defined(BROTLI_HUFFMAN_CODE_FAST_LOAD)
37 | /* Do not create this struct directly - use the ConstructHuffmanCode
38 | * constructor below! */
39 | typedef struct {
40 | uint8_t bits; /* number of bits used for this symbol */
41 | uint16_t value; /* symbol value or table offset */
42 | } HuffmanCode;
43 |
44 | static BROTLI_INLINE HuffmanCode ConstructHuffmanCode(const uint8_t bits,
45 | const uint16_t value) {
46 | HuffmanCode h;
47 | h.bits = bits;
48 | h.value = value;
49 | return h;
50 | }
51 |
52 | /* Please use the following macros to optimize HuffmanCode accesses in hot
53 | * paths.
54 | *
55 | * For example, assuming |table| contains a HuffmanCode pointer:
56 | *
57 | * BROTLI_HC_MARK_TABLE_FOR_FAST_LOAD(table);
58 | * BROTLI_HC_ADJUST_TABLE_INDEX(table, index_into_table);
59 | * *bits = BROTLI_HC_FAST_LOAD_BITS(table);
60 | * *value = BROTLI_HC_FAST_LOAD_VALUE(table);
61 | * BROTLI_HC_ADJUST_TABLE_INDEX(table, offset);
62 | * *bits2 = BROTLI_HC_FAST_LOAD_BITS(table);
63 | * *value2 = BROTLI_HC_FAST_LOAD_VALUE(table);
64 | *
65 | */
66 |
67 | #define BROTLI_HC_MARK_TABLE_FOR_FAST_LOAD(H)
68 | #define BROTLI_HC_ADJUST_TABLE_INDEX(H, V) H += (V)
69 |
70 | /* These must be given a HuffmanCode pointer! */
71 | #define BROTLI_HC_FAST_LOAD_BITS(H) (H->bits)
72 | #define BROTLI_HC_FAST_LOAD_VALUE(H) (H->value)
73 |
74 | #else /* BROTLI_HUFFMAN_CODE_FAST_LOAD */
75 |
76 | typedef BROTLI_ALIGNED(4) uint32_t HuffmanCode;
77 | /* Packs |value| into the high 16 bits and |bits| into the low 8 bits. */
78 | static BROTLI_INLINE HuffmanCode ConstructHuffmanCode(const uint8_t bits,
79 |     const uint16_t value) {  /* widen first: int << 16 may overflow (UB) */
80 |   return (((uint32_t)value) << 16) | (uint32_t)bits;
81 | }
82 |
83 | #define BROTLI_HC_MARK_TABLE_FOR_FAST_LOAD(H) uint32_t __fastload_##H = (*H)
84 | #define BROTLI_HC_ADJUST_TABLE_INDEX(H, V) H += (V); __fastload_##H = (*H)
85 |
86 | /* These must be given a HuffmanCode pointer! */
87 | #define BROTLI_HC_FAST_LOAD_BITS(H) ((__fastload_##H) & 0xFF)
88 | #define BROTLI_HC_FAST_LOAD_VALUE(H) ((__fastload_##H) >> 16)
89 | #endif /* BROTLI_HUFFMAN_CODE_FAST_LOAD */
90 |
91 | /* Builds Huffman lookup table assuming code lengths are in symbol order. */
92 | BROTLI_INTERNAL void BrotliBuildCodeLengthsHuffmanTable(HuffmanCode* root_table,
93 | const uint8_t* const code_lengths, uint16_t* count);
94 |
95 | /* Builds Huffman lookup table assuming code lengths are in symbol order.
96 | Returns size of resulting table. */
97 | BROTLI_INTERNAL uint32_t BrotliBuildHuffmanTable(HuffmanCode* root_table,
98 | int root_bits, const uint16_t* const symbol_lists, uint16_t* count);
99 |
100 | /* Builds a simple Huffman table. The |num_symbols| parameter is to be
101 | interpreted as follows: 0 means 1 symbol, 1 means 2 symbols,
102 | 2 means 3 symbols, 3 means 4 symbols with lengths [2, 2, 2, 2],
103 | 4 means 4 symbols with lengths [1, 2, 3, 3]. */
104 | BROTLI_INTERNAL uint32_t BrotliBuildSimpleHuffmanTable(HuffmanCode* table,
105 | int root_bits, uint16_t* symbols, uint32_t num_symbols);
106 |
107 | /* Contains a collection of Huffman trees with the same alphabet size. */
108 | /* alphabet_size_limit is needed due to simple codes, since
109 | log2(alphabet_size_max) could be greater than log2(alphabet_size_limit). */
110 | typedef struct {
111 | HuffmanCode** htrees; /* NOTE(review): presumably one table pointer per tree - confirm */
112 | HuffmanCode* codes; /* NOTE(review): presumably shared storage the trees point into - confirm */
113 | uint16_t alphabet_size_max;
114 | uint16_t alphabet_size_limit;
115 | uint16_t num_htrees;
116 | } HuffmanTreeGroup;
117 |
118 | #if defined(__cplusplus) || defined(c_plusplus)
119 | } /* extern "C" */
120 | #endif
121 |
122 | #endif /* BROTLI_DEC_HUFFMAN_H_ */
123 |
--------------------------------------------------------------------------------
/c/enc/bit_cost_inc.h:
--------------------------------------------------------------------------------
1 | /* NOLINT(build/header_guard) */
2 | /* Copyright 2013 Google Inc. All Rights Reserved.
3 |
4 | Distributed under MIT license.
5 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
6 | */
7 |
8 | /* template parameters: FN */
9 |
10 | #define HistogramType FN(Histogram)
11 | /* Returns an estimated cost, in bits, of coding the data in |histogram|. */
12 | double FN(BrotliPopulationCost)(const HistogramType* histogram) {
13 | static const double kOneSymbolHistogramCost = 12;
14 | static const double kTwoSymbolHistogramCost = 20;
15 | static const double kThreeSymbolHistogramCost = 28;
16 | static const double kFourSymbolHistogramCost = 37;
17 | const size_t data_size = FN(HistogramDataSize)();
18 | int count = 0;
19 | size_t s[5]; /* indices of the first non-zero symbols (at most 5 stored) */
20 | double bits = 0.0;
21 | size_t i;
22 | if (histogram->total_count_ == 0) {
23 | return kOneSymbolHistogramCost;
24 | }
25 | for (i = 0; i < data_size; ++i) {
26 | if (histogram->data_[i] > 0) {
27 | s[count] = i;
28 | ++count;
29 | if (count > 4) break; /* 5+ used symbols: handled by the general path */
30 | }
31 | }
32 | if (count == 1) {
33 | return kOneSymbolHistogramCost;
34 | }
35 | if (count == 2) {
36 | return (kTwoSymbolHistogramCost + (double)histogram->total_count_);
37 | }
38 | if (count == 3) {
39 | const uint32_t histo0 = histogram->data_[s[0]];
40 | const uint32_t histo1 = histogram->data_[s[1]];
41 | const uint32_t histo2 = histogram->data_[s[2]];
42 | const uint32_t histomax =
43 | BROTLI_MAX(uint32_t, histo0, BROTLI_MAX(uint32_t, histo1, histo2));
44 | return (kThreeSymbolHistogramCost +
45 | 2 * (histo0 + histo1 + histo2) - histomax);
46 | }
47 | if (count == 4) {
48 | uint32_t histo[4];
49 | uint32_t h23;
50 | uint32_t histomax;
51 | for (i = 0; i < 4; ++i) {
52 | histo[i] = histogram->data_[s[i]];
53 | }
54 | /* Sort in descending order */
55 | for (i = 0; i < 4; ++i) {
56 | size_t j;
57 | for (j = i + 1; j < 4; ++j) {
58 | if (histo[j] > histo[i]) {
59 | BROTLI_SWAP(uint32_t, histo, j, i);
60 | }
61 | }
62 | }
63 | h23 = histo[2] + histo[3];
64 | histomax = BROTLI_MAX(uint32_t, h23, histo[0]);
65 | return (kFourSymbolHistogramCost +
66 | 3 * h23 + 2 * (histo[0] + histo[1]) - histomax);
67 | }
68 |
69 | {
70 | /* In this loop we compute the entropy of the histogram and simultaneously
71 | build a simplified histogram of the code length codes where we use the
72 | zero repeat code 17, but we don't use the non-zero repeat code 16. */
73 | size_t max_depth = 1;
74 | uint32_t depth_histo[BROTLI_CODE_LENGTH_CODES] = { 0 };
75 | const double log2total = FastLog2(histogram->total_count_);
76 | for (i = 0; i < data_size;) {
77 | if (histogram->data_[i] > 0) {
78 | /* Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) =
79 | = log2(total_count) - log2(count(symbol)) */
80 | double log2p = log2total - FastLog2(histogram->data_[i]);
81 | /* Approximate the bit depth by round(-log2(P(symbol))) */
82 | size_t depth = (size_t)(log2p + 0.5);
83 | bits += histogram->data_[i] * log2p;
84 | if (depth > 15) {
85 | depth = 15;
86 | }
87 | if (depth > max_depth) {
88 | max_depth = depth;
89 | }
90 | ++depth_histo[depth];
91 | ++i;
92 | } else {
93 | /* Compute the run length of zeros and add the appropriate number of 0
94 | and 17 code length codes to the code length code histogram. */
95 | uint32_t reps = 1;
96 | size_t k;
97 | for (k = i + 1; k < data_size && histogram->data_[k] == 0; ++k) {
98 | ++reps;
99 | }
100 | i += reps; /* skip the whole zero run */
101 | if (i == data_size) {
102 | /* Don't add any cost for the last zero run, since these are encoded
103 | only implicitly. */
104 | break;
105 | }
106 | if (reps < 3) {
107 | depth_histo[0] += reps;
108 | } else {
109 | reps -= 2;
110 | while (reps > 0) {
111 | ++depth_histo[BROTLI_REPEAT_ZERO_CODE_LENGTH];
112 | /* Add the 3 extra bits for the 17 code length code. */
113 | bits += 3;
114 | reps >>= 3;
115 | }
116 | }
117 | }
118 | }
119 | /* Add the estimated encoding cost of the code length code histogram. */
120 | bits += (double)(18 + 2 * max_depth);
121 | /* Add the entropy of the code length code histogram. */
122 | bits += BitsEntropy(depth_histo, BROTLI_CODE_LENGTH_CODES);
123 | }
124 | return bits;
125 | }
126 |
127 | #undef HistogramType
128 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | 
2 |
3 | # SECURITY NOTE
4 |
5 | Please consider updating brotli to version 1.0.9 (latest).
6 |
7 | Version 1.0.9 contains a fix for an "integer overflow" problem. This happens when the "one-shot" decoding API is used (or the input chunk for the streaming API is not limited), the input size (chunk size) is larger than 2GiB, and the input contains uncompressed blocks. After the overflow happens, `memcpy` is invoked with a gigantic `num` value, which will likely cause a crash.
8 |
9 | ### Introduction
10 |
11 | Brotli is a generic-purpose lossless compression algorithm that compresses data
12 | using a combination of a modern variant of the LZ77 algorithm, Huffman coding
13 | and 2nd order context modeling, with a compression ratio comparable to the best
14 | currently available general-purpose compression methods. It is similar in speed
15 | with deflate but offers more dense compression.
16 |
17 | The specification of the Brotli Compressed Data Format is defined in [RFC 7932](https://tools.ietf.org/html/rfc7932).
18 |
19 | Brotli is open-sourced under the MIT License, see the LICENSE file.
20 |
21 | > **Please note:** brotli is a "stream" format; it does not contain
22 | > meta-information, like checksums or uncompressed data length. It is possible
23 | > to modify "raw" ranges of the compressed stream and the decoder will not
24 | > notice that.
25 |
26 | Brotli mailing list:
27 | https://groups.google.com/forum/#!forum/brotli
28 |
29 | 
30 | [](https://oss-fuzz-build-logs.storage.googleapis.com/index.html#brotli)
31 |
32 | ### Build instructions
33 |
34 | #### Vcpkg
35 |
36 | You can download and install brotli using the [vcpkg](https://github.com/Microsoft/vcpkg/) dependency manager:
37 |
38 | git clone https://github.com/Microsoft/vcpkg.git
39 | cd vcpkg
40 | ./bootstrap-vcpkg.sh
41 | ./vcpkg integrate install
42 | ./vcpkg install brotli
43 |
44 | The brotli port in vcpkg is kept up to date by Microsoft team members and community contributors. If the version is out of date, please [create an issue or pull request](https://github.com/Microsoft/vcpkg) on the vcpkg repository.
45 |
46 | #### Autotools-style CMake
47 |
48 | [configure-cmake](https://github.com/nemequ/configure-cmake) is an
49 | autotools-style configure script for CMake-based projects (not supported on Windows).
50 |
51 | The basic commands to build, test and install brotli are:
52 |
53 | $ mkdir out && cd out
54 | $ ../configure-cmake
55 | $ make
56 | $ make test
57 | $ make install
58 |
59 | By default, debug binaries are built. To generate "release" `Makefile` specify `--disable-debug` option to `configure-cmake`.
60 |
61 | #### Bazel
62 |
63 | See [Bazel](http://www.bazel.build/)
64 |
65 | #### CMake
66 |
67 | The basic commands to build and install brotli are:
68 |
69 | $ mkdir out && cd out
70 | $ cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=./installed ..
71 | $ cmake --build . --config Release --target install
72 |
73 | You can use other [CMake](https://cmake.org/) configuration.
74 |
75 | #### Premake5
76 |
77 | See [Premake5](https://premake.github.io/)
78 |
79 | #### Python
80 |
81 | To install the latest release of the Python module, run the following:
82 |
83 | $ pip install brotli
84 |
85 | To install the tip-of-the-tree version, run:
86 |
87 | $ pip install --upgrade git+https://github.com/google/brotli
88 |
89 | See the [Python readme](python/README.md) for more details on installing
90 | from source, development, and testing.
91 |
92 | ### Benchmarks
93 | * [Squash Compression Benchmark](https://quixdb.github.io/squash-benchmark/) / [Unstable Squash Compression Benchmark](https://quixdb.github.io/squash-benchmark/unstable/)
94 | * [Large Text Compression Benchmark](http://mattmahoney.net/dc/text.html)
95 | * [Lzturbo Benchmark](https://sites.google.com/site/powturbo/home/benchmark)
96 |
97 | ### Related projects
98 | > **Disclaimer:** Brotli authors take no responsibility for the third party projects mentioned in this section.
99 |
100 | Independent [decoder](https://github.com/madler/brotli) implementation by Mark Adler, based entirely on format specification.
101 |
102 | JavaScript port of brotli [decoder](https://github.com/devongovett/brotli.js). Could be used directly via `npm install brotli`
103 |
104 | Hand ported [decoder / encoder](https://github.com/dominikhlbg/BrotliHaxe) in haxe by Dominik Homberger. Output source code: JavaScript, PHP, Python, Java and C#
105 |
106 | 7Zip [plugin](https://github.com/mcmilk/7-Zip-Zstd)
107 |
108 | Dart [native bindings](https://github.com/thosakwe/brotli)
109 |
110 | Dart compression framework with [fast FFI-based Brotli implementation](https://pub.dev/documentation/es_compression/latest/brotli/brotli-library.html) with ready-to-use prebuilt binaries for Win/Linux/Mac
111 |
--------------------------------------------------------------------------------
/c/common/context.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2013 Google Inc. All Rights Reserved.
2 |
3 | Distributed under MIT license.
4 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 | */
6 |
7 | /* Lookup table to map the previous two bytes to a context id.
8 |
9 | There are four different context modeling modes defined here:
10 | CONTEXT_LSB6: context id is the least significant 6 bits of the last byte,
11 | CONTEXT_MSB6: context id is the most significant 6 bits of the last byte,
12 | CONTEXT_UTF8: second-order context model tuned for UTF8-encoded text,
13 | CONTEXT_SIGNED: second-order context model tuned for signed integers.
14 |
15 | If |p1| and |p2| are the previous two bytes, and |mode| is current context
16 | mode, we calculate the context as:
17 |
18 | context = ContextLut(mode)[p1] | ContextLut(mode)[p2 + 256].
19 |
20 | For CONTEXT_UTF8 mode, if the previous two bytes are ASCII characters
21 | (i.e. < 128), this will be equivalent to
22 |
23 | context = 4 * context1(p1) + context2(p2),
24 |
25 | where context1 is based on the previous byte in the following way:
26 |
27 | 0 : non-ASCII control
28 | 1 : \t, \n, \r
29 | 2 : space
30 | 3 : other punctuation
31 | 4 : " '
32 | 5 : %
33 | 6 : ( < [ {
34 | 7 : ) > ] }
35 | 8 : , ; :
36 | 9 : .
37 | 10 : =
38 | 11 : number
39 | 12 : upper-case vowel
40 | 13 : upper-case consonant
41 | 14 : lower-case vowel
42 | 15 : lower-case consonant
43 |
44 | and context2 is based on the second last byte:
45 |
46 | 0 : control, space
47 | 1 : punctuation
48 | 2 : upper-case letter, number
49 | 3 : lower-case letter
50 |
51 | If the last byte is ASCII, and the second last byte is not (in a valid UTF8
52 | stream it will be a continuation byte, value between 128 and 191), the
53 | context is the same as if the second last byte was an ASCII control or space.
54 |
55 | If the last byte is a UTF8 lead byte (value >= 192), then the next byte will
56 | be a continuation byte and the context id is 2 or 3 depending on the LSB of
57 | the last byte and to a lesser extent on the second last byte if it is ASCII.
58 |
59 | If the last byte is a UTF8 continuation byte, the second last byte can be:
60 | - continuation byte: the next byte is probably ASCII or lead byte (assuming
61 | 4-byte UTF8 characters are rare) and the context id is 0 or 1.
62 | - lead byte (192 - 207): next byte is ASCII or lead byte, context is 0 or 1
63 | - lead byte (208 - 255): next byte is continuation byte, context is 2 or 3
64 |
65 | The possible value combinations of the previous two bytes, the range of
66 | context ids and the type of the next byte is summarized in the table below:
67 |
68 | |--------\-----------------------------------------------------------------|
69 | | \ Last byte |
70 | | Second \---------------------------------------------------------------|
71 | | last byte \ ASCII | cont. byte | lead byte |
72 | | \ (0-127) | (128-191) | (192-) |
73 | |=============|===================|=====================|==================|
74 | | ASCII | next: ASCII/lead | not valid | next: cont. |
75 | | (0-127) | context: 4 - 63 | | context: 2 - 3 |
76 | |-------------|-------------------|---------------------|------------------|
77 | | cont. byte | next: ASCII/lead | next: ASCII/lead | next: cont. |
78 | | (128-191) | context: 4 - 63 | context: 0 - 1 | context: 2 - 3 |
79 | |-------------|-------------------|---------------------|------------------|
80 | | lead byte | not valid | next: ASCII/lead | not valid |
81 | | (192-207) | | context: 0 - 1 | |
82 | |-------------|-------------------|---------------------|------------------|
83 | | lead byte | not valid | next: cont. | not valid |
84 | | (208-) | | context: 2 - 3 | |
85 | |-------------|-------------------|---------------------|------------------|
86 | */
87 |
88 | #ifndef BROTLI_COMMON_CONTEXT_H_
89 | #define BROTLI_COMMON_CONTEXT_H_
90 |
91 | #include
92 | #include
93 | /* Context modeling modes; the value selects a 512-entry slice of the table. */
94 | typedef enum ContextType {
95 | CONTEXT_LSB6 = 0, /* least significant 6 bits of the last byte */
96 | CONTEXT_MSB6 = 1, /* most significant 6 bits of the last byte */
97 | CONTEXT_UTF8 = 2, /* second-order model tuned for UTF8-encoded text */
98 | CONTEXT_SIGNED = 3 /* second-order model tuned for signed integers */
99 | } ContextType;
100 |
101 | /* "Soft-private", it is exported, but not "advertised" as API. */
102 | /* Common context lookup table for all context modes. */
103 | BROTLI_COMMON_API extern const uint8_t _kBrotliContextLookupTable[2048];
104 |
105 | typedef const uint8_t* ContextLut;
106 |
107 | /* typeof(MODE) == ContextType; returns ContextLut */
108 | #define BROTLI_CONTEXT_LUT(MODE) (&_kBrotliContextLookupTable[(MODE) << 9])
109 |
110 | /* typeof(LUT) == ContextLut */
111 | #define BROTLI_CONTEXT(P1, P2, LUT) ((LUT)[P1] | ((LUT) + 256)[P2])
112 |
113 | #endif /* BROTLI_COMMON_CONTEXT_H_ */
114 |
--------------------------------------------------------------------------------
/docs/brotli.1:
--------------------------------------------------------------------------------
1 | .\" Automatically generated by Pandoc 2.7.3
2 | .\"
3 | .TH "brotli" "1" "August 14 2021" "brotli 1.0.9" "User Manual"
4 | .hy
5 | .SH NAME
6 | .PP
7 | brotli(1) -- brotli, unbrotli - compress or decompress files
8 | .SH SYNOPSIS
9 | .PP
10 | \f[B]brotli\f[R] [\f[I]OPTION|FILE\f[R]]\&...
11 | .PP
12 | \f[B]unbrotli\f[R] is equivalent to \f[B]brotli --decompress\f[R]
13 | .SH DESCRIPTION
14 | .PP
15 | \f[B]brotli\f[R] is a generic-purpose lossless compression algorithm
16 | that compresses data using a combination of a modern variant of the
17 | \f[B]LZ77\f[R] algorithm, Huffman coding and 2-nd order context
18 | modeling, with a compression ratio comparable to the best currently
19 | available general-purpose compression methods.
20 | It is similar in speed with deflate but offers more dense compression.
21 | .PP
22 | \f[B]brotli\f[R] command line syntax is similar to \f[B]gzip (1)\f[R] and
23 | \f[B]zstd (1)\f[R].
24 | Unlike \f[B]gzip (1)\f[R], source files are preserved by default.
25 | It is possible to remove them after processing by using the
26 | \f[B]--rm\f[R] \f[I]option\f[R].
27 | .PP
28 | Arguments that look like \[lq]\f[B]--name\f[R]\[rq] or
29 | \[lq]\f[B]--name=value\f[R]\[rq] are \f[I]options\f[R].
30 | Every \f[I]option\f[R] has a short form \[lq]\f[B]-x\f[R]\[rq] or
31 | \[lq]\f[B]-x value\f[R]\[rq].
32 | Multiple short form \f[I]options\f[R] could be coalesced:
33 | .IP \[bu] 2
34 | \[lq]\f[B]--decompress --stdout --suffix=.b\f[R]\[rq] works the same as
35 | .IP \[bu] 2
36 | \[lq]\f[B]-d -s -S .b\f[R]\[rq] and
37 | .IP \[bu] 2
38 | \[lq]\f[B]-dsS .b\f[R]\[rq]
39 | .PP
40 | \f[B]brotli\f[R] has 3 operation modes:
41 | .IP \[bu] 2
42 | default mode is compression;
43 | .IP \[bu] 2
44 | \f[B]--decompress\f[R] option activates decompression mode;
45 | .IP \[bu] 2
46 | \f[B]--test\f[R] option switches to integrity test mode; this option is
47 | equivalent to \[lq]\f[B]--decompress --stdout\f[R]\[rq] except that the
48 | decompressed data is discarded instead of being written to standard
49 | output.
50 | .PP
51 | Every non-option argument is a \f[I]file\f[R] entry.
52 | If no \f[I]files\f[R] are given or \f[I]file\f[R] is
53 | \[lq]\f[B]-\f[R]\[rq], \f[B]brotli\f[R] reads from standard input.
54 | All arguments after \[lq]\f[B]--\f[R]\[rq] are \f[I]file\f[R] entries.
55 | .PP
56 | Unless \f[B]--stdout\f[R] or \f[B]--output\f[R] is specified,
57 | \f[I]files\f[R] are written to a new file whose name is derived from the
58 | source \f[I]file\f[R] name:
59 | .IP \[bu] 2
60 | when compressing, a suffix is appended to the source filename to get the
61 | target filename
62 | .IP \[bu] 2
63 | when decompressing, a suffix is removed from the source filename to get
64 | the target filename
65 | .PP
66 | Default suffix is \f[B].br\f[R], but it could be specified with
67 | \f[B]--suffix\f[R] option.
68 | .PP
69 | Conflicting or duplicate \f[I]options\f[R] are not allowed.
70 | .SH OPTIONS
71 | .IP \[bu] 2
72 | \f[B]-#\f[R]: compression level (0-9); bigger values cause denser, but
73 | slower compression
74 | .IP \[bu] 2
75 | \f[B]-c\f[R], \f[B]--stdout\f[R]: write on standard output
76 | .IP \[bu] 2
77 | \f[B]-d\f[R], \f[B]--decompress\f[R]: decompress mode
78 | .IP \[bu] 2
79 | \f[B]-f\f[R], \f[B]--force\f[R]: force output file overwrite
80 | .IP \[bu] 2
81 | \f[B]-h\f[R], \f[B]--help\f[R]: display this help and exit
82 | .IP \[bu] 2
83 | \f[B]-j\f[R], \f[B]--rm\f[R]: remove source file(s); \f[B]gzip
84 | (1)\f[R]-like behaviour
85 | .IP \[bu] 2
86 | \f[B]-k\f[R], \f[B]--keep\f[R]: keep source file(s); \f[B]zstd
87 | (1)\f[R]-like behaviour
88 | .IP \[bu] 2
89 | \f[B]-n\f[R], \f[B]--no-copy-stat\f[R]: do not copy source file(s)
90 | attributes
91 | .IP \[bu] 2
92 | \f[B]-o FILE\f[R], \f[B]--output=FILE\f[R]: output file; valid only if
93 | there is a single input entry
94 | .IP \[bu] 2
95 | \f[B]-q NUM\f[R], \f[B]--quality=NUM\f[R]: compression level (0-11);
96 | bigger values cause denser, but slower compression
97 | .IP \[bu] 2
98 | \f[B]-t\f[R], \f[B]--test\f[R]: test file integrity mode
99 | .IP \[bu] 2
100 | \f[B]-v\f[R], \f[B]--verbose\f[R]: increase output verbosity
101 | .IP \[bu] 2
102 | \f[B]-w NUM\f[R], \f[B]--lgwin=NUM\f[R]: set LZ77 window size (0, 10-24)
103 | (default: 24); window size is \f[B](pow(2, NUM) - 16)\f[R]; 0 lets
104 | compressor decide on the optimal value; bigger window sizes improve
105 | density; decoder might require up to window size memory to operate
106 | .IP \[bu] 2
107 | \f[B]-D FILE\f[R], \f[B]--dictionary=FILE\f[R]: use FILE as raw (LZ77)
108 | dictionary; same dictionary MUST be used both for compression and
109 | decompression
110 | .IP \[bu] 2
111 | \f[B]-S SUF\f[R], \f[B]--suffix=SUF\f[R]: output file suffix (default:
112 | \f[B].br\f[R])
113 | .IP \[bu] 2
114 | \f[B]-V\f[R], \f[B]--version\f[R]: display version and exit
115 | .IP \[bu] 2
116 | \f[B]-Z\f[R], \f[B]--best\f[R]: use best compression level (default);
117 | same as \[lq]\f[B]-q 11\f[R]\[rq]
118 | .SH SEE ALSO
119 | .PP
120 | \f[B]brotli\f[R] file format is defined in RFC
121 | 7932 (https://www.ietf.org/rfc/rfc7932.txt).
122 | .PP
123 | \f[B]brotli\f[R] is open-sourced under the MIT
124 | License (https://opensource.org/licenses/MIT).
125 | .PP
126 | Mailing list: https://groups.google.com/forum/#!forum/brotli
127 | .SH BUGS
128 | .PP
129 | Report bugs at: https://github.com/google/brotli/issues
130 |
--------------------------------------------------------------------------------
/c/enc/encoder_dict.h:
--------------------------------------------------------------------------------
1 | /* Copyright 2017 Google Inc. All Rights Reserved.
2 |
3 | Distributed under MIT license.
4 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 | */
6 |
7 | #ifndef BROTLI_ENC_ENCODER_DICT_H_
8 | #define BROTLI_ENC_ENCODER_DICT_H_
9 |
10 | #include
11 | #include
12 |
13 | #include "../common/dictionary.h"
14 | #include "../common/platform.h"
15 | #include "compound_dictionary.h"
16 | #include "memory.h"
17 | #include "static_dict_lut.h"
18 |
19 | #if defined(__cplusplus) || defined(c_plusplus)
20 | extern "C" {
21 | #endif
22 |
23 | /*
24 | Dictionary hierarchy for Encoder:
25 | -SharedEncoderDictionary
26 | --CompoundDictionary
27 | ---PreparedDictionary [up to 15x]
28 | = prefix dictionary with precomputed hashes
29 | --ContextualEncoderDictionary
30 | ---BrotliEncoderDictionary [up to 64x]
31 | = for each context, precomputed static dictionary with words + transforms
32 |
33 | Dictionary hierarchy from common: similar, but without precomputed hashes
34 | -BrotliSharedDictionary
35 | --BrotliDictionary [up to 64x]
36 | --BrotliTransforms [up to 64x]
37 | --const uint8_t* prefix [up to 15x]: compound dictionaries
38 | */
39 | /* Node of the trie of transformed dictionary words (see BrotliTrie). */
40 | typedef struct BrotliTrieNode {
41 | uint8_t single; /* if 1, sub is a single node for c instead of 256 */
42 | uint8_t c; /* byte the single sub node stands for (used when single is 1) */
43 | uint8_t len_; /* untransformed length */
44 | uint32_t idx_; /* word index + num words * transform index */
45 | uint32_t sub; /* index of sub node(s) in the pool */
46 | } BrotliTrieNode;
47 | /* Trie whose non-root nodes are stored in |pool| and referenced by index. */
48 | typedef struct BrotliTrie {
49 | BrotliTrieNode* pool; /* node storage that BrotliTrieNode.sub indexes into */
50 | size_t pool_capacity; /* NOTE(review): presumably allocated node count - confirm */
51 | size_t pool_size; /* NOTE(review): presumably nodes currently in use - confirm */
52 | BrotliTrieNode root;
53 | } BrotliTrie;
54 |
55 | BROTLI_INTERNAL const BrotliTrieNode* BrotliTrieSub(const BrotliTrie* trie,
56 | const BrotliTrieNode* node, uint8_t c);
57 | /* Dictionary data (words and transforms) for 1 possible context */
58 | typedef struct BrotliEncoderDictionary {
59 | const BrotliDictionary* words;
60 | uint32_t num_transforms;
61 |
62 | /* cut off for fast encoder */
63 | uint32_t cutoffTransformsCount;
64 | uint64_t cutoffTransforms;
65 |
66 | /* from dictionary_hash.h, for fast encoder */
67 | const uint16_t* hash_table_words;
68 | const uint8_t* hash_table_lengths;
69 |
70 | /* from static_dict_lut.h, for slow encoder */
71 | const uint16_t* buckets;
72 | const DictWord* dict_words;
73 | /* Heavy version, for use by slow encoder when there are custom transforms.
74 | Contains every possible transformed dictionary word in a trie. It encodes
75 | about as fast as the non-heavy encoder but consumes a lot of memory and
76 | takes time to build. */
77 | BrotliTrie trie;
78 | BROTLI_BOOL has_words_heavy;
79 |
80 | /* Reference to other dictionaries. */
81 | const struct ContextualEncoderDictionary* parent;
82 |
83 | /* Allocated memory, used only when not using the Brotli defaults */
84 | uint16_t* hash_table_data_words_;
85 | uint8_t* hash_table_data_lengths_;
86 | size_t buckets_alloc_size_;
87 | uint16_t* buckets_data_;
88 | size_t dict_words_alloc_size_;
89 | DictWord* dict_words_data_;
90 | BrotliDictionary* words_instance_;
91 | } BrotliEncoderDictionary;
92 |
93 | /* Dictionary data for all 64 contexts */
94 | typedef struct ContextualEncoderDictionary {
95 | BROTLI_BOOL context_based;
96 | uint8_t num_dictionaries;
97 | uint8_t context_map[SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS];
98 | const BrotliEncoderDictionary* dict[SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS];
99 |
100 | /* If num_instances_ is 1, instance_ is used, else dynamic allocation with
101 | instances_ is used. */
102 | size_t num_instances_;
103 | BrotliEncoderDictionary instance_;
104 | BrotliEncoderDictionary* instances_;
105 | } ContextualEncoderDictionary;
106 |
107 | typedef struct SharedEncoderDictionary {
108 | /* Magic value to distinguish this struct from PreparedDictionary for
109 | certain external usages. */
110 | uint32_t magic;
111 |
112 | /* LZ77 prefix, compound dictionary */
113 | CompoundDictionary compound;
114 |
115 | /* Custom static dictionary (optionally context-based) */
116 | ContextualEncoderDictionary contextual;
117 |
118 | /* The maximum quality the dictionary was computed for */
119 | int max_quality;
120 | } SharedEncoderDictionary;
121 | /* Dictionary holder that carries its own MemoryManager. */
122 | typedef struct ManagedDictionary {
123 | uint32_t magic; /* NOTE(review): presumably a type tag like SharedEncoderDictionary.magic - confirm */
124 | MemoryManager memory_manager_;
125 | uint32_t* dictionary; /* NOTE(review): presumably points at the wrapped dictionary object - confirm */
126 | } ManagedDictionary;
127 |
128 | /* Initializes to the brotli built-in dictionary */
129 | BROTLI_INTERNAL void BrotliInitSharedEncoderDictionary(
130 | SharedEncoderDictionary* dict);
131 |
132 | /* Initializes to shared dictionary that will be parsed from
133 | encoded_dict. Requires that you keep the encoded_dict buffer
134 | around, parts of data will point to it. */
135 | BROTLI_INTERNAL BROTLI_BOOL BrotliInitCustomSharedEncoderDictionary(
136 | MemoryManager* m, const uint8_t* encoded_dict, size_t size,
137 | int quality, SharedEncoderDictionary* dict);
138 |
139 | BROTLI_INTERNAL void BrotliCleanupSharedEncoderDictionary(
140 | MemoryManager* m, SharedEncoderDictionary* dict);
141 |
142 | BROTLI_INTERNAL ManagedDictionary* BrotliCreateManagedDictionary(
143 | brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque);
144 |
145 | BROTLI_INTERNAL void BrotliDestroyManagedDictionary(
146 | ManagedDictionary* dictionary);
147 |
148 | #if defined(__cplusplus) || defined(c_plusplus)
149 | } /* extern "C" */
150 | #endif
151 |
152 | #endif /* BROTLI_ENC_ENCODER_DICT_H_ */
153 |
--------------------------------------------------------------------------------
/python/bro.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 | """Compression/decompression utility using the Brotli algorithm."""
3 |
4 | from __future__ import print_function
5 | import argparse
6 | import os
7 | import platform
8 | import sys
9 |
10 | import brotli
11 |
12 | # default values of encoder parameters
13 | DEFAULT_PARAMS = {
14 | 'mode': brotli.MODE_GENERIC,
15 | 'quality': 11,
16 | 'lgwin': 22,
17 | 'lgblock': 0,
18 | }
19 |
20 |
21 | def get_binary_stdio(stream):
22 | """Return sys.stdin, sys.stdout or sys.stderr (selected by the name in
23 | `stream`) as an object suitable for reading/writing binary data.
24 | """
25 | assert stream in ['stdin', 'stdout', 'stderr'], 'invalid stream name'
26 | stdio = getattr(sys, stream)
27 | if sys.version_info[0] < 3:
28 | if sys.platform == 'win32':
29 | # set I/O stream binary flag on python2.x (Windows)
30 | runtime = platform.python_implementation()
31 | if runtime == 'PyPy':
32 | # the msvcrt trick doesn't work in pypy, so I use fdopen
33 | mode = 'rb' if stream == 'stdin' else 'wb'
34 | stdio = os.fdopen(stdio.fileno(), mode, 0)  # third argument 0: unbuffered
35 | else:
36 | # this works with CPython -- untested on other implementations
37 | import msvcrt
38 | msvcrt.setmode(stdio.fileno(), os.O_BINARY)
39 | return stdio
40 | else:
41 | # get 'buffer' attribute to read/write binary data on python3.x
42 | if hasattr(stdio, 'buffer'):
43 | return stdio.buffer
44 | else:
45 | orig_stdio = getattr(sys, '__%s__' % stream)  # e.g. sys.__stdout__, used when sys.stdout has no .buffer
46 | return orig_stdio.buffer
47 |
48 |
49 | def main(args=None):
50 | """Parse `args` (argparse falls back to sys.argv when None) and compress or decompress one input."""
51 | parser = argparse.ArgumentParser(
52 | prog=os.path.basename(__file__), description=__doc__)
53 | parser.add_argument(
54 | '--version', action='version', version=brotli.version)
55 | parser.add_argument(
56 | '-i',
57 | '--input',
58 | metavar='FILE',
59 | type=str,
60 | dest='infile',
61 | help='Input file',
62 | default=None)
63 | parser.add_argument(
64 | '-o',
65 | '--output',
66 | metavar='FILE',
67 | type=str,
68 | dest='outfile',
69 | help='Output file',
70 | default=None)
71 | parser.add_argument(
72 | '-f',
73 | '--force',
74 | action='store_true',
75 | help='Overwrite existing output file',
76 | default=False)
77 | parser.add_argument(
78 | '-d',
79 | '--decompress',
80 | action='store_true',
81 | help='Decompress input file',
82 | default=False)
83 | params = parser.add_argument_group('optional encoder parameters')
84 | params.add_argument(
85 | '-m',
86 | '--mode',
87 | metavar='MODE',
88 | type=int,
89 | choices=[0, 1, 2],
90 | help='The compression mode can be 0 for generic input, '
91 | '1 for UTF-8 encoded text, or 2 for WOFF 2.0 font data. '
92 | 'Defaults to 0.')
93 | params.add_argument(
94 | '-q',
95 | '--quality',
96 | metavar='QUALITY',
97 | type=int,
98 | choices=list(range(0, 12)),
99 | help='Controls the compression-speed vs compression-density '
100 | 'tradeoff. The higher the quality, the slower the '
101 | 'compression. Range is 0 to 11. Defaults to 11.')
102 | params.add_argument(
103 | '--lgwin',
104 | metavar='LGWIN',
105 | type=int,
106 | choices=list(range(10, 25)),
107 | help='Base 2 logarithm of the sliding window size. Range is '
108 | '10 to 24. Defaults to 22.')
109 | params.add_argument(
110 | '--lgblock',
111 | metavar='LGBLOCK',
112 | type=int,
113 | choices=[0] + list(range(16, 25)),
114 | help='Base 2 logarithm of the maximum input block size. '
115 | 'Range is 16 to 24. If set to 0, the value will be set based '
116 | 'on the quality. Defaults to 0.')
117 | # set default values using global DEFAULT_PARAMS dictionary
118 | parser.set_defaults(**DEFAULT_PARAMS)
119 |
120 | options = parser.parse_args(args=args)
121 |
122 | if options.infile:
123 | if not os.path.isfile(options.infile):
124 | parser.error('file "%s" not found' % options.infile)
125 | with open(options.infile, 'rb') as infile:
126 | data = infile.read()  # the whole input is read into memory
127 | else:
128 | if sys.stdin.isatty():
129 | # interactive console, just quit
130 | parser.error('no input')
131 | infile = get_binary_stdio('stdin')
132 | data = infile.read()
133 |
134 | if options.outfile:
135 | if os.path.isfile(options.outfile) and not options.force:
136 | parser.error('output file exists')
137 | outfile = open(options.outfile, 'wb')  # NOTE(review): not closed if the parser.exit() below fires
138 | else:
139 | outfile = get_binary_stdio('stdout')
140 |
141 | try:
142 | if options.decompress:
143 | data = brotli.decompress(data)
144 | else:
145 | data = brotli.compress(
146 | data,
147 | mode=options.mode,
148 | quality=options.quality,
149 | lgwin=options.lgwin,
150 | lgblock=options.lgblock)
151 | except brotli.error as e:
152 | parser.exit(1,
153 | 'bro: error: %s: %s' % (e, options.infile or 'sys.stdin'))
154 |
155 | outfile.write(data)
156 | outfile.close()  # also closes the stdout object when no -o was given
157 |
158 |
# Invoke main() only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
    main()
161 |
--------------------------------------------------------------------------------
/c/enc/hash_composite_inc.h:
--------------------------------------------------------------------------------
1 | /* NOLINT(build/header_guard) */
2 | /* Copyright 2018 Google Inc. All Rights Reserved.
3 |
4 | Distributed under MIT license.
5 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
6 | */
7 |
8 | /* template parameters: FN, HASHER_A, HASHER_B */
9 |
/* Composite hasher: this hasher allows combining two other hashers, HASHER_A
   and HASHER_B. */
12 |
13 | #define HashComposite HASHER()
14 |
15 | #define FN_A(X) EXPAND_CAT(X, HASHER_A)
16 | #define FN_B(X) EXPAND_CAT(X, HASHER_B)
17 |
18 | static BROTLI_INLINE size_t FN(HashTypeLength)(void) {
19 | size_t a = FN_A(HashTypeLength)();
20 | size_t b = FN_B(HashTypeLength)();
21 | return a > b ? a : b;
22 | }
23 |
24 | static BROTLI_INLINE size_t FN(StoreLookahead)(void) {
25 | size_t a = FN_A(StoreLookahead)();
26 | size_t b = FN_B(StoreLookahead)();
27 | return a > b ? a : b;
28 | }
29 |
/* State of the composite hasher: the two wrapped hashers, a private
   HasherCommon copy for each of them, and what is needed to initialize them
   lazily on the first FN(Prepare) call. */
typedef struct HashComposite {
  /* The two wrapped hasher instances. */
  HASHER_A ha;
  HASHER_B hb;
  /* Per-hasher copies of the shared HasherCommon; these are what the wrapped
     hashers are initialized with. */
  HasherCommon ha_common;
  HasherCommon hb_common;

  /* Shortcuts. */
  /* The HasherCommon that was passed to FN(Initialize). */
  HasherCommon* common;

  /* BROTLI_TRUE until the first FN(Prepare) call has initialized ha / hb. */
  BROTLI_BOOL fresh;
  /* Encoder params remembered by FN(Initialize) for the deferred
     initialization of ha / hb. */
  const BrotliEncoderParams* params;
} HashComposite;
42 |
43 | static void FN(Initialize)(HasherCommon* common,
44 | HashComposite* BROTLI_RESTRICT self, const BrotliEncoderParams* params) {
45 | self->common = common;
46 |
47 | self->ha_common = *self->common;
48 | self->hb_common = *self->common;
49 | self->fresh = BROTLI_TRUE;
50 | self->params = params;
51 | /* TODO(lode): Initialize of the hashers is deferred to Prepare (and params
52 | remembered here) because we don't get the one_shot and input_size params
53 | here that are needed to know the memory size of them. Instead provide
54 | those params to all hashers FN(Initialize) */
55 | }
56 |
57 | static void FN(Prepare)(
58 | HashComposite* BROTLI_RESTRICT self, BROTLI_BOOL one_shot,
59 | size_t input_size, const uint8_t* BROTLI_RESTRICT data) {
60 | if (self->fresh) {
61 | self->fresh = BROTLI_FALSE;
62 | self->ha_common.extra[0] = self->common->extra[0];
63 | self->ha_common.extra[1] = self->common->extra[1];
64 | self->ha_common.extra[2] = NULL;
65 | self->ha_common.extra[3] = NULL;
66 | self->hb_common.extra[0] = self->common->extra[2];
67 | self->hb_common.extra[1] = self->common->extra[3];
68 | self->hb_common.extra[2] = NULL;
69 | self->hb_common.extra[3] = NULL;
70 |
71 | FN_A(Initialize)(&self->ha_common, &self->ha, self->params);
72 | FN_B(Initialize)(&self->hb_common, &self->hb, self->params);
73 | }
74 | FN_A(Prepare)(&self->ha, one_shot, input_size, data);
75 | FN_B(Prepare)(&self->hb, one_shot, input_size, data);
76 | }
77 |
78 | static BROTLI_INLINE void FN(HashMemAllocInBytes)(
79 | const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
80 | size_t input_size, size_t* alloc_size) {
81 | size_t alloc_size_a[4] = {0};
82 | size_t alloc_size_b[4] = {0};
83 | FN_A(HashMemAllocInBytes)(params, one_shot, input_size, alloc_size_a);
84 | FN_B(HashMemAllocInBytes)(params, one_shot, input_size, alloc_size_b);
85 | /* Should never happen. */
86 | if (alloc_size_a[2] != 0 || alloc_size_a[3] != 0) exit(EXIT_FAILURE);
87 | if (alloc_size_b[2] != 0 || alloc_size_b[3] != 0) exit(EXIT_FAILURE);
88 | alloc_size[0] = alloc_size_a[0];
89 | alloc_size[1] = alloc_size_a[1];
90 | alloc_size[2] = alloc_size_b[0];
91 | alloc_size[3] = alloc_size_b[1];
92 | }
93 |
94 | static BROTLI_INLINE void FN(Store)(HashComposite* BROTLI_RESTRICT self,
95 | const uint8_t* BROTLI_RESTRICT data, const size_t mask, const size_t ix) {
96 | FN_A(Store)(&self->ha, data, mask, ix);
97 | FN_B(Store)(&self->hb, data, mask, ix);
98 | }
99 |
100 | static BROTLI_INLINE void FN(StoreRange)(
101 | HashComposite* BROTLI_RESTRICT self, const uint8_t* BROTLI_RESTRICT data,
102 | const size_t mask, const size_t ix_start,
103 | const size_t ix_end) {
104 | FN_A(StoreRange)(&self->ha, data, mask, ix_start, ix_end);
105 | FN_B(StoreRange)(&self->hb, data, mask, ix_start, ix_end);
106 | }
107 |
108 | static BROTLI_INLINE void FN(StitchToPreviousBlock)(
109 | HashComposite* BROTLI_RESTRICT self,
110 | size_t num_bytes, size_t position, const uint8_t* ringbuffer,
111 | size_t ring_buffer_mask) {
112 | FN_A(StitchToPreviousBlock)(&self->ha, num_bytes, position,
113 | ringbuffer, ring_buffer_mask);
114 | FN_B(StitchToPreviousBlock)(&self->hb, num_bytes, position,
115 | ringbuffer, ring_buffer_mask);
116 | }
117 |
118 | static BROTLI_INLINE void FN(PrepareDistanceCache)(
119 | HashComposite* BROTLI_RESTRICT self, int* BROTLI_RESTRICT distance_cache) {
120 | FN_A(PrepareDistanceCache)(&self->ha, distance_cache);
121 | FN_B(PrepareDistanceCache)(&self->hb, distance_cache);
122 | }
123 |
124 | static BROTLI_INLINE void FN(FindLongestMatch)(
125 | HashComposite* BROTLI_RESTRICT self,
126 | const BrotliEncoderDictionary* dictionary,
127 | const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask,
128 | const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix,
129 | const size_t max_length, const size_t max_backward,
130 | const size_t dictionary_distance, const size_t max_distance,
131 | HasherSearchResult* BROTLI_RESTRICT out) {
132 | FN_A(FindLongestMatch)(&self->ha, dictionary, data, ring_buffer_mask,
133 | distance_cache, cur_ix, max_length, max_backward, dictionary_distance,
134 | max_distance, out);
135 | FN_B(FindLongestMatch)(&self->hb, dictionary, data, ring_buffer_mask,
136 | distance_cache, cur_ix, max_length, max_backward, dictionary_distance,
137 | max_distance, out);
138 | }
139 |
140 | #undef HashComposite
141 |
--------------------------------------------------------------------------------
/c/enc/memory.c:
--------------------------------------------------------------------------------
1 | /* Copyright 2015 Google Inc. All Rights Reserved.
2 |
3 | Distributed under MIT license.
4 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 | */
6 |
/* Memory management for the encoder: allocation wrappers around the
   user-supplied (or default) alloc/free functions, plus tracking of live
   allocations so they can be released in bulk. */
9 |
10 | #include "memory.h"
11 |
#include <stdlib.h>  /* exit, free, malloc */
#include <string.h>  /* memcpy */
14 |
15 | #include
16 |
17 | #include "../common/platform.h"
18 |
19 | #if defined(__cplusplus) || defined(c_plusplus)
20 | extern "C" {
21 | #endif
22 |
23 | #define MAX_PERM_ALLOCATED 128
24 | #define MAX_NEW_ALLOCATED 64
25 | #define MAX_NEW_FREED 64
26 |
27 | #define PERM_ALLOCATED_OFFSET 0
28 | #define NEW_ALLOCATED_OFFSET MAX_PERM_ALLOCATED
29 | #define NEW_FREED_OFFSET (MAX_PERM_ALLOCATED + MAX_NEW_ALLOCATED)
30 |
31 | void BrotliInitMemoryManager(
32 | MemoryManager* m, brotli_alloc_func alloc_func, brotli_free_func free_func,
33 | void* opaque) {
34 | if (!alloc_func) {
35 | m->alloc_func = BrotliDefaultAllocFunc;
36 | m->free_func = BrotliDefaultFreeFunc;
37 | m->opaque = 0;
38 | } else {
39 | m->alloc_func = alloc_func;
40 | m->free_func = free_func;
41 | m->opaque = opaque;
42 | }
43 | #if !defined(BROTLI_ENCODER_EXIT_ON_OOM)
44 | m->is_oom = BROTLI_FALSE;
45 | m->perm_allocated = 0;
46 | m->new_allocated = 0;
47 | m->new_freed = 0;
48 | #endif /* BROTLI_ENCODER_EXIT_ON_OOM */
49 | }
50 |
51 | #if defined(BROTLI_ENCODER_EXIT_ON_OOM)
52 |
53 | void* BrotliAllocate(MemoryManager* m, size_t n) {
54 | void* result = m->alloc_func(m->opaque, n);
55 | if (!result) exit(EXIT_FAILURE);
56 | return result;
57 | }
58 |
/* Exit-on-OOM variant: hands `p` straight to the manager's free_func; no
   allocation tracking is done in this configuration. */
void BrotliFree(MemoryManager* m, void* p) {
  m->free_func(m->opaque, p);
}
62 |
/* Exit-on-OOM variant: nothing to release because allocations are not tracked
   in this configuration. */
void BrotliWipeOutMemoryManager(MemoryManager* m) {
  BROTLI_UNUSED(m);
}
66 |
67 | #else /* BROTLI_ENCODER_EXIT_ON_OOM */
68 |
/* Sorts items[0..n) in ascending pointer order, in place, using a shell sort
   with the fixed gap sequence 23, 10, 4, 1. */
static void SortPointers(void** items, const size_t n) {
  static const size_t kGaps[] = {23, 10, 4, 1};
  const size_t num_gaps = sizeof(kGaps) / sizeof(kGaps[0]);
  size_t pass;
  for (pass = 0; pass < num_gaps; ++pass) {
    const size_t stride = kGaps[pass];
    size_t next;
    /* Gapped insertion sort for the current stride. */
    for (next = stride; next < n; ++next) {
      void* const moving = items[next];
      size_t slot = next;
      while (slot >= stride && moving < items[slot - stride]) {
        items[slot] = items[slot - stride];
        slot -= stride;
      }
      items[slot] = moving;
    }
  }
}
86 |
/* Removes, pairwise, the pointers that occur in both `a` and `b`. Both arrays
   are walked as a sorted merge (callers sort them first); the survivors are
   compacted to the front of their own array in their original order. Returns
   the number of removed pairs, i.e. how much shorter each array became. */
static size_t Annihilate(void** a, size_t a_len, void** b, size_t b_len) {
  size_t a_in = 0;
  size_t b_in = 0;
  size_t a_out = 0;
  size_t b_out = 0;
  size_t matched = 0;
  while (a_in < a_len && b_in < b_len) {
    void* const from_a = a[a_in];
    void* const from_b = b[b_in];
    if (from_a == from_b) {
      /* Same pointer on both sides: drop it from both. */
      ++a_in;
      ++b_in;
      ++matched;
    } else if (from_a < from_b) {
      a[a_out] = from_a;
      ++a_out;
      ++a_in;
    } else {
      b[b_out] = from_b;
      ++b_out;
      ++b_in;
    }
  }
  /* One side is exhausted; keep whatever remains on the other. */
  for (; a_in < a_len; ++a_in) a[a_out++] = a[a_in];
  for (; b_in < b_len; ++b_in) b[b_out++] = b[b_in];
  return matched;
}
108 |
109 | static void CollectGarbagePointers(MemoryManager* m) {
110 | size_t annihilated;
111 | SortPointers(m->pointers + NEW_ALLOCATED_OFFSET, m->new_allocated);
112 | SortPointers(m->pointers + NEW_FREED_OFFSET, m->new_freed);
113 | annihilated = Annihilate(
114 | m->pointers + NEW_ALLOCATED_OFFSET, m->new_allocated,
115 | m->pointers + NEW_FREED_OFFSET, m->new_freed);
116 | m->new_allocated -= annihilated;
117 | m->new_freed -= annihilated;
118 |
119 | if (m->new_freed != 0) {
120 | annihilated = Annihilate(
121 | m->pointers + PERM_ALLOCATED_OFFSET, m->perm_allocated,
122 | m->pointers + NEW_FREED_OFFSET, m->new_freed);
123 | m->perm_allocated -= annihilated;
124 | m->new_freed -= annihilated;
125 | BROTLI_DCHECK(m->new_freed == 0);
126 | }
127 |
128 | if (m->new_allocated != 0) {
129 | BROTLI_DCHECK(m->perm_allocated + m->new_allocated <= MAX_PERM_ALLOCATED);
130 | memcpy(m->pointers + PERM_ALLOCATED_OFFSET + m->perm_allocated,
131 | m->pointers + NEW_ALLOCATED_OFFSET,
132 | sizeof(void*) * m->new_allocated);
133 | m->perm_allocated += m->new_allocated;
134 | m->new_allocated = 0;
135 | SortPointers(m->pointers + PERM_ALLOCATED_OFFSET, m->perm_allocated);
136 | }
137 | }
138 |
139 | void* BrotliAllocate(MemoryManager* m, size_t n) {
140 | void* result = m->alloc_func(m->opaque, n);
141 | if (!result) {
142 | m->is_oom = BROTLI_TRUE;
143 | return NULL;
144 | }
145 | if (m->new_allocated == MAX_NEW_ALLOCATED) CollectGarbagePointers(m);
146 | m->pointers[NEW_ALLOCATED_OFFSET + (m->new_allocated++)] = result;
147 | return result;
148 | }
149 |
150 | void BrotliFree(MemoryManager* m, void* p) {
151 | if (!p) return;
152 | m->free_func(m->opaque, p);
153 | if (m->new_freed == MAX_NEW_FREED) CollectGarbagePointers(m);
154 | m->pointers[NEW_FREED_OFFSET + (m->new_freed++)] = p;
155 | }
156 |
157 | void BrotliWipeOutMemoryManager(MemoryManager* m) {
158 | size_t i;
159 | CollectGarbagePointers(m);
160 | /* Now all unfreed pointers are in perm-allocated list. */
161 | for (i = 0; i < m->perm_allocated; ++i) {
162 | m->free_func(m->opaque, m->pointers[PERM_ALLOCATED_OFFSET + i]);
163 | }
164 | m->perm_allocated = 0;
165 | }
166 |
167 | #endif /* BROTLI_ENCODER_EXIT_ON_OOM */
168 |
169 | void* BrotliBootstrapAlloc(size_t size,
170 | brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) {
171 | if (!alloc_func && !free_func) {
172 | return malloc(size);
173 | } else if (alloc_func && free_func) {
174 | return alloc_func(opaque, size);
175 | }
176 | return NULL;
177 | }
178 |
179 | void BrotliBootstrapFree(void* address, MemoryManager* m) {
180 | if (!address) {
181 | /* Should not happen! */
182 | return;
183 | } else {
184 | /* Copy values, as those would be freed. */
185 | brotli_free_func free_func = m->free_func;
186 | void* opaque = m->opaque;
187 | free_func(opaque, address);
188 | }
189 | }
190 |
191 | #if defined(__cplusplus) || defined(c_plusplus)
192 | } /* extern "C" */
193 | #endif
194 |
--------------------------------------------------------------------------------
/c/enc/fast_log.c:
--------------------------------------------------------------------------------
1 | /* Copyright 2013 Google Inc. All Rights Reserved.
2 |
3 | Distributed under MIT license.
4 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 | */
6 |
7 | #include "fast_log.h"
8 |
9 | #if defined(__cplusplus) || defined(c_plusplus)
10 | extern "C" {
11 | #endif
12 |
/* Lookup table of base-2 logarithms: entry i holds log2(i) for i >= 1, and
   entry 0 holds 0.0. Generated with:
   ", ".join(["%.16ff" % x for x in [0.0]+[log2(x) for x in range(1, 256)]])
   NOTE(review): every literal carries an `f` suffix, so each value is a float
   constant converted to double; the stored entries therefore have float
   precision only. The generator yields 256 values, so BROTLI_LOG2_TABLE_SIZE
   is presumably 256 - confirm in fast_log.h. */
const double kBrotliLog2Table[BROTLI_LOG2_TABLE_SIZE] = {
  0.0000000000000000f, 0.0000000000000000f, 1.0000000000000000f,
  1.5849625007211563f, 2.0000000000000000f, 2.3219280948873622f,
  2.5849625007211561f, 2.8073549220576042f, 3.0000000000000000f,
  3.1699250014423126f, 3.3219280948873626f, 3.4594316186372978f,
  3.5849625007211565f, 3.7004397181410922f, 3.8073549220576037f,
  3.9068905956085187f, 4.0000000000000000f, 4.0874628412503400f,
  4.1699250014423122f, 4.2479275134435852f, 4.3219280948873626f,
  4.3923174227787607f, 4.4594316186372973f, 4.5235619560570131f,
  4.5849625007211570f, 4.6438561897747244f, 4.7004397181410926f,
  4.7548875021634691f, 4.8073549220576037f, 4.8579809951275728f,
  4.9068905956085187f, 4.9541963103868758f, 5.0000000000000000f,
  5.0443941193584534f, 5.0874628412503400f, 5.1292830169449664f,
  5.1699250014423122f, 5.2094533656289501f, 5.2479275134435852f,
  5.2854022188622487f, 5.3219280948873626f, 5.3575520046180838f,
  5.3923174227787607f, 5.4262647547020979f, 5.4594316186372973f,
  5.4918530963296748f, 5.5235619560570131f, 5.5545888516776376f,
  5.5849625007211570f, 5.6147098441152083f, 5.6438561897747244f,
  5.6724253419714961f, 5.7004397181410926f, 5.7279204545631996f,
  5.7548875021634691f, 5.7813597135246599f, 5.8073549220576046f,
  5.8328900141647422f, 5.8579809951275719f, 5.8826430493618416f,
  5.9068905956085187f, 5.9307373375628867f, 5.9541963103868758f,
  5.9772799234999168f, 6.0000000000000000f, 6.0223678130284544f,
  6.0443941193584534f, 6.0660891904577721f, 6.0874628412503400f,
  6.1085244567781700f, 6.1292830169449672f, 6.1497471195046822f,
  6.1699250014423122f, 6.1898245588800176f, 6.2094533656289510f,
  6.2288186904958804f, 6.2479275134435861f, 6.2667865406949019f,
  6.2854022188622487f, 6.3037807481771031f, 6.3219280948873617f,
  6.3398500028846252f, 6.3575520046180847f, 6.3750394313469254f,
  6.3923174227787598f, 6.4093909361377026f, 6.4262647547020979f,
  6.4429434958487288f, 6.4594316186372982f, 6.4757334309663976f,
  6.4918530963296748f, 6.5077946401986964f, 6.5235619560570131f,
  6.5391588111080319f, 6.5545888516776376f, 6.5698556083309478f,
  6.5849625007211561f, 6.5999128421871278f, 6.6147098441152092f,
  6.6293566200796095f, 6.6438561897747253f, 6.6582114827517955f,
  6.6724253419714952f, 6.6865005271832185f, 6.7004397181410917f,
  6.7142455176661224f, 6.7279204545631988f, 6.7414669864011465f,
  6.7548875021634691f, 6.7681843247769260f, 6.7813597135246599f,
  6.7944158663501062f, 6.8073549220576037f, 6.8201789624151887f,
  6.8328900141647422f, 6.8454900509443757f, 6.8579809951275719f,
  6.8703647195834048f, 6.8826430493618416f, 6.8948177633079437f,
  6.9068905956085187f, 6.9188632372745955f, 6.9307373375628867f,
  6.9425145053392399f, 6.9541963103868758f, 6.9657842846620879f,
  6.9772799234999168f, 6.9886846867721664f, 7.0000000000000000f,
  7.0112272554232540f, 7.0223678130284544f, 7.0334230015374501f,
  7.0443941193584534f, 7.0552824355011898f, 7.0660891904577721f,
  7.0768155970508317f, 7.0874628412503400f, 7.0980320829605272f,
  7.1085244567781700f, 7.1189410727235076f, 7.1292830169449664f,
  7.1395513523987937f, 7.1497471195046822f, 7.1598713367783891f,
  7.1699250014423130f, 7.1799090900149345f, 7.1898245588800176f,
  7.1996723448363644f, 7.2094533656289492f, 7.2191685204621621f,
  7.2288186904958804f, 7.2384047393250794f, 7.2479275134435861f,
  7.2573878426926521f, 7.2667865406949019f, 7.2761244052742384f,
  7.2854022188622487f, 7.2946207488916270f, 7.3037807481771031f,
  7.3128829552843557f, 7.3219280948873617f, 7.3309168781146177f,
  7.3398500028846243f, 7.3487281542310781f, 7.3575520046180847f,
  7.3663222142458151f, 7.3750394313469254f, 7.3837042924740528f,
  7.3923174227787607f, 7.4008794362821844f, 7.4093909361377026f,
  7.4178525148858991f, 7.4262647547020979f, 7.4346282276367255f,
  7.4429434958487288f, 7.4512111118323299f, 7.4594316186372973f,
  7.4676055500829976f, 7.4757334309663976f, 7.4838157772642564f,
  7.4918530963296748f, 7.4998458870832057f, 7.5077946401986964f,
  7.5156998382840436f, 7.5235619560570131f, 7.5313814605163119f,
  7.5391588111080319f, 7.5468944598876373f, 7.5545888516776376f,
  7.5622424242210728f, 7.5698556083309478f, 7.5774288280357487f,
  7.5849625007211561f, 7.5924570372680806f, 7.5999128421871278f,
  7.6073303137496113f, 7.6147098441152075f, 7.6220518194563764f,
  7.6293566200796095f, 7.6366246205436488f, 7.6438561897747244f,
  7.6510516911789290f, 7.6582114827517955f, 7.6653359171851765f,
  7.6724253419714952f, 7.6794800995054464f, 7.6865005271832185f,
  7.6934869574993252f, 7.7004397181410926f, 7.7073591320808825f,
  7.7142455176661224f, 7.7210991887071856f, 7.7279204545631996f,
  7.7347096202258392f, 7.7414669864011465f, 7.7481928495894596f,
  7.7548875021634691f, 7.7615512324444795f, 7.7681843247769260f,
  7.7747870596011737f, 7.7813597135246608f, 7.7879025593914317f,
  7.7944158663501062f, 7.8008998999203047f, 7.8073549220576037f,
  7.8137811912170374f, 7.8201789624151887f, 7.8265484872909159f,
  7.8328900141647422f, 7.8392037880969445f, 7.8454900509443757f,
  7.8517490414160571f, 7.8579809951275719f, 7.8641861446542798f,
  7.8703647195834048f, 7.8765169465650002f, 7.8826430493618425f,
  7.8887432488982601f, 7.8948177633079446f, 7.9008668079807496f,
  7.9068905956085187f, 7.9128893362299619f, 7.9188632372745955f,
  7.9248125036057813f, 7.9307373375628867f, 7.9366379390025719f,
  7.9425145053392399f, 7.9483672315846778f, 7.9541963103868758f,
  7.9600019320680806f, 7.9657842846620870f, 7.9715435539507720f,
  7.9772799234999168f, 7.9829935746943104f, 7.9886846867721664f,
  7.9943534368588578f
};
102 |
103 | #if defined(__cplusplus) || defined(c_plusplus)
104 | } /* extern "C" */
105 | #endif
106 |
--------------------------------------------------------------------------------
/c/dec/state.c:
--------------------------------------------------------------------------------
1 | /* Copyright 2015 Google Inc. All Rights Reserved.
2 |
3 | Distributed under MIT license.
4 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 | */
6 |
7 | #include "state.h"
8 |
#include <stdlib.h>  /* free, malloc */
10 |
11 | #include
12 |
13 | #include "../common/dictionary.h"
14 | #include "huffman.h"
15 |
16 | #if defined(__cplusplus) || defined(c_plusplus)
17 | extern "C" {
18 | #endif
19 |
20 | BROTLI_BOOL BrotliDecoderStateInit(BrotliDecoderState* s,
21 | brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) {
22 | if (!alloc_func) {
23 | s->alloc_func = BrotliDefaultAllocFunc;
24 | s->free_func = BrotliDefaultFreeFunc;
25 | s->memory_manager_opaque = 0;
26 | } else {
27 | s->alloc_func = alloc_func;
28 | s->free_func = free_func;
29 | s->memory_manager_opaque = opaque;
30 | }
31 |
32 | s->error_code = 0; /* BROTLI_DECODER_NO_ERROR */
33 |
34 | BrotliInitBitReader(&s->br);
35 | s->state = BROTLI_STATE_UNINITED;
36 | s->large_window = 0;
37 | s->substate_metablock_header = BROTLI_STATE_METABLOCK_HEADER_NONE;
38 | s->substate_uncompressed = BROTLI_STATE_UNCOMPRESSED_NONE;
39 | s->substate_decode_uint8 = BROTLI_STATE_DECODE_UINT8_NONE;
40 | s->substate_read_block_length = BROTLI_STATE_READ_BLOCK_LENGTH_NONE;
41 |
42 | s->buffer_length = 0;
43 | s->loop_counter = 0;
44 | s->pos = 0;
45 | s->rb_roundtrips = 0;
46 | s->partial_pos_out = 0;
47 | s->used_input = 0;
48 |
49 | s->block_type_trees = NULL;
50 | s->block_len_trees = NULL;
51 | s->ringbuffer = NULL;
52 | s->ringbuffer_size = 0;
53 | s->new_ringbuffer_size = 0;
54 | s->ringbuffer_mask = 0;
55 |
56 | s->context_map = NULL;
57 | s->context_modes = NULL;
58 | s->dist_context_map = NULL;
59 | s->context_map_slice = NULL;
60 | s->dist_context_map_slice = NULL;
61 |
62 | s->literal_hgroup.codes = NULL;
63 | s->literal_hgroup.htrees = NULL;
64 | s->insert_copy_hgroup.codes = NULL;
65 | s->insert_copy_hgroup.htrees = NULL;
66 | s->distance_hgroup.codes = NULL;
67 | s->distance_hgroup.htrees = NULL;
68 |
69 | s->is_last_metablock = 0;
70 | s->is_uncompressed = 0;
71 | s->is_metadata = 0;
72 | s->should_wrap_ringbuffer = 0;
73 | s->canny_ringbuffer_allocation = 1;
74 |
75 | s->window_bits = 0;
76 | s->max_distance = 0;
77 | s->dist_rb[0] = 16;
78 | s->dist_rb[1] = 15;
79 | s->dist_rb[2] = 11;
80 | s->dist_rb[3] = 4;
81 | s->dist_rb_idx = 0;
82 | s->block_type_trees = NULL;
83 | s->block_len_trees = NULL;
84 |
85 | s->mtf_upper_bound = 63;
86 |
87 | s->compound_dictionary = NULL;
88 | s->dictionary =
89 | BrotliSharedDictionaryCreateInstance(alloc_func, free_func, opaque);
90 | if (!s->dictionary) return BROTLI_FALSE;
91 |
92 | s->metadata_start_func = NULL;
93 | s->metadata_chunk_func = NULL;
94 | s->metadata_callback_opaque = 0;
95 |
96 | return BROTLI_TRUE;
97 | }
98 |
99 | void BrotliDecoderStateMetablockBegin(BrotliDecoderState* s) {
100 | s->meta_block_remaining_len = 0;
101 | s->block_length[0] = 1U << 24;
102 | s->block_length[1] = 1U << 24;
103 | s->block_length[2] = 1U << 24;
104 | s->num_block_types[0] = 1;
105 | s->num_block_types[1] = 1;
106 | s->num_block_types[2] = 1;
107 | s->block_type_rb[0] = 1;
108 | s->block_type_rb[1] = 0;
109 | s->block_type_rb[2] = 1;
110 | s->block_type_rb[3] = 0;
111 | s->block_type_rb[4] = 1;
112 | s->block_type_rb[5] = 0;
113 | s->context_map = NULL;
114 | s->context_modes = NULL;
115 | s->dist_context_map = NULL;
116 | s->context_map_slice = NULL;
117 | s->literal_htree = NULL;
118 | s->dist_context_map_slice = NULL;
119 | s->dist_htree_index = 0;
120 | s->context_lookup = NULL;
121 | s->literal_hgroup.codes = NULL;
122 | s->literal_hgroup.htrees = NULL;
123 | s->insert_copy_hgroup.codes = NULL;
124 | s->insert_copy_hgroup.htrees = NULL;
125 | s->distance_hgroup.codes = NULL;
126 | s->distance_hgroup.htrees = NULL;
127 | }
128 |
/* Releases the per-metablock allocations of `s`: the context modes, both
   context maps, and the `htrees` allocation of each of the three Huffman tree
   groups. The groups' `codes` pointers are not freed separately; in
   BrotliDecoderHuffmanTreeGroupInit, `codes` points into the same allocation
   as `htrees`.
   NOTE(review): whether BROTLI_DECODER_FREE also resets the freed pointers
   depends on the macro definition, which is not visible here. */
void BrotliDecoderStateCleanupAfterMetablock(BrotliDecoderState* s) {
  BROTLI_DECODER_FREE(s, s->context_modes);
  BROTLI_DECODER_FREE(s, s->context_map);
  BROTLI_DECODER_FREE(s, s->dist_context_map);
  BROTLI_DECODER_FREE(s, s->literal_hgroup.htrees);
  BROTLI_DECODER_FREE(s, s->insert_copy_hgroup.htrees);
  BROTLI_DECODER_FREE(s, s->distance_hgroup.htrees);
}
137 |
138 | #ifdef BROTLI_REPORTING
/* When BROTLI_REPORTING is defined, an extra reporting module has to be
   linked. */
140 | void BrotliDecoderOnFinish(const BrotliDecoderState* s);
141 | #define BROTLI_DECODER_ON_FINISH(s) BrotliDecoderOnFinish(s);
142 | #else
143 | #if !defined(BROTLI_DECODER_ON_FINISH)
144 | #define BROTLI_DECODER_ON_FINISH(s) (void)(s);
145 | #endif
146 | #endif
147 |
/* Releases everything owned by `s`: the per-metablock allocations, the
   compound dictionary, the shared dictionary instance, the ring buffer and the
   block type trees. */
void BrotliDecoderStateCleanup(BrotliDecoderState* s) {
  BrotliDecoderStateCleanupAfterMetablock(s);

  /* Reporting hook; the macro expands to a statement that already ends in
     ';', so the extra ';' below is an empty statement. */
  BROTLI_DECODER_ON_FINISH(s);

  BROTLI_DECODER_FREE(s, s->compound_dictionary);
  BrotliSharedDictionaryDestroyInstance(s->dictionary);
  s->dictionary = NULL;
  BROTLI_DECODER_FREE(s, s->ringbuffer);
  /* NOTE(review): block_len_trees is not freed here; presumably it shares the
     block_type_trees allocation - confirm where the trees are allocated. */
  BROTLI_DECODER_FREE(s, s->block_type_trees);
}
159 |
160 | BROTLI_BOOL BrotliDecoderHuffmanTreeGroupInit(BrotliDecoderState* s,
161 | HuffmanTreeGroup* group, uint32_t alphabet_size_max,
162 | uint32_t alphabet_size_limit, uint32_t ntrees) {
163 | /* 376 = 256 (1-st level table) + 4 + 7 + 15 + 31 + 63 (2-nd level mix-tables)
164 | This number is discovered "unlimited" "enough" calculator; it is actually
165 | a wee bigger than required in several cases (especially for alphabets with
166 | less than 16 symbols). */
167 | const size_t max_table_size = alphabet_size_limit + 376;
168 | const size_t code_size = sizeof(HuffmanCode) * ntrees * max_table_size;
169 | const size_t htree_size = sizeof(HuffmanCode*) * ntrees;
170 | /* Pointer alignment is, hopefully, wider than sizeof(HuffmanCode). */
171 | HuffmanCode** p = (HuffmanCode**)BROTLI_DECODER_ALLOC(s,
172 | code_size + htree_size);
173 | group->alphabet_size_max = (uint16_t)alphabet_size_max;
174 | group->alphabet_size_limit = (uint16_t)alphabet_size_limit;
175 | group->num_htrees = (uint16_t)ntrees;
176 | group->htrees = p;
177 | group->codes = (HuffmanCode*)(&p[ntrees]);
178 | return !!p;
179 | }
180 |
181 | #if defined(__cplusplus) || defined(c_plusplus)
182 | } /* extern "C" */
183 | #endif
184 |
--------------------------------------------------------------------------------
/c/enc/backward_references.c:
--------------------------------------------------------------------------------
1 | /* Copyright 2013 Google Inc. All Rights Reserved.
2 |
3 | Distributed under MIT license.
4 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 | */
6 |
7 | /* Function to find backward reference copies. */
8 |
9 | #include "backward_references.h"
10 |
11 | #include
12 |
13 | #include "../common/constants.h"
14 | #include "../common/dictionary.h"
15 | #include "../common/platform.h"
16 | #include "command.h"
17 | #include "compound_dictionary.h"
18 | #include "dictionary_hash.h"
19 | #include "encoder_dict.h"
20 | #include "memory.h"
21 | #include "quality.h"
22 |
23 | #if defined(__cplusplus) || defined(c_plusplus)
24 | extern "C" {
25 | #endif
26 |
27 | static BROTLI_INLINE size_t ComputeDistanceCode(size_t distance,
28 | size_t max_distance,
29 | const int* dist_cache) {
30 | if (distance <= max_distance) {
31 | size_t distance_plus_3 = distance + 3;
32 | size_t offset0 = distance_plus_3 - (size_t)dist_cache[0];
33 | size_t offset1 = distance_plus_3 - (size_t)dist_cache[1];
34 | if (distance == (size_t)dist_cache[0]) {
35 | return 0;
36 | } else if (distance == (size_t)dist_cache[1]) {
37 | return 1;
38 | } else if (offset0 < 7) {
39 | return (0x9750468 >> (4 * offset0)) & 0xF;
40 | } else if (offset1 < 7) {
41 | return (0xFDB1ACE >> (4 * offset1)) & 0xF;
42 | } else if (distance == (size_t)dist_cache[2]) {
43 | return 2;
44 | } else if (distance == (size_t)dist_cache[3]) {
45 | return 3;
46 | }
47 | }
48 | return distance + BROTLI_NUM_DISTANCE_SHORT_CODES - 1;
49 | }
50 |
51 | #define EXPAND_CAT(a, b) CAT(a, b)
52 | #define CAT(a, b) a ## b
53 | #define FN(X) EXPAND_CAT(X, HASHER())
54 | #define EXPORT_FN(X) EXPAND_CAT(X, EXPAND_CAT(PREFIX(), HASHER()))
55 |
56 | #define PREFIX() N
57 | #define ENABLE_COMPOUND_DICTIONARY 0
58 |
59 | #define HASHER() H2
60 | /* NOLINTNEXTLINE(build/include) */
61 | #include "backward_references_inc.h"
62 | #undef HASHER
63 |
64 | #define HASHER() H3
65 | /* NOLINTNEXTLINE(build/include) */
66 | #include "backward_references_inc.h"
67 | #undef HASHER
68 |
69 | #define HASHER() H4
70 | /* NOLINTNEXTLINE(build/include) */
71 | #include "backward_references_inc.h"
72 | #undef HASHER
73 |
74 | #define HASHER() H5
75 | /* NOLINTNEXTLINE(build/include) */
76 | #include "backward_references_inc.h"
77 | #undef HASHER
78 |
79 | #define HASHER() H6
80 | /* NOLINTNEXTLINE(build/include) */
81 | #include "backward_references_inc.h"
82 | #undef HASHER
83 |
84 | #define HASHER() H40
85 | /* NOLINTNEXTLINE(build/include) */
86 | #include "backward_references_inc.h"
87 | #undef HASHER
88 |
89 | #define HASHER() H41
90 | /* NOLINTNEXTLINE(build/include) */
91 | #include "backward_references_inc.h"
92 | #undef HASHER
93 |
94 | #define HASHER() H42
95 | /* NOLINTNEXTLINE(build/include) */
96 | #include "backward_references_inc.h"
97 | #undef HASHER
98 |
99 | #define HASHER() H54
100 | /* NOLINTNEXTLINE(build/include) */
101 | #include "backward_references_inc.h"
102 | #undef HASHER
103 |
104 | #define HASHER() H35
105 | /* NOLINTNEXTLINE(build/include) */
106 | #include "backward_references_inc.h"
107 | #undef HASHER
108 |
109 | #define HASHER() H55
110 | /* NOLINTNEXTLINE(build/include) */
111 | #include "backward_references_inc.h"
112 | #undef HASHER
113 |
114 | #define HASHER() H65
115 | /* NOLINTNEXTLINE(build/include) */
116 | #include "backward_references_inc.h"
117 | #undef HASHER
118 |
119 | #undef ENABLE_COMPOUND_DICTIONARY
120 | #undef PREFIX
121 | #define PREFIX() D
122 | #define ENABLE_COMPOUND_DICTIONARY 1
123 |
124 | #define HASHER() H5
125 | /* NOLINTNEXTLINE(build/include) */
126 | #include "backward_references_inc.h"
127 | #undef HASHER
128 | #define HASHER() H6
129 | /* NOLINTNEXTLINE(build/include) */
130 | #include "backward_references_inc.h"
131 | #undef HASHER
132 | #define HASHER() H40
133 | /* NOLINTNEXTLINE(build/include) */
134 | #include "backward_references_inc.h"
135 | #undef HASHER
136 | #define HASHER() H41
137 | /* NOLINTNEXTLINE(build/include) */
138 | #include "backward_references_inc.h"
139 | #undef HASHER
140 | #define HASHER() H42
141 | /* NOLINTNEXTLINE(build/include) */
142 | #include "backward_references_inc.h"
143 | #undef HASHER
144 | #define HASHER() H55
145 | /* NOLINTNEXTLINE(build/include) */
146 | #include "backward_references_inc.h"
147 | #undef HASHER
148 | #define HASHER() H65
149 | /* NOLINTNEXTLINE(build/include) */
150 | #include "backward_references_inc.h"
151 | #undef HASHER
152 |
153 | #undef ENABLE_COMPOUND_DICTIONARY
154 | #undef PREFIX
155 |
156 | #undef EXPORT_FN
157 | #undef FN
158 | #undef CAT
159 | #undef EXPAND_CAT
160 |
/* Dispatches to the CreateBackwardReferences specialization that matches
   params->hasher.type, forwarding every argument unchanged.

   When the compound dictionary has at least one chunk, the "DH<N>" variants
   are tried first; they exist only for hasher types 5, 6, 40, 41, 42, 55 and
   65. Any other type falls through to the generic "NH<N>" variants listed by
   FOR_GENERIC_HASHERS.

   NOTE: if the hasher type matches no case in the generic switch either, the
   function returns without calling any specialization and without touching
   the output arguments. */
void BrotliCreateBackwardReferences(size_t num_bytes,
    size_t position, const uint8_t* ringbuffer, size_t ringbuffer_mask,
    ContextLut literal_context_lut, const BrotliEncoderParams* params,
    Hasher* hasher, int* dist_cache, size_t* last_insert_len,
    Command* commands, size_t* num_commands, size_t* num_literals) {
  /* Compound-dictionary path. */
  if (params->dictionary.compound.num_chunks != 0) {
    switch (params->hasher.type) {
#define CASE_(N)                                                    \
      case N:                                                       \
        CreateBackwardReferencesDH ## N(num_bytes,                  \
            position, ringbuffer, ringbuffer_mask,                  \
            literal_context_lut, params, hasher, dist_cache,        \
            last_insert_len, commands, num_commands, num_literals); \
        return;
      CASE_(5)
      CASE_(6)
      CASE_(40)
      CASE_(41)
      CASE_(42)
      CASE_(55)
      CASE_(65)
#undef CASE_
      default:
        /* No compound-dictionary variant for this hasher; use generic one. */
        break;
    }
  }

  /* Generic path (no compound dictionary, or no "DH" variant available). */
  switch (params->hasher.type) {
#define CASE_(N)                                                  \
    case N:                                                       \
      CreateBackwardReferencesNH ## N(num_bytes,                  \
          position, ringbuffer, ringbuffer_mask,                  \
          literal_context_lut, params, hasher, dist_cache,        \
          last_insert_len, commands, num_commands, num_literals); \
      return;
    FOR_GENERIC_HASHERS(CASE_)
#undef CASE_
    default:
      break;
  }
}
202 |
203 | #if defined(__cplusplus) || defined(c_plusplus)
204 | } /* extern "C" */
205 | #endif
206 |
--------------------------------------------------------------------------------
/c/enc/literal_cost.c:
--------------------------------------------------------------------------------
1 | /* Copyright 2013 Google Inc. All Rights Reserved.
2 |
3 | Distributed under MIT license.
4 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5 | */
6 |
7 | /* Literal cost model to allow backward reference replacement to be efficient.
8 | */
9 |
10 | #include "literal_cost.h"
11 |
#include <string.h>  /* memset */
13 |
14 | #include
15 |
16 | #include "../common/platform.h"
17 | #include "fast_log.h"
18 | #include "utf8_util.h"
19 |
20 | #if defined(__cplusplus) || defined(c_plusplus)
21 | extern "C" {
22 | #endif
23 |
24 | static size_t UTF8Position(size_t last, size_t c, size_t clamp) {
25 | if (c < 128) {
26 | return 0; /* Next one is the 'Byte 1' again. */
27 | } else if (c >= 192) { /* Next one is the 'Byte 2' of utf-8 encoding. */
28 | return BROTLI_MIN(size_t, 1, clamp);
29 | } else {
30 | /* Let's decide over the last byte if this ends the sequence. */
31 | if (last < 0xE0) {
32 | return 0; /* Completed two or three byte coding. */
33 | } else { /* Next one is the 'Byte 3' of utf-8 encoding. */
34 | return BROTLI_MIN(size_t, 2, clamp);
35 | }
36 | }
37 | }
38 |
/* Chooses the multi-byte statistics level for `len` bytes of `data`, starting
   at `pos`; every index is wrapped with `mask`.
   Returns 0 when fewer than 25 bytes are followed by a 'Byte 2' or 'Byte 3'
   position, and 1 otherwise. */
static size_t DecideMultiByteStatsLevel(size_t pos, size_t len, size_t mask,
                                        const uint8_t* data) {
  /* position_counts[k]: how many bytes are followed by UTF-8 position k. */
  size_t position_counts[3] = { 0 };
  size_t level = 1;  /* should be 2, but 1 compresses better. */
  size_t previous = 0;
  size_t offset = 0;
  while (offset < len) {
    const size_t current = data[(pos + offset) & mask];
    const size_t slot = UTF8Position(previous, current, 2);
    position_counts[slot] += 1;
    previous = current;
    offset += 1;
  }
  /* Too few 'Byte 3' positions: stay at level 1. With the initial value
     above this assignment changes nothing, but it keeps the threshold in
     place should the initial level ever be raised to 2. */
  if (position_counts[2] < 500) {
    level = 1;
  }
  /* Hardly any multi-byte positions at all: use single-byte modeling. */
  if (position_counts[1] + position_counts[2] < 25) {
    level = 0;
  }
  return level;
}
58 |
59 | static void EstimateBitCostsForLiteralsUTF8(size_t pos, size_t len, size_t mask,
60 | const uint8_t* data,
61 | size_t* histogram, float* cost) {
62 | /* max_utf8 is 0 (normal ASCII single byte modeling),
63 | 1 (for 2-byte UTF-8 modeling), or 2 (for 3-byte UTF-8 modeling). */
64 | const size_t max_utf8 = DecideMultiByteStatsLevel(pos, len, mask, data);
65 | size_t window_half = 495;
66 | size_t in_window = BROTLI_MIN(size_t, window_half, len);
67 | size_t in_window_utf8[3] = { 0 };
68 | size_t i;
69 | memset(histogram, 0, 3 * 256 * sizeof(histogram[0]));
70 |
71 | { /* Bootstrap histograms. */
72 | size_t last_c = 0;
73 | size_t utf8_pos = 0;
74 | for (i = 0; i < in_window; ++i) {
75 | size_t c = data[(pos + i) & mask];
76 | ++histogram[256 * utf8_pos + c];
77 | ++in_window_utf8[utf8_pos];
78 | utf8_pos = UTF8Position(last_c, c, max_utf8);
79 | last_c = c;
80 | }
81 | }
82 |
83 | /* Compute bit costs with sliding window. */
84 | for (i = 0; i < len; ++i) {
85 | if (i >= window_half) {
86 | /* Remove a byte in the past. */
87 | size_t c =
88 | i < window_half + 1 ? 0 : data[(pos + i - window_half - 1) & mask];
89 | size_t last_c =
90 | i < window_half + 2 ? 0 : data[(pos + i - window_half - 2) & mask];
91 | size_t utf8_pos2 = UTF8Position(last_c, c, max_utf8);
92 | --histogram[256 * utf8_pos2 + data[(pos + i - window_half) & mask]];
93 | --in_window_utf8[utf8_pos2];
94 | }
95 | if (i + window_half < len) {
96 | /* Add a byte in the future. */
97 | size_t c = data[(pos + i + window_half - 1) & mask];
98 | size_t last_c = data[(pos + i + window_half - 2) & mask];
99 | size_t utf8_pos2 = UTF8Position(last_c, c, max_utf8);
100 | ++histogram[256 * utf8_pos2 + data[(pos + i + window_half) & mask]];
101 | ++in_window_utf8[utf8_pos2];
102 | }
103 | {
104 | size_t c = i < 1 ? 0 : data[(pos + i - 1) & mask];
105 | size_t last_c = i < 2 ? 0 : data[(pos + i - 2) & mask];
106 | size_t utf8_pos = UTF8Position(last_c, c, max_utf8);
107 | size_t masked_pos = (pos + i) & mask;
108 | size_t histo = histogram[256 * utf8_pos + data[masked_pos]];
109 | double lit_cost;
110 | if (histo == 0) {
111 | histo = 1;
112 | }
113 | lit_cost = FastLog2(in_window_utf8[utf8_pos]) - FastLog2(histo);
114 | lit_cost += 0.02905;
115 | if (lit_cost < 1.0) {
116 | lit_cost *= 0.5;
117 | lit_cost += 0.5;
118 | }
119 | /* Make the first bytes more expensive -- seems to help, not sure why.
120 | Perhaps because the entropy source is changing its properties
121 | rapidly in the beginning of the file, perhaps because the beginning
122 | of the data is a statistical "anomaly". */
123 | if (i < 2000) {
124 | lit_cost += 0.7 - ((double)(2000 - i) / 2000.0 * 0.35);
125 | }
126 | cost[i] = (float)lit_cost;
127 | }
128 | }
129 | }
130 |
131 | void BrotliEstimateBitCostsForLiterals(size_t pos, size_t len, size_t mask,
132 | const uint8_t* data,
133 | size_t* histogram, float* cost) {
134 | if (BrotliIsMostlyUTF8(data, pos, mask, len, kMinUTF8Ratio)) {
135 | EstimateBitCostsForLiteralsUTF8(pos, len, mask, data, histogram, cost);
136 | return;
137 | } else {
138 | size_t window_half = 2000;
139 | size_t in_window = BROTLI_MIN(size_t, window_half, len);
140 | size_t i;
141 | memset(histogram, 0, 256 * sizeof(histogram[0]));
142 |
143 | /* Bootstrap histogram. */
144 | for (i = 0; i < in_window; ++i) {
145 | ++histogram[data[(pos + i) & mask]];
146 | }
147 |
148 | /* Compute bit costs with sliding window. */
149 | for (i = 0; i < len; ++i) {
150 | size_t histo;
151 | if (i >= window_half) {
152 | /* Remove a byte in the past. */
153 | --histogram[data[(pos + i - window_half) & mask]];
154 | --in_window;
155 | }
156 | if (i + window_half < len) {
157 | /* Add a byte in the future. */
158 | ++histogram[data[(pos + i + window_half) & mask]];
159 | ++in_window;
160 | }
161 | histo = histogram[data[(pos + i) & mask]];
162 | if (histo == 0) {
163 | histo = 1;
164 | }
165 | {
166 | double lit_cost = FastLog2(in_window) - FastLog2(histo);
167 | lit_cost += 0.029;
168 | if (lit_cost < 1.0) {
169 | lit_cost *= 0.5;
170 | lit_cost += 0.5;
171 | }
172 | cost[i] = (float)lit_cost;
173 | }
174 | }
175 | }
176 | }
177 |
178 | #if defined(__cplusplus) || defined(c_plusplus)
179 | } /* extern "C" */
180 | #endif
181 |
--------------------------------------------------------------------------------