├── compression_experiments
    ├── gzip_compress.sh
    ├── npm_install_packages.sh
    ├── brotli_compress.sh
    ├── find_urls_save.sh
    ├── js_dataset_compression.pdf
    ├── http_archive_compression.pdf
    ├── npm_packages_compression.pdf
    ├── npm_packages_compression.ipynb
    ├── js_dataset_compression.ipynb
    └── http_archive_compression.ipynb
├── brotli_bits_types
    ├── brotli_decompress_and_log.sh
    └── bits_types_in_compressed_file.pdf
├── comparison_script
    ├── analyze_rates.pdf
    ├── makefile
    ├── README.md
    └── compression.cc
├── analyze_block_splitting
    ├── analyze_block_splitting.pdf
    ├── makefile
    └── save_block_splitting.cc
├── third_party
    └── bundle_analyzer
    │   ├── package.json
    │   ├── predictions.js
    │   ├── get_chunks.js
    │   ├── package-lock.json
    │   ├── index.js
    │   └── module-clues.js
├── README.md
├── contributing.md
├── code-of-conduct.md
└── LICENSE


/compression_experiments/gzip_compress.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | { time gzip -$1 -f -k -c $4 > $3; } 2> $2
4 | 


--------------------------------------------------------------------------------
/compression_experiments/npm_install_packages.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | cat package.txt | xargs npm install
4 | 


--------------------------------------------------------------------------------
/compression_experiments/brotli_compress.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | { time brotli -q $1 -f -o $3 $4 ; } 2> $2
4 | 


--------------------------------------------------------------------------------
/brotli_bits_types/brotli_decompress_and_log.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | brotli -d -q 11 -f -o example.txt example.txt.br > log.txt
3 | 


--------------------------------------------------------------------------------
/compression_experiments/find_urls_save.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | find node_modules -name "*.js" -type f > urls_for_package.txt
4 | 


--------------------------------------------------------------------------------
/comparison_script/analyze_rates.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GoogleChromeLabs/dynamic-web-bundle-serving/master/comparison_script/analyze_rates.pdf


--------------------------------------------------------------------------------
/analyze_block_splitting/analyze_block_splitting.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GoogleChromeLabs/dynamic-web-bundle-serving/master/analyze_block_splitting/analyze_block_splitting.pdf


--------------------------------------------------------------------------------
/brotli_bits_types/bits_types_in_compressed_file.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GoogleChromeLabs/dynamic-web-bundle-serving/master/brotli_bits_types/bits_types_in_compressed_file.pdf


--------------------------------------------------------------------------------
/compression_experiments/js_dataset_compression.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GoogleChromeLabs/dynamic-web-bundle-serving/master/compression_experiments/js_dataset_compression.pdf


--------------------------------------------------------------------------------
/compression_experiments/http_archive_compression.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GoogleChromeLabs/dynamic-web-bundle-serving/master/compression_experiments/http_archive_compression.pdf


--------------------------------------------------------------------------------
/compression_experiments/npm_packages_compression.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GoogleChromeLabs/dynamic-web-bundle-serving/master/compression_experiments/npm_packages_compression.pdf


--------------------------------------------------------------------------------
/third_party/bundle_analyzer/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "bundle-analyzer",
 3 |   "version": "0.0.1",
 4 |   "main": "index.js",
 5 |   "type": "module",
 6 |   "scripts": {
 7 |     "start": "node --experimental-modules demo-server.js"
 8 |   },
 9 |   "dependencies": {
10 |     "acorn-loose": "^7.0.0",
11 |     "dlv": "^1.1.3",
12 |     "fs": "^0.0.1-security"
13 |   },
14 |   "engines": {
15 |     "node": "12.x"
16 |   }
17 | }
18 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Dynamic Web Bundle Serving
2 | 
3 | Experiments in prototyping dynamic web bundle serving
4 | 
5 | ## What is here
6 | The `comparison_script` folder contains the script for running the compression for bundled and unbundled approaches for different compression algorithms . See more in [README](comparison_script/README.md)
7 | 
8 | The `compression_experiments` folder contains some research on compression of js code and bundles of js code from different resources.
9 | 


--------------------------------------------------------------------------------
/comparison_script/makefile:
--------------------------------------------------------------------------------
 1 | # Copyright 2020 Google Inc. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | CXX=g++
16 | CXXFLAGS=-g -Wall -MMD -std=c++11
17 | LDLIBS=-lstdc++ -lbrotlienc -lz
18 | 
19 | all: compression
20 | 
21 | #compression.o: compression.cc
22 | #    g++ -std=c++11 -c compression.cc
23 | 
24 | compression: compression.o
25 | 
26 | clean:
27 | 	rm compression.o compression
28 | 


--------------------------------------------------------------------------------
/analyze_block_splitting/makefile:
--------------------------------------------------------------------------------
 1 | # Copyright 2020 Google Inc. All Rights Reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | CXX=g++
16 | CXXFLAGS=-g -Wall -MMD -std=c++11
17 | LDLIBS=-lstdc++ -lbrotlienc -lz
18 | 
19 | all: save_block_splitting
20 | 
21 | #save_block_splitting.o: save_block_splitting.cc
22 | #    g++ -std=c++11 -c save_block_splitting.cc
23 | 
24 | save_block_splitting: save_block_splitting.o
25 | 
26 | clean:
27 | 	rm save_block_splitting.o save_block_splitting
28 | 


--------------------------------------------------------------------------------
/third_party/bundle_analyzer/predictions.js:
--------------------------------------------------------------------------------
 1 | import moduleClues from "./module-clues.js";
 2 | 
 3 | /**
 4 |  * Uses the list of module clues to find prediction matches for a minified webpack module,
 5 |  * provided as a string
 6 |  *
 7 |  * @param {string} functionText - A function extracted from a webpack bundle
 8 |  */
 9 | export function predictModuleName(functionText) {
10 |   // Note, this is obviously a potential performance problem--as we're currently doing a
11 |   // full search of the (potententially very large) bundle for every clue in the list.
12 |   // Running this in a worker seems to solve the problem for now, but if the clue list got
13 |   // very long, a different solution might be required.
14 |   let matchedClue = moduleClues.find(clue =>
15 |     typeof clue.includes === "object" // Handle regex clues differently from strings
16 |       ? clue.includes.test(functionText)
17 |       : functionText.includes(clue.includes)
18 |   );
19 |   if (!matchedClue) {
20 |     matchedClue = { module: "", url: "", includes: "" };
21 |   }
22 |   return matchedClue;
23 | }
24 | 
25 | 


--------------------------------------------------------------------------------
/third_party/bundle_analyzer/get_chunks.js:
--------------------------------------------------------------------------------
 1 |   /** @license
 2 |  * Copyright 2020 Google Inc. All Rights Reserved.
 3 |  *
 4 |  * Licensed under the Apache License, Version 2.0 (the "License");
 5 |  * you may not use this file except in compliance with the License.
 6 |  * You may obtain a copy of the License at
 7 |  *
 8 |  *     http://www.apache.org/licenses/LICENSE-2.0
 9 |  *
10 |  * Unless required by applicable law or agreed to in writing, software
11 |  * distributed under the License is distributed on an "AS IS" BASIS,
12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 |  * See the License for the specific language governing permissions and
14 |  * limitations under the License.
15 |  */
16 | 
17 | import { createServer } from 'http';
18 | import { readFile } from 'fs';
19 | import { analyze } from './index.js';
20 | import { writeFileSync } from 'fs';
21 | 
22 | var args = process.argv.slice(2);
23 | readFile(args[0], 'utf8', function (err,data) {
24 |   if (err) {
25 |     return console.log(err);
26 |   }
27 |   const result = analyze(data);
28 |   writeFileSync(args[1], JSON.stringify(result, 0, 2));
29 | });
30 | 


--------------------------------------------------------------------------------
/contributing.md:
--------------------------------------------------------------------------------
 1 | # How to Contribute
 2 | 
 3 | We'd love to accept your patches and contributions to this project. There are
 4 | just a few small guidelines you need to follow.
 5 | 
 6 | ## Contributor License Agreement
 7 | 
 8 | Contributions to this project must be accompanied by a Contributor License
 9 | Agreement. You (or your employer) retain the copyright to your contribution;
10 | this simply gives us permission to use and redistribute your contributions as
11 | part of the project. Head over to <https://cla.developers.google.com/> to see
12 | your current agreements on file or to sign a new one.
13 | 
14 | You generally only need to submit a CLA once, so if you've already submitted one
15 | (even if it was for a different project), you probably don't need to do it
16 | again.
17 | 
18 | ## Code reviews
19 | 
20 | All submissions, including submissions by project members, require review. We
21 | use GitHub pull requests for this purpose. Consult
22 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
23 | information on using pull requests.
24 | 
25 | ## Community Guidelines
26 | 
27 | This project follows [Google's Open Source Community
28 | Guidelines](https://opensource.google/conduct/).
29 | 


--------------------------------------------------------------------------------
/third_party/bundle_analyzer/package-lock.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "bundle-analyzer",
 3 |   "version": "0.0.1",
 4 |   "lockfileVersion": 1,
 5 |   "requires": true,
 6 |   "dependencies": {
 7 |     "acorn": {
 8 |       "version": "7.2.0",
 9 |       "resolved": "https://registry.npmjs.org/acorn/-/acorn-7.2.0.tgz",
10 |       "integrity": "sha512-apwXVmYVpQ34m/i71vrApRrRKCWQnZZF1+npOD0WV5xZFfwWOmKGQ2RWlfdy9vWITsenisM8M0Qeq8agcFHNiQ=="
11 |     },
12 |     "acorn-loose": {
13 |       "version": "7.0.0",
14 |       "resolved": "https://registry.npmjs.org/acorn-loose/-/acorn-loose-7.0.0.tgz",
15 |       "integrity": "sha512-TIqpAWkqpdBXfj1XDVBQ/jNbAb6ByGfoqkcz2Pwd8mEHUndxOCw9FR6TqkMCMAr5XV8zYx0+m9GcGjxZzQuA2w==",
16 |       "requires": {
17 |         "acorn": "^7.0.0"
18 |       }
19 |     },
20 |     "dlv": {
21 |       "version": "1.1.3",
22 |       "resolved": "https://registry.npmjs.org/dlv/-/dlv-1.1.3.tgz",
23 |       "integrity": "sha512-+HlytyjlPKnIG8XuRG8WvmBP8xs8P71y+SKKS6ZXWoEgLuePxtDoUEiH7WkdePWrQ5JBpE6aoVqfZfJUQkjXwA=="
24 |     },
25 |     "fs": {
26 |       "version": "0.0.1-security",
27 |       "resolved": "https://registry.npmjs.org/fs/-/fs-0.0.1-security.tgz",
28 |       "integrity": "sha1-invTcYa23d84E/I4WLV+yq9eQdQ="
29 |     }
30 |   }
31 | }
32 | 


--------------------------------------------------------------------------------
/comparison_script/README.md:
--------------------------------------------------------------------------------
 1 | # Comparison of bundled and unbundled approaches for js scripts compression.
 2 |   
 3 | The script compares an unbundled approach with a bundled one for js scripts compression for gzip and brotli of different levels.
 4 | After running the script you will get a file `compression_results.json` of results of the compression. An example of such a file could be found [here](compression_results.json).
 5 | 
 6 | 
 7 | ## How to run the script
 8 | Compression script has an argument ‘number_repetitions’ which shows how many times to compress each bundle so the result would be more stable.
 9 | 
10 | ```
11 | $ cd bundle_analyzer
12 | $ npm i
13 | $ cd ..
14 | $ make
15 | $  ./compression 1
16 | ```
17 | 
18 | Before running make sure you have a folder named `bundles_source` in current directory. This folder should contain a file `bundle_files.txt` with names of files with bundles.
19 | Example of `bundles_files.txt`:
20 | ```
21 | bundles_source/bundle_0.txt
22 | bundles_source/bundle_1.txt
23 | bundles_source/bundle_2.txt
24 | bundles_source/bundle_3.txt
25 | bundles_source/bundle_4.txt
26 | bundles_source/bundle_5.txt
27 | ```
28 | 
29 | The folder `bundles_source` should also have files written in `bundle_files.txt` containing text of one bundle each.
30 | 


--------------------------------------------------------------------------------
/third_party/bundle_analyzer/index.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * @see https://github.com/azukaru/bundle-inspector/blob/master/src/lib/ASTanalyzer.js
 3 |  */
 4 | 
 5 | import acornLoose from "acorn-loose";
 6 | import get from "dlv";
 7 | import { predictModuleName } from "./predictions.js";
 8 | 
 9 | /** Dot-notated paths to look for within a bundle */
10 | const bundleFunctionPaths = [
11 |   "body.0.expression.right.arguments.0.elements",
12 |   "body.0.expression.argument.arguments.0.elements",
13 |   "body.0.expression.arguments.0.properties",
14 |   "body.0.expression.arguments.0.elements.1.elements",
15 |   "body.0.expression.arguments.0.elements.1.properties",
16 |   "body.0.expression.right.arguments.1.body.body.0.declarations.0.init.arguments.1.elements",
17 |   "body.2.expression.arguments.0.elements.1.properties"
18 | ].map(p => p.split("."));
19 | 
20 | /**
21 |  * Tries several different path signatures to find the list of functions contained
22 |  * within an AST representation of a webpack bundle
23 |  *
24 |  * @param {Object} syntaxTree - An AST representation of a webpack bundle
25 |  */
26 | function getFunctionNodes(syntaxTree) {
27 |   return bundleFunctionPaths.reduce(
28 |     (acc, path) => acc || get(syntaxTree, path),
29 |     null
30 |   );
31 | }
32 | 
33 | /**
34 |  * Analyzes a webpack bundle in string form and returns useable stats and a list of functions
35 |  *
36 |  * @param {string} bundle - A webpack bundle
37 |  * @return {object} A parsed bundle, with properties for bundle-wide stats and a list of functions
38 |  */
39 | export function analyze(bundle) {
40 |   const ast = acornLoose.parse(bundle);
41 |   const functionNodes = getFunctionNodes(ast).filter(Boolean);
42 |   const functions = functionNodes.map((node, index) => {
43 |     const code = bundle.substring(node.start, node.end);
44 |     const name = predictModuleName(code);
45 |     const id = index + 1;
46 |     
47 |     return { id, name, code };
48 |   });
49 |   return functions;
50 | }
51 | 
52 | 


--------------------------------------------------------------------------------
/analyze_block_splitting/save_block_splitting.cc:
--------------------------------------------------------------------------------
 1 | // Copyright 2020 Google Inc. All Rights Reserved.
 2 | //
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //     http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // Unless required by applicable law or agreed to in writing, software
10 | // distributed under the License is distributed on an "AS IS" BASIS,
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | // See the License for the specific language governing permissions and
13 | // limitations under the License.
14 | 
15 | #include <stdlib.h>
16 | #include <stdio.h>
17 | #include <brotli/encode.h>
18 | #include <iostream>
19 | #include <sstream>
20 | #include <iomanip>
21 | #include <fstream>
22 | #include <stdexcept>
23 | #include <string>
24 | 
25 | int DEFAULT_WINDOW = 24;
26 | 
27 | size_t FileSize(FILE* file) {
28 |   fseek(file, 0, SEEK_END);
29 |   size_t size = ftell(file);
30 |   fseek(file, 0, SEEK_SET);
31 |   return size;
32 | }
33 | 
34 | FILE* OpenFile(const char* filename, const char* mode) {
35 |   FILE* file = fopen(filename, mode);
36 |   if (file == NULL) {
37 |     perror("fopen failed");
38 |   }
39 |   return file;
40 | }
41 | 
42 | void ReadData(FILE* file, unsigned char** data, size_t* size) {
43 |   *size = FileSize(file);
44 |   *data = (unsigned char*) malloc(*size);
45 |   if (0 == fread(*data, 1, *size, file)) {
46 |     throw "Failed to read from file";
47 |   }
48 |   return;
49 | }
50 | 
51 | void BrotliCompressAndSaveBlockSplitting(int level, int window, const unsigned char* input_data, size_t input_size, unsigned char* output_data, size_t output_buffer_size) {
52 |   ShouldSaveBlockSplit();
53 |   if (!BrotliEncoderCompress(level, window, BROTLI_MODE_GENERIC, input_size, input_data, &output_buffer_size, output_data)) {
54 |     throw "Failure in BrotliCompress";
55 |   }
56 | }
57 | 
58 | int MinWindowLargerThanFile(int fileSize, int max) {
59 |     int window = 24;
60 |     if (fileSize > 0) {
61 |         window = 10;
62 |         while (((size_t)1 << (window)) - 16 < (uint64_t)fileSize) {
63 |             ++window;
64 |             if (window == max) break;
65 |         }
66 |     }
67 |     return window;
68 | }
69 | 
70 | 
71 | 
72 | int main (int argc, char** argv) {
73 |   try {
74 |     char* bundle_file = argv[1];
75 |     FILE* infile = OpenFile(bundle_file, "rb");
76 |     if (infile == NULL) {
77 |         exit(1);
78 |     }
79 |     unsigned char* input_data = NULL;
80 |     size_t input_size = 0;
81 |     ReadData(infile, &input_data, &input_size);
82 |     fclose(infile);
83 |     size_t output_buffer_size = input_size * 2;
84 |     unsigned char* output_data = (unsigned char*) malloc(output_buffer_size);
85 |     int window = MinWindowLargerThanFile(input_size, DEFAULT_WINDOW);
86 |     BrotliCompressAndSaveBlockSplitting(11, window, input_data, input_size, output_data, output_buffer_size);
87 |   } catch (const char* message) {
88 |       std::cout << "Error\n";
89 |   }
90 |   return 0;
91 | }
92 | 


--------------------------------------------------------------------------------
/code-of-conduct.md:
--------------------------------------------------------------------------------
 1 | # Google Open Source Community Guidelines
 2 | 
 3 | At Google, we recognize and celebrate the creativity and collaboration of open
 4 | source contributors and the diversity of skills, experiences, cultures, and
 5 | opinions they bring to the projects and communities they participate in.
 6 | 
 7 | Every one of Google's open source projects and communities are inclusive
 8 | environments, based on treating all individuals respectfully, regardless of
 9 | gender identity and expression, sexual orientation, disabilities,
10 | neurodiversity, physical appearance, body size, ethnicity, nationality, race,
11 | age, religion, or similar personal characteristic.
12 | 
13 | We value diverse opinions, but we value respectful behavior more.
14 | 
15 | Respectful behavior includes:
16 | 
17 | * Being considerate, kind, constructive, and helpful.
18 | * Not engaging in demeaning, discriminatory, harassing, hateful, sexualized, or
19 |   physically threatening behavior, speech, and imagery.
20 | * Not engaging in unwanted physical contact.
21 | 
22 | Some Google open source projects [may adopt][] an explicit project code of
23 | conduct, which may have additional detailed expectations for participants. Most
24 | of those projects will use our [modified Contributor Covenant][].
25 | 
26 | [may adopt]: https://opensource.google/docs/releasing/preparing/#conduct
27 | [modified Contributor Covenant]: https://opensource.google/docs/releasing/template/CODE_OF_CONDUCT/
28 | 
29 | ## Resolve peacefully
30 | 
31 | We do not believe that all conflict is necessarily bad; healthy debate and
32 | disagreement often yields positive results. However, it is never okay to be
33 | disrespectful.
34 | 
35 | If you see someone behaving disrespectfully, you are encouraged to address the
36 | behavior directly with those involved. Many issues can be resolved quickly and
37 | easily, and this gives people more control over the outcome of their dispute.
38 | If you are unable to resolve the matter for any reason, or if the behavior is
39 | threatening or harassing, report it. We are dedicated to providing an
40 | environment where participants feel welcome and safe.
41 | 
42 | ## Reporting problems
43 | 
44 | Some Google open source projects may adopt a project-specific code of conduct.
45 | In those cases, a Google employee will be identified as the Project Steward,
46 | who will receive and handle reports of code of conduct violations. In the event
47 | that a project hasn’t identified a Project Steward, you can report problems by
48 | emailing opensource@google.com.
49 | 
50 | We will investigate every complaint, but you may not receive a direct response.
51 | We will use our discretion in determining when and how to follow up on reported
52 | incidents, which may range from not taking action to permanent expulsion from
53 | the project and project-sponsored spaces. We will notify the accused of the
54 | report and provide them an opportunity to discuss it before any action is
55 | taken. The identity of the reporter will be omitted from the details of the
56 | report supplied to the accused. In potentially harmful situations, such as
57 | ongoing harassment or threats to anyone's safety, we may take action without
58 | notice.
59 | 
60 | *This document was adapted from the [IndieWeb Code of Conduct][] and can also
61 | be found at <https://opensource.google/conduct/>.*
62 | 
63 | [IndieWeb Code of Conduct]: https://indieweb.org/code-of-conduct
64 | 


--------------------------------------------------------------------------------
/third_party/bundle_analyzer/module-clues.js:
--------------------------------------------------------------------------------
  1 | // Exports an array of "clue" objects for use in automatically deducing the source of
  2 | // minified code in webpack bundles
  3 | 
  4 | export default [
  5 |   {
  6 |     module: "bluebird",
  7 |     includes: "prototype.disableTrampolineIfNecessary",
  8 |     url: "https://github.com/petkaantonov/bluebird",
  9 |     polyfill: true
 10 |   },
 11 |   {
 12 |     module: "bn",
 13 |     includes: /67108864\s?&\s?67108863/,
 14 |     url: "https://github.com/indutny/bn.js/"
 15 |   },
 16 |   {
 17 |     module: "character-entities",
 18 |     includes: "CapitalDifferentialD:",
 19 |     url: "https://www.npmjs.com/package/character-entities"
 20 |   },
 21 |   {
 22 |     module: "corejs/promise",
 23 |     includes: "Promise can't be resolved itself",
 24 |     url:
 25 |       "https://github.com/zloirock/core-js/blob/master/packages/core-js/modules/es.promise.js",
 26 |     polyfill: true
 27 |   },
 28 |   {
 29 |     module: "elliptic",
 30 |     includes: "prototype._getEndoRoots",
 31 |     url: "https://github.com/indutny/elliptic"
 32 |   },
 33 |   {
 34 |     module: "emotion",
 35 |     includes: 'querySelectorAll("style[data-emotion-"',
 36 |     url: "https://github.com/emotion-js/emotion"
 37 |   },
 38 |   {
 39 |     module: "fingerprint2",
 40 |     includes: "getWebglVendorAndRenderer",
 41 |     url: "https://github.com/Valve/fingerprintjs2/"
 42 |   },
 43 |   {
 44 |     module: "i18next",
 45 |     includes: "options.overloadTranslationOptionHandler",
 46 |     url: "https://www.i18next.com/"
 47 |   },
 48 |   {
 49 |     module: "immutable-js",
 50 |     includes: "Expected Array or iterable object of values, or keyed object",
 51 |     url: "https://github.com/immutable-js/immutable-js"
 52 |   },
 53 |   {
 54 |     module: "lodash",
 55 |     includes: "__lodash_placeholder__",
 56 |     url: "https://lodash.com/"
 57 |   },
 58 |   {
 59 |     module: "marked",
 60 |     includes: 'Error("Infinite loop on byte: "',
 61 |     url: "https://github.com/markedjs/marked"
 62 |   },
 63 |   {
 64 |     module: "mobile-detect",
 65 |     includes: "PrestigioTablet",
 66 |     url: "http://hgoebl.github.io/mobile-detect.js/"
 67 |   },
 68 |   {
 69 |     module: "moment",
 70 |     includes: "localeData().monthsShort",
 71 |     url: "https://momentjs.com/"
 72 |   },
 73 |   {
 74 |     module: "next/route",
 75 |     includes: 'Error("Cannot update unavailable route:',
 76 |     url: ""
 77 |   },
 78 |   {
 79 |     module: "next/client/index",
 80 |     includes: 'emit("before-reactdom-render"',
 81 |     url:
 82 |       "https://github.com/zeit/next.js/blob/canary/packages/next/client/index.js"
 83 |   },
 84 |   {
 85 |     module: "node/buffer",
 86 |     includes:
 87 |       'Error("If encoding is specified then the first argument must be a string")',
 88 |     url: ""
 89 |   },
 90 |   {
 91 |     module: "node/url",
 92 |     includes: "\"Parameter 'url' must be a string, not \"",
 93 |     url: "https://github.com/nodejs/node/blob/master/lib/url.js"
 94 |   },
 95 |   {
 96 |     module: "nprogress",
 97 |     includes: "nprogress-custom-parent",
 98 |     url: "https://github.com/rstacruz/nprogress"
 99 |   },
100 |   {
101 |     module: "popmotion",
102 |     includes: "are of different format, or a value might have changed value",
103 |     url: "https://popmotion.io/"
104 |   },
105 |   {
106 |     module: "preact",
107 |     includes: "__preactattr_",
108 |     url: "https://preactjs.com/"
109 |   },
110 |   {
111 |     module: "raven-js",
112 |     includes: "Error: Raven has already been configured",
113 |     url: "https://www.npmjs.com/package/raven-js"
114 |   },
115 |   {
116 |     module: "react-aria-modal",
117 |     includes: "react-aria-modal instances should have",
118 |     url: "https://github.com/davidtheclark/react-aria-modal"
119 |   },
120 |   {
121 |     module: "react-i18Next",
122 |     includes: "getI18nTranslate.bind",
123 |     url: "https://react.i18next.com/"
124 |   },
125 |   {
126 |     module: "react-helmet",
127 |     includes: "convertReactPropstoHtmlAttributes",
128 |     url: "https://github.com/nfl/react-helmet"
129 |   },
130 |   {
131 |     module: "react-intl",
132 |     includes: "pluralRuleFunction:function",
133 |     url: "https://github.com/formatjs/react-intl"
134 |   },
135 |   {
136 |     module: "react-redux",
137 |     includes: "parentSub.addNestedSub",
138 |     url: "https://react-redux.js.org/"
139 |   },
140 |   {
141 |     module: "regenerator-runtime",
142 |     includes: 'Error("Generator is already running")',
143 |     url: "https://www.npmjs.com/package/regenerator-runtime",
144 |     polyfill: true
145 |   },
146 |   {
147 |     module: "styled-jsx/stylesheet",
148 |     includes: "StyleSheet: illegal rule:",
149 |     url: "https://github.com/zeit/styled-jsx"
150 |   },
151 |   {
152 |     module: "whatwg-fetch",
153 |     includes: 'searchParams:"URLSearchParams"',
154 |     url: "https://www.npmjs.com/package/whatwg-fetch",
155 |     polyfill: true
156 |   },
157 |   // VAGUE SELECTORS BELOW
158 |   {
159 |     module: "possible application code",
160 |     includes: "this hasn't been initialised - super() hasn't been called", //Babel adds this during transpilation
161 |     url: ""
162 |   },
163 |   {
164 |     module: "react module",
165 |     includes: "https://reactjs.org/docs/error-decoder.html?invariant="
166 |   }
167 | ];
168 | 
169 | // Snippet for authoring clues:
170 | // "Module_Clue": {
171 | //     "prefix": "clue",
172 | //     "body": [
173 | //         "{",
174 | //         "\tmodule: '$1',",
175 | //         "\tincludes: '$2',",
176 | //         "\turl: '$3'",
177 | //         "},"
178 | //     ],
179 | //     "description": "Clue syntax for bundle analyzer"
180 | // }
181 | 
182 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 | 
  2 |                                  Apache License
  3 |                            Version 2.0, January 2004
  4 |                         http://www.apache.org/licenses/
  5 | 
  6 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  7 | 
  8 |    1. Definitions.
  9 | 
 10 |       "License" shall mean the terms and conditions for use, reproduction,
 11 |       and distribution as defined by Sections 1 through 9 of this document.
 12 | 
 13 |       "Licensor" shall mean the copyright owner or entity authorized by
 14 |       the copyright owner that is granting the License.
 15 | 
 16 |       "Legal Entity" shall mean the union of the acting entity and all
 17 |       other entities that control, are controlled by, or are under common
 18 |       control with that entity. For the purposes of this definition,
 19 |       "control" means (i) the power, direct or indirect, to cause the
 20 |       direction or management of such entity, whether by contract or
 21 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 22 |       outstanding shares, or (iii) beneficial ownership of such entity.
 23 | 
 24 |       "You" (or "Your") shall mean an individual or Legal Entity
 25 |       exercising permissions granted by this License.
 26 | 
 27 |       "Source" form shall mean the preferred form for making modifications,
 28 |       including but not limited to software source code, documentation
 29 |       source, and configuration files.
 30 | 
 31 |       "Object" form shall mean any form resulting from mechanical
 32 |       transformation or translation of a Source form, including but
 33 |       not limited to compiled object code, generated documentation,
 34 |       and conversions to other media types.
 35 | 
 36 |       "Work" shall mean the work of authorship, whether in Source or
 37 |       Object form, made available under the License, as indicated by a
 38 |       copyright notice that is included in or attached to the work
 39 |       (an example is provided in the Appendix below).
 40 | 
 41 |       "Derivative Works" shall mean any work, whether in Source or Object
 42 |       form, that is based on (or derived from) the Work and for which the
 43 |       editorial revisions, annotations, elaborations, or other modifications
 44 |       represent, as a whole, an original work of authorship. For the purposes
 45 |       of this License, Derivative Works shall not include works that remain
 46 |       separable from, or merely link (or bind by name) to the interfaces of,
 47 |       the Work and Derivative Works thereof.
 48 | 
 49 |       "Contribution" shall mean any work of authorship, including
 50 |       the original version of the Work and any modifications or additions
 51 |       to that Work or Derivative Works thereof, that is intentionally
 52 |       submitted to Licensor for inclusion in the Work by the copyright owner
 53 |       or by an individual or Legal Entity authorized to submit on behalf of
 54 |       the copyright owner. For the purposes of this definition, "submitted"
 55 |       means any form of electronic, verbal, or written communication sent
 56 |       to the Licensor or its representatives, including but not limited to
 57 |       communication on electronic mailing lists, source code control systems,
 58 |       and issue tracking systems that are managed by, or on behalf of, the
 59 |       Licensor for the purpose of discussing and improving the Work, but
 60 |       excluding communication that is conspicuously marked or otherwise
 61 |       designated in writing by the copyright owner as "Not a Contribution."
 62 | 
 63 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 64 |       on behalf of whom a Contribution has been received by Licensor and
 65 |       subsequently incorporated within the Work.
 66 | 
 67 |    2. Grant of Copyright License. Subject to the terms and conditions of
 68 |       this License, each Contributor hereby grants to You a perpetual,
 69 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 70 |       copyright license to reproduce, prepare Derivative Works of,
 71 |       publicly display, publicly perform, sublicense, and distribute the
 72 |       Work and such Derivative Works in Source or Object form.
 73 | 
 74 |    3. Grant of Patent License. Subject to the terms and conditions of
 75 |       this License, each Contributor hereby grants to You a perpetual,
 76 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 77 |       (except as stated in this section) patent license to make, have made,
 78 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 79 |       where such license applies only to those patent claims licensable
 80 |       by such Contributor that are necessarily infringed by their
 81 |       Contribution(s) alone or by combination of their Contribution(s)
 82 |       with the Work to which such Contribution(s) was submitted. If You
 83 |       institute patent litigation against any entity (including a
 84 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 85 |       or a Contribution incorporated within the Work constitutes direct
 86 |       or contributory patent infringement, then any patent licenses
 87 |       granted to You under this License for that Work shall terminate
 88 |       as of the date such litigation is filed.
 89 | 
 90 |    4. Redistribution. You may reproduce and distribute copies of the
 91 |       Work or Derivative Works thereof in any medium, with or without
 92 |       modifications, and in Source or Object form, provided that You
 93 |       meet the following conditions:
 94 | 
 95 |       (a) You must give any other recipients of the Work or
 96 |           Derivative Works a copy of this License; and
 97 | 
 98 |       (b) You must cause any modified files to carry prominent notices
 99 |           stating that You changed the files; and
100 | 
101 |       (c) You must retain, in the Source form of any Derivative Works
102 |           that You distribute, all copyright, patent, trademark, and
103 |           attribution notices from the Source form of the Work,
104 |           excluding those notices that do not pertain to any part of
105 |           the Derivative Works; and
106 | 
107 |       (d) If the Work includes a "NOTICE" text file as part of its
108 |           distribution, then any Derivative Works that You distribute must
109 |           include a readable copy of the attribution notices contained
110 |           within such NOTICE file, excluding those notices that do not
111 |           pertain to any part of the Derivative Works, in at least one
112 |           of the following places: within a NOTICE text file distributed
113 |           as part of the Derivative Works; within the Source form or
114 |           documentation, if provided along with the Derivative Works; or,
115 |           within a display generated by the Derivative Works, if and
116 |           wherever such third-party notices normally appear. The contents
117 |           of the NOTICE file are for informational purposes only and
118 |           do not modify the License. You may add Your own attribution
119 |           notices within Derivative Works that You distribute, alongside
120 |           or as an addendum to the NOTICE text from the Work, provided
121 |           that such additional attribution notices cannot be construed
122 |           as modifying the License.
123 | 
124 |       You may add Your own copyright statement to Your modifications and
125 |       may provide additional or different license terms and conditions
126 |       for use, reproduction, or distribution of Your modifications, or
127 |       for any such Derivative Works as a whole, provided Your use,
128 |       reproduction, and distribution of the Work otherwise complies with
129 |       the conditions stated in this License.
130 | 
131 |    5. Submission of Contributions. Unless You explicitly state otherwise,
132 |       any Contribution intentionally submitted for inclusion in the Work
133 |       by You to the Licensor shall be under the terms and conditions of
134 |       this License, without any additional terms or conditions.
135 |       Notwithstanding the above, nothing herein shall supersede or modify
136 |       the terms of any separate license agreement you may have executed
137 |       with Licensor regarding such Contributions.
138 | 
139 |    6. Trademarks. This License does not grant permission to use the trade
140 |       names, trademarks, service marks, or product names of the Licensor,
141 |       except as required for reasonable and customary use in describing the
142 |       origin of the Work and reproducing the content of the NOTICE file.
143 | 
144 |    7. Disclaimer of Warranty. Unless required by applicable law or
145 |       agreed to in writing, Licensor provides the Work (and each
146 |       Contributor provides its Contributions) on an "AS IS" BASIS,
147 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 |       implied, including, without limitation, any warranties or conditions
149 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 |       PARTICULAR PURPOSE. You are solely responsible for determining the
151 |       appropriateness of using or redistributing the Work and assume any
152 |       risks associated with Your exercise of permissions under this License.
153 | 
154 |    8. Limitation of Liability. In no event and under no legal theory,
155 |       whether in tort (including negligence), contract, or otherwise,
156 |       unless required by applicable law (such as deliberate and grossly
157 |       negligent acts) or agreed to in writing, shall any Contributor be
158 |       liable to You for damages, including any direct, indirect, special,
159 |       incidental, or consequential damages of any character arising as a
160 |       result of this License or out of the use or inability to use the
161 |       Work (including but not limited to damages for loss of goodwill,
162 |       work stoppage, computer failure or malfunction, or any and all
163 |       other commercial damages or losses), even if such Contributor
164 |       has been advised of the possibility of such damages.
165 | 
166 |    9. Accepting Warranty or Additional Liability. While redistributing
167 |       the Work or Derivative Works thereof, You may choose to offer,
168 |       and charge a fee for, acceptance of support, warranty, indemnity,
169 |       or other liability obligations and/or rights consistent with this
170 |       License. However, in accepting such obligations, You may act only
171 |       on Your own behalf and on Your sole responsibility, not on behalf
172 |       of any other Contributor, and only if You agree to indemnify,
173 |       defend, and hold each Contributor harmless for any liability
174 |       incurred by, or claims asserted against, such Contributor by reason
175 |       of your accepting any such warranty or additional liability.
176 | 
177 |    END OF TERMS AND CONDITIONS
178 | 
179 |    APPENDIX: How to apply the Apache License to your work.
180 | 
181 |       To apply the Apache License to your work, attach the following
182 |       boilerplate notice, with the fields enclosed by brackets "[]"
183 |       replaced with your own identifying information. (Don't include
184 |       the brackets!)  The text should be enclosed in the appropriate
185 |       comment syntax for the file format. We also recommend that a
186 |       file or class name and description of purpose be included on the
187 |       same "printed page" as the copyright notice for easier
188 |       identification within third-party archives.
189 | 
190 |    Copyright [yyyy] [name of copyright owner]
191 | 
192 |    Licensed under the Apache License, Version 2.0 (the "License");
193 |    you may not use this file except in compliance with the License.
194 |    You may obtain a copy of the License at
195 | 
196 |        http://www.apache.org/licenses/LICENSE-2.0
197 | 
198 |    Unless required by applicable law or agreed to in writing, software
199 |    distributed under the License is distributed on an "AS IS" BASIS,
200 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 |    See the License for the specific language governing permissions and
202 |    limitations under the License.
203 | 


--------------------------------------------------------------------------------
/compression_experiments/npm_packages_compression.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "Copyright 2020 Google Inc. All Rights Reserved.\n",
  8 |     "\n",
  9 |     "Licensed under the Apache License, Version 2.0 (the \"License\"); <br>\n",
 10 |     "you may not use this file except in compliance with the License.<br>\n",
 11 |     "You may obtain a copy of the License at<br>\n",
 12 |     "\n",
 13 |     "     http://www.apache.org/licenses/LICENSE-2.0\n",
 14 |     "\n",
 15 |     "Unless required by applicable law or agreed to in writing, software\n",
 16 |     "distributed under the License is distributed on an \"AS IS\" BASIS,\n",
 17 |     "WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.<br>\n",
 18 |     "\n",
 19 |     "See the License for the specific language governing permissions and \n",
 20 |     "limitations under the License."
 21 |    ]
 22 |   },
 23 |   {
 24 |    "cell_type": "code",
 25 |    "execution_count": 1,
 26 |    "metadata": {},
 27 |    "outputs": [],
 28 |    "source": [
 29 |     "import numpy as np\n",
 30 |     "import json\n",
 31 |     "import matplotlib.pyplot as plt\n",
 32 |     "from tqdm import tqdm\n",
 33 |     "import random\n",
 34 |     "import subprocess\n",
 35 |     "import time\n",
 36 |     "import os"
 37 |    ]
 38 |   },
 39 |   {
 40 |    "cell_type": "code",
 41 |    "execution_count": 3,
 42 |    "metadata": {},
 43 |    "outputs": [],
 44 |    "source": [
 45 |     "with open(\"packages_npm.txt\") as file:\n",
 46 |     "    packages = file.read().strip().split('\\n')"
 47 |    ]
 48 |   },
 49 |   {
 50 |    "cell_type": "code",
 51 |    "execution_count": 10,
 52 |    "metadata": {},
 53 |    "outputs": [],
 54 |    "source": [
 55 |     "def get_seconds(time): \n",
 56 |     "    min_ind = time.find('m')\n",
 57 |     "    mins = int(time[:min_ind])\n",
 58 |     "    second = float(time[min_ind + 1:-1])\n",
 59 |     "    return mins * 60 + second\n",
 60 |     "\n",
 61 |     "def log(file, msg):\n",
 62 |     "    f = open(file, 'a+')\n",
 63 |     "    f.write(msg + '\\n')\n",
 64 |     "    f.close()"
 65 |    ]
 66 |   },
 67 |   {
 68 |    "cell_type": "code",
 69 |    "execution_count": 11,
 70 |    "metadata": {},
 71 |    "outputs": [],
 72 |    "source": [
 73 |     "rates_gzip = []\n",
 74 |     "rates_brotli = []\n",
 75 |     "times_gzip = []\n",
 76 |     "times_brotli = []\n",
 77 |     "speed_gzip = []\n",
 78 |     "speed_brotli = []\n",
 79 |     "init_sizes = []\n",
 80 |     "all_urls = []\n",
 81 |     "\n",
 82 |     "for i in range(len(packages)):\n",
 83 |     "    with open(\"package.txt\", \"w\") as file:\n",
 84 |     "        file.write(packages[i])\n",
 85 |     "    #delete the current node_modules directories containing previous package\n",
 86 |     "    result = subprocess.run([\"rm\", \"-rf\", \"node_modules\"])\n",
 87 |     "    #install the package and save the names of js scripts\n",
 88 |     "    result = subprocess.run([\"bash\", \"npm_install_packages.sh\"])\n",
 89 |     "    result = subprocess.run([\"bash\", \"find_urls_save.sh\"])\n",
 90 |     "    with open(\"urls_for_package.txt\") as file:\n",
 91 |     "        urls = file.read().strip().split('\\n')\n",
 92 |     "    all_urls.append(urls)\n",
 93 |     "    \n",
 94 |     "    #concatenate all scripts of that package together to simulate web bundle\n",
 95 |     "    script_concatenated = \"\"\n",
 96 |     "    for url in all_urls[i]:\n",
 97 |     "        if url == \"\":\n",
 98 |     "            continue\n",
 99 |     "        if not os.path.exists(url):\n",
100 |     "            print(i)\n",
101 |     "            print(\"DOESN'T EXIST: \", url)\n",
102 |     "            continue\n",
103 |     "        with open(url) as file:\n",
104 |     "            script_concatenated += file.read()\n",
105 |     "            \n",
106 |     "    rates_gzip_compressed = []\n",
107 |     "    rates_brotli_compressed = []\n",
108 |     "    times_gzip_compressed = []\n",
109 |     "    times_brotli_compressed = []\n",
110 |     "    speed_gzip_compressed = []\n",
111 |     "    speed_brotli_compressed = []\n",
112 |     "    \n",
113 |     "    with open(\"example2.txt\", \"w\") as file:\n",
114 |     "        file.write(script_concatenated)\n",
115 |     "    size_non_compressed = os.stat(\"example2.txt\").st_size\n",
116 |     "    init_sizes.append(size_non_compressed)\n",
117 |     "    \n",
118 |     "    # do the gzip compression with different levels\n",
119 |     "    for level in range(4, 10):\n",
120 |     "        result = subprocess.run([\"bash\", \"gzip_compress.sh\", str(level), \"time2.txt\", \n",
121 |     "                                 \"example_gzip2.txt.gz\", \"example2.txt\"])\n",
122 |     "        with open(\"time2.txt\") as file:\n",
123 |     "            user_sys = file.read().strip().split('\\n')[1:]\n",
124 |     "        time = get_seconds(user_sys[0].split('\\t')[1]) + get_seconds(user_sys[1].split('\\t')[1])\n",
125 |     "        size_gzip_compressed = os.stat(\"example_gzip2.txt.gz\").st_size\n",
126 |     "        rates_gzip_compressed.append(size_non_compressed / size_gzip_compressed)\n",
127 |     "        times_gzip_compressed.append(time)\n",
128 |     "        speed_gzip_compressed.append(size_non_compressed / time)\n",
129 |     "\n",
130 |     "    # do the brotli compression with different levels\n",
131 |     "    for level in range(4, 12):\n",
132 |     "        result = subprocess.run([\"bash\", \"brotli_compress.sh\", str(level), \"time2.txt\", \n",
133 |     "                                 \"example_brotli2.txt.br\", \"example2.txt\"])\n",
134 |     "        with open(\"time2.txt\") as file:\n",
135 |     "            user_sys = file.read().strip().split('\\n')[1:]\n",
136 |     "        time = get_seconds(user_sys[0].split('\\t')[1]) + get_seconds(user_sys[1].split('\\t')[1])\n",
137 |     "        size_br_compressed = os.stat(\"example_brotli2.txt.br\").st_size\n",
138 |     "        rates_brotli_compressed.append(size_non_compressed / size_br_compressed)\n",
139 |     "        times_brotli_compressed.append(time)\n",
140 |     "        speed_brotli_compressed.append(size_non_compressed / time)\n",
141 |     "    \n",
142 |     "    rates_gzip.append(rates_gzip_compressed)\n",
143 |     "    rates_brotli.append(rates_brotli_compressed)\n",
144 |     "    times_gzip.append(times_gzip_compressed)\n",
145 |     "    times_brotli.append(times_brotli_compressed)\n",
146 |     "    speed_gzip.append(speed_gzip_compressed)\n",
147 |     "    speed_brotli.append(speed_brotli_compressed)\n",
148 |     "    \n",
149 |     "    if i != 0 and i % 100 == 0:\n",
150 |     "        log(\"logs3.txt\", \"rates_gzip: \" + str(np.mean(rates_gzip, axis=0)))\n",
151 |     "        log(\"logs3.txt\", \"rates_brotli: \" + str(np.mean(rates_brotli, axis=0)))\n",
152 |     "        log(\"logs3.txt\", \"times_gzip: \" + str(np.mean(times_gzip, axis=0)))\n",
153 |     "        log(\"logs3.txt\", \"times_brotli: \" + str(np.mean(times_brotli, axis=0)))\n",
154 |     "        log(\"logs3.txt\", \"speed_gzip: \" + str(np.mean(speed_gzip, axis=0)))\n",
155 |     "        log(\"logs3.txt\", \"speed_brotli: \" + str(np.mean(speed_brotli, axis=0)))"
156 |    ]
157 |   },
158 |   {
159 |    "cell_type": "code",
160 |    "execution_count": 13,
161 |    "metadata": {},
162 |    "outputs": [
163 |     {
164 |      "data": {
165 |       "text/html": [
166 |        "<div>\n",
167 |        "<style scoped>\n",
168 |        "    .dataframe tbody tr th:only-of-type {\n",
169 |        "        vertical-align: middle;\n",
170 |        "    }\n",
171 |        "\n",
172 |        "    .dataframe tbody tr th {\n",
173 |        "        vertical-align: top;\n",
174 |        "    }\n",
175 |        "\n",
176 |        "    .dataframe thead th {\n",
177 |        "        text-align: right;\n",
178 |        "    }\n",
179 |        "</style>\n",
180 |        "<table border=\"1\" class=\"dataframe\">\n",
181 |        "  <thead>\n",
182 |        "    <tr style=\"text-align: right;\">\n",
183 |        "      <th></th>\n",
184 |        "      <th>name</th>\n",
185 |        "      <th>rates</th>\n",
186 |        "      <th>savings</th>\n",
187 |        "      <th>speed(MB/s)</th>\n",
188 |        "    </tr>\n",
189 |        "  </thead>\n",
190 |        "  <tbody>\n",
191 |        "    <tr>\n",
192 |        "      <th>0</th>\n",
193 |        "      <td>gzip 4</td>\n",
194 |        "      <td>4.612111</td>\n",
195 |        "      <td>0.783180</td>\n",
196 |        "      <td>58.640409</td>\n",
197 |        "    </tr>\n",
198 |        "    <tr>\n",
199 |        "      <th>1</th>\n",
200 |        "      <td>gzip 5</td>\n",
201 |        "      <td>4.830213</td>\n",
202 |        "      <td>0.792970</td>\n",
203 |        "      <td>42.773977</td>\n",
204 |        "    </tr>\n",
205 |        "    <tr>\n",
206 |        "      <th>2</th>\n",
207 |        "      <td>gzip 6</td>\n",
208 |        "      <td>4.942799</td>\n",
209 |        "      <td>0.797685</td>\n",
210 |        "      <td>29.547876</td>\n",
211 |        "    </tr>\n",
212 |        "    <tr>\n",
213 |        "      <th>3</th>\n",
214 |        "      <td>gzip 7</td>\n",
215 |        "      <td>4.966302</td>\n",
216 |        "      <td>0.798643</td>\n",
217 |        "      <td>24.132054</td>\n",
218 |        "    </tr>\n",
219 |        "    <tr>\n",
220 |        "      <th>4</th>\n",
221 |        "      <td>gzip 8</td>\n",
222 |        "      <td>4.984292</td>\n",
223 |        "      <td>0.799370</td>\n",
224 |        "      <td>14.945622</td>\n",
225 |        "    </tr>\n",
226 |        "    <tr>\n",
227 |        "      <th>5</th>\n",
228 |        "      <td>gzip 9</td>\n",
229 |        "      <td>4.986703</td>\n",
230 |        "      <td>0.799467</td>\n",
231 |        "      <td>12.569951</td>\n",
232 |        "    </tr>\n",
233 |        "    <tr>\n",
234 |        "      <th>6</th>\n",
235 |        "      <td>brotli 4</td>\n",
236 |        "      <td>8.207642</td>\n",
237 |        "      <td>0.878162</td>\n",
238 |        "      <td>48.296430</td>\n",
239 |        "    </tr>\n",
240 |        "    <tr>\n",
241 |        "      <th>7</th>\n",
242 |        "      <td>brotli 5</td>\n",
243 |        "      <td>8.530252</td>\n",
244 |        "      <td>0.882770</td>\n",
245 |        "      <td>29.497596</td>\n",
246 |        "    </tr>\n",
247 |        "    <tr>\n",
248 |        "      <th>8</th>\n",
249 |        "      <td>brotli 6</td>\n",
250 |        "      <td>9.082333</td>\n",
251 |        "      <td>0.889896</td>\n",
252 |        "      <td>22.155465</td>\n",
253 |        "    </tr>\n",
254 |        "    <tr>\n",
255 |        "      <th>9</th>\n",
256 |        "      <td>brotli 7</td>\n",
257 |        "      <td>9.498521</td>\n",
258 |        "      <td>0.894720</td>\n",
259 |        "      <td>14.364543</td>\n",
260 |        "    </tr>\n",
261 |        "    <tr>\n",
262 |        "      <th>10</th>\n",
263 |        "      <td>brotli 8</td>\n",
264 |        "      <td>9.713480</td>\n",
265 |        "      <td>0.897050</td>\n",
266 |        "      <td>9.417515</td>\n",
267 |        "    </tr>\n",
268 |        "    <tr>\n",
269 |        "      <th>11</th>\n",
270 |        "      <td>brotli 9</td>\n",
271 |        "      <td>9.934222</td>\n",
272 |        "      <td>0.899338</td>\n",
273 |        "      <td>6.265608</td>\n",
274 |        "    </tr>\n",
275 |        "    <tr>\n",
276 |        "      <th>12</th>\n",
277 |        "      <td>brotli 10</td>\n",
278 |        "      <td>11.089514</td>\n",
279 |        "      <td>0.909825</td>\n",
280 |        "      <td>1.230184</td>\n",
281 |        "    </tr>\n",
282 |        "    <tr>\n",
283 |        "      <th>13</th>\n",
284 |        "      <td>brotli 11</td>\n",
285 |        "      <td>11.378584</td>\n",
286 |        "      <td>0.912116</td>\n",
287 |        "      <td>0.571473</td>\n",
288 |        "    </tr>\n",
289 |        "  </tbody>\n",
290 |        "</table>\n",
291 |        "</div>"
292 |       ],
293 |       "text/plain": [
294 |        "         name      rates   savings  speed(MB/s)\n",
295 |        "0      gzip 4   4.612111  0.783180    58.640409\n",
296 |        "1      gzip 5   4.830213  0.792970    42.773977\n",
297 |        "2      gzip 6   4.942799  0.797685    29.547876\n",
298 |        "3      gzip 7   4.966302  0.798643    24.132054\n",
299 |        "4      gzip 8   4.984292  0.799370    14.945622\n",
300 |        "5      gzip 9   4.986703  0.799467    12.569951\n",
301 |        "6    brotli 4   8.207642  0.878162    48.296430\n",
302 |        "7    brotli 5   8.530252  0.882770    29.497596\n",
303 |        "8    brotli 6   9.082333  0.889896    22.155465\n",
304 |        "9    brotli 7   9.498521  0.894720    14.364543\n",
305 |        "10   brotli 8   9.713480  0.897050     9.417515\n",
306 |        "11   brotli 9   9.934222  0.899338     6.265608\n",
307 |        "12  brotli 10  11.089514  0.909825     1.230184\n",
308 |        "13  brotli 11  11.378584  0.912116     0.571473"
309 |       ]
310 |      },
311 |      "execution_count": 13,
312 |      "metadata": {},
313 |      "output_type": "execute_result"
314 |     }
315 |    ],
316 |    "source": [
317 |     "import pandas as pd\n",
318 |     "frame = pd.DataFrame()\n",
319 |     "frame[\"name\"] = [\"gzip 4\", \"gzip 5\", \"gzip 6\", \"gzip 7\", \"gzip 8\", \"gzip 9\",\n",
320 |     "                 \"brotli 4\", \"brotli 5\", \"brotli 6\", \"brotli 7\", \"brotli 8\", \"brotli 9\", \"brotli 10\", \"brotli 11\"]\n",
321 |     "\n",
322 |     "frame[\"rates\"] = np.hstack((np.mean(rates_gzip, axis=0), np.mean(rates_brotli, axis=0)))\n",
323 |     "frame[\"savings\"] = 1 - 1 / np.hstack((np.mean(rates_gzip, axis=0), np.mean(rates_brotli, axis=0)))\n",
324 |     "frame[\"speed(MB/s)\"] = np.hstack((np.mean(speed_gzip, axis=0), np.mean(speed_brotli, axis=0))) / 1000000\n",
325 |     "\n",
326 |     "frame"
327 |    ]
328 |   },
329 |   {
330 |    "cell_type": "code",
331 |    "execution_count": 25,
332 |    "metadata": {},
333 |    "outputs": [
334 |     {
335 |      "name": "stdout",
336 |      "output_type": "stream",
337 |      "text": [
338 |       "non compressed size range 34.465761MB-81.676873MB\n"
339 |      ]
340 |     }
341 |    ],
342 |    "source": [
343 |     "print(\"non compressed size range {}MB-{}MB\".format(np.min(init_sizes) / 1000000, np.max(init_sizes)/ 1000000))"
344 |    ]
345 |   }
346 |  ],
347 |  "metadata": {
348 |   "kernelspec": {
349 |    "display_name": "Python 3",
350 |    "language": "python",
351 |    "name": "python3"
352 |   },
353 |   "language_info": {
354 |    "codemirror_mode": {
355 |     "name": "ipython",
356 |     "version": 3
357 |    },
358 |    "file_extension": ".py",
359 |    "mimetype": "text/x-python",
360 |    "name": "python",
361 |    "nbconvert_exporter": "python",
362 |    "pygments_lexer": "ipython3",
363 |    "version": "3.6.7"
364 |   }
365 |  },
366 |  "nbformat": 4,
367 |  "nbformat_minor": 2
368 | }
369 | 


--------------------------------------------------------------------------------
/comparison_script/compression.cc:
--------------------------------------------------------------------------------
  1 | // Copyright 2020 Google Inc. All Rights Reserved.
  2 | //
  3 | // Licensed under the Apache License, Version 2.0 (the "License");
  4 | // you may not use this file except in compliance with the License.
  5 | // You may obtain a copy of the License at
  6 | //
  7 | //     http://www.apache.org/licenses/LICENSE-2.0
  8 | //
  9 | // Unless required by applicable law or agreed to in writing, software
 10 | // distributed under the License is distributed on an "AS IS" BASIS,
 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | // See the License for the specific language governing permissions and
 13 | // limitations under the License.
 14 | #include <stdlib.h>
 15 | #include <string.h>
 16 | #include <stdio.h>
 17 | #include <brotli/encode.h>
 18 | #include <brotli/decode.h>
 19 | #include <zlib.h>
 20 | #include <time.h>
 21 | #include <iostream>
 22 | #include <sstream>
 23 | #include <iomanip>
 24 | #include <fstream>
 25 | #include <stdexcept>
 26 | #include <string>
 27 | #include <stdlib.h>
 28 | #include "json.hpp"
 29 | 
 30 | 
 31 | using json = nlohmann::json;
 32 | 
 33 | int DEFAULT_WINDOW = 24;
 34 | 
 35 | struct CompressionStatistics {
 36 |     float compressed_size;
 37 |     float compression_time;
 38 |     float decompression_time;
 39 |     CompressionStatistics(float size, float comp_time, float decomp_time) : compressed_size(size), compression_time(comp_time),
 40 |                                                                             decompression_time(decomp_time) {}
 41 | };
 42 | 
 43 | size_t FileSize(FILE* file) {
 44 |   fseek(file, 0, SEEK_END);
 45 |   size_t size = ftell(file);
 46 |   fseek(file, 0, SEEK_SET);
 47 |   return size;
 48 | }
 49 | 
 50 | FILE* OpenFile(const char* filename, const char* mode) {
 51 |   FILE* file = fopen(filename, mode);
 52 |   if (file == NULL) {
 53 |     perror("fopen failed");
 54 |   }
 55 |   return file;
 56 | }
 57 | 
 58 | void ReadData(FILE* file, unsigned char** data, size_t* size) {
 59 |   *size = FileSize(file);
 60 |   *data = (unsigned char*) malloc(*size);
 61 |   if (0 == fread(*data, 1, *size, file)) {
 62 |     throw "Failed to read from file";
 63 |   }
 64 |   return;
 65 | }
 66 | 
 67 | void GetNamesFromFile(std::string file_name, std::vector<std::string>& names) {
 68 |     std::string line;
 69 |     std::ifstream infile(file_name);
 70 |     while (std::getline(infile, line)) {
 71 |         names.push_back(line);
 72 |     }
 73 |     infile.close();
 74 | }
 75 | 
 76 | float GetSeconds(std::string time_string) {
 77 |     size_t ind = time_string.find("\t");
 78 |     auto time = time_string.substr(ind, time_string.size() - ind);
 79 |     auto minutes_ind = time.find('m');
 80 |     auto minutes = time.substr(0, minutes_ind);
 81 |     auto seconds = time.substr(minutes_ind + 1, time.size() - minutes_ind - 2);
 82 |     return std::stof(minutes) * 60 + std::stof(seconds);
 83 | }
 84 | 
 85 | size_t BrotliCompress(int level, int window, const unsigned char* input_data, size_t input_size, unsigned char* output_data, size_t output_buffer_size) {
 86 |   if (!BrotliEncoderCompress(level, window, BROTLI_MODE_GENERIC, input_size, input_data, &output_buffer_size, output_data)) {
 87 |     throw "Failure in BrotliCompress";
 88 |   }
 89 |   return output_buffer_size;
 90 | }
 91 | 
 92 | size_t BrotliDecompress(const unsigned char* input_data, size_t input_size, unsigned char* output_data, size_t output_buffer_size) {
 93 |   if (BrotliDecoderDecompress(input_size, input_data, &output_buffer_size, output_data) != 1) {
 94 |     throw "Failure in BrotliDecompress";
 95 |   }
 96 |   return output_buffer_size;
 97 | }
 98 | 
 99 | size_t GzipCompress(int level, int window, const unsigned char* input_data, size_t input_size, unsigned char* output_data, size_t output_buffer_size, int& time) {
100 |     std::ofstream out("example.txt");
101 |     out.write((const char*)input_data, input_size);
102 |     out.close();
103 | 
104 |     std::string command = "{ time gzip -" + std::to_string(level) +
105 |         " -f -k -c example.txt > example_gzip.txt.gz; } 2> time.txt";
106 |     system(command.c_str());
107 | 
108 |     std::vector<std::string> times;
109 |     GetNamesFromFile("time.txt", times);
110 |     time = GetSeconds(times[2]) + GetSeconds(times[3]);
111 | 
112 |     std::ifstream infile("example_gzip.txt.gz");
113 |     infile.seekg(0,std::ios_base::end);
114 |     auto length = infile.tellg();
115 |     return static_cast<size_t>(length);
116 | }
117 | 
118 | size_t ZlibCompress(int level, int window, const unsigned char* input_data, size_t input_size, unsigned char* output_data, size_t output_buffer_size) {
119 |   z_stream strm;
120 |   strm.zalloc = Z_NULL;
121 |   strm.zfree = Z_NULL;
122 |   strm.opaque = Z_NULL;
123 |   if (Z_OK != deflateInit2(&strm, level, Z_DEFLATED, 15 + 16, 8, Z_DEFAULT_STRATEGY)) {
124 |     throw "Failure in deflateInit";
125 |   }
126 |   strm.avail_in = input_size;
127 |   strm.next_in = (unsigned char*) input_data;
128 |   strm.avail_out = output_buffer_size;
129 |   strm.next_out = output_data;
130 |   if (Z_STREAM_ERROR == deflate(&strm, Z_FINISH)) {
131 |     throw "Failure in deflate";
132 |   }
133 |   if (0 != strm.avail_in) {
134 |     throw "Failed to consume entire input in deflate";
135 |   }
136 |   size_t output_size = output_buffer_size - strm.avail_out;
137 |   deflateEnd(&strm);
138 |   return output_size;
139 | }
140 | 
141 | size_t ZlibDecompress(const unsigned char* input_data, size_t input_size, unsigned char* output_data, size_t output_buffer_size) {
142 |   z_stream strm;
143 |   strm.zalloc = Z_NULL;
144 |   strm.zfree = Z_NULL;
145 |   strm.opaque = Z_NULL;
146 |   strm.avail_in = input_size;
147 |   strm.next_in = (unsigned char*) input_data;
148 |   strm.avail_out = output_buffer_size;
149 |   strm.next_out = output_data;
150 |   
151 |   if (Z_OK != inflateInit2(&strm, 15 + 16)) {
152 |     std::cout << "Failure in inflateInit";
153 |     throw "Failure in inflateInit";
154 |   }
155 |   if (Z_STREAM_ERROR == inflate(&strm, Z_NO_FLUSH)) {
156 |     std::cout << "Failure in inflate";
157 |     throw "Failure in inflate";
158 |   }
159 | 
160 |   size_t output_size = output_buffer_size - strm.avail_out;
161 |   inflateEnd(&strm);
162 |   return output_size;
163 | }
164 | 
165 | typedef size_t (*CompressionFunc)(int, int, const unsigned char*, size_t, unsigned char*, size_t);
166 | typedef size_t (*DecompressionFunc)(const unsigned char*, size_t, unsigned char*, size_t);
167 | CompressionStatistics MeasureCompress(int level, int window,
168 |                                        const unsigned char* input_data, size_t input_size,
169 |                                        unsigned char* output_data, size_t output_buffer_size,
170 |                                        CompressionFunc compress, DecompressionFunc decompress, int repetitions) {
171 |   size_t total_output_size = 0;
172 |   clock_t start = clock();
173 |   for (int i = 0 ; i < repetitions ; i++) {
174 |     total_output_size += compress(level, window, input_data, input_size, output_data, output_buffer_size);
175 |   }
176 |   clock_t end = clock();
177 |   float elapsed_time_compress = (float) (end - start) / CLOCKS_PER_SEC;
178 |   float compressed_size = (float) total_output_size / repetitions;
179 | 
180 |   size_t decompressed_size = input_size * 2;
181 |   unsigned char* decompressed_data = (unsigned char*) malloc(decompressed_size);
182 |   size_t total_decopress_size = 0;
183 |   start = clock();
184 |   for (int i = 0 ; i < repetitions ; i++) {
185 |     total_decopress_size = decompress(output_data, output_buffer_size, decompressed_data, decompressed_size);
186 |   }
187 |   end = clock();
188 |   float elapsed_time_decompress = (float) (end - start) / CLOCKS_PER_SEC;
189 | 
190 |   assert(total_decopress_size == input_size);
191 |   assert(memcmp(decompressed_data, input_data, input_size)==0);
192 |   return CompressionStatistics(compressed_size, elapsed_time_compress, elapsed_time_decompress);
193 | }
194 | 
195 | int MinWindowLargerThanFile(int fileSize, int max) {
196 |     int window = 24;
197 |     if (fileSize > 0) {
198 |         window = 10;
199 |         while (((size_t)1 << (window)) - 16 < (uint64_t)fileSize) {
200 |             ++window;
201 |             if (window == max) break;
202 |         }
203 |     }
204 |     return window;
205 | }
206 | 
207 | bool Execute(const char* cmd) {
208 |     char buffer[128];
209 |     std::string stdout = "";
210 |     FILE* pipe = popen(cmd, "r");
211 |     if (!pipe) throw std::runtime_error("popen() failed!");
212 |     try {
213 |         while (fgets(buffer, sizeof buffer, pipe) != NULL) {
214 |             stdout += buffer;
215 |         }
216 |     } catch (...) {
217 |         pclose(pipe);
218 |         throw;
219 |     }
220 |     pclose(pipe);
221 |     if (stdout.find("TypeError") != std::string::npos) {
222 |         return false;
223 |     }
224 |     return true;
225 | }
226 | 
227 | void BundledCompression(const unsigned char* input_data, size_t input_size,
228 |                          unsigned char* output_data, size_t output_buffer_size,
229 |                          std::ostream & results, int repetitions) {
230 | 
231 |     results << "\"bundled\":{";
232 |     int window = MinWindowLargerThanFile(input_size, DEFAULT_WINDOW);
233 |     std::string name = "brotli";
234 |     for (int level = 1; level <= 11; level ++) {
235 |       CompressionStatistics comp_results = MeasureCompress(level, window, input_data, input_size,
236 |                                                             output_data, output_buffer_size,
237 |                                                             BrotliCompress, BrotliDecompress, repetitions);
238 |       float rate = input_size / comp_results.compressed_size;
239 |       float speed = (float) (input_size * repetitions) / (comp_results.compression_time * 1024 * 1024);
240 |       float decompession_speed = (float) (comp_results.compressed_size * repetitions) / (comp_results.decompression_time * 1024 * 1024);
241 |       results << "\"" << name << level << "_compression_rate\":" << std::setprecision(4) << rate << ", \"";
242 |       results << name << level << "_compressed_size\":" << std::setprecision(4) << comp_results.compressed_size << ", \"";
243 |       results << name << level << "_speed\":" << std::setprecision(4) << speed << ", \"";
244 |       results << name << level << "_decompression_speed\":" << std::setprecision(4) << decompession_speed << ",\n";
245 |     }
246 | 
247 |     name = "zlib";
248 |     for (int level = 1; level <= 9 ; level ++) {
249 |       CompressionStatistics comp_results = MeasureCompress(level, window, input_data, input_size,
250 |                                                             output_data, output_buffer_size,
251 |                                                             ZlibCompress, ZlibDecompress, repetitions);
252 |       float rate = input_size / comp_results.compressed_size;
253 |       float speed = (float) (input_size * repetitions) / (comp_results.compression_time * 1024 * 1024);
254 |       float decompession_speed = (float) (comp_results.compressed_size * repetitions) / (comp_results.decompression_time * 1024 * 1024);
255 |       results << "\"" << name << level << "_compression_rate\":" << std::setprecision(4) << rate << ", \"";
256 |       results << name << level << "_compressed_size\":" << std::setprecision(4) << comp_results.compressed_size << ", \"";
257 |       results << name << level << "_speed\":" << std::setprecision(4) << speed << ", \"";
258 |       results << name << level << "_decompression_speed\":" << std::setprecision(4) << decompession_speed;
259 |       if (level < 9) {
260 |           results << ",\n";
261 |       } else {
262 |           results << "}\n";
263 |       }
264 |     }
265 | }
266 | 
267 | 
268 | void UnbundledCompression(const unsigned char* input_data, size_t input_size,
269 |                            unsigned char* output_data, size_t output_buffer_size,
270 |                            std::ostream & results, int repetitions, std::string file_name) {
271 | 
272 |     std::string command = "node --experimental-modules third_party/bundle_analyzer/get_chunks.js " + file_name +
273 |         " parsed_bundle.json 2>&1";
274 |     bool execution_result = Execute(command.c_str());
275 |     if (!execution_result) {
276 |         results << "\"unbundled\":{\"chunks_execution_result\":" << execution_result << "},\n";
277 |         return;
278 |     }
279 | 
280 |     std::ifstream parsed_bundle("parsed_bundle.json");
281 |     json chunks;
282 |     parsed_bundle >> chunks;
283 |     std::string code;
284 |     std::vector<float> compressed_sizes(11 + 9, 0);
285 |     std::vector<float> compression_times(11 + 9, 0);
286 |     std::vector<float> decompression_times(11 + 9, 0);
287 |     int overall_size = 0;
288 |     results << "\"chunks_count\":" << chunks.size() << ",\n";
289 |     if (!chunks.size()) {
290 |         results << "\"unbundled\":{\"chunks_execution_result\":" << execution_result << "},\n";
291 |         return;
292 |     }
293 |     for (const auto& chunk : chunks) {
294 |         code = chunk["code"];
295 |         int chunk_size = code.size();
296 |         std::vector<CompressionStatistics> statistics_chunk;
297 |         int window = MinWindowLargerThanFile(chunk_size, DEFAULT_WINDOW);
298 |         std::string name = "brotli";
299 |         overall_size += chunk_size;
300 |         for (int level = 1; level <= 11; level ++) {
301 |           CompressionStatistics comp_results = MeasureCompress(level, window, (unsigned char*)code.c_str(), chunk_size,
302 |                                                                 output_data, output_buffer_size, BrotliCompress, BrotliDecompress, repetitions);
303 |           compressed_sizes[level - 1] += comp_results.compressed_size;
304 |           compression_times[level - 1] += comp_results.compression_time;
305 |           decompression_times[level - 1] += comp_results.decompression_time;
306 |         }
307 | 
308 |         for (int level = 1; level <= 9; level ++) {
309 |           CompressionStatistics comp_results = MeasureCompress(level, window, (unsigned char*)code.c_str(), chunk_size,
310 |                                                                 output_data, output_buffer_size, ZlibCompress, ZlibDecompress, repetitions);
311 |           compressed_sizes[11 + level - 1] += comp_results.compressed_size;
312 |           compression_times[11 + level - 1] += comp_results.compression_time;
313 |           decompression_times[11 + level - 1] += comp_results.decompression_time;
314 |         }
315 |     }
316 | 
317 |     results << "\"overall_chunks_size\":" << overall_size << ",\n";
318 |     results << "\"unbundled\":{";
319 |     std::string name = "brotli";
320 |     for (int level = 1; level <= 11; level ++) {
321 |       float rate = input_size / compressed_sizes[level - 1];
322 |       float speed = (float) (input_size * repetitions) / (compression_times[level - 1] * 1024 * 1024);
323 |       float decompression_speed = (float) (compressed_sizes[level - 1] * repetitions) / (decompression_times[level - 1] * 1024 * 1024);
324 |       results << "\"" << name << level << "_compression_rate\":" << std::setprecision(4) << rate << ", \"";
325 |       results << name << level << "_compressed_size\":" << std::setprecision(4) << compressed_sizes[level - 1] << ", \"";
326 |       results << name << level << "_speed\":" << std::setprecision(4) << speed << ", \"";
327 |       results << name << level << "_decompression_speed\":" << std::setprecision(4) << decompression_speed << ",\n";
328 |     }
329 | 
330 |     name = "zlib";
331 |     for (int level = 1; level <= 9; level ++) {
332 |       float rate = input_size / compressed_sizes[11 + level - 1];
333 |       float speed = (float) (input_size * repetitions) / (compression_times[11 + level - 1] * 1024 * 1024);
334 |       float decompression_speed = (float) (compressed_sizes[11 + level - 1] * repetitions) / (decompression_times[11 + level - 1] * 1024 * 1024);
335 |       results << "\"" << name << level << "_compression_rate\":" << std::setprecision(4) << rate << ", \"";
336 |       results << name << level << "_compressed_size\":" << std::setprecision(4) << compressed_sizes[11 + level - 1] << ", \"";
337 |       results << name << level << "_speed\":" << std::setprecision(4) << speed << ", \"";
338 |       results << name << level << "_decompression_speed\":" << std::setprecision(4) << decompression_speed;
339 |       if (level < 9) {
340 |           results << ",\n";
341 |       } else {
342 |           results << "},\n";
343 |       }
344 |     }
345 | }
346 | 
347 | 
348 | int main (int argc, char** argv) {
349 |   try {
350 |     std::vector<std::string> bundle_files;
351 |     GetNamesFromFile("bundles_source/bundle_files.txt", bundle_files);
352 |     int repetitions = std::stoi(std::string(argv[1]));
353 |     std::ostringstream results;
354 |     results << "[";
355 |     for (int i = 0; i < bundle_files.size(); ++i) {
356 |         FILE* infile = OpenFile(bundle_files[i].c_str(), "rb");
357 |         if (infile == NULL) {
358 |           exit(1);
359 |         }
360 |         unsigned char* input_data = NULL;
361 |         size_t input_size = 0;
362 |         ReadData(infile, &input_data, &input_size);
363 |         fclose(infile);
364 |         size_t output_buffer_size = input_size * 2;
365 |         unsigned char* output_data = (unsigned char*) malloc(output_buffer_size);
366 | 
367 | 
368 |         results << "{\"valid\":true, \"original_size\":" << input_size << ",\n";
369 | 
370 |         UnbundledCompression(input_data, input_size, output_data, output_buffer_size,
371 |                             results, repetitions, bundle_files[i]);
372 | 
373 |         BundledCompression(input_data, input_size, output_data, output_buffer_size,
374 |                            results, repetitions);
375 |         if (i == bundle_files.size() - 1) {
376 |             results << "}]\n";
377 |         } else {
378 |            results << "},\n";
379 |         }
380 |         std::cout << "bundle number " << i << " completed\n";
381 |     }
382 |     std::ofstream out("compression_results.json");
383 |     out << results.str();
384 |     out.close();
385 |   } catch (const char* message) {
386 |     std::ofstream out("compression_results.json");
387 |     out << "{\"valid\":false, \"message\":\"" << message << "\"}\n";
388 |     out.close();
389 |   }
390 |   return 0;
391 | }
392 | 


--------------------------------------------------------------------------------
/compression_experiments/js_dataset_compression.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "Copyright 2020 Google Inc. All Rights Reserved.\n",
  8 |     "\n",
  9 |     "Licensed under the Apache License, Version 2.0 (the \"License\"); <br>\n",
 10 |     "you may not use this file except in compliance with the License.<br>\n",
 11 |     "You may obtain a copy of the License at<br>\n",
 12 |     "\n",
 13 |     "     http://www.apache.org/licenses/LICENSE-2.0\n",
 14 |     "\n",
 15 |     "Unless required by applicable law or agreed to in writing, software\n",
 16 |     "distributed under the License is distributed on an \"AS IS\" BASIS,\n",
 17 |     "WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.<br>\n",
 18 |     "\n",
 19 |     "See the License for the specific language governing permissions and \n",
 20 |     "limitations under the License."
 21 |    ]
 22 |   },
 23 |   {
 24 |    "cell_type": "code",
 25 |    "execution_count": 15,
 26 |    "metadata": {},
 27 |    "outputs": [],
 28 |    "source": [
 29 |     "import numpy as np\n",
 30 |     "import json\n",
 31 |     "import matplotlib.pyplot as plt\n",
 32 |     "from tqdm import tqdm\n",
 33 |     "import random\n",
 34 |     "import subprocess\n",
 35 |     "import time\n",
 36 |     "import os"
 37 |    ]
 38 |   },
 39 |   {
 40 |    "cell_type": "markdown",
 41 |    "metadata": {},
 42 |    "source": [
 43 |     "### Read the data"
 44 |    ]
 45 |   },
 46 |   {
 47 |    "cell_type": "code",
 48 |    "execution_count": 5,
 49 |    "metadata": {},
 50 |    "outputs": [],
 51 |    "source": [
 52 |     "# js_scripts.txt constains the paths to js files\n",
 53 |     "with open(\"js_dataset/js_scripts.txt\") as file:\n",
 54 |     "    scripts = file.read().strip().split('\\n')\n",
 55 |     "    \n",
 56 |     "# dirs_data.txt constains the names of directories in data directory of js 150 dataset\n",
 57 |     "# we assume that different directories indicates different js apps\n",
 58 |     "with open(\"js_dataset/dirs_data.txt\") as file:\n",
 59 |     "    dirs = file.read().strip().split('\\n')"
 60 |    ]
 61 |   },
 62 |   {
 63 |    "cell_type": "code",
 64 |    "execution_count": 18,
 65 |    "metadata": {},
 66 |    "outputs": [
 67 |     {
 68 |      "name": "stderr",
 69 |      "output_type": "stream",
 70 |      "text": [
 71 |       "100%|██████████| 9620/9620 [06:50<00:00, 23.42it/s]\n"
 72 |      ]
 73 |     }
 74 |    ],
 75 |    "source": [
 76 |     "# group script paths by directories\n",
 77 |     "scripts_by_dirs = []\n",
 78 |     "\n",
 79 |     "for directory in tqdm(dirs):\n",
 80 |     "    dir_scripts = []\n",
 81 |     "    for script in scripts:\n",
 82 |     "        if script.startswith(\"data/\" + directory):\n",
 83 |     "            dir_scripts.append(script)\n",
 84 |     "    if len(dir_scripts):\n",
 85 |     "        scripts_by_dirs.append(dir_scripts)"
 86 |    ]
 87 |   },
 88 |   {
 89 |    "cell_type": "markdown",
 90 |    "metadata": {},
 91 |    "source": [
 92 |     "### Perform compression"
 93 |    ]
 94 |   },
 95 |   {
 96 |    "cell_type": "code",
 97 |    "execution_count": 21,
 98 |    "metadata": {},
 99 |    "outputs": [],
100 |    "source": [
101 |     "def get_seconds(time): \n",
102 |     "    min_ind = time.find('m')\n",
103 |     "    mins = int(time[:min_ind])\n",
104 |     "    second = float(time[min_ind + 1:-1])\n",
105 |     "    return mins * 60 + second\n",
106 |     "\n",
107 |     "def log(file, msg):\n",
108 |     "    f = open(file, 'a+')\n",
109 |     "    f.write(msg + '\\n')\n",
110 |     "    f.close()"
111 |    ]
112 |   },
113 |   {
114 |    "cell_type": "code",
115 |    "execution_count": 25,
116 |    "metadata": {},
117 |    "outputs": [],
118 |    "source": [
119 |     "rates_gzip = []\n",
120 |     "rates_brotli = []\n",
121 |     "times_gzip = []\n",
122 |     "times_brotli = []\n",
123 |     "speed_gzip = []\n",
124 |     "speed_brotli = []\n",
125 |     "init_sizes = []\n",
126 |     "\n",
127 |     "for i in range(len(scripts_by_dirs)):\n",
128 |     "    \n",
129 |     "    #concatenate all scripts inside the directory to simulate web bundle\n",
130 |     "    script_concatenated = \"\"\n",
131 |     "    for url in scripts_by_dirs[i]:\n",
132 |     "        if url == \"\":\n",
133 |     "            continue\n",
134 |     "        if not os.path.exists(\"js_dataset/\" + url):\n",
135 |     "            print(\"DOESN'T EXIST: \", url)\n",
136 |     "            continue\n",
137 |     "        try:\n",
138 |     "            with open(\"js_dataset/\" + url) as file:\n",
139 |     "                script_concatenated += file.read()\n",
140 |     "        except:\n",
141 |     "            print(\"didn't read\")\n",
142 |     "            \n",
143 |     "    rates_gzip_compressed = []\n",
144 |     "    rates_brotli_compressed = []\n",
145 |     "    times_gzip_compressed = []\n",
146 |     "    times_brotli_compressed = []\n",
147 |     "    speed_gzip_compressed = []\n",
148 |     "    speed_brotli_compressed = []\n",
149 |     "    \n",
150 |     "    with open(\"example2.txt\", \"w\") as file:\n",
151 |     "        file.write(script_concatenated)\n",
152 |     "    size_non_compressed = os.stat(\"example2.txt\").st_size\n",
153 |     "    init_sizes.append(size_non_compressed)\n",
154 |     "    \n",
155 |     "    # do the gzip compression with different levels\n",
156 |     "    for level in range(4, 10):\n",
157 |     "        result = subprocess.run([\"bash\", \"gzip_compress.sh\", str(level), \"time2.txt\", \n",
158 |     "                                 \"example_gzip2.txt.gz\", \"example2.txt\"])\n",
159 |     "        with open(\"time2.txt\") as file:\n",
160 |     "            user_sys = file.read().strip().split('\\n')[1:]\n",
161 |     "        time = get_seconds(user_sys[0].split('\\t')[1]) + get_seconds(user_sys[1].split('\\t')[1])\n",
162 |     "        size_gzip_compressed = os.stat(\"example_gzip2.txt.gz\").st_size\n",
163 |     "        rates_gzip_compressed.append(size_non_compressed / size_gzip_compressed)\n",
164 |     "        times_gzip_compressed.append(time)\n",
165 |     "        speed_gzip_compressed.append(size_non_compressed / time)\n",
166 |     "\n",
167 |     "    # do the brotli compression with different levels\n",
168 |     "    for level in range(4, 12):\n",
169 |     "        result = subprocess.run([\"bash\", \"brotli_compress.sh\", str(level), \"time2.txt\", \n",
170 |     "                                 \"example_brotli2.txt.br\", \"example2.txt\"])\n",
171 |     "        with open(\"time2.txt\") as file:\n",
172 |     "            user_sys = file.read().strip().split('\\n')[1:]\n",
173 |     "        time = get_seconds(user_sys[0].split('\\t')[1]) + get_seconds(user_sys[1].split('\\t')[1])\n",
174 |     "        size_br_compressed = os.stat(\"example_brotli2.txt.br\").st_size\n",
175 |     "        rates_brotli_compressed.append(size_non_compressed / size_br_compressed)\n",
176 |     "        times_brotli_compressed.append(time)\n",
177 |     "        speed_brotli_compressed.append(size_non_compressed / time)\n",
178 |     "    \n",
179 |     "    rates_gzip.append(rates_gzip_compressed)\n",
180 |     "    rates_brotli.append(rates_brotli_compressed)\n",
181 |     "    times_gzip.append(times_gzip_compressed)\n",
182 |     "    times_brotli.append(times_brotli_compressed)\n",
183 |     "    speed_gzip.append(speed_gzip_compressed)\n",
184 |     "    speed_brotli.append(speed_brotli_compressed)\n",
185 |     "    \n",
186 |     "    if i != 0 and i % 500 == 0:\n",
187 |     "        log(\"logs4.txt\", \"rates_gzip: \" + str(np.mean(rates_gzip, axis=0)))\n",
188 |     "        log(\"logs4.txt\", \"rates_brotli: \" + str(np.mean(rates_brotli, axis=0)))\n",
189 |     "        log(\"logs4.txt\", \"times_gzip: \" + str(np.mean(times_gzip, axis=0)))\n",
190 |     "        log(\"logs4.txt\", \"times_brotli: \" + str(np.mean(times_brotli, axis=0)))\n",
191 |     "        log(\"logs4.txt\", \"speed_gzip: \" + str(np.mean(speed_gzip, axis=0)))\n",
192 |     "        log(\"logs4.txt\", \"speed_brotli: \" + str(np.mean(speed_brotli, axis=0)))"
193 |    ]
194 |   },
195 |   {
196 |    "cell_type": "code",
197 |    "execution_count": 27,
198 |    "metadata": {},
199 |    "outputs": [
200 |     {
201 |      "data": {
202 |       "text/html": [
203 |        "<div>\n",
204 |        "<style scoped>\n",
205 |        "    .dataframe tbody tr th:only-of-type {\n",
206 |        "        vertical-align: middle;\n",
207 |        "    }\n",
208 |        "\n",
209 |        "    .dataframe tbody tr th {\n",
210 |        "        vertical-align: top;\n",
211 |        "    }\n",
212 |        "\n",
213 |        "    .dataframe thead th {\n",
214 |        "        text-align: right;\n",
215 |        "    }\n",
216 |        "</style>\n",
217 |        "<table border=\"1\" class=\"dataframe\">\n",
218 |        "  <thead>\n",
219 |        "    <tr style=\"text-align: right;\">\n",
220 |        "      <th></th>\n",
221 |        "      <th>name</th>\n",
222 |        "      <th>rates</th>\n",
223 |        "      <th>savings</th>\n",
224 |        "      <th>speed(MB/s)</th>\n",
225 |        "    </tr>\n",
226 |        "  </thead>\n",
227 |        "  <tbody>\n",
228 |        "    <tr>\n",
229 |        "      <th>0</th>\n",
230 |        "      <td>gzip 4</td>\n",
231 |        "      <td>3.825069</td>\n",
232 |        "      <td>0.738567</td>\n",
233 |        "      <td>15.719552</td>\n",
234 |        "    </tr>\n",
235 |        "    <tr>\n",
236 |        "      <th>1</th>\n",
237 |        "      <td>gzip 5</td>\n",
238 |        "      <td>3.948932</td>\n",
239 |        "      <td>0.746767</td>\n",
240 |        "      <td>13.392738</td>\n",
241 |        "    </tr>\n",
242 |        "    <tr>\n",
243 |        "      <th>2</th>\n",
244 |        "      <td>gzip 6</td>\n",
245 |        "      <td>4.003179</td>\n",
246 |        "      <td>0.750199</td>\n",
247 |        "      <td>10.956911</td>\n",
248 |        "    </tr>\n",
249 |        "    <tr>\n",
250 |        "      <th>3</th>\n",
251 |        "      <td>gzip 7</td>\n",
252 |        "      <td>4.017695</td>\n",
253 |        "      <td>0.751101</td>\n",
254 |        "      <td>9.777660</td>\n",
255 |        "    </tr>\n",
256 |        "    <tr>\n",
257 |        "      <th>4</th>\n",
258 |        "      <td>gzip 8</td>\n",
259 |        "      <td>4.029332</td>\n",
260 |        "      <td>0.751820</td>\n",
261 |        "      <td>7.136008</td>\n",
262 |        "    </tr>\n",
263 |        "    <tr>\n",
264 |        "      <th>5</th>\n",
265 |        "      <td>gzip 9</td>\n",
266 |        "      <td>4.031706</td>\n",
267 |        "      <td>0.751966</td>\n",
268 |        "      <td>6.170267</td>\n",
269 |        "    </tr>\n",
270 |        "    <tr>\n",
271 |        "      <th>6</th>\n",
272 |        "      <td>brotli 4</td>\n",
273 |        "      <td>4.135726</td>\n",
274 |        "      <td>0.758204</td>\n",
275 |        "      <td>12.866184</td>\n",
276 |        "    </tr>\n",
277 |        "    <tr>\n",
278 |        "      <th>7</th>\n",
279 |        "      <td>brotli 5</td>\n",
280 |        "      <td>4.496571</td>\n",
281 |        "      <td>0.777608</td>\n",
282 |        "      <td>9.528445</td>\n",
283 |        "    </tr>\n",
284 |        "    <tr>\n",
285 |        "      <th>8</th>\n",
286 |        "      <td>brotli 6</td>\n",
287 |        "      <td>4.543836</td>\n",
288 |        "      <td>0.779922</td>\n",
289 |        "      <td>8.582947</td>\n",
290 |        "    </tr>\n",
291 |        "    <tr>\n",
292 |        "      <th>9</th>\n",
293 |        "      <td>brotli 7</td>\n",
294 |        "      <td>4.582319</td>\n",
295 |        "      <td>0.781770</td>\n",
296 |        "      <td>6.631221</td>\n",
297 |        "    </tr>\n",
298 |        "    <tr>\n",
299 |        "      <th>10</th>\n",
300 |        "      <td>brotli 8</td>\n",
301 |        "      <td>4.599897</td>\n",
302 |        "      <td>0.782604</td>\n",
303 |        "      <td>5.447145</td>\n",
304 |        "    </tr>\n",
305 |        "    <tr>\n",
306 |        "      <th>11</th>\n",
307 |        "      <td>brotli 9</td>\n",
308 |        "      <td>4.622002</td>\n",
309 |        "      <td>0.783644</td>\n",
310 |        "      <td>4.209170</td>\n",
311 |        "    </tr>\n",
312 |        "    <tr>\n",
313 |        "      <th>12</th>\n",
314 |        "      <td>brotli 10</td>\n",
315 |        "      <td>4.930100</td>\n",
316 |        "      <td>0.797164</td>\n",
317 |        "      <td>1.157362</td>\n",
318 |        "    </tr>\n",
319 |        "    <tr>\n",
320 |        "      <th>13</th>\n",
321 |        "      <td>brotli 11</td>\n",
322 |        "      <td>5.019602</td>\n",
323 |        "      <td>0.800781</td>\n",
324 |        "      <td>0.506957</td>\n",
325 |        "    </tr>\n",
326 |        "  </tbody>\n",
327 |        "</table>\n",
328 |        "</div>"
329 |       ],
330 |       "text/plain": [
331 |        "         name     rates   savings  speed(MB/s)\n",
332 |        "0      gzip 4  3.825069  0.738567    15.719552\n",
333 |        "1      gzip 5  3.948932  0.746767    13.392738\n",
334 |        "2      gzip 6  4.003179  0.750199    10.956911\n",
335 |        "3      gzip 7  4.017695  0.751101     9.777660\n",
336 |        "4      gzip 8  4.029332  0.751820     7.136008\n",
337 |        "5      gzip 9  4.031706  0.751966     6.170267\n",
338 |        "6    brotli 4  4.135726  0.758204    12.866184\n",
339 |        "7    brotli 5  4.496571  0.777608     9.528445\n",
340 |        "8    brotli 6  4.543836  0.779922     8.582947\n",
341 |        "9    brotli 7  4.582319  0.781770     6.631221\n",
342 |        "10   brotli 8  4.599897  0.782604     5.447145\n",
343 |        "11   brotli 9  4.622002  0.783644     4.209170\n",
344 |        "12  brotli 10  4.930100  0.797164     1.157362\n",
345 |        "13  brotli 11  5.019602  0.800781     0.506957"
346 |       ]
347 |      },
348 |      "execution_count": 27,
349 |      "metadata": {},
350 |      "output_type": "execute_result"
351 |     }
352 |    ],
353 |    "source": [
354 |     "import pandas as pd\n",
355 |     "frame = pd.DataFrame()\n",
356 |     "frame[\"name\"] = [\"gzip 4\", \"gzip 5\", \"gzip 6\", \"gzip 7\", \"gzip 8\", \"gzip 9\",\n",
357 |     "                 \"brotli 4\", \"brotli 5\", \"brotli 6\", \"brotli 7\", \"brotli 8\", \"brotli 9\", \"brotli 10\", \"brotli 11\"]\n",
358 |     "\n",
359 |     "frame[\"rates\"] = np.hstack((np.mean(rates_gzip, axis=0), np.mean(rates_brotli, axis=0)))\n",
360 |     "frame[\"savings\"] = 1 - 1 / np.hstack((np.mean(rates_gzip, axis=0), np.mean(rates_brotli, axis=0)))\n",
361 |     "frame[\"speed(MB/s)\"] = np.hstack((np.mean(speed_gzip, axis=0), np.mean(speed_brotli, axis=0))) / 1000000\n",
362 |     "\n",
363 |     "frame"
364 |    ]
365 |   },
366 |   {
367 |    "cell_type": "code",
368 |    "execution_count": 46,
369 |    "metadata": {},
370 |    "outputs": [
371 |     {
372 |      "name": "stdout",
373 |      "output_type": "stream",
374 |      "text": [
375 |       "non compressed size range 0.0MB-519.170072MB\n"
376 |      ]
377 |     }
378 |    ],
379 |    "source": [
380 |     "print(\"non compressed size range {}MB-{}MB\".format(np.min(init_sizes) / 1000000, np.max(init_sizes)/ 1000000))"
381 |    ]
382 |   },
383 |   {
384 |    "cell_type": "markdown",
385 |    "metadata": {},
386 |    "source": [
387 |     "### Group results by non compressed size ranges"
388 |    ]
389 |   },
390 |   {
391 |    "cell_type": "code",
392 |    "execution_count": 49,
393 |    "metadata": {},
394 |    "outputs": [
395 |     {
396 |      "name": "stdout",
397 |      "output_type": "stream",
398 |      "text": [
399 |       "0 - 100000 bytes\n"
400 |      ]
401 |     },
402 |     {
403 |      "data": {
404 |       "text/html": [
405 |        "<div>\n",
406 |        "<style scoped>\n",
407 |        "    .dataframe tbody tr th:only-of-type {\n",
408 |        "        vertical-align: middle;\n",
409 |        "    }\n",
410 |        "\n",
411 |        "    .dataframe tbody tr th {\n",
412 |        "        vertical-align: top;\n",
413 |        "    }\n",
414 |        "\n",
415 |        "    .dataframe thead th {\n",
416 |        "        text-align: right;\n",
417 |        "    }\n",
418 |        "</style>\n",
419 |        "<table border=\"1\" class=\"dataframe\">\n",
420 |        "  <thead>\n",
421 |        "    <tr style=\"text-align: right;\">\n",
422 |        "      <th></th>\n",
423 |        "      <th>name</th>\n",
424 |        "      <th>rates</th>\n",
425 |        "      <th>savings</th>\n",
426 |        "      <th>speed(MB/s)</th>\n",
427 |        "    </tr>\n",
428 |        "  </thead>\n",
429 |        "  <tbody>\n",
430 |        "    <tr>\n",
431 |        "      <th>0</th>\n",
432 |        "      <td>gzip 4</td>\n",
433 |        "      <td>3.580008</td>\n",
434 |        "      <td>0.720671</td>\n",
435 |        "      <td>7.447231</td>\n",
436 |        "    </tr>\n",
437 |        "    <tr>\n",
438 |        "      <th>1</th>\n",
439 |        "      <td>gzip 5</td>\n",
440 |        "      <td>3.676672</td>\n",
441 |        "      <td>0.728015</td>\n",
442 |        "      <td>7.008153</td>\n",
443 |        "    </tr>\n",
444 |        "    <tr>\n",
445 |        "      <th>2</th>\n",
446 |        "      <td>gzip 6</td>\n",
447 |        "      <td>3.712879</td>\n",
448 |        "      <td>0.730667</td>\n",
449 |        "      <td>6.428090</td>\n",
450 |        "    </tr>\n",
451 |        "    <tr>\n",
452 |        "      <th>3</th>\n",
453 |        "      <td>gzip 7</td>\n",
454 |        "      <td>3.723238</td>\n",
455 |        "      <td>0.731417</td>\n",
456 |        "      <td>6.065006</td>\n",
457 |        "    </tr>\n",
458 |        "    <tr>\n",
459 |        "      <th>4</th>\n",
460 |        "      <td>gzip 8</td>\n",
461 |        "      <td>3.730148</td>\n",
462 |        "      <td>0.731914</td>\n",
463 |        "      <td>5.120283</td>\n",
464 |        "    </tr>\n",
465 |        "    <tr>\n",
466 |        "      <th>5</th>\n",
467 |        "      <td>gzip 9</td>\n",
468 |        "      <td>3.731493</td>\n",
469 |        "      <td>0.732011</td>\n",
470 |        "      <td>4.732681</td>\n",
471 |        "    </tr>\n",
472 |        "    <tr>\n",
473 |        "      <th>6</th>\n",
474 |        "      <td>brotli 4</td>\n",
475 |        "      <td>3.694064</td>\n",
476 |        "      <td>0.729295</td>\n",
477 |        "      <td>5.004788</td>\n",
478 |        "    </tr>\n",
479 |        "    <tr>\n",
480 |        "      <th>7</th>\n",
481 |        "      <td>brotli 5</td>\n",
482 |        "      <td>4.011637</td>\n",
483 |        "      <td>0.750725</td>\n",
484 |        "      <td>4.648579</td>\n",
485 |        "    </tr>\n",
486 |        "    <tr>\n",
487 |        "      <th>8</th>\n",
488 |        "      <td>brotli 6</td>\n",
489 |        "      <td>4.033570</td>\n",
490 |        "      <td>0.752081</td>\n",
491 |        "      <td>4.471990</td>\n",
492 |        "    </tr>\n",
493 |        "    <tr>\n",
494 |        "      <th>9</th>\n",
495 |        "      <td>brotli 7</td>\n",
496 |        "      <td>4.049876</td>\n",
497 |        "      <td>0.753079</td>\n",
498 |        "      <td>3.971136</td>\n",
499 |        "    </tr>\n",
500 |        "    <tr>\n",
501 |        "      <th>10</th>\n",
502 |        "      <td>brotli 8</td>\n",
503 |        "      <td>4.056882</td>\n",
504 |        "      <td>0.753505</td>\n",
505 |        "      <td>3.708456</td>\n",
506 |        "    </tr>\n",
507 |        "    <tr>\n",
508 |        "      <th>11</th>\n",
509 |        "      <td>brotli 9</td>\n",
510 |        "      <td>4.065070</td>\n",
511 |        "      <td>0.754002</td>\n",
512 |        "      <td>3.146465</td>\n",
513 |        "    </tr>\n",
514 |        "    <tr>\n",
515 |        "      <th>12</th>\n",
516 |        "      <td>brotli 10</td>\n",
517 |        "      <td>4.318749</td>\n",
518 |        "      <td>0.768451</td>\n",
519 |        "      <td>1.005612</td>\n",
520 |        "    </tr>\n",
521 |        "    <tr>\n",
522 |        "      <th>13</th>\n",
523 |        "      <td>brotli 11</td>\n",
524 |        "      <td>4.426846</td>\n",
525 |        "      <td>0.774106</td>\n",
526 |        "      <td>0.470691</td>\n",
527 |        "    </tr>\n",
528 |        "  </tbody>\n",
529 |        "</table>\n",
530 |        "</div>"
531 |       ],
532 |       "text/plain": [
533 |        "         name     rates   savings  speed(MB/s)\n",
534 |        "0      gzip 4  3.580008  0.720671     7.447231\n",
535 |        "1      gzip 5  3.676672  0.728015     7.008153\n",
536 |        "2      gzip 6  3.712879  0.730667     6.428090\n",
537 |        "3      gzip 7  3.723238  0.731417     6.065006\n",
538 |        "4      gzip 8  3.730148  0.731914     5.120283\n",
539 |        "5      gzip 9  3.731493  0.732011     4.732681\n",
540 |        "6    brotli 4  3.694064  0.729295     5.004788\n",
541 |        "7    brotli 5  4.011637  0.750725     4.648579\n",
542 |        "8    brotli 6  4.033570  0.752081     4.471990\n",
543 |        "9    brotli 7  4.049876  0.753079     3.971136\n",
544 |        "10   brotli 8  4.056882  0.753505     3.708456\n",
545 |        "11   brotli 9  4.065070  0.754002     3.146465\n",
546 |        "12  brotli 10  4.318749  0.768451     1.005612\n",
547 |        "13  brotli 11  4.426846  0.774106     0.470691"
548 |       ]
549 |      },
550 |      "execution_count": 49,
551 |      "metadata": {},
552 |      "output_type": "execute_result"
553 |     }
554 |    ],
555 |    "source": [
556 |     "splits = [0, 100000, 1000000, 519170072]\n",
557 |     "init_sizes = np.array(init_sizes)\n",
558 |     "group1 = np.where((init_sizes >= 0)*(init_sizes <= 100000))[0]\n",
559 |     "group2 = np.where((init_sizes > 100000)*(init_sizes <= 1000000))[0]\n",
560 |     "group3 = np.where((init_sizes > 1000000)*(init_sizes <= 519170072))[0]\n",
561 |     "\n",
562 |     "print(0, \"-\", 100000, \"bytes\")\n",
563 |     "frame = pd.DataFrame()\n",
564 |     "frame[\"name\"] = [\"gzip 4\", \"gzip 5\", \"gzip 6\", \"gzip 7\", \"gzip 8\", \"gzip 9\",\n",
565 |     "                 \"brotli 4\", \"brotli 5\", \"brotli 6\", \"brotli 7\", \"brotli 8\", \"brotli 9\", \"brotli 10\", \"brotli 11\"]\n",
566 |     "\n",
567 |     "frame[\"rates\"] = np.hstack((np.mean(np.array(rates_gzip)[group1], axis=0), np.mean(np.array(rates_brotli)[group1], axis=0)))\n",
568 |     "frame[\"savings\"] = 1 - 1 / np.hstack((np.mean(np.array(rates_gzip)[group1], axis=0), np.mean(np.array(rates_brotli)[group1], axis=0)))\n",
569 |     "frame[\"speed(MB/s)\"] = np.hstack((np.mean(np.array(speed_gzip)[group1], axis=0), np.mean(np.array(speed_brotli)[group1], axis=0))) / 1000000\n",
570 |     "\n",
571 |     "frame"
572 |    ]
573 |   },
574 |   {
575 |    "cell_type": "code",
576 |    "execution_count": 50,
577 |    "metadata": {},
578 |    "outputs": [
579 |     {
580 |      "name": "stdout",
581 |      "output_type": "stream",
582 |      "text": [
583 |       "100000 - 1000000 bytes\n"
584 |      ]
585 |     },
586 |     {
587 |      "data": {
588 |       "text/html": [
589 |        "<div>\n",
590 |        "<style scoped>\n",
591 |        "    .dataframe tbody tr th:only-of-type {\n",
592 |        "        vertical-align: middle;\n",
593 |        "    }\n",
594 |        "\n",
595 |        "    .dataframe tbody tr th {\n",
596 |        "        vertical-align: top;\n",
597 |        "    }\n",
598 |        "\n",
599 |        "    .dataframe thead th {\n",
600 |        "        text-align: right;\n",
601 |        "    }\n",
602 |        "</style>\n",
603 |        "<table border=\"1\" class=\"dataframe\">\n",
604 |        "  <thead>\n",
605 |        "    <tr style=\"text-align: right;\">\n",
606 |        "      <th></th>\n",
607 |        "      <th>name</th>\n",
608 |        "      <th>rates</th>\n",
609 |        "      <th>savings</th>\n",
610 |        "      <th>speed(MB/s)</th>\n",
611 |        "    </tr>\n",
612 |        "  </thead>\n",
613 |        "  <tbody>\n",
614 |        "    <tr>\n",
615 |        "      <th>0</th>\n",
616 |        "      <td>gzip 4</td>\n",
617 |        "      <td>4.610515</td>\n",
618 |        "      <td>0.783104</td>\n",
619 |        "      <td>40.486917</td>\n",
620 |        "    </tr>\n",
621 |        "    <tr>\n",
622 |        "      <th>1</th>\n",
623 |        "      <td>gzip 5</td>\n",
624 |        "      <td>4.821605</td>\n",
625 |        "      <td>0.792600</td>\n",
626 |        "      <td>32.909052</td>\n",
627 |        "    </tr>\n",
628 |        "    <tr>\n",
629 |        "      <th>2</th>\n",
630 |        "      <td>gzip 6</td>\n",
631 |        "      <td>4.927023</td>\n",
632 |        "      <td>0.797038</td>\n",
633 |        "      <td>25.098103</td>\n",
634 |        "    </tr>\n",
635 |        "    <tr>\n",
636 |        "      <th>3</th>\n",
637 |        "      <td>gzip 7</td>\n",
638 |        "      <td>4.953874</td>\n",
639 |        "      <td>0.798138</td>\n",
640 |        "      <td>21.498278</td>\n",
641 |        "    </tr>\n",
642 |        "    <tr>\n",
643 |        "      <th>4</th>\n",
644 |        "      <td>gzip 8</td>\n",
645 |        "      <td>4.976779</td>\n",
646 |        "      <td>0.799067</td>\n",
647 |        "      <td>13.639378</td>\n",
648 |        "    </tr>\n",
649 |        "    <tr>\n",
650 |        "      <th>5</th>\n",
651 |        "      <td>gzip 9</td>\n",
652 |        "      <td>4.981861</td>\n",
653 |        "      <td>0.799272</td>\n",
654 |        "      <td>10.864447</td>\n",
655 |        "    </tr>\n",
656 |        "    <tr>\n",
657 |        "      <th>6</th>\n",
658 |        "      <td>brotli 4</td>\n",
659 |        "      <td>5.086900</td>\n",
660 |        "      <td>0.803417</td>\n",
661 |        "      <td>35.662622</td>\n",
662 |        "    </tr>\n",
663 |        "    <tr>\n",
664 |        "      <th>7</th>\n",
665 |        "      <td>brotli 5</td>\n",
666 |        "      <td>5.540047</td>\n",
667 |        "      <td>0.819496</td>\n",
668 |        "      <td>24.545289</td>\n",
669 |        "    </tr>\n",
670 |        "    <tr>\n",
671 |        "      <th>8</th>\n",
672 |        "      <td>brotli 6</td>\n",
673 |        "      <td>5.629082</td>\n",
674 |        "      <td>0.822351</td>\n",
675 |        "      <td>21.281608</td>\n",
676 |        "    </tr>\n",
677 |        "    <tr>\n",
678 |        "      <th>9</th>\n",
679 |        "      <td>brotli 7</td>\n",
680 |        "      <td>5.707098</td>\n",
681 |        "      <td>0.824780</td>\n",
682 |        "      <td>14.285831</td>\n",
683 |        "    </tr>\n",
684 |        "    <tr>\n",
685 |        "      <th>10</th>\n",
686 |        "      <td>brotli 8</td>\n",
687 |        "      <td>5.742195</td>\n",
688 |        "      <td>0.825851</td>\n",
689 |        "      <td>10.013604</td>\n",
690 |        "    </tr>\n",
691 |        "    <tr>\n",
692 |        "      <th>11</th>\n",
693 |        "      <td>brotli 9</td>\n",
694 |        "      <td>5.777223</td>\n",
695 |        "      <td>0.826906</td>\n",
696 |        "      <td>6.719082</td>\n",
697 |        "    </tr>\n",
698 |        "    <tr>\n",
699 |        "      <th>12</th>\n",
700 |        "      <td>brotli 10</td>\n",
701 |        "      <td>6.213230</td>\n",
702 |        "      <td>0.839053</td>\n",
703 |        "      <td>1.659122</td>\n",
704 |        "    </tr>\n",
705 |        "    <tr>\n",
706 |        "      <th>13</th>\n",
707 |        "      <td>brotli 11</td>\n",
708 |        "      <td>6.359796</td>\n",
709 |        "      <td>0.842762</td>\n",
710 |        "      <td>0.617029</td>\n",
711 |        "    </tr>\n",
712 |        "  </tbody>\n",
713 |        "</table>\n",
714 |        "</div>"
715 |       ],
716 |       "text/plain": [
717 |        "         name     rates   savings  speed(MB/s)\n",
718 |        "0      gzip 4  4.610515  0.783104    40.486917\n",
719 |        "1      gzip 5  4.821605  0.792600    32.909052\n",
720 |        "2      gzip 6  4.927023  0.797038    25.098103\n",
721 |        "3      gzip 7  4.953874  0.798138    21.498278\n",
722 |        "4      gzip 8  4.976779  0.799067    13.639378\n",
723 |        "5      gzip 9  4.981861  0.799272    10.864447\n",
724 |        "6    brotli 4  5.086900  0.803417    35.662622\n",
725 |        "7    brotli 5  5.540047  0.819496    24.545289\n",
726 |        "8    brotli 6  5.629082  0.822351    21.281608\n",
727 |        "9    brotli 7  5.707098  0.824780    14.285831\n",
728 |        "10   brotli 8  5.742195  0.825851    10.013604\n",
729 |        "11   brotli 9  5.777223  0.826906     6.719082\n",
730 |        "12  brotli 10  6.213230  0.839053     1.659122\n",
731 |        "13  brotli 11  6.359796  0.842762     0.617029"
732 |       ]
733 |      },
734 |      "execution_count": 50,
735 |      "metadata": {},
736 |      "output_type": "execute_result"
737 |     }
738 |    ],
739 |    "source": [
740 |     "print(100000, \"-\", 1000000, \"bytes\")\n",
741 |     "frame = pd.DataFrame()\n",
742 |     "frame[\"name\"] = [\"gzip 4\", \"gzip 5\", \"gzip 6\", \"gzip 7\", \"gzip 8\", \"gzip 9\",\n",
743 |     "                 \"brotli 4\", \"brotli 5\", \"brotli 6\", \"brotli 7\", \"brotli 8\", \"brotli 9\", \"brotli 10\", \"brotli 11\"]\n",
744 |     "\n",
745 |     "frame[\"rates\"] = np.hstack((np.mean(np.array(rates_gzip)[group2], axis=0), np.mean(np.array(rates_brotli)[group2], axis=0)))\n",
746 |     "frame[\"savings\"] = 1 - 1 / np.hstack((np.mean(np.array(rates_gzip)[group2], axis=0), np.mean(np.array(rates_brotli)[group2], axis=0)))\n",
747 |     "frame[\"speed(MB/s)\"] = np.hstack((np.mean(np.array(speed_gzip)[group2], axis=0), np.mean(np.array(speed_brotli)[group2], axis=0))) / 1000000\n",
748 |     "\n",
749 |     "frame"
750 |    ]
751 |   },
752 |   {
753 |    "cell_type": "code",
754 |    "execution_count": 51,
755 |    "metadata": {},
756 |    "outputs": [
757 |     {
758 |      "name": "stdout",
759 |      "output_type": "stream",
760 |      "text": [
761 |       "1000000 - 519170072 bytes\n"
762 |      ]
763 |     },
764 |     {
765 |      "data": {
766 |       "text/html": [
767 |        "<div>\n",
768 |        "<style scoped>\n",
769 |        "    .dataframe tbody tr th:only-of-type {\n",
770 |        "        vertical-align: middle;\n",
771 |        "    }\n",
772 |        "\n",
773 |        "    .dataframe tbody tr th {\n",
774 |        "        vertical-align: top;\n",
775 |        "    }\n",
776 |        "\n",
777 |        "    .dataframe thead th {\n",
778 |        "        text-align: right;\n",
779 |        "    }\n",
780 |        "</style>\n",
781 |        "<table border=\"1\" class=\"dataframe\">\n",
782 |        "  <thead>\n",
783 |        "    <tr style=\"text-align: right;\">\n",
784 |        "      <th></th>\n",
785 |        "      <th>name</th>\n",
786 |        "      <th>rates</th>\n",
787 |        "      <th>savings</th>\n",
788 |        "      <th>speed(MB/s)</th>\n",
789 |        "    </tr>\n",
790 |        "  </thead>\n",
791 |        "  <tbody>\n",
792 |        "    <tr>\n",
793 |        "      <th>0</th>\n",
794 |        "      <td>gzip 4</td>\n",
795 |        "      <td>4.947584</td>\n",
796 |        "      <td>0.797881</td>\n",
797 |        "      <td>62.609464</td>\n",
798 |        "    </tr>\n",
799 |        "    <tr>\n",
800 |        "      <th>1</th>\n",
801 |        "      <td>gzip 5</td>\n",
802 |        "      <td>5.195765</td>\n",
803 |        "      <td>0.807536</td>\n",
804 |        "      <td>47.516889</td>\n",
805 |        "    </tr>\n",
806 |        "    <tr>\n",
807 |        "      <th>2</th>\n",
808 |        "      <td>gzip 6</td>\n",
809 |        "      <td>5.366891</td>\n",
810 |        "      <td>0.813672</td>\n",
811 |        "      <td>33.629151</td>\n",
812 |        "    </tr>\n",
813 |        "    <tr>\n",
814 |        "      <th>3</th>\n",
815 |        "      <td>gzip 7</td>\n",
816 |        "      <td>5.405544</td>\n",
817 |        "      <td>0.815005</td>\n",
818 |        "      <td>27.704937</td>\n",
819 |        "    </tr>\n",
820 |        "    <tr>\n",
821 |        "      <th>4</th>\n",
822 |        "      <td>gzip 8</td>\n",
823 |        "      <td>5.458839</td>\n",
824 |        "      <td>0.816811</td>\n",
825 |        "      <td>16.148692</td>\n",
826 |        "    </tr>\n",
827 |        "    <tr>\n",
828 |        "      <th>5</th>\n",
829 |        "      <td>gzip 9</td>\n",
830 |        "      <td>5.468953</td>\n",
831 |        "      <td>0.817150</td>\n",
832 |        "      <td>12.309037</td>\n",
833 |        "    </tr>\n",
834 |        "    <tr>\n",
835 |        "      <th>6</th>\n",
836 |        "      <td>brotli 4</td>\n",
837 |        "      <td>8.551782</td>\n",
838 |        "      <td>0.883065</td>\n",
839 |        "      <td>61.243214</td>\n",
840 |        "    </tr>\n",
841 |        "    <tr>\n",
842 |        "      <th>7</th>\n",
843 |        "      <td>brotli 5</td>\n",
844 |        "      <td>9.349877</td>\n",
845 |        "      <td>0.893047</td>\n",
846 |        "      <td>35.094220</td>\n",
847 |        "    </tr>\n",
848 |        "    <tr>\n",
849 |        "      <th>8</th>\n",
850 |        "      <td>brotli 6</td>\n",
851 |        "      <td>9.716333</td>\n",
852 |        "      <td>0.897081</td>\n",
853 |        "      <td>29.873129</td>\n",
854 |        "    </tr>\n",
855 |        "    <tr>\n",
856 |        "      <th>9</th>\n",
857 |        "      <td>brotli 7</td>\n",
858 |        "      <td>10.018952</td>\n",
859 |        "      <td>0.900189</td>\n",
860 |        "      <td>23.305218</td>\n",
861 |        "    </tr>\n",
862 |        "    <tr>\n",
863 |        "      <th>10</th>\n",
864 |        "      <td>brotli 8</td>\n",
865 |        "      <td>10.169293</td>\n",
866 |        "      <td>0.901665</td>\n",
867 |        "      <td>18.596231</td>\n",
868 |        "    </tr>\n",
869 |        "    <tr>\n",
870 |        "      <th>11</th>\n",
871 |        "      <td>brotli 9</td>\n",
872 |        "      <td>10.418385</td>\n",
873 |        "      <td>0.904016</td>\n",
874 |        "      <td>13.694826</td>\n",
875 |        "    </tr>\n",
876 |        "    <tr>\n",
877 |        "      <th>12</th>\n",
878 |        "      <td>brotli 10</td>\n",
879 |        "      <td>11.215429</td>\n",
880 |        "      <td>0.910837</td>\n",
881 |        "      <td>1.773174</td>\n",
882 |        "    </tr>\n",
883 |        "    <tr>\n",
884 |        "      <th>13</th>\n",
885 |        "      <td>brotli 11</td>\n",
886 |        "      <td>10.618584</td>\n",
887 |        "      <td>0.905825</td>\n",
888 |        "      <td>0.704833</td>\n",
889 |        "    </tr>\n",
890 |        "  </tbody>\n",
891 |        "</table>\n",
892 |        "</div>"
893 |       ],
894 |       "text/plain": [
895 |        "         name      rates   savings  speed(MB/s)\n",
896 |        "0      gzip 4   4.947584  0.797881    62.609464\n",
897 |        "1      gzip 5   5.195765  0.807536    47.516889\n",
898 |        "2      gzip 6   5.366891  0.813672    33.629151\n",
899 |        "3      gzip 7   5.405544  0.815005    27.704937\n",
900 |        "4      gzip 8   5.458839  0.816811    16.148692\n",
901 |        "5      gzip 9   5.468953  0.817150    12.309037\n",
902 |        "6    brotli 4   8.551782  0.883065    61.243214\n",
903 |        "7    brotli 5   9.349877  0.893047    35.094220\n",
904 |        "8    brotli 6   9.716333  0.897081    29.873129\n",
905 |        "9    brotli 7  10.018952  0.900189    23.305218\n",
906 |        "10   brotli 8  10.169293  0.901665    18.596231\n",
907 |        "11   brotli 9  10.418385  0.904016    13.694826\n",
908 |        "12  brotli 10  11.215429  0.910837     1.773174\n",
909 |        "13  brotli 11  10.618584  0.905825     0.704833"
910 |       ]
911 |      },
912 |      "execution_count": 51,
913 |      "metadata": {},
914 |      "output_type": "execute_result"
915 |     }
916 |    ],
917 |    "source": [
918 |     "print(1000000, \"-\", 519170072, \"bytes\")\n",
919 |     "frame = pd.DataFrame()\n",
920 |     "frame[\"name\"] = [\"gzip 4\", \"gzip 5\", \"gzip 6\", \"gzip 7\", \"gzip 8\", \"gzip 9\",\n",
921 |     "                 \"brotli 4\", \"brotli 5\", \"brotli 6\", \"brotli 7\", \"brotli 8\", \"brotli 9\", \"brotli 10\", \"brotli 11\"]\n",
922 |     "\n",
923 |     "frame[\"rates\"] = np.hstack((np.mean(np.array(rates_gzip)[group3], axis=0), np.mean(np.array(rates_brotli)[group3], axis=0)))\n",
924 |     "frame[\"savings\"] = 1 - 1 / np.hstack((np.mean(np.array(rates_gzip)[group3], axis=0), np.mean(np.array(rates_brotli)[group3], axis=0)))\n",
925 |     "frame[\"speed(MB/s)\"] = np.hstack((np.mean(np.array(speed_gzip)[group3], axis=0), np.mean(np.array(speed_brotli)[group3], axis=0))) / 1000000\n",
926 |     "\n",
927 |     "frame"
928 |    ]
929 |   }
930 |  ],
931 |  "metadata": {
932 |   "kernelspec": {
933 |    "display_name": "Python 3",
934 |    "language": "python",
935 |    "name": "python3"
936 |   },
937 |   "language_info": {
938 |    "codemirror_mode": {
939 |     "name": "ipython",
940 |     "version": 3
941 |    },
942 |    "file_extension": ".py",
943 |    "mimetype": "text/x-python",
944 |    "name": "python",
945 |    "nbconvert_exporter": "python",
946 |    "pygments_lexer": "ipython3",
947 |    "version": "3.6.7"
948 |   }
949 |  },
950 |  "nbformat": 4,
951 |  "nbformat_minor": 2
952 | }
953 | 


--------------------------------------------------------------------------------
/compression_experiments/http_archive_compression.ipynb:
--------------------------------------------------------------------------------
   1 | {
   2 |  "cells": [
   3 |   {
   4 |    "cell_type": "markdown",
   5 |    "metadata": {},
   6 |    "source": [
   7 |     "Copyright 2020 Google Inc. All Rights Reserved.\n",
   8 |     "\n",
   9 |     "Licensed under the Apache License, Version 2.0 (the \"License\"); <br>\n",
  10 |     "you may not use this file except in compliance with the License.<br>\n",
  11 |     "You may obtain a copy of the License at<br>\n",
  12 |     "\n",
  13 |     "     http://www.apache.org/licenses/LICENSE-2.0\n",
  14 |     "\n",
  15 |     "Unless required by applicable law or agreed to in writing, software\n",
  16 |     "distributed under the License is distributed on an \"AS IS\" BASIS,\n",
  17 |     "WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.<br>\n",
  18 |     "\n",
  19 |     "See the License for the specific language governing permissions and \n",
  20 |     "limitations under the License."
  21 |    ]
  22 |   },
  23 |   {
  24 |    "cell_type": "code",
  25 |    "execution_count": 1,
  26 |    "metadata": {},
  27 |    "outputs": [],
  28 |    "source": [
  29 |     "import numpy as np\n",
  30 |     "import json\n",
  31 |     "import matplotlib.pyplot as plt\n",
  32 |     "from tqdm import tqdm\n",
  33 |     "import random\n",
  34 |     "import subprocess\n",
  35 |     "import time\n",
  36 |     "import pandas as pd\n",
  37 |     "import os"
  38 |    ]
  39 |   },
  40 |   {
  41 |    "cell_type": "markdown",
  42 |    "metadata": {},
  43 |    "source": [
  44 |     "### Read the data"
  45 |    ]
  46 |   },
  47 |   {
  48 |    "cell_type": "code",
  49 |    "execution_count": 2,
  50 |    "metadata": {},
  51 |    "outputs": [],
  52 |    "source": [
  53 |     "with open(\"webpacks/webpack_bodies.json\") as file:\n",
  54 |     "    data = file.read()\n",
  55 |     "    \n",
  56 |     "splitted_data = data.split('{\"page\"')[1:]\n",
  57 |     "splitted_data = [json.loads('{\"page\"' + line)[\"body\"] for line in splitted_data]\n",
  58 |     "\n",
  59 |     "for i in range(50):\n",
  60 |     "    name = \"0\" * (12 - len(str(i))) + str(i)\n",
  61 |     "    with open(\"webpacks/webpack_bodies_\" + name + \".json\") as file:\n",
  62 |     "        data = file.read().split('{\"page\"')[1:]\n",
  63 |     "        splitted_data += [json.loads('{\"page\"' + line)[\"body\"] for line in data]"
  64 |    ]
  65 |   },
  66 |   {
  67 |    "cell_type": "markdown",
  68 |    "metadata": {},
  69 |    "source": [
  70 |     "## Perform compression for bundled approach"
  71 |    ]
  72 |   },
  73 |   {
  74 |    "cell_type": "code",
  75 |    "execution_count": 17,
  76 |    "metadata": {},
  77 |    "outputs": [],
  78 |    "source": [
  79 |     "def log(file, msg):\n",
  80 |     "    f = open(file, 'a+')\n",
  81 |     "    f.write(msg + '\\n')\n",
  82 |     "    f.close()\n",
  83 |     "    \n",
  84 |     "def get_seconds(time): \n",
  85 |     "    min_ind = time.find('m')\n",
  86 |     "    mins = int(time[:min_ind])\n",
  87 |     "    second = float(time[min_ind + 1:-1])\n",
  88 |     "    return mins * 60 + second"
  89 |    ]
  90 |   },
  91 |   {
  92 |    "cell_type": "code",
  93 |    "execution_count": 219,
  94 |    "metadata": {},
  95 |    "outputs": [],
  96 |    "source": [
  97 |     "rates_gzip_bundled = []\n",
  98 |     "rates_brotli_bundled = []\n",
  99 |     "times_gzip_bundled = []\n",
 100 |     "times_brotli_bundled = []\n",
 101 |     "speed_gzip_bundled = []\n",
 102 |     "speed_brotli_bundled = []\n",
 103 |     "init_sizes_bundled = []\n",
 104 |     "for i in range(4000):\n",
 105 |     "    rates_gzip_compressed = []\n",
 106 |     "    rates_brotli_compressed = []\n",
 107 |     "    times_gzip_compressed = []\n",
 108 |     "    times_brotli_compressed = []\n",
 109 |     "    speed_gzip_compressed = []\n",
 110 |     "    speed_brotli_compressed = []\n",
 111 |     "\n",
 112 |     "    # write the text of a bundle to file to use it for compression later\n",
 113 |     "    with open(\"example.txt\", \"w\") as file:\n",
 114 |     "        file.write(splitted_data[i])\n",
 115 |     "    size_non_compressed = os.stat(\"example.txt\").st_size\n",
 116 |     "    init_sizes_bundled.append(size_non_compressed)\n",
 117 |     "\n",
 118 |     "    # do the gzip compression with different levels\n",
 119 |     "    for level in range(4, 10):\n",
 120 |     "        result = subprocess.run([\"bash\", \"gzip_compress.sh\", str(level), \"time.txt\", \n",
 121 |     "                                 \"example_gzip.txt.gz\", \"example.txt\"])\n",
 122 |     "        #previous script saves the time into the file\n",
 123 |     "        with open(\"time.txt\") as file:\n",
 124 |     "            user_sys = file.read().strip().split('\\n')[1:]\n",
 125 |     "        time = get_seconds(user_sys[0].split('\\t')[1]) + get_seconds(user_sys[1].split('\\t')[1])\n",
 126 |     "        size_gzip_compressed = os.stat(\"example_gzip.txt.gz\").st_size\n",
 127 |     "        rates_gzip_compressed.append(size_non_compressed / size_gzip_compressed)\n",
 128 |     "        times_gzip_compressed.append(time)\n",
 129 |     "        speed_gzip_compressed.append(size_non_compressed / time)\n",
 130 |     "\n",
 131 |     "    # do the brotli compression with different levels\n",
 132 |     "    for level in range(4, 12):\n",
 133 |     "        result = subprocess.run([\"bash\", \"brotli_compress.sh\", str(level), \"time.txt\", \n",
 134 |     "                                 \"example_brotli.txt.br\", \"example.txt\"])\n",
 135 |     "        with open(\"time.txt\") as file:\n",
 136 |     "            user_sys = file.read().strip().split('\\n')[1:]\n",
 137 |     "        time = get_seconds(user_sys[0].split('\\t')[1]) + get_seconds(user_sys[1].split('\\t')[1])\n",
 138 |     "        size_br_compressed = os.stat(\"example_brotli.txt.br\").st_size\n",
 139 |     "        rates_brotli_compressed.append(size_non_compressed / size_br_compressed)\n",
 140 |     "        times_brotli_compressed.append(time)\n",
 141 |     "        speed_brotli_compressed.append(size_non_compressed / time)\n",
 142 |     "    \n",
 143 |     "    rates_gzip_bundled.append(rates_gzip_compressed)\n",
 144 |     "    rates_brotli_bundled.append(rates_brotli_compressed)\n",
 145 |     "    times_gzip_bundled.append(times_gzip_compressed)\n",
 146 |     "    times_brotli_bundled.append(times_brotli_compressed)\n",
 147 |     "    speed_gzip_bundled.append(speed_gzip_compressed)\n",
 148 |     "    speed_brotli_bundled.append(speed_brotli_compressed)\n",
 149 |     "    \n",
 150 |     "    if i != 0 and i % 50 == 0:\n",
 151 |     "        log(\"logs.txt\", \"rates_gzip: \" + str(np.mean(rates_gzip_bundled, axis=0)))\n",
 152 |     "        log(\"logs.txt\", \"rates_brotli: \" + str(np.mean(rates_brotli_bundled, axis=0)))\n",
 153 |     "        log(\"logs.txt\", \"times_gzip: \" + str(np.mean(times_gzip_bundled, axis=0)))\n",
 154 |     "        log(\"logs.txt\", \"times_brotli: \" + str(np.mean(times_brotli_bundled, axis=0)))\n",
 155 |     "        log(\"logs.txt\", \"speed_gzip: \" + str(np.mean(speed_gzip_bundled, axis=0)))\n",
 156 |     "        log(\"logs.txt\", \"speed_brotli: \" + str(np.mean(speed_brotli_bundled, axis=0)))"
 157 |    ]
 158 |   },
 159 |   {
 160 |    "cell_type": "code",
 161 |    "execution_count": 408,
 162 |    "metadata": {},
 163 |    "outputs": [
 164 |     {
 165 |      "data": {
 166 |       "text/html": [
 167 |        "<div>\n",
 168 |        "<style scoped>\n",
 169 |        "    .dataframe tbody tr th:only-of-type {\n",
 170 |        "        vertical-align: middle;\n",
 171 |        "    }\n",
 172 |        "\n",
 173 |        "    .dataframe tbody tr th {\n",
 174 |        "        vertical-align: top;\n",
 175 |        "    }\n",
 176 |        "\n",
 177 |        "    .dataframe thead th {\n",
 178 |        "        text-align: right;\n",
 179 |        "    }\n",
 180 |        "</style>\n",
 181 |        "<table border=\"1\" class=\"dataframe\">\n",
 182 |        "  <thead>\n",
 183 |        "    <tr style=\"text-align: right;\">\n",
 184 |        "      <th></th>\n",
 185 |        "      <th>name</th>\n",
 186 |        "      <th>rates</th>\n",
 187 |        "      <th>savings</th>\n",
 188 |        "      <th>speed(MB/s)</th>\n",
 189 |        "    </tr>\n",
 190 |        "  </thead>\n",
 191 |        "  <tbody>\n",
 192 |        "    <tr>\n",
 193 |        "      <th>0</th>\n",
 194 |        "      <td>gzip 4</td>\n",
 195 |        "      <td>3.260182</td>\n",
 196 |        "      <td>0.726951</td>\n",
 197 |        "      <td>20.650285</td>\n",
 198 |        "    </tr>\n",
 199 |        "    <tr>\n",
 200 |        "      <th>1</th>\n",
 201 |        "      <td>gzip 5</td>\n",
 202 |        "      <td>3.357855</td>\n",
 203 |        "      <td>0.736465</td>\n",
 204 |        "      <td>17.320770</td>\n",
 205 |        "    </tr>\n",
 206 |        "    <tr>\n",
 207 |        "      <th>2</th>\n",
 208 |        "      <td>gzip 6</td>\n",
 209 |        "      <td>3.392349</td>\n",
 210 |        "      <td>0.739789</td>\n",
 211 |        "      <td>14.828584</td>\n",
 212 |        "    </tr>\n",
 213 |        "    <tr>\n",
 214 |        "      <th>3</th>\n",
 215 |        "      <td>gzip 7</td>\n",
 216 |        "      <td>3.402581</td>\n",
 217 |        "      <td>0.740678</td>\n",
 218 |        "      <td>13.650937</td>\n",
 219 |        "    </tr>\n",
 220 |        "    <tr>\n",
 221 |        "      <th>4</th>\n",
 222 |        "      <td>gzip 8</td>\n",
 223 |        "      <td>3.409667</td>\n",
 224 |        "      <td>0.741257</td>\n",
 225 |        "      <td>11.943546</td>\n",
 226 |        "    </tr>\n",
 227 |        "    <tr>\n",
 228 |        "      <th>5</th>\n",
 229 |        "      <td>gzip 9</td>\n",
 230 |        "      <td>3.410150</td>\n",
 231 |        "      <td>0.741290</td>\n",
 232 |        "      <td>11.749338</td>\n",
 233 |        "    </tr>\n",
 234 |        "    <tr>\n",
 235 |        "      <th>6</th>\n",
 236 |        "      <td>brotli 4</td>\n",
 237 |        "      <td>3.460357</td>\n",
 238 |        "      <td>0.711012</td>\n",
 239 |        "      <td>16.654836</td>\n",
 240 |        "    </tr>\n",
 241 |        "    <tr>\n",
 242 |        "      <th>7</th>\n",
 243 |        "      <td>brotli 5</td>\n",
 244 |        "      <td>3.714279</td>\n",
 245 |        "      <td>0.730769</td>\n",
 246 |        "      <td>11.635901</td>\n",
 247 |        "    </tr>\n",
 248 |        "    <tr>\n",
 249 |        "      <th>8</th>\n",
 250 |        "      <td>brotli 6</td>\n",
 251 |        "      <td>3.745654</td>\n",
 252 |        "      <td>0.733024</td>\n",
 253 |        "      <td>10.270090</td>\n",
 254 |        "    </tr>\n",
 255 |        "    <tr>\n",
 256 |        "      <th>9</th>\n",
 257 |        "      <td>brotli 7</td>\n",
 258 |        "      <td>3.771763</td>\n",
 259 |        "      <td>0.734872</td>\n",
 260 |        "      <td>7.601850</td>\n",
 261 |        "    </tr>\n",
 262 |        "    <tr>\n",
 263 |        "      <th>10</th>\n",
 264 |        "      <td>brotli 8</td>\n",
 265 |        "      <td>3.782508</td>\n",
 266 |        "      <td>0.735625</td>\n",
 267 |        "      <td>5.944935</td>\n",
 268 |        "    </tr>\n",
 269 |        "    <tr>\n",
 270 |        "      <th>11</th>\n",
 271 |        "      <td>brotli 9</td>\n",
 272 |        "      <td>3.793822</td>\n",
 273 |        "      <td>0.736414</td>\n",
 274 |        "      <td>4.489888</td>\n",
 275 |        "    </tr>\n",
 276 |        "    <tr>\n",
 277 |        "      <th>12</th>\n",
 278 |        "      <td>brotli 10</td>\n",
 279 |        "      <td>4.044363</td>\n",
 280 |        "      <td>0.752742</td>\n",
 281 |        "      <td>1.181333</td>\n",
 282 |        "    </tr>\n",
 283 |        "    <tr>\n",
 284 |        "      <th>13</th>\n",
 285 |        "      <td>brotli 11</td>\n",
 286 |        "      <td>4.118859</td>\n",
 287 |        "      <td>0.757214</td>\n",
 288 |        "      <td>0.519743</td>\n",
 289 |        "    </tr>\n",
 290 |        "  </tbody>\n",
 291 |        "</table>\n",
 292 |        "</div>"
 293 |       ],
 294 |       "text/plain": [
 295 |        "         name     rates   savings  speed(MB/s)\n",
 296 |        "0      gzip 4  3.260182  0.726951    20.650285\n",
 297 |        "1      gzip 5  3.357855  0.736465    17.320770\n",
 298 |        "2      gzip 6  3.392349  0.739789    14.828584\n",
 299 |        "3      gzip 7  3.402581  0.740678    13.650937\n",
 300 |        "4      gzip 8  3.409667  0.741257    11.943546\n",
 301 |        "5      gzip 9  3.410150  0.741290    11.749338\n",
 302 |        "6    brotli 4  3.460357  0.711012    16.654836\n",
 303 |        "7    brotli 5  3.714279  0.730769    11.635901\n",
 304 |        "8    brotli 6  3.745654  0.733024    10.270090\n",
 305 |        "9    brotli 7  3.771763  0.734872     7.601850\n",
 306 |        "10   brotli 8  3.782508  0.735625     5.944935\n",
 307 |        "11   brotli 9  3.793822  0.736414     4.489888\n",
 308 |        "12  brotli 10  4.044363  0.752742     1.181333\n",
 309 |        "13  brotli 11  4.118859  0.757214     0.519743"
 310 |       ]
 311 |      },
 312 |      "execution_count": 408,
 313 |      "metadata": {},
 314 |      "output_type": "execute_result"
 315 |     }
 316 |    ],
 317 |    "source": [
 318 |     "frame = pd.DataFrame()\n",
 319 |     "frame[\"name\"] = [\"gzip 4\", \"gzip 5\", \"gzip 6\", \"gzip 7\", \"gzip 8\", \"gzip 9\",\n",
 320 |     "                 \"brotli 4\", \"brotli 5\", \"brotli 6\", \"brotli 7\", \"brotli 8\", \"brotli 9\", \"brotli 10\", \"brotli 11\"]\n",
 321 |     "\n",
 322 |     "frame[\"rates\"] = np.hstack((np.mean(rates_gzip_bundled, axis=0), np.mean(rates_brotli_bundled, axis=0)))\n",
 323 |     "frame[\"savings\"] = 1 - 1 / np.hstack((np.mean(rates_gzip, axis=0), np.mean(rates_brotli_bundled, axis=0)))\n",
 324 |     "frame[\"speed(MB/s)\"] = np.hstack((np.mean(speed_gzip_bundled, axis=0), np.mean(speed_brotli_bundled, axis=0))) / 1000000\n",
 325 |     "\n",
 326 |     "frame"
 327 |    ]
 328 |   },
 329 |   {
 330 |    "cell_type": "markdown",
 331 |    "metadata": {},
 332 |    "source": [
 333 |     "## Unbundled approach"
 334 |    ]
 335 |   },
 336 |   {
 337 |    "cell_type": "code",
 338 |    "execution_count": 214,
 339 |    "metadata": {},
 340 |    "outputs": [],
 341 |    "source": [
 342 |     "rates_gzip_unbundled = []\n",
 343 |     "rates_brotli_unbundled = []\n",
 344 |     "times_gzip_unbundled = []\n",
 345 |     "times_brotli_unbundled = []\n",
 346 |     "speed_gzip_unbundled = []\n",
 347 |     "speed_brotli_unbundled = []\n",
 348 |     "init_sizes_unbundled = []\n",
 349 |     "\n",
 350 |     "for i in range(600):\n",
 351 |     "    # write the text of a bundle to file to use it for getting chunks from bundle later\n",
 352 |     "    with open(\"third_party/bundle_analyzer/text_bundle.txt\", \"w\") as file:\n",
 353 |     "        file.write(splitted_data[i])\n",
 354 |     "    try:\n",
 355 |     "        # save chunks from bundle to parsed_bundle.json file\n",
 356 |     "        result = subprocess.run([\"node\", \"--experimental-modules\", \"third_party/bundle_analyzer/get_chunks.js\"])\n",
 357 |     "    except:\n",
 358 |     "        continue\n",
 359 |     "    # get chunks\n",
 360 |     "    with open(\"parsed_bundle.json\") as file:\n",
 361 |     "        codes = [line['code'] for line in json.loads(file.read())]\n",
 362 |     "\n",
 363 |     "    sizes_gzip_compressed = np.zeros(6)\n",
 364 |     "    sizes_brotli_compressed = np.zeros(8)\n",
 365 |     "    times_gzip_compressed = np.zeros(6)\n",
 366 |     "    times_brotli_compressed = np.zeros(8)\n",
 367 |     "    overall_init_size = 0\n",
 368 |     "\n",
 369 |     "    for code in codes:\n",
 370 |     "        if not code:\n",
 371 |     "            continue\n",
 372 |     "        # write the text of a bundle to file to use it for compression later\n",
 373 |     "        with open(\"example.txt\", \"w\") as file:\n",
 374 |     "            file.write(code)\n",
 375 |     "        overall_init_size += os.stat(\"example.txt\").st_size\n",
 376 |     "\n",
 377 |     "        # do the gzip compression with different levels\n",
 378 |     "        for level in range(4, 10):\n",
 379 |     "            result = subprocess.run([\"bash\", \"gzip_compress.sh\", str(level), \"time.txt\", \n",
 380 |     "                                     \"example_gzip.txt.gz\", \"example.txt\"])\n",
 381 |     "            with open(\"time.txt\") as file:\n",
 382 |     "                user_sys = file.read().strip().split('\\n')[1:]\n",
 383 |     "            time = get_seconds(user_sys[0].split('\\t')[1]) + get_seconds(user_sys[1].split('\\t')[1])\n",
 384 |     "            sizes_gzip_compressed[level - 4] += os.stat(\"example_gzip.txt.gz\").st_size\n",
 385 |     "            times_gzip_compressed[level - 4] += time\n",
 386 |     "\n",
 387 |     "        # do the brotli compression with different levels\n",
 388 |     "        for level in range(4, 12):\n",
 389 |     "            result = subprocess.run([\"bash\", \"brotli_compress.sh\", str(level), \"time.txt\", \n",
 390 |     "                                     \"example_brotli.txt.br\", \"example.txt\"])\n",
 391 |     "            with open(\"time.txt\") as file:\n",
 392 |     "                user_sys = file.read().strip().split('\\n')[1:]\n",
 393 |     "            time = get_seconds(user_sys[0].split('\\t')[1]) + get_seconds(user_sys[1].split('\\t')[1])\n",
 394 |     "            sizes_brotli_compressed[level - 4] += os.stat(\"example_brotli.txt.br\").st_size\n",
 395 |     "            times_brotli_compressed[level - 4] += time\n",
 396 |     "\n",
 397 |     "    rates_gzip_unbundled.append(overall_init_size / sizes_gzip_compressed)\n",
 398 |     "    rates_brotli_unbundled.append(overall_init_size / sizes_brotli_compressed)\n",
 399 |     "    times_gzip_unbundled.append(times_gzip_compressed)\n",
 400 |     "    times_brotli_unbundled.append(times_brotli_compressed)\n",
 401 |     "    speed_gzip_unbundled.append(overall_init_size / times_gzip_compressed)\n",
 402 |     "    speed_brotli_unbundled.append(overall_init_size / times_brotli_compressed)\n",
 403 |     "    init_sizes_unbundled.append(overall_init_size)\n",
 404 |     "    \n",
 405 |     "    if i != 0 and i % 100 == 0:\n",
 406 |     "        log(\"logs2.txt\", \"rates_gzip: \" + str(np.mean(rates_gzip_unbundled, axis=0)))\n",
 407 |     "        log(\"logs2.txt\", \"rates_brotli: \" + str(np.mean(rates_brotli_unbundled, axis=0)))\n",
 408 |     "        log(\"logs2.txt\", \"times_gzip: \" + str(np.mean(times_gzip_unbundled, axis=0)))\n",
 409 |     "        log(\"logs2.txt\", \"times_brotli: \" + str(np.mean(times_brotli_unbundled, axis=0)))\n",
 410 |     "        log(\"logs2.txt\", \"speed_gzip: \" + str(np.mean(speed_gzip_unbundled, axis=0)))\n",
 411 |     "        log(\"logs2.txt\", \"speed_brotli: \" + str(np.mean(speed_brotli_unbundled, axis=0)))"
 412 |    ]
 413 |   },
 414 |   {
 415 |    "cell_type": "code",
 416 |    "execution_count": 419,
 417 |    "metadata": {},
 418 |    "outputs": [
 419 |     {
 420 |      "data": {
 421 |       "text/html": [
 422 |        "<div>\n",
 423 |        "<style scoped>\n",
 424 |        "    .dataframe tbody tr th:only-of-type {\n",
 425 |        "        vertical-align: middle;\n",
 426 |        "    }\n",
 427 |        "\n",
 428 |        "    .dataframe tbody tr th {\n",
 429 |        "        vertical-align: top;\n",
 430 |        "    }\n",
 431 |        "\n",
 432 |        "    .dataframe thead th {\n",
 433 |        "        text-align: right;\n",
 434 |        "    }\n",
 435 |        "</style>\n",
 436 |        "<table border=\"1\" class=\"dataframe\">\n",
 437 |        "  <thead>\n",
 438 |        "    <tr style=\"text-align: right;\">\n",
 439 |        "      <th></th>\n",
 440 |        "      <th>name</th>\n",
 441 |        "      <th>rates_bundled</th>\n",
 442 |        "      <th>savings_bundled</th>\n",
 443 |        "      <th>speed_bundled(MB/s)</th>\n",
 444 |        "      <th>rates_unbundled</th>\n",
 445 |        "      <th>savings_unbundled</th>\n",
 446 |        "      <th>speed_unbundled(MB/s)</th>\n",
 447 |        "    </tr>\n",
 448 |        "  </thead>\n",
 449 |        "  <tbody>\n",
 450 |        "    <tr>\n",
 451 |        "      <th>0</th>\n",
 452 |        "      <td>gzip 4</td>\n",
 453 |        "      <td>3.352383</td>\n",
 454 |        "      <td>0.701705</td>\n",
 455 |        "      <td>20.010897</td>\n",
 456 |        "      <td>2.714575</td>\n",
 457 |        "      <td>0.631618</td>\n",
 458 |        "      <td>2.922456</td>\n",
 459 |        "    </tr>\n",
 460 |        "    <tr>\n",
 461 |        "      <th>1</th>\n",
 462 |        "      <td>gzip 5</td>\n",
 463 |        "      <td>3.454900</td>\n",
 464 |        "      <td>0.710556</td>\n",
 465 |        "      <td>17.201050</td>\n",
 466 |        "      <td>2.770296</td>\n",
 467 |        "      <td>0.639028</td>\n",
 468 |        "      <td>2.792084</td>\n",
 469 |        "    </tr>\n",
 470 |        "    <tr>\n",
 471 |        "      <th>2</th>\n",
 472 |        "      <td>gzip 6</td>\n",
 473 |        "      <td>3.491963</td>\n",
 474 |        "      <td>0.713628</td>\n",
 475 |        "      <td>14.601151</td>\n",
 476 |        "      <td>2.792162</td>\n",
 477 |        "      <td>0.641855</td>\n",
 478 |        "      <td>2.574496</td>\n",
 479 |        "    </tr>\n",
 480 |        "    <tr>\n",
 481 |        "      <th>3</th>\n",
 482 |        "      <td>gzip 7</td>\n",
 483 |        "      <td>3.502496</td>\n",
 484 |        "      <td>0.714489</td>\n",
 485 |        "      <td>13.574471</td>\n",
 486 |        "      <td>2.798391</td>\n",
 487 |        "      <td>0.642652</td>\n",
 488 |        "      <td>2.451160</td>\n",
 489 |        "    </tr>\n",
 490 |        "    <tr>\n",
 491 |        "      <th>4</th>\n",
 492 |        "      <td>gzip 8</td>\n",
 493 |        "      <td>3.510398</td>\n",
 494 |        "      <td>0.715132</td>\n",
 495 |        "      <td>11.716914</td>\n",
 496 |        "      <td>2.804235</td>\n",
 497 |        "      <td>0.643397</td>\n",
 498 |        "      <td>2.200946</td>\n",
 499 |        "    </tr>\n",
 500 |        "    <tr>\n",
 501 |        "      <th>5</th>\n",
 502 |        "      <td>gzip 9</td>\n",
 503 |        "      <td>3.510933</td>\n",
 504 |        "      <td>0.715175</td>\n",
 505 |        "      <td>11.625863</td>\n",
 506 |        "      <td>2.804699</td>\n",
 507 |        "      <td>0.643455</td>\n",
 508 |        "      <td>2.138140</td>\n",
 509 |        "    </tr>\n",
 510 |        "    <tr>\n",
 511 |        "      <th>6</th>\n",
 512 |        "      <td>brotli 4</td>\n",
 513 |        "      <td>3.565088</td>\n",
 514 |        "      <td>0.719502</td>\n",
 515 |        "      <td>16.478388</td>\n",
 516 |        "      <td>2.903769</td>\n",
 517 |        "      <td>0.655620</td>\n",
 518 |        "      <td>2.062052</td>\n",
 519 |        "    </tr>\n",
 520 |        "    <tr>\n",
 521 |        "      <th>7</th>\n",
 522 |        "      <td>brotli 5</td>\n",
 523 |        "      <td>3.829246</td>\n",
 524 |        "      <td>0.738852</td>\n",
 525 |        "      <td>11.519678</td>\n",
 526 |        "      <td>3.112397</td>\n",
 527 |        "      <td>0.678704</td>\n",
 528 |        "      <td>1.802857</td>\n",
 529 |        "    </tr>\n",
 530 |        "    <tr>\n",
 531 |        "      <th>8</th>\n",
 532 |        "      <td>brotli 6</td>\n",
 533 |        "      <td>3.863344</td>\n",
 534 |        "      <td>0.741157</td>\n",
 535 |        "      <td>10.184582</td>\n",
 536 |        "      <td>3.128816</td>\n",
 537 |        "      <td>0.680390</td>\n",
 538 |        "      <td>1.697186</td>\n",
 539 |        "    </tr>\n",
 540 |        "    <tr>\n",
 541 |        "      <th>9</th>\n",
 542 |        "      <td>brotli 7</td>\n",
 543 |        "      <td>3.892121</td>\n",
 544 |        "      <td>0.743071</td>\n",
 545 |        "      <td>7.572282</td>\n",
 546 |        "      <td>3.145081</td>\n",
 547 |        "      <td>0.682043</td>\n",
 548 |        "      <td>1.495760</td>\n",
 549 |        "    </tr>\n",
 550 |        "    <tr>\n",
 551 |        "      <th>10</th>\n",
 552 |        "      <td>brotli 8</td>\n",
 553 |        "      <td>3.904304</td>\n",
 554 |        "      <td>0.743872</td>\n",
 555 |        "      <td>5.926585</td>\n",
 556 |        "      <td>3.151561</td>\n",
 557 |        "      <td>0.682697</td>\n",
 558 |        "      <td>1.312970</td>\n",
 559 |        "    </tr>\n",
 560 |        "    <tr>\n",
 561 |        "      <th>11</th>\n",
 562 |        "      <td>brotli 9</td>\n",
 563 |        "      <td>3.917189</td>\n",
 564 |        "      <td>0.744715</td>\n",
 565 |        "      <td>4.506134</td>\n",
 566 |        "      <td>3.159067</td>\n",
 567 |        "      <td>0.683451</td>\n",
 568 |        "      <td>1.132643</td>\n",
 569 |        "    </tr>\n",
 570 |        "    <tr>\n",
 571 |        "      <th>12</th>\n",
 572 |        "      <td>brotli 10</td>\n",
 573 |        "      <td>4.177070</td>\n",
 574 |        "      <td>0.760598</td>\n",
 575 |        "      <td>1.226846</td>\n",
 576 |        "      <td>3.335079</td>\n",
 577 |        "      <td>0.700157</td>\n",
 578 |        "      <td>0.508489</td>\n",
 579 |        "    </tr>\n",
 580 |        "    <tr>\n",
 581 |        "      <th>13</th>\n",
 582 |        "      <td>brotli 11</td>\n",
 583 |        "      <td>4.259316</td>\n",
 584 |        "      <td>0.765221</td>\n",
 585 |        "      <td>0.539299</td>\n",
 586 |        "      <td>3.402424</td>\n",
 587 |        "      <td>0.706092</td>\n",
 588 |        "      <td>0.302382</td>\n",
 589 |        "    </tr>\n",
 590 |        "  </tbody>\n",
 591 |        "</table>\n",
 592 |        "</div>"
 593 |       ],
 594 |       "text/plain": [
 595 |        "         name  rates_bundled  savings_bundled  speed_bundled(MB/s)  \\\n",
 596 |        "0      gzip 4       3.352383         0.701705            20.010897   \n",
 597 |        "1      gzip 5       3.454900         0.710556            17.201050   \n",
 598 |        "2      gzip 6       3.491963         0.713628            14.601151   \n",
 599 |        "3      gzip 7       3.502496         0.714489            13.574471   \n",
 600 |        "4      gzip 8       3.510398         0.715132            11.716914   \n",
 601 |        "5      gzip 9       3.510933         0.715175            11.625863   \n",
 602 |        "6    brotli 4       3.565088         0.719502            16.478388   \n",
 603 |        "7    brotli 5       3.829246         0.738852            11.519678   \n",
 604 |        "8    brotli 6       3.863344         0.741157            10.184582   \n",
 605 |        "9    brotli 7       3.892121         0.743071             7.572282   \n",
 606 |        "10   brotli 8       3.904304         0.743872             5.926585   \n",
 607 |        "11   brotli 9       3.917189         0.744715             4.506134   \n",
 608 |        "12  brotli 10       4.177070         0.760598             1.226846   \n",
 609 |        "13  brotli 11       4.259316         0.765221             0.539299   \n",
 610 |        "\n",
 611 |        "    rates_unbundled  savings_unbundled  speed_unbundled(MB/s)  \n",
 612 |        "0          2.714575           0.631618               2.922456  \n",
 613 |        "1          2.770296           0.639028               2.792084  \n",
 614 |        "2          2.792162           0.641855               2.574496  \n",
 615 |        "3          2.798391           0.642652               2.451160  \n",
 616 |        "4          2.804235           0.643397               2.200946  \n",
 617 |        "5          2.804699           0.643455               2.138140  \n",
 618 |        "6          2.903769           0.655620               2.062052  \n",
 619 |        "7          3.112397           0.678704               1.802857  \n",
 620 |        "8          3.128816           0.680390               1.697186  \n",
 621 |        "9          3.145081           0.682043               1.495760  \n",
 622 |        "10         3.151561           0.682697               1.312970  \n",
 623 |        "11         3.159067           0.683451               1.132643  \n",
 624 |        "12         3.335079           0.700157               0.508489  \n",
 625 |        "13         3.402424           0.706092               0.302382  "
 626 |       ]
 627 |      },
 628 |      "execution_count": 419,
 629 |      "metadata": {},
 630 |      "output_type": "execute_result"
 631 |     }
 632 |    ],
 633 |    "source": [
 634 |     "frame = pd.DataFrame()\n",
 635 |     "frame[\"name\"] = [\"gzip 4\", \"gzip 5\", \"gzip 6\", \"gzip 7\", \"gzip 8\", \"gzip 9\",\n",
 636 |     "                 \"brotli 4\", \"brotli 5\", \"brotli 6\", \"brotli 7\", \"brotli 8\", \"brotli 9\", \"brotli 10\", \"brotli 11\"]\n",
 637 |     "\n",
 638 |     "frame[\"rates_bundled\"] = np.hstack((np.mean(rates_gzip_bundled[:600], axis=0), \n",
 639 |     "                                    np.mean(rates_brotli_bundled[:600], axis=0)))\n",
 640 |     "frame[\"savings_bundled\"] = 1 - 1 / np.hstack((np.mean(rates_gzip_bundled[:600], axis=0), \n",
 641 |     "                                              np.mean(rates_brotli_bundled[:600], axis=0)))\n",
 642 |     "frame[\"speed_bundled(MB/s)\"] = np.hstack((np.mean(speed_gzip_bundled[:600], axis=0), \n",
 643 |     "                                          np.mean(speed_brotli_bundled[:600], axis=0))) / 1000000\n",
 644 |     "\n",
 645 |     "frame[\"rates_unbundled\"] = np.hstack((np.mean(rates_gzip_unbundled, axis=0), \n",
 646 |     "                                      np.mean(rates_brotli_unbundled, axis=0)))\n",
 647 |     "frame[\"savings_unbundled\"] = 1 - 1 / np.hstack((np.mean(rates_gzip_unbundled, axis=0), \n",
 648 |     "                                                np.mean(rates_brotli_unbundled, axis=0)))\n",
 649 |     "frame[\"speed_unbundled(MB/s)\"] = np.hstack((np.mean(speed_gzip_unbundled, axis=0), \n",
 650 |     "                                            np.mean(speed_brotli_unbundled, axis=0))) / 1000000\n",
 651 |     "frame"
 652 |    ]
 653 |   },
 654 |   {
 655 |    "cell_type": "markdown",
 656 |    "metadata": {},
 657 |    "source": [
 658 |     "### Group results by non compressed size ranges"
 659 |    ]
 660 |   },
 661 |   {
 662 |    "cell_type": "code",
 663 |    "execution_count": 404,
 664 |    "metadata": {},
 665 |    "outputs": [
 666 |     {
 667 |      "name": "stdout",
 668 |      "output_type": "stream",
 669 |      "text": [
 670 |       "20000 - 100000 bytes\n"
 671 |      ]
 672 |     },
 673 |     {
 674 |      "data": {
 675 |       "text/html": [
 676 |        "<div>\n",
 677 |        "<style scoped>\n",
 678 |        "    .dataframe tbody tr th:only-of-type {\n",
 679 |        "        vertical-align: middle;\n",
 680 |        "    }\n",
 681 |        "\n",
 682 |        "    .dataframe tbody tr th {\n",
 683 |        "        vertical-align: top;\n",
 684 |        "    }\n",
 685 |        "\n",
 686 |        "    .dataframe thead th {\n",
 687 |        "        text-align: right;\n",
 688 |        "    }\n",
 689 |        "</style>\n",
 690 |        "<table border=\"1\" class=\"dataframe\">\n",
 691 |        "  <thead>\n",
 692 |        "    <tr style=\"text-align: right;\">\n",
 693 |        "      <th></th>\n",
 694 |        "      <th>name</th>\n",
 695 |        "      <th>rates_bundled</th>\n",
 696 |        "      <th>savings_bundled</th>\n",
 697 |        "      <th>speed_bundled(MB/s)</th>\n",
 698 |        "      <th>rates_unbundled</th>\n",
 699 |        "      <th>savings_unbundled</th>\n",
 700 |        "      <th>speed_unbundled(MB/s)</th>\n",
 701 |        "    </tr>\n",
 702 |        "  </thead>\n",
 703 |        "  <tbody>\n",
 704 |        "    <tr>\n",
 705 |        "      <th>0</th>\n",
 706 |        "      <td>gzip 4</td>\n",
 707 |        "      <td>3.219473</td>\n",
 708 |        "      <td>0.689390</td>\n",
 709 |        "      <td>19.906293</td>\n",
 710 |        "      <td>2.639814</td>\n",
 711 |        "      <td>0.621185</td>\n",
 712 |        "      <td>2.741001</td>\n",
 713 |        "    </tr>\n",
 714 |        "    <tr>\n",
 715 |        "      <th>1</th>\n",
 716 |        "      <td>gzip 5</td>\n",
 717 |        "      <td>3.316049</td>\n",
 718 |        "      <td>0.698436</td>\n",
 719 |        "      <td>16.875470</td>\n",
 720 |        "      <td>2.688858</td>\n",
 721 |        "      <td>0.628095</td>\n",
 722 |        "      <td>2.636272</td>\n",
 723 |        "    </tr>\n",
 724 |        "    <tr>\n",
 725 |        "      <th>2</th>\n",
 726 |        "      <td>gzip 6</td>\n",
 727 |        "      <td>3.350974</td>\n",
 728 |        "      <td>0.701579</td>\n",
 729 |        "      <td>14.277811</td>\n",
 730 |        "      <td>2.703293</td>\n",
 731 |        "      <td>0.630081</td>\n",
 732 |        "      <td>2.391124</td>\n",
 733 |        "    </tr>\n",
 734 |        "    <tr>\n",
 735 |        "      <th>3</th>\n",
 736 |        "      <td>gzip 7</td>\n",
 737 |        "      <td>3.360854</td>\n",
 738 |        "      <td>0.702457</td>\n",
 739 |        "      <td>13.101073</td>\n",
 740 |        "      <td>2.708538</td>\n",
 741 |        "      <td>0.630797</td>\n",
 742 |        "      <td>2.357168</td>\n",
 743 |        "    </tr>\n",
 744 |        "    <tr>\n",
 745 |        "      <th>4</th>\n",
 746 |        "      <td>gzip 8</td>\n",
 747 |        "      <td>3.367366</td>\n",
 748 |        "      <td>0.703032</td>\n",
 749 |        "      <td>11.325969</td>\n",
 750 |        "      <td>2.713746</td>\n",
 751 |        "      <td>0.631506</td>\n",
 752 |        "      <td>2.101626</td>\n",
 753 |        "    </tr>\n",
 754 |        "    <tr>\n",
 755 |        "      <th>5</th>\n",
 756 |        "      <td>gzip 9</td>\n",
 757 |        "      <td>3.367827</td>\n",
 758 |        "      <td>0.703073</td>\n",
 759 |        "      <td>11.248899</td>\n",
 760 |        "      <td>2.714211</td>\n",
 761 |        "      <td>0.631569</td>\n",
 762 |        "      <td>2.081291</td>\n",
 763 |        "    </tr>\n",
 764 |        "    <tr>\n",
 765 |        "      <th>6</th>\n",
 766 |        "      <td>brotli 4</td>\n",
 767 |        "      <td>3.414766</td>\n",
 768 |        "      <td>0.707154</td>\n",
 769 |        "      <td>16.201073</td>\n",
 770 |        "      <td>2.794675</td>\n",
 771 |        "      <td>0.642177</td>\n",
 772 |        "      <td>1.889179</td>\n",
 773 |        "    </tr>\n",
 774 |        "    <tr>\n",
 775 |        "      <th>7</th>\n",
 776 |        "      <td>brotli 5</td>\n",
 777 |        "      <td>3.663572</td>\n",
 778 |        "      <td>0.727042</td>\n",
 779 |        "      <td>11.228977</td>\n",
 780 |        "      <td>2.989190</td>\n",
 781 |        "      <td>0.665461</td>\n",
 782 |        "      <td>1.658310</td>\n",
 783 |        "    </tr>\n",
 784 |        "    <tr>\n",
 785 |        "      <th>8</th>\n",
 786 |        "      <td>brotli 6</td>\n",
 787 |        "      <td>3.697175</td>\n",
 788 |        "      <td>0.729523</td>\n",
 789 |        "      <td>9.944989</td>\n",
 790 |        "      <td>3.001100</td>\n",
 791 |        "      <td>0.666789</td>\n",
 792 |        "      <td>1.575014</td>\n",
 793 |        "    </tr>\n",
 794 |        "    <tr>\n",
 795 |        "      <th>9</th>\n",
 796 |        "      <td>brotli 7</td>\n",
 797 |        "      <td>3.725454</td>\n",
 798 |        "      <td>0.731576</td>\n",
 799 |        "      <td>7.324780</td>\n",
 800 |        "      <td>3.012006</td>\n",
 801 |        "      <td>0.667995</td>\n",
 802 |        "      <td>1.380246</td>\n",
 803 |        "    </tr>\n",
 804 |        "    <tr>\n",
 805 |        "      <th>10</th>\n",
 806 |        "      <td>brotli 8</td>\n",
 807 |        "      <td>3.738544</td>\n",
 808 |        "      <td>0.732516</td>\n",
 809 |        "      <td>5.745074</td>\n",
 810 |        "      <td>3.015555</td>\n",
 811 |        "      <td>0.668386</td>\n",
 812 |        "      <td>1.223964</td>\n",
 813 |        "    </tr>\n",
 814 |        "    <tr>\n",
 815 |        "      <th>11</th>\n",
 816 |        "      <td>brotli 9</td>\n",
 817 |        "      <td>3.751895</td>\n",
 818 |        "      <td>0.733468</td>\n",
 819 |        "      <td>4.353647</td>\n",
 820 |        "      <td>3.021005</td>\n",
 821 |        "      <td>0.668984</td>\n",
 822 |        "      <td>1.049966</td>\n",
 823 |        "    </tr>\n",
 824 |        "    <tr>\n",
 825 |        "      <th>12</th>\n",
 826 |        "      <td>brotli 10</td>\n",
 827 |        "      <td>4.005407</td>\n",
 828 |        "      <td>0.750338</td>\n",
 829 |        "      <td>1.161323</td>\n",
 830 |        "      <td>3.181356</td>\n",
 831 |        "      <td>0.685669</td>\n",
 832 |        "      <td>0.506229</td>\n",
 833 |        "    </tr>\n",
 834 |        "    <tr>\n",
 835 |        "      <th>13</th>\n",
 836 |        "      <td>brotli 11</td>\n",
 837 |        "      <td>4.081090</td>\n",
 838 |        "      <td>0.754967</td>\n",
 839 |        "      <td>0.511165</td>\n",
 840 |        "      <td>3.238609</td>\n",
 841 |        "      <td>0.691225</td>\n",
 842 |        "      <td>0.304395</td>\n",
 843 |        "    </tr>\n",
 844 |        "  </tbody>\n",
 845 |        "</table>\n",
 846 |        "</div>"
 847 |       ],
 848 |       "text/plain": [
 849 |        "         name  rates_bundled  savings_bundled  speed_bundled(MB/s)  \\\n",
 850 |        "0      gzip 4       3.219473         0.689390            19.906293   \n",
 851 |        "1      gzip 5       3.316049         0.698436            16.875470   \n",
 852 |        "2      gzip 6       3.350974         0.701579            14.277811   \n",
 853 |        "3      gzip 7       3.360854         0.702457            13.101073   \n",
 854 |        "4      gzip 8       3.367366         0.703032            11.325969   \n",
 855 |        "5      gzip 9       3.367827         0.703073            11.248899   \n",
 856 |        "6    brotli 4       3.414766         0.707154            16.201073   \n",
 857 |        "7    brotli 5       3.663572         0.727042            11.228977   \n",
 858 |        "8    brotli 6       3.697175         0.729523             9.944989   \n",
 859 |        "9    brotli 7       3.725454         0.731576             7.324780   \n",
 860 |        "10   brotli 8       3.738544         0.732516             5.745074   \n",
 861 |        "11   brotli 9       3.751895         0.733468             4.353647   \n",
 862 |        "12  brotli 10       4.005407         0.750338             1.161323   \n",
 863 |        "13  brotli 11       4.081090         0.754967             0.511165   \n",
 864 |        "\n",
 865 |        "    rates_unbundled  savings_unbundled  speed_unbundled(MB/s)  \n",
 866 |        "0          2.639814           0.621185               2.741001  \n",
 867 |        "1          2.688858           0.628095               2.636272  \n",
 868 |        "2          2.703293           0.630081               2.391124  \n",
 869 |        "3          2.708538           0.630797               2.357168  \n",
 870 |        "4          2.713746           0.631506               2.101626  \n",
 871 |        "5          2.714211           0.631569               2.081291  \n",
 872 |        "6          2.794675           0.642177               1.889179  \n",
 873 |        "7          2.989190           0.665461               1.658310  \n",
 874 |        "8          3.001100           0.666789               1.575014  \n",
 875 |        "9          3.012006           0.667995               1.380246  \n",
 876 |        "10         3.015555           0.668386               1.223964  \n",
 877 |        "11         3.021005           0.668984               1.049966  \n",
 878 |        "12         3.181356           0.685669               0.506229  \n",
 879 |        "13         3.238609           0.691225               0.304395  "
 880 |       ]
 881 |      },
 882 |      "execution_count": 404,
 883 |      "metadata": {},
 884 |      "output_type": "execute_result"
 885 |     }
 886 |    ],
 887 |    "source": [
 888 |     "# ranges are (20000, 100000), (100000, 1000000), (1000000, 3000000) in bytes\n",
 889 |     "init_sizes_unbundled = np.array(init_sizes_unbundled)\n",
 890 |     "group1 = np.where((init_sizes_unbundled > 2000)*(init_sizes_unbundled <= 100000))[0]\n",
 891 |     "group2 = np.where((init_sizes_unbundled > 100000)*(init_sizes_unbundled <= 1000000))[0]\n",
 892 |     "group3 = np.where((init_sizes_unbundled > 1000000)*(init_sizes_unbundled <= 3000000))[0]\n",
 893 |     "\n",
 894 |     "print(20000, \"-\", 100000, \"bytes\")\n",
 895 |     "frame = pd.DataFrame()\n",
 896 |     "frame[\"name\"] = [\"gzip 4\", \"gzip 5\", \"gzip 6\", \"gzip 7\", \"gzip 8\", \"gzip 9\",\n",
 897 |     "                 \"brotli 4\", \"brotli 5\", \"brotli 6\", \"brotli 7\", \"brotli 8\", \"brotli 9\", \"brotli 10\", \"brotli 11\"]\n",
 898 |     "\n",
 899 |     "\n",
 900 |     "frame[\"rates_bundled\"] = np.hstack((np.mean(np.array(rates_gzip_bundled)[group1], axis=0), \n",
 901 |     "                                    np.mean(np.array(rates_brotli_bundled)[group1], axis=0)))\n",
 902 |     "frame[\"savings_bundled\"] = 1 - 1 / np.hstack((np.mean(np.array(rates_gzip_bundled)[group1], axis=0), \n",
 903 |     "                                              np.mean(np.array(rates_brotli_bundled)[group1], axis=0)))\n",
 904 |     "frame[\"speed_bundled(MB/s)\"] = np.hstack((np.mean(np.array(speed_gzip_bundled)[group1], axis=0), \n",
 905 |     "                                           np.mean(np.array(speed_brotli_bundled)[group1], axis=0))) / 1000000\n",
 906 |     "\n",
 907 |     "frame[\"rates_unbundled\"] = np.hstack((np.mean(np.array(rates_gzip_unbundled)[group1], axis=0), \n",
 908 |     "                                    np.mean(np.array(rates_brotli_unbundled)[group1], axis=0)))\n",
 909 |     "frame[\"savings_unbundled\"] = 1 - 1 / np.hstack((np.mean(np.array(rates_gzip_unbundled)[group1], axis=0), \n",
 910 |     "                                              np.mean(np.array(rates_brotli_unbundled)[group1], axis=0)))\n",
 911 |     "frame[\"speed_unbundled(MB/s)\"] = np.hstack((np.mean(np.array(speed_gzip_unbundled)[group1], axis=0), \n",
 912 |     "                                           np.mean(np.array(speed_brotli_unbundled)[group1], axis=0))) / 1000000\n",
 913 |     "\n",
 914 |     "frame"
 915 |    ]
 916 |   },
 917 |   {
 918 |    "cell_type": "code",
 919 |    "execution_count": 406,
 920 |    "metadata": {},
 921 |    "outputs": [
 922 |     {
 923 |      "name": "stdout",
 924 |      "output_type": "stream",
 925 |      "text": [
 926 |       "100000 - 1000000 bytes\n"
 927 |      ]
 928 |     },
 929 |     {
 930 |      "data": {
 931 |       "text/html": [
 932 |        "<div>\n",
 933 |        "<style scoped>\n",
 934 |        "    .dataframe tbody tr th:only-of-type {\n",
 935 |        "        vertical-align: middle;\n",
 936 |        "    }\n",
 937 |        "\n",
 938 |        "    .dataframe tbody tr th {\n",
 939 |        "        vertical-align: top;\n",
 940 |        "    }\n",
 941 |        "\n",
 942 |        "    .dataframe thead th {\n",
 943 |        "        text-align: right;\n",
 944 |        "    }\n",
 945 |        "</style>\n",
 946 |        "<table border=\"1\" class=\"dataframe\">\n",
 947 |        "  <thead>\n",
 948 |        "    <tr style=\"text-align: right;\">\n",
 949 |        "      <th></th>\n",
 950 |        "      <th>name</th>\n",
 951 |        "      <th>rates_bundled</th>\n",
 952 |        "      <th>savings_bundled</th>\n",
 953 |        "      <th>speed_bundled(MB/s)</th>\n",
 954 |        "      <th>rates_unbundled</th>\n",
 955 |        "      <th>savings_unbundled</th>\n",
 956 |        "      <th>speed_unbundled(MB/s)</th>\n",
 957 |        "    </tr>\n",
 958 |        "  </thead>\n",
 959 |        "  <tbody>\n",
 960 |        "    <tr>\n",
 961 |        "      <th>0</th>\n",
 962 |        "      <td>gzip 4</td>\n",
 963 |        "      <td>3.255438</td>\n",
 964 |        "      <td>0.692822</td>\n",
 965 |        "      <td>21.709783</td>\n",
 966 |        "      <td>2.834153</td>\n",
 967 |        "      <td>0.647161</td>\n",
 968 |        "      <td>3.442193</td>\n",
 969 |        "    </tr>\n",
 970 |        "    <tr>\n",
 971 |        "      <th>1</th>\n",
 972 |        "      <td>gzip 5</td>\n",
 973 |        "      <td>3.352408</td>\n",
 974 |        "      <td>0.701707</td>\n",
 975 |        "      <td>18.111059</td>\n",
 976 |        "      <td>2.901749</td>\n",
 977 |        "      <td>0.655380</td>\n",
 978 |        "      <td>3.270738</td>\n",
 979 |        "    </tr>\n",
 980 |        "    <tr>\n",
 981 |        "      <th>2</th>\n",
 982 |        "      <td>gzip 6</td>\n",
 983 |        "      <td>3.385023</td>\n",
 984 |        "      <td>0.704581</td>\n",
 985 |        "      <td>15.608381</td>\n",
 986 |        "      <td>2.937161</td>\n",
 987 |        "      <td>0.659535</td>\n",
 988 |        "      <td>3.016377</td>\n",
 989 |        "    </tr>\n",
 990 |        "    <tr>\n",
 991 |        "      <th>3</th>\n",
 992 |        "      <td>gzip 7</td>\n",
 993 |        "      <td>3.395387</td>\n",
 994 |        "      <td>0.705483</td>\n",
 995 |        "      <td>14.415784</td>\n",
 996 |        "      <td>2.945246</td>\n",
 997 |        "      <td>0.660470</td>\n",
 998 |        "      <td>2.766044</td>\n",
 999 |        "    </tr>\n",
1000 |        "    <tr>\n",
1001 |        "      <th>4</th>\n",
1002 |        "      <td>gzip 8</td>\n",
1003 |        "      <td>3.402633</td>\n",
1004 |        "      <td>0.706110</td>\n",
1005 |        "      <td>12.668371</td>\n",
1006 |        "      <td>2.952941</td>\n",
1007 |        "      <td>0.661355</td>\n",
1008 |        "      <td>2.463421</td>\n",
1009 |        "    </tr>\n",
1010 |        "    <tr>\n",
1011 |        "      <th>5</th>\n",
1012 |        "      <td>gzip 9</td>\n",
1013 |        "      <td>3.403091</td>\n",
1014 |        "      <td>0.706150</td>\n",
1015 |        "      <td>12.492436</td>\n",
1016 |        "      <td>2.953505</td>\n",
1017 |        "      <td>0.661419</td>\n",
1018 |        "      <td>2.303074</td>\n",
1019 |        "    </tr>\n",
1020 |        "    <tr>\n",
1021 |        "      <th>6</th>\n",
1022 |        "      <td>brotli 4</td>\n",
1023 |        "      <td>3.461648</td>\n",
1024 |        "      <td>0.711120</td>\n",
1025 |        "      <td>17.202166</td>\n",
1026 |        "      <td>3.086969</td>\n",
1027 |        "      <td>0.676058</td>\n",
1028 |        "      <td>2.547228</td>\n",
1029 |        "    </tr>\n",
1030 |        "    <tr>\n",
1031 |        "      <th>7</th>\n",
1032 |        "      <td>brotli 5</td>\n",
1033 |        "      <td>3.710503</td>\n",
1034 |        "      <td>0.730495</td>\n",
1035 |        "      <td>11.969930</td>\n",
1036 |        "      <td>3.321406</td>\n",
1037 |        "      <td>0.698923</td>\n",
1038 |        "      <td>2.191857</td>\n",
1039 |        "    </tr>\n",
1040 |        "    <tr>\n",
1041 |        "      <th>8</th>\n",
1042 |        "      <td>brotli 6</td>\n",
1043 |        "      <td>3.740765</td>\n",
1044 |        "      <td>0.732675</td>\n",
1045 |        "      <td>10.554053</td>\n",
1046 |        "      <td>3.346479</td>\n",
1047 |        "      <td>0.701179</td>\n",
1048 |        "      <td>2.032891</td>\n",
1049 |        "    </tr>\n",
1050 |        "    <tr>\n",
1051 |        "      <th>9</th>\n",
1052 |        "      <td>brotli 7</td>\n",
1053 |        "      <td>3.764571</td>\n",
1054 |        "      <td>0.734365</td>\n",
1055 |        "      <td>7.812185</td>\n",
1056 |        "      <td>3.373016</td>\n",
1057 |        "      <td>0.703529</td>\n",
1058 |        "      <td>1.782097</td>\n",
1059 |        "    </tr>\n",
1060 |        "    <tr>\n",
1061 |        "      <th>10</th>\n",
1062 |        "      <td>brotli 8</td>\n",
1063 |        "      <td>3.774702</td>\n",
1064 |        "      <td>0.735078</td>\n",
1065 |        "      <td>6.037380</td>\n",
1066 |        "      <td>3.384885</td>\n",
1067 |        "      <td>0.704569</td>\n",
1068 |        "      <td>1.520304</td>\n",
1069 |        "    </tr>\n",
1070 |        "    <tr>\n",
1071 |        "      <th>11</th>\n",
1072 |        "      <td>brotli 9</td>\n",
1073 |        "      <td>3.785685</td>\n",
1074 |        "      <td>0.735847</td>\n",
1075 |        "      <td>4.591869</td>\n",
1076 |        "      <td>3.396362</td>\n",
1077 |        "      <td>0.705567</td>\n",
1078 |        "      <td>1.313113</td>\n",
1079 |        "    </tr>\n",
1080 |        "    <tr>\n",
1081 |        "      <th>12</th>\n",
1082 |        "      <td>brotli 10</td>\n",
1083 |        "      <td>4.033893</td>\n",
1084 |        "      <td>0.752101</td>\n",
1085 |        "      <td>1.202672</td>\n",
1086 |        "      <td>3.598985</td>\n",
1087 |        "      <td>0.722144</td>\n",
1088 |        "      <td>0.519223</td>\n",
1089 |        "    </tr>\n",
1090 |        "    <tr>\n",
1091 |        "      <th>13</th>\n",
1092 |        "      <td>brotli 11</td>\n",
1093 |        "      <td>4.104526</td>\n",
1094 |        "      <td>0.756367</td>\n",
1095 |        "      <td>0.523913</td>\n",
1096 |        "      <td>3.683567</td>\n",
1097 |        "      <td>0.728524</td>\n",
1098 |        "      <td>0.301002</td>\n",
1099 |        "    </tr>\n",
1100 |        "  </tbody>\n",
1101 |        "</table>\n",
1102 |        "</div>"
1103 |       ],
1104 |       "text/plain": [
1105 |        "         name  rates_bundled  savings_bundled  speed_bundled(MB/s)  \\\n",
1106 |        "0      gzip 4       3.255438         0.692822            21.709783   \n",
1107 |        "1      gzip 5       3.352408         0.701707            18.111059   \n",
1108 |        "2      gzip 6       3.385023         0.704581            15.608381   \n",
1109 |        "3      gzip 7       3.395387         0.705483            14.415784   \n",
1110 |        "4      gzip 8       3.402633         0.706110            12.668371   \n",
1111 |        "5      gzip 9       3.403091         0.706150            12.492436   \n",
1112 |        "6    brotli 4       3.461648         0.711120            17.202166   \n",
1113 |        "7    brotli 5       3.710503         0.730495            11.969930   \n",
1114 |        "8    brotli 6       3.740765         0.732675            10.554053   \n",
1115 |        "9    brotli 7       3.764571         0.734365             7.812185   \n",
1116 |        "10   brotli 8       3.774702         0.735078             6.037380   \n",
1117 |        "11   brotli 9       3.785685         0.735847             4.591869   \n",
1118 |        "12  brotli 10       4.033893         0.752101             1.202672   \n",
1119 |        "13  brotli 11       4.104526         0.756367             0.523913   \n",
1120 |        "\n",
1121 |        "    rates_unbundled  savings_unbundled  speed_unbundled(MB/s)  \n",
1122 |        "0          2.834153           0.647161               3.442193  \n",
1123 |        "1          2.901749           0.655380               3.270738  \n",
1124 |        "2          2.937161           0.659535               3.016377  \n",
1125 |        "3          2.945246           0.660470               2.766044  \n",
1126 |        "4          2.952941           0.661355               2.463421  \n",
1127 |        "5          2.953505           0.661419               2.303074  \n",
1128 |        "6          3.086969           0.676058               2.547228  \n",
1129 |        "7          3.321406           0.698923               2.191857  \n",
1130 |        "8          3.346479           0.701179               2.032891  \n",
1131 |        "9          3.373016           0.703529               1.782097  \n",
1132 |        "10         3.384885           0.704569               1.520304  \n",
1133 |        "11         3.396362           0.705567               1.313113  \n",
1134 |        "12         3.598985           0.722144               0.519223  \n",
1135 |        "13         3.683567           0.728524               0.301002  "
1136 |       ]
1137 |      },
1138 |      "execution_count": 406,
1139 |      "metadata": {},
1140 |      "output_type": "execute_result"
1141 |     }
1142 |    ],
1143 |    "source": [
1144 |     "print(100000, \"-\", 1000000, \"bytes\")\n",
1145 |     "frame = pd.DataFrame()\n",
1146 |     "frame[\"name\"] = [\"gzip 4\", \"gzip 5\", \"gzip 6\", \"gzip 7\", \"gzip 8\", \"gzip 9\",\n",
1147 |     "                 \"brotli 4\", \"brotli 5\", \"brotli 6\", \"brotli 7\", \"brotli 8\", \"brotli 9\", \"brotli 10\", \"brotli 11\"]\n",
1148 |     "frame[\"rates_bundled\"] = np.hstack((np.mean(np.array(rates_gzip_bundled)[group2], axis=0), \n",
1149 |     "                                    np.mean(np.array(rates_brotli_bundled)[group2], axis=0)))\n",
1150 |     "frame[\"savings_bundled\"] = 1 - 1 / np.hstack((np.mean(np.array(rates_gzip_bundled)[group2], axis=0), \n",
1151 |     "                                              np.mean(np.array(rates_brotli_bundled)[group2], axis=0)))\n",
1152 |     "frame[\"speed_bundled(MB/s)\"] = np.hstack((np.mean(np.array(speed_gzip_bundled)[group2], axis=0), \n",
1153 |     "                                           np.mean(np.array(speed_brotli_bundled)[group2], axis=0))) / 1000000\n",
1154 |     "\n",
1155 |     "frame[\"rates_unbundled\"] = np.hstack((np.mean(np.array(rates_gzip_unbundled)[group2], axis=0), \n",
1156 |     "                                    np.mean(np.array(rates_brotli_unbundled)[group2], axis=0)))\n",
1157 |     "frame[\"savings_unbundled\"] = 1 - 1 / np.hstack((np.mean(np.array(rates_gzip_unbundled)[group2], axis=0), \n",
1158 |     "                                              np.mean(np.array(rates_brotli_unbundled)[group2], axis=0)))\n",
1159 |     "frame[\"speed_unbundled(MB/s)\"] = np.hstack((np.mean(np.array(speed_gzip_unbundled)[group2], axis=0), \n",
1160 |     "                                           np.mean(np.array(speed_brotli_unbundled)[group2], axis=0))) / 1000000\n",
1161 |     "\n",
1162 |     "frame"
1163 |    ]
1164 |   },
1165 |   {
1166 |    "cell_type": "code",
1167 |    "execution_count": 407,
1168 |    "metadata": {},
1169 |    "outputs": [
1170 |     {
1171 |      "name": "stdout",
1172 |      "output_type": "stream",
1173 |      "text": [
1174 |       "1000000 - 3000000 bytes\n"
1175 |      ]
1176 |     },
1177 |     {
1178 |      "data": {
1179 |       "text/html": [
1180 |        "<div>\n",
1181 |        "<style scoped>\n",
1182 |        "    .dataframe tbody tr th:only-of-type {\n",
1183 |        "        vertical-align: middle;\n",
1184 |        "    }\n",
1185 |        "\n",
1186 |        "    .dataframe tbody tr th {\n",
1187 |        "        vertical-align: top;\n",
1188 |        "    }\n",
1189 |        "\n",
1190 |        "    .dataframe thead th {\n",
1191 |        "        text-align: right;\n",
1192 |        "    }\n",
1193 |        "</style>\n",
1194 |        "<table border=\"1\" class=\"dataframe\">\n",
1195 |        "  <thead>\n",
1196 |        "    <tr style=\"text-align: right;\">\n",
1197 |        "      <th></th>\n",
1198 |        "      <th>name</th>\n",
1199 |        "      <th>rates_bundled</th>\n",
1200 |        "      <th>savings_bundled</th>\n",
1201 |        "      <th>speed_bundled(MB/s)</th>\n",
1202 |        "      <th>rates_unbundled</th>\n",
1203 |        "      <th>savings_unbundled</th>\n",
1204 |        "      <th>speed_unbundled(MB/s)</th>\n",
1205 |        "    </tr>\n",
1206 |        "  </thead>\n",
1207 |        "  <tbody>\n",
1208 |        "    <tr>\n",
1209 |        "      <th>0</th>\n",
1210 |        "      <td>gzip 4</td>\n",
1211 |        "      <td>3.207758</td>\n",
1212 |        "      <td>0.688256</td>\n",
1213 |        "      <td>22.311194</td>\n",
1214 |        "      <td>2.806185</td>\n",
1215 |        "      <td>0.643644</td>\n",
1216 |        "      <td>1.718255</td>\n",
1217 |        "    </tr>\n",
1218 |        "    <tr>\n",
1219 |        "      <th>1</th>\n",
1220 |        "      <td>gzip 5</td>\n",
1221 |        "      <td>3.298249</td>\n",
1222 |        "      <td>0.696809</td>\n",
1223 |        "      <td>19.365311</td>\n",
1224 |        "      <td>2.861184</td>\n",
1225 |        "      <td>0.650494</td>\n",
1226 |        "      <td>1.701731</td>\n",
1227 |        "    </tr>\n",
1228 |        "    <tr>\n",
1229 |        "      <th>2</th>\n",
1230 |        "      <td>gzip 6</td>\n",
1231 |        "      <td>3.327129</td>\n",
1232 |        "      <td>0.699441</td>\n",
1233 |        "      <td>15.542296</td>\n",
1234 |        "      <td>2.875304</td>\n",
1235 |        "      <td>0.652211</td>\n",
1236 |        "      <td>1.687635</td>\n",
1237 |        "    </tr>\n",
1238 |        "    <tr>\n",
1239 |        "      <th>3</th>\n",
1240 |        "      <td>gzip 7</td>\n",
1241 |        "      <td>3.334811</td>\n",
1242 |        "      <td>0.700133</td>\n",
1243 |        "      <td>13.912655</td>\n",
1244 |        "      <td>2.881370</td>\n",
1245 |        "      <td>0.652943</td>\n",
1246 |        "      <td>1.516255</td>\n",
1247 |        "    </tr>\n",
1248 |        "    <tr>\n",
1249 |        "      <th>4</th>\n",
1250 |        "      <td>gzip 8</td>\n",
1251 |        "      <td>3.338078</td>\n",
1252 |        "      <td>0.700426</td>\n",
1253 |        "      <td>12.395455</td>\n",
1254 |        "      <td>2.884158</td>\n",
1255 |        "      <td>0.653278</td>\n",
1256 |        "      <td>1.667812</td>\n",
1257 |        "    </tr>\n",
1258 |        "    <tr>\n",
1259 |        "      <th>5</th>\n",
1260 |        "      <td>gzip 9</td>\n",
1261 |        "      <td>3.338087</td>\n",
1262 |        "      <td>0.700427</td>\n",
1263 |        "      <td>12.474473</td>\n",
1264 |        "      <td>2.884215</td>\n",
1265 |        "      <td>0.653285</td>\n",
1266 |        "      <td>1.642354</td>\n",
1267 |        "    </tr>\n",
1268 |        "    <tr>\n",
1269 |        "      <th>6</th>\n",
1270 |        "      <td>brotli 4</td>\n",
1271 |        "      <td>3.389592</td>\n",
1272 |        "      <td>0.704979</td>\n",
1273 |        "      <td>18.884871</td>\n",
1274 |        "      <td>2.948077</td>\n",
1275 |        "      <td>0.660796</td>\n",
1276 |        "      <td>1.000014</td>\n",
1277 |        "    </tr>\n",
1278 |        "    <tr>\n",
1279 |        "      <th>7</th>\n",
1280 |        "      <td>brotli 5</td>\n",
1281 |        "      <td>3.631806</td>\n",
1282 |        "      <td>0.724655</td>\n",
1283 |        "      <td>12.310412</td>\n",
1284 |        "      <td>3.142986</td>\n",
1285 |        "      <td>0.681831</td>\n",
1286 |        "      <td>1.002287</td>\n",
1287 |        "    </tr>\n",
1288 |        "    <tr>\n",
1289 |        "      <th>8</th>\n",
1290 |        "      <td>brotli 6</td>\n",
1291 |        "      <td>3.662058</td>\n",
1292 |        "      <td>0.726929</td>\n",
1293 |        "      <td>10.677770</td>\n",
1294 |        "      <td>3.153725</td>\n",
1295 |        "      <td>0.682915</td>\n",
1296 |        "      <td>0.976053</td>\n",
1297 |        "    </tr>\n",
1298 |        "    <tr>\n",
1299 |        "      <th>9</th>\n",
1300 |        "      <td>brotli 7</td>\n",
1301 |        "      <td>3.687079</td>\n",
1302 |        "      <td>0.728783</td>\n",
1303 |        "      <td>7.824110</td>\n",
1304 |        "      <td>3.164575</td>\n",
1305 |        "      <td>0.684002</td>\n",
1306 |        "      <td>0.983152</td>\n",
1307 |        "    </tr>\n",
1308 |        "    <tr>\n",
1309 |        "      <th>10</th>\n",
1310 |        "      <td>brotli 8</td>\n",
1311 |        "      <td>3.696520</td>\n",
1312 |        "      <td>0.729475</td>\n",
1313 |        "      <td>5.929425</td>\n",
1314 |        "      <td>3.167617</td>\n",
1315 |        "      <td>0.684305</td>\n",
1316 |        "      <td>0.959331</td>\n",
1317 |        "    </tr>\n",
1318 |        "    <tr>\n",
1319 |        "      <th>11</th>\n",
1320 |        "      <td>brotli 9</td>\n",
1321 |        "      <td>3.705561</td>\n",
1322 |        "      <td>0.730135</td>\n",
1323 |        "      <td>4.381963</td>\n",
1324 |        "      <td>3.171627</td>\n",
1325 |        "      <td>0.684704</td>\n",
1326 |        "      <td>0.914153</td>\n",
1327 |        "    </tr>\n",
1328 |        "    <tr>\n",
1329 |        "      <th>12</th>\n",
1330 |        "      <td>brotli 10</td>\n",
1331 |        "      <td>3.980056</td>\n",
1332 |        "      <td>0.748747</td>\n",
1333 |        "      <td>1.136612</td>\n",
1334 |        "      <td>3.350162</td>\n",
1335 |        "      <td>0.701507</td>\n",
1336 |        "      <td>0.511365</td>\n",
1337 |        "    </tr>\n",
1338 |        "    <tr>\n",
1339 |        "      <th>13</th>\n",
1340 |        "      <td>brotli 11</td>\n",
1341 |        "      <td>4.047179</td>\n",
1342 |        "      <td>0.752914</td>\n",
1343 |        "      <td>0.514281</td>\n",
1344 |        "      <td>3.417089</td>\n",
1345 |        "      <td>0.707353</td>\n",
1346 |        "      <td>0.318949</td>\n",
1347 |        "    </tr>\n",
1348 |        "  </tbody>\n",
1349 |        "</table>\n",
1350 |        "</div>"
1351 |       ],
1352 |       "text/plain": [
1353 |        "         name  rates_bundled  savings_bundled  speed_bundled(MB/s)  \\\n",
1354 |        "0      gzip 4       3.207758         0.688256            22.311194   \n",
1355 |        "1      gzip 5       3.298249         0.696809            19.365311   \n",
1356 |        "2      gzip 6       3.327129         0.699441            15.542296   \n",
1357 |        "3      gzip 7       3.334811         0.700133            13.912655   \n",
1358 |        "4      gzip 8       3.338078         0.700426            12.395455   \n",
1359 |        "5      gzip 9       3.338087         0.700427            12.474473   \n",
1360 |        "6    brotli 4       3.389592         0.704979            18.884871   \n",
1361 |        "7    brotli 5       3.631806         0.724655            12.310412   \n",
1362 |        "8    brotli 6       3.662058         0.726929            10.677770   \n",
1363 |        "9    brotli 7       3.687079         0.728783             7.824110   \n",
1364 |        "10   brotli 8       3.696520         0.729475             5.929425   \n",
1365 |        "11   brotli 9       3.705561         0.730135             4.381963   \n",
1366 |        "12  brotli 10       3.980056         0.748747             1.136612   \n",
1367 |        "13  brotli 11       4.047179         0.752914             0.514281   \n",
1368 |        "\n",
1369 |        "    rates_unbundled  savings_unbundled  speed_unbundled(MB/s)  \n",
1370 |        "0          2.806185           0.643644               1.718255  \n",
1371 |        "1          2.861184           0.650494               1.701731  \n",
1372 |        "2          2.875304           0.652211               1.687635  \n",
1373 |        "3          2.881370           0.652943               1.516255  \n",
1374 |        "4          2.884158           0.653278               1.667812  \n",
1375 |        "5          2.884215           0.653285               1.642354  \n",
1376 |        "6          2.948077           0.660796               1.000014  \n",
1377 |        "7          3.142986           0.681831               1.002287  \n",
1378 |        "8          3.153725           0.682915               0.976053  \n",
1379 |        "9          3.164575           0.684002               0.983152  \n",
1380 |        "10         3.167617           0.684305               0.959331  \n",
1381 |        "11         3.171627           0.684704               0.914153  \n",
1382 |        "12         3.350162           0.701507               0.511365  \n",
1383 |        "13         3.417089           0.707353               0.318949  "
1384 |       ]
1385 |      },
1386 |      "execution_count": 407,
1387 |      "metadata": {},
1388 |      "output_type": "execute_result"
1389 |     }
1390 |    ],
1391 |    "source": [
1392 |     "print(1000000, \"-\", 3000000, \"bytes\")\n",
1393 |     "frame = pd.DataFrame()\n",
1394 |     "frame[\"name\"] = [\"gzip 4\", \"gzip 5\", \"gzip 6\", \"gzip 7\", \"gzip 8\", \"gzip 9\",\n",
1395 |     "                 \"brotli 4\", \"brotli 5\", \"brotli 6\", \"brotli 7\", \"brotli 8\", \"brotli 9\", \"brotli 10\", \"brotli 11\"]\n",
1396 |     "frame[\"rates_bundled\"] = np.hstack((np.mean(np.array(rates_gzip_bundled)[group3], axis=0), \n",
1397 |     "                                    np.mean(np.array(rates_brotli_bundled)[group3], axis=0)))\n",
1398 |     "frame[\"savings_bundled\"] = 1 - 1 / np.hstack((np.mean(np.array(rates_gzip_bundled)[group3], axis=0), \n",
1399 |     "                                              np.mean(np.array(rates_brotli_bundled)[group3], axis=0)))\n",
1400 |     "frame[\"speed_bundled(MB/s)\"] = np.hstack((np.mean(np.array(speed_gzip_bundled)[group3], axis=0), \n",
1401 |     "                                           np.mean(np.array(speed_brotli_bundled)[group3], axis=0))) / 1000000\n",
1402 |     "\n",
1403 |     "frame[\"rates_unbundled\"] = np.hstack((np.mean(np.array(rates_gzip_unbundled)[group3], axis=0), \n",
1404 |     "                                    np.mean(np.array(rates_brotli_unbundled)[group3], axis=0)))\n",
1405 |     "frame[\"savings_unbundled\"] = 1 - 1 / np.hstack((np.mean(np.array(rates_gzip_unbundled)[group3], axis=0), \n",
1406 |     "                                              np.mean(np.array(rates_brotli_unbundled)[group3], axis=0)))\n",
1407 |     "frame[\"speed_unbundled(MB/s)\"] = np.hstack((np.mean(np.array(speed_gzip_unbundled)[group3], axis=0), \n",
1408 |     "                                           np.mean(np.array(speed_brotli_unbundled)[group3], axis=0))) / 1000000\n",
1409 |     "\n",
1410 |     "frame"
1411 |    ]
1412 |   },
1413 |   {
1414 |    "cell_type": "markdown",
1415 |    "metadata": {},
1416 |    "source": [
1417 |     "### Compare the results for each example"
1418 |    ]
1419 |   },
1420 |   {
1421 |    "cell_type": "code",
1422 |    "execution_count": 389,
1423 |    "metadata": {},
1424 |    "outputs": [
1425 |     {
1426 |      "data": {
1427 |       "text/html": [
1428 |        "<div>\n",
1429 |        "<style scoped>\n",
1430 |        "    .dataframe tbody tr th:only-of-type {\n",
1431 |        "        vertical-align: middle;\n",
1432 |        "    }\n",
1433 |        "\n",
1434 |        "    .dataframe tbody tr th {\n",
1435 |        "        vertical-align: top;\n",
1436 |        "    }\n",
1437 |        "\n",
1438 |        "    .dataframe thead th {\n",
1439 |        "        text-align: right;\n",
1440 |        "    }\n",
1441 |        "</style>\n",
1442 |        "<table border=\"1\" class=\"dataframe\">\n",
1443 |        "  <thead>\n",
1444 |        "    <tr style=\"text-align: right;\">\n",
1445 |        "      <th></th>\n",
1446 |        "      <th>name</th>\n",
1447 |        "      <th>ratio of rates</th>\n",
1448 |        "      <th>ratio of times</th>\n",
1449 |        "    </tr>\n",
1450 |        "  </thead>\n",
1451 |        "  <tbody>\n",
1452 |        "    <tr>\n",
1453 |        "      <th>0</th>\n",
1454 |        "      <td>gzip 4</td>\n",
1455 |        "      <td>1.427097</td>\n",
1456 |        "      <td>0.632208</td>\n",
1457 |        "    </tr>\n",
1458 |        "    <tr>\n",
1459 |        "      <th>1</th>\n",
1460 |        "      <td>gzip 5</td>\n",
1461 |        "      <td>1.453109</td>\n",
1462 |        "      <td>0.749165</td>\n",
1463 |        "    </tr>\n",
1464 |        "    <tr>\n",
1465 |        "      <th>2</th>\n",
1466 |        "      <td>gzip 6</td>\n",
1467 |        "      <td>1.464445</td>\n",
1468 |        "      <td>0.894621</td>\n",
1469 |        "    </tr>\n",
1470 |        "    <tr>\n",
1471 |        "      <th>3</th>\n",
1472 |        "      <td>gzip 7</td>\n",
1473 |        "      <td>1.467382</td>\n",
1474 |        "      <td>0.906732</td>\n",
1475 |        "    </tr>\n",
1476 |        "    <tr>\n",
1477 |        "      <th>4</th>\n",
1478 |        "      <td>gzip 8</td>\n",
1479 |        "      <td>1.469844</td>\n",
1480 |        "      <td>1.163363</td>\n",
1481 |        "    </tr>\n",
1482 |        "    <tr>\n",
1483 |        "      <th>5</th>\n",
1484 |        "      <td>gzip 9</td>\n",
1485 |        "      <td>1.470023</td>\n",
1486 |        "      <td>1.152780</td>\n",
1487 |        "    </tr>\n",
1488 |        "    <tr>\n",
1489 |        "      <th>6</th>\n",
1490 |        "      <td>brotli 4</td>\n",
1491 |        "      <td>1.393734</td>\n",
1492 |        "      <td>0.490474</td>\n",
1493 |        "    </tr>\n",
1494 |        "    <tr>\n",
1495 |        "      <th>7</th>\n",
1496 |        "      <td>brotli 5</td>\n",
1497 |        "      <td>1.409129</td>\n",
1498 |        "      <td>0.684028</td>\n",
1499 |        "    </tr>\n",
1500 |        "    <tr>\n",
1501 |        "      <th>8</th>\n",
1502 |        "      <td>brotli 6</td>\n",
1503 |        "      <td>1.418572</td>\n",
1504 |        "      <td>0.757256</td>\n",
1505 |        "    </tr>\n",
1506 |        "    <tr>\n",
1507 |        "      <th>9</th>\n",
1508 |        "      <td>brotli 7</td>\n",
1509 |        "      <td>1.426384</td>\n",
1510 |        "      <td>1.009216</td>\n",
1511 |        "    </tr>\n",
1512 |        "    <tr>\n",
1513 |        "      <th>10</th>\n",
1514 |        "      <td>brotli 8</td>\n",
1515 |        "      <td>1.429969</td>\n",
1516 |        "      <td>1.295686</td>\n",
1517 |        "    </tr>\n",
1518 |        "    <tr>\n",
1519 |        "      <th>11</th>\n",
1520 |        "      <td>brotli 9</td>\n",
1521 |        "      <td>1.433369</td>\n",
1522 |        "      <td>1.711266</td>\n",
1523 |        "    </tr>\n",
1524 |        "    <tr>\n",
1525 |        "      <th>12</th>\n",
1526 |        "      <td>brotli 10</td>\n",
1527 |        "      <td>1.460546</td>\n",
1528 |        "      <td>5.122315</td>\n",
1529 |        "    </tr>\n",
1530 |        "    <tr>\n",
1531 |        "      <th>13</th>\n",
1532 |        "      <td>brotli 11</td>\n",
1533 |        "      <td>1.465019</td>\n",
1534 |        "      <td>9.145809</td>\n",
1535 |        "    </tr>\n",
1536 |        "  </tbody>\n",
1537 |        "</table>\n",
1538 |        "</div>"
1539 |       ],
1540 |       "text/plain": [
1541 |        "         name  ratio of rates  ratio of times\n",
1542 |        "0      gzip 4        1.427097        0.632208\n",
1543 |        "1      gzip 5        1.453109        0.749165\n",
1544 |        "2      gzip 6        1.464445        0.894621\n",
1545 |        "3      gzip 7        1.467382        0.906732\n",
1546 |        "4      gzip 8        1.469844        1.163363\n",
1547 |        "5      gzip 9        1.470023        1.152780\n",
1548 |        "6    brotli 4        1.393734        0.490474\n",
1549 |        "7    brotli 5        1.409129        0.684028\n",
1550 |        "8    brotli 6        1.418572        0.757256\n",
1551 |        "9    brotli 7        1.426384        1.009216\n",
1552 |        "10   brotli 8        1.429969        1.295686\n",
1553 |        "11   brotli 9        1.433369        1.711266\n",
1554 |        "12  brotli 10        1.460546        5.122315\n",
1555 |        "13  brotli 11        1.465019        9.145809"
1556 |       ]
1557 |      },
1558 |      "execution_count": 389,
1559 |      "metadata": {},
1560 |      "output_type": "execute_result"
1561 |     }
1562 |    ],
1563 |    "source": [
1564 |     "ratio_of_rates = []\n",
1565 |     "ratio_of_times = []\n",
1566 |     "for i in range(len(rates_gzip_unbundled)):\n",
1567 |     "    ratio_of_rates.append(np.hstack((np.array(rates_gzip_bundled[i]) / np.array(rates_gzip_unbundled[i]),\n",
1568 |     "                                     np.array(rates_brotli_bundled[i]) / np.array(rates_brotli_unbundled[i]))))\n",
1569 |     "    ratio_of_times.append(np.hstack((np.array(times_gzip_bundled[i]) / np.array(times_gzip_unbundled[i]),\n",
1570 |     "                                     np.array(times_brotli_bundled[i]) / np.array(times_brotli_unbundled[i]))))\n",
1571 |     "    \n",
1572 |     "frame = pd.DataFrame()\n",
1573 |     "frame[\"name\"] = [\"gzip 4\", \"gzip 5\", \"gzip 6\", \"gzip 7\", \"gzip 8\", \"gzip 9\",\n",
1574 |     "                 \"brotli 4\", \"brotli 5\", \"brotli 6\", \"brotli 7\", \"brotli 8\", \"brotli 9\", \"brotli 10\", \"brotli 11\"]\n",
1575 |     "frame[\"ratio of rates\"] = np.mean(ratio_of_rates, axis=0)\n",
1576 |     "frame[\"ratio of times\"] = np.mean(ratio_of_times, axis=0)\n",
1577 |     "frame"
1578 |    ]
1579 |   }
1580 |  ],
1581 |  "metadata": {
1582 |   "kernelspec": {
1583 |    "display_name": "Python 3",
1584 |    "language": "python",
1585 |    "name": "python3"
1586 |   },
1587 |   "language_info": {
1588 |    "codemirror_mode": {
1589 |     "name": "ipython",
1590 |     "version": 3
1591 |    },
1592 |    "file_extension": ".py",
1593 |    "mimetype": "text/x-python",
1594 |    "name": "python",
1595 |    "nbconvert_exporter": "python",
1596 |    "pygments_lexer": "ipython3",
1597 |    "version": "3.6.7"
1598 |   }
1599 |  },
1600 |  "nbformat": 4,
1601 |  "nbformat_minor": 2
1602 | }
1603 | 


--------------------------------------------------------------------------------