├── compression_experiments ├── gzip_compress.sh ├── npm_install_packages.sh ├── brotli_compress.sh ├── find_urls_save.sh ├── js_dataset_compression.pdf ├── http_archive_compression.pdf ├── npm_packages_compression.pdf ├── npm_packages_compression.ipynb ├── js_dataset_compression.ipynb └── http_archive_compression.ipynb ├── brotli_bits_types ├── brotli_decompress_and_log.sh └── bits_types_in_compressed_file.pdf ├── comparison_script ├── analyze_rates.pdf ├── makefile ├── README.md └── compression.cc ├── analyze_block_splitting ├── analyze_block_splitting.pdf ├── makefile └── save_block_splitting.cc ├── third_party └── bundle_analyzer │ ├── package.json │ ├── predictions.js │ ├── get_chunks.js │ ├── package-lock.json │ ├── index.js │ └── module-clues.js ├── README.md ├── contributing.md ├── code-of-conduct.md └── LICENSE /compression_experiments/gzip_compress.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | { time gzip -$1 -f -k -c $4 > $3; } 2> $2 4 | -------------------------------------------------------------------------------- /compression_experiments/npm_install_packages.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cat package.txt | xargs npm install 4 | -------------------------------------------------------------------------------- /compression_experiments/brotli_compress.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | { time brotli -q $1 -f -o $3 $4 ; } 2> $2 4 | -------------------------------------------------------------------------------- /brotli_bits_types/brotli_decompress_and_log.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | brotli -d -q 11 -f -o example.txt example.txt.br > log.txt 3 | -------------------------------------------------------------------------------- /compression_experiments/find_urls_save.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | find node_modules -name "*.js" -type f > urls_for_package.txt 4 | -------------------------------------------------------------------------------- /comparison_script/analyze_rates.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleChromeLabs/dynamic-web-bundle-serving/master/comparison_script/analyze_rates.pdf -------------------------------------------------------------------------------- /analyze_block_splitting/analyze_block_splitting.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleChromeLabs/dynamic-web-bundle-serving/master/analyze_block_splitting/analyze_block_splitting.pdf -------------------------------------------------------------------------------- /brotli_bits_types/bits_types_in_compressed_file.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleChromeLabs/dynamic-web-bundle-serving/master/brotli_bits_types/bits_types_in_compressed_file.pdf -------------------------------------------------------------------------------- /compression_experiments/js_dataset_compression.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleChromeLabs/dynamic-web-bundle-serving/master/compression_experiments/js_dataset_compression.pdf -------------------------------------------------------------------------------- /compression_experiments/http_archive_compression.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleChromeLabs/dynamic-web-bundle-serving/master/compression_experiments/http_archive_compression.pdf -------------------------------------------------------------------------------- 
/compression_experiments/npm_packages_compression.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoogleChromeLabs/dynamic-web-bundle-serving/master/compression_experiments/npm_packages_compression.pdf -------------------------------------------------------------------------------- /third_party/bundle_analyzer/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "bundle-analyzer", 3 | "version": "0.0.1", 4 | "main": "index.js", 5 | "type": "module", 6 | "scripts": { 7 | "start": "node --experimental-modules demo-server.js" 8 | }, 9 | "dependencies": { 10 | "acorn-loose": "^7.0.0", 11 | "dlv": "^1.1.3", 12 | "fs": "^0.0.1-security" 13 | }, 14 | "engines": { 15 | "node": "12.x" 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Dynamic Web Bundle Serving 2 | 3 | Experiments in prototyping dynamic web bundle serving 4 | 5 | ## What is here 6 | The `comparison_script` folder contains the script for running the compression for bundled and unbundled approaches for different compression algorithms . See more in [README](comparison_script/README.md) 7 | 8 | The `compression_experiments` folder contains some research on compression of js code and bundles of js code from different resources. 9 | -------------------------------------------------------------------------------- /comparison_script/makefile: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | CXX=g++ 16 | CXXFLAGS=-g -Wall -MMD -std=c++11 17 | LDLIBS=-lstdc++ -lbrotlienc -lz 18 | 19 | all: compression 20 | 21 | #compression.o: compression.cc 22 | # g++ -std=c++11 -c compression.cc 23 | 24 | compression: compression.o 25 | 26 | clean: 27 | rm compression.o compression 28 | -------------------------------------------------------------------------------- /analyze_block_splitting/makefile: -------------------------------------------------------------------------------- 1 | # Copyright 2020 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | CXX=g++ 16 | CXXFLAGS=-g -Wall -MMD -std=c++11 17 | LDLIBS=-lstdc++ -lbrotlienc -lz 18 | 19 | all: save_block_splitting 20 | 21 | #save_block_splitting.o: save_block_splitting.cc 22 | # g++ -std=c++11 -c save_block_splitting.cc 23 | 24 | save_block_splitting: save_block_splitting.o 25 | 26 | clean: 27 | rm save_block_splitting.o save_block_splitting 28 | -------------------------------------------------------------------------------- /third_party/bundle_analyzer/predictions.js: -------------------------------------------------------------------------------- 1 | import moduleClues from "./module-clues.js"; 2 | 3 | /** 4 | * Uses the list of module clues to find prediction matches for a minified webpack module, 5 | * provided as a string 6 | * 7 | * @param {string} functionText - A function extracted from a webpack bundle 8 | */ 9 | export function predictModuleName(functionText) { 10 | // Note, this is obviously a potential performance problem--as we're currently doing a 11 | // full search of the (potententially very large) bundle for every clue in the list. 12 | // Running this in a worker seems to solve the problem for now, but if the clue list got 13 | // very long, a different solution might be required. 14 | let matchedClue = moduleClues.find(clue => 15 | typeof clue.includes === "object" // Handle regex clues differently from strings 16 | ? clue.includes.test(functionText) 17 | : functionText.includes(clue.includes) 18 | ); 19 | if (!matchedClue) { 20 | matchedClue = { module: "", url: "", includes: "" }; 21 | } 22 | return matchedClue; 23 | } 24 | 25 | -------------------------------------------------------------------------------- /third_party/bundle_analyzer/get_chunks.js: -------------------------------------------------------------------------------- 1 | /** @license 2 | * Copyright 2020 Google Inc. All Rights Reserved. 
3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | import { createServer } from 'http'; 18 | import { readFile } from 'fs'; 19 | import { analyze } from './index.js'; 20 | import { writeFileSync } from 'fs'; 21 | 22 | var args = process.argv.slice(2); 23 | readFile(args[0], 'utf8', function (err,data) { 24 | if (err) { 25 | return console.log(err); 26 | } 27 | const result = analyze(data); 28 | writeFileSync(args[1], JSON.stringify(result, 0, 2)); 29 | }); 30 | -------------------------------------------------------------------------------- /contributing.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | We'd love to accept your patches and contributions to this project. There are 4 | just a few small guidelines you need to follow. 5 | 6 | ## Contributor License Agreement 7 | 8 | Contributions to this project must be accompanied by a Contributor License 9 | Agreement. You (or your employer) retain the copyright to your contribution; 10 | this simply gives us permission to use and redistribute your contributions as 11 | part of the project. Head over to <https://cla.developers.google.com/> to see 12 | your current agreements on file or to sign a new one. 13 | 14 | You generally only need to submit a CLA once, so if you've already submitted one 15 | (even if it was for a different project), you probably don't need to do it 16 | again. 
17 | 18 | ## Code reviews 19 | 20 | All submissions, including submissions by project members, require review. We 21 | use GitHub pull requests for this purpose. Consult 22 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 23 | information on using pull requests. 24 | 25 | ## Community Guidelines 26 | 27 | This project follows [Google's Open Source Community 28 | Guidelines](https://opensource.google/conduct/). 29 | -------------------------------------------------------------------------------- /third_party/bundle_analyzer/package-lock.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "bundle-analyzer", 3 | "version": "0.0.1", 4 | "lockfileVersion": 1, 5 | "requires": true, 6 | "dependencies": { 7 | "acorn": { 8 | "version": "7.2.0", 9 | "resolved": "https://registry.npmjs.org/acorn/-/acorn-7.2.0.tgz", 10 | "integrity": "sha512-apwXVmYVpQ34m/i71vrApRrRKCWQnZZF1+npOD0WV5xZFfwWOmKGQ2RWlfdy9vWITsenisM8M0Qeq8agcFHNiQ==" 11 | }, 12 | "acorn-loose": { 13 | "version": "7.0.0", 14 | "resolved": "https://registry.npmjs.org/acorn-loose/-/acorn-loose-7.0.0.tgz", 15 | "integrity": "sha512-TIqpAWkqpdBXfj1XDVBQ/jNbAb6ByGfoqkcz2Pwd8mEHUndxOCw9FR6TqkMCMAr5XV8zYx0+m9GcGjxZzQuA2w==", 16 | "requires": { 17 | "acorn": "^7.0.0" 18 | } 19 | }, 20 | "dlv": { 21 | "version": "1.1.3", 22 | "resolved": "https://registry.npmjs.org/dlv/-/dlv-1.1.3.tgz", 23 | "integrity": "sha512-+HlytyjlPKnIG8XuRG8WvmBP8xs8P71y+SKKS6ZXWoEgLuePxtDoUEiH7WkdePWrQ5JBpE6aoVqfZfJUQkjXwA==" 24 | }, 25 | "fs": { 26 | "version": "0.0.1-security", 27 | "resolved": "https://registry.npmjs.org/fs/-/fs-0.0.1-security.tgz", 28 | "integrity": "sha1-invTcYa23d84E/I4WLV+yq9eQdQ=" 29 | } 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /comparison_script/README.md: -------------------------------------------------------------------------------- 1 | # Comparison of bundled and unbundled 
approaches for js scripts compression. 2 | 3 | The script compares an unbundled approach with a bundled one for js scripts compression for gzip and brotli of different levels. 4 | After running the script you will get a file `compression_results.json` with the results of the compression. An example of such a file can be found [here](compression_results.json). 5 | 6 | 7 | ## How to run the script 8 | The compression script takes an argument `number_repetitions`, which specifies how many times to compress each bundle so that the results are more stable. 9 | 10 | ``` 11 | $ cd bundle_analyzer 12 | $ npm i 13 | $ cd .. 14 | $ make 15 | $ ./compression 1 16 | ``` 17 | 18 | Before running, make sure you have a folder named `bundles_source` in the current directory. This folder should contain a file `bundle_files.txt` with the names of the files with bundles. 19 | Example of `bundle_files.txt`: 20 | ``` 21 | bundles_source/bundle_0.txt 22 | bundles_source/bundle_1.txt 23 | bundles_source/bundle_2.txt 24 | bundles_source/bundle_3.txt 25 | bundles_source/bundle_4.txt 26 | bundles_source/bundle_5.txt 27 | ``` 28 | 29 | The folder `bundles_source` should also contain the files listed in `bundle_files.txt`, each containing the text of one bundle. 
30 | -------------------------------------------------------------------------------- /third_party/bundle_analyzer/index.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @see https://github.com/azukaru/bundle-inspector/blob/master/src/lib/ASTanalyzer.js 3 | */ 4 | 5 | import acornLoose from "acorn-loose"; 6 | import get from "dlv"; 7 | import { predictModuleName } from "./predictions.js"; 8 | 9 | /** Dot-notated paths to look for within a bundle */ 10 | const bundleFunctionPaths = [ 11 | "body.0.expression.right.arguments.0.elements", 12 | "body.0.expression.argument.arguments.0.elements", 13 | "body.0.expression.arguments.0.properties", 14 | "body.0.expression.arguments.0.elements.1.elements", 15 | "body.0.expression.arguments.0.elements.1.properties", 16 | "body.0.expression.right.arguments.1.body.body.0.declarations.0.init.arguments.1.elements", 17 | "body.2.expression.arguments.0.elements.1.properties" 18 | ].map(p => p.split(".")); 19 | 20 | /** 21 | * Tries several different path signatures to find the list of functions contained 22 | * within an AST representation of a webpack bundle 23 | * 24 | * @param {Object} syntaxTree - An AST representation of a webpack bundle 25 | */ 26 | function getFunctionNodes(syntaxTree) { 27 | return bundleFunctionPaths.reduce( 28 | (acc, path) => acc || get(syntaxTree, path), 29 | null 30 | ); 31 | } 32 | 33 | /** 34 | * Analyzes a webpack bundle in string form and returns useable stats and a list of functions 35 | * 36 | * @param {string} bundle - A webpack bundle 37 | * @return {object} A parsed bundle, with properties for bundle-wide stats and a list of functions 38 | */ 39 | export function analyze(bundle) { 40 | const ast = acornLoose.parse(bundle); 41 | const functionNodes = getFunctionNodes(ast).filter(Boolean); 42 | const functions = functionNodes.map((node, index) => { 43 | const code = bundle.substring(node.start, node.end); 44 | const name = predictModuleName(code); 
45 | const id = index + 1; 46 | 47 | return { id, name, code }; 48 | }); 49 | return functions; 50 | } 51 | 52 | -------------------------------------------------------------------------------- /analyze_block_splitting/save_block_splitting.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2020 Google Inc. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | 25 | int DEFAULT_WINDOW = 24; 26 | 27 | size_t FileSize(FILE* file) { 28 | fseek(file, 0, SEEK_END); 29 | size_t size = ftell(file); 30 | fseek(file, 0, SEEK_SET); 31 | return size; 32 | } 33 | 34 | FILE* OpenFile(const char* filename, const char* mode) { 35 | FILE* file = fopen(filename, mode); 36 | if (file == NULL) { 37 | perror("fopen failed"); 38 | } 39 | return file; 40 | } 41 | 42 | void ReadData(FILE* file, unsigned char** data, size_t* size) { 43 | *size = FileSize(file); 44 | *data = (unsigned char*) malloc(*size); 45 | if (0 == fread(*data, 1, *size, file)) { 46 | throw "Failed to read from file"; 47 | } 48 | return; 49 | } 50 | 51 | void BrotliCompressAndSaveBlockSplitting(int level, int window, const unsigned char* input_data, size_t input_size, unsigned char* output_data, size_t output_buffer_size) { 52 | ShouldSaveBlockSplit(); 53 | if 
(!BrotliEncoderCompress(level, window, BROTLI_MODE_GENERIC, input_size, input_data, &output_buffer_size, output_data)) { 54 | throw "Failure in BrotliCompress"; 55 | } 56 | } 57 | 58 | int MinWindowLargerThanFile(int fileSize, int max) { 59 | int window = 24; 60 | if (fileSize > 0) { 61 | window = 10; 62 | while (((size_t)1 << (window)) - 16 < (uint64_t)fileSize) { 63 | ++window; 64 | if (window == max) break; 65 | } 66 | } 67 | return window; 68 | } 69 | 70 | 71 | 72 | int main (int argc, char** argv) { 73 | try { 74 | char* bundle_file = argv[1]; 75 | FILE* infile = OpenFile(bundle_file, "rb"); 76 | if (infile == NULL) { 77 | exit(1); 78 | } 79 | unsigned char* input_data = NULL; 80 | size_t input_size = 0; 81 | ReadData(infile, &input_data, &input_size); 82 | fclose(infile); 83 | size_t output_buffer_size = input_size * 2; 84 | unsigned char* output_data = (unsigned char*) malloc(output_buffer_size); 85 | int window = MinWindowLargerThanFile(input_size, DEFAULT_WINDOW); 86 | BrotliCompressAndSaveBlockSplitting(11, window, input_data, input_size, output_data, output_buffer_size); 87 | } catch (const char* message) { 88 | std::cout << "Error\n"; 89 | } 90 | return 0; 91 | } 92 | -------------------------------------------------------------------------------- /code-of-conduct.md: -------------------------------------------------------------------------------- 1 | # Google Open Source Community Guidelines 2 | 3 | At Google, we recognize and celebrate the creativity and collaboration of open 4 | source contributors and the diversity of skills, experiences, cultures, and 5 | opinions they bring to the projects and communities they participate in. 
6 | 7 | Every one of Google's open source projects and communities are inclusive 8 | environments, based on treating all individuals respectfully, regardless of 9 | gender identity and expression, sexual orientation, disabilities, 10 | neurodiversity, physical appearance, body size, ethnicity, nationality, race, 11 | age, religion, or similar personal characteristic. 12 | 13 | We value diverse opinions, but we value respectful behavior more. 14 | 15 | Respectful behavior includes: 16 | 17 | * Being considerate, kind, constructive, and helpful. 18 | * Not engaging in demeaning, discriminatory, harassing, hateful, sexualized, or 19 | physically threatening behavior, speech, and imagery. 20 | * Not engaging in unwanted physical contact. 21 | 22 | Some Google open source projects [may adopt][] an explicit project code of 23 | conduct, which may have additional detailed expectations for participants. Most 24 | of those projects will use our [modified Contributor Covenant][]. 25 | 26 | [may adopt]: https://opensource.google/docs/releasing/preparing/#conduct 27 | [modified Contributor Covenant]: https://opensource.google/docs/releasing/template/CODE_OF_CONDUCT/ 28 | 29 | ## Resolve peacefully 30 | 31 | We do not believe that all conflict is necessarily bad; healthy debate and 32 | disagreement often yields positive results. However, it is never okay to be 33 | disrespectful. 34 | 35 | If you see someone behaving disrespectfully, you are encouraged to address the 36 | behavior directly with those involved. Many issues can be resolved quickly and 37 | easily, and this gives people more control over the outcome of their dispute. 38 | If you are unable to resolve the matter for any reason, or if the behavior is 39 | threatening or harassing, report it. We are dedicated to providing an 40 | environment where participants feel welcome and safe. 41 | 42 | ## Reporting problems 43 | 44 | Some Google open source projects may adopt a project-specific code of conduct. 
45 | In those cases, a Google employee will be identified as the Project Steward, 46 | who will receive and handle reports of code of conduct violations. In the event 47 | that a project hasn’t identified a Project Steward, you can report problems by 48 | emailing opensource@google.com. 49 | 50 | We will investigate every complaint, but you may not receive a direct response. 51 | We will use our discretion in determining when and how to follow up on reported 52 | incidents, which may range from not taking action to permanent expulsion from 53 | the project and project-sponsored spaces. We will notify the accused of the 54 | report and provide them an opportunity to discuss it before any action is 55 | taken. The identity of the reporter will be omitted from the details of the 56 | report supplied to the accused. In potentially harmful situations, such as 57 | ongoing harassment or threats to anyone's safety, we may take action without 58 | notice. 59 | 60 | *This document was adapted from the [IndieWeb Code of Conduct][] and can also 61 | be found at <https://opensource.google/conduct/>.* 62 | 63 | [IndieWeb Code of Conduct]: https://indieweb.org/code-of-conduct 64 | -------------------------------------------------------------------------------- /third_party/bundle_analyzer/module-clues.js: -------------------------------------------------------------------------------- 1 | // Exports an array of "clue" objects for use in automatically deducing the source of 2 | // minified code in webpack bundles 3 | 4 | export default [ 5 | { 6 | module: "bluebird", 7 | includes: "prototype.disableTrampolineIfNecessary", 8 | url: "https://github.com/petkaantonov/bluebird", 9 | polyfill: true 10 | }, 11 | { 12 | module: "bn", 13 | includes: /67108864\s?&\s?67108863/, 14 | url: "https://github.com/indutny/bn.js/" 15 | }, 16 | { 17 | module: "character-entities", 18 | includes: "CapitalDifferentialD:", 19 | url: "https://www.npmjs.com/package/character-entities" 20 | }, 21 | { 22 | module: "corejs/promise", 23 | 
includes: "Promise can't be resolved itself", 24 | url: 25 | "https://github.com/zloirock/core-js/blob/master/packages/core-js/modules/es.promise.js", 26 | polyfill: true 27 | }, 28 | { 29 | module: "elliptic", 30 | includes: "prototype._getEndoRoots", 31 | url: "https://github.com/indutny/elliptic" 32 | }, 33 | { 34 | module: "emotion", 35 | includes: 'querySelectorAll("style[data-emotion-"', 36 | url: "https://github.com/emotion-js/emotion" 37 | }, 38 | { 39 | module: "fingerprint2", 40 | includes: "getWebglVendorAndRenderer", 41 | url: "https://github.com/Valve/fingerprintjs2/" 42 | }, 43 | { 44 | module: "i18next", 45 | includes: "options.overloadTranslationOptionHandler", 46 | url: "https://www.i18next.com/" 47 | }, 48 | { 49 | module: "immutable-js", 50 | includes: "Expected Array or iterable object of values, or keyed object", 51 | url: "https://github.com/immutable-js/immutable-js" 52 | }, 53 | { 54 | module: "lodash", 55 | includes: "__lodash_placeholder__", 56 | url: "https://lodash.com/" 57 | }, 58 | { 59 | module: "marked", 60 | includes: 'Error("Infinite loop on byte: "', 61 | url: "https://github.com/markedjs/marked" 62 | }, 63 | { 64 | module: "mobile-detect", 65 | includes: "PrestigioTablet", 66 | url: "http://hgoebl.github.io/mobile-detect.js/" 67 | }, 68 | { 69 | module: "moment", 70 | includes: "localeData().monthsShort", 71 | url: "https://momentjs.com/" 72 | }, 73 | { 74 | module: "next/route", 75 | includes: 'Error("Cannot update unavailable route:', 76 | url: "" 77 | }, 78 | { 79 | module: "next/client/index", 80 | includes: 'emit("before-reactdom-render"', 81 | url: 82 | "https://github.com/zeit/next.js/blob/canary/packages/next/client/index.js" 83 | }, 84 | { 85 | module: "node/buffer", 86 | includes: 87 | 'Error("If encoding is specified then the first argument must be a string")', 88 | url: "" 89 | }, 90 | { 91 | module: "node/url", 92 | includes: "\"Parameter 'url' must be a string, not \"", 93 | url: 
"https://github.com/nodejs/node/blob/master/lib/url.js" 94 | }, 95 | { 96 | module: "nprogress", 97 | includes: "nprogress-custom-parent", 98 | url: "https://github.com/rstacruz/nprogress" 99 | }, 100 | { 101 | module: "popmotion", 102 | includes: "are of different format, or a value might have changed value", 103 | url: "https://popmotion.io/" 104 | }, 105 | { 106 | module: "preact", 107 | includes: "__preactattr_", 108 | url: "https://preactjs.com/" 109 | }, 110 | { 111 | module: "raven-js", 112 | includes: "Error: Raven has already been configured", 113 | url: "https://www.npmjs.com/package/raven-js" 114 | }, 115 | { 116 | module: "react-aria-modal", 117 | includes: "react-aria-modal instances should have", 118 | url: "https://github.com/davidtheclark/react-aria-modal" 119 | }, 120 | { 121 | module: "react-i18Next", 122 | includes: "getI18nTranslate.bind", 123 | url: "https://react.i18next.com/" 124 | }, 125 | { 126 | module: "react-helmet", 127 | includes: "convertReactPropstoHtmlAttributes", 128 | url: "https://github.com/nfl/react-helmet" 129 | }, 130 | { 131 | module: "react-intl", 132 | includes: "pluralRuleFunction:function", 133 | url: "https://github.com/formatjs/react-intl" 134 | }, 135 | { 136 | module: "react-redux", 137 | includes: "parentSub.addNestedSub", 138 | url: "https://react-redux.js.org/" 139 | }, 140 | { 141 | module: "regenerator-runtime", 142 | includes: 'Error("Generator is already running")', 143 | url: "https://www.npmjs.com/package/regenerator-runtime", 144 | polyfill: true 145 | }, 146 | { 147 | module: "styled-jsx/stylesheet", 148 | includes: "StyleSheet: illegal rule:", 149 | url: "https://github.com/zeit/styled-jsx" 150 | }, 151 | { 152 | module: "whatwg-fetch", 153 | includes: 'searchParams:"URLSearchParams"', 154 | url: "https://www.npmjs.com/package/whatwg-fetch", 155 | polyfill: true 156 | }, 157 | // VAGUE SELECTORS BELOW 158 | { 159 | module: "possible application code", 160 | includes: "this hasn't been initialised - 
super() hasn't been called", //Babel adds this during transpilation 161 | url: "" 162 | }, 163 | { 164 | module: "react module", 165 | includes: "https://reactjs.org/docs/error-decoder.html?invariant=" 166 | } 167 | ]; 168 | 169 | // Snippet for authoring clues: 170 | // "Module_Clue": { 171 | // "prefix": "clue", 172 | // "body": [ 173 | // "{", 174 | // "\tmodule: '$1',", 175 | // "\tincludes: '$2',", 176 | // "\turl: '$3'", 177 | // "}," 178 | // ], 179 | // "description": "Clue syntax for bundle analyzer" 180 | // } 181 | 182 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 
30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 
62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /compression_experiments/npm_packages_compression.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "Copyright 2020 Google Inc. All Rights Reserved.\n", 8 | "\n", 9 | "Licensed under the Apache License, Version 2.0 (the \"License\");
\n", 10 | "you may not use this file except in compliance with the License.
\n", 11 | "You may obtain a copy of the License at
\n", 12 | "\n", 13 | " http://www.apache.org/licenses/LICENSE-2.0\n", 14 | "\n", 15 | "Unless required by applicable law or agreed to in writing, software\n", 16 | "distributed under the License is distributed on an \"AS IS\" BASIS,\n", 17 | "WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
\n", 18 | "\n", 19 | "See the License for the specific language governing permissions and \n", 20 | "limitations under the License." 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 1, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "import numpy as np\n", 30 | "import json\n", 31 | "import matplotlib.pyplot as plt\n", 32 | "from tqdm import tqdm\n", 33 | "import random\n", 34 | "import subprocess\n", 35 | "import time\n", 36 | "import os" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 3, 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "with open(\"packages_npm.txt\") as file:\n", 46 | " packages = file.read().strip().split('\\n')" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 10, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "def get_seconds(time): \n", 56 | " min_ind = time.find('m')\n", 57 | " mins = int(time[:min_ind])\n", 58 | " second = float(time[min_ind + 1:-1])\n", 59 | " return mins * 60 + second\n", 60 | "\n", 61 | "def log(file, msg):\n", 62 | " f = open(file, 'a+')\n", 63 | " f.write(msg + '\\n')\n", 64 | " f.close()" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 11, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "rates_gzip = []\n", 74 | "rates_brotli = []\n", 75 | "times_gzip = []\n", 76 | "times_brotli = []\n", 77 | "speed_gzip = []\n", 78 | "speed_brotli = []\n", 79 | "init_sizes = []\n", 80 | "all_urls = []\n", 81 | "\n", 82 | "for i in range(len(packages)):\n", 83 | " with open(\"package.txt\", \"w\") as file:\n", 84 | " file.write(packages[i])\n", 85 | " #delete the current node_modules directories containing previous package\n", 86 | " result = subprocess.run([\"rm\", \"-rf\", \"node_modules\"])\n", 87 | " #install the package and save the names of js scripts\n", 88 | " result = subprocess.run([\"bash\", \"npm_install_packages.sh\"])\n", 89 | " result = subprocess.run([\"bash\", 
\"find_urls_save.sh\"])\n", 90 | " with open(\"urls_for_package.txt\") as file:\n", 91 | " urls = file.read().strip().split('\\n')\n", 92 | " all_urls.append(urls)\n", 93 | " \n", 94 | " #concatenate all scripts of that package together to simulate web bundle\n", 95 | " script_concatenated = \"\"\n", 96 | " for url in all_urls[i]:\n", 97 | " if url == \"\":\n", 98 | " continue\n", 99 | " if not os.path.exists(url):\n", 100 | " print(i)\n", 101 | " print(\"DOESN'T EXIST: \", url)\n", 102 | " continue\n", 103 | " with open(url) as file:\n", 104 | " script_concatenated += file.read()\n", 105 | " \n", 106 | " rates_gzip_compressed = []\n", 107 | " rates_brotli_compressed = []\n", 108 | " times_gzip_compressed = []\n", 109 | " times_brotli_compressed = []\n", 110 | " speed_gzip_compressed = []\n", 111 | " speed_brotli_compressed = []\n", 112 | " \n", 113 | " with open(\"example2.txt\", \"w\") as file:\n", 114 | " file.write(script_concatenated)\n", 115 | " size_non_compressed = os.stat(\"example2.txt\").st_size\n", 116 | " init_sizes.append(size_non_compressed)\n", 117 | " \n", 118 | " # do the gzip compression with different levels\n", 119 | " for level in range(4, 10):\n", 120 | " result = subprocess.run([\"bash\", \"gzip_compress.sh\", str(level), \"time2.txt\", \n", 121 | " \"example_gzip2.txt.gz\", \"example2.txt\"])\n", 122 | " with open(\"time2.txt\") as file:\n", 123 | " user_sys = file.read().strip().split('\\n')[1:]\n", 124 | " time = get_seconds(user_sys[0].split('\\t')[1]) + get_seconds(user_sys[1].split('\\t')[1])\n", 125 | " size_gzip_compressed = os.stat(\"example_gzip2.txt.gz\").st_size\n", 126 | " rates_gzip_compressed.append(size_non_compressed / size_gzip_compressed)\n", 127 | " times_gzip_compressed.append(time)\n", 128 | " speed_gzip_compressed.append(size_non_compressed / time)\n", 129 | "\n", 130 | " # do the brotli compression with different levels\n", 131 | " for level in range(4, 12):\n", 132 | " result = subprocess.run([\"bash\", 
\"brotli_compress.sh\", str(level), \"time2.txt\", \n", 133 | " \"example_brotli2.txt.br\", \"example2.txt\"])\n", 134 | " with open(\"time2.txt\") as file:\n", 135 | " user_sys = file.read().strip().split('\\n')[1:]\n", 136 | " time = get_seconds(user_sys[0].split('\\t')[1]) + get_seconds(user_sys[1].split('\\t')[1])\n", 137 | " size_br_compressed = os.stat(\"example_brotli2.txt.br\").st_size\n", 138 | " rates_brotli_compressed.append(size_non_compressed / size_br_compressed)\n", 139 | " times_brotli_compressed.append(time)\n", 140 | " speed_brotli_compressed.append(size_non_compressed / time)\n", 141 | " \n", 142 | " rates_gzip.append(rates_gzip_compressed)\n", 143 | " rates_brotli.append(rates_brotli_compressed)\n", 144 | " times_gzip.append(times_gzip_compressed)\n", 145 | " times_brotli.append(times_brotli_compressed)\n", 146 | " speed_gzip.append(speed_gzip_compressed)\n", 147 | " speed_brotli.append(speed_brotli_compressed)\n", 148 | " \n", 149 | " if i != 0 and i % 100 == 0:\n", 150 | " log(\"logs3.txt\", \"rates_gzip: \" + str(np.mean(rates_gzip, axis=0)))\n", 151 | " log(\"logs3.txt\", \"rates_brotli: \" + str(np.mean(rates_brotli, axis=0)))\n", 152 | " log(\"logs3.txt\", \"times_gzip: \" + str(np.mean(times_gzip, axis=0)))\n", 153 | " log(\"logs3.txt\", \"times_brotli: \" + str(np.mean(times_brotli, axis=0)))\n", 154 | " log(\"logs3.txt\", \"speed_gzip: \" + str(np.mean(speed_gzip, axis=0)))\n", 155 | " log(\"logs3.txt\", \"speed_brotli: \" + str(np.mean(speed_brotli, axis=0)))" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": 13, 161 | "metadata": {}, 162 | "outputs": [ 163 | { 164 | "data": { 165 | "text/html": [ 166 | "
\n", 167 | "\n", 180 | "\n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | "
nameratessavingsspeed(MB/s)
0gzip 44.6121110.78318058.640409
1gzip 54.8302130.79297042.773977
2gzip 64.9427990.79768529.547876
3gzip 74.9663020.79864324.132054
4gzip 84.9842920.79937014.945622
5gzip 94.9867030.79946712.569951
6brotli 48.2076420.87816248.296430
7brotli 58.5302520.88277029.497596
8brotli 69.0823330.88989622.155465
9brotli 79.4985210.89472014.364543
10brotli 89.7134800.8970509.417515
11brotli 99.9342220.8993386.265608
12brotli 1011.0895140.9098251.230184
13brotli 1111.3785840.9121160.571473
\n", 291 | "
" 292 | ], 293 | "text/plain": [ 294 | " name rates savings speed(MB/s)\n", 295 | "0 gzip 4 4.612111 0.783180 58.640409\n", 296 | "1 gzip 5 4.830213 0.792970 42.773977\n", 297 | "2 gzip 6 4.942799 0.797685 29.547876\n", 298 | "3 gzip 7 4.966302 0.798643 24.132054\n", 299 | "4 gzip 8 4.984292 0.799370 14.945622\n", 300 | "5 gzip 9 4.986703 0.799467 12.569951\n", 301 | "6 brotli 4 8.207642 0.878162 48.296430\n", 302 | "7 brotli 5 8.530252 0.882770 29.497596\n", 303 | "8 brotli 6 9.082333 0.889896 22.155465\n", 304 | "9 brotli 7 9.498521 0.894720 14.364543\n", 305 | "10 brotli 8 9.713480 0.897050 9.417515\n", 306 | "11 brotli 9 9.934222 0.899338 6.265608\n", 307 | "12 brotli 10 11.089514 0.909825 1.230184\n", 308 | "13 brotli 11 11.378584 0.912116 0.571473" 309 | ] 310 | }, 311 | "execution_count": 13, 312 | "metadata": {}, 313 | "output_type": "execute_result" 314 | } 315 | ], 316 | "source": [ 317 | "import pandas as pd\n", 318 | "frame = pd.DataFrame()\n", 319 | "frame[\"name\"] = [\"gzip 4\", \"gzip 5\", \"gzip 6\", \"gzip 7\", \"gzip 8\", \"gzip 9\",\n", 320 | " \"brotli 4\", \"brotli 5\", \"brotli 6\", \"brotli 7\", \"brotli 8\", \"brotli 9\", \"brotli 10\", \"brotli 11\"]\n", 321 | "\n", 322 | "frame[\"rates\"] = np.hstack((np.mean(rates_gzip, axis=0), np.mean(rates_brotli, axis=0)))\n", 323 | "frame[\"savings\"] = 1 - 1 / np.hstack((np.mean(rates_gzip, axis=0), np.mean(rates_brotli, axis=0)))\n", 324 | "frame[\"speed(MB/s)\"] = np.hstack((np.mean(speed_gzip, axis=0), np.mean(speed_brotli, axis=0))) / 1000000\n", 325 | "\n", 326 | "frame" 327 | ] 328 | }, 329 | { 330 | "cell_type": "code", 331 | "execution_count": 25, 332 | "metadata": {}, 333 | "outputs": [ 334 | { 335 | "name": "stdout", 336 | "output_type": "stream", 337 | "text": [ 338 | "non compressed size range 34.465761MB-81.676873MB\n" 339 | ] 340 | } 341 | ], 342 | "source": [ 343 | "print(\"non compressed size range {}MB-{}MB\".format(np.min(init_sizes) / 1000000, np.max(init_sizes)/ 1000000))" 344 | 
] 345 | } 346 | ], 347 | "metadata": { 348 | "kernelspec": { 349 | "display_name": "Python 3", 350 | "language": "python", 351 | "name": "python3" 352 | }, 353 | "language_info": { 354 | "codemirror_mode": { 355 | "name": "ipython", 356 | "version": 3 357 | }, 358 | "file_extension": ".py", 359 | "mimetype": "text/x-python", 360 | "name": "python", 361 | "nbconvert_exporter": "python", 362 | "pygments_lexer": "ipython3", 363 | "version": "3.6.7" 364 | } 365 | }, 366 | "nbformat": 4, 367 | "nbformat_minor": 2 368 | } 369 | -------------------------------------------------------------------------------- /comparison_script/compression.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2020 Google Inc. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include "json.hpp" 29 | 30 | 31 | using json = nlohmann::json; 32 | 33 | int DEFAULT_WINDOW = 24; 34 | 35 | struct CompressionStatistics { 36 | float compressed_size; 37 | float compression_time; 38 | float decompression_time; 39 | CompressionStatistics(float size, float comp_time, float decomp_time) : compressed_size(size), compression_time(comp_time), 40 | decompression_time(decomp_time) {} 41 | }; 42 | 43 | size_t FileSize(FILE* file) { 44 | fseek(file, 0, SEEK_END); 45 | size_t size = ftell(file); 46 | fseek(file, 0, SEEK_SET); 47 | return size; 48 | } 49 | 50 | FILE* OpenFile(const char* filename, const char* mode) { 51 | FILE* file = fopen(filename, mode); 52 | if (file == NULL) { 53 | perror("fopen failed"); 54 | } 55 | return file; 56 | } 57 | 58 | void ReadData(FILE* file, unsigned char** data, size_t* size) { 59 | *size = FileSize(file); 60 | *data = (unsigned char*) malloc(*size); 61 | if (0 == fread(*data, 1, *size, file)) { 62 | throw "Failed to read from file"; 63 | } 64 | return; 65 | } 66 | 67 | void GetNamesFromFile(std::string file_name, std::vector& names) { 68 | std::string line; 69 | std::ifstream infile(file_name); 70 | while (std::getline(infile, line)) { 71 | names.push_back(line); 72 | } 73 | infile.close(); 74 | } 75 | 76 | float GetSeconds(std::string time_string) { 77 | size_t ind = time_string.find("\t"); 78 | auto time = time_string.substr(ind, time_string.size() - ind); 79 | auto minutes_ind = time.find('m'); 80 | auto minutes = time.substr(0, minutes_ind); 81 | auto seconds = time.substr(minutes_ind + 1, time.size() - minutes_ind - 2); 82 | return std::stof(minutes) * 60 + std::stof(seconds); 83 | } 84 | 85 | size_t BrotliCompress(int level, int window, const unsigned char* input_data, size_t input_size, unsigned char* output_data, 
size_t output_buffer_size) { 86 | if (!BrotliEncoderCompress(level, window, BROTLI_MODE_GENERIC, input_size, input_data, &output_buffer_size, output_data)) { 87 | throw "Failure in BrotliCompress"; 88 | } 89 | return output_buffer_size; 90 | } 91 | 92 | size_t BrotliDecompress(const unsigned char* input_data, size_t input_size, unsigned char* output_data, size_t output_buffer_size) { 93 | if (BrotliDecoderDecompress(input_size, input_data, &output_buffer_size, output_data) != 1) { 94 | throw "Failure in BrotliDecompress"; 95 | } 96 | return output_buffer_size; 97 | } 98 | 99 | size_t GzipCompress(int level, int window, const unsigned char* input_data, size_t input_size, unsigned char* output_data, size_t output_buffer_size, int& time) { 100 | std::ofstream out("example.txt"); 101 | out.write((const char*)input_data, input_size); 102 | out.close(); 103 | 104 | std::string command = "{ time gzip -" + std::to_string(level) + 105 | " -f -k -c example.txt > example_gzip.txt.gz; } 2> time.txt"; 106 | system(command.c_str()); 107 | 108 | std::vector times; 109 | GetNamesFromFile("time.txt", times); 110 | time = GetSeconds(times[2]) + GetSeconds(times[3]); 111 | 112 | std::ifstream infile("example_gzip.txt.gz"); 113 | infile.seekg(0,std::ios_base::end); 114 | auto length = infile.tellg(); 115 | return static_cast(length); 116 | } 117 | 118 | size_t ZlibCompress(int level, int window, const unsigned char* input_data, size_t input_size, unsigned char* output_data, size_t output_buffer_size) { 119 | z_stream strm; 120 | strm.zalloc = Z_NULL; 121 | strm.zfree = Z_NULL; 122 | strm.opaque = Z_NULL; 123 | if (Z_OK != deflateInit2(&strm, level, Z_DEFLATED, 15 + 16, 8, Z_DEFAULT_STRATEGY)) { 124 | throw "Failure in deflateInit"; 125 | } 126 | strm.avail_in = input_size; 127 | strm.next_in = (unsigned char*) input_data; 128 | strm.avail_out = output_buffer_size; 129 | strm.next_out = output_data; 130 | if (Z_STREAM_ERROR == deflate(&strm, Z_FINISH)) { 131 | throw "Failure in 
deflate"; 132 | } 133 | if (0 != strm.avail_in) { 134 | throw "Failed to consume entire input in deflate"; 135 | } 136 | size_t output_size = output_buffer_size - strm.avail_out; 137 | deflateEnd(&strm); 138 | return output_size; 139 | } 140 | 141 | size_t ZlibDecompress(const unsigned char* input_data, size_t input_size, unsigned char* output_data, size_t output_buffer_size) { 142 | z_stream strm; 143 | strm.zalloc = Z_NULL; 144 | strm.zfree = Z_NULL; 145 | strm.opaque = Z_NULL; 146 | strm.avail_in = input_size; 147 | strm.next_in = (unsigned char*) input_data; 148 | strm.avail_out = output_buffer_size; 149 | strm.next_out = output_data; 150 | 151 | if (Z_OK != inflateInit2(&strm, 15 + 16)) { 152 | std::cout << "Failure in inflateInit"; 153 | throw "Failure in inflateInit"; 154 | } 155 | if (Z_STREAM_ERROR == inflate(&strm, Z_NO_FLUSH)) { 156 | std::cout << "Failure in inflate"; 157 | throw "Failure in inflate"; 158 | } 159 | 160 | size_t output_size = output_buffer_size - strm.avail_out; 161 | inflateEnd(&strm); 162 | return output_size; 163 | } 164 | 165 | typedef size_t (*CompressionFunc)(int, int, const unsigned char*, size_t, unsigned char*, size_t); 166 | typedef size_t (*DecompressionFunc)(const unsigned char*, size_t, unsigned char*, size_t); 167 | CompressionStatistics MeasureCompress(int level, int window, 168 | const unsigned char* input_data, size_t input_size, 169 | unsigned char* output_data, size_t output_buffer_size, 170 | CompressionFunc compress, DecompressionFunc decompress, int repetitions) { 171 | size_t total_output_size = 0; 172 | clock_t start = clock(); 173 | for (int i = 0 ; i < repetitions ; i++) { 174 | total_output_size += compress(level, window, input_data, input_size, output_data, output_buffer_size); 175 | } 176 | clock_t end = clock(); 177 | float elapsed_time_compress = (float) (end - start) / CLOCKS_PER_SEC; 178 | float compressed_size = (float) total_output_size / repetitions; 179 | 180 | size_t decompressed_size = input_size * 
2; 181 | unsigned char* decompressed_data = (unsigned char*) malloc(decompressed_size); 182 | size_t total_decopress_size = 0; 183 | start = clock(); 184 | for (int i = 0 ; i < repetitions ; i++) { 185 | total_decopress_size = decompress(output_data, output_buffer_size, decompressed_data, decompressed_size); 186 | } 187 | end = clock(); 188 | float elapsed_time_decompress = (float) (end - start) / CLOCKS_PER_SEC; 189 | 190 | assert(total_decopress_size == input_size); 191 | assert(memcmp(decompressed_data, input_data, input_size)==0); 192 | return CompressionStatistics(compressed_size, elapsed_time_compress, elapsed_time_decompress); 193 | } 194 | 195 | int MinWindowLargerThanFile(int fileSize, int max) { 196 | int window = 24; 197 | if (fileSize > 0) { 198 | window = 10; 199 | while (((size_t)1 << (window)) - 16 < (uint64_t)fileSize) { 200 | ++window; 201 | if (window == max) break; 202 | } 203 | } 204 | return window; 205 | } 206 | 207 | bool Execute(const char* cmd) { 208 | char buffer[128]; 209 | std::string stdout = ""; 210 | FILE* pipe = popen(cmd, "r"); 211 | if (!pipe) throw std::runtime_error("popen() failed!"); 212 | try { 213 | while (fgets(buffer, sizeof buffer, pipe) != NULL) { 214 | stdout += buffer; 215 | } 216 | } catch (...) 
{ 217 | pclose(pipe); 218 | throw; 219 | } 220 | pclose(pipe); 221 | if (stdout.find("TypeError") != std::string::npos) { 222 | return false; 223 | } 224 | return true; 225 | } 226 | 227 | void BundledCompression(const unsigned char* input_data, size_t input_size, 228 | unsigned char* output_data, size_t output_buffer_size, 229 | std::ostream & results, int repetitions) { 230 | 231 | results << "\"bundled\":{"; 232 | int window = MinWindowLargerThanFile(input_size, DEFAULT_WINDOW); 233 | std::string name = "brotli"; 234 | for (int level = 1; level <= 11; level ++) { 235 | CompressionStatistics comp_results = MeasureCompress(level, window, input_data, input_size, 236 | output_data, output_buffer_size, 237 | BrotliCompress, BrotliDecompress, repetitions); 238 | float rate = input_size / comp_results.compressed_size; 239 | float speed = (float) (input_size * repetitions) / (comp_results.compression_time * 1024 * 1024); 240 | float decompession_speed = (float) (comp_results.compressed_size * repetitions) / (comp_results.decompression_time * 1024 * 1024); 241 | results << "\"" << name << level << "_compression_rate\":" << std::setprecision(4) << rate << ", \""; 242 | results << name << level << "_compressed_size\":" << std::setprecision(4) << comp_results.compressed_size << ", \""; 243 | results << name << level << "_speed\":" << std::setprecision(4) << speed << ", \""; 244 | results << name << level << "_decompression_speed\":" << std::setprecision(4) << decompession_speed << ",\n"; 245 | } 246 | 247 | name = "zlib"; 248 | for (int level = 1; level <= 9 ; level ++) { 249 | CompressionStatistics comp_results = MeasureCompress(level, window, input_data, input_size, 250 | output_data, output_buffer_size, 251 | ZlibCompress, ZlibDecompress, repetitions); 252 | float rate = input_size / comp_results.compressed_size; 253 | float speed = (float) (input_size * repetitions) / (comp_results.compression_time * 1024 * 1024); 254 | float decompession_speed = (float) 
(comp_results.compressed_size * repetitions) / (comp_results.decompression_time * 1024 * 1024); 255 | results << "\"" << name << level << "_compression_rate\":" << std::setprecision(4) << rate << ", \""; 256 | results << name << level << "_compressed_size\":" << std::setprecision(4) << comp_results.compressed_size << ", \""; 257 | results << name << level << "_speed\":" << std::setprecision(4) << speed << ", \""; 258 | results << name << level << "_decompression_speed\":" << std::setprecision(4) << decompession_speed; 259 | if (level < 9) { 260 | results << ",\n"; 261 | } else { 262 | results << "}\n"; 263 | } 264 | } 265 | } 266 | 267 | 268 | void UnbundledCompression(const unsigned char* input_data, size_t input_size, 269 | unsigned char* output_data, size_t output_buffer_size, 270 | std::ostream & results, int repetitions, std::string file_name) { 271 | 272 | std::string command = "node --experimental-modules third_party/bundle_analyzer/get_chunks.js " + file_name + 273 | " parsed_bundle.json 2>&1"; 274 | bool execution_result = Execute(command.c_str()); 275 | if (!execution_result) { 276 | results << "\"unbundled\":{\"chunks_execution_result\":" << execution_result << "},\n"; 277 | return; 278 | } 279 | 280 | std::ifstream parsed_bundle("parsed_bundle.json"); 281 | json chunks; 282 | parsed_bundle >> chunks; 283 | std::string code; 284 | std::vector compressed_sizes(11 + 9, 0); 285 | std::vector compression_times(11 + 9, 0); 286 | std::vector decompression_times(11 + 9, 0); 287 | int overall_size = 0; 288 | results << "\"chunks_count\":" << chunks.size() << ",\n"; 289 | if (!chunks.size()) { 290 | results << "\"unbundled\":{\"chunks_execution_result\":" << execution_result << "},\n"; 291 | return; 292 | } 293 | for (const auto& chunk : chunks) { 294 | code = chunk["code"]; 295 | int chunk_size = code.size(); 296 | std::vector statistics_chunk; 297 | int window = MinWindowLargerThanFile(chunk_size, DEFAULT_WINDOW); 298 | std::string name = "brotli"; 299 | 
overall_size += chunk_size; 300 | for (int level = 1; level <= 11; level ++) { 301 | CompressionStatistics comp_results = MeasureCompress(level, window, (unsigned char*)code.c_str(), chunk_size, 302 | output_data, output_buffer_size, BrotliCompress, BrotliDecompress, repetitions); 303 | compressed_sizes[level - 1] += comp_results.compressed_size; 304 | compression_times[level - 1] += comp_results.compression_time; 305 | decompression_times[level - 1] += comp_results.decompression_time; 306 | } 307 | 308 | for (int level = 1; level <= 9; level ++) { 309 | CompressionStatistics comp_results = MeasureCompress(level, window, (unsigned char*)code.c_str(), chunk_size, 310 | output_data, output_buffer_size, ZlibCompress, ZlibDecompress, repetitions); 311 | compressed_sizes[11 + level - 1] += comp_results.compressed_size; 312 | compression_times[11 + level - 1] += comp_results.compression_time; 313 | decompression_times[11 + level - 1] += comp_results.decompression_time; 314 | } 315 | } 316 | 317 | results << "\"overall_chunks_size\":" << overall_size << ",\n"; 318 | results << "\"unbundled\":{"; 319 | std::string name = "brotli"; 320 | for (int level = 1; level <= 11; level ++) { 321 | float rate = input_size / compressed_sizes[level - 1]; 322 | float speed = (float) (input_size * repetitions) / (compression_times[level - 1] * 1024 * 1024); 323 | float decompression_speed = (float) (compressed_sizes[level - 1] * repetitions) / (decompression_times[level - 1] * 1024 * 1024); 324 | results << "\"" << name << level << "_compression_rate\":" << std::setprecision(4) << rate << ", \""; 325 | results << name << level << "_compressed_size\":" << std::setprecision(4) << compressed_sizes[level - 1] << ", \""; 326 | results << name << level << "_speed\":" << std::setprecision(4) << speed << ", \""; 327 | results << name << level << "_decompression_speed\":" << std::setprecision(4) << decompression_speed << ",\n"; 328 | } 329 | 330 | name = "zlib"; 331 | for (int level = 1; level <= 
9; level ++) { 332 | float rate = input_size / compressed_sizes[11 + level - 1]; 333 | float speed = (float) (input_size * repetitions) / (compression_times[11 + level - 1] * 1024 * 1024); 334 | float decompression_speed = (float) (compressed_sizes[11 + level - 1] * repetitions) / (decompression_times[11 + level - 1] * 1024 * 1024); 335 | results << "\"" << name << level << "_compression_rate\":" << std::setprecision(4) << rate << ", \""; 336 | results << name << level << "_compressed_size\":" << std::setprecision(4) << compressed_sizes[11 + level - 1] << ", \""; 337 | results << name << level << "_speed\":" << std::setprecision(4) << speed << ", \""; 338 | results << name << level << "_decompression_speed\":" << std::setprecision(4) << decompression_speed; 339 | if (level < 9) { 340 | results << ",\n"; 341 | } else { 342 | results << "},\n"; 343 | } 344 | } 345 | } 346 | 347 | 348 | int main (int argc, char** argv) { 349 | try { 350 | std::vector bundle_files; 351 | GetNamesFromFile("bundles_source/bundle_files.txt", bundle_files); 352 | int repetitions = std::stoi(std::string(argv[1])); 353 | std::ostringstream results; 354 | results << "["; 355 | for (int i = 0; i < bundle_files.size(); ++i) { 356 | FILE* infile = OpenFile(bundle_files[i].c_str(), "rb"); 357 | if (infile == NULL) { 358 | exit(1); 359 | } 360 | unsigned char* input_data = NULL; 361 | size_t input_size = 0; 362 | ReadData(infile, &input_data, &input_size); 363 | fclose(infile); 364 | size_t output_buffer_size = input_size * 2; 365 | unsigned char* output_data = (unsigned char*) malloc(output_buffer_size); 366 | 367 | 368 | results << "{\"valid\":true, \"original_size\":" << input_size << ",\n"; 369 | 370 | UnbundledCompression(input_data, input_size, output_data, output_buffer_size, 371 | results, repetitions, bundle_files[i]); 372 | 373 | BundledCompression(input_data, input_size, output_data, output_buffer_size, 374 | results, repetitions); 375 | if (i == bundle_files.size() - 1) { 376 | results 
<< "}]\n"; 377 | } else { 378 | results << "},\n"; 379 | } 380 | std::cout << "bundle number " << i << " completed\n"; 381 | } 382 | std::ofstream out("compression_results.json"); 383 | out << results.str(); 384 | out.close(); 385 | } catch (const char* message) { 386 | std::ofstream out("compression_results.json"); 387 | out << "{\"valid\":false, \"message\":\"" << message << "\"}\n"; 388 | out.close(); 389 | } 390 | return 0; 391 | } 392 | -------------------------------------------------------------------------------- /compression_experiments/js_dataset_compression.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "Copyright 2020 Google Inc. All Rights Reserved.\n", 8 | "\n", 9 | "Licensed under the Apache License, Version 2.0 (the \"License\");
\n", 10 | "you may not use this file except in compliance with the License.
\n", 11 | "You may obtain a copy of the License at
\n", 12 | "\n", 13 | " http://www.apache.org/licenses/LICENSE-2.0\n", 14 | "\n", 15 | "Unless required by applicable law or agreed to in writing, software\n", 16 | "distributed under the License is distributed on an \"AS IS\" BASIS,\n", 17 | "WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
\n", 18 | "\n", 19 | "See the License for the specific language governing permissions and \n", 20 | "limitations under the License." 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 15, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "import numpy as np\n", 30 | "import json\n", 31 | "import matplotlib.pyplot as plt\n", 32 | "from tqdm import tqdm\n", 33 | "import random\n", 34 | "import subprocess\n", 35 | "import time\n", 36 | "import os" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "### Read the data" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 5, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "# js_scripts.txt constains the paths to js files\n", 53 | "with open(\"js_dataset/js_scripts.txt\") as file:\n", 54 | " scripts = file.read().strip().split('\\n')\n", 55 | " \n", 56 | "# dirs_data.txt constains the names of directories in data directory of js 150 dataset\n", 57 | "# we assume that different directories indicates different js apps\n", 58 | "with open(\"js_dataset/dirs_data.txt\") as file:\n", 59 | " dirs = file.read().strip().split('\\n')" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 18, 65 | "metadata": {}, 66 | "outputs": [ 67 | { 68 | "name": "stderr", 69 | "output_type": "stream", 70 | "text": [ 71 | "100%|██████████| 9620/9620 [06:50<00:00, 23.42it/s]\n" 72 | ] 73 | } 74 | ], 75 | "source": [ 76 | "# group script paths by directories\n", 77 | "scripts_by_dirs = []\n", 78 | "\n", 79 | "for directory in tqdm(dirs):\n", 80 | " dir_scripts = []\n", 81 | " for script in scripts:\n", 82 | " if script.startswith(\"data/\" + directory):\n", 83 | " dir_scripts.append(script)\n", 84 | " if len(dir_scripts):\n", 85 | " scripts_by_dirs.append(dir_scripts)" 86 | ] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "metadata": {}, 91 | "source": [ 92 | "### Perform compression" 93 | ] 94 | }, 95 | { 96 | 
"cell_type": "code", 97 | "execution_count": 21, 98 | "metadata": {}, 99 | "outputs": [], 100 | "source": [ 101 | "def get_seconds(time): \n", 102 | " min_ind = time.find('m')\n", 103 | " mins = int(time[:min_ind])\n", 104 | " second = float(time[min_ind + 1:-1])\n", 105 | " return mins * 60 + second\n", 106 | "\n", 107 | "def log(file, msg):\n", 108 | " f = open(file, 'a+')\n", 109 | " f.write(msg + '\\n')\n", 110 | " f.close()" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": 25, 116 | "metadata": {}, 117 | "outputs": [], 118 | "source": [ 119 | "rates_gzip = []\n", 120 | "rates_brotli = []\n", 121 | "times_gzip = []\n", 122 | "times_brotli = []\n", 123 | "speed_gzip = []\n", 124 | "speed_brotli = []\n", 125 | "init_sizes = []\n", 126 | "\n", 127 | "for i in range(len(scripts_by_dirs)):\n", 128 | " \n", 129 | " #concatenate all scripts inside the directory to simulate web bundle\n", 130 | " script_concatenated = \"\"\n", 131 | " for url in scripts_by_dirs[i]:\n", 132 | " if url == \"\":\n", 133 | " continue\n", 134 | " if not os.path.exists(\"js_dataset/\" + url):\n", 135 | " print(\"DOESN'T EXIST: \", url)\n", 136 | " continue\n", 137 | " try:\n", 138 | " with open(\"js_dataset/\" + url) as file:\n", 139 | " script_concatenated += file.read()\n", 140 | " except:\n", 141 | " print(\"didn't read\")\n", 142 | " \n", 143 | " rates_gzip_compressed = []\n", 144 | " rates_brotli_compressed = []\n", 145 | " times_gzip_compressed = []\n", 146 | " times_brotli_compressed = []\n", 147 | " speed_gzip_compressed = []\n", 148 | " speed_brotli_compressed = []\n", 149 | " \n", 150 | " with open(\"example2.txt\", \"w\") as file:\n", 151 | " file.write(script_concatenated)\n", 152 | " size_non_compressed = os.stat(\"example2.txt\").st_size\n", 153 | " init_sizes.append(size_non_compressed)\n", 154 | " \n", 155 | " # do the gzip compression with different levels\n", 156 | " for level in range(4, 10):\n", 157 | " result = subprocess.run([\"bash\", 
\"gzip_compress.sh\", str(level), \"time2.txt\", \n", 158 | " \"example_gzip2.txt.gz\", \"example2.txt\"])\n", 159 | " with open(\"time2.txt\") as file:\n", 160 | " user_sys = file.read().strip().split('\\n')[1:]\n", 161 | " time = get_seconds(user_sys[0].split('\\t')[1]) + get_seconds(user_sys[1].split('\\t')[1])\n", 162 | " size_gzip_compressed = os.stat(\"example_gzip2.txt.gz\").st_size\n", 163 | " rates_gzip_compressed.append(size_non_compressed / size_gzip_compressed)\n", 164 | " times_gzip_compressed.append(time)\n", 165 | " speed_gzip_compressed.append(size_non_compressed / time)\n", 166 | "\n", 167 | " # do the brotli compression with different levels\n", 168 | " for level in range(4, 12):\n", 169 | " result = subprocess.run([\"bash\", \"brotli_compress.sh\", str(level), \"time2.txt\", \n", 170 | " \"example_brotli2.txt.br\", \"example2.txt\"])\n", 171 | " with open(\"time2.txt\") as file:\n", 172 | " user_sys = file.read().strip().split('\\n')[1:]\n", 173 | " time = get_seconds(user_sys[0].split('\\t')[1]) + get_seconds(user_sys[1].split('\\t')[1])\n", 174 | " size_br_compressed = os.stat(\"example_brotli2.txt.br\").st_size\n", 175 | " rates_brotli_compressed.append(size_non_compressed / size_br_compressed)\n", 176 | " times_brotli_compressed.append(time)\n", 177 | " speed_brotli_compressed.append(size_non_compressed / time)\n", 178 | " \n", 179 | " rates_gzip.append(rates_gzip_compressed)\n", 180 | " rates_brotli.append(rates_brotli_compressed)\n", 181 | " times_gzip.append(times_gzip_compressed)\n", 182 | " times_brotli.append(times_brotli_compressed)\n", 183 | " speed_gzip.append(speed_gzip_compressed)\n", 184 | " speed_brotli.append(speed_brotli_compressed)\n", 185 | " \n", 186 | " if i != 0 and i % 500 == 0:\n", 187 | " log(\"logs4.txt\", \"rates_gzip: \" + str(np.mean(rates_gzip, axis=0)))\n", 188 | " log(\"logs4.txt\", \"rates_brotli: \" + str(np.mean(rates_brotli, axis=0)))\n", 189 | " log(\"logs4.txt\", \"times_gzip: \" + str(np.mean(times_gzip, 
axis=0)))\n", 190 | " log(\"logs4.txt\", \"times_brotli: \" + str(np.mean(times_brotli, axis=0)))\n", 191 | " log(\"logs4.txt\", \"speed_gzip: \" + str(np.mean(speed_gzip, axis=0)))\n", 192 | " log(\"logs4.txt\", \"speed_brotli: \" + str(np.mean(speed_brotli, axis=0)))" 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": 27, 198 | "metadata": {}, 199 | "outputs": [ 200 | { 201 | "data": { 202 | "text/html": [ 203 | "
\n", 204 | "\n", 217 | "\n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | "
nameratessavingsspeed(MB/s)
0gzip 43.8250690.73856715.719552
1gzip 53.9489320.74676713.392738
2gzip 64.0031790.75019910.956911
3gzip 74.0176950.7511019.777660
4gzip 84.0293320.7518207.136008
5gzip 94.0317060.7519666.170267
6brotli 44.1357260.75820412.866184
7brotli 54.4965710.7776089.528445
8brotli 64.5438360.7799228.582947
9brotli 74.5823190.7817706.631221
10brotli 84.5998970.7826045.447145
11brotli 94.6220020.7836444.209170
12brotli 104.9301000.7971641.157362
13brotli 115.0196020.8007810.506957
\n", 328 | "
" 329 | ], 330 | "text/plain": [ 331 | " name rates savings speed(MB/s)\n", 332 | "0 gzip 4 3.825069 0.738567 15.719552\n", 333 | "1 gzip 5 3.948932 0.746767 13.392738\n", 334 | "2 gzip 6 4.003179 0.750199 10.956911\n", 335 | "3 gzip 7 4.017695 0.751101 9.777660\n", 336 | "4 gzip 8 4.029332 0.751820 7.136008\n", 337 | "5 gzip 9 4.031706 0.751966 6.170267\n", 338 | "6 brotli 4 4.135726 0.758204 12.866184\n", 339 | "7 brotli 5 4.496571 0.777608 9.528445\n", 340 | "8 brotli 6 4.543836 0.779922 8.582947\n", 341 | "9 brotli 7 4.582319 0.781770 6.631221\n", 342 | "10 brotli 8 4.599897 0.782604 5.447145\n", 343 | "11 brotli 9 4.622002 0.783644 4.209170\n", 344 | "12 brotli 10 4.930100 0.797164 1.157362\n", 345 | "13 brotli 11 5.019602 0.800781 0.506957" 346 | ] 347 | }, 348 | "execution_count": 27, 349 | "metadata": {}, 350 | "output_type": "execute_result" 351 | } 352 | ], 353 | "source": [ 354 | "import pandas as pd\n", 355 | "frame = pd.DataFrame()\n", 356 | "frame[\"name\"] = [\"gzip 4\", \"gzip 5\", \"gzip 6\", \"gzip 7\", \"gzip 8\", \"gzip 9\",\n", 357 | " \"brotli 4\", \"brotli 5\", \"brotli 6\", \"brotli 7\", \"brotli 8\", \"brotli 9\", \"brotli 10\", \"brotli 11\"]\n", 358 | "\n", 359 | "frame[\"rates\"] = np.hstack((np.mean(rates_gzip, axis=0), np.mean(rates_brotli, axis=0)))\n", 360 | "frame[\"savings\"] = 1 - 1 / np.hstack((np.mean(rates_gzip, axis=0), np.mean(rates_brotli, axis=0)))\n", 361 | "frame[\"speed(MB/s)\"] = np.hstack((np.mean(speed_gzip, axis=0), np.mean(speed_brotli, axis=0))) / 1000000\n", 362 | "\n", 363 | "frame" 364 | ] 365 | }, 366 | { 367 | "cell_type": "code", 368 | "execution_count": 46, 369 | "metadata": {}, 370 | "outputs": [ 371 | { 372 | "name": "stdout", 373 | "output_type": "stream", 374 | "text": [ 375 | "non compressed size range 0.0MB-519.170072MB\n" 376 | ] 377 | } 378 | ], 379 | "source": [ 380 | "print(\"non compressed size range {}MB-{}MB\".format(np.min(init_sizes) / 1000000, np.max(init_sizes)/ 1000000))" 381 | ] 382 | }, 
383 | { 384 | "cell_type": "markdown", 385 | "metadata": {}, 386 | "source": [ 387 | "### Group results by non compressed size ranges" 388 | ] 389 | }, 390 | { 391 | "cell_type": "code", 392 | "execution_count": 49, 393 | "metadata": {}, 394 | "outputs": [ 395 | { 396 | "name": "stdout", 397 | "output_type": "stream", 398 | "text": [ 399 | "0 - 100000 bytes\n" 400 | ] 401 | }, 402 | { 403 | "data": { 404 | "text/html": [ 405 | "
\n", 406 | "\n", 419 | "\n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | "
nameratessavingsspeed(MB/s)
0gzip 43.5800080.7206717.447231
1gzip 53.6766720.7280157.008153
2gzip 63.7128790.7306676.428090
3gzip 73.7232380.7314176.065006
4gzip 83.7301480.7319145.120283
5gzip 93.7314930.7320114.732681
6brotli 43.6940640.7292955.004788
7brotli 54.0116370.7507254.648579
8brotli 64.0335700.7520814.471990
9brotli 74.0498760.7530793.971136
10brotli 84.0568820.7535053.708456
11brotli 94.0650700.7540023.146465
12brotli 104.3187490.7684511.005612
13brotli 114.4268460.7741060.470691
\n", 530 | "
" 531 | ], 532 | "text/plain": [ 533 | " name rates savings speed(MB/s)\n", 534 | "0 gzip 4 3.580008 0.720671 7.447231\n", 535 | "1 gzip 5 3.676672 0.728015 7.008153\n", 536 | "2 gzip 6 3.712879 0.730667 6.428090\n", 537 | "3 gzip 7 3.723238 0.731417 6.065006\n", 538 | "4 gzip 8 3.730148 0.731914 5.120283\n", 539 | "5 gzip 9 3.731493 0.732011 4.732681\n", 540 | "6 brotli 4 3.694064 0.729295 5.004788\n", 541 | "7 brotli 5 4.011637 0.750725 4.648579\n", 542 | "8 brotli 6 4.033570 0.752081 4.471990\n", 543 | "9 brotli 7 4.049876 0.753079 3.971136\n", 544 | "10 brotli 8 4.056882 0.753505 3.708456\n", 545 | "11 brotli 9 4.065070 0.754002 3.146465\n", 546 | "12 brotli 10 4.318749 0.768451 1.005612\n", 547 | "13 brotli 11 4.426846 0.774106 0.470691" 548 | ] 549 | }, 550 | "execution_count": 49, 551 | "metadata": {}, 552 | "output_type": "execute_result" 553 | } 554 | ], 555 | "source": [ 556 | "splits = [0, 100000, 1000000, 519170072]\n", 557 | "init_sizes = np.array(init_sizes)\n", 558 | "group1 = np.where((init_sizes >= 0)*(init_sizes <= 100000))[0]\n", 559 | "group2 = np.where((init_sizes > 100000)*(init_sizes <= 1000000))[0]\n", 560 | "group3 = np.where((init_sizes > 1000000)*(init_sizes <= 519170072))[0]\n", 561 | "\n", 562 | "print(0, \"-\", 100000, \"bytes\")\n", 563 | "frame = pd.DataFrame()\n", 564 | "frame[\"name\"] = [\"gzip 4\", \"gzip 5\", \"gzip 6\", \"gzip 7\", \"gzip 8\", \"gzip 9\",\n", 565 | " \"brotli 4\", \"brotli 5\", \"brotli 6\", \"brotli 7\", \"brotli 8\", \"brotli 9\", \"brotli 10\", \"brotli 11\"]\n", 566 | "\n", 567 | "frame[\"rates\"] = np.hstack((np.mean(np.array(rates_gzip)[group1], axis=0), np.mean(np.array(rates_brotli)[group1], axis=0)))\n", 568 | "frame[\"savings\"] = 1 - 1 / np.hstack((np.mean(np.array(rates_gzip)[group1], axis=0), np.mean(np.array(rates_brotli)[group1], axis=0)))\n", 569 | "frame[\"speed(MB/s)\"] = np.hstack((np.mean(np.array(speed_gzip)[group1], axis=0), np.mean(np.array(speed_brotli)[group1], axis=0))) / 1000000\n", 
570 | "\n", 571 | "frame" 572 | ] 573 | }, 574 | { 575 | "cell_type": "code", 576 | "execution_count": 50, 577 | "metadata": {}, 578 | "outputs": [ 579 | { 580 | "name": "stdout", 581 | "output_type": "stream", 582 | "text": [ 583 | "100000 - 1000000 bytes\n" 584 | ] 585 | }, 586 | { 587 | "data": { 588 | "text/html": [ 589 | "
\n", 590 | "\n", 603 | "\n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | " \n", 702 | " \n", 703 | " \n", 704 | " \n", 705 | " \n", 706 | " \n", 707 | " \n", 708 | " \n", 709 | " \n", 710 | " \n", 711 | " \n", 712 | " \n", 713 | "
nameratessavingsspeed(MB/s)
0gzip 44.6105150.78310440.486917
1gzip 54.8216050.79260032.909052
2gzip 64.9270230.79703825.098103
3gzip 74.9538740.79813821.498278
4gzip 84.9767790.79906713.639378
5gzip 94.9818610.79927210.864447
6brotli 45.0869000.80341735.662622
7brotli 55.5400470.81949624.545289
8brotli 65.6290820.82235121.281608
9brotli 75.7070980.82478014.285831
10brotli 85.7421950.82585110.013604
11brotli 95.7772230.8269066.719082
12brotli 106.2132300.8390531.659122
13brotli 116.3597960.8427620.617029
\n", 714 | "
" 715 | ], 716 | "text/plain": [ 717 | " name rates savings speed(MB/s)\n", 718 | "0 gzip 4 4.610515 0.783104 40.486917\n", 719 | "1 gzip 5 4.821605 0.792600 32.909052\n", 720 | "2 gzip 6 4.927023 0.797038 25.098103\n", 721 | "3 gzip 7 4.953874 0.798138 21.498278\n", 722 | "4 gzip 8 4.976779 0.799067 13.639378\n", 723 | "5 gzip 9 4.981861 0.799272 10.864447\n", 724 | "6 brotli 4 5.086900 0.803417 35.662622\n", 725 | "7 brotli 5 5.540047 0.819496 24.545289\n", 726 | "8 brotli 6 5.629082 0.822351 21.281608\n", 727 | "9 brotli 7 5.707098 0.824780 14.285831\n", 728 | "10 brotli 8 5.742195 0.825851 10.013604\n", 729 | "11 brotli 9 5.777223 0.826906 6.719082\n", 730 | "12 brotli 10 6.213230 0.839053 1.659122\n", 731 | "13 brotli 11 6.359796 0.842762 0.617029" 732 | ] 733 | }, 734 | "execution_count": 50, 735 | "metadata": {}, 736 | "output_type": "execute_result" 737 | } 738 | ], 739 | "source": [ 740 | "print(100000, \"-\", 1000000, \"bytes\")\n", 741 | "frame = pd.DataFrame()\n", 742 | "frame[\"name\"] = [\"gzip 4\", \"gzip 5\", \"gzip 6\", \"gzip 7\", \"gzip 8\", \"gzip 9\",\n", 743 | " \"brotli 4\", \"brotli 5\", \"brotli 6\", \"brotli 7\", \"brotli 8\", \"brotli 9\", \"brotli 10\", \"brotli 11\"]\n", 744 | "\n", 745 | "frame[\"rates\"] = np.hstack((np.mean(np.array(rates_gzip)[group2], axis=0), np.mean(np.array(rates_brotli)[group2], axis=0)))\n", 746 | "frame[\"savings\"] = 1 - 1 / np.hstack((np.mean(np.array(rates_gzip)[group2], axis=0), np.mean(np.array(rates_brotli)[group2], axis=0)))\n", 747 | "frame[\"speed(MB/s)\"] = np.hstack((np.mean(np.array(speed_gzip)[group2], axis=0), np.mean(np.array(speed_brotli)[group2], axis=0))) / 1000000\n", 748 | "\n", 749 | "frame" 750 | ] 751 | }, 752 | { 753 | "cell_type": "code", 754 | "execution_count": 51, 755 | "metadata": {}, 756 | "outputs": [ 757 | { 758 | "name": "stdout", 759 | "output_type": "stream", 760 | "text": [ 761 | "1000000 - 519170072 bytes\n" 762 | ] 763 | }, 764 | { 765 | "data": { 766 | "text/html": [ 767 
| "
\n", 768 | "\n", 781 | "\n", 782 | " \n", 783 | " \n", 784 | " \n", 785 | " \n", 786 | " \n", 787 | " \n", 788 | " \n", 789 | " \n", 790 | " \n", 791 | " \n", 792 | " \n", 793 | " \n", 794 | " \n", 795 | " \n", 796 | " \n", 797 | " \n", 798 | " \n", 799 | " \n", 800 | " \n", 801 | " \n", 802 | " \n", 803 | " \n", 804 | " \n", 805 | " \n", 806 | " \n", 807 | " \n", 808 | " \n", 809 | " \n", 810 | " \n", 811 | " \n", 812 | " \n", 813 | " \n", 814 | " \n", 815 | " \n", 816 | " \n", 817 | " \n", 818 | " \n", 819 | " \n", 820 | " \n", 821 | " \n", 822 | " \n", 823 | " \n", 824 | " \n", 825 | " \n", 826 | " \n", 827 | " \n", 828 | " \n", 829 | " \n", 830 | " \n", 831 | " \n", 832 | " \n", 833 | " \n", 834 | " \n", 835 | " \n", 836 | " \n", 837 | " \n", 838 | " \n", 839 | " \n", 840 | " \n", 841 | " \n", 842 | " \n", 843 | " \n", 844 | " \n", 845 | " \n", 846 | " \n", 847 | " \n", 848 | " \n", 849 | " \n", 850 | " \n", 851 | " \n", 852 | " \n", 853 | " \n", 854 | " \n", 855 | " \n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | " \n", 860 | " \n", 861 | " \n", 862 | " \n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | " \n", 867 | " \n", 868 | " \n", 869 | " \n", 870 | " \n", 871 | " \n", 872 | " \n", 873 | " \n", 874 | " \n", 875 | " \n", 876 | " \n", 877 | " \n", 878 | " \n", 879 | " \n", 880 | " \n", 881 | " \n", 882 | " \n", 883 | " \n", 884 | " \n", 885 | " \n", 886 | " \n", 887 | " \n", 888 | " \n", 889 | " \n", 890 | " \n", 891 | "
nameratessavingsspeed(MB/s)
0gzip 44.9475840.79788162.609464
1gzip 55.1957650.80753647.516889
2gzip 65.3668910.81367233.629151
3gzip 75.4055440.81500527.704937
4gzip 85.4588390.81681116.148692
5gzip 95.4689530.81715012.309037
6brotli 48.5517820.88306561.243214
7brotli 59.3498770.89304735.094220
8brotli 69.7163330.89708129.873129
9brotli 710.0189520.90018923.305218
10brotli 810.1692930.90166518.596231
11brotli 910.4183850.90401613.694826
12brotli 1011.2154290.9108371.773174
13brotli 1110.6185840.9058250.704833
\n", 892 | "
" 893 | ], 894 | "text/plain": [ 895 | " name rates savings speed(MB/s)\n", 896 | "0 gzip 4 4.947584 0.797881 62.609464\n", 897 | "1 gzip 5 5.195765 0.807536 47.516889\n", 898 | "2 gzip 6 5.366891 0.813672 33.629151\n", 899 | "3 gzip 7 5.405544 0.815005 27.704937\n", 900 | "4 gzip 8 5.458839 0.816811 16.148692\n", 901 | "5 gzip 9 5.468953 0.817150 12.309037\n", 902 | "6 brotli 4 8.551782 0.883065 61.243214\n", 903 | "7 brotli 5 9.349877 0.893047 35.094220\n", 904 | "8 brotli 6 9.716333 0.897081 29.873129\n", 905 | "9 brotli 7 10.018952 0.900189 23.305218\n", 906 | "10 brotli 8 10.169293 0.901665 18.596231\n", 907 | "11 brotli 9 10.418385 0.904016 13.694826\n", 908 | "12 brotli 10 11.215429 0.910837 1.773174\n", 909 | "13 brotli 11 10.618584 0.905825 0.704833" 910 | ] 911 | }, 912 | "execution_count": 51, 913 | "metadata": {}, 914 | "output_type": "execute_result" 915 | } 916 | ], 917 | "source": [ 918 | "print(1000000, \"-\", 519170072, \"bytes\")\n", 919 | "frame = pd.DataFrame()\n", 920 | "frame[\"name\"] = [\"gzip 4\", \"gzip 5\", \"gzip 6\", \"gzip 7\", \"gzip 8\", \"gzip 9\",\n", 921 | " \"brotli 4\", \"brotli 5\", \"brotli 6\", \"brotli 7\", \"brotli 8\", \"brotli 9\", \"brotli 10\", \"brotli 11\"]\n", 922 | "\n", 923 | "frame[\"rates\"] = np.hstack((np.mean(np.array(rates_gzip)[group3], axis=0), np.mean(np.array(rates_brotli)[group3], axis=0)))\n", 924 | "frame[\"savings\"] = 1 - 1 / np.hstack((np.mean(np.array(rates_gzip)[group3], axis=0), np.mean(np.array(rates_brotli)[group3], axis=0)))\n", 925 | "frame[\"speed(MB/s)\"] = np.hstack((np.mean(np.array(speed_gzip)[group3], axis=0), np.mean(np.array(speed_brotli)[group3], axis=0))) / 1000000\n", 926 | "\n", 927 | "frame" 928 | ] 929 | } 930 | ], 931 | "metadata": { 932 | "kernelspec": { 933 | "display_name": "Python 3", 934 | "language": "python", 935 | "name": "python3" 936 | }, 937 | "language_info": { 938 | "codemirror_mode": { 939 | "name": "ipython", 940 | "version": 3 941 | }, 942 | "file_extension": 
".py", 943 | "mimetype": "text/x-python", 944 | "name": "python", 945 | "nbconvert_exporter": "python", 946 | "pygments_lexer": "ipython3", 947 | "version": "3.6.7" 948 | } 949 | }, 950 | "nbformat": 4, 951 | "nbformat_minor": 2 952 | } 953 | -------------------------------------------------------------------------------- /compression_experiments/http_archive_compression.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "Copyright 2020 Google Inc. All Rights Reserved.\n", 8 | "\n", 9 | "Licensed under the Apache License, Version 2.0 (the \"License\");
\n", 10 | "you may not use this file except in compliance with the License.
\n", 11 | "You may obtain a copy of the License at
\n", 12 | "\n", 13 | " http://www.apache.org/licenses/LICENSE-2.0\n", 14 | "\n", 15 | "Unless required by applicable law or agreed to in writing, software\n", 16 | "distributed under the License is distributed on an \"AS IS\" BASIS,\n", 17 | "WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
\n", 18 | "\n", 19 | "See the License for the specific language governing permissions and \n", 20 | "limitations under the License." 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 1, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "import numpy as np\n", 30 | "import json\n", 31 | "import matplotlib.pyplot as plt\n", 32 | "from tqdm import tqdm\n", 33 | "import random\n", 34 | "import subprocess\n", 35 | "import time\n", 36 | "import pandas as pd\n", 37 | "import os" 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": {}, 43 | "source": [ 44 | "### Read the data" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 2, 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "with open(\"webpacks/webpack_bodies.json\") as file:\n", 54 | " data = file.read()\n", 55 | " \n", 56 | "splitted_data = data.split('{\"page\"')[1:]\n", 57 | "splitted_data = [json.loads('{\"page\"' + line)[\"body\"] for line in splitted_data]\n", 58 | "\n", 59 | "for i in range(50):\n", 60 | " name = \"0\" * (12 - len(str(i))) + str(i)\n", 61 | " with open(\"webpacks/webpack_bodies_\" + name + \".json\") as file:\n", 62 | " data = file.read().split('{\"page\"')[1:]\n", 63 | " splitted_data += [json.loads('{\"page\"' + line)[\"body\"] for line in data]" 64 | ] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": {}, 69 | "source": [ 70 | "## Perform compression for bundled approach" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 17, 76 | "metadata": {}, 77 | "outputs": [], 78 | "source": [ 79 | "def log(file, msg):\n", 80 | " f = open(file, 'a+')\n", 81 | " f.write(msg + '\\n')\n", 82 | " f.close()\n", 83 | " \n", 84 | "def get_seconds(time): \n", 85 | " min_ind = time.find('m')\n", 86 | " mins = int(time[:min_ind])\n", 87 | " second = float(time[min_ind + 1:-1])\n", 88 | " return mins * 60 + second" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 219, 94 | 
"metadata": {}, 95 | "outputs": [], 96 | "source": [ 97 | "rates_gzip_bundled = []\n", 98 | "rates_brotli_bundled = []\n", 99 | "times_gzip_bundled = []\n", 100 | "times_brotli_bundled = []\n", 101 | "speed_gzip_bundled = []\n", 102 | "speed_brotli_bundled = []\n", 103 | "init_sizes_bundled = []\n", 104 | "for i in range(4000):\n", 105 | " rates_gzip_compressed = []\n", 106 | " rates_brotli_compressed = []\n", 107 | " times_gzip_compressed = []\n", 108 | " times_brotli_compressed = []\n", 109 | " speed_gzip_compressed = []\n", 110 | " speed_brotli_compressed = []\n", 111 | "\n", 112 | " # write the text of a bundle to file to use it for compression later\n", 113 | " with open(\"example.txt\", \"w\") as file:\n", 114 | " file.write(splitted_data[i])\n", 115 | " size_non_compressed = os.stat(\"example.txt\").st_size\n", 116 | " init_sizes_bundled.append(size_non_compressed)\n", 117 | "\n", 118 | " # do the gzip compression with different levels\n", 119 | " for level in range(4, 10):\n", 120 | " result = subprocess.run([\"bash\", \"gzip_compress.sh\", str(level), \"time.txt\", \n", 121 | " \"example_gzip.txt.gz\", \"example.txt\"])\n", 122 | " #previous script saves the time into the file\n", 123 | " with open(\"time.txt\") as file:\n", 124 | " user_sys = file.read().strip().split('\\n')[1:]\n", 125 | " time = get_seconds(user_sys[0].split('\\t')[1]) + get_seconds(user_sys[1].split('\\t')[1])\n", 126 | " size_gzip_compressed = os.stat(\"example_gzip.txt.gz\").st_size\n", 127 | " rates_gzip_compressed.append(size_non_compressed / size_gzip_compressed)\n", 128 | " times_gzip_compressed.append(time)\n", 129 | " speed_gzip_compressed.append(size_non_compressed / time)\n", 130 | "\n", 131 | " # do the brotli compression with different levels\n", 132 | " for level in range(4, 12):\n", 133 | " result = subprocess.run([\"bash\", \"brotli_compress.sh\", str(level), \"time.txt\", \n", 134 | " \"example_brotli.txt.br\", \"example.txt\"])\n", 135 | " with open(\"time.txt\") as 
file:\n", 136 | " user_sys = file.read().strip().split('\\n')[1:]\n", 137 | " time = get_seconds(user_sys[0].split('\\t')[1]) + get_seconds(user_sys[1].split('\\t')[1])\n", 138 | " size_br_compressed = os.stat(\"example_brotli.txt.br\").st_size\n", 139 | " rates_brotli_compressed.append(size_non_compressed / size_br_compressed)\n", 140 | " times_brotli_compressed.append(time)\n", 141 | " speed_brotli_compressed.append(size_non_compressed / time)\n", 142 | " \n", 143 | " rates_gzip_bundled.append(rates_gzip_compressed)\n", 144 | " rates_brotli_bundled.append(rates_brotli_compressed)\n", 145 | " times_gzip_bundled.append(times_gzip_compressed)\n", 146 | " times_brotli_bundled.append(times_brotli_compressed)\n", 147 | " speed_gzip_bundled.append(speed_gzip_compressed)\n", 148 | " speed_brotli_bundled.append(speed_brotli_compressed)\n", 149 | " \n", 150 | " if i != 0 and i % 50 == 0:\n", 151 | " log(\"logs.txt\", \"rates_gzip: \" + str(np.mean(rates_gzip_bundled, axis=0)))\n", 152 | " log(\"logs.txt\", \"rates_brotli: \" + str(np.mean(rates_brotli_bundled, axis=0)))\n", 153 | " log(\"logs.txt\", \"times_gzip: \" + str(np.mean(times_gzip_bundled, axis=0)))\n", 154 | " log(\"logs.txt\", \"times_brotli: \" + str(np.mean(times_brotli_bundled, axis=0)))\n", 155 | " log(\"logs.txt\", \"speed_gzip: \" + str(np.mean(speed_gzip_bundled, axis=0)))\n", 156 | " log(\"logs.txt\", \"speed_brotli: \" + str(np.mean(speed_brotli_bundled, axis=0)))" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": 408, 162 | "metadata": {}, 163 | "outputs": [ 164 | { 165 | "data": { 166 | "text/html": [ 167 | "
\n", 168 | "\n", 181 | "\n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | "
nameratessavingsspeed(MB/s)
0gzip 43.2601820.72695120.650285
1gzip 53.3578550.73646517.320770
2gzip 63.3923490.73978914.828584
3gzip 73.4025810.74067813.650937
4gzip 83.4096670.74125711.943546
5gzip 93.4101500.74129011.749338
6brotli 43.4603570.71101216.654836
7brotli 53.7142790.73076911.635901
8brotli 63.7456540.73302410.270090
9brotli 73.7717630.7348727.601850
10brotli 83.7825080.7356255.944935
11brotli 93.7938220.7364144.489888
12brotli 104.0443630.7527421.181333
13brotli 114.1188590.7572140.519743
\n", 292 | "
" 293 | ], 294 | "text/plain": [ 295 | " name rates savings speed(MB/s)\n", 296 | "0 gzip 4 3.260182 0.726951 20.650285\n", 297 | "1 gzip 5 3.357855 0.736465 17.320770\n", 298 | "2 gzip 6 3.392349 0.739789 14.828584\n", 299 | "3 gzip 7 3.402581 0.740678 13.650937\n", 300 | "4 gzip 8 3.409667 0.741257 11.943546\n", 301 | "5 gzip 9 3.410150 0.741290 11.749338\n", 302 | "6 brotli 4 3.460357 0.711012 16.654836\n", 303 | "7 brotli 5 3.714279 0.730769 11.635901\n", 304 | "8 brotli 6 3.745654 0.733024 10.270090\n", 305 | "9 brotli 7 3.771763 0.734872 7.601850\n", 306 | "10 brotli 8 3.782508 0.735625 5.944935\n", 307 | "11 brotli 9 3.793822 0.736414 4.489888\n", 308 | "12 brotli 10 4.044363 0.752742 1.181333\n", 309 | "13 brotli 11 4.118859 0.757214 0.519743" 310 | ] 311 | }, 312 | "execution_count": 408, 313 | "metadata": {}, 314 | "output_type": "execute_result" 315 | } 316 | ], 317 | "source": [ 318 | "frame = pd.DataFrame()\n", 319 | "frame[\"name\"] = [\"gzip 4\", \"gzip 5\", \"gzip 6\", \"gzip 7\", \"gzip 8\", \"gzip 9\",\n", 320 | "                 \"brotli 4\", \"brotli 5\", \"brotli 6\", \"brotli 7\", \"brotli 8\", \"brotli 9\", \"brotli 10\", \"brotli 11\"]\n", 321 | "\n", 322 | "frame[\"rates\"] = np.hstack((np.mean(rates_gzip_bundled, axis=0), np.mean(rates_brotli_bundled, axis=0)))\n", 323 | "frame[\"savings\"] = 1 - 1 / np.hstack((np.mean(rates_gzip_bundled, axis=0), np.mean(rates_brotli_bundled, axis=0)))\n", 324 | "frame[\"speed(MB/s)\"] = np.hstack((np.mean(speed_gzip_bundled, axis=0), np.mean(speed_brotli_bundled, axis=0))) / 1000000\n", 325 | "\n", 326 | "frame" 327 | ] 328 | }, 329 | { 330 | "cell_type": "markdown", 331 | "metadata": {}, 332 | "source": [ 333 | "## Unbundled approach" 334 | ] 335 | }, 336 | { 337 | "cell_type": "code", 338 | "execution_count": 214, 339 | "metadata": {}, 340 | "outputs": [], 341 | "source": [ 342 | "rates_gzip_unbundled = []\n", 343 | "rates_brotli_unbundled = []\n", 344 | "times_gzip_unbundled = []\n", 345 | "times_brotli_unbundled = []\n",
346 | "speed_gzip_unbundled = []\n", 347 | "speed_brotli_unbundled = []\n", 348 | "init_sizes_unbundled = []\n", 349 | "\n", 350 | "for i in range(600):\n", 351 | " # write the text of a bundle to file to use it for getting chunks from bundle later\n", 352 | " with open(\"third_party/bundle_analyzer/text_bundle.txt\", \"w\") as file:\n", 353 | " file.write(splitted_data[i])\n", 354 | " try:\n", 355 | " # save chunks from bundle to parsed_bundle.json file\n", 356 | " result = subprocess.run([\"node\", \"--experimental-modules\", \"third_party/bundle_analyzer/get_chunks.js\"])\n", 357 | " except:\n", 358 | " continue\n", 359 | " # get chunks\n", 360 | " with open(\"parsed_bundle.json\") as file:\n", 361 | " codes = [line['code'] for line in json.loads(file.read())]\n", 362 | "\n", 363 | " sizes_gzip_compressed = np.zeros(6)\n", 364 | " sizes_brotli_compressed = np.zeros(8)\n", 365 | " times_gzip_compressed = np.zeros(6)\n", 366 | " times_brotli_compressed = np.zeros(8)\n", 367 | " overall_init_size = 0\n", 368 | "\n", 369 | " for code in codes:\n", 370 | " if not code:\n", 371 | " continue\n", 372 | " # write the text of a bundle to file to use it for compression later\n", 373 | " with open(\"example.txt\", \"w\") as file:\n", 374 | " file.write(code)\n", 375 | " overall_init_size += os.stat(\"example.txt\").st_size\n", 376 | "\n", 377 | " # do the gzip compression with different levels\n", 378 | " for level in range(4, 10):\n", 379 | " result = subprocess.run([\"bash\", \"gzip_compress.sh\", str(level), \"time.txt\", \n", 380 | " \"example_gzip.txt.gz\", \"example.txt\"])\n", 381 | " with open(\"time.txt\") as file:\n", 382 | " user_sys = file.read().strip().split('\\n')[1:]\n", 383 | " time = get_seconds(user_sys[0].split('\\t')[1]) + get_seconds(user_sys[1].split('\\t')[1])\n", 384 | " sizes_gzip_compressed[level - 4] += os.stat(\"example_gzip.txt.gz\").st_size\n", 385 | " times_gzip_compressed[level - 4] += time\n", 386 | "\n", 387 | " # do the brotli compression 
with different levels\n", 388 | " for level in range(4, 12):\n", 389 | " result = subprocess.run([\"bash\", \"brotli_compress.sh\", str(level), \"time.txt\", \n", 390 | " \"example_brotli.txt.br\", \"example.txt\"])\n", 391 | " with open(\"time.txt\") as file:\n", 392 | " user_sys = file.read().strip().split('\\n')[1:]\n", 393 | " time = get_seconds(user_sys[0].split('\\t')[1]) + get_seconds(user_sys[1].split('\\t')[1])\n", 394 | " sizes_brotli_compressed[level - 4] += os.stat(\"example_brotli.txt.br\").st_size\n", 395 | " times_brotli_compressed[level - 4] += time\n", 396 | "\n", 397 | " rates_gzip_unbundled.append(overall_init_size / sizes_gzip_compressed)\n", 398 | " rates_brotli_unbundled.append(overall_init_size / sizes_brotli_compressed)\n", 399 | " times_gzip_unbundled.append(times_gzip_compressed)\n", 400 | " times_brotli_unbundled.append(times_brotli_compressed)\n", 401 | " speed_gzip_unbundled.append(overall_init_size / times_gzip_compressed)\n", 402 | " speed_brotli_unbundled.append(overall_init_size / times_brotli_compressed)\n", 403 | " init_sizes_unbundled.append(overall_init_size)\n", 404 | " \n", 405 | " if i != 0 and i % 100 == 0:\n", 406 | " log(\"logs2.txt\", \"rates_gzip: \" + str(np.mean(rates_gzip_unbundled, axis=0)))\n", 407 | " log(\"logs2.txt\", \"rates_brotli: \" + str(np.mean(rates_brotli_unbundled, axis=0)))\n", 408 | " log(\"logs2.txt\", \"times_gzip: \" + str(np.mean(times_gzip_unbundled, axis=0)))\n", 409 | " log(\"logs2.txt\", \"times_brotli: \" + str(np.mean(times_brotli_unbundled, axis=0)))\n", 410 | " log(\"logs2.txt\", \"speed_gzip: \" + str(np.mean(speed_gzip_unbundled, axis=0)))\n", 411 | " log(\"logs2.txt\", \"speed_brotli: \" + str(np.mean(speed_brotli_unbundled, axis=0)))" 412 | ] 413 | }, 414 | { 415 | "cell_type": "code", 416 | "execution_count": 419, 417 | "metadata": {}, 418 | "outputs": [ 419 | { 420 | "data": { 421 | "text/html": [ 422 | "
\n", 423 | "\n", 436 | "\n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " 
\n", 589 | " \n", 590 | " \n", 591 | "
namerates_bundledsavings_bundledspeed_bundled(MB/s)rates_unbundledsavings_unbundledspeed_unbundled(MB/s)
0gzip 43.3523830.70170520.0108972.7145750.6316182.922456
1gzip 53.4549000.71055617.2010502.7702960.6390282.792084
2gzip 63.4919630.71362814.6011512.7921620.6418552.574496
3gzip 73.5024960.71448913.5744712.7983910.6426522.451160
4gzip 83.5103980.71513211.7169142.8042350.6433972.200946
5gzip 93.5109330.71517511.6258632.8046990.6434552.138140
6brotli 43.5650880.71950216.4783882.9037690.6556202.062052
7brotli 53.8292460.73885211.5196783.1123970.6787041.802857
8brotli 63.8633440.74115710.1845823.1288160.6803901.697186
9brotli 73.8921210.7430717.5722823.1450810.6820431.495760
10brotli 83.9043040.7438725.9265853.1515610.6826971.312970
11brotli 93.9171890.7447154.5061343.1590670.6834511.132643
12brotli 104.1770700.7605981.2268463.3350790.7001570.508489
13brotli 114.2593160.7652210.5392993.4024240.7060920.302382
\n", 592 | "
" 593 | ], 594 | "text/plain": [ 595 | " name rates_bundled savings_bundled speed_bundled(MB/s) \\\n", 596 | "0 gzip 4 3.352383 0.701705 20.010897 \n", 597 | "1 gzip 5 3.454900 0.710556 17.201050 \n", 598 | "2 gzip 6 3.491963 0.713628 14.601151 \n", 599 | "3 gzip 7 3.502496 0.714489 13.574471 \n", 600 | "4 gzip 8 3.510398 0.715132 11.716914 \n", 601 | "5 gzip 9 3.510933 0.715175 11.625863 \n", 602 | "6 brotli 4 3.565088 0.719502 16.478388 \n", 603 | "7 brotli 5 3.829246 0.738852 11.519678 \n", 604 | "8 brotli 6 3.863344 0.741157 10.184582 \n", 605 | "9 brotli 7 3.892121 0.743071 7.572282 \n", 606 | "10 brotli 8 3.904304 0.743872 5.926585 \n", 607 | "11 brotli 9 3.917189 0.744715 4.506134 \n", 608 | "12 brotli 10 4.177070 0.760598 1.226846 \n", 609 | "13 brotli 11 4.259316 0.765221 0.539299 \n", 610 | "\n", 611 | " rates_unbundled savings_unbundled speed_unbundled(MB/s) \n", 612 | "0 2.714575 0.631618 2.922456 \n", 613 | "1 2.770296 0.639028 2.792084 \n", 614 | "2 2.792162 0.641855 2.574496 \n", 615 | "3 2.798391 0.642652 2.451160 \n", 616 | "4 2.804235 0.643397 2.200946 \n", 617 | "5 2.804699 0.643455 2.138140 \n", 618 | "6 2.903769 0.655620 2.062052 \n", 619 | "7 3.112397 0.678704 1.802857 \n", 620 | "8 3.128816 0.680390 1.697186 \n", 621 | "9 3.145081 0.682043 1.495760 \n", 622 | "10 3.151561 0.682697 1.312970 \n", 623 | "11 3.159067 0.683451 1.132643 \n", 624 | "12 3.335079 0.700157 0.508489 \n", 625 | "13 3.402424 0.706092 0.302382 " 626 | ] 627 | }, 628 | "execution_count": 419, 629 | "metadata": {}, 630 | "output_type": "execute_result" 631 | } 632 | ], 633 | "source": [ 634 | "frame = pd.DataFrame()\n", 635 | "frame[\"name\"] = [\"gzip 4\", \"gzip 5\", \"gzip 6\", \"gzip 7\", \"gzip 8\", \"gzip 9\",\n", 636 | " \"brotli 4\", \"brotli 5\", \"brotli 6\", \"brotli 7\", \"brotli 8\", \"brotli 9\", \"brotli 10\", \"brotli 11\"]\n", 637 | "\n", 638 | "frame[\"rates_bundled\"] = np.hstack((np.mean(rates_gzip_bundled[:600], axis=0), \n", 639 | " 
np.mean(rates_brotli_bundled[:600], axis=0)))\n", 640 | "frame[\"savings_bundled\"] = 1 - 1 / np.hstack((np.mean(rates_gzip_bundled[:600], axis=0), \n", 641 | " np.mean(rates_brotli_bundled[:600], axis=0)))\n", 642 | "frame[\"speed_bundled(MB/s)\"] = np.hstack((np.mean(speed_gzip_bundled[:600], axis=0), \n", 643 | " np.mean(speed_brotli_bundled[:600], axis=0))) / 1000000\n", 644 | "\n", 645 | "frame[\"rates_unbundled\"] = np.hstack((np.mean(rates_gzip_unbundled, axis=0), \n", 646 | " np.mean(rates_brotli_unbundled, axis=0)))\n", 647 | "frame[\"savings_unbundled\"] = 1 - 1 / np.hstack((np.mean(rates_gzip_unbundled, axis=0), \n", 648 | " np.mean(rates_brotli_unbundled, axis=0)))\n", 649 | "frame[\"speed_unbundled(MB/s)\"] = np.hstack((np.mean(speed_gzip_unbundled, axis=0), \n", 650 | " np.mean(speed_brotli_unbundled, axis=0))) / 1000000\n", 651 | "frame" 652 | ] 653 | }, 654 | { 655 | "cell_type": "markdown", 656 | "metadata": {}, 657 | "source": [ 658 | "### Group results by non compressed size ranges" 659 | ] 660 | }, 661 | { 662 | "cell_type": "code", 663 | "execution_count": 404, 664 | "metadata": {}, 665 | "outputs": [ 666 | { 667 | "name": "stdout", 668 | "output_type": "stream", 669 | "text": [ 670 | "20000 - 100000 bytes\n" 671 | ] 672 | }, 673 | { 674 | "data": { 675 | "text/html": [ 676 | "
\n", 677 | "\n", 690 | "\n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | " \n", 702 | " \n", 703 | " \n", 704 | " \n", 705 | " \n", 706 | " \n", 707 | " \n", 708 | " \n", 709 | " \n", 710 | " \n", 711 | " \n", 712 | " \n", 713 | " \n", 714 | " \n", 715 | " \n", 716 | " \n", 717 | " \n", 718 | " \n", 719 | " \n", 720 | " \n", 721 | " \n", 722 | " \n", 723 | " \n", 724 | " \n", 725 | " \n", 726 | " \n", 727 | " \n", 728 | " \n", 729 | " \n", 730 | " \n", 731 | " \n", 732 | " \n", 733 | " \n", 734 | " \n", 735 | " \n", 736 | " \n", 737 | " \n", 738 | " \n", 739 | " \n", 740 | " \n", 741 | " \n", 742 | " \n", 743 | " \n", 744 | " \n", 745 | " \n", 746 | " \n", 747 | " \n", 748 | " \n", 749 | " \n", 750 | " \n", 751 | " \n", 752 | " \n", 753 | " \n", 754 | " \n", 755 | " \n", 756 | " \n", 757 | " \n", 758 | " \n", 759 | " \n", 760 | " \n", 761 | " \n", 762 | " \n", 763 | " \n", 764 | " \n", 765 | " \n", 766 | " \n", 767 | " \n", 768 | " \n", 769 | " \n", 770 | " \n", 771 | " \n", 772 | " \n", 773 | " \n", 774 | " \n", 775 | " \n", 776 | " \n", 777 | " \n", 778 | " \n", 779 | " \n", 780 | " \n", 781 | " \n", 782 | " \n", 783 | " \n", 784 | " \n", 785 | " \n", 786 | " \n", 787 | " \n", 788 | " \n", 789 | " \n", 790 | " \n", 791 | " \n", 792 | " \n", 793 | " \n", 794 | " \n", 795 | " \n", 796 | " \n", 797 | " \n", 798 | " \n", 799 | " \n", 800 | " \n", 801 | " \n", 802 | " \n", 803 | " \n", 804 | " \n", 805 | " \n", 806 | " \n", 807 | " \n", 808 | " \n", 809 | " \n", 810 | " \n", 811 | " \n", 812 | " \n", 813 | " \n", 814 | " \n", 815 | " \n", 816 | " \n", 817 | " \n", 818 | " \n", 819 | " \n", 820 | " \n", 821 | " \n", 822 | " \n", 823 | " \n", 824 | " \n", 825 | " \n", 826 | " \n", 827 | " \n", 828 | " \n", 829 | " \n", 830 | " \n", 831 | " \n", 832 | " \n", 833 | " \n", 834 | " \n", 835 | " \n", 836 | " \n", 837 | " \n", 838 | " \n", 839 | " \n", 840 | " \n", 841 | " \n", 842 | " 
\n", 843 | " \n", 844 | " \n", 845 | "
namerates_bundledsavings_bundledspeed_bundled(MB/s)rates_unbundledsavings_unbundledspeed_unbundled(MB/s)
0gzip 43.2194730.68939019.9062932.6398140.6211852.741001
1gzip 53.3160490.69843616.8754702.6888580.6280952.636272
2gzip 63.3509740.70157914.2778112.7032930.6300812.391124
3gzip 73.3608540.70245713.1010732.7085380.6307972.357168
4gzip 83.3673660.70303211.3259692.7137460.6315062.101626
5gzip 93.3678270.70307311.2488992.7142110.6315692.081291
6brotli 43.4147660.70715416.2010732.7946750.6421771.889179
7brotli 53.6635720.72704211.2289772.9891900.6654611.658310
8brotli 63.6971750.7295239.9449893.0011000.6667891.575014
9brotli 73.7254540.7315767.3247803.0120060.6679951.380246
10brotli 83.7385440.7325165.7450743.0155550.6683861.223964
11brotli 93.7518950.7334684.3536473.0210050.6689841.049966
12brotli 104.0054070.7503381.1613233.1813560.6856690.506229
13brotli 114.0810900.7549670.5111653.2386090.6912250.304395
\n", 846 | "
" 847 | ], 848 | "text/plain": [ 849 | " name rates_bundled savings_bundled speed_bundled(MB/s) \\\n", 850 | "0 gzip 4 3.219473 0.689390 19.906293 \n", 851 | "1 gzip 5 3.316049 0.698436 16.875470 \n", 852 | "2 gzip 6 3.350974 0.701579 14.277811 \n", 853 | "3 gzip 7 3.360854 0.702457 13.101073 \n", 854 | "4 gzip 8 3.367366 0.703032 11.325969 \n", 855 | "5 gzip 9 3.367827 0.703073 11.248899 \n", 856 | "6 brotli 4 3.414766 0.707154 16.201073 \n", 857 | "7 brotli 5 3.663572 0.727042 11.228977 \n", 858 | "8 brotli 6 3.697175 0.729523 9.944989 \n", 859 | "9 brotli 7 3.725454 0.731576 7.324780 \n", 860 | "10 brotli 8 3.738544 0.732516 5.745074 \n", 861 | "11 brotli 9 3.751895 0.733468 4.353647 \n", 862 | "12 brotli 10 4.005407 0.750338 1.161323 \n", 863 | "13 brotli 11 4.081090 0.754967 0.511165 \n", 864 | "\n", 865 | " rates_unbundled savings_unbundled speed_unbundled(MB/s) \n", 866 | "0 2.639814 0.621185 2.741001 \n", 867 | "1 2.688858 0.628095 2.636272 \n", 868 | "2 2.703293 0.630081 2.391124 \n", 869 | "3 2.708538 0.630797 2.357168 \n", 870 | "4 2.713746 0.631506 2.101626 \n", 871 | "5 2.714211 0.631569 2.081291 \n", 872 | "6 2.794675 0.642177 1.889179 \n", 873 | "7 2.989190 0.665461 1.658310 \n", 874 | "8 3.001100 0.666789 1.575014 \n", 875 | "9 3.012006 0.667995 1.380246 \n", 876 | "10 3.015555 0.668386 1.223964 \n", 877 | "11 3.021005 0.668984 1.049966 \n", 878 | "12 3.181356 0.685669 0.506229 \n", 879 | "13 3.238609 0.691225 0.304395 " 880 | ] 881 | }, 882 | "execution_count": 404, 883 | "metadata": {}, 884 | "output_type": "execute_result" 885 | } 886 | ], 887 | "source": [ 888 | "# ranges are (20000, 100000), (100000, 1000000), (1000000, 3000000) in bytes\n", 889 | "init_sizes_unbundled = np.array(init_sizes_unbundled)\n", 890 | "group1 = np.where((init_sizes_unbundled > 20000)*(init_sizes_unbundled <= 100000))[0]\n", 891 | "group2 = np.where((init_sizes_unbundled > 100000)*(init_sizes_unbundled <= 1000000))[0]\n", 892 | "group3 = np.where((init_sizes_unbundled > 
1000000)*(init_sizes_unbundled <= 3000000))[0]\n", 893 | "\n", 894 | "print(20000, \"-\", 100000, \"bytes\")\n", 895 | "frame = pd.DataFrame()\n", 896 | "frame[\"name\"] = [\"gzip 4\", \"gzip 5\", \"gzip 6\", \"gzip 7\", \"gzip 8\", \"gzip 9\",\n", 897 | " \"brotli 4\", \"brotli 5\", \"brotli 6\", \"brotli 7\", \"brotli 8\", \"brotli 9\", \"brotli 10\", \"brotli 11\"]\n", 898 | "\n", 899 | "\n", 900 | "frame[\"rates_bundled\"] = np.hstack((np.mean(np.array(rates_gzip_bundled)[group1], axis=0), \n", 901 | " np.mean(np.array(rates_brotli_bundled)[group1], axis=0)))\n", 902 | "frame[\"savings_bundled\"] = 1 - 1 / np.hstack((np.mean(np.array(rates_gzip_bundled)[group1], axis=0), \n", 903 | " np.mean(np.array(rates_brotli_bundled)[group1], axis=0)))\n", 904 | "frame[\"speed_bundled(MB/s)\"] = np.hstack((np.mean(np.array(speed_gzip_bundled)[group1], axis=0), \n", 905 | " np.mean(np.array(speed_brotli_bundled)[group1], axis=0))) / 1000000\n", 906 | "\n", 907 | "frame[\"rates_unbundled\"] = np.hstack((np.mean(np.array(rates_gzip_unbundled)[group1], axis=0), \n", 908 | " np.mean(np.array(rates_brotli_unbundled)[group1], axis=0)))\n", 909 | "frame[\"savings_unbundled\"] = 1 - 1 / np.hstack((np.mean(np.array(rates_gzip_unbundled)[group1], axis=0), \n", 910 | " np.mean(np.array(rates_brotli_unbundled)[group1], axis=0)))\n", 911 | "frame[\"speed_unbundled(MB/s)\"] = np.hstack((np.mean(np.array(speed_gzip_unbundled)[group1], axis=0), \n", 912 | " np.mean(np.array(speed_brotli_unbundled)[group1], axis=0))) / 1000000\n", 913 | "\n", 914 | "frame" 915 | ] 916 | }, 917 | { 918 | "cell_type": "code", 919 | "execution_count": 406, 920 | "metadata": {}, 921 | "outputs": [ 922 | { 923 | "name": "stdout", 924 | "output_type": "stream", 925 | "text": [ 926 | "100000 - 1000000 bytes\n" 927 | ] 928 | }, 929 | { 930 | "data": { 931 | "text/html": [ 932 | "
\n", 933 | "\n", 946 | "\n", 947 | " \n", 948 | " \n", 949 | " \n", 950 | " \n", 951 | " \n", 952 | " \n", 953 | " \n", 954 | " \n", 955 | " \n", 956 | " \n", 957 | " \n", 958 | " \n", 959 | " \n", 960 | " \n", 961 | " \n", 962 | " \n", 963 | " \n", 964 | " \n", 965 | " \n", 966 | " \n", 967 | " \n", 968 | " \n", 969 | " \n", 970 | " \n", 971 | " \n", 972 | " \n", 973 | " \n", 974 | " \n", 975 | " \n", 976 | " \n", 977 | " \n", 978 | " \n", 979 | " \n", 980 | " \n", 981 | " \n", 982 | " \n", 983 | " \n", 984 | " \n", 985 | " \n", 986 | " \n", 987 | " \n", 988 | " \n", 989 | " \n", 990 | " \n", 991 | " \n", 992 | " \n", 993 | " \n", 994 | " \n", 995 | " \n", 996 | " \n", 997 | " \n", 998 | " \n", 999 | " \n", 1000 | " \n", 1001 | " \n", 1002 | " \n", 1003 | " \n", 1004 | " \n", 1005 | " \n", 1006 | " \n", 1007 | " \n", 1008 | " \n", 1009 | " \n", 1010 | " \n", 1011 | " \n", 1012 | " \n", 1013 | " \n", 1014 | " \n", 1015 | " \n", 1016 | " \n", 1017 | " \n", 1018 | " \n", 1019 | " \n", 1020 | " \n", 1021 | " \n", 1022 | " \n", 1023 | " \n", 1024 | " \n", 1025 | " \n", 1026 | " \n", 1027 | " \n", 1028 | " \n", 1029 | " \n", 1030 | " \n", 1031 | " \n", 1032 | " \n", 1033 | " \n", 1034 | " \n", 1035 | " \n", 1036 | " \n", 1037 | " \n", 1038 | " \n", 1039 | " \n", 1040 | " \n", 1041 | " \n", 1042 | " \n", 1043 | " \n", 1044 | " \n", 1045 | " \n", 1046 | " \n", 1047 | " \n", 1048 | " \n", 1049 | " \n", 1050 | " \n", 1051 | " \n", 1052 | " \n", 1053 | " \n", 1054 | " \n", 1055 | " \n", 1056 | " \n", 1057 | " \n", 1058 | " \n", 1059 | " \n", 1060 | " \n", 1061 | " \n", 1062 | " \n", 1063 | " \n", 1064 | " \n", 1065 | " \n", 1066 | " \n", 1067 | " \n", 1068 | " \n", 1069 | " \n", 1070 | " \n", 1071 | " \n", 1072 | " \n", 1073 | " \n", 1074 | " \n", 1075 | " \n", 1076 | " \n", 1077 | " \n", 1078 | " \n", 1079 | " \n", 1080 | " \n", 1081 | " \n", 1082 | " \n", 1083 | " \n", 1084 | " \n", 1085 | " \n", 1086 | " \n", 1087 | " \n", 1088 | " \n", 1089 | " \n", 1090 | " \n", 1091 | 
" \n", 1092 | " \n", 1093 | " \n", 1094 | " \n", 1095 | " \n", 1096 | " \n", 1097 | " \n", 1098 | " \n", 1099 | " \n", 1100 | " \n", 1101 | "
namerates_bundledsavings_bundledspeed_bundled(MB/s)rates_unbundledsavings_unbundledspeed_unbundled(MB/s)
0gzip 43.2554380.69282221.7097832.8341530.6471613.442193
1gzip 53.3524080.70170718.1110592.9017490.6553803.270738
2gzip 63.3850230.70458115.6083812.9371610.6595353.016377
3gzip 73.3953870.70548314.4157842.9452460.6604702.766044
4gzip 83.4026330.70611012.6683712.9529410.6613552.463421
5gzip 93.4030910.70615012.4924362.9535050.6614192.303074
6brotli 43.4616480.71112017.2021663.0869690.6760582.547228
7brotli 53.7105030.73049511.9699303.3214060.6989232.191857
8brotli 63.7407650.73267510.5540533.3464790.7011792.032891
9brotli 73.7645710.7343657.8121853.3730160.7035291.782097
10brotli 83.7747020.7350786.0373803.3848850.7045691.520304
11brotli 93.7856850.7358474.5918693.3963620.7055671.313113
12brotli 104.0338930.7521011.2026723.5989850.7221440.519223
13brotli 114.1045260.7563670.5239133.6835670.7285240.301002
\n", 1102 | "
" 1103 | ], 1104 | "text/plain": [ 1105 | " name rates_bundled savings_bundled speed_bundled(MB/s) \\\n", 1106 | "0 gzip 4 3.255438 0.692822 21.709783 \n", 1107 | "1 gzip 5 3.352408 0.701707 18.111059 \n", 1108 | "2 gzip 6 3.385023 0.704581 15.608381 \n", 1109 | "3 gzip 7 3.395387 0.705483 14.415784 \n", 1110 | "4 gzip 8 3.402633 0.706110 12.668371 \n", 1111 | "5 gzip 9 3.403091 0.706150 12.492436 \n", 1112 | "6 brotli 4 3.461648 0.711120 17.202166 \n", 1113 | "7 brotli 5 3.710503 0.730495 11.969930 \n", 1114 | "8 brotli 6 3.740765 0.732675 10.554053 \n", 1115 | "9 brotli 7 3.764571 0.734365 7.812185 \n", 1116 | "10 brotli 8 3.774702 0.735078 6.037380 \n", 1117 | "11 brotli 9 3.785685 0.735847 4.591869 \n", 1118 | "12 brotli 10 4.033893 0.752101 1.202672 \n", 1119 | "13 brotli 11 4.104526 0.756367 0.523913 \n", 1120 | "\n", 1121 | " rates_unbundled savings_unbundled speed_unbundled(MB/s) \n", 1122 | "0 2.834153 0.647161 3.442193 \n", 1123 | "1 2.901749 0.655380 3.270738 \n", 1124 | "2 2.937161 0.659535 3.016377 \n", 1125 | "3 2.945246 0.660470 2.766044 \n", 1126 | "4 2.952941 0.661355 2.463421 \n", 1127 | "5 2.953505 0.661419 2.303074 \n", 1128 | "6 3.086969 0.676058 2.547228 \n", 1129 | "7 3.321406 0.698923 2.191857 \n", 1130 | "8 3.346479 0.701179 2.032891 \n", 1131 | "9 3.373016 0.703529 1.782097 \n", 1132 | "10 3.384885 0.704569 1.520304 \n", 1133 | "11 3.396362 0.705567 1.313113 \n", 1134 | "12 3.598985 0.722144 0.519223 \n", 1135 | "13 3.683567 0.728524 0.301002 " 1136 | ] 1137 | }, 1138 | "execution_count": 406, 1139 | "metadata": {}, 1140 | "output_type": "execute_result" 1141 | } 1142 | ], 1143 | "source": [ 1144 | "print(100000, \"-\", 1000000, \"bytes\")\n", 1145 | "frame = pd.DataFrame()\n", 1146 | "frame[\"name\"] = [\"gzip 4\", \"gzip 5\", \"gzip 6\", \"gzip 7\", \"gzip 8\", \"gzip 9\",\n", 1147 | " \"brotli 4\", \"brotli 5\", \"brotli 6\", \"brotli 7\", \"brotli 8\", \"brotli 9\", \"brotli 10\", \"brotli 11\"]\n", 1148 | "frame[\"rates_bundled\"] = 
np.hstack((np.mean(np.array(rates_gzip_bundled)[group2], axis=0), \n", 1149 | " np.mean(np.array(rates_brotli_bundled)[group2], axis=0)))\n", 1150 | "frame[\"savings_bundled\"] = 1 - 1 / np.hstack((np.mean(np.array(rates_gzip_bundled)[group2], axis=0), \n", 1151 | " np.mean(np.array(rates_brotli_bundled)[group2], axis=0)))\n", 1152 | "frame[\"speed_bundled(MB/s)\"] = np.hstack((np.mean(np.array(speed_gzip_bundled)[group2], axis=0), \n", 1153 | " np.mean(np.array(speed_brotli_bundled)[group2], axis=0))) / 1000000\n", 1154 | "\n", 1155 | "frame[\"rates_unbundled\"] = np.hstack((np.mean(np.array(rates_gzip_unbundled)[group2], axis=0), \n", 1156 | " np.mean(np.array(rates_brotli_unbundled)[group2], axis=0)))\n", 1157 | "frame[\"savings_unbundled\"] = 1 - 1 / np.hstack((np.mean(np.array(rates_gzip_unbundled)[group2], axis=0), \n", 1158 | " np.mean(np.array(rates_brotli_unbundled)[group2], axis=0)))\n", 1159 | "frame[\"speed_unbundled(MB/s)\"] = np.hstack((np.mean(np.array(speed_gzip_unbundled)[group2], axis=0), \n", 1160 | " np.mean(np.array(speed_brotli_unbundled)[group2], axis=0))) / 1000000\n", 1161 | "\n", 1162 | "frame" 1163 | ] 1164 | }, 1165 | { 1166 | "cell_type": "code", 1167 | "execution_count": 407, 1168 | "metadata": {}, 1169 | "outputs": [ 1170 | { 1171 | "name": "stdout", 1172 | "output_type": "stream", 1173 | "text": [ 1174 | "1000000 - 3000000 bytes\n" 1175 | ] 1176 | }, 1177 | { 1178 | "data": { 1179 | "text/html": [ 1180 | "
\n", 1181 | "\n", 1194 | "\n", 1195 | " \n", 1196 | " \n", 1197 | " \n", 1198 | " \n", 1199 | " \n", 1200 | " \n", 1201 | " \n", 1202 | " \n", 1203 | " \n", 1204 | " \n", 1205 | " \n", 1206 | " \n", 1207 | " \n", 1208 | " \n", 1209 | " \n", 1210 | " \n", 1211 | " \n", 1212 | " \n", 1213 | " \n", 1214 | " \n", 1215 | " \n", 1216 | " \n", 1217 | " \n", 1218 | " \n", 1219 | " \n", 1220 | " \n", 1221 | " \n", 1222 | " \n", 1223 | " \n", 1224 | " \n", 1225 | " \n", 1226 | " \n", 1227 | " \n", 1228 | " \n", 1229 | " \n", 1230 | " \n", 1231 | " \n", 1232 | " \n", 1233 | " \n", 1234 | " \n", 1235 | " \n", 1236 | " \n", 1237 | " \n", 1238 | " \n", 1239 | " \n", 1240 | " \n", 1241 | " \n", 1242 | " \n", 1243 | " \n", 1244 | " \n", 1245 | " \n", 1246 | " \n", 1247 | " \n", 1248 | " \n", 1249 | " \n", 1250 | " \n", 1251 | " \n", 1252 | " \n", 1253 | " \n", 1254 | " \n", 1255 | " \n", 1256 | " \n", 1257 | " \n", 1258 | " \n", 1259 | " \n", 1260 | " \n", 1261 | " \n", 1262 | " \n", 1263 | " \n", 1264 | " \n", 1265 | " \n", 1266 | " \n", 1267 | " \n", 1268 | " \n", 1269 | " \n", 1270 | " \n", 1271 | " \n", 1272 | " \n", 1273 | " \n", 1274 | " \n", 1275 | " \n", 1276 | " \n", 1277 | " \n", 1278 | " \n", 1279 | " \n", 1280 | " \n", 1281 | " \n", 1282 | " \n", 1283 | " \n", 1284 | " \n", 1285 | " \n", 1286 | " \n", 1287 | " \n", 1288 | " \n", 1289 | " \n", 1290 | " \n", 1291 | " \n", 1292 | " \n", 1293 | " \n", 1294 | " \n", 1295 | " \n", 1296 | " \n", 1297 | " \n", 1298 | " \n", 1299 | " \n", 1300 | " \n", 1301 | " \n", 1302 | " \n", 1303 | " \n", 1304 | " \n", 1305 | " \n", 1306 | " \n", 1307 | " \n", 1308 | " \n", 1309 | " \n", 1310 | " \n", 1311 | " \n", 1312 | " \n", 1313 | " \n", 1314 | " \n", 1315 | " \n", 1316 | " \n", 1317 | " \n", 1318 | " \n", 1319 | " \n", 1320 | " \n", 1321 | " \n", 1322 | " \n", 1323 | " \n", 1324 | " \n", 1325 | " \n", 1326 | " \n", 1327 | " \n", 1328 | " \n", 1329 | " \n", 1330 | " \n", 1331 | " \n", 1332 | " \n", 1333 | " \n", 1334 | " \n", 1335 | " 
\n", 1336 | " \n", 1337 | " \n", 1338 | " \n", 1339 | " \n", 1340 | " \n", 1341 | " \n", 1342 | " \n", 1343 | " \n", 1344 | " \n", 1345 | " \n", 1346 | " \n", 1347 | " \n", 1348 | " \n", 1349 | "
namerates_bundledsavings_bundledspeed_bundled(MB/s)rates_unbundledsavings_unbundledspeed_unbundled(MB/s)
0gzip 43.2077580.68825622.3111942.8061850.6436441.718255
1gzip 53.2982490.69680919.3653112.8611840.6504941.701731
2gzip 63.3271290.69944115.5422962.8753040.6522111.687635
3gzip 73.3348110.70013313.9126552.8813700.6529431.516255
4gzip 83.3380780.70042612.3954552.8841580.6532781.667812
5gzip 93.3380870.70042712.4744732.8842150.6532851.642354
6brotli 43.3895920.70497918.8848712.9480770.6607961.000014
7brotli 53.6318060.72465512.3104123.1429860.6818311.002287
8brotli 63.6620580.72692910.6777703.1537250.6829150.976053
9brotli 73.6870790.7287837.8241103.1645750.6840020.983152
10brotli 83.6965200.7294755.9294253.1676170.6843050.959331
11brotli 93.7055610.7301354.3819633.1716270.6847040.914153
12brotli 103.9800560.7487471.1366123.3501620.7015070.511365
13brotli 114.0471790.7529140.5142813.4170890.7073530.318949
\n", 1350 | "
" 1351 | ], 1352 | "text/plain": [ 1353 | " name rates_bundled savings_bundled speed_bundled(MB/s) \\\n", 1354 | "0 gzip 4 3.207758 0.688256 22.311194 \n", 1355 | "1 gzip 5 3.298249 0.696809 19.365311 \n", 1356 | "2 gzip 6 3.327129 0.699441 15.542296 \n", 1357 | "3 gzip 7 3.334811 0.700133 13.912655 \n", 1358 | "4 gzip 8 3.338078 0.700426 12.395455 \n", 1359 | "5 gzip 9 3.338087 0.700427 12.474473 \n", 1360 | "6 brotli 4 3.389592 0.704979 18.884871 \n", 1361 | "7 brotli 5 3.631806 0.724655 12.310412 \n", 1362 | "8 brotli 6 3.662058 0.726929 10.677770 \n", 1363 | "9 brotli 7 3.687079 0.728783 7.824110 \n", 1364 | "10 brotli 8 3.696520 0.729475 5.929425 \n", 1365 | "11 brotli 9 3.705561 0.730135 4.381963 \n", 1366 | "12 brotli 10 3.980056 0.748747 1.136612 \n", 1367 | "13 brotli 11 4.047179 0.752914 0.514281 \n", 1368 | "\n", 1369 | " rates_unbundled savings_unbundled speed_unbundled(MB/s) \n", 1370 | "0 2.806185 0.643644 1.718255 \n", 1371 | "1 2.861184 0.650494 1.701731 \n", 1372 | "2 2.875304 0.652211 1.687635 \n", 1373 | "3 2.881370 0.652943 1.516255 \n", 1374 | "4 2.884158 0.653278 1.667812 \n", 1375 | "5 2.884215 0.653285 1.642354 \n", 1376 | "6 2.948077 0.660796 1.000014 \n", 1377 | "7 3.142986 0.681831 1.002287 \n", 1378 | "8 3.153725 0.682915 0.976053 \n", 1379 | "9 3.164575 0.684002 0.983152 \n", 1380 | "10 3.167617 0.684305 0.959331 \n", 1381 | "11 3.171627 0.684704 0.914153 \n", 1382 | "12 3.350162 0.701507 0.511365 \n", 1383 | "13 3.417089 0.707353 0.318949 " 1384 | ] 1385 | }, 1386 | "execution_count": 407, 1387 | "metadata": {}, 1388 | "output_type": "execute_result" 1389 | } 1390 | ], 1391 | "source": [ 1392 | "print(1000000, \"-\", 3000000, \"bytes\")\n", 1393 | "frame = pd.DataFrame()\n", 1394 | "frame[\"name\"] = [\"gzip 4\", \"gzip 5\", \"gzip 6\", \"gzip 7\", \"gzip 8\", \"gzip 9\",\n", 1395 | " \"brotli 4\", \"brotli 5\", \"brotli 6\", \"brotli 7\", \"brotli 8\", \"brotli 9\", \"brotli 10\", \"brotli 11\"]\n", 1396 | "frame[\"rates_bundled\"] = 
np.hstack((np.mean(np.array(rates_gzip_bundled)[group3], axis=0), \n", 1397 | " np.mean(np.array(rates_brotli_bundled)[group3], axis=0)))\n", 1398 | "frame[\"savings_bundled\"] = 1 - 1 / np.hstack((np.mean(np.array(rates_gzip_bundled)[group3], axis=0), \n", 1399 | " np.mean(np.array(rates_brotli_bundled)[group3], axis=0)))\n", 1400 | "frame[\"speed_bundled(MB/s)\"] = np.hstack((np.mean(np.array(speed_gzip_bundled)[group3], axis=0), \n", 1401 | " np.mean(np.array(speed_brotli_bundled)[group3], axis=0))) / 1000000\n", 1402 | "\n", 1403 | "frame[\"rates_unbundled\"] = np.hstack((np.mean(np.array(rates_gzip_unbundled)[group3], axis=0), \n", 1404 | " np.mean(np.array(rates_brotli_unbundled)[group3], axis=0)))\n", 1405 | "frame[\"savings_unbundled\"] = 1 - 1 / np.hstack((np.mean(np.array(rates_gzip_unbundled)[group3], axis=0), \n", 1406 | " np.mean(np.array(rates_brotli_unbundled)[group3], axis=0)))\n", 1407 | "frame[\"speed_unbundled(MB/s)\"] = np.hstack((np.mean(np.array(speed_gzip_unbundled)[group3], axis=0), \n", 1408 | " np.mean(np.array(speed_brotli_unbundled)[group3], axis=0))) / 1000000\n", 1409 | "\n", 1410 | "frame" 1411 | ] 1412 | }, 1413 | { 1414 | "cell_type": "markdown", 1415 | "metadata": {}, 1416 | "source": [ 1417 | "### Compare the results for each example" 1418 | ] 1419 | }, 1420 | { 1421 | "cell_type": "code", 1422 | "execution_count": 389, 1423 | "metadata": {}, 1424 | "outputs": [ 1425 | { 1426 | "data": { 1427 | "text/html": [ 1428 | "
\n", 1429 | "\n", 1442 | "\n", 1443 | " \n", 1444 | " \n", 1445 | " \n", 1446 | " \n", 1447 | " \n", 1448 | " \n", 1449 | " \n", 1450 | " \n", 1451 | " \n", 1452 | " \n", 1453 | " \n", 1454 | " \n", 1455 | " \n", 1456 | " \n", 1457 | " \n", 1458 | " \n", 1459 | " \n", 1460 | " \n", 1461 | " \n", 1462 | " \n", 1463 | " \n", 1464 | " \n", 1465 | " \n", 1466 | " \n", 1467 | " \n", 1468 | " \n", 1469 | " \n", 1470 | " \n", 1471 | " \n", 1472 | " \n", 1473 | " \n", 1474 | " \n", 1475 | " \n", 1476 | " \n", 1477 | " \n", 1478 | " \n", 1479 | " \n", 1480 | " \n", 1481 | " \n", 1482 | " \n", 1483 | " \n", 1484 | " \n", 1485 | " \n", 1486 | " \n", 1487 | " \n", 1488 | " \n", 1489 | " \n", 1490 | " \n", 1491 | " \n", 1492 | " \n", 1493 | " \n", 1494 | " \n", 1495 | " \n", 1496 | " \n", 1497 | " \n", 1498 | " \n", 1499 | " \n", 1500 | " \n", 1501 | " \n", 1502 | " \n", 1503 | " \n", 1504 | " \n", 1505 | " \n", 1506 | " \n", 1507 | " \n", 1508 | " \n", 1509 | " \n", 1510 | " \n", 1511 | " \n", 1512 | " \n", 1513 | " \n", 1514 | " \n", 1515 | " \n", 1516 | " \n", 1517 | " \n", 1518 | " \n", 1519 | " \n", 1520 | " \n", 1521 | " \n", 1522 | " \n", 1523 | " \n", 1524 | " \n", 1525 | " \n", 1526 | " \n", 1527 | " \n", 1528 | " \n", 1529 | " \n", 1530 | " \n", 1531 | " \n", 1532 | " \n", 1533 | " \n", 1534 | " \n", 1535 | " \n", 1536 | " \n", 1537 | "
nameratio of ratesratio of times
0gzip 41.4270970.632208
1gzip 51.4531090.749165
2gzip 61.4644450.894621
3gzip 71.4673820.906732
4gzip 81.4698441.163363
5gzip 91.4700231.152780
6brotli 41.3937340.490474
7brotli 51.4091290.684028
8brotli 61.4185720.757256
9brotli 71.4263841.009216
10brotli 81.4299691.295686
11brotli 91.4333691.711266
12brotli 101.4605465.122315
13brotli 111.4650199.145809
\n", 1538 | "
" 1539 | ], 1540 | "text/plain": [ 1541 | " name ratio of rates ratio of times\n", 1542 | "0 gzip 4 1.427097 0.632208\n", 1543 | "1 gzip 5 1.453109 0.749165\n", 1544 | "2 gzip 6 1.464445 0.894621\n", 1545 | "3 gzip 7 1.467382 0.906732\n", 1546 | "4 gzip 8 1.469844 1.163363\n", 1547 | "5 gzip 9 1.470023 1.152780\n", 1548 | "6 brotli 4 1.393734 0.490474\n", 1549 | "7 brotli 5 1.409129 0.684028\n", 1550 | "8 brotli 6 1.418572 0.757256\n", 1551 | "9 brotli 7 1.426384 1.009216\n", 1552 | "10 brotli 8 1.429969 1.295686\n", 1553 | "11 brotli 9 1.433369 1.711266\n", 1554 | "12 brotli 10 1.460546 5.122315\n", 1555 | "13 brotli 11 1.465019 9.145809" 1556 | ] 1557 | }, 1558 | "execution_count": 389, 1559 | "metadata": {}, 1560 | "output_type": "execute_result" 1561 | } 1562 | ], 1563 | "source": [ 1564 | "ratio_of_rates = []\n", 1565 | "ratio_of_times = []\n", 1566 | "for i in range(len(rates_gzip_unbundled)):\n", 1567 | " ratio_of_rates.append(np.hstack((np.array(rates_gzip_bundled[i]) / np.array(rates_gzip_unbundled[i]),\n", 1568 | " np.array(rates_brotli_bundled[i]) / np.array(rates_brotli_unbundled[i]))))\n", 1569 | " ratio_of_times.append(np.hstack((np.array(times_gzip_bundled[i]) / np.array(times_gzip_unbundled[i]),\n", 1570 | " np.array(times_brotli_bundled[i]) / np.array(times_brotli_unbundled[i]))))\n", 1571 | " \n", 1572 | "frame = pd.DataFrame()\n", 1573 | "frame[\"name\"] = [\"gzip 4\", \"gzip 5\", \"gzip 6\", \"gzip 7\", \"gzip 8\", \"gzip 9\",\n", 1574 | " \"brotli 4\", \"brotli 5\", \"brotli 6\", \"brotli 7\", \"brotli 8\", \"brotli 9\", \"brotli 10\", \"brotli 11\"]\n", 1575 | "frame[\"ratio of rates\"] = np.mean(ratio_of_rates, axis=0)\n", 1576 | "frame[\"ratio of times\"] = np.mean(ratio_of_times, axis=0)\n", 1577 | "frame" 1578 | ] 1579 | } 1580 | ], 1581 | "metadata": { 1582 | "kernelspec": { 1583 | "display_name": "Python 3", 1584 | "language": "python", 1585 | "name": "python3" 1586 | }, 1587 | "language_info": { 1588 | "codemirror_mode": { 1589 | 
"name": "ipython", 1590 | "version": 3 1591 | }, 1592 | "file_extension": ".py", 1593 | "mimetype": "text/x-python", 1594 | "name": "python", 1595 | "nbconvert_exporter": "python", 1596 | "pygments_lexer": "ipython3", 1597 | "version": "3.6.7" 1598 | } 1599 | }, 1600 | "nbformat": 4, 1601 | "nbformat_minor": 2 1602 | } 1603 | --------------------------------------------------------------------------------