├── CNAME ├── .gitignore ├── _sass └── custom │ └── custom.scss ├── _languages ├── Go.md ├── C++.md ├── Java.md ├── Ruby.md ├── Rust.md ├── Python.md └── TypeScript.md ├── _tags ├── aws.md ├── cli.md ├── dao.md ├── diff.md ├── dsl.md ├── sql.md ├── trie.md ├── chess.md ├── cloud.md ├── graph.md ├── builder.md ├── parsing.md ├── spinner.md ├── visitor.md ├── algorithm.md ├── bitboard.md ├── decorator.md ├── strategy.md ├── tokenizer.md ├── data-access.md ├── test-helper.md ├── token-bucket.md ├── data-structure.md ├── rate-limiting.md ├── spell-checking.md ├── test-framework.md ├── connection-pool.md ├── leader-election.md ├── template-method.md ├── status-reporting.md ├── chaos-engineering.md └── infrastructure-as-code.md ├── _projects ├── bat.md ├── buck.md ├── jest.md ├── httpie.md ├── puppet.md ├── scylla.md ├── aws-cdk.md ├── aws-cli.md ├── homebrew.md ├── protobuf.md ├── stockfish.md ├── terraform.md ├── zookeeper.md ├── error-prone.md ├── firecracker.md ├── vscode.md ├── chaos-monkey.md └── did_you_mean.md ├── blog.html ├── .github └── ISSUE_TEMPLATE │ ├── feature-request.md │ ├── article-improvement.md │ ├── bug.md │ └── new-example-proposal.md ├── _articles ├── TEST.md ├── bat-decorations.md ├── jest-test-sequencer.md ├── httpie-status-reporting.md ├── puppet-connection-pool.md ├── jest-diff.md ├── did_you_mean-spell-checking.md ├── vscode-skip-list.md ├── buck-artifact-cache-decorators.md ├── chaos-monkey-store.md ├── protobuf-tokenizer.md ├── aws-cdk-template-diff.md ├── error-prone-test-helper.md └── zookeeper-trie.md ├── _includes ├── footer_custom.html ├── need-your-help.html ├── article-meta.html ├── article-list.html └── nav.html ├── 404.html ├── Gemfile ├── _posts ├── 2021-07-16-welcome.html ├── 2021-08-09-puppet-connection-pool.html ├── 2021-09-25-scylla-raft-leader-election.md ├── 2021-08-09-jest-test-sequencer.html ├── 2021-08-31-jest-diff.md ├── 2021-09-15-vscode-skip-list.md ├── 2021-08-12-protobuf-tokenizer.html ├── 
2021-08-05-firecracker-rate-limiting.html ├── 2021-08-13-buck-artifact-cache-decorator.md ├── 2021-08-26-did_you_mean-spell-checking.html └── 2021-09-09-puppet-graph-algorithms.md ├── CONTRIBUTING.md ├── ARTICLE_TEMPLATE.md ├── _config.yml ├── README.md ├── index.md ├── Gemfile.lock └── _layouts └── default.html /CNAME: -------------------------------------------------------------------------------- 1 | codecatalog.org -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | _site 2 | .sass-cache 3 | .jekyll-cache 4 | .jekyll-metadata 5 | vendor 6 | -------------------------------------------------------------------------------- /_sass/custom/custom.scss: -------------------------------------------------------------------------------- 1 | .main-content table a { 2 | white-space: normal; 3 | } 4 | -------------------------------------------------------------------------------- /_languages/Go.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: Go 4 | --- 5 | 6 | {% include article-list.html name="Go" language="Go" %} 7 | -------------------------------------------------------------------------------- /_languages/C++.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: C++ 4 | --- 5 | 6 | {% include article-list.html name="C++" language="C++" %} 7 | -------------------------------------------------------------------------------- /_languages/Java.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: Java 4 | --- 5 | 6 | {% include article-list.html name="Java" language="Java" %} 7 | -------------------------------------------------------------------------------- /_languages/Ruby.md: 
-------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: Ruby 4 | --- 5 | 6 | {% include article-list.html name="Ruby" language="Ruby" %} 7 | -------------------------------------------------------------------------------- /_languages/Rust.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: Rust 4 | --- 5 | 6 | {% include article-list.html name="Rust" language="Rust" %} 7 | -------------------------------------------------------------------------------- /_tags/aws.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: aws 4 | --- 5 | 6 | {% include article-list.html name="Tagged with 'aws'" tag="aws" %} 7 | -------------------------------------------------------------------------------- /_tags/cli.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: cli 4 | --- 5 | 6 | {% include article-list.html name="Tagged with 'cli'" tag="cli" %} 7 | -------------------------------------------------------------------------------- /_tags/dao.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: dao 4 | --- 5 | 6 | {% include article-list.html name="Tagged with 'dao'" tag="dao" %} 7 | -------------------------------------------------------------------------------- /_tags/diff.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: diff 4 | --- 5 | 6 | {% include article-list.html name="Tagged with 'diff'" tag="diff" %} 7 | -------------------------------------------------------------------------------- /_tags/dsl.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: dsl 4 | --- 5 | 6 | {% include 
article-list.html name="Tagged with 'dsl'" tag="dsl" %} 7 | -------------------------------------------------------------------------------- /_tags/sql.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: sql 4 | --- 5 | 6 | {% include article-list.html name="Tagged with 'sql'" tag="sql" %} 7 | -------------------------------------------------------------------------------- /_tags/trie.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: trie 4 | --- 5 | 6 | {% include article-list.html name="Tagged with 'trie'" tag="trie" %} 7 | -------------------------------------------------------------------------------- /_languages/Python.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: Python 4 | --- 5 | 6 | {% include article-list.html name="Python" language="Python" %} 7 | -------------------------------------------------------------------------------- /_tags/chess.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: chess 4 | --- 5 | 6 | {% include article-list.html name="Tagged with 'chess'" tag="chess" %} 7 | -------------------------------------------------------------------------------- /_tags/cloud.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: cloud 4 | --- 5 | 6 | {% include article-list.html name="Tagged with 'cloud'" tag="cloud" %} 7 | -------------------------------------------------------------------------------- /_tags/graph.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: graph 4 | --- 5 | 6 | {% include article-list.html name="Tagged with 'graph'" tag="graph" %} 7 | 
-------------------------------------------------------------------------------- /_projects/bat.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: Bat 4 | key: bat 5 | --- 6 | 7 | {% include article-list.html name="Bat" project-key="bat" %} 8 | -------------------------------------------------------------------------------- /_projects/buck.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: Buck 4 | key: buck 5 | --- 6 | 7 | {% include article-list.html name="Buck" project-key="buck" %} 8 | -------------------------------------------------------------------------------- /_projects/jest.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: Jest 4 | key: jest 5 | --- 6 | 7 | {% include article-list.html name="Jest" project-key="jest" %} 8 | -------------------------------------------------------------------------------- /_tags/builder.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: builder 4 | --- 5 | 6 | {% include article-list.html name="Tagged with 'builder'" tag="builder" %} 7 | -------------------------------------------------------------------------------- /_tags/parsing.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: parsing 4 | --- 5 | 6 | {% include article-list.html name="Tagged with 'parsing'" tag="parsing" %} 7 | -------------------------------------------------------------------------------- /_tags/spinner.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: spinner 4 | --- 5 | 6 | {% include article-list.html name="Tagged with 'spinner'" tag="spinner" %} 7 | 
-------------------------------------------------------------------------------- /_tags/visitor.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: visitor 4 | --- 5 | 6 | {% include article-list.html name="Tagged with 'visitor'" tag="visitor" %} 7 | -------------------------------------------------------------------------------- /_languages/TypeScript.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: TypeScript 4 | --- 5 | 6 | {% include article-list.html name="TypeScript" language="TypeScript" %} 7 | -------------------------------------------------------------------------------- /_tags/algorithm.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: algorithm 4 | --- 5 | 6 | {% include article-list.html name="Tagged with 'algorithm'" tag="algorithm" %} 7 | -------------------------------------------------------------------------------- /_tags/bitboard.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: bitboard 4 | --- 5 | 6 | {% include article-list.html name="Tagged with 'bitboard'" tag="bitboard" %} 7 | -------------------------------------------------------------------------------- /_tags/decorator.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: decorator 4 | --- 5 | 6 | {% include article-list.html name="Tagged with 'decorator'" tag="decorator" %} 7 | -------------------------------------------------------------------------------- /_tags/strategy.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: strategy 4 | --- 5 | 6 | {% include article-list.html name="Tagged with 'strategy'" tag="strategy" %} 7 | 
-------------------------------------------------------------------------------- /_tags/tokenizer.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: tokenizer 4 | --- 5 | 6 | {% include article-list.html name="Tagged with 'tokenizer'" tag="tokenizer" %} 7 | -------------------------------------------------------------------------------- /blog.html: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: "Blog" 4 | permalink: blog 5 | --- 6 | 7 | {% for post in site.posts %} 8 | {{ post.content }} 9 | {% endfor %} -------------------------------------------------------------------------------- /_projects/httpie.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: HTTPie 4 | key: httpie 5 | --- 6 | 7 | {% include article-list.html name="HTTPie" project-key="httpie" %} 8 | -------------------------------------------------------------------------------- /_projects/puppet.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: Puppet 4 | key: puppet 5 | --- 6 | 7 | {% include article-list.html name="Puppet" project-key="puppet" %} 8 | -------------------------------------------------------------------------------- /_projects/scylla.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: Scylla 4 | key: scylla 5 | --- 6 | 7 | {% include article-list.html name="Scylla" project-key="scylla" %} 8 | -------------------------------------------------------------------------------- /_projects/aws-cdk.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: AWS CDK 4 | key: aws-cdk 5 | --- 6 | 7 | {% include article-list.html name="AWS CDK" project-key="aws-cdk" %} 8 | 
-------------------------------------------------------------------------------- /_projects/aws-cli.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: AWS CLI 4 | key: aws-cli 5 | --- 6 | 7 | {% include article-list.html name="AWS CLI" project-key="aws-cli" %} 8 | -------------------------------------------------------------------------------- /_tags/data-access.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: data-access 4 | --- 5 | 6 | {% include article-list.html name="Tagged with 'data-access'" tag="data-access" %} 7 | -------------------------------------------------------------------------------- /_tags/test-helper.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: test-helper 4 | --- 5 | 6 | {% include article-list.html name="Tagged with 'test-helper'" tag="test-helper" %} 7 | -------------------------------------------------------------------------------- /_projects/homebrew.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: Homebrew 4 | key: homebrew 5 | --- 6 | 7 | {% include article-list.html name="Homebrew" project-key="homebrew" %} 8 | -------------------------------------------------------------------------------- /_projects/protobuf.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: Protobuf 4 | key: protobuf 5 | --- 6 | 7 | {% include article-list.html name="Protobuf" project-key="protobuf" %} 8 | -------------------------------------------------------------------------------- /_tags/token-bucket.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: token-bucket 4 | --- 5 | 6 | {% include article-list.html name="Tagged 
with 'token-bucket'" tag="token-bucket" %} 7 | -------------------------------------------------------------------------------- /_projects/stockfish.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: Stockfish 4 | key: stockfish 5 | --- 6 | 7 | {% include article-list.html name="Stockfish" project-key="stockfish" %} 8 | -------------------------------------------------------------------------------- /_projects/terraform.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: Terraform 4 | key: terraform 5 | --- 6 | 7 | {% include article-list.html name="Terraform" project-key="terraform" %} 8 | -------------------------------------------------------------------------------- /_projects/zookeeper.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: ZooKeeper 4 | key: zookeeper 5 | --- 6 | 7 | {% include article-list.html name="ZooKeeper" project-key="zookeeper" %} 8 | -------------------------------------------------------------------------------- /_tags/data-structure.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: data-structure 4 | --- 5 | 6 | {% include article-list.html name="Tagged with 'data-structure'" tag="data-structure" %} 7 | -------------------------------------------------------------------------------- /_tags/rate-limiting.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: rate-limiting 4 | --- 5 | 6 | {% include article-list.html name="Tagged with 'rate-limiting'" tag="rate-limiting" %} 7 | -------------------------------------------------------------------------------- /_tags/spell-checking.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: 
default 3 | title: spell-checking 4 | --- 5 | 6 | {% include article-list.html name="Tagged with 'spell-checking'" tag="spell-checking" %} 7 | -------------------------------------------------------------------------------- /_tags/test-framework.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: test-framework 4 | --- 5 | 6 | {% include article-list.html name="Tagged with 'test-framework'" tag="test-framework" %} 7 | -------------------------------------------------------------------------------- /_projects/error-prone.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: ErrorProne 4 | key: error-prone 5 | --- 6 | 7 | {% include article-list.html name="ErrorProne" project-key="error-prone" %} 8 | -------------------------------------------------------------------------------- /_tags/connection-pool.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: connection-pool 4 | --- 5 | 6 | {% include article-list.html name="Tagged with 'connection-pool'" tag="connection-pool" %} 7 | -------------------------------------------------------------------------------- /_tags/leader-election.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: leader-election 4 | --- 5 | 6 | {% include article-list.html name="Tagged with 'leader-election'" tag="leader-election" %} 7 | -------------------------------------------------------------------------------- /_tags/template-method.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: template-method 4 | --- 5 | 6 | {% include article-list.html name="Tagged with 'template-method'" tag="template-method" %} 7 | -------------------------------------------------------------------------------- 
/_projects/firecracker.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: Firecracker 4 | key: firecracker 5 | --- 6 | 7 | {% include article-list.html name="Firecracker" project-key="firecracker" %} 8 | -------------------------------------------------------------------------------- /_projects/vscode.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: Visual Studio Code 4 | key: vscode 5 | --- 6 | 7 | {% include article-list.html name="Visual Studio Code" project-key="vscode" %} 8 | -------------------------------------------------------------------------------- /_tags/status-reporting.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: status-reporting 4 | --- 5 | 6 | {% include article-list.html name="Tagged with 'status-reporting'" tag="status-reporting" %} 7 | -------------------------------------------------------------------------------- /_projects/chaos-monkey.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: Chaos Monkey 4 | key: chaos-monkey 5 | --- 6 | 7 | {% include article-list.html name="Chaos Monkey" project-key="chaos-monkey" %} 8 | -------------------------------------------------------------------------------- /_projects/did_you_mean.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: did_you_mean 4 | key: did_you_mean 5 | --- 6 | 7 | {% include article-list.html name="did_you_mean" project-key="did_you_mean" %} 8 | -------------------------------------------------------------------------------- /_tags/chaos-engineering.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: chaos-engineering 4 | --- 5 | 6 | {% include 
article-list.html name="Tagged with 'chaos-engineering'" tag="chaos-engineering" %} 7 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature-request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature Request 3 | about: Suggest an idea for this project. 4 | title: '' 5 | labels: enhancement 6 | assignees: '' 7 | 8 | --- 9 | 10 | 11 | -------------------------------------------------------------------------------- /_tags/infrastructure-as-code.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: infrastructure-as-code 4 | --- 5 | 6 | {% include article-list.html name="Tagged with 'infrastructure-as-code'" tag="infrastructure-as-code" %} 7 | -------------------------------------------------------------------------------- /_articles/TEST.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Test Article [TestLanguage]" 3 | layout: default 4 | nav_exclude: true 5 | 6 | status: DRAFT 7 | language: Test 8 | project: 9 | name: Test 10 | key: Test 11 | home-page: https://github.com/example/example 12 | tags: [test-tag-1] 13 | --- 14 | 15 | {% include article-meta.html article=page %} 16 | 17 | ## Content 18 | 19 | Test article, excluded from navigation. 20 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/article-improvement.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Article Improvement 3 | about: Suggest an improvement to an existing article or report an issue with it. 4 | title: '' 5 | labels: article improvement 6 | assignees: '' 7 | 8 | --- 9 | 10 | 13 | 14 | **Which article?** 15 | 16 | ... 17 | 18 | **How can it be improved?** 19 | 20 | ... 
21 | 22 | -------------------------------------------------------------------------------- /_includes/footer_custom.html: -------------------------------------------------------------------------------- 1 | {% if page.id contains "/articles/" or page.id contains "/2021/" %} 2 | 10 | {% endif %} 11 | 12 |

{{ site.footer_content }}

13 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug Report 3 | about: Report a bug. 4 | title: "" 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | 15 | 16 | **Describe the bug you encountered:** 17 | 18 | ... 19 | 20 | **What did you expect to happen instead?** 21 | 22 | ... 23 | 24 | -------------------------------------------------------------------------------- /_includes/need-your-help.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Help Code Catalog grow: suggest your favorite code or weigh in on open article proposals. 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /404.html: -------------------------------------------------------------------------------- 1 | --- 2 | permalink: /404.html 3 | layout: default 4 | --- 5 | 6 | 19 | 20 |
21 |

404

22 | 23 |

Page not found :(

24 |

The requested page could not be found.

25 |
26 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source "https://rubygems.org" 2 | 3 | 4 | gem "jekyll", "~> 3.9.0" 5 | gem 'just-the-docs' 6 | gem "github-pages", "~> 215", group: :jekyll_plugins 7 | group :jekyll_plugins do 8 | gem "jekyll-feed", "~> 0.12" 9 | end 10 | 11 | # Windows and JRuby do not include zoneinfo files, so bundle the tzinfo-data gem 12 | # and associated library. 13 | platforms :mingw, :x64_mingw, :mswin, :jruby do 14 | gem "tzinfo", "~> 1.2" 15 | gem "tzinfo-data" 16 | end 17 | 18 | # Performance-booster for watching directories on Windows 19 | gem "wdm", "~> 0.1.1", :platforms => [:mingw, :x64_mingw, :mswin] 20 | 21 | gem 'webrick' 22 | -------------------------------------------------------------------------------- /_posts/2021-07-16-welcome.html: -------------------------------------------------------------------------------- 1 | --- 2 | layout: post 3 | title: "Welcome to Code Catalog!" 4 | date: 2021-07-16 16:00:27 +0300 5 | --- 6 | 7 |
8 |

9 | 10 | {{ page.title }} 11 | 12 |

13 | 14 | 15 |

16 | Welcome to Code Catalog! If you are reading this, it means that we've launched. 17 |

18 | 19 |

20 | Stay up to date by subscribing to our Atom (RSS-like) feed or our newsletter. 21 |

22 |
23 | 24 | {% include need-your-help.html %} 25 | -------------------------------------------------------------------------------- /_posts/2021-08-09-puppet-connection-pool.html: -------------------------------------------------------------------------------- 1 | --- 2 | layout: post 3 | title: "New Article: HTTP Connection Pooling in Puppet" 4 | date: 2021-08-10T12:11:00+0300 5 | --- 6 | 7 |
8 |

9 | 10 | {{ page.title }} 11 | 12 |

13 | 14 | 15 |

16 | A new article about connection pools in Puppet has been added to the catalog. 17 |

18 | 19 |

20 | The code is written in Ruby. Its goal is to avoid making a new TCP connection and SSL handshake for each request by reusing persistent connections. 21 |

22 | 23 |

24 | Puppet - HTTP Connection Pool 25 |

26 |
27 | 28 | {% include need-your-help.html %} 29 | -------------------------------------------------------------------------------- /_posts/2021-09-25-scylla-raft-leader-election.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: post 3 | title: "New Article: Leader Election with Raft in Scylla" 4 | date: 2021-09-25T18:02:00+0300 5 | --- 6 | 7 | ## [{{ page.title }}]({{ site.baseurl }}{{ page.url }}) 8 | 9 | 10 | 11 | A [new article]({{ site.baseurl }}/articles/scylla-raft-leader-election) about leader election with Raft in [ScyllaDB](https://www.scylladb.com/) has been added to the catalog. 12 | 13 | [Raft](https://en.wikipedia.org/wiki/Raft_(algorithm)) is a consensus algorithm that was designed to be easy to understand. In Raft all interactions with clients go through the leader node, which first needs to be elected. In this article, we look into how this is implemented in Scylla. 14 | 15 | The code is written in C++. 16 | 17 | [Scylla - Leader Election with Raft]({{ site.baseurl }}/articles/scylla-raft-leader-election) 18 | 19 | {% include need-your-help.html %} 20 | -------------------------------------------------------------------------------- /_posts/2021-08-09-jest-test-sequencer.html: -------------------------------------------------------------------------------- 1 | --- 2 | layout: post 3 | title: "New Article: Test Sequencing in Jest" 4 | date: 2021-08-09 20:19:00 +0300 5 | --- 6 | 7 |
8 |

9 | 10 | {{ page.title }} 11 | 12 |

13 | 14 | 15 |

16 | A new article about ordering tests in Jest has been added to the catalog. 17 |

18 | 19 |

20 | The code is written in TypeScript and attempts to improve user-perceived responsiveness and speed of the test run. It's achieved by prioritizing tests that failed during the last run and slower tests. 21 |

22 | 23 |

24 | Jest - Test Sequencer 25 |

26 |
27 | 28 | {% include need-your-help.html %} 29 | -------------------------------------------------------------------------------- /_posts/2021-08-31-jest-diff.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: post 3 | title: "New Article: Displaying Diffs in Jest" 4 | date: 2021-08-31T21:16:00+0300 5 | --- 6 | 7 | ## [{{ page.title }}]({{ site.baseurl }}{{ page.url }}) 8 | 9 | 10 | 11 | A [new article]({{ site.baseurl }}/articles/jest-diff) about displaying diffs in [Jest](https://jestjs.io) has been added to the catalog. This is our second article featuring Jest after [Jest - Test Sequencer]({{ site.baseurl }}/articles/jest-test-sequencer). 12 | 13 | We discuss how Jest shows human-readable diffs between the expected and the actual values when test assertions fail. We review the main flow of diffing two JavaScript objects, but not the underlying sequence difference implementation, which is based on the Myers algorithm. 14 | 15 | The code is written in TypeScript. 16 | 17 | [Jest - Displaying Diffs]({{ site.baseurl }}/articles/jest-diff) 18 | 19 | {% include need-your-help.html %} 20 | -------------------------------------------------------------------------------- /_posts/2021-09-15-vscode-skip-list.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: post 3 | title: "New Article: Skip Lists in Visual Studio Code" 4 | date: 2021-09-15T09:27:00+0300 5 | --- 6 | 7 | ## [{{ page.title }}]({{ site.baseurl }}{{ page.url }}) 8 | 9 | 10 | 11 | A [new article]({{ site.baseurl }}/articles/vscode-skip-list) about skip list implementation in [Visual Studio Code](https://code.visualstudio.com/) has been added to the catalog. 12 | 13 | [Skip List](https://en.wikipedia.org/wiki/Skip_list) is a probabilistic data structure that can be used instead of balanced trees in most applications. 
Unlike other data structures with similar performance, like [Red-black trees](https://en.wikipedia.org/wiki/Red%E2%80%93black_tree), skip lists are remarkably easy to understand and implement. 14 | 15 | The code is written in TypeScript. 16 | 17 | [Visual Studio Code - Skip Lists]({{ site.baseurl }}/articles/vscode-skip-list) 18 | 19 | {% include need-your-help.html %} 20 | -------------------------------------------------------------------------------- /_includes/article-meta.html: -------------------------------------------------------------------------------- 1 |

{{ include.article.title }}

2 | 3 |
4 |
Status
5 |
{{ include.article.status }}
6 | 7 |
Project
8 |
{{ include.article.project.name }}
9 | 10 |
Project home page
11 |
{{ include.article.project.home-page }}
12 | 13 |
Language
14 |
{{ include.article.language }}
15 | 16 |
Tags
17 |
18 | {%- for tag in include.article.tags -%} 19 | #{{ tag }} 20 | {{ " " }} 21 | {%- endfor -%} 22 |
23 |
24 | 25 | {% include need-your-help.html %} 26 | 27 |
28 | 29 | Table of contents 30 | 31 | {: .text-delta } 32 | 1. TOC 33 | {:toc} 34 |
35 | -------------------------------------------------------------------------------- /_posts/2021-08-12-protobuf-tokenizer.html: -------------------------------------------------------------------------------- 1 | --- 2 | layout: post 3 | title: "New Article: Protocol Buffers Tokenizer" 4 | date: 2021-08-12T16:44:00+0300 5 | --- 6 | 7 |
8 |

9 | 10 | {{ page.title }} 11 | 12 |

13 | 14 | 15 |

16 | A new article about tokenizing Protocol Buffers has been added to the catalog. 17 |

18 | 19 |

20 | This code was written in C++ back in 2008, and it's still used very widely at Google and outside. What stands out about it are how thoroughly the design decisions are documented and the personal tone of the comments. 21 |

22 | 23 |

24 | Protocol Buffers - Tokenizer 25 |

26 |
27 | 28 | {% include need-your-help.html %} 29 | -------------------------------------------------------------------------------- /_posts/2021-08-05-firecracker-rate-limiting.html: -------------------------------------------------------------------------------- 1 | --- 2 | layout: post 3 | title: "New Article: Rate Limiting in Firecracker" 4 | date: 2021-08-05 13:18:00 +0300 5 | --- 6 | 7 |
8 |

9 | 10 | {{ page.title }} 11 | 12 |

13 | 14 | 15 |

16 | A new article about rate limiting operations and bandwidth in Firecracker has been added to the catalog. 17 |

18 | 19 |

20 | The implementation is based on the Token Bucket algorithm. The code is written in Rust. It's clear, simple and wonderfully documented. 21 |

22 | 23 |

24 | Firecracker - Rate Limiting 25 |

26 |
27 | 28 | {% include need-your-help.html %} 29 | -------------------------------------------------------------------------------- /_posts/2021-08-13-buck-artifact-cache-decorator.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: post 3 | title: "New Article: Decorating Artifact Caches in Buck" 4 | date: 2021-08-13T12:19:00+0300 5 | --- 6 | 7 |
8 |

9 | 10 | {{ page.title }} 11 | 12 |

13 | 14 | 15 |

16 | A new article about decorators for artifact caches in Buck, a multi-language build system developed and used by Facebook, has been added to the catalog. 17 |

18 | 19 |

20 | The code is written in Java. It uses the classical Decorator design pattern to add behavior to individual cache instances without affecting the behavior of other objects from the same class. 21 |

22 | 23 |

24 | Buck - Artifact Cache Decorators 25 |

26 |
27 | 28 | {% include need-your-help.html %} 29 | 30 | -------------------------------------------------------------------------------- /_posts/2021-08-26-did_you_mean-spell-checking.html: -------------------------------------------------------------------------------- 1 | --- 2 | layout: post 3 | title: "New Article: Correcting Typos in Ruby with did_you_mean" 4 | date: 2021-08-26T16:55:00+0300 5 | --- 6 | 7 |
8 |

9 | 10 | {{ page.title }} 11 | 12 |

13 | 14 | 15 |

16 | A new article about using spell-checking to correct typos in Ruby code with the did_you_mean gem has been added to the catalog. The very existence of this gem sparks controversy about static vs dynamic languages. 17 |

18 | 19 |

20 | We also highly recommend listening to the gem author's talk about it. 21 |

22 | 23 |

24 | did_you_mean - Correcting Typos in Ruby 25 |

26 |
27 | 28 | {% include need-your-help.html %} 29 | 30 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/new-example-proposal.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: New example proposal 3 | about: Propose new example we could feature 4 | title: "[NEW EXAMPLE] PROJECT - TITLE" 5 | labels: new example 6 | assignees: '' 7 | 8 | --- 9 | 10 | ## General 11 | 12 | * **Project name**: *e.g. Project Foo* 13 | * **Example name**: *e.g. Project Foo's implementation of bar algorithm.* 14 | * **Project home page**: *e.g. https://github.com/foo/bar* 15 | * **Programming language(s)**: *e.g. TypeScript. Only mention language(s) used in that particular system/class/module/etc. we consider featuring.* 16 | 17 | ## Description 18 | 19 | *Describe what the proposed example does and how it does it. Assume that the reader is not familiar with the project at all.* 20 | 21 | ## Links 22 | 23 | *Add links to the code. Include tests.* 24 | 25 | *Prefer permalinks, e.g. https://github.com/ainzzorl/goodcode/blob/73642be77db9de09591b95ff8ef9f2764c581e14/README.md rather than https://github.com/ainzzorl/goodcode/blob/main/README.md* 26 | 27 | ## What makes it interesting 28 | 29 | *What makes the example interesting? How is it instructive? 
What can the reader learn from it?* 30 | 31 | ## Related work 32 | 33 | *Are you aware of any existing articles/publications/videos/walkthroughs dedicated to this piece of code?* 34 | 35 | ## Other 36 | 37 | *Anything else you want to add.* 38 | -------------------------------------------------------------------------------- /_posts/2021-09-09-puppet-graph-algorithms.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: post 3 | title: "New Article: Graph Algorithms in Puppet" 4 | date: 2021-09-09T14:53:00+0300 5 | --- 6 | 7 | ## [{{ page.title }}]({{ site.baseurl }}{{ page.url }}) 8 | 9 | 10 | 11 | A [new article]({{ site.baseurl }}/articles/puppet-graph-algorithms) about graph algorithms in [Puppet](https://puppet.com) has been added to the catalog. This is our second article featuring Puppet after [Puppet - HTTP Connection Pool]({{ site.baseurl }}/articles/puppet-connection-pool). 12 | 13 | We discuss how Puppet uses standard graph algorithms, such as [Tarjan's](https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm) strongly connected components algorithm, to manage dependencies between resources. It strongly resembles our older article about [graph algorithms in Terraform]({{ site.baseurl }}/articles/terraform-graph-algorithms) (if you read this article long ago, consider giving it another look - we recently made significant updates to it). We consider continuing reviewing uses of graph algorithms in popular open-source projects. 14 | 15 | The code is written in Ruby. 16 | 17 | [Puppet - Graph Algorithms]({{ site.baseurl }}/articles/puppet-graph-algorithms) 18 | 19 | {% include need-your-help.html %} 20 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | ## How to contribute to Code Catalog 2 | 3 | Thank you for considering contributing to Code Catalog. 
This project cannot succeed without volunteer effort. 4 | 5 | ### Did you find an inaccuracy in an article, or can you propose how to improve it? 6 | 7 | * Leave a comment under the article. 8 | * [Open a new issue](https://github.com/ainzzorl/goodcode/issues/new?assignees=&labels=article+improvement&template=article-improvement.md&title=). 9 | * Create a patch and open a new pull request. 10 | 11 | ### Did you find a bug on https://codecatalog.org? 12 | 13 | * [Open a new issue](https://github.com/ainzzorl/goodcode/issues/new?assignees=&labels=bug&template=bug.md&title=). 14 | 15 | ### Do you know a good code example that we could feature in an article? 16 | 17 | * [Open a new issue to propose it](https://github.com/ainzzorl/goodcode/issues/new?assignees=&labels=new+example&template=new-example-proposal.md&title=%5BNEW+EXAMPLE%5D+PROJECT+-+TITLE). 18 | 19 | ### Do you want to help make new articles better? 20 | 21 | * Search [proposed examples](https://github.com/ainzzorl/goodcode/issues?q=is%3Aissue+is%3Aopen+label%3A%22new+example%22). Leave comments with your ideas and opinions. Or simply [react](https://github.blog/2016-03-10-add-reactions-to-pull-requests-issues-and-comments/) to the proposals. 22 | 23 | ### Do you want to write a new article? 24 | 25 | * First, propose it in an issue. Don't start writing the article until you get positive feedback from a maintainer. 26 | * Start writing from the [article template](./ARTICLE_TEMPLATE.md). When it's ready, open a new pull request. 27 | -------------------------------------------------------------------------------- /_includes/article-list.html: -------------------------------------------------------------------------------- 1 | {% assign articles = site["articles"] | sort:"nav_order" %} 2 | 3 | {%- if include.name -%} 4 |

{{ include.name }}

5 | {%- endif -%} 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | {%- for article in articles -%} 15 | {%- unless article.nav_exclude -%} 16 | {%- if include.project-key == null or article.project.key == include.project-key -%} 17 | {%- if include.language == null or article.language == include.language -%} 18 | {%- if include.tag == null or article.tags contains include.tag -%} 19 | 20 | 23 | 26 | 29 | 35 | 36 | {%- endif -%} 37 | {%- endif -%} 38 | {%- endif -%} 39 | {%- endunless -%} 40 | {%- endfor -%} 41 |
TitleProjectLanguageTags
21 | {{ article.short-title }} 22 | 24 | {{ article.project.name }} 25 | 27 | {{ article.language }} 28 | 30 | {%- for tag in article.tags -%} 31 | #{{ tag }} 32 | {{ " " }} 33 | {%- endfor -%} 34 |
42 | -------------------------------------------------------------------------------- /ARTICLE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "PROJECT-NAME - EXAMPLE-NAME [LANGUAGE]" 3 | layout: default 4 | last_modified_date: 20XX-XX-XXTXX:XX:XX+XXXX 5 | nav_order: BIGGEST-USED-NAV-ORDER + 1 6 | 7 | status: DRAFT 8 | language: e.g. Java 9 | short-title: title without the name of the project and the language 10 | project: 11 | name: e.g. Project Foo 12 | key: e.g. foo. It must be unique across all projects in the catalog. 13 | home-page: https://github.com/foo/bar 14 | tags: ['whatever', 'tags', 'you', 'find', 'relevant'] 15 | --- 16 | 17 | {% include article-meta.html article=page %} 18 | 19 | ## Context 20 | 21 | *Give a high-level description of the project. Explain the part(s) of it that include the example in more detail.* 22 | 23 | ## Problem 24 | 25 | *Explain the problem solved by the featured code example.* 26 | 27 | ## Overview 28 | 29 | *Give an overview of the code. What it does, how it works, what patterns it employs, etc.* 30 | 31 | ## Implementation details 32 | 33 | *Dive deeper into the implementation. Include code snippets. Insert them as-is, except remove irrelevant details when necessary. Add permalinks to everything.* 34 | 35 | ## Testing 36 | 37 | *Describe how it's tested. Include snippets and links.* 38 | 39 | ## Observations 40 | 41 | *Add more observations about the code. Optional. Consider writing observations in relevant sections of "Implementation details".* 42 | 43 | ## Related 44 | 45 | *Discuss similar and/or related implementations. 
Optional.* 46 | 47 | ## References 48 | 49 | *Add references to relevant documents.* 50 | 51 | ## Copyright notice 52 | 53 | *What license is the project under.* 54 | 55 | *Who owns the copyright.* 56 | -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | remote_theme: pmarsceill/just-the-docs 2 | 3 | plugins: 4 | - jekyll-feed 5 | 6 | # Exclude from processing. 7 | # The following items will not be processed, by default. 8 | # Any item listed under the `exclude:` key here will be automatically added to 9 | # the internal "default list". 10 | # 11 | # Excluded items can be processed by explicitly listing the directories or 12 | # their entries' file path in the `include:` list. 13 | # 14 | exclude: 15 | - .sass-cache/ 16 | - .jekyll-cache/ 17 | - gemfiles/ 18 | - Gemfile 19 | - Gemfile.lock 20 | - node_modules/ 21 | - vendor/bundle/ 22 | - vendor/cache/ 23 | - vendor/gems/ 24 | - vendor/ruby/ 25 | 26 | title: Code Catalog 27 | 28 | collections: 29 | articles: 30 | permalink: "/:collection/:path/" 31 | output: true 32 | 33 | projects: 34 | permalink: "/:collection/:path/" 35 | output: true 36 | 37 | languages: 38 | permalink: "/:collection/:path/" 39 | output: true 40 | 41 | tags: 42 | permalink: "/:collection/:path/" 43 | output: true 44 | 45 | just_the_docs: 46 | collections: 47 | articles: 48 | name: Articles 49 | nav_exclude: false 50 | search_exclude: false 51 | 52 | languages: 53 | name: By Language 54 | nav_exclude: false 55 | search_exclude: false 56 | 57 | projects: 58 | name: By Project 59 | nav_exclude: false 60 | search_exclude: false 61 | 62 | tags: 63 | name: By Tag 64 | nav_exclude: false 65 | search_exclude: false 66 | 67 | # Footer content 68 | # appears at the bottom of every page's main content 69 | 70 | # Back to top link 71 | back_to_top: true 72 | back_to_top_text: "Back to top" 73 | 74 | footer_content: "Copyright © 
2021 Anton Emelyanov. Distributed by a Creative Commons Attribution 4.0 International license." 75 | 76 | # Footer last edited timestamp 77 | last_edit_timestamp: true # show or hide edit time - page must have `last_modified_date` defined in the frontmatter 78 | last_edit_time_format: "%b %e %Y at %I:%M %p" # uses ruby's time format: https://ruby-doc.org/stdlib-2.7.0/libdoc/time/rdoc/Time.html 79 | 80 | # Footer "Edit this page on GitHub" link text 81 | gh_edit_link: true 82 | gh_edit_link_text: "Edit this page on GitHub." 83 | gh_edit_repository: "https://github.com/ainzzorl/goodcode" 84 | gh_edit_branch: "main" 85 | gh_edit_view_mode: "tree" 86 | 87 | # Aux links for the upper right navigation 88 | aux_links: 89 | "Code Catalog on GitHub": 90 | - "//github.com/ainzzorl/goodcode" 91 | aux_links_new_tab: false 92 | 93 | # Google Analytics Tracking 94 | ga_tracking: UA-198267472-2 95 | ga_tracking_anonymize_ip: true # Use GDPR compliant Google Analytics settings 96 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Learning from Open Source 2 | 3 | When you ask someone how to get better at coding, the second most common response - right after writing more code - is to read other people's code and learn from it. 4 | 5 | While it sounds very reasonable, it's hard to implement it in practice. So you find the source for your favorite software on GitHub and start reading it. Firstly, for any established and mature product, the codebase is probably huge, complex and very hard to get started with. It's unlikely that the design is documented anywhere. Even if the high-level architecture is documented, the structure of the codebase almost certainly isn't. It probably depends on some libraries and frameworks you've never heard of. Unfamiliar terms and creative code names are all over the place. 
Making sense of it can be a challenge even for seasoned professionals. And how do you even start exploring the code? Open a random source file? 6 | 7 | It doesn't mean that you should not try to understand good open source projects. You should, and the experience can be very rewarding. Yet, it's not an easy way, and the learning curve will be steep. 8 | 9 | [Code Catalog](https://codecatalog.org) offers a more straightforward way to learn from open-source projects by presenting a curated collection of annotated code examples that we find instructive. The examples are: 10 | * Taken from **popular, established open-source projects**. 11 | * **Instructive**. They solve general problems, similar to what other coders could be facing in their projects. They use patterns that you could apply one day. 12 | * Mostly **self-contained**. They can be understood with little knowledge of the surrounding context. 13 | * **Small**-ish. One example can be read in one sitting. 14 | * **Non-trivial**. 15 | * **Good code!** At least in our opinion. 16 | 17 | Please note that the annotations are written by people who did not author the code they are describing. The article author's understanding of the code can be incomplete or even completely wrong. Your discretion is advised. 18 | 19 | 20 | # Articles 21 | 22 | Read the articles on https://codecatalog.org. The sources are located under [_articles](./_articles). 23 | 24 | Also see [proposed examples](https://github.com/ainzzorl/goodcode/issues?q=is%3Aissue+is%3Aopen+label%3A%22new+example%22). 25 | 26 | # FAQ 27 | 28 | ### Who is this project for? 29 | 30 | Software practitioners and intermediate+ learners of programming. 31 | 32 | The reader is expected to be able to read non-trivial code in the language used by the example. Most examples expect little domain knowledge and provide context necessary to understand them. 33 | 34 | ### What can I learn from this? 35 | 36 | * Style and best practices from prominent open-source projects. 
37 | * Applications of common patterns. 38 | * Battle-tested solutions to typical problems. 39 | 40 | ### Why so few examples? 41 | 42 | You'd be surprised how much time and effort it takes to find examples meeting our criteria, understand and then explain them. If it was easy, this project would add no value. 43 | 44 | We are working on adding more. See [proposed examples](https://github.com/ainzzorl/goodcode/issues?q=is%3Aopen+is%3Aissue+label%3A%22new+example%22). 45 | 46 | ### Who writes these articles? 47 | 48 | The founder of the project, [Anton Emelyanov](https://github.com/ainzzorl). Future content is expected to be crowd-sourced. Read more about [contributing to the project](#contributing). 49 | 50 | We believe that the code speaks for itself, though. Our job is to find interesting examples, provide necessary context and point out certain details. 51 | 52 | ### Why did you choose *Example X* from *Project Y* and not something else? It's not nearly the most interesting thing in *Project Y*. 53 | 54 | Because we find that *Example X* satisfies the criteria above. But there's no reason why there can only be one example from *Project Y*. Please [suggest adding another example](https://github.com/ainzzorl/goodcode/issues/new?assignees=&labels=new+example&template=new-example-proposal.md&title=%5BNEW+EXAMPLE%5D+PROJECT+-+TITLE). 55 | 56 | # Contributing 57 | 58 | This project cannot succeed without volunteer effort. Your help in improving existing or adding new articles is very welcome. Read the [contributing guide](https://github.com/ainzzorl/goodcode/blob/main/CONTRIBUTING.md) for details. 
59 | -------------------------------------------------------------------------------- /index.md: -------------------------------------------------------------------------------- 1 | --- 2 | layout: default 3 | title: A collection of code examples from prominent open-source projects 4 | --- 5 | 6 | # Learning from Open Source 7 | 8 | [Code Catalog](https://codecatalog.org) is a collection of instructive code examples with annotations. The examples are: 9 | * Taken from **popular, established open-source projects**. 10 | * **Instructive**. They solve general problems, similar to what other coders could be facing in their projects. They use patterns that you could apply one day. 11 | * Mostly **self-contained**. They can be understood with little knowledge of the surrounding context. 12 | * **Small**-ish. One example can be read in one sitting. 13 | * **Non-trivial**. 14 | * **Good code!** At least in our opinion. 15 | 16 | # Catalog 17 | 18 | {% include article-list.html %} 19 | 20 | # FAQ 21 | 22 | ### Why do we need this? 23 | 24 | When you ask someone how to get better at coding, the second most common response - right after writing more code - is to read other people's code and learn from it. 25 | 26 | While it sounds very reasonable, it's hard to implement it in practice. So you find the source for your favorite software on GitHub and start reading it. Firstly, for any established and mature product, the codebase is probably huge, complex and very hard to get started with. It's unlikely that the design is documented anywhere. Even if the high-level architecture is documented, the structure of the codebase almost certainly isn't. It probably depends on some libraries and frameworks you've never heard of. Unfamiliar terms and creative code names are all over the place. Making sense of it can be a challenge even for seasoned professionals. And how do you even start exploring the code? Open a random source file? 
27 | 28 | It doesn't mean that you should not try to understand good open source projects. You should, and the experience can be very rewarding. Yet, it's not an easy way, and the learning curve will be steep. 29 | 30 | Code Catalog strives to make learning from other people's code easier by finding good examples, explaining, annotating and categorizing them. 31 | 32 | If you find an example interesting, we encourage you to study the original codebase in more detail. You should have a better understanding of how it works by then. 33 | 34 | ### Who is this project for? 35 | 36 | Software practitioners and intermediate+ learners of programming. 37 | 38 | The reader is expected to be able to read non-trivial code in the language used by the example. Most examples expect little domain knowledge and provide context necessary to understand them. 39 | 40 | ### What can I learn from this? 41 | 42 | * Style and best practices from prominent open-source projects. 43 | * Applications of common patterns. 44 | * Battle-tested solutions to typical problems. 45 | 46 | ### Why so few examples? 47 | 48 | You'd be surprised how much time and effort it takes to find examples meeting our criteria, understand and then explain them. If it was easy, this project would add no value. 49 | 50 | We are working on adding more. See [proposed examples](https://github.com/ainzzorl/goodcode/issues?q=is%3Aopen+is%3Aissue+label%3A%22new+example%22). 51 | 52 | ### Who writes these articles? 53 | 54 | The founder of the project, [Anton Emelyanov](https://github.com/ainzzorl). Future content is expected to be crowd-sourced. Read more about [contributing to the project](#contributing). 55 | 56 | We believe that the code speaks for itself, though. Our job is to find interesting examples, provide necessary context and point out certain details. 57 | 58 | Please note that the articles are not written by the authors of the code they are describing. 
The article author's understanding of the code can be incomplete or even completely wrong. Your discretion is advised. 59 | 60 | ### Why did you choose *Example X* from *Project Y* and not something else? It's not nearly the most interesting thing in *Project Y*. 61 | 62 | Because we find that *Example X* satisfies the criteria above. But there's no reason why there can only be one example from *Project Y*. Please [suggest adding another example](https://github.com/ainzzorl/goodcode/issues/new?assignees=&labels=new+example&template=new-example-proposal.md&title=%5BNEW+EXAMPLE%5D+PROJECT+-+TITLE). 63 | 64 | # Contributing 65 | 66 | This project cannot succeed without volunteer effort. Your help in improving existing or adding new articles is very welcome. Read the [contributing guide](https://github.com/ainzzorl/goodcode/blob/main/CONTRIBUTING.md) for details. 67 | -------------------------------------------------------------------------------- /_includes/nav.html: -------------------------------------------------------------------------------- 1 | 120 | -------------------------------------------------------------------------------- /_articles/bat-decorations.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Bat - Text Decoration [Rust]" 3 | layout: default 4 | last_modified_date: 2021-08-01T18:34:00+0300 5 | nav_order: 8 6 | 7 | status: PUBLISHED 8 | language: Rust 9 | short-title: Text Decoration 10 | project: 11 | name: Bat 12 | key: bat 13 | home-page: https://github.com/sharkdp/bat 14 | tags: [cli, decorator] 15 | --- 16 | 17 | {% include article-meta.html article=page %} 18 | 19 | ## Context 20 | 21 | Bat is a *cat(1)* clone with syntax highlighting and Git integration. 22 | 23 | ## Problem 24 | 25 | Bat displays texts with "decorations": line numbers, change indicator, grid border. These decorations can be used in any combination depending on the user input. 
26 | 27 | ## Overview 28 | 29 | Text printing is done by [`InteractivePrinter`](https://github.com/sharkdp/bat/blob/375d55aa5d7f3390e33febcc40a8d629b22926ae/src/printer.rs#L102-L114). [`InteractivePrinter`](https://github.com/sharkdp/bat/blob/375d55aa5d7f3390e33febcc40a8d629b22926ae/src/printer.rs#L102-L114) maintains a list of [`Decoration`](https://github.com/sharkdp/bat/blob/375d55aa5d7f3390e33febcc40a8d629b22926ae/src/decorations.rs#L12-L20)s and populates it based on user config. 30 | 31 | [`Decoration`](https://github.com/sharkdp/bat/blob/375d55aa5d7f3390e33febcc40a8d629b22926ae/src/decorations.rs#L12-L20) trait has a method `generate` accepting `line_number`, `continuation` (if the line is being broken into shorter lines) and [`InteractivePrinter`](https://github.com/sharkdp/bat/blob/375d55aa5d7f3390e33febcc40a8d629b22926ae/src/printer.rs#L102-L114) and returning [`DecorationText`](https://github.com/sharkdp/bat/blob/375d55aa5d7f3390e33febcc40a8d629b22926ae/src/decorations.rs#L6-L10). The printer then prints all enabled decorations before printing the line content. 32 | 33 | ## Implementation details 34 | 35 | [`Decoration` trait](https://github.com/sharkdp/bat/blob/375d55aa5d7f3390e33febcc40a8d629b22926ae/src/decorations.rs#L12-L20): 36 | ```rust 37 | pub(crate) trait Decoration { 38 | fn generate( 39 | &self, 40 | line_number: usize, 41 | continuation: bool, 42 | printer: &InteractivePrinter, 43 | ) -> DecorationText; 44 | fn width(&self) -> usize; 45 | } 46 | ``` 47 | 48 | It resembles the [Decorator pattern](https://en.wikipedia.org/wiki/Decorator_pattern), but it's not quite the same. The classical Decorator wraps the original class to augment its behavior without changing its interface. 
49 | 50 | [One of the decorations](https://github.com/sharkdp/bat/blob/375d55aa5d7f3390e33febcc40a8d629b22926ae/src/decorations.rs#L22-L70): 51 | ```rust 52 | pub(crate) struct LineNumberDecoration { 53 | color: Style, 54 | cached_wrap: DecorationText, 55 | cached_wrap_invalid_at: usize, 56 | } 57 | 58 | impl LineNumberDecoration { 59 | pub(crate) fn new(colors: &Colors) -> Self { 60 | LineNumberDecoration { 61 | color: colors.line_number, 62 | cached_wrap_invalid_at: 10000, 63 | cached_wrap: DecorationText { 64 | text: colors.line_number.paint(" ".repeat(4)).to_string(), 65 | width: 4, 66 | }, 67 | } 68 | } 69 | } 70 | 71 | impl Decoration for LineNumberDecoration { 72 | fn generate( 73 | &self, 74 | line_number: usize, 75 | continuation: bool, 76 | _printer: &InteractivePrinter, 77 | ) -> DecorationText { 78 | if continuation { 79 | if line_number > self.cached_wrap_invalid_at { 80 | let new_width = self.cached_wrap.width + 1; 81 | return DecorationText { 82 | text: self.color.paint(" ".repeat(new_width)).to_string(), 83 | width: new_width, 84 | }; 85 | } 86 | 87 | self.cached_wrap.clone() 88 | } else { 89 | let plain: String = format!("{:4}", line_number); 90 | DecorationText { 91 | width: plain.len(), 92 | text: self.color.paint(plain).to_string(), 93 | } 94 | } 95 | } 96 | 97 | fn width(&self) -> usize { 98 | 4 99 | } 100 | } 101 | ``` 102 | 103 | [Instantiating decorations](https://github.com/sharkdp/bat/blob/375d55aa5d7f3390e33febcc40a8d629b22926ae/src/printer.rs#L133-L164): 104 | 105 | ```rust 106 | // Create decorations. 
107 | let mut decorations: Vec<Box<dyn Decoration>> = Vec::new(); 108 | 109 | if config.style_components.numbers() { 110 | decorations.push(Box::new(LineNumberDecoration::new(&colors))); 111 | } 112 | 113 | #[cfg(feature = "git")] 114 | { 115 | if config.style_components.changes() { 116 | decorations.push(Box::new(LineChangesDecoration::new(&colors))); 117 | } 118 | } 119 | 120 | let mut panel_width: usize = 121 | decorations.len() + decorations.iter().fold(0, |a, x| a + x.width()); 122 | 123 | // The grid border decoration isn't added until after the panel_width calculation, since the 124 | // print_horizontal_line, print_header, and print_footer functions all assume the panel 125 | // width is without the grid border. 126 | if config.style_components.grid() && !decorations.is_empty() { 127 | decorations.push(Box::new(GridBorderDecoration::new(&colors))); 128 | } 129 | 130 | // Disable the panel if the terminal is too small (i.e. can't fit 5 characters with the 131 | // panel showing). 132 | if config.term_width 133 | < (decorations.len() + decorations.iter().fold(0, |a, x| a + x.width())) + 5 134 | { 135 | decorations.clear(); 136 | panel_width = 0; 137 | } 138 | ``` 139 | 140 | [Applying decorations in `InteractivePrinter`](https://github.com/sharkdp/bat/blob/375d55aa5d7f3390e33febcc40a8d629b22926ae/src/printer.rs#L412-L424): 141 | ```rust 142 | // Line decorations. 143 | if self.panel_width > 0 { 144 | let decorations = self 145 | .decorations 146 | .iter() 147 | .map(|ref d| d.generate(line_number, false, self)) 148 | .collect::<Vec<_>>(); 149 | 150 | for deco in decorations { 151 | write!(handle, "{} ", deco.text)?; 152 | cursor_max -= deco.width + 1; 153 | } 154 | } 155 | ``` 156 | 157 | ## Testing 158 | 159 | It's tested with "snapshot tests". E.g.
[this test input](https://github.com/sharkdp/bat/blob/master/tests/snapshots/sample.rs) is expected to be rendered to [this](https://github.com/sharkdp/bat/blob/master/tests/snapshots/output/numbers.snapshot.txt) when line number decoration is enabled. 160 | 161 | ## References 162 | 163 | * [GitHub repo](https://github.com/sharkdp/bat) 164 | 165 | ## Copyright notice 166 | 167 | Bat is licensed under the [Apache License 2.0](https://github.com/sharkdp/bat/blob/master/LICENSE-APACHE). 168 | 169 | Copyright (c) 2018-2021 bat-developers. 170 | -------------------------------------------------------------------------------- /_articles/jest-test-sequencer.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Jest - Test Sequencer [TypeScript]" 3 | layout: default 4 | last_modified_date: 2021-08-09T20:18:00+0300 5 | nav_order: 12 6 | 7 | status: PUBLISHED 8 | language: TypeScript 9 | short-title: Test Sequencer 10 | project: 11 | name: Jest 12 | key: jest 13 | home-page: https://github.com/facebook/jest 14 | tags: ['test-framework'] 15 | --- 16 | 17 | {% include article-meta.html article=page %} 18 | 19 | ## Context 20 | 21 | *Jest is a JavaScript testing framework designed to ensure correctness of any JavaScript codebase. It allows you to write tests with an approachable, familiar and feature-rich API that gives you results quickly.* - [The official website](https://jestjs.io/). 22 | 23 | Jest runs tests in parallel. The number of workers defaults to the number of the cores available on your machine minus one for the main thread - see [`--maxWorkers`](https://jestjs.io/docs/cli#--maxworkersnumstring). 24 | 25 | ## Problem 26 | 27 | Jest needs to decide which tests should run first. 
Sorting tests [is very important](https://github.com/facebook/jest/blob/6886eeb667e5e68fc5d8d30c0779d27bdf56c584/packages/jest-test-sequencer/src/index.ts#L140-L154) because it has a great impact on the user-perceived responsiveness and speed of the test run. 28 | 29 | ## Overview 30 | 31 | Tests are sorted based on: 32 | 1. Has it failed during the last run? 33 | * Since it's important to provide the most expected feedback as quickly as possible. 34 | 1. How long did it take to run? 35 | * Because running long tests first is an effort to minimize worker idle time at the end of a long test run. 36 | 37 | And if that information is not available, they are sorted based on file size since big test files usually take longer to complete. 38 | 39 | ## Implementation details 40 | 41 | The [sorting method](https://github.com/facebook/jest/blob/master/packages/jest-test-sequencer/src/index.ts#L86-L113) implements the logic described above. The code is straightforward enough to not require further explanation. 42 | 43 | Note that newly added tests will run after failed tests but before the rest of the tests. 44 | 45 | ```typescript 46 | sort(tests: Array<Test>): Array<Test> { 47 | const stats: {[path: string]: number} = {}; 48 | const fileSize = ({path, context: {hasteFS}}: Test) => 49 | stats[path] || (stats[path] = hasteFS.getSize(path) || 0); 50 | const hasFailed = (cache: Cache, test: Test) => 51 | cache[test.path] && cache[test.path][0] === FAIL; 52 | const time = (cache: Cache, test: Test) => 53 | cache[test.path] && cache[test.path][1]; 54 | 55 | tests.forEach(test => (test.duration = time(this._getCache(test), test))); 56 | return tests.sort((testA, testB) => { 57 | const cacheA = this._getCache(testA); 58 | const cacheB = this._getCache(testB); 59 | const failedA = hasFailed(cacheA, testA); 60 | const failedB = hasFailed(cacheB, testB); 61 | const hasTimeA = testA.duration != null; 62 | if (failedA !== failedB) { 63 | return failedA ?
-1 : 1; 64 | } else if (hasTimeA != (testB.duration != null)) { 65 | // If only one of two tests has timing information, run it last 66 | return hasTimeA ? 1 : -1; 67 | } else if (testA.duration != null && testB.duration != null) { 68 | return testA.duration < testB.duration ? 1 : -1; 69 | } else { 70 | return fileSize(testA) < fileSize(testB) ? 1 : -1; 71 | } 72 | }); 73 | } 74 | ``` 75 | 76 | It's [invoked](https://github.com/facebook/jest/blob/5f4dd187d89070d07617444186684c20d9213031/packages/jest-core/src/runJest.ts#L187) from [runJest.ts](https://github.com/facebook/jest/blob/5f4dd187d89070d07617444186684c20d9213031/packages/jest-core/src/runJest.ts). 77 | 78 | [Reading](https://github.com/facebook/jest/blob/fdc74af37235354e077edeeee8aa2d1a4a863032/packages/jest-test-sequencer/src/index.ts#L45-L66) and [writing](https://github.com/facebook/jest/blob/fdc74af37235354e077edeeee8aa2d1a4a863032/packages/jest-test-sequencer/src/index.ts#L123-L140) the last run results. Note that different tests can run in different contexts. 79 | 80 | ```typescript 81 | _getCache(test: Test): Cache { 82 | const {context} = test; 83 | if (!this._cache.has(context) && context.config.cache) { 84 | const cachePath = this._getCachePath(context); 85 | if (fs.existsSync(cachePath)) { 86 | try { 87 | this._cache.set( 88 | context, 89 | JSON.parse(fs.readFileSync(cachePath, 'utf8')), 90 | ); 91 | } catch {} 92 | } 93 | } 94 | 95 | let cache = this._cache.get(context); 96 | if (!cache) { 97 | cache = {}; 98 | this._cache.set(context, cache); 99 | } 100 | 101 | return cache; 102 | } 103 | 104 | // ... 
105 | 106 | cacheResults(tests: Array, results: AggregatedResult): void { 107 | const map = Object.create(null); 108 | tests.forEach(test => (map[test.path] = test)); 109 | results.testResults.forEach(testResult => { 110 | if (testResult && map[testResult.testFilePath] && !testResult.skipped) { 111 | const cache = this._getCache(map[testResult.testFilePath]); 112 | const perf = testResult.perfStats; 113 | cache[testResult.testFilePath] = [ 114 | testResult.numFailingTests ? FAIL : SUCCESS, 115 | perf.runtime || 0, 116 | ]; 117 | } 118 | }); 119 | 120 | this._cache.forEach((cache, context) => 121 | fs.writeFileSync(this._getCachePath(context), JSON.stringify(cache)), 122 | ); 123 | } 124 | ``` 125 | 126 | There's also a [method](https://github.com/facebook/jest/blob/master/packages/jest-test-sequencer/src/index.ts#L115-L121), supporting the `--onlyFailures` option, to get tests that failed during the last run. It must have been placed in `TestSequencer` because it has access to the data about the last run. 127 | 128 | ## Testing 129 | 130 | The ordering logic is covered by unit tests. E.g. 
testing that it "sorts based on failures, timing information and file size": 131 | 132 | ```typescript 133 | test('sorts based on failures, timing information and file size', () => { 134 | fs.readFileSync.mockImplementationOnce(() => 135 | JSON.stringify({ 136 | '/test-a.js': [SUCCESS, 5], 137 | '/test-ab.js': [FAIL, 1], 138 | '/test-c.js': [FAIL], 139 | '/test-d.js': [SUCCESS, 2], 140 | '/test-efg.js': [FAIL], 141 | }), 142 | ); 143 | expect( 144 | sequencer.sort( 145 | toTests([ 146 | '/test-a.js', 147 | '/test-ab.js', 148 | '/test-c.js', 149 | '/test-d.js', 150 | '/test-efg.js', 151 | ]), 152 | ), 153 | ).toEqual([ 154 | {context, duration: undefined, path: '/test-efg.js'}, 155 | {context, duration: undefined, path: '/test-c.js'}, 156 | {context, duration: 1, path: '/test-ab.js'}, 157 | {context, duration: 5, path: '/test-a.js'}, 158 | {context, duration: 2, path: '/test-d.js'}, 159 | ]); 160 | }); 161 | ``` 162 | 163 | ## Related 164 | 165 | * Ordering tests [in jUnit4](https://github.com/junit-team/junit4/blob/9ad61c6bf757be8d8968fd5977ab3ae15b0c5aba/src/main/java/org/junit/runner/manipulation/Sorter.java). As far as we can see, it doesn't do similar tricks. 166 | * Ordering tests [in RSpec](https://github.com/rspec/rspec-core/blob/dc898adc3f98d841a43e22cdf62ae2250266c7b6/lib/rspec/core/ordering.rb). It supports *random* and *recently modified* modes. 167 | 168 | ## References 169 | 170 | * [GitHub Repo](https://github.com/facebook/jest) 171 | * [Official Website](https://jestjs.io/) 172 | 173 | ## Copyright notice 174 | 175 | Jest is licensed under the [MIT License](https://github.com/facebook/jest/blob/master/LICENSE). 176 | 177 | Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
178 | -------------------------------------------------------------------------------- /_articles/httpie-status-reporting.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "HTTPie - Reporting Download Progress [Python]" 3 | layout: default 4 | last_modified_date: 2021-08-01T18:40:00+0300 5 | nav_order: 10 6 | 7 | status: PUBLISHED 8 | language: Python 9 | short-title: Reporting Download Progress 10 | project: 11 | name: HTTPie 12 | key: httpie 13 | home-page: https://github.com/httpie/httpie 14 | tags: [cli, status-reporting, spinner] 15 | --- 16 | 17 | {% include article-meta.html article=page %} 18 | 19 | ## Context 20 | 21 | HTTPie (pronounced aitch-tee-tee-pie) is a command-line HTTP client. Its goal is to make CLI interaction with web services as human-friendly as possible. 22 | 23 | ## Problem 24 | 25 | HTTPie features a download mode in which it acts similarly to wget. When enabled using the `--download, -d` flag, a progress bar must be shown while the response body is being saved to a file. 26 | 27 | ## Overview 28 | 29 | Status reporting runs in its own thread. It wakes up every `tick` seconds, compares the current state to the previous, calculates metrics (download percentage, downloaded size, speed, ETA) and writes them to console. When it's done, it writes a summary. 30 | 31 | The speed is calculated on the interval since the last update. ETA is calculated simply as `(total_size - downloaded) / speed`. 32 | 33 | ## Implementation details 34 | 35 | [The code in question](https://github.com/httpie/httpie/blob/64c31d554a367abf876bd355f07dca6e41476c3f/httpie/downloads.py#L369-L480) is rather short: 36 | 37 | ```python 38 | class ProgressReporterThread(threading.Thread): 39 | """ 40 | Reports download progress based on its status. 41 | Uses threading to periodically update the status (speed, ETA, etc.). 
42 | """ 43 | 44 | def __init__( 45 | self, 46 | status: DownloadStatus, 47 | output: IO, 48 | tick=.1, 49 | update_interval=1 50 | ): 51 | super().__init__() 52 | self.status = status 53 | self.output = output 54 | self._tick = tick 55 | self._update_interval = update_interval 56 | self._spinner_pos = 0 57 | self._status_line = '' 58 | self._prev_bytes = 0 59 | self._prev_time = time() 60 | self._should_stop = threading.Event() 61 | 62 | def stop(self): 63 | """Stop reporting on next tick.""" 64 | self._should_stop.set() 65 | 66 | def run(self): 67 | while not self._should_stop.is_set(): 68 | if self.status.has_finished: 69 | self.sum_up() 70 | break 71 | 72 | self.report_speed() 73 | sleep(self._tick) 74 | 75 | def report_speed(self): 76 | 77 | now = time() 78 | 79 | if now - self._prev_time >= self._update_interval: 80 | downloaded = self.status.downloaded 81 | try: 82 | speed = ((downloaded - self._prev_bytes) 83 | / (now - self._prev_time)) 84 | except ZeroDivisionError: 85 | speed = 0 86 | 87 | if not self.status.total_size: 88 | self._status_line = PROGRESS_NO_CONTENT_LENGTH.format( 89 | downloaded=humanize_bytes(downloaded), 90 | speed=humanize_bytes(speed), 91 | ) 92 | else: 93 | try: 94 | percentage = downloaded / self.status.total_size * 100 95 | except ZeroDivisionError: 96 | percentage = 0 97 | 98 | if not speed: 99 | eta = '-:--:--' 100 | else: 101 | s = int((self.status.total_size - downloaded) / speed) 102 | h, s = divmod(s, 60 * 60) 103 | m, s = divmod(s, 60) 104 | eta = f'{h}:{m:0>2}:{s:0>2}' 105 | 106 | self._status_line = PROGRESS.format( 107 | percentage=percentage, 108 | downloaded=humanize_bytes(downloaded), 109 | speed=humanize_bytes(speed), 110 | eta=eta, 111 | ) 112 | 113 | self._prev_time = now 114 | self._prev_bytes = downloaded 115 | 116 | self.output.write( 117 | f'{CLEAR_LINE} {SPINNER[self._spinner_pos]} {self._status_line}' 118 | ) 119 | self.output.flush() 120 | 121 | self._spinner_pos = (self._spinner_pos + 1 122 | if 
self._spinner_pos + 1 != len(SPINNER) 123 | else 0) 124 | 125 | def sum_up(self): 126 | actually_downloaded = ( 127 | self.status.downloaded - self.status.resumed_from) 128 | time_taken = self.status.time_finished - self.status.time_started 129 | 130 | self.output.write(CLEAR_LINE) 131 | 132 | try: 133 | speed = actually_downloaded / time_taken 134 | except ZeroDivisionError: 135 | # Either time is 0 (not all systems provide `time.time` 136 | # with a better precision than 1 second), and/or nothing 137 | # has been downloaded. 138 | speed = actually_downloaded 139 | 140 | self.output.write(SUMMARY.format( 141 | downloaded=humanize_bytes(actually_downloaded), 142 | total=(self.status.total_size 143 | and humanize_bytes(self.status.total_size)), 144 | speed=humanize_bytes(speed), 145 | time=time_taken, 146 | )) 147 | self.output.flush() 148 | ``` 149 | 150 | To clear the line, it prints [this magic string](https://github.com/httpie/httpie/blob/64c31d554a367abf876bd355f07dca6e41476c3f/httpie/downloads.py#L25): ```CLEAR_LINE = '\r\033[K'```. It's a [CSI sequence](https://en.wikipedia.org/wiki/ANSI_escape_code#CSI_(Control_Sequence_Introducer)_sequences). 151 | 152 | The spinner simply [iterates between the 4 states](https://github.com/httpie/httpie/blob/64c31d554a367abf876bd355f07dca6e41476c3f/httpie/downloads.py#L454-L456): vertical line, forward slash, horizontal line, back slash. 153 | 154 | [The format strings](https://github.com/httpie/httpie/blob/64c31d554a367abf876bd355f07dca6e41476c3f/httpie/downloads.py#L25-L34) are defined above: 155 | ```python 156 | CLEAR_LINE = '\r\033[K' 157 | PROGRESS = ( 158 | '{percentage: 6.2f} %' 159 | ' {downloaded: >10}' 160 | ' {speed: >10}/s' 161 | ' {eta: >8} ETA' 162 | ) 163 | PROGRESS_NO_CONTENT_LENGTH = '{downloaded: >10} {speed: >10}/s' 164 | SUMMARY = 'Done. 
{downloaded} in {time:0.5f}s ({speed}/s)\n' 165 | SPINNER = '|/-\\' 166 | ``` 167 | 168 | [A nice method to "return a humanized string representation of a number of bytes"](https://github.com/httpie/httpie/blob/64c31d554a367abf876bd355f07dca6e41476c3f/httpie/utils.py#L23-L64), borrowed from elsewhere: 169 | 170 | ```python 171 | def humanize_bytes(n, precision=2): 172 | # Author: Doug Latornell 173 | # Licence: MIT 174 | # URL: https://code.activestate.com/recipes/577081/ 175 | """Return a humanized string representation of a number of bytes. 176 | >>> humanize_bytes(1) 177 | '1 B' 178 | >>> humanize_bytes(1024, precision=1) 179 | '1.0 kB' 180 | >>> humanize_bytes(1024 * 123, precision=1) 181 | '123.0 kB' 182 | >>> humanize_bytes(1024 * 12342, precision=1) 183 | '12.1 MB' 184 | >>> humanize_bytes(1024 * 12342, precision=2) 185 | '12.05 MB' 186 | >>> humanize_bytes(1024 * 1234, precision=2) 187 | '1.21 MB' 188 | >>> humanize_bytes(1024 * 1234 * 1111, precision=2) 189 | '1.31 GB' 190 | >>> humanize_bytes(1024 * 1234 * 1111, precision=1) 191 | '1.3 GB' 192 | """ 193 | abbrevs = [ 194 | (1 << 50, 'PB'), 195 | (1 << 40, 'TB'), 196 | (1 << 30, 'GB'), 197 | (1 << 20, 'MB'), 198 | (1 << 10, 'kB'), 199 | (1, 'B') 200 | ] 201 | 202 | if n == 1: 203 | return '1 B' 204 | 205 | for factor, suffix in abbrevs: 206 | if n >= factor: 207 | break 208 | 209 | # noinspection PyUnboundLocalVariable 210 | return f'{n / factor:.{precision}f} {suffix}' 211 | ``` 212 | 213 | ## Testing 214 | 215 | Automated testing for this functionality seems to be lacking. 
216 | 217 | ## Observations 218 | 219 | * Updating spinner position could be simplified: 220 | ```python 221 | self._spinner_pos = (self._spinner_pos + 1 222 | if self._spinner_pos + 1 != len(SPINNER) 223 | else 0) 224 | ``` 225 | to 226 | ```python 227 | self._spinner_pos = (self._spinner_pos + 1) % len(SPINNER) 228 | ``` 229 | * [Done!](https://github.com/httpie/httpie/pull/1111) 230 | 231 | ## Related 232 | 233 | * [cli-progress](https://github.com/npkgz/cli-progress) - "easy to use progress-bar for command-line/terminal applications". 234 | 235 | ## References 236 | 237 | * [GitHub repo](https://github.com/httpie/httpie) 238 | 239 | ## Copyright notice 240 | 241 | HTTPie is licensed under the [BSD 3-Clause "New" or "Revised" License](https://github.com/httpie/httpie/blob/master/LICENSE). 242 | 243 | Copyright © 2012-2021 Jakub Roztocil 244 | -------------------------------------------------------------------------------- /Gemfile.lock: -------------------------------------------------------------------------------- 1 | GEM 2 | remote: https://rubygems.org/ 3 | specs: 4 | activesupport (6.0.4) 5 | concurrent-ruby (~> 1.0, >= 1.0.2) 6 | i18n (>= 0.7, < 2) 7 | minitest (~> 5.1) 8 | tzinfo (~> 1.1) 9 | zeitwerk (~> 2.2, >= 2.2.2) 10 | addressable (2.8.0) 11 | public_suffix (>= 2.0.2, < 5.0) 12 | coffee-script (2.4.1) 13 | coffee-script-source 14 | execjs 15 | coffee-script-source (1.11.1) 16 | colorator (1.1.0) 17 | commonmarker (0.17.13) 18 | ruby-enum (~> 0.5) 19 | concurrent-ruby (1.1.9) 20 | dnsruby (1.61.7) 21 | simpleidn (~> 0.1) 22 | em-websocket (0.5.2) 23 | eventmachine (>= 0.12.9) 24 | http_parser.rb (~> 0.6.0) 25 | ethon (0.14.0) 26 | ffi (>= 1.15.0) 27 | eventmachine (1.2.7) 28 | execjs (2.8.1) 29 | faraday (1.5.1) 30 | faraday-em_http (~> 1.0) 31 | faraday-em_synchrony (~> 1.0) 32 | faraday-excon (~> 1.1) 33 | faraday-httpclient (~> 1.0.1) 34 | faraday-net_http (~> 1.0) 35 | faraday-net_http_persistent (~> 1.1) 36 | faraday-patron (~> 1.0) 37 | 
multipart-post (>= 1.2, < 3) 38 | ruby2_keywords (>= 0.0.4) 39 | faraday-em_http (1.0.0) 40 | faraday-em_synchrony (1.0.0) 41 | faraday-excon (1.1.0) 42 | faraday-httpclient (1.0.1) 43 | faraday-net_http (1.0.1) 44 | faraday-net_http_persistent (1.2.0) 45 | faraday-patron (1.0.0) 46 | ffi (1.15.3) 47 | forwardable-extended (2.6.0) 48 | gemoji (3.0.1) 49 | github-pages (215) 50 | github-pages-health-check (= 1.17.2) 51 | jekyll (= 3.9.0) 52 | jekyll-avatar (= 0.7.0) 53 | jekyll-coffeescript (= 1.1.1) 54 | jekyll-commonmark-ghpages (= 0.1.6) 55 | jekyll-default-layout (= 0.1.4) 56 | jekyll-feed (= 0.15.1) 57 | jekyll-gist (= 1.5.0) 58 | jekyll-github-metadata (= 2.13.0) 59 | jekyll-mentions (= 1.6.0) 60 | jekyll-optional-front-matter (= 0.3.2) 61 | jekyll-paginate (= 1.1.0) 62 | jekyll-readme-index (= 0.3.0) 63 | jekyll-redirect-from (= 0.16.0) 64 | jekyll-relative-links (= 0.6.1) 65 | jekyll-remote-theme (= 0.4.3) 66 | jekyll-sass-converter (= 1.5.2) 67 | jekyll-seo-tag (= 2.7.1) 68 | jekyll-sitemap (= 1.4.0) 69 | jekyll-swiss (= 1.0.0) 70 | jekyll-theme-architect (= 0.1.1) 71 | jekyll-theme-cayman (= 0.1.1) 72 | jekyll-theme-dinky (= 0.1.1) 73 | jekyll-theme-hacker (= 0.1.2) 74 | jekyll-theme-leap-day (= 0.1.1) 75 | jekyll-theme-merlot (= 0.1.1) 76 | jekyll-theme-midnight (= 0.1.1) 77 | jekyll-theme-minimal (= 0.1.1) 78 | jekyll-theme-modernist (= 0.1.1) 79 | jekyll-theme-primer (= 0.5.4) 80 | jekyll-theme-slate (= 0.1.1) 81 | jekyll-theme-tactile (= 0.1.1) 82 | jekyll-theme-time-machine (= 0.1.1) 83 | jekyll-titles-from-headings (= 0.5.3) 84 | jemoji (= 0.12.0) 85 | kramdown (= 2.3.1) 86 | kramdown-parser-gfm (= 1.1.0) 87 | liquid (= 4.0.3) 88 | mercenary (~> 0.3) 89 | minima (= 2.5.1) 90 | nokogiri (>= 1.10.4, < 2.0) 91 | rouge (= 3.26.0) 92 | terminal-table (~> 1.4) 93 | github-pages-health-check (1.17.2) 94 | addressable (~> 2.3) 95 | dnsruby (~> 1.60) 96 | octokit (~> 4.0) 97 | public_suffix (>= 2.0.2, < 5.0) 98 | typhoeus (~> 1.3) 99 | html-pipeline (2.14.0) 
100 | activesupport (>= 2) 101 | nokogiri (>= 1.4) 102 | http_parser.rb (0.6.0) 103 | i18n (0.9.5) 104 | concurrent-ruby (~> 1.0) 105 | jekyll (3.9.0) 106 | addressable (~> 2.4) 107 | colorator (~> 1.0) 108 | em-websocket (~> 0.5) 109 | i18n (~> 0.7) 110 | jekyll-sass-converter (~> 1.0) 111 | jekyll-watch (~> 2.0) 112 | kramdown (>= 1.17, < 3) 113 | liquid (~> 4.0) 114 | mercenary (~> 0.3.3) 115 | pathutil (~> 0.9) 116 | rouge (>= 1.7, < 4) 117 | safe_yaml (~> 1.0) 118 | jekyll-avatar (0.7.0) 119 | jekyll (>= 3.0, < 5.0) 120 | jekyll-coffeescript (1.1.1) 121 | coffee-script (~> 2.2) 122 | coffee-script-source (~> 1.11.1) 123 | jekyll-commonmark (1.3.1) 124 | commonmarker (~> 0.14) 125 | jekyll (>= 3.7, < 5.0) 126 | jekyll-commonmark-ghpages (0.1.6) 127 | commonmarker (~> 0.17.6) 128 | jekyll-commonmark (~> 1.2) 129 | rouge (>= 2.0, < 4.0) 130 | jekyll-default-layout (0.1.4) 131 | jekyll (~> 3.0) 132 | jekyll-feed (0.15.1) 133 | jekyll (>= 3.7, < 5.0) 134 | jekyll-gist (1.5.0) 135 | octokit (~> 4.2) 136 | jekyll-github-metadata (2.13.0) 137 | jekyll (>= 3.4, < 5.0) 138 | octokit (~> 4.0, != 4.4.0) 139 | jekyll-mentions (1.6.0) 140 | html-pipeline (~> 2.3) 141 | jekyll (>= 3.7, < 5.0) 142 | jekyll-optional-front-matter (0.3.2) 143 | jekyll (>= 3.0, < 5.0) 144 | jekyll-paginate (1.1.0) 145 | jekyll-readme-index (0.3.0) 146 | jekyll (>= 3.0, < 5.0) 147 | jekyll-redirect-from (0.16.0) 148 | jekyll (>= 3.3, < 5.0) 149 | jekyll-relative-links (0.6.1) 150 | jekyll (>= 3.3, < 5.0) 151 | jekyll-remote-theme (0.4.3) 152 | addressable (~> 2.0) 153 | jekyll (>= 3.5, < 5.0) 154 | jekyll-sass-converter (>= 1.0, <= 3.0.0, != 2.0.0) 155 | rubyzip (>= 1.3.0, < 3.0) 156 | jekyll-sass-converter (1.5.2) 157 | sass (~> 3.4) 158 | jekyll-seo-tag (2.7.1) 159 | jekyll (>= 3.8, < 5.0) 160 | jekyll-sitemap (1.4.0) 161 | jekyll (>= 3.7, < 5.0) 162 | jekyll-swiss (1.0.0) 163 | jekyll-theme-architect (0.1.1) 164 | jekyll (~> 3.5) 165 | jekyll-seo-tag (~> 2.0) 166 | jekyll-theme-cayman (0.1.1) 
167 | jekyll (~> 3.5) 168 | jekyll-seo-tag (~> 2.0) 169 | jekyll-theme-dinky (0.1.1) 170 | jekyll (~> 3.5) 171 | jekyll-seo-tag (~> 2.0) 172 | jekyll-theme-hacker (0.1.2) 173 | jekyll (> 3.5, < 5.0) 174 | jekyll-seo-tag (~> 2.0) 175 | jekyll-theme-leap-day (0.1.1) 176 | jekyll (~> 3.5) 177 | jekyll-seo-tag (~> 2.0) 178 | jekyll-theme-merlot (0.1.1) 179 | jekyll (~> 3.5) 180 | jekyll-seo-tag (~> 2.0) 181 | jekyll-theme-midnight (0.1.1) 182 | jekyll (~> 3.5) 183 | jekyll-seo-tag (~> 2.0) 184 | jekyll-theme-minimal (0.1.1) 185 | jekyll (~> 3.5) 186 | jekyll-seo-tag (~> 2.0) 187 | jekyll-theme-modernist (0.1.1) 188 | jekyll (~> 3.5) 189 | jekyll-seo-tag (~> 2.0) 190 | jekyll-theme-primer (0.5.4) 191 | jekyll (> 3.5, < 5.0) 192 | jekyll-github-metadata (~> 2.9) 193 | jekyll-seo-tag (~> 2.0) 194 | jekyll-theme-slate (0.1.1) 195 | jekyll (~> 3.5) 196 | jekyll-seo-tag (~> 2.0) 197 | jekyll-theme-tactile (0.1.1) 198 | jekyll (~> 3.5) 199 | jekyll-seo-tag (~> 2.0) 200 | jekyll-theme-time-machine (0.1.1) 201 | jekyll (~> 3.5) 202 | jekyll-seo-tag (~> 2.0) 203 | jekyll-titles-from-headings (0.5.3) 204 | jekyll (>= 3.3, < 5.0) 205 | jekyll-watch (2.2.1) 206 | listen (~> 3.0) 207 | jemoji (0.12.0) 208 | gemoji (~> 3.0) 209 | html-pipeline (~> 2.2) 210 | jekyll (>= 3.0, < 5.0) 211 | just-the-docs (0.3.3) 212 | jekyll (>= 3.8.5) 213 | jekyll-seo-tag (~> 2.0) 214 | rake (>= 12.3.1, < 13.1.0) 215 | kramdown (2.3.1) 216 | rexml 217 | kramdown-parser-gfm (1.1.0) 218 | kramdown (~> 2.0) 219 | liquid (4.0.3) 220 | listen (3.6.0) 221 | rb-fsevent (~> 0.10, >= 0.10.3) 222 | rb-inotify (~> 0.9, >= 0.9.10) 223 | mercenary (0.3.6) 224 | minima (2.5.1) 225 | jekyll (>= 3.5, < 5.0) 226 | jekyll-feed (~> 0.9) 227 | jekyll-seo-tag (~> 2.1) 228 | minitest (5.14.4) 229 | multipart-post (2.1.1) 230 | nokogiri (1.11.7-x86_64-darwin) 231 | racc (~> 1.4) 232 | octokit (4.21.0) 233 | faraday (>= 0.9) 234 | sawyer (~> 0.8.0, >= 0.5.3) 235 | pathutil (0.16.2) 236 | forwardable-extended (~> 2.6) 237 | 
public_suffix (4.0.6) 238 | racc (1.5.2) 239 | rake (13.0.6) 240 | rb-fsevent (0.11.0) 241 | rb-inotify (0.10.1) 242 | ffi (~> 1.0) 243 | rexml (3.2.5) 244 | rouge (3.26.0) 245 | ruby-enum (0.9.0) 246 | i18n 247 | ruby2_keywords (0.0.5) 248 | rubyzip (2.3.2) 249 | safe_yaml (1.0.5) 250 | sass (3.7.4) 251 | sass-listen (~> 4.0.0) 252 | sass-listen (4.0.0) 253 | rb-fsevent (~> 0.9, >= 0.9.4) 254 | rb-inotify (~> 0.9, >= 0.9.7) 255 | sawyer (0.8.2) 256 | addressable (>= 2.3.5) 257 | faraday (> 0.8, < 2.0) 258 | simpleidn (0.2.1) 259 | unf (~> 0.1.4) 260 | terminal-table (1.8.0) 261 | unicode-display_width (~> 1.1, >= 1.1.1) 262 | thread_safe (0.3.6) 263 | typhoeus (1.4.0) 264 | ethon (>= 0.9.0) 265 | tzinfo (1.2.9) 266 | thread_safe (~> 0.1) 267 | unf (0.1.4) 268 | unf_ext 269 | unf_ext (0.0.7.7) 270 | unicode-display_width (1.7.0) 271 | webrick (1.7.0) 272 | zeitwerk (2.4.2) 273 | 274 | PLATFORMS 275 | x86_64-darwin-20 276 | 277 | DEPENDENCIES 278 | github-pages (~> 215) 279 | jekyll (~> 3.9.0) 280 | jekyll-feed (~> 0.12) 281 | just-the-docs 282 | tzinfo (~> 1.2) 283 | tzinfo-data 284 | wdm (~> 0.1.1) 285 | webrick 286 | 287 | BUNDLED WITH 288 | 2.2.5 289 | -------------------------------------------------------------------------------- /_articles/puppet-connection-pool.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Puppet - HTTP Connection Pool [Ruby]" 3 | layout: default 4 | last_modified_date: 2021-08-10T12:10:00+0300 5 | nav_order: 13 6 | 7 | status: PUBLISHED 8 | language: Ruby 9 | short-title: HTTP Connection Pool 10 | project: 11 | name: Puppet 12 | key: puppet 13 | home-page: https://github.com/puppetlabs/puppet 14 | tags: ['connection-pool'] 15 | --- 16 | 17 | {% include article-meta.html article=page %} 18 | 19 | ## Context 20 | 21 | Puppet, an automated administrative engine for your Linux, Unix, and Windows systems, performs administrative tasks (such as adding users, installing packages, and 
updating server configurations) based on a centralized specification. 22 | 23 | Puppet usually [follows a client-server architecture](https://en.wikipedia.org/wiki/Puppet_(software)#Architecture). The client is known as an agent and the server is known as the master. For testing and simple configuration, it can also be used as a stand-alone application run from the command line. 24 | 25 | Puppet Server is installed on one or more servers, and Puppet Agent is installed on all the machines that the user wants to manage. Puppet Agents communicate with the server and fetch configuration instructions. The Agent then applies the configuration on the system and sends a status report to the server. 26 | 27 | [Persistent HTTP connections](https://en.wikipedia.org/wiki/HTTP_persistent_connection) allow Puppet to establish an HTTP(S) connection once and reuse it for multiple HTTP requests. This avoids making a new TCP connection and SSL handshake for each request. 28 | 29 | ## Problem 30 | 31 | Puppet needs to maintain a pool of persistent connections, keeping track of when idle connections expire. 32 | 33 | ## Overview 34 | 35 | Puppet's [Puppet::HTTP::Pool](https://github.com/puppetlabs/puppet/blob/8ed2564a3b0978ce0880af904df56d79637c15e8/lib/puppet/http/pool.rb) implements the [connection pool](https://en.wikipedia.org/wiki/Pool_(computer_science)) pattern. 36 | 37 | Connections are borrowed from the pool, yielded to the caller, and released back into the pool. If a connection is expired, it will be closed either when a connection to that site is requested, or when the pool is closed. The pool can store multiple connections to the same site, and they are reused in MRU (Most Recently Used) order. 38 | 39 | The pool delegates connection creation to a factory class, which configures SSL settings, timeouts and retries. 
40 | 41 | ## Implementation details 42 | 43 | The key method [`with_connection`](https://github.com/puppetlabs/puppet/blob/8ed2564a3b0978ce0880af904df56d79637c15e8/lib/puppet/http/pool.rb#L19-L39) follows an idiomatic Ruby pattern by accepting a block of code and passing the connection to it. 44 | 45 | It borrows the connection and, after it's used, releases it back or closes it. 46 | 47 | ```ruby 48 | def with_connection(site, verifier, &block) 49 | reuse = true 50 | 51 | http = borrow(site, verifier) 52 | begin 53 | if http.use_ssl? && http.verify_mode != OpenSSL::SSL::VERIFY_PEER 54 | reuse = false 55 | end 56 | 57 | yield http 58 | rescue => detail 59 | reuse = false 60 | raise detail 61 | ensure 62 | if reuse && http.started? 63 | release(site, verifier, http) 64 | else 65 | close_connection(site, http) 66 | end 67 | end 68 | end 69 | ``` 70 | 71 | [Borrowing](https://github.com/puppetlabs/puppet/blob/8ed2564a3b0978ce0880af904df56d79637c15e8/lib/puppet/http/pool.rb#L88-L111) a connection: 72 | 73 | ```ruby 74 | # Borrow and take ownership of a persistent connection. If a new 75 | # connection is created, it will be started prior to being returned. 76 | # 77 | # @api private 78 | def borrow(site, verifier) 79 | @pool[site] = active_entries(site) 80 | index = @pool[site].index do |entry| 81 | (verifier.nil? && entry.verifier.nil?) || 82 | (!verifier.nil? && verifier.reusable?(entry.verifier)) 83 | end 84 | entry = index ? @pool[site].delete_at(index) : nil 85 | if entry 86 | @pool.delete(site) if @pool[site].empty? 
87 | 88 | Puppet.debug("Using cached connection for #{site}") 89 | entry.connection 90 | else 91 | http = @factory.create_connection(site) 92 | 93 | start(site, verifier, http) 94 | setsockopts(http.instance_variable_get(:@socket)) 95 | http 96 | end 97 | end 98 | ``` 99 | 100 | [Releasing a connection](https://github.com/puppetlabs/puppet/blob/8ed2564a3b0978ce0880af904df56d79637c15e8/lib/puppet/http/pool.rb#L123-L137): 101 | 102 | ```ruby 103 | # Release a connection back into the pool. 104 | # 105 | # @api private 106 | def release(site, verifier, http) 107 | expiration = Time.now + @keepalive_timeout 108 | entry = Puppet::HTTP::PoolEntry.new(http, verifier, expiration) 109 | Puppet.debug("Caching connection for #{site}") 110 | 111 | entries = @pool[site] 112 | if entries 113 | entries.unshift(entry) 114 | else 115 | @pool[site] = [entry] 116 | end 117 | end 118 | ``` 119 | 120 | [Expirations are checked](https://github.com/puppetlabs/puppet/blob/8ed2564a3b0978ce0880af904df56d79637c15e8/lib/puppet/http/pool.rb#L139-L154) when polling active connections: 121 | 122 | ```ruby 123 | # Returns an Array of entries whose connections are not expired. 124 | # 125 | # @api private 126 | def active_entries(site) 127 | now = Time.now 128 | 129 | entries = @pool[site] || [] 130 | entries.select do |entry| 131 | if entry.expired?(now) 132 | close_connection(site, entry.connection) 133 | false 134 | else 135 | true 136 | end 137 | end 138 | end 139 | ``` 140 | 141 | Creating connections is delegated to [Puppet::HTTP::Factory](https://github.com/puppetlabs/puppet/blob/8ed2564a3b0978ce0880af904df56d79637c15e8/lib/puppet/http/factory.rb#L11): 142 | 143 | ```ruby 144 | def create_connection(site) 145 | Puppet.debug("Creating new connection for #{site}") 146 | 147 | http = Puppet::HTTP::Proxy.proxy(URI(site.addr)) 148 | http.use_ssl = site.use_ssl? 149 | if site.use_ssl? 
150 | http.min_version = OpenSSL::SSL::TLS1_VERSION if http.respond_to?(:min_version) 151 | http.ciphers = Puppet[:ciphers] 152 | end 153 | http.read_timeout = Puppet[:http_read_timeout] 154 | http.open_timeout = Puppet[:http_connect_timeout] 155 | http.keep_alive_timeout = KEEP_ALIVE_TIMEOUT if http.respond_to?(:keep_alive_timeout=) 156 | 157 | # 0 means make one request and never retry 158 | http.max_retries = 0 159 | 160 | if Puppet[:sourceaddress] 161 | Puppet.debug("Using source IP #{Puppet[:sourceaddress]}") 162 | http.local_host = Puppet[:sourceaddress] 163 | end 164 | 165 | if Puppet[:http_debug] 166 | http.set_debug_output($stderr) 167 | end 168 | 169 | http 170 | end 171 | ``` 172 | 173 | ## Testing 174 | 175 | There's a comprehensive [suite](https://github.com/puppetlabs/puppet/blob/8ed2564a3b0978ce0880af904df56d79637c15e8/spec/unit/http/pool_spec.rb) of unit tests for the connection pool. 176 | 177 | Example: [multiple connections to the same site](https://github.com/puppetlabs/puppet/blob/8ed2564a3b0978ce0880af904df56d79637c15e8/spec/unit/http/pool_spec.rb#L83-L95). 
178 | 179 | ```ruby 180 | it 'can yield multiple connections to the same site' do 181 | lru_conn = create_connection(site) 182 | mru_conn = create_connection(site) 183 | pool = create_pool_with_connections(site, lru_conn, mru_conn) 184 | 185 | pool.with_connection(site, verifier) do |a| 186 | expect(a).to eq(mru_conn) 187 | 188 | pool.with_connection(site, verifier) do |b| 189 | expect(b).to eq(lru_conn) 190 | end 191 | end 192 | end 193 | ``` 194 | 195 | The trick to [test expirations](https://github.com/puppetlabs/puppet/blob/8ed2564a3b0978ce0880af904df56d79637c15e8/spec/unit/http/pool_spec.rb#L83-L95) without depending on the clock is to use negative timeouts: 196 | 197 | ```ruby 198 | def create_pool_with_expired_connections(site, *connections) 199 | # setting keepalive timeout to -1 ensures any newly added 200 | # connections have already expired 201 | pool = Puppet::HTTP::Pool.new(-1) 202 | connections.each do |conn| 203 | pool.release(site, verifier, conn) 204 | end 205 | pool 206 | end 207 | ``` 208 | 209 | ## Related 210 | 211 | * [Generic connection pool](https://github.com/mperham/connection_pool/blob/c5aef742642def23664c4d9c15d12f0786347fb8/lib/connection_pool.rb) in [connection_pool](https://github.com/mperham/connection_pool) library for Ruby. 212 | * [Connection pooling](https://github.com/rails/rails/blob/83217025a171593547d1268651b446d3533e2019/activerecord/lib/active_record/connection_adapters/abstract/connection_pool.rb) in [Rails](https://rubyonrails.org/), a server-side web application framework. 213 | 214 | ## References 215 | 216 | * [GitHub Repo](https://github.com/puppetlabs/puppet) 217 | * [Puppet Website](https://puppet.com/) 218 | * [Puppet's HTTP Client Design](https://github.com/puppetlabs/puppet/blob/8ed2564a3b0978ce0880af904df56d79637c15e8/docs/http.md) 219 | 220 | ## Copyright notice 221 | 222 | Puppet is licensed under the [Apache-2.0 License](https://github.com/puppetlabs/puppet/blob/main/LICENSE). 
223 | 224 | Copyright (c) 2011 Puppet Inc. 225 | -------------------------------------------------------------------------------- /_articles/jest-diff.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Jest - Displaying Diffs [TypeScript]" 3 | layout: default 4 | last_modified_date: 2021-08-31T21:14:00+0300 5 | nav_order: 17 6 | 7 | status: PUBLISHED 8 | language: TypeScript 9 | short-title: Displaying Diffs 10 | project: 11 | name: Jest 12 | key: jest 13 | home-page: https://github.com/facebook/jest 14 | tags: ['diff', 'test-framework'] 15 | --- 16 | 17 | {% include article-meta.html article=page %} 18 | 19 | ## Context 20 | 21 | *Jest is a JavaScript testing framework designed to ensure correctness of any JavaScript codebase. It allows you to write tests with an approachable, familiar and feature-rich API that gives you results quickly.* - [The official website](https://jestjs.io/). 22 | 23 | ## Problem 24 | 25 | When a test assertion fails, a test framework must display the diff between the actual and the expected values clearly, so users can review the error and act accordingly. This is particularly important for long multi-line strings, arrays or objects with many, possibly nested, fields. The user must clearly see what was added compared to the expectation, what was removed and what remained the same. 
26 | 27 | Example from [README.md](https://github.com/facebook/jest/blob/master/packages/jest-diff/README.md): 28 | 29 | ```js 30 | const a = 'common\nchanged from'; 31 | const b = 'common\nchanged to'; 32 | 33 | const difference = diffStringsUnified(a, b); 34 | ``` 35 | 36 | Output: 37 | ```diff 38 | - Expected 39 | + Received 40 | 41 | common 42 | - changed from 43 | + changed to 44 | ``` 45 | 46 | ## Overview 47 | 48 | Jest provides the [`jest-diff`](https://github.com/facebook/jest/tree/master/packages/jest-diff) package with the following API (the description is copied from [README](https://github.com/facebook/jest/blob/master/packages/jest-diff/README.md#jest-diff)): 49 | 50 | >The `diff` named export serializes JavaScript **values**, compares them line-by-line, and returns a string which includes comparison lines. 51 | > 52 | > Two named exports compare **strings** character-by-character: 53 | > 54 | >- `diffStringsUnified` returns a string. 55 | >- `diffStringsRaw` returns an array of `Diff` objects. 56 | > 57 | >Three named exports compare **arrays of strings** line-by-line: 58 | > 59 | >- `diffLinesUnified` and `diffLinesUnified2` return a string. 60 | >- `diffLinesRaw` returns an array of `Diff` objects. 61 | 62 | The implementation is based on the [`diff-sequences`](https://github.com/facebook/jest/tree/master/packages/diff-sequences) package. `diff-sequences` compares items in two sequences to find a longest common subsequence. The items not in common are the items to delete or insert in a shortest edit script. It implements a variation of the [Myers difference algorithm](http://btn1x4.inf.uni-bayreuth.de/publications/dotor_buchmann/SCM/ChefRepo/DiffUndMerge/DAlgorithmVariations.pdf). 63 | 64 | ## Implementation details 65 | 66 | Let's look at the code path that compares two JavaScript objects. 
67 | 68 | The [entry point](https://github.com/facebook/jest/blob/98f10e698ae986c19fef2d8117be2341bcfb8f7f/packages/jest-diff/src/index.ts#L60-L111) function checks edge cases (mismatching types, asymmetric matchers) and delegates the work to specialized methods: 69 | 70 | ```typescript 71 | // Generate a string that will highlight the difference between two values 72 | // with green and red. (similar to how github does code diffing) 73 | // eslint-disable-next-line @typescript-eslint/explicit-module-boundary-types 74 | export function diff(a: any, b: any, options?: DiffOptions): string | null { 75 | if (Object.is(a, b)) { 76 | return getCommonMessage(NO_DIFF_MESSAGE, options); 77 | } 78 | 79 | const aType = getType(a); 80 | let expectedType = aType; 81 | let omitDifference = false; 82 | if (aType === 'object' && typeof a.asymmetricMatch === 'function') { 83 | if (a.$$typeof !== Symbol.for('jest.asymmetricMatcher')) { 84 | // Do not know expected type of user-defined asymmetric matcher. 85 | return null; 86 | } 87 | if (typeof a.getExpectedType !== 'function') { 88 | // For example, expect.anything() matches either null or undefined 89 | return null; 90 | } 91 | expectedType = a.getExpectedType(); 92 | // Primitive types boolean and number omit difference below. 93 | // For example, omit difference for expect.stringMatching(regexp) 94 | omitDifference = expectedType === 'string'; 95 | } 96 | 97 | if (expectedType !== getType(b)) { 98 | return ( 99 | ' Comparing two different types of values.' 
+ 100 | ` Expected ${chalk.green(expectedType)} but ` + 101 | `received ${chalk.red(getType(b))}.` 102 | ); 103 | } 104 | 105 | if (omitDifference) { 106 | return null; 107 | } 108 | 109 | switch (aType) { 110 | case 'string': 111 | return diffLinesUnified(a.split('\n'), b.split('\n'), options); 112 | case 'boolean': 113 | case 'number': 114 | return comparePrimitive(a, b, options); 115 | case 'map': 116 | return compareObjects(sortMap(a), sortMap(b), options); 117 | case 'set': 118 | return compareObjects(sortSet(a), sortSet(b), options); 119 | default: 120 | return compareObjects(a, b, options); 121 | } 122 | } 123 | ``` 124 | 125 | The [comparison method](https://github.com/facebook/jest/blob/98f10e698ae986c19fef2d8117be2341bcfb8f7f/packages/jest-diff/src/index.ts#L133-L192) for objects is below. It first serializes the objects to jsons and compares them as arrays of strings. If it doesn't produce a result, it tries to do the same with an alternative serializer. 126 | 127 | If lines in the arrays were compared as-is, the diff would look unnecessarily big because of indentation. E.g. if `expected = complexObject` and `actual = { "foo": complexObject }`, the changed indentation would make them appear as if they had nothing in common. The trick is to compare the lines ignoring indentation, but then display them with true indentation - note the `compare` and `display` variables. 128 | 129 | The format options require some explanations: 130 | * `FORMAT_OPTIONS` - default formatter, uses `.toJSON()` if available. 131 | * `FORMAT_OPTIONS_0` - same as `FORMAT_OPTIONS`, but without indentation. 132 | * `FALLBACK_FORMAT_OPTIONS` - alternative formatter not using `.toJSON()`; max depth is 10. 133 | * `FALLBACK_FORMAT_OPTIONS_0` - same as `FALLBACK_FORMAT_OPTIONS`, but without indentation. 
134 | 135 | ```typescript 136 | function compareObjects( 137 | a: Record, 138 | b: Record, 139 | options?: DiffOptions, 140 | ) { 141 | let difference; 142 | let hasThrown = false; 143 | const noDiffMessage = getCommonMessage(NO_DIFF_MESSAGE, options); 144 | 145 | try { 146 | const aCompare = prettyFormat(a, FORMAT_OPTIONS_0); 147 | const bCompare = prettyFormat(b, FORMAT_OPTIONS_0); 148 | 149 | if (aCompare === bCompare) { 150 | difference = noDiffMessage; 151 | } else { 152 | const aDisplay = prettyFormat(a, FORMAT_OPTIONS); 153 | const bDisplay = prettyFormat(b, FORMAT_OPTIONS); 154 | 155 | difference = diffLinesUnified2( 156 | aDisplay.split('\n'), 157 | bDisplay.split('\n'), 158 | aCompare.split('\n'), 159 | bCompare.split('\n'), 160 | options, 161 | ); 162 | } 163 | } catch { 164 | hasThrown = true; 165 | } 166 | 167 | // If the comparison yields no results, compare again but this time 168 | // without calling `toJSON`. It's also possible that toJSON might throw. 169 | if (difference === undefined || difference === noDiffMessage) { 170 | const aCompare = prettyFormat(a, FALLBACK_FORMAT_OPTIONS_0); 171 | const bCompare = prettyFormat(b, FALLBACK_FORMAT_OPTIONS_0); 172 | 173 | if (aCompare === bCompare) { 174 | difference = noDiffMessage; 175 | } else { 176 | const aDisplay = prettyFormat(a, FALLBACK_FORMAT_OPTIONS); 177 | const bDisplay = prettyFormat(b, FALLBACK_FORMAT_OPTIONS); 178 | 179 | difference = diffLinesUnified2( 180 | aDisplay.split('\n'), 181 | bDisplay.split('\n'), 182 | aCompare.split('\n'), 183 | bCompare.split('\n'), 184 | options, 185 | ); 186 | } 187 | 188 | if (difference !== noDiffMessage && !hasThrown) { 189 | difference = 190 | getCommonMessage(SIMILAR_MESSAGE, options) + '\n\n' + difference; 191 | } 192 | } 193 | 194 | return difference; 195 | } 196 | ``` 197 | 198 | We will stop here, but there's a lot more to learn from [this package](https://github.com/facebook/jest/tree/98f10e698ae986c19fef2d8117be2341bcfb8f7f/packages/jest-diff) if 
you decide to dive deeper. For instance, [here](https://github.com/facebook/jest/blob/98f10e698ae986c19fef2d8117be2341bcfb8f7f/packages/diff-sequences/src/index.ts) are the "guts" of the sequence difference algorithm. 199 | 200 | ## Testing 201 | 202 | The [test coverage](https://github.com/facebook/jest/blob/98f10e698ae986c19fef2d8117be2341bcfb8f7f/packages/jest-diff/src/__tests__) for `jest-diff` is very extensive. For example, here's [one of the tests](https://github.com/facebook/jest/blob/98f10e698ae986c19fef2d8117be2341bcfb8f7f/packages/jest-diff/src/__tests__/diff.test.ts#L192-L212) for comparing two objects: 203 | 204 | ```typescript 205 | describe('objects', () => { 206 | const a = {a: {b: {c: 5}}}; 207 | const b = {a: {b: {c: 6}}}; 208 | const expected = [ 209 | ' Object {', 210 | ' "a": Object {', 211 | ' "b": Object {', 212 | '- "c": 5,', 213 | '+ "c": 6,', 214 | ' },', 215 | ' },', 216 | ' }', 217 | ].join('\n'); 218 | 219 | test('(unexpanded)', () => { 220 | expect(diff(a, b, unexpandedBe)).toBe(expected); 221 | }); 222 | test('(expanded)', () => { 223 | expect(diff(a, b, expandedBe)).toBe(expected); 224 | }); 225 | }); 226 | ``` 227 | 228 | ## Related 229 | 230 | * [difflib](https://docs.python.org/3/library/difflib.html) provides similar functionality in Python. 231 | 232 | ## References 233 | 234 | * [GitHub Repo](https://github.com/facebook/jest) 235 | * [Official Website](https://jestjs.io/) 236 | * [Myers difference algorithm](http://btn1x4.inf.uni-bayreuth.de/publications/dotor_buchmann/SCM/ChefRepo/DiffUndMerge/DAlgorithmVariations.pdf) 237 | 238 | ## Copyright notice 239 | 240 | Jest is licensed under the [MIT License](https://github.com/facebook/jest/blob/master/LICENSE). 241 | 242 | Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
243 | -------------------------------------------------------------------------------- /_layouts/default.html: -------------------------------------------------------------------------------- 1 | --- 2 | layout: table_wrappers 3 | --- 4 | 5 | 6 | 7 | 8 | {% include head.html %} 9 | 10 | 11 | 12 | Link 13 | 14 | 15 | 16 | 17 | 18 | Search 19 | 20 | 21 | 22 | 23 | 24 | Menu 25 | 26 | 27 | 28 | 29 | 30 | Expand 31 | 32 | 33 | 34 | 35 | 36 | Document 37 | 38 | 39 | 40 | 41 | 42 | 43 | 78 |
79 |
80 | {% if site.search_enabled != false %} 81 | 88 | {% endif %} 89 | {% include header_custom.html %} 90 | {% if site.aux_links %} 91 | 106 | {% endif %} 107 |
108 |
109 | {% unless page.url == "/" %} 110 | {% if page.parent %} 111 | {%- for node in pages_list -%} 112 | {%- if node.parent == nil -%} 113 | {%- if page.parent == node.title or page.grand_parent == node.title -%} 114 | {%- assign first_level_url = node.url | absolute_url -%} 115 | {%- endif -%} 116 | {%- if node.has_children -%} 117 | {%- assign children_list = pages_list | where: "parent", node.title -%} 118 | {%- for child in children_list -%} 119 | {%- if page.url == child.url or page.parent == child.title -%} 120 | {%- assign second_level_url = child.url | absolute_url -%} 121 | {%- endif -%} 122 | {%- endfor -%} 123 | {%- endif -%} 124 | {%- endif -%} 125 | {%- endfor -%} 126 | 137 | {% endif %} 138 | {% endunless %} 139 |
140 | {% if site.heading_anchors != false %} 141 | {% include vendor/anchor_headings.html html=content beforeHeading="true" anchorBody="" anchorClass="anchor-heading" anchorAttrs="aria-labelledby=\"%html_id%\"" %} 142 | {% else %} 143 | {{ content }} 144 | {% endif %} 145 | 146 | {% if page.has_children == true and page.has_toc != false %} 147 |
148 |

Table of contents

149 |
    150 | {%- assign children_list = pages_list | where: "parent", page.title | where: "grand_parent", page.parent -%} 151 | {% for child in children_list %} 152 |
  • 153 | {{ child.title }}{% if child.summary %} - {{ child.summary }}{% endif %} 154 |
  • 155 | {% endfor %} 156 |
157 | {% endif %} 158 | 159 | {% capture footer_custom %} 160 | {%- include footer_custom.html -%} 161 | {% endcapture %} 162 | {% if footer_custom != "" or site.last_edit_timestamp or site.gh_edit_link %} 163 |
164 |
165 | {% if site.back_to_top %} 166 |

{{ site.back_to_top_text }}

167 | {% endif %} 168 | 169 | {{ footer_custom }} 170 | 171 | {% if site.last_edit_timestamp or site.gh_edit_link %} 172 |
173 | {% if site.last_edit_timestamp and site.last_edit_time_format and page.last_modified_date %} 174 |

175 | Page last modified: {{ page.last_modified_date | date: site.last_edit_time_format }}. 176 |

177 | {% endif %} 178 | {% if 179 | site.gh_edit_link and 180 | site.gh_edit_link_text and 181 | site.gh_edit_repository and 182 | site.gh_edit_branch and 183 | site.gh_edit_view_mode 184 | %} 185 |

186 | {{ site.gh_edit_link_text }} 187 |

188 | {% endif %} 189 |
190 | {% endif %} 191 |
192 | {% endif %} 193 | 194 |
195 |
196 | 197 | {% if site.search_enabled != false %} 198 | {% if site.search.button %} 199 | 200 | 201 | 202 | {% endif %} 203 | 204 |
205 | {% endif %} 206 |
207 | 208 | 209 | -------------------------------------------------------------------------------- /_articles/did_you_mean-spell-checking.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "did_you_mean - Correcting Typos in Ruby [Ruby]" 3 | layout: default 4 | last_modified_date: 2021-08-26T16:52:00+0300 5 | nav_order: 16 6 | 7 | status: PUBLISHED 8 | language: Ruby 9 | short-title: Correcting typos in Ruby 10 | project: 11 | name: did_you_mean 12 | key: did_you_mean 13 | home-page: https://github.com/ruby/did_you_mean 14 | tags: ['spell-checking'] 15 | --- 16 | 17 | {% include article-meta.html article=page %} 18 | 19 | ## Context 20 | 21 | did_you_mean is *the gem that has been saving people from typos since 2014*. When a Ruby program fails because a name of a class, method or something else is mistyped, did_you_mean augments the error message with suggested corrections. 22 | 23 | Example from its README: 24 | 25 | ```ruby 26 | full_name = "Yuki Nishijima" 27 | full_name.starts_with?("Y") 28 | # => NoMethodError: undefined method `starts_with?' for "Yuki Nishijima":String 29 | # Did you mean? start_with? 30 | ``` 31 | 32 | Ruby 2.3 and later [ship with this gem](https://github.com/ruby/ruby/tree/master/lib/did_you_mean). 33 | 34 | ## Problem 35 | 36 | When a Ruby program fails with a `NameError`, how do you suggest corrections? 37 | 38 | ## Overview 39 | 40 | did_you_mean adds corrections [by overriding](https://github.com/ruby/did_you_mean/blob/master/lib/did_you_mean/core_ext/name_error.rb#L14-L30) `NameError`'s `to_s` method. 41 | 42 | Corrections are generated by spell-checking against a dictionary populated with all symbols that could be used in that place. 43 | 44 | The spell-checker uses two similarity metrics: [Jaro–Winkler](https://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance) and [Levenshtein](https://en.wikipedia.org/wiki/Levenshtein_distance). 
45 | 46 | ## Implementation details 47 | 48 | [Extending](https://github.com/ruby/did_you_mean/blob/9c4ccac47de4722ed22d1a33791e641bc4ba9664/lib/did_you_mean/core_ext/name_error.rb) `NameError#to_s`. Suggestions are generated by spell-checkers and then formatted and appended to the original message. 49 | 50 | ```ruby 51 | def to_s 52 | msg = super.dup 53 | suggestion = DidYouMean.formatter.message_for(corrections) 54 | 55 | msg << suggestion if !msg.include?(suggestion) 56 | msg 57 | rescue 58 | super 59 | end 60 | 61 | def corrections 62 | @corrections ||= spell_checker.corrections 63 | end 64 | 65 | def spell_checker 66 | SPELL_CHECKERS[self.class.to_s].new(self) 67 | end 68 | ``` 69 | 70 | [Actual spell-checking](https://github.com/ruby/did_you_mean/blob/9c4ccac47de4722ed22d1a33791e641bc4ba9664/lib/did_you_mean/spell_checker.rb) code is quite short, but there's a lot going on there. 71 | 72 | First, it selects dictionary words whose Jaro-Winkler score against the input is at or above a threshold. The thresholds look [magic](https://en.wikipedia.org/wiki/Magic_number_(programming)), but they must be carefully tuned. Then it filters out words with high Levenshtein distance from the input. If the result is not empty, return it; otherwise, return the best Jaro-Winkler match (the word with the highest score) whose Levenshtein distance from the input is lower than the length of the shorter of the two words. 73 | 74 | Levenshtein and Jaro-Winkler are two different metrics for comparing two strings. Note that they point in opposite directions in this code: the value returned by `JaroWinkler.distance` is effectively a similarity score - the higher the value, the more similar the strings are (hence the `>= threshold` check) - while Levenshtein distance counts edits, so the lower the value, the more similar the strings are (hence the `<= threshold` check). We will not explain the details of these metrics - there are many resources dedicated to that, see the *References* section - but one thing worth mentioning is that Jaro-Winkler favors strings that share prefixes. This is expected to make it good for finding mistypes - the assumption is that if you mistype a word, the first character is very likely to be correct. 
75 | 76 | ```ruby 77 | module DidYouMean 78 | class SpellChecker 79 | def initialize(dictionary:) 80 | @dictionary = dictionary 81 | end 82 | 83 | def correct(input) 84 | input = normalize(input) 85 | threshold = input.length > 3 ? 0.834 : 0.77 86 | 87 | words = @dictionary.select { |word| JaroWinkler.distance(normalize(word), input) >= threshold } 88 | words.reject! { |word| input == word.to_s } 89 | words.sort_by! { |word| JaroWinkler.distance(word.to_s, input) } 90 | words.reverse! 91 | 92 | # Correct mistypes 93 | threshold = (input.length * 0.25).ceil 94 | corrections = words.select { |c| Levenshtein.distance(normalize(c), input) <= threshold } 95 | 96 | # Correct misspells 97 | if corrections.empty? 98 | corrections = words.select do |word| 99 | word = normalize(word) 100 | length = input.length < word.length ? input.length : word.length 101 | 102 | Levenshtein.distance(word, input) < length 103 | end.first(1) 104 | end 105 | 106 | corrections 107 | end 108 | 109 | private 110 | 111 | def normalize(str_or_symbol) #:nodoc: 112 | str = str_or_symbol.to_s.downcase 113 | str.tr!("@", "") 114 | str 115 | end 116 | end 117 | end 118 | ``` 119 | 120 | The logic populating the dictionary depends on what's mistyped: the name of a class, a method, a variable, etc. For instance, let's look at populating the dictionary with all possible method names. [The code](https://github.com/ruby/did_you_mean/blob/9c4ccac47de4722ed22d1a33791e641bc4ba9664/lib/did_you_mean/spell_checkers/method_name_checker.rb) is quite self-explanatory: 121 | 122 | ```ruby 123 | module DidYouMean 124 | class MethodNameChecker 125 | attr_reader :method_name, :receiver 126 | 127 | # ... 128 | 129 | def initialize(exception) 130 | @method_name = exception.name 131 | @receiver = exception.receiver 132 | @private_call = exception.respond_to?(:private_call?) ? exception.private_call? 
: false 133 | end 134 | 135 | def corrections 136 | @corrections ||= begin 137 | dictionary = method_names 138 | dictionary = RB_RESERVED_WORDS + dictionary if @private_call 139 | 140 | SpellChecker.new(dictionary: dictionary).correct(method_name) - names_to_exclude 141 | end 142 | end 143 | 144 | def method_names 145 | if Object === receiver 146 | method_names = receiver.methods + receiver.singleton_methods 147 | method_names += receiver.private_methods if @private_call 148 | method_names.uniq! 149 | method_names 150 | else 151 | [] 152 | end 153 | end 154 | 155 | def names_to_exclude 156 | Object === receiver ? NAMES_TO_EXCLUDE[receiver.class] : [] 157 | end 158 | end 159 | end 160 | ``` 161 | 162 | ## Testing 163 | 164 | [Testing spell-checking](https://github.com/ruby/did_you_mean/blob/master/test/test_spell_checker.rb). E.g. testing correcting mistypes: 165 | 166 | ```ruby 167 | def test_spell_checker_corrects_mistypes 168 | assert_spell 'foo', input: 'doo', dictionary: ['foo', 'fork'] 169 | assert_spell 'email', input: 'meail', dictionary: ['email', 'fail', 'eval'] 170 | assert_spell 'fail', input: 'fial', dictionary: ['email', 'fail', 'eval'] 171 | assert_spell 'fail', input: 'afil', dictionary: ['email', 'fail', 'eval'] 172 | assert_spell 'eval', input: 'eavl', dictionary: ['email', 'fail', 'eval'] 173 | assert_spell 'eval', input: 'veal', dictionary: ['email', 'fail', 'eval'] 174 | assert_spell 'sub!', input: 'suv!', dictionary: ['sub', 'gsub', 'sub!'] 175 | assert_spell 'sub', input: 'suv', dictionary: ['sub', 'gsub', 'sub!'] 176 | 177 | assert_spell %w(gsub! gsub), input: 'gsuv!', dictionary: %w(sub gsub gsub!) 178 | assert_spell %w(sub! sub gsub!), input: 'ssub!', dictionary: %w(sub sub! gsub gsub!) 
179 | 180 | group_methods = %w(groups group_url groups_url group_path) 181 | assert_spell 'groups', input: 'group', dictionary: group_methods 182 | 183 | group_classes = %w( 184 | GroupMembership 185 | GroupMembershipPolicy 186 | GroupMembershipDecorator 187 | GroupMembershipSerializer 188 | GroupHelper 189 | Group 190 | GroupMailer 191 | NullGroupMembership 192 | ) 193 | 194 | assert_spell 'GroupMembership', dictionary: group_classes, input: 'GroupMemberhip' 195 | assert_spell 'GroupMembershipDecorator', dictionary: group_classes, input: 'GroupMemberhipDecorator' 196 | 197 | names = %w(first_name_change first_name_changed? first_name_will_change!) 198 | assert_spell names, input: 'first_name_change!', dictionary: names 199 | 200 | assert_empty DidYouMean::SpellChecker.new(dictionary: ['proc']).correct('product_path') 201 | assert_empty DidYouMean::SpellChecker.new(dictionary: ['fork']).correct('fooo') 202 | end 203 | ``` 204 | 205 | [Some tests](https://github.com/ruby/did_you_mean/blob/master/test/spell_checking/test_class_name_check.rb#L30-L46) confirming that it actually throws the right error: 206 | 207 | ```ruby 208 | class ClassNameCheckTest < Test::Unit::TestCase 209 | include DidYouMean::TestHelper 210 | 211 | def test_corrections 212 | error = assert_raise(NameError) { ::Bo0k } 213 | assert_correction "Book", error.corrections 214 | end 215 | 216 | def test_corrections_include_case_specific_class_name 217 | error = assert_raise(NameError) { ::Acronym } 218 | assert_correction "ACRONYM", error.corrections 219 | end 220 | 221 | def test_corrections_include_top_level_class_name 222 | error = assert_raise(NameError) { Project.bo0k } 223 | assert_correction "Book", error.corrections 224 | end 225 | ``` 226 | 227 | ## References 228 | 229 | * [GitHub repo](https://github.com/ruby/did_you_mean/) 230 | * [RedDotRuby 2015 - 'Did you mean?' 
experience in Ruby and beyond by Yuki Nishijima](https://www.youtube.com/watch?v=sca1C0Qk6ZE) 231 | * [Levenshtein Distance](https://en.wikipedia.org/wiki/Levenshtein_distance) 232 | * [Jaro–Winkler Distance](https://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance) 233 | * [Deep dive into Did You Mean](https://shime.sh/deep-dive-into-did-you-mean) 234 | * [Difference between Jaro-Winkler and Levenshtein distance?](https://stackoverflow.com/questions/25540581/difference-between-jaro-winkler-and-levenshtein-distance) 235 | * [Jaro winkler vs Levenshtein Distance](https://srinivas-kulkarni.medium.com/jaro-winkler-vs-levenshtein-distance-2eab21832fd6) 236 | * [https://news.ycombinator.com/item?id=8496581](https://news.ycombinator.com/item?id=8496581) 237 | * The author of the gem wrote a blog post about it, but it is no longer available. However, its discussion is. 238 | 239 | ## Copyright notice 240 | 241 | did_you_mean is licensed under the [MIT License](https://github.com/ruby/did_you_mean/blob/master/LICENSE.txt). 242 | 243 | Copyright (c) 2014-16 Yuki Nishijima. -------------------------------------------------------------------------------- /_articles/vscode-skip-list.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Visual Studio Code - Skip Lists [TypeScript]" 3 | layout: default 4 | last_modified_date: 2021-09-15T09:26:00+0300 5 | nav_order: 19 6 | 7 | status: PUBLISHED 8 | language: TypeScript 9 | short-title: Skip Lists 10 | project: 11 | name: Visual Studio Code 12 | key: vscode 13 | home-page: https://github.com/microsoft/vscode 14 | tags: ['algorithm', 'data-structure'] 15 | --- 16 | 17 | {% include article-meta.html article=page %} 18 | 19 | ## Context 20 | 21 | [Visual Studio Code](https://code.visualstudio.com/) is an extremely popular [IDE](https://en.wikipedia.org/wiki/Integrated_development_environment) made by Microsoft. 
22 | 23 | Resources manipulated by Visual Studio Code are identified by their URIs. But different URIs [can point](https://github.com/microsoft/vscode/blob/4eae83a5232ac06b29dca142f6503d695f16da13/src/vs/workbench/services/uriIdentity/common/uriIdentity.ts) to the same resource, e.g. `file:///c:/foo/bar.txt` and `file:///c:/FOO/BAR.txt`, because Windows paths are not case-sensitive. 24 | 25 | Visual Studio Code has a [*URI Identity Service*](https://github.com/microsoft/vscode/blob/4eae83a5232ac06b29dca142f6503d695f16da13/src/vs/workbench/services/uriIdentity/common/uriIdentity.ts) to return a canonical [URI](https://en.wikipedia.org/wiki/Uniform_Resource_Identifier) for a given resource. The identity service is stateful. If it has seen a URI equal to the input before (it has a way to compare URIs), it returns that other URI. If it hasn't - the input URI becomes the canonical URI. 26 | 27 | ## Problem 28 | 29 | What data structure should be used to store URIs? It should support fast insertions and fast lookups given a function comparing two URIs. 30 | 31 | Note: it appears that URIs cannot be hashed, but it is not clear to us why. If they could be hashed, the best data structure would probably be a hash map. 32 | 33 | ## Overview 34 | 35 | URIs are put in a [skip list](https://en.wikipedia.org/wiki/Skip_list). A skip list is a probabilistic data structure that allows `O(log n)` search complexity as well as `O(log n)` insertion complexity within an ordered sequence of `n` elements. 36 | 37 | > *Skip lists are a simple data structure that can be used in place of balanced trees for most applications. Skip lists algorithms are very easy to implement, extend and modify. Skip lists are about as fast as highly optimized balanced tree algorithms and are substantially faster than casually implemented balanced tree algorithms.* - [William Pugh](https://www.epaperpress.com/sortsearch/download/skiplist.pdf) 38 | 39 | > *... 
it can get the best features of a sorted array (for searching) while maintaining a linked list-like structure that allows insertion, which is not possible in an array. Fast search is made possible by maintaining a linked hierarchy of subsequences, with each successive subsequence skipping over fewer elements than the previous one (see the picture below on the right). Searching starts in the sparsest subsequence until two consecutive elements have been found, one smaller and one larger than or equal to the element searched for. Via the linked hierarchy, these two elements link to elements of the next sparsest subsequence, where searching is continued until finally we are searching in the full sequence. The elements that are skipped over may be chosen probabilistically or deterministically, with the former being more common.* - [Wikipedia](https://en.wikipedia.org/wiki/Skip_list) 40 | 41 | ## Implementation details 42 | 43 | The [implementation](https://github.com/microsoft/vscode/blob/4eae83a5232ac06b29dca142f6503d695f16da13/src/vs/base/common/skipList.ts#L20) follows the algorithm described in the [paper](https://www.epaperpress.com/sortsearch/download/skiplist.pdf) by William Pugh. 44 | 45 | A [node](https://github.com/microsoft/vscode/blob/4eae83a5232ac06b29dca142f6503d695f16da13/src/vs/base/common/skipList.ts#L20) has a key, a value, a level and a list of forward links to other nodes: 46 | 47 | ```typescript 48 | class Node { 49 | readonly forward: Node[]; 50 | constructor(readonly level: number, readonly key: K, public value: V) { 51 | this.forward = []; 52 | } 53 | } 54 | ``` 55 | 56 | Skip list itself: 57 | 58 | ```typescript 59 | export class SkipList implements Map { 60 | 61 | readonly [Symbol.toStringTag] = 'SkipList'; 62 | 63 | private _maxLevel: number; 64 | private _level: number = 0; 65 | private _header: Node; 66 | private _size: number = 0; 67 | ``` 68 | 69 | `has`, `get`, `set` and `delete` delegate to private methods. 
It's not entirely clear why these private methods are static: perhaps it was done for performance. 70 | 71 | ```typescript 72 | has(key: K): boolean { 73 | return Boolean(SkipList._search(this, key, this.comparator)); 74 | } 75 | 76 | get(key: K): V | undefined { 77 | return SkipList._search(this, key, this.comparator)?.value; 78 | } 79 | 80 | set(key: K, value: V): this { 81 | if (SkipList._insert(this, key, value, this.comparator)) { 82 | this._size += 1; 83 | } 84 | return this; 85 | } 86 | 87 | delete(key: K): boolean { 88 | const didDelete = SkipList._delete(this, key, this.comparator); 89 | if (didDelete) { 90 | this._size -= 1; 91 | } 92 | return didDelete; 93 | } 94 | ``` 95 | 96 | [Search](https://github.com/microsoft/vscode/blob/4eae83a5232ac06b29dca142f6503d695f16da13/src/vs/base/common/skipList.ts#L123-L135). It starts at the header node on the highest level. On each level it follows forward links while the next node's key is smaller than the search key, then it drops down to the next lower level. After the lowest level, the next node is the match if its key equals the search key. 97 | 98 | ```typescript 99 | private static _search(list: SkipList, searchKey: K, comparator: Comparator) { 100 | let x = list._header; 101 | for (let i = list._level - 1; i >= 0; i--) { 102 | while (x.forward[i] && comparator(x.forward[i].key, searchKey) < 0) { 103 | x = x.forward[i]; 104 | } 105 | } 106 | x = x.forward[0]; 107 | if (x && comparator(x.key, searchKey) === 0) { 108 | return x; 109 | } 110 | return undefined; 111 | } 112 | ``` 113 | 114 | [Insert and delete](https://github.com/microsoft/vscode/blob/4eae83a5232ac06b29dca142f6503d695f16da13/src/vs/base/common/skipList.ts#L137-L201). 
*[Insertions and deletions](https://en.wikipedia.org/wiki/Skip_list) are implemented much like the corresponding linked-list operations, except that "tall" elements must be inserted into or deleted from more than one linked list.* 115 | 116 | ```typescript 117 | private static _insert(list: SkipList, searchKey: K, value: V, comparator: Comparator) { 118 | let update: Node[] = []; 119 | let x = list._header; 120 | for (let i = list._level - 1; i >= 0; i--) { 121 | while (x.forward[i] && comparator(x.forward[i].key, searchKey) < 0) { 122 | x = x.forward[i]; 123 | } 124 | update[i] = x; 125 | } 126 | x = x.forward[0]; 127 | if (x && comparator(x.key, searchKey) === 0) { 128 | // update 129 | x.value = value; 130 | return false; 131 | } else { 132 | // insert 133 | let lvl = SkipList._randomLevel(list); 134 | if (lvl > list._level) { 135 | for (let i = list._level; i < lvl; i++) { 136 | update[i] = list._header; 137 | } 138 | list._level = lvl; 139 | } 140 | x = new Node(lvl, searchKey, value); 141 | for (let i = 0; i < lvl; i++) { 142 | x.forward[i] = update[i].forward[i]; 143 | update[i].forward[i] = x; 144 | } 145 | return true; 146 | } 147 | } 148 | 149 | private static _randomLevel(list: SkipList, p: number = 0.5): number { 150 | let lvl = 1; 151 | while (Math.random() < p && lvl < list._maxLevel) { 152 | lvl += 1; 153 | } 154 | return lvl; 155 | } 156 | 157 | private static _delete(list: SkipList, searchKey: K, comparator: Comparator) { 158 | let update: Node[] = []; 159 | let x = list._header; 160 | for (let i = list._level - 1; i >= 0; i--) { 161 | while (x.forward[i] && comparator(x.forward[i].key, searchKey) < 0) { 162 | x = x.forward[i]; 163 | } 164 | update[i] = x; 165 | } 166 | x = x.forward[0]; 167 | if (!x || comparator(x.key, searchKey) !== 0) { 168 | // not found 169 | return false; 170 | } 171 | for (let i = 0; i < list._level; i++) { 172 | if (update[i].forward[i] !== x) { 173 | break; 174 | } 175 | update[i].forward[i] = x.forward[i]; 176 | } 177 | 
while (list._level > 0 && list._header.forward[list._level - 1] === NIL) { 178 | list._level -= 1; 179 | } 180 | return true; 181 | } 182 | ``` 183 | 184 | ## Testing 185 | 186 | [Testing](https://github.com/microsoft/vscode/blob/4eae83a5232ac06b29dca142f6503d695f16da13/src/vs/base/test/common/skipList.test.ts#L48-L74) insertions, deletions and searches: 187 | 188 | ```typescript 189 | test('set/get/delete', function () { 190 | let list = new SkipList((a, b) => a - b); 191 | 192 | assert.strictEqual(list.get(3), undefined); 193 | list.set(3, 1); 194 | assert.strictEqual(list.get(3), 1); 195 | assertValues(list, [1]); 196 | 197 | list.set(3, 3); 198 | assertValues(list, [3]); 199 | 200 | list.set(1, 1); 201 | list.set(4, 4); 202 | assert.strictEqual(list.get(3), 3); 203 | assert.strictEqual(list.get(1), 1); 204 | assert.strictEqual(list.get(4), 4); 205 | assertValues(list, [1, 3, 4]); 206 | 207 | assert.strictEqual(list.delete(17), false); 208 | 209 | assert.strictEqual(list.delete(1), true); 210 | assert.strictEqual(list.get(1), undefined); 211 | assert.strictEqual(list.get(3), 3); 212 | assert.strictEqual(list.get(4), 4); 213 | 214 | assertValues(list, [3, 4]); 215 | }); 216 | ``` 217 | 218 | Functional tests are accompanied by a [performance test](https://github.com/microsoft/vscode/blob/4eae83a5232ac06b29dca142f6503d695f16da13/src/vs/base/test/common/skipList.test.ts#L144-L216). 
219 | 220 | ## References 221 | 222 | * [GitHub repo](https://github.com/microsoft/vscode) 223 | * [Skip List on Wikipedia](https://en.wikipedia.org/wiki/Skip_list) 224 | * [Skip Lists: A Probabilistic Alternative to Balanced Trees](https://www.epaperpress.com/sortsearch/download/skiplist.pdf) 225 | * [Skip Lists lecture](https://www.youtube.com/watch?v=kBwUoWpeH_Q) (MIT 6.046J / 18.410J Introduction to Algorithms (SMA 5503), Fall 2005) 226 | * [URI comparisons and resources.ts](https://github.com/microsoft/vscode/issues/93368) 227 | 228 | ## Copyright notice 229 | 230 | Visual Studio Code is licensed under the [MIT License](https://github.com/microsoft/vscode/blob/main/LICENSE.txt). 231 | 232 | Copyright (c) Microsoft Corporation. All rights reserved. -------------------------------------------------------------------------------- /_articles/buck-artifact-cache-decorators.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Buck - Artifact Cache Decorators [Java]" 3 | layout: default 4 | last_modified_date: 2021-08-13T12:18:00+0300 5 | nav_order: 15 6 | 7 | status: PUBLISHED 8 | language: Java 9 | short-title: Artifact Cache Decorators 10 | project: 11 | name: Buck 12 | key: buck 13 | home-page: https://github.com/facebook/buck 14 | tags: ['decorator'] 15 | --- 16 | 17 | {% include article-meta.html article=page %} 18 | 19 | ## Context 20 | 21 | [Buck](https://buck.build/) is a multi-language build system developed and used by Facebook. 22 | 23 | Buck avoids rebuilding the same module twice by caching build artifacts and metadata. It employs various caching strategies, such as caching on the local disk, in SQLite database or in a shared cache over HTTP. 24 | 25 | Caches obey the [`ArtifactCache`](https://github.com/facebook/buck/blob/0acbcbbed7274ba654b64d97414b28183649e51a/src/com/facebook/buck/artifact_cache/ArtifactCache.java#) interface, which defines methods such as `fetchAsync` or `store`. 
26 | 27 | ## Problem 28 | 29 | Various embellishments, such as retries or even logging, need to be added to some, but not all, cache client instances. 30 | 31 | ## Overview 32 | 33 | The solution uses the classical [Decorator](https://en.wikipedia.org/wiki/Decorator_pattern) design pattern. 34 | 35 | > *The decorator pattern is a design pattern that allows behavior to be added to an individual object, dynamically, without affecting the behavior of other objects from the same class.* - [Wikipedia](https://en.wikipedia.org/wiki/Decorator_pattern) 36 | 37 | Buck implements 3 decorators for `ArtifactCache`: 38 | 1. [`LoggingArtifactCacheDecorator`](https://github.com/facebook/buck/blob/0acbcbbed7274ba654b64d97414b28183649e51a/src/com/facebook/buck/artifact_cache/LoggingArtifactCacheDecorator.java) - logs caching events to an event bus; 39 | 1. [`RetryingCacheDecorator`](https://github.com/facebook/buck/blob/0acbcbbed7274ba654b64d97414b28183649e51a/src/com/facebook/buck/artifact_cache/RetryingCacheDecorator.java) - retries failed requests; 40 | 1. [`TwoLevelArtifactCacheDecorator`](https://github.com/facebook/buck/blob/0acbcbbed7274ba654b64d97414b28183649e51a/src/com/facebook/buck/artifact_cache/TwoLevelArtifactCacheDecorator.java) - adds a two-level caching scheme, the details of which are not in the scope of this article. 41 | 42 | The decorators implement the same `ArtifactCache` interface as the actual caches. Their constructors accept `ArtifactCache delegate` and some decorator-specific parameters. Most of the work is delegated to the `delegate`, while the decorators provide additional functionality. Users of the `ArtifactCache` interface don't distinguish between "actual" caches and decorators. 43 | 44 | The decorators also implement the [`CacheDecorator`](https://github.com/facebook/buck/blob/0acbcbbed7274ba654b64d97414b28183649e51a/src/com/facebook/buck/artifact_cache/CacheDecorator.java) interface with the only method `ArtifactCache getDelegate()`.
As far as we can tell, this method is only used in testing. 45 | 46 | Concrete instances of `ArtifactCache` with appropriate decorators are instantiated by the [`ArtifactCaches`](https://github.com/facebook/buck/blob/0acbcbbed7274ba654b64d97414b28183649e51a/src/com/facebook/buck/artifact_cache/ArtifactCaches.java) factory class. 47 | 48 | ## Implementation details 49 | 50 | Let's look at [`LoggingArtifactCacheDecorator`](https://github.com/facebook/buck/blob/0acbcbbed7274ba654b64d97414b28183649e51a/src/com/facebook/buck/artifact_cache/LoggingArtifactCacheDecorator.java). All it does is call `eventBus.post()` before and after fetching or storing artifacts in the underlying cache. 51 | 52 | ```java 53 | /** 54 | * Decorator for wrapping a {@link ArtifactCache} to log a {@link ArtifactCacheEvent} for the start 55 | * and finish of each event. The underlying cache must only provide synchronous operations. 56 | */ 57 | public class LoggingArtifactCacheDecorator implements ArtifactCache, CacheDecorator { 58 | private final BuckEventBus eventBus; 59 | private final ArtifactCache delegate; 60 | private final ArtifactCacheEventFactory eventFactory; 61 | 62 | public LoggingArtifactCacheDecorator( 63 | BuckEventBus eventBus, ArtifactCache delegate, ArtifactCacheEventFactory eventFactory) { 64 | this.eventBus = eventBus; 65 | this.delegate = delegate; 66 | this.eventFactory = eventFactory; 67 | } 68 | 69 | @Override 70 | public ListenableFuture fetchAsync( 71 | @Nullable BuildTarget target, RuleKey ruleKey, LazyPath output) { 72 | ArtifactCacheEvent.Started started = 73 | eventFactory.newFetchStartedEvent(ImmutableSet.of(ruleKey)); 74 | eventBus.post(started); 75 | CacheResult fetchResult = Futures.getUnchecked(delegate.fetchAsync(target, ruleKey, output)); 76 | eventBus.post(eventFactory.newFetchFinishedEvent(started, fetchResult)); 77 | return Futures.immediateFuture(fetchResult); 78 | } 79 | 80 | @Override 81 | public void skipPendingAndFutureAsyncFetches() { 82 |
delegate.skipPendingAndFutureAsyncFetches(); 83 | } 84 | 85 | @Override 86 | public ListenableFuture store(ArtifactInfo info, BorrowablePath output) { 87 | ArtifactCacheEvent.Started started = 88 | eventFactory.newStoreStartedEvent(info.getRuleKeys(), info.getMetadata()); 89 | eventBus.post(started); 90 | ListenableFuture storeFuture = delegate.store(info, output); 91 | eventBus.post(eventFactory.newStoreFinishedEvent(started)); 92 | return storeFuture; 93 | } 94 | 95 | @Override 96 | public ListenableFuture> multiContainsAsync( 97 | ImmutableSet ruleKeys) { 98 | ArtifactCacheEvent.Started started = eventFactory.newContainsStartedEvent(ruleKeys); 99 | eventBus.post(started); 100 | 101 | return Futures.transform( 102 | delegate.multiContainsAsync(ruleKeys), 103 | results -> { 104 | eventBus.post(eventFactory.newContainsFinishedEvent(started, results)); 105 | return results; 106 | }, 107 | MoreExecutors.directExecutor()); 108 | } 109 | 110 | @Override 111 | public ListenableFuture deleteAsync(List ruleKeys) { 112 | return delegate.deleteAsync(ruleKeys); 113 | } 114 | 115 | @Override 116 | public CacheReadMode getCacheReadMode() { 117 | return delegate.getCacheReadMode(); 118 | } 119 | 120 | @Override 121 | public void close() { 122 | delegate.close(); 123 | } 124 | 125 | @Override 126 | public ArtifactCache getDelegate() { 127 | return delegate; 128 | } 129 | } 130 | ``` 131 | 132 | Similarly, [`RetryingCacheDecorator`](https://github.com/facebook/buck/blob/0acbcbbed7274ba654b64d97414b28183649e51a/src/com/facebook/buck/artifact_cache/RetryingCacheDecorator.java) passes everything down to the `delegate`, retrying failed fetches: 133 | 134 | ```java 135 | public class RetryingCacheDecorator implements ArtifactCache, CacheDecorator { 136 | 137 | private static final Logger LOG = Logger.get(RetryingCacheDecorator.class); 138 | 139 | private final ArtifactCache delegate; 140 | private final int maxFetchRetries; 141 | private final BuckEventBus buckEventBus; 142 | private 
final ArtifactCacheMode cacheMode; 143 | 144 | public RetryingCacheDecorator( 145 | ArtifactCacheMode cacheMode, 146 | ArtifactCache delegate, 147 | int maxFetchRetries, 148 | BuckEventBus buckEventBus) { 149 | Preconditions.checkArgument(maxFetchRetries > 0); 150 | 151 | this.cacheMode = cacheMode; 152 | this.delegate = delegate; 153 | this.maxFetchRetries = maxFetchRetries; 154 | this.buckEventBus = buckEventBus; 155 | } 156 | 157 | @Override 158 | public ListenableFuture fetchAsync( 159 | @Nullable BuildTarget target, RuleKey ruleKey, LazyPath output) { 160 | List allCacheErrors = new ArrayList<>(); 161 | ListenableFuture resultFuture = delegate.fetchAsync(target, ruleKey, output); 162 | for (int retryCount = 1; retryCount < maxFetchRetries; retryCount++) { 163 | int retryCountForLambda = retryCount; 164 | resultFuture = 165 | Futures.transformAsync( 166 | resultFuture, 167 | result -> { 168 | if (result.getType() != CacheResultType.ERROR) { 169 | return Futures.immediateFuture(result); 170 | } 171 | result.cacheError().ifPresent(allCacheErrors::add); 172 | LOG.info( 173 | "Failed to fetch %s after %d/%d attempts, exception: %s", 174 | ruleKey, retryCountForLambda + 1, maxFetchRetries, result.cacheError()); 175 | return delegate.fetchAsync(target, ruleKey, output); 176 | }); 177 | } 178 | return Futures.transform( 179 | resultFuture, 180 | result -> { 181 | if (result.getType() != CacheResultType.ERROR) { 182 | return result; 183 | } 184 | String msg = String.join("\n", allCacheErrors); 185 | if (!msg.contains(NoHealthyServersException.class.getName())) { 186 | buckEventBus.post( 187 | ConsoleEvent.warning( 188 | "Failed to fetch %s over %s after %d attempts.", 189 | ruleKey, cacheMode.name(), maxFetchRetries)); 190 | } 191 | return result.withCacheError(Optional.of(msg)); 192 | }, 193 | MoreExecutors.directExecutor()); 194 | } 195 | 196 | // ... 
197 | // Just delegates 198 | } 199 | ``` 200 | 201 | [`TwoLevelArtifactCacheDecorator`](https://github.com/facebook/buck/blob/0acbcbbed7274ba654b64d97414b28183649e51a/src/com/facebook/buck/artifact_cache/TwoLevelArtifactCacheDecorator.java) is a lot more involved, but its details are not in the scope of this article. 202 | 203 | ## Testing 204 | 205 | `RetryingCacheDecorator` and `LoggingArtifactCacheDecorator` aren't tested directly. `TwoLevelArtifactCacheDecorator` has [its own test](https://github.com/facebook/buck/blob/0acbcbbed7274ba654b64d97414b28183649e51a/src/com/facebook/buck/artifact_cache/TwoLevelArtifactCacheDecorator.java). 206 | 207 | ## Observations 208 | 209 | * If most decorator methods simply delegate to the underlying cache without doing anything else, perhaps there could be a `BaseCacheDecorator` that would simply delegate all calls, and concrete decorators could inherit from `BaseCacheDecorator` and only override those methods where they need to do something. 210 | 211 | ## References 212 | 213 | * [GitHub Repo](https://github.com/facebook/buck) 214 | * [Buck Website](https://buck.build/) 215 | * [Buck on Wikipedia](https://en.wikipedia.org/wiki/Buck_(software)) 216 | * [Decorator Pattern](https://en.wikipedia.org/wiki/Decorator_pattern) 217 | * [Refactoring to Patterns: Move Embellishment to Decorator](https://www.informit.com/articles/article.aspx?p=1398607&seqNum=3) 218 | 219 | ## Copyright notice 220 | 221 | Buck is licensed under the [Apache License 2.0](https://github.com/facebook/buck/blob/master/LICENSE). 222 | 223 | Copyright (c) Facebook, Inc. and its affiliates.
-------------------------------------------------------------------------------- /_articles/chaos-monkey-store.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Chaos Monkey - MySQL-backed Store for Schedules and Terminations [Go]" 3 | layout: default 4 | last_modified_date: 2021-08-01T18:27:00+0300 5 | nav_order: 4 6 | 7 | status: PUBLISHED 8 | language: Go 9 | short-title: MySQL-backed Store for Schedules and Terminations 10 | project: 11 | name: Chaos Monkey 12 | key: chaos-monkey 13 | home-page: https://github.com/Netflix/chaosmonkey 14 | tags: [data-access, sql, dao, chaos-engineering] 15 | --- 16 | 17 | {% include article-meta.html article=page %} 18 | 19 | ## Context 20 | 21 | *Chaos Monkey randomly terminates virtual machine instances and containers that run inside of your production environment. Exposing engineers to failures more frequently incentivizes them to build resilient services. Chaos Monkey is an example of a tool that follows the [Principles of Chaos Engineering](https://principlesofchaos.org/).* 22 | 23 | Chaos Monkey uses [MySQL](https://www.mysql.com/) to store termination schedules and performed instance terminations. 24 | 25 | ## Problem 26 | 27 | It's often considered a good practice to separate application logic from persistence. How is it done in Chaos Monkey? 28 | 29 | ## Overview 30 | 31 | The persistence logic of Chaos Monkey is encapsulated in the [`MySQL` struct](https://github.com/Netflix/chaosmonkey/blob/c16d769a82bb765f6544627ef6f08305791e8895/mysql/mysql.go#L40-L42). It's a variation of the [Data Access Object (DAO)](https://www.oracle.com/java/technologies/dataaccessobject.html) pattern or of the [Repository](https://martinfowler.com/eaaCatalog/repository.html) pattern. The [difference](https://stackoverflow.com/questions/8550124/what-is-the-difference-between-dao-and-repository-patterns) between them is rather subtle.
32 | 33 | The structure provides methods to: 34 | * Retrieve the schedule for a given date. 35 | * Publish a schedule. 36 | * Check if a termination is permitted. 37 | 38 | It's common for DAOs and Repositories to try to abstract away the nature of the storage as much as possible. Here, while the interface does not give away the nature of the storage, the name, `MySQL`, does. 39 | 40 | ## Implementation details 41 | 42 | [Structure definition](https://github.com/Netflix/chaosmonkey/blob/c16d769a82bb765f6544627ef6f08305791e8895/mysql/mysql.go#L40-L42). The only field is the underlying database. 43 | 44 | ```go 45 | // MySQL represents a MySQL-backed store for schedules and terminations 46 | type MySQL struct { 47 | db *sql.DB 48 | } 49 | ``` 50 | 51 | [Retrieving the schedule](https://github.com/Netflix/chaosmonkey/blob/c16d769a82bb765f6544627ef6f08305791e8895/mysql/mysql.go#L109-L143) for the given date. It runs a [`SELECT`](https://www.w3schools.com/sql/sql_select.asp) statement and maps the results to a `Schedule` object. 
52 | 53 | ```go 54 | // Retrieve retrieves the schedule for the given date 55 | func (m MySQL) Retrieve(date time.Time) (sched *schedule.Schedule, err error) { 56 | rows, err := m.db.Query("SELECT time, app, account, region, stack, cluster FROM schedules WHERE date = DATE(?)", utcDate(date)) 57 | if err != nil { 58 | return nil, errors.Wrapf(err, "failed to retrieve schedule for %s", date) 59 | } 60 | 61 | sched = schedule.New() 62 | 63 | defer func() { 64 | if cerr := rows.Close(); cerr != nil && err == nil { 65 | err = errors.Wrap(cerr, "rows.Close() failed") 66 | } 67 | }() 68 | 69 | for rows.Next() { 70 | var tm time.Time 71 | var app, account, region, stack, cluster string 72 | 73 | err = rows.Scan(&tm, &app, &account, &region, &stack, &cluster) 74 | if err != nil { 75 | return nil, errors.Wrap(err, "failed to scan row") 76 | } 77 | 78 | sched.Add(tm, grp.New(app, account, region, stack, cluster)) 79 | } 80 | 81 | err = rows.Err() 82 | if err != nil { 83 | return nil, errors.Wrap(err, "rows.Err() errored") 84 | } 85 | 86 | return sched, nil 87 | 88 | } 89 | ``` 90 | 91 | [Publishing a schedule](https://github.com/Netflix/chaosmonkey/blob/c16d769a82bb765f6544627ef6f08305791e8895/mysql/mysql.go#L145-L212). It checks if a schedule for the date already exists and, if it doesn't, runs an `INSERT` statement. Note the delay that allows testing for race conditions. 92 | 93 | It could run a little faster if [statements were prepared](https://github.com/Netflix/chaosmonkey/blob/c16d769a82bb765f6544627ef6f08305791e8895/mysql/mysql.go#L185-L186) during the [initialization](https://github.com/Netflix/chaosmonkey/blob/c16d769a82bb765f6544627ef6f08305791e8895/mysql/mysql.go#L85-L93). Read more about [using prepared statements in Go](https://golang.org/doc/database/prepared-statements).
94 | 95 | ```go 96 | // Publish publishes the schedule for the given date 97 | func (m MySQL) Publish(date time.Time, sched *schedule.Schedule) error { 98 | return m.PublishWithDelay(date, sched, 0) 99 | } 100 | 101 | // PublishWithDelay publishes the schedule with a delay between checking the schedule 102 | // exists and writing it. The delay is used only for testing race conditions 103 | func (m MySQL) PublishWithDelay(date time.Time, sched *schedule.Schedule, delay time.Duration) (err error) { 104 | // First, we check to see if there is a schedule present 105 | tx, err := m.db.Begin() 106 | if err != nil { 107 | return errors.Wrap(err, "failed to begin transaction") 108 | } 109 | 110 | // We must either commit or rollback at the end 111 | defer func() { 112 | switch err { 113 | case nil: 114 | err = tx.Commit() 115 | case schedstore.ErrAlreadyExists: 116 | // We want to return ErrAlreadyExists even if the transaction commit 117 | // fails 118 | _ = tx.Commit() 119 | default: 120 | _ = tx.Rollback() 121 | } 122 | }() 123 | 124 | exists, err := schedExists(tx, date) 125 | if err != nil { 126 | return err 127 | } 128 | 129 | if exists { 130 | return schedstore.ErrAlreadyExists 131 | } 132 | 133 | if delay > 0 { 134 | time.Sleep(delay) 135 | } 136 | query := "INSERT INTO schedules (date, time, app, account, region, stack, cluster) VALUES (?, ?, ?, ?, ?, ?, ?)" 137 | stmt, err := tx.Prepare(query) 138 | if err != nil { 139 | return errors.Wrapf(err, "failed to prepare sql statement: %s", query) 140 | } 141 | 142 | for _, entry := range sched.Entries() { 143 | var app, account, region, stack, cluster string 144 | app = entry.Group.App() 145 | account = entry.Group.Account() 146 | if val, ok := entry.Group.Region(); ok { 147 | region = val 148 | } 149 | if val, ok := entry.Group.Stack(); ok { 150 | stack = val 151 | } 152 | if val, ok := entry.Group.Cluster(); ok { 153 | cluster = val 154 | } 155 | 156 | _, err = stmt.Exec(utcDate(date), entry.Time.In(time.UTC), app, 
account, region, stack, cluster) 157 | if err != nil { 158 | return errors.Wrapf(err, "failed to execute prepared query") 159 | } 160 | } 161 | 162 | return nil 163 | } 164 | ``` 165 | 166 | [Checking if a termination is permitted](https://github.com/Netflix/chaosmonkey/blob/c16d769a82bb765f6544627ef6f08305791e8895/mysql/mysql.go#L262-L297). The name of the method seems to imply that it's side-effect free, but it actually records the termination time. 167 | 168 | It's not obvious that this logic belongs to the persistence layer. Perhaps it was done this way to ensure that the check and the recording are done in the same transaction. 169 | 170 | ```go 171 | // Check checks if a termination is permitted and, if so, records the 172 | // termination time on the server 173 | func (m MySQL) Check(term chaosmonkey.Termination, appCfg chaosmonkey.AppConfig, endHour int, loc *time.Location) error { 174 | return m.CheckWithDelay(term, appCfg, endHour, loc, 0) 175 | } 176 | 177 | // CheckWithDelay is the same as Check, but adds a delay between reading and 178 | // writing to the database (used for testing only) 179 | func (m MySQL) CheckWithDelay(term chaosmonkey.Termination, appCfg chaosmonkey.AppConfig, endHour int, loc *time.Location, delay time.Duration) error { 180 | tx, err := m.db.Begin() 181 | if err != nil { 182 | return errors.Wrap(err, "failed to begin transaction") 183 | } 184 | 185 | defer func() { 186 | switch err { 187 | case nil: 188 | err = tx.Commit() 189 | default: 190 | _ = tx.Rollback() 191 | } 192 | }() 193 | 194 | err = respectsMinTimeBetweenKills(tx, term.Time, term, appCfg, endHour, loc) 195 | if err != nil { 196 | return err 197 | } 198 | 199 | if delay > 0 { 200 | time.Sleep(delay) 201 | } 202 | 203 | err = recordTermination(tx, term, loc) 204 | return err 205 | 206 | } 207 | ``` 208 | 209 | ## Testing 210 | 211 | [Basic happy-path test](https://github.com/Netflix/chaosmonkey/blob/c16d769a82bb765f6544627ef6f08305791e8895/mysql/schedstore_test.go#L35-L79):
212 | 213 | ```go 214 | // Test we can publish and then retrieve a schedule 215 | func TestPublishRetrieve(t *testing.T) { 216 | err := initDB() 217 | if err != nil { 218 | t.Fatal(err) 219 | } 220 | 221 | m, err := mysql.New("localhost", port, "root", password, "chaosmonkey") 222 | if err != nil { 223 | t.Fatal(err) 224 | } 225 | 226 | loc, err := time.LoadLocation("America/Los_Angeles") 227 | if err != nil { 228 | t.Fatal(err) 229 | } 230 | 231 | sched := schedule.New() 232 | 233 | t1 := time.Date(2016, time.June, 20, 11, 40, 0, 0, loc) 234 | sched.Add(t1, grp.New("chaosguineapig", "test", "us-east-1", "", "chaosguineapig-test")) 235 | 236 | date := time.Date(2016, time.June, 20, 0, 0, 0, 0, loc) 237 | 238 | // Code under test: 239 | err = m.Publish(date, sched) 240 | if err != nil { 241 | t.Fatal(err) 242 | } 243 | sched, err = m.Retrieve(date) 244 | if err != nil { 245 | t.Fatal(err) 246 | } 247 | 248 | entries := sched.Entries() 249 | if got, want := len(entries), 1; got != want { 250 | t.Fatalf("got len(entries)=%d, want %d", got, want) 251 | } 252 | 253 | entry := entries[0] 254 | 255 | if !t1.Equal(entry.Time) { 256 | t.Errorf("%s != %s", t1, entry.Time) 257 | } 258 | } 259 | ``` 260 | 261 | [Testing for race conditions](https://github.com/Netflix/chaosmonkey/blob/c16d769a82bb765f6544627ef6f08305791e8895/mysql/schedstore_test.go#L185-L253). Note that Go's [built-in race detector](https://golang.org/doc/articles/race_detector) wouldn't catch race conditions like these. 262 | 263 | ```go 264 | func TestScheduleAlreadyExistsConcurrency(t *testing.T) { 265 | // ... 266 | 267 | // Try to publish the schedule twice. 
At least one schedule should return an 268 | // error 269 | ch := make(chan error, 2) 270 | 271 | date := time.Date(2016, time.June, 20, 0, 0, 0, 0, loc) 272 | 273 | go func() { 274 | ch <- m.PublishWithDelay(date, psched1, 3*time.Second) 275 | }() 276 | 277 | go func() { 278 | ch <- m.PublishWithDelay(date, psched2, 0) 279 | }() 280 | 281 | // Retrieve the two error values from the two calls 282 | 283 | var success int 284 | var txDeadlock int 285 | for i := 0; i < 2; i++ { 286 | err := <-ch 287 | switch { 288 | case err == nil: 289 | success++ 290 | case mysql.TxDeadlock(err): 291 | txDeadlock++ 292 | default: 293 | t.Fatalf("Unexpected error: %+v", err) 294 | } 295 | } 296 | 297 | if got, want := success, 1; got != want { 298 | t.Errorf("got %d succeses, want: %d", got, want) 299 | } 300 | 301 | // Should cause a deadlock 302 | if got, want := txDeadlock, 1; got != want { 303 | t.Errorf("got %d txDeadlock, want: %d", got, want) 304 | } 305 | } 306 | ``` 307 | 308 | ## References 309 | 310 | * [GitHub repo](https://github.com/Netflix/chaosmonkey) 311 | * [Documentation](https://netflix.github.io/chaosmonkey/) 312 | * [Chaos Engineering](https://en.wikipedia.org/wiki/Chaos_engineering) 313 | * [Principles of Chaos Engineering](https://principlesofchaos.org/) 314 | * [Data Access Object (DAO)](https://www.oracle.com/java/technologies/dataaccessobject.html) - Oracle website 315 | * [Data Access Object (DAO)](https://en.wikipedia.org/wiki/Data_access_object) - Wikipedia 316 | * [Repository](https://martinfowler.com/eaaCatalog/repository.html) - Martin Fowler's website 317 | * [Repository](https://docs.microsoft.com/en-us/previous-versions/msp-n-p/ff649690(v=pandp.10)) - Microsoft website 318 | 319 | ## Copyright notice 320 | 321 | Chaos Monkey is licensed under the [Apache License 2.0](https://github.com/Netflix/chaosmonkey/blob/master/LICENSE). 322 | 323 | Copyright 2015 Netflix, Inc.
324 | -------------------------------------------------------------------------------- /_articles/protobuf-tokenizer.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Protocol Buffers - Stream Tokenizer [C++]" 3 | layout: default 4 | last_modified_date: 2021-08-12T16:43:00+0300 5 | nav_order: 14 6 | 7 | status: PUBLISHED 8 | language: C++ 9 | short-title: Stream Tokenizer 10 | project: 11 | name: Protocol Buffers 12 | key: protobuf 13 | home-page: https://github.com/protocolbuffers/protobuf/ 14 | tags: ['tokenizer', 'parsing'] 15 | --- 16 | 17 | {% include article-meta.html article=page %} 18 | 19 | ## Context 20 | 21 | [Protocol buffers](https://developers.google.com/protocol-buffers/) (Protobuf) are Google's language-neutral, platform-neutral, extensible mechanism for serializing structured data – think XML, but smaller, faster, and simpler. You define how you want your data to be structured once, then you can use special generated source code to easily write and read your structured data to and from a variety of data streams and using a variety of languages. 22 | 23 | ## Problem 24 | 25 | Parsing serialized data requires [tokenizing](https://en.wikipedia.org/wiki/Lexical_analysis) (lexing) data streams. 26 | 27 | ## Overview 28 | 29 | Protobuf implements a custom tokenizer. Why it needs to exist and why it's implemented this way is explained in much detail in [this comment](https://github.com/protocolbuffers/protobuf/blob/8a3c4948a49d3b38effea499fd9dee66f28cb0c4/src/google/protobuf/io/tokenizer.cc#L35-L89): 30 | 31 | ```c++ 32 | // Here we have a hand-written lexer. At first you might ask yourself, 33 | // "Hand-written text processing? Is Kenton crazy?!" Well, first of all, 34 | // yes I am crazy, but that's beside the point. There are actually reasons 35 | // why I ended up writing this this way. 36 | // 37 | // The traditional approach to lexing is to use lex to generate a lexer for 38 | // you. 
Unfortunately, lex's output is ridiculously ugly and difficult to 39 | // integrate cleanly with C++ code, especially abstract code or code meant 40 | // as a library. Better parser-generators exist but would add dependencies 41 | // which most users won't already have, which we'd like to avoid. (GNU flex 42 | // has a C++ output option, but it's still ridiculously ugly, non-abstract, 43 | // and not library-friendly.) 44 | // 45 | // The next approach that any good software engineer should look at is to 46 | // use regular expressions. And, indeed, I did. I have code which 47 | // implements this same class using regular expressions. It's about 200 48 | // lines shorter. However: 49 | // - Rather than error messages telling you "This string has an invalid 50 | // escape sequence at line 5, column 45", you get error messages like 51 | // "Parse error on line 5". Giving more precise errors requires adding 52 | // a lot of code that ends up basically as complex as the hand-coded 53 | // version anyway. 54 | // - The regular expression to match a string literal looks like this: 55 | // kString = new RE("(\"([^\"\\\\]|" // non-escaped 56 | // "\\\\[abfnrtv?\"'\\\\0-7]|" // normal escape 57 | // "\\\\x[0-9a-fA-F])*\"|" // hex escape 58 | // "\'([^\'\\\\]|" // Also support single-quotes. 59 | // "\\\\[abfnrtv?\"'\\\\0-7]|" 60 | // "\\\\x[0-9a-fA-F])*\')"); 61 | // Verifying the correctness of this line noise is actually harder than 62 | // verifying the correctness of ConsumeString(), defined below. I'm not 63 | // even confident that the above is correct, after staring at it for some 64 | // time. 65 | // - PCRE is fast, but there's still more overhead involved than the code 66 | // below. 67 | // - Sadly, regular expressions are not part of the C standard library, so 68 | // using them would require depending on some other library. For the 69 | // open source release, this could be really annoying. 
Nobody likes 70 | // downloading one piece of software just to find that they need to 71 | // download something else to make it work, and in all likelihood 72 | // people downloading Protocol Buffers will already be doing so just 73 | // to make something else work. We could include a copy of PCRE with 74 | // our code, but that obligates us to keep it up-to-date and just seems 75 | // like a big waste just to save 200 lines of code. 76 | // 77 | // On a similar but unrelated note, I'm even scared to use ctype.h. 78 | // Apparently functions like isalpha() are locale-dependent. So, if we used 79 | // that, then if this code is being called from some program that doesn't 80 | // have its locale set to "C", it would behave strangely. We can't just set 81 | // the locale to "C" ourselves since we might break the calling program that 82 | // way, particularly if it is multi-threaded. WTF? Someone please let me 83 | // (Kenton) know if I'm missing something here... 84 | // 85 | // I'd love to hear about other alternatives, though, as this code isn't 86 | // exactly pretty. 87 | ``` 88 | 89 | ## Implementation details 90 | 91 | Let's look at the method [`Next`](https://github.com/protocolbuffers/protobuf/blob/master/src/google/protobuf/io/tokenizer.cc#L565-L659), which reads the next token. It looks at the beginning of the token and identifies its type (string, number, identifier, etc.) and then, knowing the type, delegates reading the rest of the token to specialized methods. 92 | 93 | ```c++ 94 | bool Tokenizer::Next() { 95 | previous_ = current_; 96 | 97 | while (!read_error_) { 98 | ConsumeZeroOrMore(); 99 | 100 | switch (TryConsumeCommentStart()) { 101 | case LINE_COMMENT: 102 | ConsumeLineComment(NULL); 103 | continue; 104 | case BLOCK_COMMENT: 105 | ConsumeBlockComment(NULL); 106 | continue; 107 | case SLASH_NOT_COMMENT: 108 | return true; 109 | case NO_COMMENT: 110 | break; 111 | } 112 | 113 | // Check for EOF before continuing. 
114 | if (read_error_) break; 115 | 116 | if (LookingAt() || current_char_ == '\0') { 117 | AddError("Invalid control characters encountered in text."); 118 | NextChar(); 119 | // Skip more unprintable characters, too. But, remember that '\0' is 120 | // also what current_char_ is set to after EOF / read error. We have 121 | // to be careful not to go into an infinite loop of trying to consume 122 | // it, so make sure to check read_error_ explicitly before consuming 123 | // '\0'. 124 | while (TryConsumeOne() || 125 | (!read_error_ && TryConsume('\0'))) { 126 | // Ignore. 127 | } 128 | 129 | } else { 130 | // Reading some sort of token. 131 | StartToken(); 132 | 133 | if (TryConsumeOne()) { 134 | ConsumeZeroOrMore(); 135 | current_.type = TYPE_IDENTIFIER; 136 | } else if (TryConsume('0')) { 137 | current_.type = ConsumeNumber(true, false); 138 | } else if (TryConsume('.')) { 139 | // This could be the beginning of a floating-point number, or it could 140 | // just be a '.' symbol. 141 | 142 | if (TryConsumeOne()) { 143 | // It's a floating-point number. 144 | if (previous_.type == TYPE_IDENTIFIER && 145 | current_.line == previous_.line && 146 | current_.column == previous_.end_column) { 147 | // We don't accept syntax like "blah.123". 148 | error_collector_->AddError( 149 | line_, column_ - 2, 150 | "Need space between identifier and decimal point."); 151 | } 152 | current_.type = ConsumeNumber(false, true); 153 | } else { 154 | current_.type = TYPE_SYMBOL; 155 | } 156 | } else if (TryConsumeOne()) { 157 | current_.type = ConsumeNumber(false, false); 158 | } else if (TryConsume('\"')) { 159 | ConsumeString('\"'); 160 | current_.type = TYPE_STRING; 161 | } else if (TryConsume('\'')) { 162 | ConsumeString('\''); 163 | current_.type = TYPE_STRING; 164 | } else { 165 | // Check if the high order bit is set. 
166 | if (current_char_ & 0x80) { 167 | error_collector_->AddError( 168 | line_, column_, 169 | StringPrintf("Interpreting non ascii codepoint %d.", 170 | static_cast(current_char_))); 171 | } 172 | NextChar(); 173 | current_.type = TYPE_SYMBOL; 174 | } 175 | 176 | EndToken(); 177 | return true; 178 | } 179 | } 180 | 181 | // EOF 182 | current_.type = TYPE_END; 183 | current_.text.clear(); 184 | current_.line = line_; 185 | current_.column = column_; 186 | current_.end_column = column_; 187 | return false; 188 | } 189 | ``` 190 | 191 | Let's now look at one of the specialized methods, [`ConsumeNumber`](https://github.com/protocolbuffers/protobuf/blob/8a3c4948a49d3b38effea499fd9dee66f28cb0c4/src/google/protobuf/io/tokenizer.cc#L427-L480). It's more difficult than just reading a sequence of digits because the number can also be negative, float, hex or written in the exponential notation. The code is written very clearly and makes it all simple. 192 | 193 | ```c++ 194 | Tokenizer::TokenType Tokenizer::ConsumeNumber(bool started_with_zero, 195 | bool started_with_dot) { 196 | bool is_float = false; 197 | 198 | if (started_with_zero && (TryConsume('x') || TryConsume('X'))) { 199 | // A hex number (started with "0x"). 200 | ConsumeOneOrMore("\"0x\" must be followed by hex digits."); 201 | 202 | } else if (started_with_zero && LookingAt()) { 203 | // An octal number (had a leading zero). 204 | ConsumeZeroOrMore(); 205 | if (LookingAt()) { 206 | AddError("Numbers starting with leading zero must be in octal."); 207 | ConsumeZeroOrMore(); 208 | } 209 | 210 | } else { 211 | // A decimal number. 
212 | if (started_with_dot) { 213 | is_float = true; 214 | ConsumeZeroOrMore(); 215 | } else { 216 | ConsumeZeroOrMore(); 217 | 218 | if (TryConsume('.')) { 219 | is_float = true; 220 | ConsumeZeroOrMore(); 221 | } 222 | } 223 | 224 | if (TryConsume('e') || TryConsume('E')) { 225 | is_float = true; 226 | TryConsume('-') || TryConsume('+'); 227 | ConsumeOneOrMore("\"e\" must be followed by exponent."); 228 | } 229 | 230 | if (allow_f_after_float_ && (TryConsume('f') || TryConsume('F'))) { 231 | is_float = true; 232 | } 233 | } 234 | 235 | if (LookingAt() && require_space_after_number_) { 236 | AddError("Need space between number and identifier."); 237 | } else if (current_char_ == '.') { 238 | if (is_float) { 239 | AddError( 240 | "Already saw decimal point or exponent; can't have another one."); 241 | } else { 242 | AddError("Hex and octal numbers must be integers."); 243 | } 244 | } 245 | 246 | return is_float ? TYPE_FLOAT : TYPE_INTEGER; 247 | } 248 | ``` 249 | 250 | ## Testing 251 | 252 | There's a very comprehensive [test suite](https://github.com/protocolbuffers/protobuf/blob/8a3c4948a49d3b38effea499fd9dee66f28cb0c4/src/google/protobuf/io/tokenizer_unittest.cc). 253 | 254 | Let's look at [one of the tests](https://github.com/protocolbuffers/protobuf/blob/master/src/google/protobuf/io/tokenizer_unittest.cc#L290-L318) for tokenizing floats: 255 | 256 | ```c++ 257 | TEST_1D(TokenizerTest, FloatSuffix, kBlockSizes) { 258 | // Test the "allow_f_after_float" option. 259 | 260 | // Set up the tokenizer. 261 | const char* text = "1f 2.5f 6e3f 7F"; 262 | TestInputStream input(text, strlen(text), kBlockSizes_case); 263 | TestErrorCollector error_collector; 264 | Tokenizer tokenizer(&input, &error_collector); 265 | tokenizer.set_allow_f_after_float(true); 266 | 267 | // Advance through tokens and check that they are parsed as expected. 
268 | ASSERT_TRUE(tokenizer.Next()); 269 | EXPECT_EQ(tokenizer.current().text, "1f"); 270 | EXPECT_EQ(tokenizer.current().type, Tokenizer::TYPE_FLOAT); 271 | ASSERT_TRUE(tokenizer.Next()); 272 | EXPECT_EQ(tokenizer.current().text, "2.5f"); 273 | EXPECT_EQ(tokenizer.current().type, Tokenizer::TYPE_FLOAT); 274 | ASSERT_TRUE(tokenizer.Next()); 275 | EXPECT_EQ(tokenizer.current().text, "6e3f"); 276 | EXPECT_EQ(tokenizer.current().type, Tokenizer::TYPE_FLOAT); 277 | ASSERT_TRUE(tokenizer.Next()); 278 | EXPECT_EQ(tokenizer.current().text, "7F"); 279 | EXPECT_EQ(tokenizer.current().type, Tokenizer::TYPE_FLOAT); 280 | 281 | // There should be no more input. 282 | EXPECT_FALSE(tokenizer.Next()); 283 | // There should be no errors. 284 | EXPECT_TRUE(error_collector.text_.empty()); 285 | } 286 | ``` 287 | 288 | ## Related 289 | 290 | * [Tokenizer](https://github.com/amzn/ion-java/blob/0eb62ba92633f03e9f3175bb77f1b3214fcc1cdd/src/com/amazon/ion/impl/IonReaderTextRawTokensX.java) in [Ion](https://en.wikipedia.org/wiki/Ion_(serialization_format)), a data serialization format developed by Amazon. 291 | 292 | ## References 293 | 294 | * [GitHub Repo](https://github.com/protocolbuffers/protobuf) 295 | * [Protocol Buffers on Wikipedia](https://en.wikipedia.org/wiki/Protocol_Buffers) 296 | * [Protocol Buffers on developers.google.com](https://developers.google.com/protocol-buffers/) 297 | 298 | ## Copyright notice 299 | 300 | Protocol Buffers is licensed under a [custom license](https://github.com/protocolbuffers/protobuf/blob/master/LICENSE). 301 | 302 | Copyright 2008 Google Inc. 
303 | -------------------------------------------------------------------------------- /_articles/aws-cdk-template-diff.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "AWS CDK - Computing Diff Between Infrastructure Templates [TypeScript]" 3 | layout: default 4 | last_modified_date: 2021-07-29T16:03:00+0300 5 | nav_order: 5 6 | 7 | status: PUBLISHED 8 | language: TypeScript 9 | short-title: Computing Diff Between Infrastructure Templates 10 | project: 11 | name: AWS CDK 12 | key: aws-cdk 13 | home-page: https://github.com/aws/aws-cdk 14 | tags: [aws, cloud, diff, infrastructure-as-code] 15 | --- 16 | 17 | {% include article-meta.html article=page %} 18 | 19 | ## Context 20 | 21 | CDK (Cloud Development Kit) is an [infrastructure as code](https://en.wikipedia.org/wiki/Infrastructure_as_code) software tool created by [AWS](https://aws.amazon.com/). CDK is used to synthesize and deploy [CloudFormation](https://aws.amazon.com/cloudformation/) infrastructure templates. 22 | 23 | Infrastructures are made up of resources (virtual machines, database tables, load balancers, etc.). Resources can depend on other resources. 24 | 25 | ## Problem 26 | 27 | Synthesized infrastructure templates need to be compared to the existing state of the infrastructure to see what resources will be created, updated or deleted if the template is deployed. The diff algorithm needs to be aware of the template semantics. 28 | 29 | ## Overview 30 | 31 | There are different diff handlers for the 9 top-level keys (`AWSTemplateFormatVersion`, `Description`, `Metadata`, `Parameters`, `Mappings`, `Conditions`, `Transform`, `Resources`, `Outputs`). 32 | 33 | It calculates what was added, removed or updated. For each changed resource it decides the impact: if it will be updated, destroyed, orphaned (excluded from the template but not actually deleted). 34 | 35 | Changes to one resource can trigger changes to resources dependent on it. 
These changes are propagated until convergence. 36 | 37 | There's a method to print the diff in a human-readable format. 38 | 39 | ## Implementation details 40 | 41 | [The implementation](https://github.com/aws/aws-cdk/tree/d88b45eb21bcd051146477e3c97de7dd7b8634d3/packages/%40aws-cdk/cloudformation-diff) is rather long; we are just scratching the surface in this review. 42 | 43 | [The main method](https://github.com/aws/aws-cdk/blob/d88b45eb21bcd051146477e3c97de7dd7b8634d3/packages/%40aws-cdk/cloudformation-diff/lib/diff-template.ts#L31-L78). First it actually calculates the diff and then propagates replacements for replaced resources until it converges. 44 | 45 | ```typescript 46 | /** 47 | * Compare two CloudFormation templates and return semantic differences between them. 48 | * 49 | * @param currentTemplate the current state of the stack. 50 | * @param newTemplate the target state of the stack. 51 | * 52 | * @returns a +types.TemplateDiff+ object that represents the changes that will happen if 53 | * a stack which current state is described by +currentTemplate+ is updated with 54 | * the template +newTemplate+. 
55 | */ 56 | export function diffTemplate(currentTemplate: { [key: string]: any }, newTemplate: { [key: string]: any }): types.TemplateDiff { 57 | // Base diff 58 | const theDiff = calculateTemplateDiff(currentTemplate, newTemplate); 59 | 60 | // We're going to modify this in-place 61 | const newTemplateCopy = deepCopy(newTemplate); 62 | 63 | let didPropagateReferenceChanges; 64 | let diffWithReplacements; 65 | do { 66 | diffWithReplacements = calculateTemplateDiff(currentTemplate, newTemplateCopy); 67 | 68 | // Propagate replacements for replaced resources 69 | didPropagateReferenceChanges = false; 70 | if (diffWithReplacements.resources) { 71 | diffWithReplacements.resources.forEachDifference((logicalId, change) => { 72 | if (change.changeImpact === types.ResourceImpact.WILL_REPLACE) { 73 | if (propagateReplacedReferences(newTemplateCopy, logicalId)) { 74 | didPropagateReferenceChanges = true; 75 | } 76 | } 77 | }); 78 | } 79 | } while (didPropagateReferenceChanges); 80 | 81 | // Copy "replaced" states from `diffWithReplacements` to `theDiff`. 82 | diffWithReplacements.resources 83 | .filter(r => isReplacement(r!.changeImpact)) 84 | .forEachDifference((logicalId, downstreamReplacement) => { 85 | const resource = theDiff.resources.get(logicalId); 86 | 87 | if (resource.changeImpact !== downstreamReplacement.changeImpact) { 88 | propagatePropertyReplacement(downstreamReplacement, resource); 89 | } 90 | }); 91 | 92 | return theDiff; 93 | } 94 | ``` 95 | 96 | [Diffing templates (without propagation)](https://github.com/aws/aws-cdk/blob/d88b45eb21bcd051146477e3c97de7dd7b8634d3/packages/%40aws-cdk/cloudformation-diff/lib/diff-template.ts#L96-L111). Most of the work is delegated to [`DIFF_HANDLERS`](https://github.com/aws/aws-cdk/blob/d88b45eb21bcd051146477e3c97de7dd7b8634d3/packages/%40aws-cdk/cloudformation-diff/lib/diff-template.ts#L10-L29). 
97 | ```typescript 98 | function calculateTemplateDiff(currentTemplate: { [key: string]: any }, newTemplate: { [key: string]: any }): types.TemplateDiff { 99 | const differences: types.ITemplateDiff = {}; 100 | const unknown: { [key: string]: types.Difference } = {}; 101 | for (const key of unionOf(Object.keys(currentTemplate), Object.keys(newTemplate)).sort()) { 102 | const oldValue = currentTemplate[key]; 103 | const newValue = newTemplate[key]; 104 | if (deepEqual(oldValue, newValue)) { continue; } 105 | const handler: DiffHandler = DIFF_HANDLERS[key] 106 | || ((_diff, oldV, newV) => unknown[key] = impl.diffUnknown(oldV, newV)); 107 | handler(differences, oldValue, newValue); 108 | 109 | } 110 | if (Object.keys(unknown).length > 0) { differences.unknown = new types.DifferenceCollection(unknown); } 111 | 112 | return new types.TemplateDiff(differences); 113 | } 114 | ``` 115 | 116 | [Diffing two resources](https://github.com/aws/aws-cdk/blob/d88b45eb21bcd051146477e3c97de7dd7b8634d3/packages/%40aws-cdk/cloudformation-diff/lib/diff/index.ts#L29-L77): 117 | 118 | ```typescript 119 | export function diffResource(oldValue?: types.Resource, newValue?: types.Resource): types.ResourceDifference { 120 | const resourceType = { 121 | oldType: oldValue && oldValue.Type, 122 | newType: newValue && newValue.Type, 123 | }; 124 | let propertyDiffs: { [key: string]: types.PropertyDifference } = {}; 125 | let otherDiffs: { [key: string]: types.Difference } = {}; 126 | 127 | if (resourceType.oldType !== undefined && resourceType.oldType === resourceType.newType) { 128 | // Only makes sense to inspect deeper if the types stayed the same 129 | const typeSpec = cfnspec.filteredSpecification(resourceType.oldType); 130 | const impl = typeSpec.ResourceTypes[resourceType.oldType]; 131 | propertyDiffs = diffKeyedEntities(oldValue!.Properties, 132 | newValue!.Properties, 133 | (oldVal, newVal, key) => _diffProperty(oldVal, newVal, key, impl)); 134 | 135 | otherDiffs = 
diffKeyedEntities(oldValue, newValue, _diffOther); 136 | delete otherDiffs.Properties; 137 | } 138 | 139 | return new types.ResourceDifference(oldValue, newValue, { 140 | resourceType, propertyDiffs, otherDiffs, 141 | }); 142 | 143 | function _diffProperty(oldV: any, newV: any, key: string, resourceSpec?: cfnspec.schema.ResourceType) { 144 | let changeImpact = types.ResourceImpact.NO_CHANGE; 145 | 146 | const spec = resourceSpec && resourceSpec.Properties && resourceSpec.Properties[key]; 147 | if (spec && !deepEqual(oldV, newV)) { 148 | switch (spec.UpdateType) { 149 | case cfnspec.schema.UpdateType.Immutable: 150 | changeImpact = types.ResourceImpact.WILL_REPLACE; 151 | break; 152 | case cfnspec.schema.UpdateType.Conditional: 153 | changeImpact = types.ResourceImpact.MAY_REPLACE; 154 | break; 155 | default: 156 | // In those cases, whatever is the current value is what we should keep 157 | changeImpact = types.ResourceImpact.WILL_UPDATE; 158 | } 159 | } 160 | 161 | return new types.PropertyDifference(oldV, newV, { changeImpact }); 162 | } 163 | 164 | function _diffOther(oldV: any, newV: any) { 165 | return new types.Difference(oldV, newV); 166 | } 167 | } 168 | ``` 169 | 170 | [Rendering diffs in a human-readable form (not listed).](https://github.com/aws/aws-cdk/blob/d88b45eb21bcd051146477e3c97de7dd7b8634d3/packages/%40aws-cdk/cloudformation-diff/lib/format.ts) 171 | 172 | ## Testing 173 | 174 | 175 | [The test suite](https://github.com/aws/aws-cdk/blob/d88b45eb21bcd051146477e3c97de7dd7b8634d3/packages/%40aws-cdk/cloudformation-diff/test/diff-template.test.ts) is quite comprehensive. 
176 | 177 | [Basic test for adding a resource](https://github.com/aws/aws-cdk/blob/d88b45eb21bcd051146477e3c97de7dd7b8634d3/packages/%40aws-cdk/cloudformation-diff/test/diff-template.test.ts#L32-L45): 178 | ```typescript 179 | test('when a resource is created', () => { 180 | const currentTemplate = { Resources: {} }; 181 | 182 | const newTemplate = { Resources: { BucketResource: { Type: 'AWS::S3::Bucket' } } }; 183 | 184 | const differences = diffTemplate(currentTemplate, newTemplate); 185 | expect(differences.differenceCount).toBe(1); 186 | expect(differences.resources.differenceCount).toBe(1); 187 | const difference = differences.resources.changes.BucketResource; 188 | expect(difference).not.toBeUndefined(); 189 | expect(difference?.isAddition).toBeTruthy(); 190 | expect(difference?.newResourceType).toEqual('AWS::S3::Bucket'); 191 | expect(difference?.changeImpact).toBe(ResourceImpact.WILL_CREATE); 192 | }); 193 | ``` 194 | 195 | [Test cascading changes](https://github.com/aws/aws-cdk/blob/d88b45eb21bcd051146477e3c97de7dd7b8634d3/packages/%40aws-cdk/cloudformation-diff/test/diff-template.test.ts#L279-L323): 196 | ```typescript 197 | test('resource replacement is tracked through references', () => { 198 | // If a resource is replaced, then that change shows that references are 199 | // going to change. This may lead to replacement of downstream resources 200 | // if the reference is used in an immutable property, and so on. 
201 | 202 | // GIVEN 203 | const currentTemplate = { 204 | Resources: { 205 | Bucket: { 206 | Type: 'AWS::S3::Bucket', 207 | Properties: { BucketName: 'Name1' }, // Immutable prop 208 | }, 209 | Queue: { 210 | Type: 'AWS::SQS::Queue', 211 | Properties: { QueueName: { Ref: 'Bucket' } }, // Immutable prop 212 | }, 213 | Topic: { 214 | Type: 'AWS::SNS::Topic', 215 | Properties: { TopicName: { Ref: 'Queue' } }, // Immutable prop 216 | }, 217 | }, 218 | }; 219 | 220 | // WHEN 221 | const newTemplate = { 222 | Resources: { 223 | Bucket: { 224 | Type: 'AWS::S3::Bucket', 225 | Properties: { BucketName: 'Name2' }, 226 | }, 227 | Queue: { 228 | Type: 'AWS::SQS::Queue', 229 | Properties: { QueueName: { Ref: 'Bucket' } }, 230 | }, 231 | Topic: { 232 | Type: 'AWS::SNS::Topic', 233 | Properties: { TopicName: { Ref: 'Queue' } }, 234 | }, 235 | }, 236 | }; 237 | const differences = diffTemplate(currentTemplate, newTemplate); 238 | 239 | // THEN 240 | expect(differences.resources.differenceCount).toBe(3); 241 | }); 242 | ``` 243 | 244 | [Testing](https://github.com/aws/aws-cdk/blob/d88b45eb21bcd051146477e3c97de7dd7b8634d3/packages/%40aws-cdk/cloudformation-diff/test/diff-template.test.ts#L434-L460) that it understands that the order of elements in an array matters in some places and doesn't matter in others: 245 | 246 | ```typescript 247 | test('array equivalence is independent of element order in DependsOn expressions', () => { 248 | // GIVEN 249 | const currentTemplate = { 250 | Resources: { 251 | BucketResource: { 252 | Type: 'AWS::S3::Bucket', 253 | DependsOn: ['SomeResource', 'AnotherResource'], 254 | }, 255 | }, 256 | }; 257 | 258 | // WHEN 259 | const newTemplate = { 260 | Resources: { 261 | BucketResource: { 262 | Type: 'AWS::S3::Bucket', 263 | DependsOn: ['AnotherResource', 'SomeResource'], 264 | }, 265 | }, 266 | }; 267 | 268 | let differences = diffTemplate(currentTemplate, newTemplate); 269 | expect(differences.resources.differenceCount).toBe(0); 270 | 271 | differences 
= diffTemplate(newTemplate, currentTemplate); 272 | expect(differences.resources.differenceCount).toBe(0); 273 | }); 274 | 275 | test('arrays of different length are considered unequal in DependsOn expressions', () => { 276 | // GIVEN 277 | const currentTemplate = { 278 | Resources: { 279 | BucketResource: { 280 | Type: 'AWS::S3::Bucket', 281 | DependsOn: ['SomeResource', 'AnotherResource', 'LastResource'], 282 | }, 283 | }, 284 | }; 285 | 286 | // WHEN 287 | const newTemplate = { 288 | Resources: { 289 | BucketResource: { 290 | Type: 'AWS::S3::Bucket', 291 | DependsOn: ['AnotherResource', 'SomeResource'], 292 | }, 293 | }, 294 | }; 295 | 296 | let differences = diffTemplate(currentTemplate, newTemplate); 297 | expect(differences.resources.differenceCount).toBe(1); 298 | 299 | differences = diffTemplate(newTemplate, currentTemplate); 300 | expect(differences.resources.differenceCount).toBe(1); 301 | }); 302 | ``` 303 | 304 | ## Observations 305 | 306 | * Some resources ([IAM](https://github.com/aws/aws-cdk/tree/d88b45eb21bcd051146477e3c97de7dd7b8634d3/packages/%40aws-cdk/cloudformation-diff/lib/iam), [networking](https://github.com/aws/aws-cdk/tree/d88b45eb21bcd051146477e3c97de7dd7b8634d3/packages/%40aws-cdk/cloudformation-diff/lib/network)) require special treatment. 307 | 308 | ## Related 309 | 310 | [Terraform](https://github.com/hashicorp/terraform/), a competing product, [implements a similar algorithm](https://github.com/hashicorp/terraform/blob/d35bc0531255b496beb5d932f185cbcdb2d61a99/internal/legacy/terraform/diff.go). 
311 | 312 | ## References 313 | 314 | * [GitHub Repo](https://github.com/aws/aws-cdk) 315 | * [Product website](https://aws.amazon.com/cdk/) 316 | * [Documentation](https://docs.aws.amazon.com/cdk/index.html) 317 | * [Sample templates](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/sample-templates-services-us-west-2.html) 318 | 319 | ## Copyright notice 320 | 321 | AWS CDK is licensed under the [Apache License 2.0](https://github.com/aws/aws-cdk/blob/master/LICENSE). 322 | 323 | Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. 324 | -------------------------------------------------------------------------------- /_articles/error-prone-test-helper.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Error Prone - Testing Bug Checkers [Java]" 3 | layout: default 4 | last_modified_date: 2021-09-01T17:57:00+0300 5 | nav_order: 2 6 | 7 | status: PUBLISHED 8 | language: Java 9 | short-title: Testing Bug Checkers 10 | project: 11 | name: Error Prone 12 | key: error-prone 13 | home-page: https://github.com/google/error-prone 14 | tags: [dsl, test-helper, builder] 15 | --- 16 | 17 | {% include article-meta.html article=page %} 18 | 19 | ## Context 20 | 21 | [Error Prone](https://errorprone.info) is a static analysis tool for Java that catches common programming mistakes at compile-time. 22 | 23 | Error Prone is comprised of hundreds of different checkers searching for different types of defects. Checkers inherit from the same base class [`BugChecker`](https://github.com/google/error-prone/blob/c601758e81723a8efc4671726b8363be7a306dce/check_api/src/main/java/com/google/errorprone/bugpatterns/BugChecker.java#). In a nutshell, the interface for a checker is *unit of code (class, method, etc.) in - findings out*. 24 | 25 | ## Problem 26 | 27 | There must be an easy and uniform way to test checkers. Tests must be easy to read and easy to write. 
They must be agnostic to how source code is represented and how these representations (abstract syntax trees, etc.) are built, how the checker interacts with the rest of the system, etc. 28 | 29 | ## Overview 30 | 31 | Error Prone implements a helper class, [`CompilationTestHelper`](https://github.com/google/error-prone/blob/c601758e81723a8efc4671726b8363be7a306dce/test_helpers/src/main/java/com/google/errorprone/CompilationTestHelper.java), to simplify writing tests for bug checkers. 32 | 33 | It is initialized with the checker under test and accepts source code to run the checker on. That code can either be inlined in the test or read from another file. 34 | 35 | The test's author annotates the input code with comments marking where the checker must fire and what it must output. 36 | 37 | `CompilationTestHelper` then runs the checker on the provided code and compares its output with the expectation extracted from the marker comments. 38 | 39 | [Example usage (checker input is inlined)](https://github.com/google/error-prone/blob/c601758e81723a8efc4671726b8363be7a306dce/core/src/test/java/com/google/errorprone/bugpatterns/HashCodeToStringTest.java#L29-L62): 40 | 41 | ```java 42 | public class HashCodeToStringTest { 43 | 44 | private final CompilationTestHelper compilationHelper = 45 | CompilationTestHelper.newInstance(HashCodeToString.class, getClass()); 46 | 47 | @Test 48 | public void testPositiveCase() { 49 | compilationHelper 50 | .addSourceLines( 51 | "HashCodeOnly.java", 52 | "public class HashCodeOnly {", 53 | " // BUG: Diagnostic contains: HashCodeToString", 54 | " public int hashCode() {", 55 | " return 0;", 56 | " }", 57 | "}") 58 | .doTest(); 59 | } 60 | 61 | @Test 62 | public void negative_bothHashCodeAndToString() { 63 | compilationHelper 64 | .addSourceLines( 65 | "HashCodeAndToString.java", 66 | "public class HashCodeAndToString {", 67 | " public int hashCode() {", 68 | " return 0;", 69 | " }", 70 | " public String toString() {", 71 | " return \"42\";", 72 | " 
}", 73 | "}") 74 | .doTest(); 75 | } 76 | ``` 77 | 78 | [Example usage (checker input is read from another file)](https://github.com/google/error-prone/blob/c601758e81723a8efc4671726b8363be7a306dce/core/src/test/java/com/google/errorprone/bugpatterns/ComparableTypeTest.java#L25-L38): 79 | 80 | ```java 81 | public class ComparableTypeTest { 82 | private final CompilationTestHelper compilationHelper = 83 | CompilationTestHelper.newInstance(ComparableType.class, getClass()); 84 | 85 | @Test 86 | public void testPositiveCase() { 87 | compilationHelper.addSourceFile("ComparableTypePositiveCases.java").doTest(); 88 | } 89 | 90 | @Test 91 | public void testNegativeCase() { 92 | compilationHelper.addSourceFile("ComparableTypeNegativeCases.java").doTest(); 93 | } 94 | } 95 | ``` 96 | 97 | Unlike typical unit tests that interact with the public interface of the code under test, these Error Prone bug checkers' tests interact with the code under test in a very indirect way. They follow the [black-box approach](https://en.wikipedia.org/wiki/Black-box_testing) and make no assumptions about the checker's implementation. 98 | 99 | ## Implementation details 100 | 101 | `CompilationTestHelper` [uses](https://github.com/google/error-prone/blob/c601758e81723a8efc4671726b8363be7a306dce/test_helpers/src/main/java/com/google/errorprone/CompilationTestHelper.java#L168-L198) the [Builder pattern](https://refactoring.guru/design-patterns/builder/java/example) (not the same Builder as [described in the GoF book!](https://en.wikipedia.org/wiki/Builder_pattern)) to add source code and various configurations: 102 | 103 | ```java 104 | /** 105 | * Adds a source file to the test compilation, from the string content of the file. 106 | * 107 | *

The diagnostics expected from compiling the file are inferred from the file contents. For 108 | * each line of the test file that contains the bug marker pattern "// BUG: Diagnostic contains: 109 | * foo", we expect to see a diagnostic on that line containing "foo". For each line of the test 110 | * file that does not contain the bug marker pattern, we expect no diagnostic to be 111 | * generated. You can also use "// BUG: Diagnostic matches: X" in tandem with {@code 112 | * expectErrorMessage("X", "foo")} to allow you to programmatically construct the error message. 113 | * 114 | * @param path a path for the source file 115 | * @param lines the content of the source file 116 | */ 117 | public CompilationTestHelper addSourceLines(String path, String... lines) { 118 | this.sources.add(forSourceLines(path, lines)); 119 | return this; 120 | } 121 | 122 | /** 123 | * Adds a source file to the test compilation, from an existing resource file. 124 | * 125 | *

See {@link #addSourceLines} for how expected diagnostics should be specified. 126 | * 127 | * @param path the path to the source file 128 | */ 129 | public CompilationTestHelper addSourceFile(String path) { 130 | this.sources.add(forResource(clazz, path)); 131 | return this; 132 | } 133 | ``` 134 | 135 | The main [`doTest` method](https://github.com/google/error-prone/blob/c601758e81723a8efc4671726b8363be7a306dce/test_helpers/src/main/java/com/google/errorprone/CompilationTestHelper.java#L293-L346) that compiles the supplied code and compares the output to the expectations: 136 | ```java 137 | /** Performs a compilation and checks that the diagnostics and result match the expectations. */ 138 | public void doTest() { 139 | checkState(!sources.isEmpty(), "No source files to compile"); 140 | checkState(!run, "doTest should only be called once"); 141 | this.run = true; 142 | Result result = compile(); 143 | for (Diagnostic diagnostic : diagnosticHelper.getDiagnostics()) { 144 | if (diagnostic.getCode().contains("error.prone.crash")) { 145 | fail(diagnostic.getMessage(Locale.ENGLISH)); 146 | } 147 | } 148 | if (expectNoDiagnostics) { 149 | List> diagnostics = diagnosticHelper.getDiagnostics(); 150 | assertWithMessage( 151 | String.format( 152 | "Expected no diagnostics produced, but found %d: %s", 153 | diagnostics.size(), diagnostics)) 154 | .that(diagnostics.size()) 155 | .isEqualTo(0); 156 | assertWithMessage( 157 | String.format( 158 | "Expected compilation result to be " 159 | + expectedResult.orElse(Result.OK) 160 | + ", but was %s. No diagnostics were emitted." 
161 | + " OutputStream from Compiler follows.\n\n%s", 162 | result, 163 | outputStream)) 164 | .that(result) 165 | .isEqualTo(expectedResult.orElse(Result.OK)); 166 | } else { 167 | for (JavaFileObject source : sources) { 168 | try { 169 | diagnosticHelper.assertHasDiagnosticOnAllMatchingLines( 170 | source, lookForCheckNameInDiagnostic); 171 | } catch (IOException e) { 172 | throw new UncheckedIOException(e); 173 | } 174 | } 175 | assertWithMessage("Unused error keys: " + diagnosticHelper.getUnusedLookupKeys()) 176 | .that(diagnosticHelper.getUnusedLookupKeys().isEmpty()) 177 | .isTrue(); 178 | } 179 | 180 | expectedResult.ifPresent( 181 | expected -> 182 | assertWithMessage( 183 | String.format( 184 | "Expected compilation result %s, but was %s\n%s\n%s", 185 | expected, 186 | result, 187 | Joiner.on('\n').join(diagnosticHelper.getDiagnostics()), 188 | outputStream)) 189 | .that(result) 190 | .isEqualTo(expected)); 191 | } 192 | ``` 193 | 194 | [Extracting markers](https://github.com/google/error-prone/blob/c601758e81723a8efc4671726b8363be7a306dce/test_helpers/src/main/java/com/google/errorprone/DiagnosticTestHelper.java#L275-L353) from code and comparing them to the actual results: 195 | 196 | ```java 197 | /** 198 | * Asserts that the diagnostics contain a diagnostic on each line of the source file that matches 199 | * our bug marker pattern. Parses the bug marker pattern for the specific string to look for in 200 | * the diagnostic. 
201 | * 202 | * @param source File in which to find matching lines 203 | */ 204 | public void assertHasDiagnosticOnAllMatchingLines( 205 | JavaFileObject source, LookForCheckNameInDiagnostic lookForCheckNameInDiagnostic) 206 | throws IOException { 207 | final List> diagnostics = getDiagnostics(); 208 | final LineNumberReader reader = 209 | new LineNumberReader(CharSource.wrap(source.getCharContent(false)).openStream()); 210 | do { 211 | String line = reader.readLine(); 212 | if (line == null) { 213 | break; 214 | } 215 | 216 | List> predicates = null; 217 | if (line.contains(BUG_MARKER_COMMENT_INLINE)) { 218 | // Diagnostic must contain all patterns from the bug marker comment. 219 | List patterns = extractPatterns(line, reader, BUG_MARKER_COMMENT_INLINE); 220 | predicates = new ArrayList<>(patterns.size()); 221 | for (String pattern : patterns) { 222 | predicates.add(new SimpleStringContains(pattern)); 223 | } 224 | } else if (line.contains(BUG_MARKER_COMMENT_LOOKUP)) { 225 | int markerLineNumber = reader.getLineNumber(); 226 | List lookupKeys = extractPatterns(line, reader, BUG_MARKER_COMMENT_LOOKUP); 227 | predicates = new ArrayList<>(lookupKeys.size()); 228 | for (String lookupKey : lookupKeys) { 229 | assertWithMessage( 230 | "No expected error message with key [%s] as expected from line [%s] " 231 | + "with diagnostic [%s]", 232 | lookupKey, markerLineNumber, line.trim()) 233 | .that(expectedErrorMsgs.containsKey(lookupKey)) 234 | .isTrue(); 235 | predicates.add(expectedErrorMsgs.get(lookupKey)); 236 | usedLookupKeys.add(lookupKey); 237 | } 238 | } 239 | 240 | if (predicates != null) { 241 | int lineNumber = reader.getLineNumber(); 242 | for (Predicate predicate : predicates) { 243 | Matcher>> patternMatcher = 244 | hasItem(diagnosticOnLine(source.toUri(), lineNumber, predicate)); 245 | assertWithMessage( 246 | "Did not see an error on line %s matching %s. 
%s", 247 | lineNumber, predicate, allErrors(diagnostics)) 248 | .that(patternMatcher.matches(diagnostics)) 249 | .isTrue(); 250 | } 251 | 252 | if (checkName != null && lookForCheckNameInDiagnostic == LookForCheckNameInDiagnostic.YES) { 253 | // Diagnostic must contain check name. 254 | Matcher>> checkNameMatcher = 255 | hasItem( 256 | diagnosticOnLine( 257 | source.toUri(), lineNumber, new SimpleStringContains("[" + checkName + "]"))); 258 | assertWithMessage( 259 | "Did not see an error on line %s containing [%s]. %s", 260 | lineNumber, checkName, allErrors(diagnostics)) 261 | .that(checkNameMatcher.matches(diagnostics)) 262 | .isTrue(); 263 | } 264 | 265 | } else { 266 | int lineNumber = reader.getLineNumber(); 267 | Matcher>> matcher = 268 | hasItem(diagnosticOnLine(source.toUri(), lineNumber)); 269 | if (matcher.matches(diagnostics)) { 270 | fail("Saw unexpected error on line " + lineNumber + ". " + allErrors(diagnostics)); 271 | } 272 | } 273 | } while (true); 274 | reader.close(); 275 | } 276 | ``` 277 | 278 | ## Related 279 | 280 | Many related products - linters, bug checkers, etc. - implement very similar patterns. For example, see SonarQube's [CheckVerifier](https://github.com/SonarSource/sonar-java/blob/434f170b9667df33eb7355c0e7e62147c48a7da8/java-checks-testkit/src/main/java/org/sonar/java/checks/verifier/CheckVerifier.java#) or RuboCop's [ExpectOffense](https://github.com/rubocop/rubocop/blob/dc858b7ba893ffeae5edfe7b8012d8f13afd6903/lib/rubocop/rspec/expect_offense.rb). 281 | 282 | Some other Java code analysis tools are built on top of Error Prone, making use of its extensible design. For example, Uber's [NullAway](https://github.com/uber/NullAway). NullAway also uses Error Prone's test helper, e.g. [here](https://github.com/uber/NullAway/blob/067c31dca42a7d2302c3058cb7a626bc02574c39/nullaway/src/test/java/com/uber/nullaway/NullAwayTest.java#L294-L331). 
283 | 284 | ## References 285 | 286 | * [GitHub Repo](https://github.com/google/error-prone) 287 | * [Error Prone](https://errorprone.info) 288 | * [Writing a check](https://github.com/google/error-prone/wiki/Writing-a-check) 289 | 290 | ## Copyright notice 291 | 292 | Error Prone is licensed under the [Apache License 2.0](https://github.com/google/error-prone/blob/master/COPYING). 293 | -------------------------------------------------------------------------------- /_articles/zookeeper-trie.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: "ZooKeeper - Trie [Java]" 3 | layout: default 4 | last_modified_date: 2021-07-28T19:29:00+0300 5 | nav_order: 9 6 | 7 | status: PUBLISHED 8 | language: Java 9 | short-title: Trie 10 | project: 11 | name: Apache ZooKeeper 12 | key: zookeeper 13 | home-page: https://github.com/apache/zookeeper 14 | tags: [trie, data-structure, algorithm] 15 | --- 16 | 17 | {% include article-meta.html article=page %} 18 | 19 | ## Context 20 | 21 | Apache ZooKeeper is a centralized service for maintaining configuration information, naming, providing distributed synchronization, and providing group services. ZooKeeper manipulates [znodes](https://zookeeper.apache.org/doc/r3.1.2/zookeeperProgrammers.html#sc_zkDataModel_znodes) - data objects organized hierarchically as in file systems. 22 | 23 | ## Problem 24 | 25 | As a part of the quota management, ZooKeeper needs to find the longest stored prefix for a given path. 26 | 27 | ## Overview 28 | 29 | ZooKeeper implements the [Trie](https://en.wikipedia.org/wiki/Trie) data structure to find the longest stored prefix for a given path. 30 | 31 | The implementation is thread-safe. [`ReadWriteLock`](https://docs.oracle.com/javase/7/docs/api/java/util/concurrent/locks/ReadWriteLock.html) is used - the read lock may be held simultaneously by multiple reader threads, so long as there are no writers. The write lock is exclusive. 
32 | 33 | ## Implementation details 34 | 35 | [The implementation](https://github.com/apache/zookeeper/blob/e642a325b91ab829aefa47708c7b4b45811d2d23/zookeeper-server/src/main/java/org/apache/zookeeper/common/PathTrie.java#L34-L354) is rather short and self-explanatory: 36 | 37 | ```java 38 | /** 39 | * a class that implements prefix matching for 40 | * components of a filesystem path. the trie 41 | * looks like a tree with edges mapping to 42 | * the component of a path. 43 | * example /ab/bc/cf would map to a trie 44 | * / 45 | * ab/ 46 | * (ab) 47 | * bc/ 48 | * / 49 | * (bc) 50 | * cf/ 51 | * (cf) 52 | */ 53 | public class PathTrie { 54 | 55 | /** Logger for this class */ 56 | private static final Logger LOG = LoggerFactory.getLogger(PathTrie.class); 57 | 58 | /** Root node of PathTrie */ 59 | private final TrieNode rootNode; 60 | 61 | private final ReadWriteLock lock = new ReentrantReadWriteLock(true); 62 | 63 | private final Lock readLock = lock.readLock(); 64 | 65 | private final Lock writeLock = lock.writeLock(); 66 | 67 | static class TrieNode { 68 | 69 | final String value; 70 | final Map children; 71 | boolean property; 72 | TrieNode parent; 73 | 74 | /** 75 | * Create a trie node with parent as parameter. 76 | * 77 | * @param parent the parent of this node 78 | * @param value the value stored in this node 79 | */ 80 | private TrieNode(TrieNode parent, String value) { 81 | this.value = value; 82 | this.parent = parent; 83 | this.property = false; 84 | this.children = new HashMap<>(4); 85 | } 86 | 87 | /** 88 | * Get the parent of this node. 89 | * 90 | * @return the parent node 91 | */ 92 | TrieNode getParent() { 93 | return this.parent; 94 | } 95 | 96 | /** 97 | * set the parent of this node. 98 | * 99 | * @param parent the parent to set to 100 | */ 101 | void setParent(TrieNode parent) { 102 | this.parent = parent; 103 | } 104 | 105 | /** 106 | * A property that is set for a node - making it special. 
107 | */ 108 | void setProperty(boolean prop) { 109 | this.property = prop; 110 | } 111 | 112 | /** 113 | * The property of this node. 114 | * 115 | * @return the property for this node 116 | */ 117 | boolean hasProperty() { 118 | return this.property; 119 | } 120 | 121 | /** 122 | * The value stored in this node. 123 | * 124 | * @return the value stored in this node 125 | */ 126 | public String getValue() { 127 | return this.value; 128 | } 129 | 130 | /** 131 | * Add a child to the existing node. 132 | * 133 | * @param childName the string name of the child 134 | * @param node the node that is the child 135 | */ 136 | void addChild(String childName, TrieNode node) { 137 | this.children.putIfAbsent(childName, node); 138 | } 139 | 140 | /** 141 | * Delete child from this node. 142 | * 143 | * @param childName the name of the child to be deleted 144 | */ 145 | void deleteChild(String childName) { 146 | this.children.computeIfPresent(childName, (key, childNode) -> { 147 | // Node no longer has an external property associated 148 | childNode.setProperty(false); 149 | 150 | // Delete it if it has no children (is a leaf node) 151 | if (childNode.isLeafNode()) { 152 | childNode.setParent(null); 153 | return null; 154 | } 155 | 156 | return childNode; 157 | }); 158 | } 159 | 160 | /** 161 | * Return the child of a node mapping to the input child name. 162 | * 163 | * @param childName the name of the child 164 | * @return the child of a node 165 | */ 166 | TrieNode getChild(String childName) { 167 | return this.children.get(childName); 168 | } 169 | 170 | /** 171 | * Get the list of children of this trienode. 172 | * 173 | * @return A collection containing the node's children 174 | */ 175 | Collection getChildren() { 176 | return children.keySet(); 177 | } 178 | 179 | /** 180 | * Determine if this node is a leaf (has no children). 
181 | * 182 | * @return true if this node is a lead node; otherwise false 183 | */ 184 | boolean isLeafNode() { 185 | return children.isEmpty(); 186 | } 187 | 188 | @Override 189 | public String toString() { 190 | return "TrieNode [name=" + value + ", property=" + property + ", children=" + children.keySet() + "]"; 191 | } 192 | 193 | } 194 | 195 | /** 196 | * Construct a new PathTrie with a root node. 197 | */ 198 | public PathTrie() { 199 | this.rootNode = new TrieNode(null, "/"); 200 | } 201 | 202 | /** 203 | * Add a path to the path trie. All paths are relative to the root node. 204 | * 205 | * @param path the path to add to the trie 206 | */ 207 | public void addPath(final String path) { 208 | Objects.requireNonNull(path, "Path cannot be null"); 209 | 210 | if (path.length() == 0) { 211 | throw new IllegalArgumentException("Invalid path: " + path); 212 | } 213 | final String[] pathComponents = split(path); 214 | 215 | writeLock.lock(); 216 | try { 217 | TrieNode parent = rootNode; 218 | for (final String part : pathComponents) { 219 | TrieNode child = parent.getChild(part); 220 | if (child == null) { 221 | child = new TrieNode(parent, part); 222 | parent.addChild(part, child); 223 | } 224 | parent = child; 225 | } 226 | parent.setProperty(true); 227 | } finally { 228 | writeLock.unlock(); 229 | } 230 | } 231 | 232 | /** 233 | * Delete a path from the trie. All paths are relative to the root node. 
234 | * 235 | * @param path the path to be deleted 236 | */ 237 | public void deletePath(final String path) { 238 | Objects.requireNonNull(path, "Path cannot be null"); 239 | 240 | if (path.length() == 0) { 241 | throw new IllegalArgumentException("Invalid path: " + path); 242 | } 243 | final String[] pathComponents = split(path); 244 | 245 | 246 | writeLock.lock(); 247 | try { 248 | TrieNode parent = rootNode; 249 | for (final String part : pathComponents) { 250 | if (parent.getChild(part) == null) { 251 | // the path does not exist 252 | return; 253 | } 254 | parent = parent.getChild(part); 255 | LOG.debug("{}", parent); 256 | } 257 | 258 | final TrieNode realParent = parent.getParent(); 259 | realParent.deleteChild(parent.getValue()); 260 | } finally { 261 | writeLock.unlock(); 262 | } 263 | } 264 | 265 | /** 266 | * Return true if the given path exists in the trie, otherwise return false; 267 | * All paths are relative to the root node. 268 | * 269 | * @param path the input path 270 | * @return the largest prefix for the 271 | */ 272 | public boolean existsNode(final String path) { 273 | Objects.requireNonNull(path, "Path cannot be null"); 274 | 275 | if (path.length() == 0) { 276 | throw new IllegalArgumentException("Invalid path: " + path); 277 | } 278 | final String[] pathComponents = split(path); 279 | 280 | readLock.lock(); 281 | try { 282 | TrieNode parent = rootNode; 283 | for (final String part : pathComponents) { 284 | if (parent.getChild(part) == null) { 285 | // the path does not exist 286 | return false; 287 | } 288 | parent = parent.getChild(part); 289 | LOG.debug("{}", parent); 290 | } 291 | } finally { 292 | readLock.unlock(); 293 | } 294 | return true; 295 | } 296 | 297 | /** 298 | * Return the largest prefix for the input path. All paths are relative to the 299 | * root node. 
300 | * 301 | * @param path the input path 302 | * @return the largest prefix for the input path 303 | */ 304 | public String findMaxPrefix(final String path) { 305 | Objects.requireNonNull(path, "Path cannot be null"); 306 | 307 | final String[] pathComponents = split(path); 308 | 309 | readLock.lock(); 310 | try { 311 | TrieNode parent = rootNode; 312 | TrieNode deepestPropertyNode = null; 313 | for (final String element : pathComponents) { 314 | parent = parent.getChild(element); 315 | if (parent == null) { 316 | LOG.debug("{}", element); 317 | break; 318 | } 319 | if (parent.hasProperty()) { 320 | deepestPropertyNode = parent; 321 | } 322 | } 323 | 324 | if (deepestPropertyNode == null) { 325 | return "/"; 326 | } 327 | 328 | final Deque treePath = new ArrayDeque<>(); 329 | TrieNode node = deepestPropertyNode; 330 | while (node != this.rootNode) { 331 | treePath.offerFirst(node.getValue()); 332 | node = node.parent; 333 | } 334 | return "/" + String.join("/", treePath); 335 | } finally { 336 | readLock.unlock(); 337 | } 338 | } 339 | 340 | /** 341 | * Clear all nodes in the trie. 342 | */ 343 | public void clear() { 344 | writeLock.lock(); 345 | try { 346 | rootNode.getChildren().clear(); 347 | } finally { 348 | writeLock.unlock(); 349 | } 350 | } 351 | 352 | private static String[] split(final String path){ 353 | return Stream.of(path.split("/")) 354 | .filter(t -> !t.trim().isEmpty()) 355 | .toArray(String[]::new); 356 | } 357 | 358 | } 359 | ``` 360 | 361 | ## Testing 362 | 363 | [PathTrieTest](https://github.com/apache/zookeeper/blob/e642a325b91ab829aefa47708c7b4b45811d2d23/zookeeper-server/src/test/java/org/apache/zookeeper/common/PathTrieTest.java) covers all public methods of `PathTrie`. 
364 | 365 | For instance, [tests for `findMaxPrefix`](https://github.com/apache/zookeeper/blob/e642a325b91ab829aefa47708c7b4b45811d2d23/zookeeper-server/src/test/java/org/apache/zookeeper/common/PathTrieTest.java): 366 | 367 | ```java 368 | @Test 369 | public void findMaxPrefixNullPath() { 370 | assertThrows(NullPointerException.class, () -> { 371 | this.pathTrie.findMaxPrefix(null); 372 | }); 373 | } 374 | 375 | @Test 376 | public void findMaxPrefixRootPath() { 377 | assertEquals("/", this.pathTrie.findMaxPrefix("/")); 378 | } 379 | 380 | @Test 381 | public void findMaxPrefixChildren() { 382 | this.pathTrie.addPath("node1"); 383 | this.pathTrie.addPath("node1/node2"); 384 | this.pathTrie.addPath("node1/node3"); 385 | 386 | assertEquals("/node1", this.pathTrie.findMaxPrefix("/node1")); 387 | assertEquals("/node1/node2", this.pathTrie.findMaxPrefix("/node1/node2")); 388 | assertEquals("/node1/node3", this.pathTrie.findMaxPrefix("/node1/node3")); 389 | } 390 | 391 | @Test 392 | public void findMaxPrefixChildrenPrefix() { 393 | this.pathTrie.addPath("node1"); 394 | 395 | assertEquals("/node1", this.pathTrie.findMaxPrefix("/node1/node2")); 396 | assertEquals("/node1", this.pathTrie.findMaxPrefix("/node1/node3")); 397 | } 398 | ``` 399 | 400 | ## Observations 401 | 402 | * [Deques](https://en.wikipedia.org/wiki/Double-ended_queue) [are used](https://github.com/apache/zookeeper/blob/e642a325b91ab829aefa47708c7b4b45811d2d23/zookeeper-server/src/main/java/org/apache/zookeeper/common/PathTrie.java#L324-L330) to temporarily store the node values along the path from the deepest matching node up to the root. Deques are designed to support element insertion and removal at both ends, but this implementation only adds elements to the front and never removes any. So a simple `LinkedList` would do; however, `ArrayDeque` may be more efficient. 
403 | 404 | ## References 405 | 406 | * [GitHub repo](https://github.com/apache/zookeeper) 407 | * [Trie on Wikipedia](https://en.wikipedia.org/wiki/Trie) 408 | 409 | ## Copyright notice 410 | 411 | ZooKeeper is licensed under the [Apache License 2.0](https://github.com/apache/zookeeper/blob/master/LICENSE.txt). 
412 | --------------------------------------------------------------------------------