├── html
└── .gitkeep
├── .rspec
├── README.md
├── lib
├── blog_snippets.rb
└── blog_snippets
│ ├── version.rb
│ ├── articles
│ ├── attr_optimizations.rb
│ ├── attr_optimizations
│ │ ├── minimalist_attrs.rb
│ │ └── excessive_attrs.rb
│ ├── tail_call_optimization_in_ruby_internals
│ │ ├── fact.rb
│ │ ├── fib.rb
│ │ ├── opt_send_without_block.vm.inc
│ │ ├── fact_disasm.txt
│ │ ├── fact_tco_disasm.txt
│ │ ├── tail_optimized_reload.rb
│ │ ├── fib_tco_disasm.txt
│ │ ├── fib_disasm.txt
│ │ ├── from_call_method_to_tco.c
│ │ └── README.md
│ ├── on_the_road_from_ruby_journeyman_to_ruby_master
│ │ ├── int_from_ord_diff_benchmark.rb
│ │ └── README.md
│ ├── tuning_dd_block_size
│ │ ├── dd_obs_test.sh
│ │ ├── dd_ibs_test.sh
│ │ └── README.md
│ ├── eager_boolean_operators
│ │ ├── notes.txt
│ │ └── README.md
│ ├── introducing_the_tco_method_gem
│ │ └── README.md
│ ├── tail_call_optimization_in_ruby_background
│ │ └── README.md
│ └── module_factory_for_dependency_management
│ │ └── README.md
│ ├── markdown_to_html_transformer.rb
│ └── renderers
│ └── wordpress_html_renderer.rb
├── test
├── test_helper.rb
├── concerns
│ ├── coverage.rb
│ └── test_case.rb
└── unit
│ ├── blog_snippets_test.rb
│ └── markdown_to_html_transformer_test.rb
├── .travis.yml
├── script
├── wp-console
└── update_remote_revision
├── Guardfile
├── Gemfile
├── .gitignore
├── blog_snippets.gemspec
├── Rakefile
├── LICENSE
└── notes.txt
/html/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/.rspec:
--------------------------------------------------------------------------------
1 | --color
2 | --require spec_helper
3 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Blog Snippets
2 |
3 | Code snippets from my blog (http://blog.tdg5.com)
4 |
--------------------------------------------------------------------------------
/lib/blog_snippets.rb:
--------------------------------------------------------------------------------
1 | require "blog_snippets/version"
2 |
3 | module BlogSnippets
4 | end
5 |
--------------------------------------------------------------------------------
/lib/blog_snippets/version.rb:
--------------------------------------------------------------------------------
1 | module BlogSnippets
2 | VERSION = "0.0.1".freeze
3 | end
4 |
--------------------------------------------------------------------------------
/test/test_helper.rb:
--------------------------------------------------------------------------------
1 | require "concerns/coverage" if ENV["CI"]
2 | require "minitest/autorun"
3 | require "mocha/setup"
4 | require "blog_snippets"
5 | require "concerns/test_case"
6 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: ruby
2 |
3 | rvm:
4 | - 1.9.3
5 | - 2.0.0
6 | - 2.1.0
7 | - 2.2.0
8 | - jruby-19mode
9 | - jruby-head
10 | - rbx-19mode
11 | - rbx-2
12 |
--------------------------------------------------------------------------------
/lib/blog_snippets/articles/attr_optimizations.rb:
--------------------------------------------------------------------------------
1 | require "blog_snippets/articles/attr_optimizations/minimalist_attrs"
2 | require "blog_snippets/articles/attr_optimizations/excessive_attrs"
3 |
--------------------------------------------------------------------------------
/test/concerns/coverage.rb:
--------------------------------------------------------------------------------
# Coverage configuration, loaded by test_helper.rb only when ENV["CI"] is set.
# Results are reported through the Coveralls formatter.
require "simplecov"
require "coveralls"

SimpleCov.formatter = Coveralls::SimpleCov::Formatter
# This file lives in test/concerns/, and File.expand_path treats __FILE__ as
# the starting "directory": the first ".." only strips the file name, so two
# more are needed to climb out of test/concerns/ before descending into lib/.
SimpleCov.root(File.expand_path("../../../lib", __FILE__))
SimpleCov.start
6 |
--------------------------------------------------------------------------------
/script/wp-console:
--------------------------------------------------------------------------------
#!/usr/bin/env ruby

# Starts a Pry console after initialising ruby-wpdb with the settings found in
# config/database.yml (resolved relative to this script).

# YAML is used directly below, so require it explicitly rather than relying on
# another library having loaded it.
require "yaml"
require "ruby-wpdb"
require "pry"

# The config file supplies the "database_url" and "wp_prefix" keys.
config_path = File.expand_path("../../config/database.yml", __FILE__)
db_config = YAML.load(File.read(config_path))
WPDB.init(db_config["database_url"], db_config["wp_prefix"])

Pry.start
10 |
--------------------------------------------------------------------------------
/lib/blog_snippets/articles/attr_optimizations/minimalist_attrs.rb:
--------------------------------------------------------------------------------
module BlogSnippets
  module AttrOptimizations
    # Declares its three attributes purely through the attr_* macros:
    # a read/write attribute, a read-only attribute and a write-only attribute.
    class MinimalistAttrs
      # Read-only: defines #reader
      attr_reader :reader

      # Write-only: defines #writer=
      attr_writer :writer

      # Read/write: defines #accessor and #accessor=
      attr_accessor :accessor
    end
  end
end
10 |
--------------------------------------------------------------------------------
/test/concerns/test_case.rb:
--------------------------------------------------------------------------------
# Base test case: a Minitest::Spec subclass whose class-level DSL also answers
# to shoulda-style names (setup/teardown/context/should).
class BlogSnippets::TestCase < Minitest::Spec
  class << self
    alias_method :setup, :before
    alias_method :teardown, :after
    alias_method :context, :describe
    alias_method :should, :it
  end
end
10 |
--------------------------------------------------------------------------------
/Guardfile:
--------------------------------------------------------------------------------
1 | guard(:minitest, :all_after_pass => false, :all_on_start => false) do
2 | watch(%r{^lib/blog_snippets\.rb$}) { "test" }
3 | watch(%r{^lib/blog_snippets/(.+)\.rb$}) { |m| "test/unit/#{m[1]}_test.rb" }
4 | watch(%r{^test/.+_test\.rb$})
5 | watch(%r{^(?:test/test_helper|test/concerns/)(.*)\.rb$}) { "test" }
6 | end
7 |
--------------------------------------------------------------------------------
/test/unit/blog_snippets_test.rb:
--------------------------------------------------------------------------------
1 | require "test_helper"
2 |
3 | class BlogSnippetsTest < BlogSnippets::TestCase
4 | Subject = BlogSnippets
5 |
6 | subject { Subject }
7 |
8 | context Subject.name do
9 | should "be defined" do
10 | assert defined?(subject), "Expected #{subject.name} to be defined!"
11 | end
12 | end
13 | end
14 |
--------------------------------------------------------------------------------
/Gemfile:
--------------------------------------------------------------------------------
1 | source "https://rubygems.org"
2 |
3 | gemspec
4 |
5 | gem "pry"
6 | gem "redcarpet", :platform => %w[mri]
7 | gem "ruby-wpdb", :git => "https://github.com/tdg5-wordpress/ruby-wpdb.git", :branch => :master
8 |
9 | group :test do
10 | gem "coveralls", :require => false
11 | gem "guard"
12 | gem "guard-minitest"
13 | gem "minitest", ">= 3.0"
14 | gem "mocha"
15 | gem "simplecov", :require => false
16 | end
17 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | Gemfile.lock
2 |
3 | *.gem
4 | *.rbc
5 | /.config
6 | /coverage/
7 | /html/
8 | /InstalledFiles
9 | /pkg/
10 | /spec/reports/
11 | /test/tmp/
12 | /test/version_tmp/
13 | /tmp/
14 |
15 | ## Documentation cache and generated files:
16 | /.yardoc/
17 | /_yardoc/
18 | /doc/
19 | /rdoc/
20 |
21 | ## Environment normalisation:
22 | /.bundle/
23 | /lib/bundler/man/
24 |
25 | ## Random
26 | /src.html
27 | config/database.yml
28 |
--------------------------------------------------------------------------------
/lib/blog_snippets/articles/attr_optimizations/excessive_attrs.rb:
--------------------------------------------------------------------------------
module BlogSnippets
  module AttrOptimizations
    # Exposes the same attributes as a class built with attr_accessor,
    # attr_reader and attr_writer would, but every getter and setter is spelled
    # out by hand as an ordinary method.
    class ExcessiveAttrs
      # Hand-written read/write pair for @accessor
      def accessor; @accessor; end
      def accessor=(value); @accessor = value; end

      # Hand-written getter for @reader (no setter is defined)
      def reader; @reader; end

      # Hand-written setter for @writer (no getter is defined)
      def writer=(value); @writer = value; end
    end
  end
end
22 |
--------------------------------------------------------------------------------
/lib/blog_snippets/articles/tail_call_optimization_in_ruby_internals/fact.rb:
--------------------------------------------------------------------------------
# Source of a tail-recursive factorial, held as a string so that it can be
# compiled below under different compile options.
factorial_source = <<-CODE
  class Factorial
    def self.fact_helper(n, res)
      n == 1 ? res : fact_helper(n - 1, n * res)
    end

    def self.fact(n)
      fact_helper(n, 1)
    end
  end
CODE

# Label => compile options, in the order the disassemblies are printed.
variants = [
  ["normal", { :tailcall_optimization => false, :trace_instruction => false }],
  ["tail call optimized", { :tailcall_optimization => true, :trace_instruction => false }],
]

# Compile the source once per variant and print the resulting disassembly.
variants.each do |label, options|
  iseq = RubyVM::InstructionSequence.new(factorial_source, nil, nil, nil, options)
  puts "#{label}:\n#{iseq.disasm}"
end
20 |
--------------------------------------------------------------------------------
/blog_snippets.gemspec:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | lib = File.expand_path("../lib", __FILE__)
3 | $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4 | require "blog_snippets/version"
5 |
6 | Gem::Specification.new do |spec|
7 | spec.name = "blog_snippets"
8 | spec.version = BlogSnippets::VERSION
9 | spec.authors = ["Danny Guinther"]
10 | spec.email = ["dannyguinther@gmail.com"]
11 | spec.summary = %q{Code snippets from my blog.}
12 | spec.description = %q{Code snippets from my blog: http://blog.tdg5.com}
13 | spec.homepage = "https://github.com/tdg5/blog_snippets"
14 | spec.license = "MIT"
15 |
16 | spec.files = `git ls-files -z`.split("\x0")
17 | spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
18 | spec.require_paths = ["lib"]
19 |
20 | spec.add_development_dependency "bundler", "~> 1.6"
21 | spec.add_development_dependency "rake"
22 | end
23 |
--------------------------------------------------------------------------------
/Rakefile:
--------------------------------------------------------------------------------
1 | require "bundler/gem_tasks"
2 | require "rake/testtask"
3 |
4 | Rake::TestTask.new do |t|
5 | t.libs << "test"
6 | t.pattern = "test/**/*_test.rb"
7 | end
8 |
# Renders a Markdown file to HTML with WordpressHTMLRenderer and prints the
# result between BEGIN COPY / END COPY markers.
#
# The file is taken from the :source_path task argument or, failing that, the
# SOURCE environment variable. Relative paths are resolved against the
# directory containing this Rakefile; absolute paths are used as given.
#
# Raises ArgumentError when no path is supplied and RuntimeError when the
# resolved path does not exist.
task :generate_html, [:source_path] do |_task, arguments|
  require "redcarpet"
  require "blog_snippets/renderers/wordpress_html_renderer"
  require "blog_snippets/markdown_to_html_transformer"

  source_path = arguments[:source_path] || ENV["SOURCE"]
  if source_path.nil? || source_path.empty?
    raise ArgumentError, "No source given! Use rake generate_html[path] or SOURCE=path"
  end

  # Passing the Rakefile's directory as the base keeps relative paths working
  # as before while leaving absolute paths untouched.
  source_path = File.expand_path(source_path, File.dirname(__FILE__))
  raise "#{source_path} does not exist!" unless File.exist?(source_path)

  raw_source = File.read(source_path)
  renderer = BlogSnippets::Renderers::WordpressHTMLRenderer.new
  transformer = BlogSnippets::MarkdownToHTMLTransformer.new({
    :parser_class => Redcarpet::Markdown,
    :renderer => renderer
  })
  html = transformer.transform(raw_source)
  puts "---- BEGIN COPY ----\n#{html}\n---- END COPY ----"
end
26 |
27 | task :default => :test
28 |
--------------------------------------------------------------------------------
/lib/blog_snippets/articles/tail_call_optimization_in_ruby_internals/fib.rb:
--------------------------------------------------------------------------------
# Compiles the same accumulator-style Fibonacci module twice: once as
# BlogSnippets::Fib with tail call optimization off and once as
# BlogSnippets::TCOFib with it on. For each, the disassembly is printed and the
# instruction sequence is evaluated so that the module is defined.

# Remember the compile options currently in effect so they can be put back
# afterwards, whatever they were.
original_compile_options = RubyVM::InstructionSequence.compile_option

begin
  {
    "Fib" => { :tailcall_optimization => false, :trace_instruction => false },
    "TCOFib" => { :tailcall_optimization => true, :trace_instruction => false },
  }.each do |class_name, compile_options|
    # The options are set globally here, before the sequence below is created.
    RubyVM::InstructionSequence.compile_option = compile_options
    code = <<-CODE
      module BlogSnippets
        module #{class_name}
          def self.acc(i, n, result)
            if i == -1
              result
            else
              acc(i - 1, n + result, n)
            end
          end

          def self.fib(i)
            acc(i, 1, 0)
          end
        end
      end
    CODE
    instruction_sequence = RubyVM::InstructionSequence.new(code)

    puts "#{class_name}:\n#{instruction_sequence.disasm}"
    instruction_sequence.eval
  end
ensure
  # Reset compile options, even if compiling or evaluating raised
  RubyVM::InstructionSequence.compile_option = original_compile_options
end
31 |
--------------------------------------------------------------------------------
/lib/blog_snippets/articles/tail_call_optimization_in_ruby_internals/opt_send_without_block.vm.inc:
--------------------------------------------------------------------------------
1 | INSN_ENTRY(opt_send_without_block){
2 | {
3 | VALUE val;
4 | CALL_INFO ci = (CALL_INFO)GET_OPERAND(1);
5 |
6 | DEBUG_ENTER_INSN("opt_send_without_block");
7 | ADD_PC(1+1);
8 | PREFETCH(GET_PC());
9 | #define CURRENT_INSN_opt_send_without_block 1
10 | #define INSN_IS_SC() 0
11 | #define INSN_LABEL(lab) LABEL_opt_send_without_block_##lab
12 | #define LABEL_IS_SC(lab) LABEL_##lab##_##t
13 | COLLECT_USAGE_INSN(BIN(opt_send_without_block));
14 | COLLECT_USAGE_OPERAND(BIN(opt_send_without_block), 0, ci);
15 | {
16 | ci->argc = ci->orig_argc;
17 | vm_search_method(ci, ci->recv = TOPN(ci->argc));
18 | CALL_METHOD(ci);
19 |
20 | CHECK_VM_STACK_OVERFLOW_FOR_INSN(REG_CFP, 1);
21 | PUSH(val);
22 | #undef CURRENT_INSN_opt_send_without_block
23 | #undef INSN_IS_SC
24 | #undef INSN_LABEL
25 | #undef LABEL_IS_SC
26 | END_INSN(opt_send_without_block);
27 | }
28 | }
29 | }
30 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright 2015 Danny Guinther
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining
4 | a copy of this software and associated documentation files (the
5 | "Software"), to deal in the Software without restriction, including
6 | without limitation the rights to use, copy, modify, merge, publish,
7 | distribute, sublicense, and/or sell copies of the Software, and to
8 | permit persons to whom the Software is furnished to do so, subject to
9 | the following conditions:
10 |
11 | The above copyright notice and this permission notice shall be
12 | included in all copies or substantial portions of the Software.
13 |
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 |
--------------------------------------------------------------------------------
/script/update_remote_revision:
--------------------------------------------------------------------------------
#!/usr/bin/env ruby

# Renders a Markdown file to HTML and saves the result as a new WPDB::Post
# record modelled on the target post's most recent revision.
#
# Usage: update_remote_revision POST_ID SOURCE_PATH
#
# Database settings ("database_url" and "wp_prefix") are read from
# config/database.yml, resolved relative to this script.

require "ruby-wpdb"
require "redcarpet"
require "blog_snippets/renderers/wordpress_html_renderer"
require "blog_snippets/markdown_to_html_transformer"
require "yaml"

abort "Usage: #{File.basename($0)} POST_ID SOURCE_PATH" if ARGV.length < 2

# Validate both arguments up front, before connecting to the database.
# Integer() raises ArgumentError for a non-numeric post id.
post_id = Integer(ARGV[0])
source_path = ARGV[1]
raise "#{source_path} does not exist!" unless File.exist?(source_path)

db_config = YAML.load(File.read(File.expand_path("../../config/database.yml", __FILE__)))
WPDB.init(db_config["database_url"], db_config["wp_prefix"])

raw_source = File.read(source_path)
renderer = BlogSnippets::Renderers::WordpressHTMLRenderer.new
transformer = BlogSnippets::MarkdownToHTMLTransformer.new({
  :parser_class => Redcarpet::Markdown,
  :renderer => renderer
})
post_content = transformer.transform(raw_source)

post = WPDB::Post.where(:id => post_id).first
raise "No post found with id #{post_id}!" if post.nil?

last_revision = post.revisions.last
raise "Post #{post_id} has no revisions to base a new revision on!" if last_revision.nil?

# Build the new record from the post's current values, restricted to the keys
# present on the last revision.
revision = WPDB::Post.new
last_revision.keys.each { |key| revision.send("#{key}=", post.send(key)) }

# NOTE(review): clearing the ID and forcing @new presumably makes #save insert
# a fresh row instead of updating the copied one; confirm against ruby-wpdb.
revision.ID = nil
revision.instance_variable_set(:@new, true)
revision.post_content = post_content

# Take a single timestamp so both fields describe the same instant. getutc
# returns a new Time; Time#utc would convert `now` itself in place.
now = Time.now
revision.post_modified = now
revision.post_modified_gmt = now.getutc
revision.save
34 |
--------------------------------------------------------------------------------
/lib/blog_snippets/markdown_to_html_transformer.rb:
--------------------------------------------------------------------------------
module BlogSnippets
  # Turns Markdown into HTML by delegating to an injected parser class and
  # renderer.
  #
  # The parser class must accept +new(renderer, extensions)+ and its instances
  # must respond to +render(markdown)+; these are the only calls made on it.
  class MarkdownToHTMLTransformer

    # Extensions handed to the parser when the caller supplies none. Frozen so
    # the shared default cannot be modified in place; callers receive copies
    # via .default_markdown_extensions.
    DEFAULT_MARKDOWN_EXTENSIONS = {
      :autolink => true,
      :disable_indented_code_blocks => true,
      :fenced_code_blocks => true,
      :footnotes => true,
      :no_intra_emphasis => true,
      :space_after_headers => true,
      :strikethrough => true,
      :tables => true,
      :underline => true,
    }.freeze

    attr_reader :markdown_extensions, :renderer

    # Returns a fresh, unfrozen copy of the default extensions. The constant
    # is looked up with const_get on the receiver, so a subclass that defines
    # its own DEFAULT_MARKDOWN_EXTENSIONS gets its own defaults.
    def self.default_markdown_extensions
      const_get(:DEFAULT_MARKDOWN_EXTENSIONS).dup
    end

    # options - Hash of settings:
    #           :renderer            - passed as the parser's first argument
    #                                  (required).
    #           :parser_class        - class instantiated lazily by #parser
    #                                  (required).
    #           :markdown_extensions - extensions Hash passed to the parser;
    #                                  defaults to a copy of the class defaults.
    #
    # Raises ArgumentError if :renderer or :parser_class is missing.
    def initialize(options = {})
      raise ArgumentError, ":renderer is required!" unless options[:renderer]
      raise ArgumentError, ":parser_class is required!" unless options[:parser_class]

      @renderer = options[:renderer]
      @parser_class = options[:parser_class]
      @markdown_extensions = options[:markdown_extensions] || default_markdown_extensions
    end

    # Returns the parser instance, building it on first use and reusing it
    # afterwards.
    def parser
      @parser ||= parser_class.new(renderer, @markdown_extensions)
    end

    # Returns whatever the parser's #render produces for the given markdown.
    def transform(markdown)
      parser.render(markdown)
    end

    private

    attr_reader :parser_class

    # Instance-level shortcut to the class-level defaults.
    def default_markdown_extensions
      self.class.default_markdown_extensions
    end
  end
end
48 |
--------------------------------------------------------------------------------
/lib/blog_snippets/articles/on_the_road_from_ruby_journeyman_to_ruby_master/int_from_ord_diff_benchmark.rb:
--------------------------------------------------------------------------------
1 | require "benchmark/ips"
2 |
3 | NUMBER = "7316717653133062491922511967442657474235534919493496983520312774506326239578318016984801869478851843858615607891129494954595017379583319528532088055111254069874715852386305071569329096329522744304355766896648950445244523161731856403098711121722383113622298934233803081353362766142828064444866452387493035890729629049156044077239071381051585930796086670172427121883998797908792274921901699720888093776657273330010533678812202354218097512545405947522435258490771167055601360483958644670632441572215539753697817977846174064955149290862569321978468622482839722413756570560574902614079729686524145351004748216637048440319989000889524345065854122758866688116427171479924442928230863465674813919123162824586178664583591245665294765456828489128831426076900422421902267105562632111110937054421750694165896040807198403850962455444362981230987879927244284909188845801561660979191338754992005240636899125607176060588611646710940507754100225698315520005593572972571636269561882670428252483600823257530420752963450"
4 | CHARS = NUMBER.each_char.to_a
5 | ZERO_ORD = "0".ord.freeze
6 | ORD_PROC = proc { |char| char.ord - ZERO_ORD }
7 |
8 | Benchmark.ips do |bm|
9 | bm.report("String#to_i") { CHARS.each(&:to_i) }
10 | bm.report("String#ord - ZERO_ORD") { CHARS.each(&ORD_PROC) }
11 | end
12 |
13 | # Calculating -----------------------------------------
14 | # String#to_i 836.000 i/100ms
15 | # String#ord - ZERO_ORD 1.083k i/100ms
16 | # -----------------------------------------------------
17 | # String#to_i 8.473k (± 1.2%) i/s - 42.636k
18 | # String#ord - ZERO_ORD 10.859k (± 1.4%) i/s - 55.233k
19 |
--------------------------------------------------------------------------------
/lib/blog_snippets/articles/tuning_dd_block_size/dd_obs_test.sh:
--------------------------------------------------------------------------------
#!/bin/bash
#
# Writes a 128 MiB file from /dev/zero with dd once per output block size
# (512 bytes up to 64 MiB) and prints the transfer rate dd reports for each.
#
# Usage: dd_obs_test.sh [TEST_FILE]    (TEST_FILE defaults to dd_obs_testfile)

# Since we're dealing with dd, abort if any errors occur
set -e

TEST_FILE=${1:-dd_obs_testfile}
# Record whether the target existed before the test so that only a file this
# script created is removed afterwards.
TEST_FILE_EXISTS=0
if [ -e "$TEST_FILE" ]; then TEST_FILE_EXISTS=1; fi
TEST_FILE_SIZE=134217728

if [ $EUID -ne 0 ]; then
  echo "NOTE: Kernel cache will not be cleared between tests without sudo. This will likely cause inaccurate results." 1>&2
fi

# Header
PRINTF_FORMAT="%8s : %s\n"
printf "$PRINTF_FORMAT" 'block size' 'transfer rate'

# Block sizes of 512b 1K 2K 4K 8K 16K 32K 64K 128K 256K 512K 1M 2M 4M 8M 16M 32M 64M
for BLOCK_SIZE in 512 1024 2048 4096 8192 16384 32768 65536 131072 262144 524288 1048576 2097152 4194304 8388608 16777216 33554432 67108864
do
  # Calculate number of segments required to copy
  COUNT=$(($TEST_FILE_SIZE / $BLOCK_SIZE))

  if [ $COUNT -le 0 ]; then
    echo "Block size of $BLOCK_SIZE estimated to require $COUNT blocks, aborting further tests."
    break
  fi

  # Clear kernel cache to ensure more accurate test
  [ $EUID -eq 0 ] && [ -e /proc/sys/vm/drop_caches ] && echo 3 > /proc/sys/vm/drop_caches

  # Create a test file with the specified block size; dd reports on STDERR,
  # so STDERR is captured and STDOUT discarded.
  DD_RESULT=$(dd if=/dev/zero of="$TEST_FILE" bs=$BLOCK_SIZE count=$COUNT conv=fsync 2>&1 1>/dev/null)

  # Extract the transfer rate from dd's STDERR output
  TRANSFER_RATE=$(echo $DD_RESULT | \grep --only-matching -E '[0-9.]+ ([MGk]?B|bytes)/s(ec)?')

  # Clean up the test file if we created one, i.e. it did not exist beforehand
  if [ $TEST_FILE_EXISTS -eq 0 ]; then rm "$TEST_FILE"; fi

  # Output the result
  printf "$PRINTF_FORMAT" "$BLOCK_SIZE" "$TRANSFER_RATE"
done
45 |
--------------------------------------------------------------------------------
/lib/blog_snippets/articles/tuning_dd_block_size/dd_ibs_test.sh:
--------------------------------------------------------------------------------
#!/bin/bash
#
# Generates a 128 MiB file from /dev/urandom, then reads it to /dev/null with
# dd once per input block size (512 bytes up to 64 MiB) and prints the
# transfer rate dd reports for each.
#
# Usage: dd_ibs_test.sh [TEST_FILE]    (TEST_FILE defaults to dd_ibs_testfile)
# Refuses to run if TEST_FILE already exists.

# Since we're dealing with dd, abort if any errors occur
set -e

TEST_FILE=${1:-dd_ibs_testfile}
TEST_FILE_SIZE=134217728

# Exit if file exists
if [ -e "$TEST_FILE" ]; then
  echo "Test file $TEST_FILE exists, aborting."
  exit 1
fi

# From here on the test file is one we create, so remove it whenever the
# script exits, including an early exit caused by set -e.
trap 'rm -f "$TEST_FILE"' EXIT

if [ $EUID -ne 0 ]; then
  echo "NOTE: Kernel cache will not be cleared between tests without sudo. This will likely cause inaccurate results." 1>&2
fi

# Create test file
echo 'Generating test file...'
BLOCK_SIZE=65536
COUNT=$(($TEST_FILE_SIZE / $BLOCK_SIZE))
dd if=/dev/urandom of="$TEST_FILE" bs=$BLOCK_SIZE count=$COUNT conv=fsync > /dev/null 2>&1

# Header
PRINTF_FORMAT="%8s : %s\n"
printf "$PRINTF_FORMAT" 'block size' 'transfer rate'

# Block sizes of 512b 1K 2K 4K 8K 16K 32K 64K 128K 256K 512K 1M 2M 4M 8M 16M 32M 64M
for BLOCK_SIZE in 512 1024 2048 4096 8192 16384 32768 65536 131072 262144 524288 1048576 2097152 4194304 8388608 16777216 33554432 67108864
do
  # Clear kernel cache to ensure more accurate test
  [ $EUID -eq 0 ] && [ -e /proc/sys/vm/drop_caches ] && echo 3 > /proc/sys/vm/drop_caches

  # Read test file out to /dev/null with specified block size; dd reports on
  # STDERR, so STDERR is captured and STDOUT discarded.
  DD_RESULT=$(dd if="$TEST_FILE" of=/dev/null bs=$BLOCK_SIZE 2>&1 1>/dev/null)

  # Extract transfer rate
  TRANSFER_RATE=$(echo $DD_RESULT | \grep --only-matching -E '[0-9.]+ ([MGk]?B|bytes)/s(ec)?')

  printf "$PRINTF_FORMAT" "$BLOCK_SIZE" "$TRANSFER_RATE"
done

# The test file is cleaned up by the EXIT trap installed above.
48 |
--------------------------------------------------------------------------------
/lib/blog_snippets/articles/tail_call_optimization_in_ruby_internals/fact_disasm.txt:
--------------------------------------------------------------------------------
1 | == disasm: @>==========
2 | 0000 putspecialobject 3 ( 1)
3 | 0002 putnil
4 | 0003 defineclass :Factorial, , 0
5 | 0007 leave
6 | == disasm: @>===
7 | 0000 putspecialobject 1 ( 2)
8 | 0002 putself
9 | 0003 putobject :fact_helper
10 | 0005 putiseq fact_helper
11 | 0007 opt_send_without_block
12 | 0009 pop
13 | 0010 putspecialobject 1 ( 6)
14 | 0012 putself
15 | 0013 putobject :fact
16 | 0015 putiseq fact
17 | 0017 opt_send_without_block
18 | 0019 leave
19 | == disasm: >=========
20 | local table (size: 3, argc: 2 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
21 | [ 3] n [ 2] res
22 | 0000 getlocal_OP__WC__0 3 ( 3)
23 | 0002 putobject_OP_INT2FIX_O_1_C_
24 | 0003 opt_eq
25 | 0005 branchunless 11
26 | 0007 getlocal_OP__WC__0 2
27 | 0009 leave
28 | 0010 pop
29 | 0011 putself
30 | 0012 getlocal_OP__WC__0 3
31 | 0014 putobject_OP_INT2FIX_O_1_C_
32 | 0015 opt_minus
33 | 0017 getlocal_OP__WC__0 3
34 | 0019 getlocal_OP__WC__0 2
35 | 0021 opt_mult
36 | 0023 opt_send_without_block
37 | 0025 leave
38 | == disasm: >================
39 | local table (size: 2, argc: 1 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
40 | [ 2] n
41 | 0000 putself ( 7)
42 | 0001 getlocal_OP__WC__0 2
43 | 0003 putobject_OP_INT2FIX_O_1_C_
44 | 0004 opt_send_without_block
45 | 0006 leave
46 |
--------------------------------------------------------------------------------
/lib/blog_snippets/articles/tail_call_optimization_in_ruby_internals/fact_tco_disasm.txt:
--------------------------------------------------------------------------------
1 | == disasm: @>==========
2 | 0000 putspecialobject 3 ( 1)
3 | 0002 putnil
4 | 0003 defineclass :Factorial, , 0
5 | 0007 leave
6 | == disasm: @>===
7 | 0000 putspecialobject 1 ( 2)
8 | 0002 putself
9 | 0003 putobject :fact_helper
10 | 0005 putiseq fact_helper
11 | 0007 opt_send_without_block
12 | 0009 pop
13 | 0010 putspecialobject 1 ( 6)
14 | 0012 putself
15 | 0013 putobject :fact
16 | 0015 putiseq fact
17 | 0017 opt_send_without_block
18 | 0019 leave
19 | == disasm: >=========
20 | local table (size: 3, argc: 2 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
21 | [ 3] n [ 2] res
22 | 0000 getlocal_OP__WC__0 3 ( 3)
23 | 0002 putobject_OP_INT2FIX_O_1_C_
24 | 0003 opt_eq
25 | 0005 branchunless 11
26 | 0007 getlocal_OP__WC__0 2
27 | 0009 leave
28 | 0010 pop
29 | 0011 putself
30 | 0012 getlocal_OP__WC__0 3
31 | 0014 putobject_OP_INT2FIX_O_1_C_
32 | 0015 opt_minus
33 | 0017 getlocal_OP__WC__0 3
34 | 0019 getlocal_OP__WC__0 2
35 | 0021 opt_mult
36 | 0023 opt_send_without_block
37 | 0025 leave
38 | == disasm: >================
39 | local table (size: 2, argc: 1 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1])
40 | [ 2] n
41 | 0000 putself ( 7)
42 | 0001 getlocal_OP__WC__0 2
43 | 0003 putobject_OP_INT2FIX_O_1_C_
44 | 0004 opt_send_without_block
45 | 0006 leave
46 |
--------------------------------------------------------------------------------
/lib/blog_snippets/articles/tail_call_optimization_in_ruby_internals/tail_optimized_reload.rb:
--------------------------------------------------------------------------------
1 | # This script demonstrates that any file loaded after a change to
2 | # RubyVM::InstructionSequence.compile_option will be compiled with the new
3 | # compile options. Rather than do this with two scripts, this script is hacked
4 | # together such that this can be demonstrated with one file that reloads itself
5 | # the first time it is loaded.
6 |
7 | # Flag indicating whether this is the first time this file has been loaded.
8 | $first_load = true if $first_load.nil?
9 |
10 | # We can actually turn on tailcall optimization here without affecting how the
11 | # script is loaded the first time because the RubyVM::InstructionSequence object
12 | # that is used to compile the file the first time has already been created and
13 | # as such won't be affected by changing the global compile option.
14 | RubyVM::InstructionSequence.compile_option = {
15 | tailcall_optimization: true,
16 | trace_instruction: false,
17 | }
18 |
19 | # Declare classes to facilitate #instance_eval later
20 | class FirstLoadFactorial; end
21 | class ReloadedFactorial; end
22 |
23 | # On the first load, extend FirstLoadFactorial,
24 | # on the second load, extend ReloadedFactorial.
25 | klass = $first_load ? FirstLoadFactorial : ReloadedFactorial
26 |
27 | # Tail recursive factorial adapted from
28 | # https://github.com/ruby/ruby/blob/fcf6fa8781fe236a9761ad5d75fa1b87f1afeea2/test/ruby/test_optimization.rb#L213
29 | klass.instance_eval do
30 | def self.fact_helper(n, res)
31 | n == 1 ? res : fact_helper(n - 1, n * res)
32 | end
33 |
34 | def self.fact(n)
35 | fact_helper(n, 1)
36 | end
37 | end
38 |
39 | # This check avoids calculating the factorial twice; ReloadedFactorial will only
40 | # respond to :fact after the file has been reloaded.
41 | if ReloadedFactorial.respond_to?(:fact)
42 | begin
43 | puts "FirstLoadFactorial: #{FirstLoadFactorial.fact(50000).to_s.length}"
44 | rescue SystemStackError
45 | puts "FirstLoadFactorial: stack level too deep"
46 | end
47 |
48 | puts "ReloadedFactorial: #{ReloadedFactorial.fact(50000).to_s.length}"
49 | end
50 |
51 | # Reload the file on the first load only.
52 | if $first_load
53 | $first_load = false
54 | load __FILE__
55 | end
56 |
57 | # $ ruby tail_optimized_reload.rb
58 | # FirstLoadFactorial: stack level too deep
59 | # ReloadedFactorial: 213237
60 |
--------------------------------------------------------------------------------
/notes.txt:
--------------------------------------------------------------------------------
1 | Coercion to Boolean compared to lazy evaluation counterpart
2 | > false | :WTF?
3 | => true
4 | > true & :WTF?
5 | => true
6 | Even with very large object on RHS, no efficiency gained by coercion
7 | Because of method call implementation?
8 | Other than tricky coercion, only gain is that it is ever so slightly faster
9 | than double negation:
10 | require 'benchmark/ips'
11 |
12 | Benchmark.ips do |bm|
13 | bm.report("Double negate") { !!(true && :a) }
14 |
15 | bm.report("Logical bit-wise coerce") { true & :a }
16 | end
17 |
18 | # Calculating --------------------------------------------
19 | # Double negate 138.008k i/100ms
20 | # Logical bit-wise coerce 139.350k i/100ms
21 | # --------------------------------------------------------
22 | # Double negate 7.262M (± 1.0%) i/s - 36.434M
23 | # Logical bit-wise coerce 7.825M (± 1.3%) i/s - 39.157M
24 | # --------------------------------------------------------
25 |
26 |
27 |
28 | "Maybe use when you have very simple boolean expressions and the cost
29 | of short cutting (i.e. a branch) is greater than the time you save by
30 | not evaluating the later expressions."
31 | http://stackoverflow.com/a/7105382/1169710
32 | Secretly method calls in Ruby!
33 | Doesn't seem to apply in Ruby. Branching always cheaper than a method call.
34 |
35 |
36 | Operator precedence:
37 | > true || 1 && 3
38 | => true
39 | > true || (1 && 3)
40 | => true
41 |
42 | > true | 1 && 3
43 | => 3
44 | > (true | 1) && 3
45 | => 3
46 |
47 |
48 | > false && true ^ true
49 | => false
50 | > false && (true ^ true)
51 | => false
52 |
53 | > false & true ^ true
54 | => true
55 | > (false & true) ^ true
56 | => true
57 |
58 | Seems like they'd mostly be used for their side effects which is bad
59 |
60 | Only works consistently for falsy values and true. Truthy values explosive!
61 |
62 | Examples:
63 | https://github.com/ruby/ruby/blob/75feee0968c9345e7ffd2bda9835fcd60b4c0880/benchmark/bm_so_k_nucleotide.rb#L40
64 | https://github.com/rubyspec/rubyspec/blob/38b775a32293ce7ec5bdadaa7e70422fb5dc3a68/core/string/slice_spec.rb#L436
65 | https://github.com/rubyspec/rubyspec/blob/38b775a32293ce7ec5bdadaa7e70422fb5dc3a68/core/string/shared/slice.rb#L419
66 | https://github.com/rubyspec/rubyspec/blob/324c37bb67ea51f197954a37a2c71878eeadea01/core/string/plus_spec.rb#L41
67 |
--------------------------------------------------------------------------------
/lib/blog_snippets/articles/tail_call_optimization_in_ruby_internals/fib_tco_disasm.txt:
--------------------------------------------------------------------------------
1 | == disasm: @>==========
2 | 0000 putspecialobject 3 ( 1)
3 | 0002 putnil
4 | 0003 defineclass :BlogSnippets, , 2
5 | 0007 leave
6 | == disasm: @>
7 | 0000 putspecialobject 3 ( 2)
8 | 0002 putnil
9 | 0003 defineclass :TCOFib, , 2
10 | 0007 leave
11 | == disasm: @>=====
12 | 0000 putspecialobject 1 ( 3)
13 | 0002 putself
14 | 0003 putobject :acc
15 | 0005 putiseq acc
16 | 0007 opt_send_simple
17 | 0009 pop
18 | 0010 putspecialobject 1 ( 11)
19 | 0012 putself
20 | 0013 putobject :fib
21 | 0015 putiseq fib
22 | 0017 opt_send_simple
23 | 0019 leave
24 | == disasm: >=================
25 | local table (size: 4, argc: 3 [opts: 0, rest: -1, post: 0, block: -1, keyword: 0@5] s1)
26 | [ 4] i [ 3] n [ 2] result
27 | 0000 getlocal_OP__WC__0 4 ( 4)
28 | 0002 putobject -1
29 | 0004 opt_eq
30 | 0006 branchunless 12
31 | 0008 getlocal_OP__WC__0 2 ( 5)
32 | 0010 leave ( 4)
33 | 0011 pop
34 | 0012 putself ( 7)
35 | 0013 getlocal_OP__WC__0 4
36 | 0015 putobject_OP_INT2FIX_O_1_C_
37 | 0016 opt_minus
38 | 0018 getlocal_OP__WC__0 3
39 | 0020 getlocal_OP__WC__0 2
40 | 0022 opt_plus
41 | 0024 getlocal_OP__WC__0 3
42 | 0026 opt_send_simple
43 | 0028 leave
44 | == disasm: >=================
45 | local table (size: 2, argc: 1 [opts: 0, rest: -1, post: 0, block: -1, keyword: 0@3] s1)
46 | [ 2] i
47 | 0000 putself ( 12)
48 | 0001 getlocal_OP__WC__0 2
49 | 0003 putobject_OP_INT2FIX_O_1_C_
50 | 0004 putobject_OP_INT2FIX_O_0_C_
51 | 0005 opt_send_simple
52 | 0007 leave
53 |
--------------------------------------------------------------------------------
/lib/blog_snippets/articles/tail_call_optimization_in_ruby_internals/fib_disasm.txt:
--------------------------------------------------------------------------------
1 | == disasm: @>========== [44/385]
2 | 0000 putspecialobject 3 ( 1)
3 | 0002 putnil
4 | 0003 defineclass :BlogSnippets, , 2
5 | 0007 leave
6 | == disasm: @>
7 | 0000 putspecialobject 3 ( 2)
8 | 0002 putnil
9 | 0003 defineclass :Fib, , 2
10 | 0007 leave
11 | == disasm: @>========
12 | 0000 putspecialobject 1 ( 3)
13 | 0002 putself
14 | 0003 putobject :acc
15 | 0005 putiseq acc
16 | 0007 opt_send_simple
17 | 0009 pop
18 | 0010 putspecialobject 1 ( 11)
19 | 0012 putself
20 | 0013 putobject :fib
21 | 0015 putiseq fib
22 | 0017 opt_send_simple
23 | 0019 leave
24 | == disasm: >=================
25 | local table (size: 4, argc: 3 [opts: 0, rest: -1, post: 0, block: -1, keyword: 0@5] s1)
26 | [ 4] i [ 3] n [ 2] result
27 | 0000 getlocal_OP__WC__0 4 ( 4)
28 | 0002 putobject -1
29 | 0004 opt_eq
30 | 0006 branchunless 12
31 | 0008 getlocal_OP__WC__0 2 ( 5)
32 | 0010 leave ( 4)
33 | 0011 pop
34 | 0012 putself ( 7)
35 | 0013 getlocal_OP__WC__0 4
36 | 0015 putobject_OP_INT2FIX_O_1_C_
37 | 0016 opt_minus
38 | 0018 getlocal_OP__WC__0 3
39 | 0020 getlocal_OP__WC__0 2
40 | 0022 opt_plus
41 | 0024 getlocal_OP__WC__0 3
42 | 0026 opt_send_simple
43 | 0028 leave
44 | == disasm: >=================
45 | local table (size: 2, argc: 1 [opts: 0, rest: -1, post: 0, block: -1, keyword: 0@3] s1)
46 | [ 2] i
47 | 0000 putself ( 12)
48 | 0001 getlocal_OP__WC__0 2
49 | 0003 putobject_OP_INT2FIX_O_1_C_
50 | 0004 putobject_OP_INT2FIX_O_0_C_
51 | 0005 opt_send_simple
52 | 0007 leave
53 |
--------------------------------------------------------------------------------
/lib/blog_snippets/articles/eager_boolean_operators/notes.txt:
--------------------------------------------------------------------------------
1 | Coercion to Boolean compared to lazy evaluation counterpart
2 | > false | :WTF?
3 | => true
4 | > true & :WTF?
5 | => true
6 | Even with very large object on RHS, no efficiency gained by coercion
7 | Because of method call implementation?
8 | Other than tricky coercion, only gain is that it is ever so slightly faster
9 | than double negation:
10 | require 'benchmark/ips'
11 |
12 | Benchmark.ips do |bm|
13 | bm.report("Double negate") { !!(true && :a) }
14 |
15 | bm.report("Logical bit-wise coerce") { true & :a }
16 | end
17 |
18 | # Calculating --------------------------------------------
19 | # Double negate 138.008k i/100ms
20 | # Logical bit-wise coerce 139.350k i/100ms
21 | # --------------------------------------------------------
22 | # Double negate 7.262M (± 1.0%) i/s - 36.434M
23 | # Logical bit-wise coerce 7.825M (± 1.3%) i/s - 39.157M
24 | # --------------------------------------------------------
25 |
26 |
27 |
28 | "Maybe use when you have very simple boolean expressions and the cost
29 | of short cutting (i.e. a branch) is greater than the time you save by
30 | not evaluating the later expressions."
31 | http://stackoverflow.com/a/7105382/1169710
32 | Secretly method calls in Ruby!
33 | Doesn't seem to apply in Ruby. Branching always cheaper than a method call.
34 |
35 |
36 | Operator precedence:
37 | > true || 1 && 3
38 | => true
39 | > true || (1 && 3)
40 | => true
41 |
42 | > true | 1 && 3
43 | => 3
44 | > (true | 1) && 3
45 | => 3
46 |
47 |
48 | > false && true ^ true
49 | => false
50 | > false && (true ^ true)
51 | => false
52 |
53 | > false & true ^ true
54 | => true
55 | > (false && true) ^ true
56 | => true
57 |
58 | Seems like they'd mostly be used for their side effects which is bad
59 |
60 | The console is the only somewhat reasonable use case I can think of.
61 |
62 | Only works consistently for falsy values and true. Truthy values explosive!
63 |
64 | Examples:
65 | https://github.com/ruby/ruby/blob/75feee0968c9345e7ffd2bda9835fcd60b4c0880/benchmark/bm_so_k_nucleotide.rb#L40
66 | https://github.com/rubyspec/rubyspec/blob/38b775a32293ce7ec5bdadaa7e70422fb5dc3a68/core/string/slice_spec.rb#L436
67 | https://github.com/rubyspec/rubyspec/blob/38b775a32293ce7ec5bdadaa7e70422fb5dc3a68/core/string/shared/slice.rb#L419
68 | https://github.com/rubyspec/rubyspec/blob/324c37bb67ea51f197954a37a2c71878eeadea01/core/string/plus_spec.rb#L41
69 |
70 |
71 | Method execution behavior means errors on the RHS prevent the value from the
72 | LHS from being stored:
73 |
74 | or_result = nil
75 | begin
76 | or_result = true | Seriously(this(is(valid(Ruby!))))
77 | rescue NameError
78 | puts "NameError :("
79 | end
80 | or_result
81 | # NameError :(
82 | # => nil
83 |
84 | and_result = nil
85 | begin
86 | and_result = false & 0/0
87 | rescue ZeroDivisionError
88 | puts "ZeroDivisionError :("
89 | end
90 | and_result
91 | # ZeroDivisionError :(
92 | # => nil
93 |
--------------------------------------------------------------------------------
/test/unit/markdown_to_html_transformer_test.rb:
--------------------------------------------------------------------------------
1 | require "test_helper"
2 | require "blog_snippets/markdown_to_html_transformer"
3 |
4 | class MarkdownToHTMLTransformerTest < BlogSnippets::TestCase
5 | Subject = BlogSnippets::MarkdownToHTMLTransformer
6 |
7 | subject { Subject }
8 |
9 | context "::default_markdown_extensions" do
10 | should "return expected defaults" do
11 | expected = {
12 | :autolink => true,
13 | :disable_indented_code_blocks => true,
14 | :fenced_code_blocks => true,
15 | :footnotes => true,
16 | :no_intra_emphasis => true,
17 | :space_after_headers => true,
18 | :strikethrough => true,
19 | :tables => true,
20 | :underline => true,
21 | }
22 | assert_equal expected, subject.default_markdown_extensions
23 | end
24 |
25 | should "return a new Hash instance each call" do
26 | first_defaults = subject.default_markdown_extensions
27 | second_defaults = subject.default_markdown_extensions
28 | refute_equal first_defaults.object_id, second_defaults.object_id
29 | end
30 | end
31 |
32 | context "#initialize" do
33 | [:parser_class, :renderer].each do |required_opt|
34 | should "raise unless #{required_opt} option is given" do
35 | assert_raises(ArgumentError) do
36 | opts = default_initialization_options
37 | opts.delete(required_opt)
38 | subject.new(opts)
39 | end
40 | end
41 | end
42 |
43 | should "assign given :renderer to #renderer" do
44 | instance = subject.new(default_initialization_options)
45 | assert_equal renderer, instance.renderer
46 | end
47 |
48 | should "take a Hash of Markdown extensions" do
49 | exts = { :tables => true }
50 | opts = default_initialization_options.merge(:markdown_extensions => exts)
51 | instance = subject.new(opts)
52 | assert_equal exts, instance.markdown_extensions
53 | end
54 |
55 | should "use default Markdown extensions if none given" do
56 | opts = default_initialization_options
57 | opts.delete(:markdown_extensions)
58 | instance = subject.new(opts)
59 | assert_equal subject.default_markdown_extensions, instance.markdown_extensions
60 | end
61 |
62 | should "assign :markdown_extensions to #markdown_extensions" do
63 | exts = { :tables => true }
64 | opts = default_initialization_options.merge(:markdown_extensions => exts)
65 | instance = subject.new(opts)
66 | assert_equal exts, instance.markdown_extensions
67 | end
68 | end
69 |
70 | context "instance_methods" do
71 | subject { Subject.new(default_initialization_options) }
72 |
73 | context "#parser" do
74 | should "initialize an instance of parser_class with renderer and markdown extensions" do
75 | parser_class.expects(:new).with(subject.renderer, subject.markdown_extensions)
76 | subject.parser
77 | end
78 | end
79 |
80 | context "#transform" do
81 | should "invoke parser#render with given markdown" do
82 | markdown = "# Hello World!"
83 | subject.expects(:parser).returns(mck = mock)
84 | mck.expects(:render).with(markdown)
85 | subject.transform(markdown)
86 | end
87 | end
88 | end
89 |
90 | def default_initialization_options
91 | {
92 | :parser_class => parser_class,
93 | :renderer => renderer,
94 | }
95 | end
96 |
97 | def parser_class
98 | @parser_class ||= mock
99 | end
100 |
101 | def renderer
102 | @renderer ||= mock
103 | end
104 | end
105 |
--------------------------------------------------------------------------------
/lib/blog_snippets/renderers/wordpress_html_renderer.rb:
--------------------------------------------------------------------------------
1 | require "English"
2 | require "json"
3 | require "redcarpet"
4 |
5 | module BlogSnippets
6 | module Renderers
7 | class WordpressHTMLRenderer < Redcarpet::Render::HTML
8 | UNTARGETED_LINK = /^(?:mailto:|#)/.freeze
9 | # http://rubular.com/r/apmHqN4joc
10 | HEADER_MATCHER = /(?[1-6])[^>]+id="(?[^"]+)".*?>.*?<\/h\k>)/.freeze
11 | INDENTATION_TOKEN = "__WORDPRESS_HTML_RENDERER_INDENTATION__".freeze
12 | NEW_LINE_TOKEN = "__WORDPRESS_HTML_RENDERER_NEW_LINE__".freeze
13 |
14 | def initialize(options = nil)
15 | super(@options = options || default_options)
16 | end
17 |
18 | # Can't call super due to C-extension design, so fake it so we can
19 | # customize it.
20 | def link(link, title, content)
21 | element = %Q[#{content}])
28 | element
29 | end
30 |
31 | def block_code(code, language_or_attributes)
32 | # Replace line breaks with new-line token
33 | code.gsub!(/\n/, NEW_LINE_TOKEN)
34 | code.gsub!(/ /, INDENTATION_TOKEN)
35 |
36 | # Extract code tag attributes
37 | code_attrs = code_attributes(language_or_attributes)
38 | code_attrs &&= " #{code_attrs}"
39 |
40 | # Can't call super due to C-extension design, so fake it.
41 | [
42 | "[code#{code_attrs}]",
43 | NEW_LINE_TOKEN,
44 | code,
45 | "[/code]\n",
46 | ].join
47 | end
48 |
49 | def postprocess(document)
50 | remove_new_lines_and_white_space_runs!(document)
51 | replace_tokens!(document)
52 | add_header_links!(document)
53 | end
54 |
55 | private
56 |
57 | def add_header_links!(document)
58 | document.gsub!(HEADER_MATCHER) do |match|
59 | match_data = $LAST_MATCH_INFO
60 | match[0..-6] +
61 | %Q|| +
62 | ""
63 | end
64 | document
65 | end
66 |
67 | def link_attributes(link)
68 | return {} unless attrs = @options[:link_attributes]
69 | link_attrs = attrs.dup
70 | link_attrs.delete("target") if UNTARGETED_LINK === link
71 | link_attrs
72 | end
73 |
74 | def code_attributes(lang_or_attrs)
75 | return "language=\"#{lang_or_attrs}\"" unless /[, :]/ === lang_or_attrs
76 |
77 | # Curly braces are omitted for some reason, so restore them.
78 | attr_json = JSON.parse("{#{lang_or_attrs}}")
79 | attr_json.map { |key, value| "#{key}=\"#{value}\"" }.join(" ")
80 | end
81 |
82 | def default_options
83 | {
84 | :link_attributes => {
85 | "target" => "_blank",
86 | },
87 | :with_toc_data => true,
88 | }
89 | end
90 |
91 | def remove_new_lines_and_white_space_runs!(document)
92 | # Remove line breaks; HTML should handle breaking lines
93 | document.gsub!(/\n/, " ")
94 | # Removing line breaks may have introduced white space runs; zap 'em.
95 | # http://rubular.com/r/aaVCG1Wlep
96 | document.gsub!(/(?<=[^\s])\s{2,}/, " ")
97 | document
98 | end
99 |
100 | def replace_tokens!(document)
101 | # Replace tokens with desired characters
102 | document.gsub!(/#{NEW_LINE_TOKEN}/, "\n")
103 | document.gsub!(/#{INDENTATION_TOKEN}/, " ")
104 | document
105 | end
106 | end
107 | end
108 | end
109 |
--------------------------------------------------------------------------------
/lib/blog_snippets/articles/introducing_the_tco_method_gem/README.md:
--------------------------------------------------------------------------------
1 | 
2 |
3 | Earlier this week I published a gem intended to help simplify the process of
4 | compiling Ruby code with tail call optimization enabled in MRI Ruby. The gem,
5 | [tco_method](https://rubygems.org/gems/tco_method), builds on my recent research
6 | into the [internals of Ruby's implementation of tail call optimization](http://blog.tdg5.com/tail-call-optimization-ruby-deep-dive/)
7 | and the ideas presented in [Nithin Bekal's article *Tail Optimization in Ruby*](http://nithinbekal.com/posts/ruby-tco/).
8 |
9 | The gem aims to ease the process of compiling select Ruby code with tail call
10 | optimization by providing a helper method, [**TCOMethod.tco_eval**](http://www.rubydoc.info/gems/tco_method/TCOMethod/Mixin:tco_eval),
11 | for evaluating code with tail call optimization enabled and a mix-in,
12 | [**TCOMethod::Mixin**](http://www.rubydoc.info/gems/tco_method/TCOMethod/Mixin),
13 | for adding annotations to Classes and/or Modules that mark the singleton or
14 | instance methods that should be compiled with tail call optimization enabled.
15 | You can see what each of these approaches would look like below.
16 |
17 | ## TCOMethod.tco_eval
18 |
19 | ```ruby
20 | TCOMethod.tco_eval(<<-CODE)
21 | module MyFactorial
22 | def self.factorial(n, acc = 1)
23 | n <= 1 ? acc : factorial(n - 1, n * acc)
24 | end
25 | end
26 | CODE
27 |
28 | MyFactorial.factorial(10_000).to_s.length
29 | # => 35660
30 | ```
31 |
32 | Though not as powerful as Ruby's native **eval** method, **TCOMethod.tco_eval** provides
33 | easy access to the full power of Ruby with the added benefit of tail call
34 | optimization. The major downside to using **tco_eval** is that code must be
35 | provided as a String. Also, unlike Ruby's standard **eval** method, **tco_eval**
36 | currently cannot take a binding for the evaluation which can make it awkward
37 | at times to connect code that's being compiled with tail optimization to
38 | other application code compiled by Ruby's primary compilation process.
39 |
40 | All that said, I view **tco_eval** as more of a starting point than a solution.
41 | It inches the door a little wider for the Ruby community to play with tail call
42 | optimization and get a better sense of how and when it might be useful. I think
43 | this is an exciting opportunity that Nithin Bekal's work with TCO method
44 | decorators began to explore and, as we'll see momentarily, the
45 | **TCOMethod::Mixin** continues to test the waters of.
46 |
47 | Beyond the opportunity it offers the Ruby community, I'm also excited because
48 | the [tco_method gem](https://rubygems.org/gems/tco_method) seems like a great
49 | opportunity to dig into Ruby's C extensions and see how extending the gem to
50 | interface with Ruby's C code more directly could extend the abilities of the gem
51 | while further simplifying access to tail call optimization in Ruby.
52 |
53 | ## TCOMethod::Mixin#tco_method
54 |
55 | ```ruby
56 | class MyFibonacci
57 | extend TCOMethod::Mixin
58 |
59 | def fibonacci(index, back_one = 1, back_two = 0)
60 | index < 1 ? back_two : fibonacci(index - 1, back_one + back_two, back_one)
61 | end
62 | tco_method :fibonacci
63 | end
64 |
65 | puts MyFibonacci.new.fibonacci(10_000).to_s.length
66 | # => 2090
67 | ```
68 |
69 | The **TCOMethod::Mixin** module provides annotations at the Class and Module
70 | level allowing a developer access to some of the niceties of tail call
71 | optimization, but without the awkwardness that comes from String literal code or
72 | heredocs. In the style of some of Ruby's other class annotations like
73 | **private_class_method** or **module_function**, the **tco_module_method**,
74 | **tco_class_method**, and the eponymous **tco_method** annotation for instance
75 | methods, allow a user to annotate a previously defined method indicating that
76 | the specified method should be recompiled with tail call optimization enabled.
77 |
78 | Currently these helper methods are little more than nicely wrapped hacks that
79 | use some trickery to redefine the specified method with tail call optimization
80 | enabled. More specifically, the helper annotations will:
81 |
82 | - find the method identified by the given argument
83 | - retrieve the source for that method using the [method_source
84 | gem](https://github.com/banister/method_source)
85 | - generate a redefinition expression from the method source that
86 | reopens the defining Module or Class and redefines the method
87 | - pass the generated redefinition expression to **TCOMethod.tco_eval**,
88 | effectively overriding the previously defined method with the new tail call
89 | optimized version
90 |
91 | While this works in most situations, there are quite a few [pitfalls and
92 | gotchas](https://github.com/tdg5/tco_method/tree/6241e57f8bb8478e2ef2286d4cc6e463c0198e61#gotchas)
93 | that come from this approach.
94 |
95 | For one, this approach only works for methods defined using the **def** keyword.
96 | Though in some cases methods defined using **define_method** could be redefined
97 | correctly, given that **define_method** takes a block that maintains a closure
98 | with the definition context, there's no foolproof way to ensure that all methods
99 | defined using **define_method** could be reevaluated with tail call optimization
100 | enabled because of references to the closure context.
101 |
102 | Another gotcha worth mentioning is that because the current implementation
103 | relies on reopening the parent Module or Class, the helper methods won't work on
104 | anonymous Classes or Modules because they cannot be reopened by name. With more
105 | hacking there are ways to get around this limitation, but, at present, I don't
106 | think more hacking is the path forward and something more along the lines of a C
107 | extension is the right way to address these issues.
108 |
109 | ## Interesting problems
110 |
111 | As I said before, I think the [tco_method gem](https://rubygems.org/gems/tco_method)
112 | is a starting point, not a solution, and I'm excited by the various
113 | opportunities and challenges it presents. Though I am definitely interested in
114 | learning more about Ruby's C extension support, the [tco_method gem](https://rubygems.org/gems/tco_method)
115 | has already presented some interesting problems despite its current primitive
116 | and hacky nature.
117 |
118 | For example, in order to test that a recursive factorial method would no longer
119 | encounter a stack overflow after being recompiled with tail call optimization
120 | enabled, I first had to devise a means of ensuring that that method would
121 | have encountered a stack overflow without tail call optimization enabled and at
122 | what point that stack overflow would have occurred. To achieve this, I wrote a
123 | test helper that performs [a binary search to discover how many stack frames a
124 | recursive function can allocate before a stack overflow is
125 | encountered](https://github.com/tdg5/tco_method/blob/c28895742e18e9d87393c97435db99e4b71c5fa3/test/test_helpers/stack_busters/factorial_stack_buster.rb#L25).
126 |
127 | Though my current solution could use some refactoring, I thought this was a fun
128 | and interesting problem to solve. Though I don't find binary search particularly
129 | interesting on its own, I found this particular case interesting because the
130 | expensive nature of the **raise**/**rescue** cycle in Ruby introduces a sort of
131 | penalty to the process such that the process will be much quicker if the point
132 | of overflow can be discovered while causing as few **SystemStackError**
133 | exceptions as possible. I think this detail makes the binary search more
134 | interesting because there's more to it than just finding the desired result in as few
135 | operations as possible; there are also other considerations to keep in mind that
136 | could totally change how the utility of the search is assessed. In fact, given
137 | this behavior, a binary search may not be the best approach at all.
138 |
139 | For now, I've taken the approach of using one binary search to find a point of
140 | overflow, then using a second binary search to find the exact point at which the
141 | recursive function begins to exceed the system stack between the last successful
142 | invocation and the overflowing invocation.
143 |
144 | I haven't tried to do much research on this particular type of problem yet, but
145 | I'm excited to revisit this search function at some point in the future and see
146 | what other ideas are out there for me to throw at the problem.
147 |
148 | **Update:** After discussing the peculiarities of this approach with my coworker
149 | Matt Bittarelli, he suggested a couple of alternatives to the binary search
150 | approach that seemed intriguing and simpler. The first idea was simply to [force
151 | a **SystemStackError** and check the length of the exception's backtrace from the
152 | **rescue** context to determine the maximum stack
153 | depth](https://github.com/tdg5/tco_method/commit/e2e7f30314fd3d0e1b2d138328d7deeb31e7bd96).
154 | Though this approach works in Ruby 2.2, [it does not work in Ruby 2.0 or Ruby
155 | 2.1](https://travis-ci.org/tdg5/tco_method/builds/54811953). The other idea Matt
156 | had was that maybe a **SystemStackError** wasn't necessary at all if a block
157 | could be used to monitor how the stack depth changed from iteration to
158 | iteration. Though a little mind bending, I was able to [use a recursive method
159 | that yields to a block to monitor how the stack depth changes and using that
160 | information determine whether the method had been compiled with tail call
161 | optimization enabled](https://github.com/tdg5/tco_method/commit/c2963276376f7705b2fb1b6b582d88f07954c02f).
162 | Though the means of determining if a method is compiled with tail call
163 | optimization has changed since I initially wrote this article, I think all three
164 | of the above approaches are interesting and I expect more interesting problems
165 | will emerge as work on this gem continues. Thanks again to Matt Bittarelli for
166 | his insights into the problem!
167 |
168 | ## Test drive
169 |
170 | Because tail recursive functions can typically be restated in other ways that
171 | don't require tail call optimization, I'm still on the fence as to whether TCO
172 | provides any real value other than expanding the expressiveness of the Ruby
173 | language. As such, I encourage you to take the [tco_method gem](https://rubygems.org/gems/tco_method)
174 | for a test drive and explore the opportunities it presents. If you do take
175 | it for a test drive, drop me a line to let me know how it went. I'd be
176 | interested to hear about your experiences both with tail call optimization in
177 | Ruby-land and with the API offered by the [tco_method gem](https://rubygems.org/gems/tco_method).
178 | Contributions are also always welcome!
179 |
180 | [View the tco_method gem on RubyGems](https://rubygems.org/gems/tco_method)
181 | [View the tco_method gem on GitHub](https://github.com/tdg5/tco_method)
182 |
183 | As always, thanks for reading!
184 |
--------------------------------------------------------------------------------
/lib/blog_snippets/articles/tail_call_optimization_in_ruby_background/README.md:
--------------------------------------------------------------------------------
1 | Back in November, courtesy of [/r/ruby](https://www.reddit.com/r/ruby), I came
2 | across [a blog post by Nithin Bekal, Tail Call Optimization in
3 | Ruby](http://nithinbekal.com/posts/ruby-tco/), demonstrating Ruby's built-in
4 | support for tail call optimization and I have to admit, my mind was a little
5 | blown.
6 |
7 | It's not that I have a specific need for tail call optimization. In fact,
8 | I can't think of even a single situation where I would have done
9 | things differently if I'd known the VM supported it. But, I guess I was
10 | surprised to find that tail call optimization was just hiding somewhere in the
11 | Ruby VM, waiting to be flipped on with a compile flag, or **at runtime**.
12 |
13 | I think it was this ability to just turn it on at any time that blew my mind.
14 | Not just that it was hiding in there somewhere, but that the VM is flexible
15 | enough to swap in the machinery to support tail call optimization whenever you
16 | decide you want it. Pretty awesome.
17 |
18 | With no particular use for tail call optimization, I've just been sitting on the
19 | knowledge, the notion bouncing around in my head. That is, until earlier
20 | this week when I decided I would try to apply some of what I learned from reading
21 | [Pat Shaughnessy's Ruby Under a Microscope](http://patshaughnessy.net/ruby-under-a-microscope)
22 | to better understanding how the Ruby VM can be so flexible when it comes to tail
23 | call optimization.
24 |
25 | Though I think that that will make for an interesting blog post, it's turned into a
26 | bit of an epic. So this week, I'm going to begin with a little background on
27 | tail call optimization and hopefully build on what others have already shared
28 | with some of what I've learned about Ruby's implementation of tail call
29 | optimization while trudging through Ruby's depths. Then, in my next post, with
30 | the stage already set, we can get into the internals of how the Ruby VM makes
31 | tail call optimization happen at runtime.
32 |
33 | Let's get started!
34 |
35 | ## A little background on tail call optimization
36 | [Nithin's article](http://nithinbekal.com/posts/ruby-tco/) does a great job of
37 | explaining tail recursive functions and tail call optimization, so if you're a
38 | little iffy on either subject, I'd recommend reading that before you continue
39 | with this post. The [Tail call entry in Wikipedia](https://en.wikipedia.org/wiki/Tail_call)
40 | is also a useful resource for even more depth on the subject.
41 |
42 | To summarize, tail call optimization, or tail call elimination as it is also
43 | known, is a special feature of some kinds of tail recursive functions that
44 | allows for the tail call to be implemented without adding a new stack frame to
45 | the call stack. This allows for more efficient tail calls while also
46 | allowing the size of the stack to remain constant which in turn allows recursion
47 | to be used in situations that might otherwise encounter a stack overflow without
48 | tail call optimization.
49 |
50 | ## Ruby and tail call optimization
51 | Starting with Ruby 1.9.2, the Ruby VM offers built-in, though experimental,
52 | support for tail call optimization. That said, there are other ways of achieving
53 | tail call optimization without enabling it in the VM. [Magnus Holm offers a
54 | couple of other hacks for achieving tail call optimization in Ruby in his blog post
55 | Tailin' Ruby](http://timelessrepo.com/tailin-ruby), which is worth the read
56 | just for the innovative ways he attempts to solve the problem, even if you're
57 | fine to use the Ruby VM's implementation of tail call optimization. Maybe it's
58 | just because I haven't had an itch that I needed tail call optimization to
59 | scratch, but using **redo** to emulate tail call optimization in a performant
60 | fashion is pretty damn clever.
61 |
62 | Now, although support for tail call optimization is built into the VM, because
63 | of its experimental nature it isn't enabled by default and must be turned on
64 | either with a flag when compiling Ruby or by configuring
65 | **RubyVM::InstructionSequence** at runtime with special compile options. There
66 | was some talk of [enabling tail call optimization by default around the time
67 | that Ruby 2.0 was released](https://bugs.ruby-lang.org/issues/6602), however
68 | this hasn't come to be for a number of reasons: Primary concerns were that tail
69 | call optimization makes it difficult to implement **set_trace_func** and also
70 | causes backtrace weirdness due to the absence of a new stack frame.
71 |
72 | Now that we have a little background on tail call optimization in Ruby, let's
73 | take a look at an example of a tail recursive, tail call optimizable function.
74 |
75 | ## A tail recursive Guinea pig
76 | In order for us to take Ruby's implementation of tail call optimization for a
77 | test drive and to help us get to the bottom of Ruby's implementation of tail
78 | call optimization in my next post, we'll first need a tail recursive function to
79 | be the subject of our experiments. As it turns out, we can actually extract such
80 | a subject from the Ruby source code itself.
81 |
82 | Depending on your feelings about the recent debate regarding how Ruby is
83 | tested[^1][^2], it may surprise you to learn that our Guinea pig comes directly
84 | from Ruby's built-in test suite. After all, though tail call optimization may
85 | not be enabled by default, and though it may only be experimental at this time,
86 | it's not unreasonable to think that there'd be a test for it somewhere. That
87 | somewhere is among a handful of other tests for various optimizations to the
88 | Ruby VM in the Ruby source at [test/ruby/test_optimization.rb](https://github.com/ruby/ruby/blob/fcf6fa8781fe236a9761ad5d75fa1b87f1afeea2/test/ruby/test_optimization.rb#L213).
89 |
90 | The test that is home to our Guinea pig is somewhat unremarkable, so though
91 | you're welcome to review the full contents of the test, for our purposes I've
92 | extracted the tail recursive factorial function used by the test with some
93 | refactoring to, among other things, isolate the HEREDOC and make it work outside
94 | of the test:
95 |
96 | ```ruby
97 | code = <<-CODE
98 | class Factorial
99 | def self.fact_helper(n, res)
100 | n == 1 ? res : fact_helper(n - 1, n * res)
101 | end
102 |
103 | def self.fact(n)
104 | fact_helper(n, 1)
105 | end
106 | end
107 | CODE
108 | options = {
109 | tailcall_optimization: true,
110 | trace_instruction: false,
111 | }
112 | RubyVM::InstructionSequence.new(code, nil, nil, nil, options).eval
113 | ```
114 |
115 | The tail recursive method of interest above is the **fact_helper** method. It
116 | should hopefully be pretty obvious that **fact_helper** is tail recursive given
117 | that, in all but the base case, the final action of the method is the invocation
118 | of the method itself with primitive values. Other than the tail recursive nature
119 | of this function, there are a couple of other things going on here that are worth
120 | noting.
121 |
122 | First, as I alluded to before in regard to tail call optimization not being
123 | enabled by default, currently it is not possible to turn on tail call
124 | optimization without also disabling the **set_trace_func** capabilities of the VM.
125 | This can be seen above in the option to **RubyVM::InstructionSequence** setting
126 | **trace_instruction** to false.
127 |
128 | Second, this example demonstrates the best strategy for enabling tail call
129 | optimization that I have come across so far. I say this because the other
130 | examples I've referenced have all enabled tail call optimization by changing
131 | **RubyVM::InstructionSequence.compile_option**, effectively enabling tail call
132 | optimization globally.
133 |
134 | Though at least one source suggested that the modified compile options would only be
135 | applied to code directly compiled with **RubyVM::InstructionSequence**, this is
136 | incorrect. In fact, any files loaded after the change to
137 | **RubyVM::InstructionSequence.compile_option** will be compiled with tail call
138 | optimization enabled. This can be verified by running the following contrived
139 | test script that adapts our Guinea pig both to evidence the global nature of
140 | **RubyVM::InstructionSequence.compile_option** and to demonstrate the utility of
141 | tail call optimization.
142 |
143 | ```ruby
144 | # Flag indicating whether this is the first time this file has been loaded
145 | $first_load = true if $first_load.nil?
146 |
147 | # Declare classes to facilitate #instance_eval later
148 | class FirstLoadFactorial; end
149 | class ReloadedFactorial; end
150 |
151 | # On the first load, extend FirstLoadFactorial,
152 | # On the second load, extend ReloadedFactorial.
153 | klass = $first_load ? FirstLoadFactorial : ReloadedFactorial
154 |
155 | # Tail recursive factorial adapted from
156 | # https://github.com/ruby/ruby/blob/fcf6fa8781fe236a9761ad5d75fa1b87f1afeea2/test/ruby/test_optimization.rb#L213
157 | klass.instance_eval do
158 | def self.fact_helper(n, res)
159 | n == 1 ? res : fact_helper(n - 1, n * res)
160 | end
161 |
162 | def self.fact(n)
163 | fact_helper(n, 1)
164 | end
165 | end
166 |
167 | # Turn on tailcall optimization globally; only code compiled after this point (the reload below) is affected
168 | RubyVM::InstructionSequence.compile_option = {
169 | tailcall_optimization: true,
170 | trace_instruction: false,
171 | }
172 |
173 | # This check avoids calculating the factorial twice; ReloadedFactorial will only
174 | # respond to :fact after the file has been reloaded.
175 | if ReloadedFactorial.respond_to?(:fact)
176 | begin
177 | puts "FirstLoadFactorial: #{FirstLoadFactorial.fact(50000).to_s.length}"
178 | rescue SystemStackError
179 | puts 'FirstLoadFactorial: stack level too deep'
180 | end
181 |
182 | # 50000! is 213,237 digits long, so display just the length of the result
183 | puts "ReloadedFactorial: #{ReloadedFactorial.fact(50000).to_s.length}"
184 | end
185 |
186 | # Reload the file on the first load only
187 | if $first_load
188 | $first_load = false
189 | load __FILE__
190 | end
191 |
192 | # $ ruby tail_optimized_reload.rb
193 | # FirstLoadFactorial: stack level too deep
194 | # ReloadedFactorial: 213237
195 | ```
196 |
197 | [View on GitHub](https://github.com/tdg5/blog_snippets/blob/8cdc800e711f5270754e352b9f3458d7e429b87d/lib/blog_snippets/tail_call_optimization_in_ruby_internals/tail_optimized_reload.rb)
198 |
199 | Since tail call optimization is still an experimental feature, if you're going
200 | to use tail call optimization in production code or in code that could become
201 | production code, the strategy demonstrated by the Ruby core test of creating a
202 | new **RubyVM::InstructionSequence** object that can be used to load/compile tail
203 | call optimized code without affecting other code compiled by the VM later is
204 | absolutely the right way to go.
205 |
206 | ## End Part I
207 | That does it for our initial foray into tail call optimization in Ruby. I hope
208 | you've found something here today worth the price of admission. Stay tuned for
209 | my next post in which we'll take our tail recursive Guinea pig for a deep dive into the
210 | internals of Ruby, all the way from the Ruby source, through the YARV instructions
211 | just below the surface, down deep into the C weeds in search of the source
212 | of Ruby's tail call optimization implementation. It'll certainly be an
213 | interesting ride.
214 |
215 | [^1]: http://rubini.us/2014/12/31/matz-s-ruby-developers-don-t-use-rubyspec/
216 | [^2]: https://gist.github.com/nateberkopec/11dbcf0ee7f2c08450ea
217 |
--------------------------------------------------------------------------------
/lib/blog_snippets/articles/tail_call_optimization_in_ruby_internals/from_call_method_to_tco.c:
--------------------------------------------------------------------------------
1 | static void
2 | vm_search_method(rb_call_info_t *ci, VALUE recv)
3 | {
4 | VALUE klass = CLASS_OF(recv);
5 | /* Look up ci->mid on recv's class and store the result in ci; skipped when the inline cache is still valid. */
6 | #if OPT_INLINE_METHOD_CACHE
7 | if (LIKELY(GET_GLOBAL_METHOD_STATE() == ci->method_state && RCLASS_SERIAL(klass) == ci->class_serial)) {
8 | /* cache hit! */
9 | return;
10 | }
11 | #endif
12 | /* Cache miss (or caching compiled out): resolve the method entry and route the call through vm_call_general. */
13 | ci->me = rb_method_entry(klass, ci->mid, &ci->defined_class);
14 | ci->klass = klass;
15 | ci->call = vm_call_general;
16 | #if OPT_INLINE_METHOD_CACHE
17 | ci->method_state = GET_GLOBAL_METHOD_STATE();
18 | ci->class_serial = RCLASS_SERIAL(klass);
19 | #endif
20 | }
21 |
22 | /* Generic call handler (installed into ci->call by vm_search_method); simply forwards to vm_call_method. */
23 | static VALUE
24 | vm_call_general(rb_thread_t *th, rb_control_frame_t *reg_cfp, rb_call_info_t *ci)
25 | {
26 | return vm_call_method(th, reg_cfp, ci);
27 | }
28 |
29 | /* Dispatch a call through ci->me: check the entry's visibility/safe flags, then switch on the method definition type. */
30 | VALUE
31 | vm_call_method(rb_thread_t *th, rb_control_frame_t *cfp, rb_call_info_t *ci)
32 | {
33 | int enable_fastpath = 1;
34 | rb_call_info_t ci_temp;
35 | /* Re-entered via goto whenever ci->me has been replaced or cleared (zsuper and refinement resolution below). */
36 | start_method_dispatch:
37 | if (ci->me != 0) {
38 | if ((ci->me->flag == 0)) {
39 | VALUE klass;
40 | /* Also the jump target for flagged entries whose protected/safe-level checks pass (see the else branch). */
41 | normal_method_dispatch:
42 | switch (ci->me->def->type) {
43 | case VM_METHOD_TYPE_ISEQ:{
44 | CI_SET_FASTPATH(ci, vm_call_iseq_setup, enable_fastpath);
45 | return vm_call_iseq_setup(th, cfp, ci);
46 | }
47 | case VM_METHOD_TYPE_NOTIMPLEMENTED:
48 | case VM_METHOD_TYPE_CFUNC:
49 | CI_SET_FASTPATH(ci, vm_call_cfunc, enable_fastpath);
50 | return vm_call_cfunc(th, cfp, ci);
51 | case VM_METHOD_TYPE_ATTRSET:{
52 | CALLER_SETUP_ARG(cfp, ci);
53 | rb_check_arity(ci->argc, 1, 1);
54 | ci->aux.index = 0;
55 | CI_SET_FASTPATH(ci, vm_call_attrset, enable_fastpath && !(ci->flag & VM_CALL_ARGS_SPLAT));
56 | return vm_call_attrset(th, cfp, ci);
57 | }
58 | case VM_METHOD_TYPE_IVAR:{
59 | CALLER_SETUP_ARG(cfp, ci);
60 | rb_check_arity(ci->argc, 0, 0);
61 | ci->aux.index = 0;
62 | CI_SET_FASTPATH(ci, vm_call_ivar, enable_fastpath && !(ci->flag & VM_CALL_ARGS_SPLAT));
63 | return vm_call_ivar(th, cfp, ci);
64 | }
65 | case VM_METHOD_TYPE_MISSING:{
66 | ci->aux.missing_reason = 0;
67 | CI_SET_FASTPATH(ci, vm_call_method_missing, enable_fastpath);
68 | return vm_call_method_missing(th, cfp, ci);
69 | }
70 | case VM_METHOD_TYPE_BMETHOD:{
71 | CI_SET_FASTPATH(ci, vm_call_bmethod, enable_fastpath);
72 | return vm_call_bmethod(th, cfp, ci);
73 | }
74 | case VM_METHOD_TYPE_ZSUPER:{
75 | klass = ci->me->klass;
76 | klass = RCLASS_ORIGIN(klass);
77 | zsuper_method_dispatch:
78 | klass = RCLASS_SUPER(klass);
79 | if (!klass) {
80 | ci->me = 0;
81 | goto start_method_dispatch;
82 | }
83 | ci_temp = *ci;
84 | ci = &ci_temp;
85 |
86 | ci->me = rb_method_entry(klass, ci->mid, &ci->defined_class);
87 |
88 | if (ci->me != 0) {
89 | goto normal_method_dispatch;
90 | }
91 | else {
92 | goto start_method_dispatch;
93 | }
94 | }
95 | case VM_METHOD_TYPE_OPTIMIZED:{
96 | switch (ci->me->def->body.optimize_type) {
97 | case OPTIMIZED_METHOD_TYPE_SEND:
98 | CI_SET_FASTPATH(ci, vm_call_opt_send, enable_fastpath);
99 | return vm_call_opt_send(th, cfp, ci);
100 | case OPTIMIZED_METHOD_TYPE_CALL:
101 | CI_SET_FASTPATH(ci, vm_call_opt_call, enable_fastpath);
102 | return vm_call_opt_call(th, cfp, ci);
103 | default:
104 | rb_bug("vm_call_method: unsupported optimized method type (%d)",
105 | ci->me->def->body.optimize_type);
106 | }
107 | break;
108 | }
109 | case VM_METHOD_TYPE_UNDEF:
110 | break;
111 | case VM_METHOD_TYPE_REFINED:{
112 | NODE *cref = rb_vm_get_cref(cfp->iseq, cfp->ep);
113 | VALUE refinements = cref ? cref->nd_refinements : Qnil;
114 | VALUE refinement, defined_class;
115 | rb_method_entry_t *me;
116 |
117 | refinement = find_refinement(refinements,
118 | ci->defined_class);
119 | if (NIL_P(refinement)) {
120 | goto no_refinement_dispatch;
121 | }
122 | me = rb_method_entry(refinement, ci->mid, &defined_class);
123 | if (me) {
124 | if (ci->call == vm_call_super_method) {
125 | rb_control_frame_t *top_cfp = current_method_entry(th, cfp);
126 | if (top_cfp->me &&
127 | rb_method_definition_eq(me->def, top_cfp->me->def)) {
128 | goto no_refinement_dispatch;
129 | }
130 | }
131 | ci->me = me;
132 | ci->defined_class = defined_class;
133 | if (me->def->type != VM_METHOD_TYPE_REFINED) {
134 | goto start_method_dispatch;
135 | }
136 | }
137 | /* Use the original (unrefined) entry when one is recorded; otherwise continue the lookup in the superclass. */
138 | no_refinement_dispatch:
139 | if (ci->me->def->body.orig_me) {
140 | ci->me = ci->me->def->body.orig_me;
141 | if (UNDEFINED_METHOD_ENTRY_P(ci->me)) {
142 | ci->me = 0;
143 | }
144 | goto start_method_dispatch;
145 | }
146 | else {
147 | klass = ci->me->klass;
148 | goto zsuper_method_dispatch;
149 | }
150 | }
151 | }
152 | rb_bug("vm_call_method: unsupported method type (%d)", ci->me->def->type);
153 | }
154 | else {
155 | int noex_safe;
156 | if (!(ci->flag & VM_CALL_FCALL) && (ci->me->flag & NOEX_MASK) & NOEX_PRIVATE) {
157 | int stat = NOEX_PRIVATE;
158 |
159 | if (ci->flag & VM_CALL_VCALL) {
160 | stat |= NOEX_VCALL;
161 | }
162 | ci->aux.missing_reason = stat;
163 | CI_SET_FASTPATH(ci, vm_call_method_missing, 1);
164 | return vm_call_method_missing(th, cfp, ci);
165 | }
166 | else if (!(ci->flag & VM_CALL_OPT_SEND) && (ci->me->flag & NOEX_MASK) & NOEX_PROTECTED) {
167 | enable_fastpath = 0;
168 | if (!rb_obj_is_kind_of(cfp->self, ci->defined_class)) {
169 | ci->aux.missing_reason = NOEX_PROTECTED;
170 | return vm_call_method_missing(th, cfp, ci);
171 | }
172 | else {
173 | goto normal_method_dispatch;
174 | }
175 | }
176 | else if ((noex_safe = NOEX_SAFE(ci->me->flag)) > th->safe_level && (noex_safe > 2)) {
177 | rb_raise(rb_eSecurityError, "calling insecure method: %"PRIsVALUE, rb_id2str(ci->mid));
178 | }
179 | else {
180 | goto normal_method_dispatch;
181 | }
182 | }
183 | }
184 | else {
185 | /* method missing */
186 | int stat = 0;
187 | if (ci->flag & VM_CALL_VCALL) {
188 | stat |= NOEX_VCALL;
189 | }
190 | if (ci->flag & VM_CALL_SUPER) {
191 | stat |= NOEX_SUPER;
192 | }
193 | if (ci->mid == idMethodMissing) {
194 | rb_control_frame_t *reg_cfp = cfp;
195 | VALUE *argv = STACK_ADDR_FROM_TOP(ci->argc);
196 | rb_raise_method_missing(th, ci->argc, argv, ci->recv, stat);
197 | }
198 | else {
199 | ci->aux.missing_reason = stat;
200 | CI_SET_FASTPATH(ci, vm_call_method_missing, 1);
201 | return vm_call_method_missing(th, cfp, ci);
202 | }
203 | }
204 |
205 | rb_bug("vm_call_method: unreachable");
206 | }
207 |
208 | /* Set up the callee's arguments via vm_callee_setup_arg, then hand off to vm_call_iseq_setup_2 to push the frame. */
209 | static VALUE
210 | vm_call_iseq_setup(rb_thread_t *th, rb_control_frame_t *cfp, rb_call_info_t *ci)
211 | {
212 | vm_callee_setup_arg(th, ci, ci->me->def->body.iseq, cfp->sp - ci->argc);
213 | return vm_call_iseq_setup_2(th, cfp, ci);
214 | }
215 |
216 | /* Pick the frame setup: the tail-call variant only when VM_CALL_TAILCALL is set in ci->flag, else the normal one. */
217 | static VALUE
218 | vm_call_iseq_setup_2(rb_thread_t *th, rb_control_frame_t *cfp, rb_call_info_t *ci)
219 | {
220 | if (LIKELY(!(ci->flag & VM_CALL_TAILCALL))) {
221 | return vm_call_iseq_setup_normal(th, cfp, ci);
222 | }
223 | else {
224 | return vm_call_iseq_setup_tailcall(th, cfp, ci);
225 | }
226 | }
227 |
228 | /* Normal call: nil-fill the callee's non-parameter locals and push a new method frame above the caller's frame. */
229 | static inline VALUE
230 | vm_call_iseq_setup_normal(rb_thread_t *th, rb_control_frame_t *cfp, rb_call_info_t *ci)
231 | {
232 | int i, local_size;
233 | VALUE *argv = cfp->sp - ci->argc;
234 | rb_iseq_t *iseq = ci->me->def->body.iseq;
235 | VALUE *sp = argv + iseq->param.size;
236 |
237 | /* clear local variables (arg_size...local_size) */
238 | for (i = iseq->param.size, local_size = iseq->local_size; i < local_size; i++) {
239 | *sp++ = Qnil;
240 | }
241 |
242 | vm_push_frame(th, iseq, VM_FRAME_MAGIC_METHOD, ci->recv, ci->defined_class,
243 | VM_ENVVAL_BLOCK_PTR(ci->blockptr),
244 | iseq->iseq_encoded + ci->aux.opt_pc, sp, 0, ci->me, iseq->stack_max);
245 | /* cfp is still the caller's frame here: move its sp below the arguments and the receiver */
246 | cfp->sp = argv - 1 /* recv */;
247 | return Qundef;
248 | }
249 |
250 | /* Tail call: pop the current frame, rebuild self/args/locals on the previous frame's stack, then push the callee frame. */
251 | static inline VALUE
252 | vm_call_iseq_setup_tailcall(rb_thread_t *th, rb_control_frame_t *cfp, rb_call_info_t *ci)
253 | {
254 | int i;
255 | VALUE *argv = cfp->sp - ci->argc;
256 | rb_iseq_t *iseq = ci->me->def->body.iseq;
257 | VALUE *src_argv = argv;
258 | VALUE *sp_orig, *sp;
259 | VALUE finish_flag = VM_FRAME_TYPE_FINISH_P(cfp) ? VM_FRAME_FLAG_FINISH : 0;
260 | /* finish_flag is captured before the pop so the replaced frame's FINISH flag carries over to the new frame */
261 | cfp = th->cfp = RUBY_VM_PREVIOUS_CONTROL_FRAME(th->cfp); /* pop cf */
262 |
263 | RUBY_VM_CHECK_INTS(th);
264 |
265 | sp_orig = sp = cfp->sp;
266 |
267 | /* push self */
268 | sp[0] = ci->recv;
269 | sp++;
270 |
271 | /* copy arguments */
272 | for (i=0; i < iseq->param.size; i++) {
273 | *sp++ = src_argv[i];
274 | }
275 |
276 | /* clear local variables */
277 | for (i = 0; i < iseq->local_size - iseq->param.size; i++) {
278 | *sp++ = Qnil;
279 | }
280 |
281 | vm_push_frame(th, iseq, VM_FRAME_MAGIC_METHOD | finish_flag,
282 | ci->recv, ci->defined_class, VM_ENVVAL_BLOCK_PTR(ci->blockptr),
283 | iseq->iseq_encoded + ci->aux.opt_pc, sp, 0, ci->me, iseq->stack_max);
284 | /* restore the previous frame's sp to its value from before self and the arguments were copied in */
285 | cfp->sp = sp_orig;
286 | return Qundef;
287 | }
288 |
--------------------------------------------------------------------------------
/lib/blog_snippets/articles/tuning_dd_block_size/README.md:
--------------------------------------------------------------------------------
1 | Though I wouldn't call myself a dd expert, I have had my fair share of occasions
2 | to wield the might that is dd. From my first job after college using
3 | [KNOPPIX](http://www.knopper.net/knoppix/index-en.html) and dd to rescue NFL
4 | game footage from dying HDDs on behalf of NFL video coordinators, to using dd
5 | this past summer to move [my girlfriend's](http://alilallovertheplace.com/) OSX
6 | installation over to a faster SSD, dd has been an invaluable tool in my Unix
7 | arsenal for almost 10 years.
8 |
9 | Maybe it's because everyone focuses on getting the **of** (output file) argument
10 | right, or maybe there's more to it, but in my time with dd, one aspect of dd's
11 | usage that I've found often overlooked relates to dd's three block size
12 | arguments, **ibs** (input block size), **obs** (output block size), and the all
13 | encompassing **bs** (input and output block size). Don't get me wrong, making
14 | sure you've determined the correct **of** argument is of paramount importance,
15 | but once you've got that nailed down, there's more to be done than breathe a
16 | giant sigh of relief. The various block size arguments that dd takes will be the
17 | deciding factor between whether the copy completes in a day or in two hours.
18 |
19 | ## A little background on block size
20 | A **block** in terms of dd as explained by Wikipedia:
21 | > A block is a unit measuring the number of bytes that are read, written, or
22 | > converted at one time.[^1]
23 |
24 | As such, the various block size arguments tell dd how many bytes should be
25 | copied at once, whether for input, output, or both. By default, most versions of
26 | dd will use a block size of 512 bytes for both input and output.[^2] This may have
27 | been fine pre-1999 when most hard drives had a sector size of 512 bytes, but
28 | in recent years most hard drives have a sector size of at least 4KB (4096
29 | bytes). This change may seem inconsequential but can lead to enormous
30 | inefficiencies when combined with the fact that these days many typical consumer
31 | hard drives have more than a terabyte of capacity. When dealing with a terabyte
32 | or more of data, you **really** want to make sure you choose an optimal block
33 | size.
34 |
35 | There's a useful, though pretty dated, [message in the archive of the Eugene,
36 | Oregon Linux User's Group (Eug-Lug) that offers some perspective on optimal
37 | block sizes for dd](http://www.mail-archive.com/eug-lug@efn.org/msg12073.html)
38 | that can be useful as a jumping off point for your own tests or in those
39 | situations where testing different block sizes isn't feasible.
40 | The findings presented in the message show that for the author's particular
41 | hardware, a block size of about 64K was pretty close to optimal.
42 |
43 | That's nice advice, but without more context it's somewhat meaningless, so let's
44 | perform a few experiments.
45 |
46 | ## Science!
47 | As an example of the impact that an inefficient/optimal block size can have,
48 | I've run a few tests for your consideration. These results are all specific to
49 | my hardware, and though they may offer a rule-of-thumb for similar situations,
50 | it's important to keep in mind that there is no universally correct block size;
51 | what is optimal for one situation may be terribly inefficient for another. To
52 | that end, the tests below are meant to provide a simple example of the benefits
53 | of optimizing the block size used by dd; they are not intended to accurately
54 | replicate real world copy scenarios.
55 |
56 | For simplicity, we will be reading data from */dev/zero*, which should be able
57 | to churn out zeros at a much, much faster rate than we can actually write them, which,
58 | in turn, means that these examples are actually testing optimal output block
59 | sizes and are, more or less, ignoring input block size entirely. Optimizing input
60 | block sizing is left as an exercise for the reader and should be easy enough to
61 | achieve by reading data from the desired disk and writing it out to */dev/null*.
62 |
63 | On with the experiments!
64 |
65 | Let's start off with a few tests writing out to a HDD:
66 |
67 | - Reading from */dev/zero* and writing out to a HDD with the default block size
68 | of 512 bytes yields a throughput of 10.9 MB/s. At that rate, writing 1TB of
69 | data would take about 96,200 seconds or just north of 26 hours.
70 |
71 | - Reading from */dev/zero* and writing out to a HDD with the Eug-Lug suggested
72 | block size of 64K yields a throughput of 108 MB/s. At that rate, writing 1TB
73 | of data would take 9,709 seconds or about 2.7 hours to complete. This is a
74 | huge improvement, nearly an order of magnitude, over the default block size of
75 | 512 bytes.
76 |
77 | - Reading from */dev/zero* and writing out to a HDD with a more
78 | optimal block size of 512K yields a throughput of 131 MB/s. At that rate,
79 | writing 1TB of data would take about 8,004 seconds or about 2.2 hours. Though
80 | not as pronounced a difference, this is even faster than the Eug-Lug
81 | suggestion and is more than a full order of magnitude faster than the default
82 | block size of 512 bytes.
83 |
84 | Let's switch gears and try a couple of experiments writing out to a SSD:
85 |
86 | - Reading from */dev/zero* and writing out to a SSD with the default block size
87 | of 512 bytes yields a throughput of 39.6 MB/s. At that rate writing 1TB of
88 | data would take about 26,479 seconds or about 7.4 hours.
89 |
90 | - Reading from */dev/zero* and writing out to a SSD with the Eug-Lug suggested
91 | block size of 64K yields a throughput of 266 MB/s. At that rate, writing 1TB
92 | of data would take about 3,942 seconds or about 1.1 hours. Once again, this
93 | is a huge improvement, nearly an order of magnitude faster than the default
94 | block size of 512 bytes.
95 |
96 | - Reading from */dev/zero* and writing out to a SSD with a more
97 | optimal block size of 256K yields a throughput of 280 MB/s. At that rate,
98 | writing 1TB of data would take about 3,744 seconds or about 1 hour. Once
99 | again this is faster than both the Eug-Lug suggestion and the default, though
100 | not as much of an improvement as in the HDD case.
101 |
102 | Let's switch gears one last time and try a few experiments writing out to RAM:
103 |
104 | - Reading from */dev/zero* and writing out to RAM with the default block size
105 | of 512 bytes yields a throughput of 221 MB/s. At that rate, writing 1TB of
106 | data would take about 4,745 seconds or about 1.3 hours.
107 |
108 | - Reading from */dev/zero* and writing out to RAM with the Eug-Lug suggested
109 | block size of 64K yields a throughput of 1,433 MB/s. At that rate, writing 1TB
110 | of data would take about 731 seconds or about 12 minutes to complete the
111 | transfer. Once again, this is a huge improvement, nearly an order of
112 | magnitude faster than the default block size.
113 |
114 | - Reading from */dev/zero* and writing out to RAM with a more
115 | optimal block size of 256K yields a throughput of 1,536 MB/s. At that rate,
116 | writing 1TB of data would take about 682 seconds or about 11 minutes. This is
117 | once again faster than the default and the Eug-Lug suggestion, but once
118 | again, pretty comparable to the Eug-Lug suggestion.
119 |
120 | These experiments should help illustrate that depending on the type,
121 | manufacturer, and state of the source and destination media, optimal block sizes
122 | can vary wildly. This should also help demonstrate that on modern hardware the
123 | default block size of 512 bytes tends to be horribly inefficient. That said,
124 | though not always the most optimal, the Eug-Lug suggested block size of 64K can
125 | be a somewhat reliable option for a more modern default.
126 |
127 | ## A pair of scripts to find more optimal block sizes
128 | Because of the wild variance in optimal block sizing, I've written a couple of
129 | scripts to test a range of different input and output block size options for use
130 | prior to starting any large copies with dd. However, before we discuss the
131 | scripts, **be warned that both scripts use dd behind the scenes, so it's
132 | important to use caution when running either script so as to avoid summoning
133 | dd's alter ego, disk destroyer.**[^3] The scripts are short enough that I
134 | encourage you to read both scripts before using either one of them so you have a
135 | better understanding of what is going on behind the scenes. That said, first
136 | we'll look at a script for determining an optimal output block size.
137 |
138 | ### dd_obs_test.sh
139 |
140 | Let's just jump straight into the script:
141 |
142 | ```bash
143 | #!/bin/bash
144 | # Usage: dd_obs_test.sh [test_file] -- times dd writes from /dev/zero to test_file over a range of block sizes
145 | # Since we're dealing with dd, abort if any errors occur
146 | set -e
147 | # Record whether the target pre-exists (0 = yes); tested inside `if` so `set -e` doesn't abort when it is missing
148 | TEST_FILE=${1:-dd_obs_testfile}
149 | if [ -e "$TEST_FILE" ]; then TEST_FILE_EXISTS=0; else TEST_FILE_EXISTS=1; fi
150 | TEST_FILE_SIZE=134217728
151 |
152 | # Header
153 | PRINTF_FORMAT="%8s : %s\n"
154 | printf "$PRINTF_FORMAT" 'block size' 'transfer rate'
155 |
156 | # Block sizes of 512b 1K 2K 4K 8K 16K 32K 64K 128K 256K 512K 1M 2M 4M 8M 16M 32M 64M
157 | for BLOCK_SIZE in 512 1024 2048 4096 8192 16384 32768 65536 131072 262144 524288 1048576 2097152 4194304 8388608 16777216 33554432 67108864
158 | do
159 | # Calculate number of segments required to copy
160 | COUNT=$(($TEST_FILE_SIZE / $BLOCK_SIZE))
161 |
162 | if [ $COUNT -le 0 ]; then
163 | echo "Block size of $BLOCK_SIZE estimated to require $COUNT blocks, aborting further tests."
164 | break
165 | fi
166 |
167 | # Create a test file with the specified block size (path quoted so names with spaces work)
168 | DD_RESULT=$(dd if=/dev/zero of="$TEST_FILE" bs=$BLOCK_SIZE count=$COUNT 2>&1 1>/dev/null)
169 |
170 | # Extract the transfer rate from dd's STDERR output
171 | TRANSFER_RATE=$(echo "$DD_RESULT" | \grep --only-matching -E '[0-9.]+ ([MGk]?B|bytes)/s(ec)?')
172 |
173 | # Clean up the test file if we created one (never delete a file or device that already existed)
174 | if [ "$TEST_FILE_EXISTS" -ne 0 ]; then rm "$TEST_FILE"; fi
175 |
176 | # Output the result
177 | printf "$PRINTF_FORMAT" "$BLOCK_SIZE" "$TRANSFER_RATE"
178 | done
179 | ```
180 |
181 | [View on GitHub](https://github.com/tdg5/blog_snippets/blob/master/lib/blog_snippets/articles/tuning_dd_block_size/dd_obs_test.sh)
182 |
183 | As you can see, the script is a pretty basic for-loop that uses dd to create a
184 | test file of 128MB using a variety of block sizes, from the default of 512
185 | bytes, all the way up to 64M. There are a few extra arguments to the dd command
186 | to make writing out a 128M file easy and there's also some grepping to pull out
187 | the transfer rate, but otherwise, that's pretty much all there is to it.
188 |
189 | By default the command will create a test file named *dd_obs_testfile* in the
190 | current directory. Alternatively, you can provide a path to a custom test file
191 | by providing a path after the script name:
192 |
193 | ```bash
194 | $ ./dd_obs_test.sh /path/to/disk/or/test_file
195 | ```
196 |
197 | The output of the script is a list of the tested block sizes and their respective transfer
198 | rates like so:
199 |
200 | ```bash
201 | $ ./dd_obs_test.sh /dev/null
202 | 512: 1.4 GB/s
203 | 1K: 2.6 GB/s
204 | 2K: 4.3 GB/s
205 | 4K: 6.5 GB/s
206 | 8K: 7.8 GB/s
207 | 16K: 9.0 GB/s
208 | 32K: 8.1 GB/s
209 | 64K: 7.6 GB/s
210 | 128K: 9.8 GB/s
211 | 256K: 7.9 GB/s
212 | 512K: 9.7 GB/s
213 | 1M: 12.8 GB/s
214 | 2M: 8.8 GB/s
215 | 4M: 7.2 GB/s
216 | 8M: 7.3 GB/s
217 | 16M: 5.5 GB/s
218 | 32M: 6.4 GB/s
219 | 64M: 4.0 GB/s
220 | ```
221 |
222 | Wow, I guess [*/dev/null* really is
223 | web-scale.](https://www.youtube.com/watch?v=b2F-DItXtZs&t=1m42s)
224 |
225 | ### dd_ibs_test.sh
226 | Now let's look at a similar script for determining an optimal input block size.
227 | We can follow pretty much the same pattern except for a couple of key
228 | differences: instead of reading from */dev/zero* and writing out the test
229 | file, this script reads from */dev/urandom* to create a test file of random bits
230 | and then uses dd to copy that test file to */dev/null* using a variety of
231 | different block sizes. Since this script creates the test file at the path you
232 | specify, you will want to be careful not to accidentally overwrite an existing
233 | file by pointing the script at an existing path.
234 |
235 | Here's the script:
236 |
237 | ```bash
238 | #!/bin/bash
239 | # Usage: dd_ibs_test.sh [test_file] -- creates test_file from /dev/urandom, then times dd reads of it to /dev/null
240 | # Since we're dealing with dd, abort if any errors occur
241 | set -e
242 | # Record whether the target pre-exists (0 = yes); tested inside `if` so `set -e` doesn't abort when it is missing
243 | TEST_FILE=${1:-dd_ibs_testfile}
244 | if [ -e "$TEST_FILE" ]; then TEST_FILE_EXISTS=0; else TEST_FILE_EXISTS=1; fi
245 | TEST_FILE_SIZE=134217728
246 |
247 | # Exit if file exists, so an existing file is never overwritten or removed
248 | if [ -e "$TEST_FILE" ]; then
249 | echo "Test file $TEST_FILE exists, aborting."
250 | exit 1
251 | fi
252 |
253 | # Create test file
254 | echo 'Generating test file...'
255 | BLOCK_SIZE=65536
256 | COUNT=$(($TEST_FILE_SIZE / $BLOCK_SIZE))
257 | dd if=/dev/urandom of="$TEST_FILE" bs=$BLOCK_SIZE count=$COUNT > /dev/null 2>&1
258 |
259 | # Header
260 | PRINTF_FORMAT="%8s : %s\n"
261 | printf "$PRINTF_FORMAT" 'block size' 'transfer rate'
262 |
263 | # Block sizes of 512b 1K 2K 4K 8K 16K 32K 64K 128K 256K 512K 1M 2M 4M 8M 16M 32M 64M
264 | for BLOCK_SIZE in 512 1024 2048 4096 8192 16384 32768 65536 131072 262144 524288 1048576 2097152 4194304 8388608 16777216 33554432 67108864
265 | do
266 | # Read test file out to /dev/null with specified block size
267 | DD_RESULT=$(dd if="$TEST_FILE" of=/dev/null bs=$BLOCK_SIZE 2>&1 1>/dev/null)
268 |
269 | # Extract transfer rate
270 | TRANSFER_RATE=$(echo "$DD_RESULT" | \grep --only-matching -E '[0-9.]+ ([MGk]?B|bytes)/s(ec)?')
271 |
272 | printf "$PRINTF_FORMAT" "$BLOCK_SIZE" "$TRANSFER_RATE"
273 | done
274 |
275 | # Clean up the test file if we created one
276 | if [ "$TEST_FILE_EXISTS" -ne 0 ]; then rm "$TEST_FILE"; fi
277 | ```
278 |
279 | [View on GitHub](https://github.com/tdg5/blog_snippets/blob/master/lib/blog_snippets/articles/tuning_dd_block_size/dd_ibs_test.sh)
280 |
281 | Similar to the *dd_obs_test.sh* script, this script will create a default test
282 | file named *dd_ibs_testfile* but you can also provide the script with a path
283 | argument to test input block sizes on different devices:
284 |
285 | ```bash
286 | $ ./dd_ibs_test.sh /path/to/disk/test_file
287 | ```
288 |
289 | Again, it is important to remember that the script will try to overwrite the
290 | test file and later will remove the file after it has been written, so use
291 | extreme caution to avoid blowing away something you didn't mean to destroy. It
292 | is likely that you will need to tweak this script to meet your particular use
293 | case.
294 |
295 | Also like *dd_obs_test.sh*, the output of this script is a list of the tested
296 | block sizes and their respective transfer rates like so:
297 |
298 | ```bash
299 | $ ./dd_ibs_test.sh
300 | 512: 1.1 GB/s
301 | 1K: 1.8 GB/s
302 | 2K: 3.0 GB/s
303 | 4K: 4.2 GB/s
304 | 8K: 5.1 GB/s
305 | 16K: 5.7 GB/s
306 | 32K: 5.4 GB/s
307 | 64K: 5.8 GB/s
308 | 128K: 6.3 GB/s
309 | 256K: 5.4 GB/s
310 | 512K: 5.8 GB/s
311 | 1M: 5.8 GB/s
312 | 2M: 5.3 GB/s
313 | 4M: 5.0 GB/s
314 | 8M: 4.9 GB/s
315 | 16M: 4.5 GB/s
316 | 32M: 4.4 GB/s
317 | 64M: 3.5 GB/s
318 | ```
319 |
320 | In the above example it can be seen that an input block size of 128K is optimal
321 | for my particular setup.
322 |
323 | ## The end
324 | I hope this post has given you some insight into tuning dd's block size
325 | arguments and maybe even saved you a day spent transferring blocks 512 bytes at
326 | a time.
327 |
328 | Thanks for reading!
329 |
330 | [^1]: ["A block is a unit measuring the number of bytes that are read, written, or converted at one time."](https://en.wikipedia.org/wiki/Dd_(Unix)#Block_size)
331 | [^2]: [**dd's** ibs (input block size) and obs (output block size) arguments both default to 512 bytes](http://man7.org/linux/man-pages/man1/dd.1.html)
332 | [^3]: ["Some people believe dd means "Destroy Disk" or "Delete Data" because if it is misused, a partition or output file can be trashed very quickly."](http://www.codecoffee.com/tipsforlinux/articles/036.html)
333 |
--------------------------------------------------------------------------------
/lib/blog_snippets/articles/module_factory_for_dependency_management/README.md:
--------------------------------------------------------------------------------
1 | At last year's RubyConf in San Diego, [Craig Buchek](https://twitter.com/craigbuchek)
2 | gave a presentation entitled [Ruby Idioms You're Not Using Yet](https://www.youtube.com/watch?v=hc_wtllfKtQ),
3 | focusing on some of Ruby's under-utilized and emerging idioms. In this post
4 | we'll discuss one of those idioms, an idiom Craig appropriately calls **Module
5 | Factory**. In particular, we'll explore using a Module Factory as a pattern
6 | for dependency and load-order management.
7 |
8 | ## Hey! Who you callin' an idiom?
9 |
10 | For those unfamiliar with idioms or, more likely, unfamiliar with what idioms
11 | refer to in the context of a programming language, Craig presents a number of
12 | different perspectives, my favorite of which is:
13 |
14 | > A style or form of expression that is characteristic of a particular person,
15 | > type of art, etc.[^1]
16 |
17 | Craig also offers his own perspective, which I think helps clarify and distill
18 | this concept further:
19 |
20 | > A way in which we normally express ourselves in a language.
21 |
22 | Though I think this definition captures the idea nicely, I think there's a pearl
23 | of enlightenment to be found in reducing the concept down to its roots:
24 |
25 | > Late Latin idioma, idiomat-, from Greek, from idiousthai, to make one's own,
26 | > from idios, own, personal, private.[^2]
27 |
28 | I find this etymology charming because while formal definitions tend to focus on
29 | existing patterns of language belonging to specific communities and cultures,
30 | the origin of the word hints at a deeper essence that leads ultimately to the
31 | cradle of all idiomatic expression: idioms are an emergent behavior of the
32 | efforts of individuals and communities to make a language their own.
33 |
34 | ## Idioms in Ruby
35 |
36 | In terms of Ruby, let's take a look at a couple of concrete examples of common
37 | Ruby idioms juxtaposed with their less idiomatic counterparts to give ourselves
38 | some grounding. Hopefully you'll agree that within each example, each variation
39 | gets further and further from how you'd expect to see an idea expressed in Ruby.
40 |
41 | ###### Conditional assignment:
42 | ```ruby
43 | # Idiomatic Ruby
44 | a ||= b
45 |
46 | # Less idiomatic
47 | a || a = b
48 |
49 | # And lastly, please don't do this
50 | a = b if a == nil || a == false
51 | ```
52 |
53 | ###### Sequential iteration
54 | ```ruby
55 | # Idiomatic Ruby
56 | 5.times { |i| puts i }
57 |
58 | # Less idiomatic, though more performant
59 | i = 0
60 | while i < 5
61 | puts i
62 | i += 1
63 | end
64 |
65 | # And finally, the dreaded `for` statement
66 | for i in 0..4
67 | puts i
68 | end
69 | ```
70 |
71 | Hopefully, these examples give you a good idea of idioms in Ruby, but if not,
72 | I'd encourage you to watch [Ruby Idioms You're Not Using Yet](https://www.youtube.com/watch?v=hc_wtllfKtQ),
73 | as it provides more examples which may help to further elucidate the concept.
74 |
75 | On with the show!
76 |
77 | ## Module Factory: An Introduction
78 |
79 | The Module Factory pattern as described in the presentation constitutes the use
80 | of some variety of [Factory Method](https://en.wikipedia.org/wiki/Factory_method_pattern)
81 | in place of a reference to a concrete Module when calling **extend** or
82 | **include** from a Class or a Module. This is a fairly technical description, so
83 | let's take a look at the example the presentation uses to demonstrate this
84 | pattern. This example comes from the README for the [Virtus gem](https://rubygems.org/gems/virtus):
85 |
86 | ```ruby
87 | class User
88 | include Virtus.model(:constructor => false, :mass_assignment => false)
89 | end
90 | ```
91 |
92 | [View on GitHub](https://github.com/solnic/virtus/blob/e648e2fe771d715179bddb7b0df9b0169a295ae3/README.md#cherry-picking-extensions)
93 |
94 | Though it may be unclear what is going on here, if we trust that neither the
95 | [Virtus docs](https://github.com/solnic/virtus/blob/e648e2fe771d715179bddb7b0df9b0169a295ae3/README.md#cherry-picking-extensions)
96 | nor the Ruby docs for [Module#include](http://www.ruby-doc.org/core-2.2.0/Module.html#method-i-include)
97 | contain an error, we can use a little deduction to piece together what's going
98 | on:
99 |
100 | - Though the Ruby docs aren't totally explicit about it, **Module#include**
101 | will raise an error unless given one or more Modules. From this we can infer
102 | that **Virtus.model** must be returning one or more Modules.
103 | - A little trial and error in irb further uncovers that though
104 | **Module#include** supports being invoked with multiple Modules, these Modules
105 | cannot be provided in an Array, but must be normal method arguments (or in the
106 | case of an Array, must be exploded with the [splat operator](https://endofline.wordpress.com/2011/01/21/the-strange-ruby-splat/#calling_methods)
107 | into normal method arguments). Since the Virtus docs don't use the splat
108 | operator, we can further narrow our inference to deduce that **Virtus.model**
109 | must be returning a single module.
110 |
111 | Now that we have a clearer understanding of what's going on in this example, it
112 | becomes easier to see how it fulfills our definition of a Module Factory:
113 | Instead of referencing a concrete Module, **Module#include** is invoked with the
114 | result of invoking the **Virtus.model** method. Furthermore, we've deduced that
115 | **Virtus.model** must return a Module of some sort and given the arguments it
116 | takes, it's safe to assume there's some sort of factory logic going on inside.
117 | In fact, this Module Factory allows the including class to cherry-pick a subset
118 | of Virtus' model extensions and include only those selected modules.
119 |
120 | Alright! Not so bad, right? Now that we've got one Module Factory under our
121 | belt, let's take a look at how the Module Factory pattern can help with
122 | dependency management and load ordering.
123 |
124 | ## A job for refactoring
125 |
126 | In order to provide some context for our discussion, let's start with some example
127 | code that I think could benefit from a refactoring to use the Module Factory
128 | pattern. For the sake of brevity, this code is non-functional and skips many of
129 | the details that don't impact our particular interests. That said, the code
130 | below should have a familiar flavor to anyone who has worked with an
131 | asynchronous job framework in the past, such as
132 | [Resque](https://github.com/resque/resque),
133 | [Sidekiq](https://github.com/mperham/sidekiq),
134 | [Backburner](https://github.com/nesquena/backburner), or
135 | [Rails' ActiveJob](https://github.com/rails/rails/tree/master/activejob).
136 |
137 | The example code outlines the skeleton of a job class that performs some
138 | undefined unit of work. For those unfamiliar with any of the job frameworks I
139 | mentioned above, the typical usage pattern for such a framework tends to involve
140 | subclassing a class provided by the job framework which encapsulates and handles
141 | most of the required behaviors of a job. In the example below, this role is
142 | filled by the fictitious class **JobFramework::Job**.
143 |
144 | Generally, by subclassing a class like **JobFramework::Job**,
145 | the subclass agrees to an interface contract that typically requires the
146 | subclass to implement a **perform** method at the instance level. This pattern
147 | is also followed in the example below, as can be seen by the **perform**
148 | instance method on the **ImportantJob** class.
149 |
150 | One final point worth discussing before getting into the example is that the job
151 | classes provided by many job frameworks tend to provide an **around_perform**
152 | method hook or similar functionality to allow for adding middleware-type
153 | behavior around job execution in a generic, unobtrusive way. The example below
154 | also borrows this pattern, however it can be inferred that **JobFramework::Job**
155 | provides this behavior in a very naive manner that relies heavily upon the class
156 | hierarchy and repeated calls to **super**.
157 |
158 | OK, that should be enough background, on to the example!
159 |
160 | **important_job.rb**
161 |
162 | ```ruby
163 | class ImportantJob < JobFramework::Job
164 | # NineLives must be included before ExceptionNotification,
165 | # otherwise up to nine alert emails will be sent per failed
166 | # job and in many cases, exception notifications will be
167 | # sent when the job didn't actually fail!
168 | include NineLives
169 | include ExceptionNotification
170 |
171 | def perform(*args)
172 | # Important work
173 | end
174 | end
175 | ```
176 |
177 | **job_extensions.rb**
178 |
179 | ```ruby
180 | module NineLives
181 | def around_perform(*args)
182 | retry_count = 0
183 | begin
184 | super
185 | rescue TransientError
186 | if retry_count < 9
187 | retry_count += 1
188 | retry
189 | else
190 | raise
191 | end
192 | end
193 | end
194 | end
195 |
196 | module ExceptionNotification
197 | def around_perform(*args)
198 | super
199 | rescue
200 | # dispatch an email notification of the exception
201 | end
202 | end
203 | ```
204 |
205 | Here's a quick rundown of what we can expect the lifetime of an execution of the
206 | **ImportantJob** class to look like:
207 |
208 | 1. Some code somewhere else in the codebase calls **ImportantJob.perform**.
209 | This class level **perform** method is provided by **JobFramework::Job** as a
210 | convenience method to enqueue an **ImportantJob** to be completed
211 | asynchronously.
212 | 2. Elsewhere, a worker process, also typically running code provided by the job
213 | framework, pops the job off of the job queue and instantiates a new instance
214 | of the **ImportantJob** class with the provided arguments. The internals of
215 | the worker process then take steps to execute the job which causes the
216 | **around_perform** method of the instance to be executed. Normally, the
217 | invocation of **around_perform** would simply cause **ImportantJob#perform**
218 | to be executed, however, since we've overwritten **around_perform** a couple
219 | of times, the behavior in the example is not so simple. The first version of
220 | **around_perform** that will be executed, perhaps counterintuitively, is the
221 | version from the last module we included in **ImportantJob**, **ExceptionNotification#around_perform**.
222 | 3. **ExceptionNotification#around_perform** immediately calls
223 | **super**, but includes a rescue block that catches any errors that bubble up
224 | and, hypothetically, dispatches email alerts about those exceptions. The
225 | invocation of **super** triggers the **around_perform** method from the first
226 | module we included in **ImportantJob**, **NineLives#around_perform**.
227 | 4. **NineLives#around_perform** is more involved, but its goals
228 | are pretty simple: Similar to **ExceptionNotification#around_perform**, it
229 | calls **super** almost immediately but adds some special error handling that
230 | catches errors of the **TransientError** class. The error handling will retry
231 | the call to **super** up to 9 times if the **TransientError** exception
232 | continues to occur. After 9 times, the error will be raised up to
233 | **ExceptionNotification** at which point an email should be dispatched. The
234 | call to **super** this time around invokes the original **around_perform**
235 | method, **JobFramework::Job#around_perform**, which as we discussed earlier,
236 | invokes **ImportantJob#perform**.
237 |
238 | Now that we've got a solid understanding of the example job, let's see how using
239 | the Module Factory pattern could benefit this class.
240 |
241 | ## What's wrong with a well written comment?
242 |
243 | You may already have an intuition for where we should begin our refactoring to
244 | introduce a Module Factory, but if you don't, that's fine too. Personally, I'm
245 | inclined to start with the very first line of the **ImportantJob** class. No,
246 | not **include NineLives**. The honking four line comment that explains why the
247 | **NineLives** module must be included before the **ExceptionNotification**
248 | module. In a small enough codebase, the current form of **ImportantJob** might
249 | be fine, but if that codebase is likely to grow, or if the codebase is already
250 | of reasonable size, I'd argue that the comment and the rigid load-order are bad
251 | news.
252 |
253 | You may have your own arguments for or against the current implementation, but
254 | here are my arguments against:
255 |
256 | - That whopper of a comment is going to be repeated in every other job class
257 | that uses both the NineLives and ExceptionNotification modules (and if it's
258 | not, it should be). Trust me, I've seen it happen. Not only is this a
259 | violation of [DRY](https://en.wikipedia.org/wiki/Don%27t_repeat_yourself),
260 | but because it's a comment it's pretty likely to mutate and/or deteriorate
261 | with each subsequent duplication. Eventually this leads to a situation
262 | where a newcomer to the code base doesn't know which version of the comment is
263 | accurate, or, alternatively, you end up with some job classes that tag
264 | **include NineLives** simply with "Must be included before
265 | ExceptionNotification" and no additional explanation. After this reduction,
266 | the comment starts to disappear entirely.
267 | - Without the comment, there is no other clue that there is a load-order
268 | dependency between these two modules. Obviously, this is why the comment was
269 | added, but a comment can't help the situation where a job class that already
270 | includes **NineLives** now needs to include **ExceptionNotification**, or vice
271 | versa. If the dev making the change is lucky enough to have seen the comment
272 | elsewhere in the codebase, or another dev happens to catch the issue in a code
273 | review, maybe you can avoid a Spam dinner, but if not, it's
274 | Spam-a-lam-a-ding-dong until the next deploy goes out.
275 | - What happens when another load-order dependency is added with the inclusion of
276 | a new module? Another giant comment in every class that needs some combination
277 | of the three modules? One giant comment that tries to encompass all the
278 | permutations in a generic fashion? How would you feel if the purpose of the
279 | **ImportantJob** class was to perform a payment on a loan and the newly
280 | included module was added to lower someone's credit score every time an
281 | exception bubbled out of **NineLives#around_perform**? It's a bit of a
282 | stretch, but don't think that financial systems are immune to these
283 | situations, and I certainly hope they're using a better design than repeated
284 | comments.
285 |
286 | One could certainly make the argument for handling this issue by introducing
287 | another module to encapsulate the load-order dependency, but in my experience
288 | that doesn't actually solve any of these problems, but instead, it just moves
289 | the problems into other parts of the codebase or mutates them into slightly
290 | different issues.
291 |
292 | While we could explore alternative solutions for handling this situation all
293 | day, let's move on and get an idea of how a Module Factory could be used to
294 | address all of the concerns I've raised.
295 |
296 | ## A Module Factory for job extensions
297 |
298 | Before we look at how we might go about implementing a Module Factory to address
299 | the issues I raised above, let's take a look at what **ImportantJob** might look
300 | like after we refactored it to use a Module Factory.
301 |
302 | ```ruby
303 | class ImportantJob < JobFramework::Job
304 | include JobExtensions.select(:exception_notification, :nine_lives)
305 |
306 | def perform(*args)
307 | # Important work
308 | end
309 | end
310 | ```
311 |
312 | We have to make some assumptions for now, but hopefully you'll agree that this
313 | is already a significant improvement.
314 |
315 | We can't yet make a determination on the ultimate fate of the comment because
316 | it's no longer included in **ImportantJob**, but this by itself is a good sign.
317 | Realistically, I don't think there was ever hope of going completely comment
318 | free, but, at least for the moment, things have a much DRYer feeling.
319 |
320 | Otherwise, there's still no hint that a load-order dependency exists somewhere,
321 | but given the order of the arguments to **JobExtensions.select**, we can hope it
322 | doesn't matter anymore. If the order of the arguments truly doesn't matter, then
323 | this also helps the situation where someone wants to add **ExceptionNotification**
324 | to a class that already includes **NineLives**, as it seems like they could just
325 | add the snake-cased name of the extension to the list of selected extensions and
326 | continue on their way. The same applies for any new extension that might be
327 | added in the future. In fact, the use of the snake-cased names actually involves
328 | less coupling than the original version because though the snake-cased names
329 | match the module names in this case, there really is no need for the module name
330 | and the snake-cased name passed to the factory method to match. This means that
331 | the module implementing :nine_lives could change to an entirely different module
332 | with fewer repercussions to the codebase.
333 |
334 | So far, so good. So what kind of sorcery is required to make this interface
335 | possible? Behold! The **JobExtensions** module:
336 |
337 | ```ruby
338 | module JobExtensions
339 | def self.select(*selected_extensions)
340 | Module.new do
341 | # NineLives must be included before ExceptionNotification,
342 | # otherwise up to nine alert emails will be sent per failed
343 | # job and in many cases, exception notifications will be
344 | # sent when the job didn't actually fail!
345 | if selected_extensions.include?(:nine_lives)
346 | include NineLives
347 | end
348 | if selected_extensions.include?(:exception_notification)
349 | include ExceptionNotification
350 | end
351 | end
352 | end
353 | end
354 | ```
355 |
356 | Maybe a little magical, but certainly not sorcery; in fact, it looks a lot like
357 | we took the comment and includes from the former version of **ImportantJob**,
358 | added some conditional logic, and wrapped all that in a **Module.new** block.
359 | What's going on here?
360 |
361 | I suspect I don't need to explain the internals of the block, but **Module.new**
362 | is definitely worth taking a closer look at on its own.
363 |
364 | [Module.new](http://www.ruby-doc.org/core-2.2.0/Module.html#method-c-new) is
365 | the more metaprogramming-friendly version of your standard module declaration
366 | using the **module** keyword. In fact, when used with a block, it's even more
367 | similar to a standard module declaration than might be obvious because in the
368 | context of the block the target of **self** is the module being constructed.
369 | This behavior is what allows us to make normal calls to **include** without
370 | having to use an explicit receiver or having to call **send**.
371 |
372 | For our particular purposes, **Module.new** does offer one advantage over the
373 | **module** keyword worth mentioning. Because **Module.new** uses a block, a
374 | closure is created that allows us to reach outside of the block and access the
375 | list of **selected_extensions** while building the new module. Access to this
376 | list is crucial to our Module Factory's ability to build a customized module on
377 | demand. Without access to the list we'd have to figure out another way to
378 | assemble the desired module, which is certainly doable, but would be less
379 | pleasant to look at and would require using **send** to circumvent the generated
380 | module's public access rules.
381 |
382 | Other than the call to **Module.new**, I expect everything else in this factory
383 | method should make sense. We've found our missing comment and can be fairly
384 | confident that in this form it's unlikely to be repeated. If it is repeated in
385 | the future, it will likely be a modified version that documents the load-order
386 | gotchas of a different extension that this Module Factory supports. While there
387 | is probably a better way to document the specifics of this particular load-order
388 | requirement, I'm much less concerned with many similar comments documenting
389 | similar behavior inside a particular method than I am with the same spread all
390 | across the codebase in any number of unaffiliated jobs.
391 |
392 | ## Before you get too excited: A couple of trade offs
393 |
394 | Though the Module Factory we've built certainly helps deal with handling the
395 | load-order logic in a DRY fashion, there are a couple of potential trade offs
396 | that I should mention. These issues can be addressed, but I won't go into great
397 | detail about how to address them. The good news, though, is that both trade offs
398 | are solved by pretty much the same code.
399 |
400 | The first trade off is that generating a module dynamically like we did above
401 | produces a more anonymous module than you might be used to seeing if you usually
402 | create modules using the **module** keyword. For example, here's the fictitious
403 | ancestry of the **ImportantJob** class:
404 |
405 | ```ruby
406 | ImportantJob.ancestors
407 | # => [
408 | # ImportantJob, #<Module:0x007f9a1b8c5d20>,
409 | # JobFramework::Job, Object, Kernel, BasicObject
410 | # ]
411 | ```
412 |
413 | That funky Module between **ImportantJob** and **JobFramework::Job** is our
414 | generated module. Though we've handled the load-order issue in a more robust
415 | fashion, we've obscured the class hierarchy which makes it harder to find
416 | information about the class via interrogation or examination.
417 |
418 | To get some insight into the second trade off introduced by the Module Factory
419 | pattern, let's pretend we've created another job class, **ReallyImportantJob**,
420 | that is an exact duplicate of **ImportantJob**, except named differently. What
421 | does the class hierarchy for **ReallyImportantJob** look like?
422 |
423 | ```ruby
424 | ReallyImportantJob.ancestors
425 | # => [
426 | # ReallyImportantJob, #<Module:0x007f9a1c0e4a68>,
427 | # JobFramework::Job, Object, Kernel, BasicObject
428 | # ]
429 | ```
430 |
431 | What may not be clear from this output is that though the two job classes are
432 | made up of the exact same code and modules, each generates its own special
433 | module when the **JobExtensions.select** factory method is called. This can be
434 | seen in the output above in that each of the generated modules is identified
435 | by a different memory address. This might not be the end of the world in a small
436 | codebase, but it should make it clear that every class is going to generate its
437 | own version of the module, even if one matching the requested requirements
438 | already exists. This is obviously inefficient in terms of time and memory, but
439 | it also adds another complication to understanding a class by interrogation or
440 | inspection because though another dev might expect the class hierarchies of
441 | **ImportantJob** and **ReallyImportantJob** to include the same modules, they
442 | don't, but they do, but they don't.
443 |
444 | So what's the solution? Well, it turns out both issues can be solved by dealing
445 | with some naming issues. In terms of the first trade off, the anonymous module,
446 | Ruby uses an anonymous name because we never assigned the module to a constant.
447 | This is one of the implicit benefits of the **module** keyword: you assign the
448 | module to a constant at inception. So, if we can come up with a way to generate
449 | a name for the generated module, all we need to do is assign a constant with the
450 | generated name to point to the generated module and Ruby will use that name to
451 | refer to the generated module.
452 |
453 | Though it's not obvious, generating a name also helps us to address the second
454 | trade off of generating a new module every time the factory method is invoked. A
455 | name helps solve this problem because if we can generate a name that uniquely
456 | identifies the contents of a generated module and assign the appropriate
457 | constant, we can also check that constant in the future before generating a new
458 | module. If the constant is defined, we return the previously generated module,
459 | if not, we generate a new module and assign it to the constant.
460 |
461 | In terms of our example job, the actual implementation is left to the reader as
462 | an exercise, but generating a name that uniquely identifies each generated
463 | module could be as simple as creating a string from the sorted, title-cased
464 | collection of extensions that are used in the module being named. Title casing
465 | is important for readability, consistency, and so Ruby will accept the name as a
466 | constant.[^3] Sorting is also important because, at least in the case of our
467 | example, we don't want the order of the arguments to change the name of the
468 | module being created because whether **:exception_notification** is passed in before
469 | **:nine_lives**, or vice versa, both invocations should generate and refer to
470 | the same module. This naming pattern still has some problems because it is still
471 | unclear what the module does, but it is at least a little better than the module
472 | being identified by its raw memory address.
473 |
474 | ## Closing thoughts
475 |
476 | Though it may not feel like it, this post has really only scratched the surface
477 | of the power and potential of the Module Factory pattern. Though we've discussed
478 | how it can be used to improve code readability, maintainability, reliability, and
479 | flexibility, there's really a lot more opportunity out there. And so, rather
480 | than summarize what we've covered in this post, I'll leave you to ponder these
481 | possibilities:
482 |
483 | - As evidenced by [Kernel#Array](http://www.ruby-doc.org/core-2.2.0/Kernel.html#method-i-Array)
484 | and [Kernel#Integer](http://www.ruby-doc.org/core-2.2.0/Kernel.html#method-i-Integer)
485 | Ruby doesn't require method names to start with a lowercase letter. How might
486 | a method with a title-cased name be used to complement the Module Factory
487 | pattern? Are there trade offs that come with this type of naming convention?
488 | - Ruby method names don't need to be words at all, take for example
489 | [Hash::[]](http://ruby-doc.org/core-2.1.5/Hash.html#method-c-5B-5D). How might
490 | an operator style of method name pair with the Module Factory pattern?
491 | - How else could the power of a method call be leveraged for Module Factory
492 | awesomeness? What magic could be yielded (pun intended!) by a factory method
493 | that takes a block? How might keyword arguments, Hash arguments, or splat
494 | arguments be leveraged in combination with a Module Factory?
495 | - If you've ever used a framework that uses dependency injection like
496 | JavaScript's AngularJS, then the examples above may have caused your Spidey
497 | sense to tingle. How might the Module Factory pattern be used for dependency
498 | injection in Ruby?
499 |
500 | [^1]: Source: [Merriam-Webster](http://www.merriam-webster.com/dictionary/idiom)
501 | [^2]: Source: [thefreedictionary.com](http://www.thefreedictionary.com/idiom)
502 | [^3]: A third-party library like [**ActiveSupport**](https://rubygems.org/gems/activesupport) can make the work of title casing the string trivial.
503 |
--------------------------------------------------------------------------------
/lib/blog_snippets/articles/on_the_road_from_ruby_journeyman_to_ruby_master/README.md:
--------------------------------------------------------------------------------
1 | Mind-blowingly awful are really the only words that come to mind to describe my
2 | first bunch of Ruby scripts.[^1] Sure, this is probably unfair and over-critical
3 | given that Ruby, algorithms, and the whole shebang were all new to me at the
4 | time, but *damn*. There are so many decisions I can't even begin to comprehend
5 | or defend today.
6 |
7 | I imagine few Ruby devs still have their first scripts available to reflect on.
8 | This may be for the best, yet, as I looked over a few of my early scripts this
9 | past weekend, I began to ponder the value of occasionally revisiting old code
10 | samples to better gauge one's progress and get a periodic sense of perspective.
11 | Similarly, I also found myself contemplating the value of occasionally taking a
12 | step away from production code to draw a new line in the sand recording one's
13 | state as a developer in that moment; a coded testament of one's values, whether
14 | in terms of syntax, tradeoffs, or any number of other metrics; a mile marker
15 | somewhere along the road from Ruby journeyman to Ruby master.
16 |
17 | To that end, in this article I'll be sharing and discussing one of those early
18 | scripts. From there, I'll also leave behind a new mile marker by taking a stab
19 | at how I might solve the same problem today. With any luck, we'll all learn
20 | something along the way, and if not, it seems like I'll be back to rant about
21 | the inferior quality of my past work in no time. For now though, onward!
22 |
23 | ## When Danny met Ruby...
24 |
25 | Back in 2009, at the encouragement of my stepfather who thought the future had
26 | great things in store for Ruby and Rails (boy, was he wrong!), I began to
27 | explore the Ruby programming language by trying to solve a few of the math-heavy
28 | programming problems over at [Project Euler](https://projecteuler.net/). Up
29 | until this point, I'd only ever done any "programming" in Basic and Visual
30 | Basic, as these were the focus of the programming courses taught at my high
31 | school. I'd argue that I got pretty advanced in my usage of Visual Basic, going
32 | so far as to develop a reasonable grasp on the Win32 API, but given my present
33 | distaste for my early Ruby code, I can only imagine that my earlier VB code must
34 | have been transcendently awful. In VB, I'd only ever written small utilities and
35 | weak attempts at games, so using Ruby to efficiently solve what were essentially
36 | math problems was new territory for me.
37 |
38 | For each problem that I attempted, I followed two rules. First, and obviously,
39 | the computed solution had to be correct. Second, the script had to run to
40 | completion in less than one minute. I don't remember if the second rule was
41 | stipulated from the beginning or if my naive tendency toward brute-force
42 | solutions prompted my stepfather to introduce the rule, but I definitely
43 | remember struggling to get my scripts to run in less than a minute at various
44 | times. For anyone getting started with this type of endeavor, it's definitely a
45 | great constraint to have in place. That said, the problem we're going to look at
46 | today isn't one of those long-running problems; in fact, even my early attempts
47 | at solving the problem take less than a second to run. Let's have a look, shall
48 | we?
49 |
50 | ## Problem 8: Largest Product in a Series
51 |
52 | Though it's not the first problem I solved, [Problem 8: Largest Product in a
53 | Series](https://projecteuler.net/problem=8), seems like a problem of sufficient
54 | complexity to merit a bit of discussion. For your convenience here is the full
55 | text of the question:
56 |
57 | >The four adjacent digits in the 1000-digit number that have the greatest
58 | >product are 9 × 9 × 8 × 9 = 5832.
59 | >
60 | > 73167176531330624919225119674426574742355349194934
61 | > 96983520312774506326239578318016984801869478851843
62 | > 85861560789112949495459501737958331952853208805511
63 | > 12540698747158523863050715693290963295227443043557
64 | > 66896648950445244523161731856403098711121722383113
65 | > 62229893423380308135336276614282806444486645238749
66 | > 30358907296290491560440772390713810515859307960866
67 | > 70172427121883998797908792274921901699720888093776
68 | > 65727333001053367881220235421809751254540594752243
69 | > 52584907711670556013604839586446706324415722155397
70 | > 53697817977846174064955149290862569321978468622482
71 | > 83972241375657056057490261407972968652414535100474
72 | > 82166370484403199890008895243450658541227588666881
73 | > 16427171479924442928230863465674813919123162824586
74 | > 17866458359124566529476545682848912883142607690042
75 | > 24219022671055626321111109370544217506941658960408
76 | > 07198403850962455444362981230987879927244284909188
77 | > 84580156166097919133875499200524063689912560717606
78 | > 05886116467109405077541002256983155200055935729725
79 | > 71636269561882670428252483600823257530420752963450
80 | >
81 | > Find the thirteen adjacent digits in the 1000-digit number that have the
82 | > greatest product. What is the value of this product?
83 |
84 | It's worth noting that the requirements of the problem were modified in 2014 to
85 | encourage more programmatic solutions to the exercise. More specifically, the
86 | question originally asked for the largest product of not 13 adjacent digits but
87 | of just 5 adjacent digits in the 1000-digit number. A minor difference, but one
88 | that will, at the very least, help better explain at least one of the decisions
89 | I made in my 2009 solution.
90 |
91 | To that end, a modified version of my 2009 solution appears below. The solution
92 | has been modified from its original form in two ways. First, as necessitated by
93 | the change in the problem requirements, the solution has been extended, in a
94 | manner consistent with the original solution, to handle runs of 13 digits.
95 | Second, rather than repeat the 1000-digit number, we will assume it is stored in
96 | the constant NUMBER as a Bignum. I won't explain the solution, but hopefully my
97 | discussion of it should help fill in any gaps in understanding. Instead, I'll
98 | jump right into my thoughts on the shortcomings of this script.
99 |
100 | ### 2009 Edition
101 |
102 | ```ruby
103 | a=NUMBER.to_s
104 | big = 0
105 | for i in 1..(987)
106 | su=a[i,1].to_i*a[i+1,1].to_i*a[i+2,1].to_i*a[i+3,1].to_i*a[i+4,1].to_i*a[i+5,1].to_i*
107 | a[i+6,1].to_i*a[i+7,1].to_i*a[i+8,1].to_i*a[i+9,1].to_i*a[i+10,1].to_i*a[i+11,1].to_i*
108 | a[i+12,1].to_i
109 | if su>big
110 | big=su
111 | end
112 | end
113 | puts big
114 | ```
115 |
116 | #### Where's the whitespace?
117 |
118 | The first thing that strikes me about this script, and many of the others I've
119 | reviewed from this period, is the omission of optional spaces. This is one of
120 | those situations where I can't even begin to understand what I was thinking.
121 | Given that I do add optional spaces in at least one place, we can rule out the
122 | possibility that my spacebar was broken. This being the case, I'm inclined to
123 | believe I simply wasn't thinking about it, but it seems so blatantly obvious to
124 | me now that I find this hard to believe.
125 |
126 | It is certainly possible that I had no notion of (or concern for) readability.
127 | It's also possible that my mental parser was in a sufficiently unformed,
128 | immature, or plastic state that the omission of optional spaces felt as readable
129 | to me then as when optional spaces were included. This seems a bit unfathomable
130 | now, but that's really all I can come up with.
131 |
132 | In the JavaScript world, you will sometimes see libraries that achieve some feat
133 | in less than 1KB or some other very minimal file size. In JavaScript, where
134 | libraries are typically transmitted over the wire to web browsers across the
135 | world, this type of optimization can be desirable to reduce the size of the
136 | payload being transmitted (though it really should be the job of a minifier).
137 | But in Ruby, where libraries typically live on the server, there is no benefit
138 | to this type of optimization as far as I'm aware. If there is a benefit to this
139 | approach that I am unaware of, I can assure you it's not what I was striving for
140 | at the time.
141 |
142 | #### Hmm, seems like a loop might help...
143 |
144 | Next on my list of grievances is the ginormous series of substring accesses of
145 | the form **a[i+n, 1]**. First, let's get it out of the way that the second
146 | argument to **String#[]** is totally useless here, being as it is that the
147 | default behavior is to return the 1-character substring located at the index
148 | given by the first argument. Normally, this might be an excusable offense, but
149 | given that this snippet could benefit from some serious DRYing, it's a little
150 | more intolerable because the extraneous argument would have to be removed in 13
151 | different places.
152 |
153 | Given that this seems like an obvious situation for a loop of some sort, why the
154 | no loop? In this particular case, I do have some recollection of my thinking,
155 | and I'm fairly certain that forgoing a loop was a conscious decision. If you'll
156 | recall, the problem at the time was concerned with 5 consecutive digits instead
157 | of 13 which made the repeated code a little more manageable and perhaps even
158 | tolerable.
159 |
160 | At the time, I may have hoped to gain some performance by skipping the loop and
161 | retrieving each element directly, though this concept seems like it would have
162 | been too advanced for my thinking at the time. Instead, I'm inclined to believe
163 | that I may have chosen five direct accesses because it was easier for me at the
164 | time than setting up a loop, though I'm not sure. Though skipping the loop is a
165 | teensy bit faster, it's clearly not DRY and it also hardcodes an implicit
166 | over-specification into the solution that makes it very difficult to change the
167 | length of the series of adjacent digits that should be tested. As such, to
168 | update the code to test a series of 13 digits, I had to more than double the
169 | number of element accesses, moving the code even further from the goals of DRY.
170 |
171 | If it's not already clear, using a simple loop would have been a better choice.
172 | Though insignificantly slower, a simple loop would make the code much DRYer
173 | while also enabling the solution to be more generic. This would better prepare
174 | the solution to handle any number of adjacent digits while also making the code
175 | easier to read, follow, and understand. Generality definitely wasn't something
176 | that was on my mind in solving this problem as we'll see again in a moment.
177 |
178 | #### Maybe one loop was a better choice...
179 |
180 | Though we can hopefully agree that it seems like a loop would have been a better
181 | choice in the situation above, there are enough problems with that loop already
182 | used that it starts to seem like utilizing another loop might not have been a
183 | good idea. The loop already in use is a **for loop** operating over a range of
184 | Integers that allows for traversing the vector of digits. There are a number of
185 | things about this loop that are less than ideal, some more obvious than others.
186 |
187 | One thing that may stick out to more experienced Rubyists is the choice of a
188 | **for loop** over other alternatives. Though not technically wrong, the **for
189 | loop** is not commonly seen in Ruby and typically more idiomatic loop primitives
190 | are used instead. Another thing that may stick out to more experienced Rubyists
191 | is the unnecessary use of parentheses around the terminal Integer or upper bound
192 | of the Range expression. Again, not wrong per se, but certainly an indicator of
193 | my noob status and perhaps an indicator that I didn't fully grok the Range
194 | expression and perhaps thought I was calling a dot method on the Integer class,
195 | like **Integer#.**, that returned a Range instance when invoked with an Integer
196 | for an argument. Novel perhaps, but wrong.
197 |
198 | Returning to the topic of generality, the loop also hardcodes **two** more
199 | over-specifications into the solution that make the solution more rigid and less
200 | reusable. As if this weren't bad enough, the two over-specifications interact
201 | with each other in such a way that it's not obvious what's going on. In fact,
202 | they're both encapsulated in the seemingly random choice of 987 for the upper
203 | bound of the Range. Being as astute as you are, I imagine if you were paying
204 | attention to the problem description then you've already surmised that 987 is
205 | none other than 1,000, the length of the input number, minus 13, the length of
206 | the run of adjacent digits we're calculating the product of. This upper bound
207 | makes sure our product calculations don't overflow the length of the provided
208 | number. Duh, right?
209 |
210 | Wrapped up there in one little number are three flavors of weak. First, the
211 | hardcoded reference to 1,000 means we won't be able to reliably use this solution
212 | on a similar problem that features a number that is anything other than exactly
213 | 1,000 digits. Second, the hardcoded reference to 13 means yet another place an
214 | update will be required in order to mutate the solution to handle runs of
215 | lengths other than 13. Finally, both of these facts are obscured by the use of
216 | the precalculated value of 987 for the upper bound of the range. Instead of
217 | hardcoding the value, calculating the upper bound by taking the difference of
218 | the length of **NUMBER** and the desired length of adjacent digits would be
219 | better. Having no reliance on knowing the length of **NUMBER** would be even
220 | better, if possible.
221 |
222 | One final point about the loop before we move on: it's wrong! Given the
223 | magnitude of the wrongness, you may prefer to think of it as a bug, but at the
224 | end of the day, it's just plain old wrong. The problem is that the Range starts
225 | at 1, which translates to index 1 of the stringified **NUMBER**. Starting with
226 | index 1 means that the digit at index 0 is totally ignored, which means that if,
227 | by some chance, the 13 consecutive digits with the largest product were the
228 | first 13 digits, this solution would fail to find the correct product. Whether
229 | you call this a bug or broken, it's bad news. So yeah, maybe one loop was the
230 | way to go.
231 |
232 | #### A final look back at 2009
233 |
234 | Before we look at how I might solve this problem today, I want to make two final
235 | points about my 2009 solution. First, the variable names suck. The only variable
236 | name that comes close to being tolerable is **big**, and even that isn't great.
237 | Finally, a compliment. Despite all of its problems, my 2009 solution does excel
238 | as an example of the lowest of low Ruby newbie code. Certainly, that's a
239 | back-handed compliment, but I really could not have written an example like this
240 | today if I wanted to: it simply would have felt far too contrived.
241 |
242 | With the past firmly behind us, let's take a look at how I might solve this
243 | problem today.
244 |
245 | ### Solution 2015
246 |
247 | ```ruby
248 | # Project Euler #8 - Largest product in a series
249 | # https://projecteuler.net/problem=8
250 | #
251 | # Find the thirteen adjacent digits in the 1000-digit number that have the
252 | # greatest product. What is the value of this product?
253 |
# Finds the greatest product of adjacency_length adjacent digits in number.
#
# number           - The Integer to search. It is stringified with #to_s and
#                    every resulting character is trusted to be a digit 0-9.
# adjacency_length - The length of each run of adjacent digits to multiply;
#                    must be at least 1 (default: 13).
#
# Returns the largest product found as an Integer, or 0 when no run of
# adjacency_length digits without a zero exists (for example, when number has
# fewer than adjacency_length digits).
# Raises ArgumentError if adjacency_length is less than 1.
def largest_product_in_series(number, adjacency_length = 13)
  # Without this guard, a run length below 1 never waits for the window to
  # fill, so the loop would quietly report the largest single digit instead.
  if adjacency_length < 1
    raise ArgumentError, 'adjacency_length must be at least 1'
  end

  series = number.to_s
  zero_ord = '0'.ord
  factors = []
  largest_product = 0
  current_product = 1
  series.each_char do |digit_char|
    # This String-to-Integer conversion assumes we can trust our input will only
    # contain digits. If we can safely assume this, calling String#ord and then
    # subtracting the ordinal of the String '0' will work faster than
    # String#to_i.
    new_factor = digit_char.ord - zero_ord

    # If our new_factor is zero, we know that the product of anything
    # currently in our collection of factors will be zero. So, rather than
    # work through that, just drop the current set of factors, drop the
    # zero, reset our current product, and move on to the next iteration.
    if new_factor.zero?
      factors.clear
      current_product = 1
      next
    end

    factors << new_factor
    current_product *= new_factor
    # Keep accumulating until the queue holds a full run of digits.
    next if factors.length < adjacency_length

    largest_product = current_product if current_product > largest_product
    # Slide the window: divide out the oldest digit so the next digit can be
    # multiplied in. The division is always exact and never by zero because
    # zeros are never added to the queue.
    current_product /= factors.shift
  end
  largest_product
end
286 |
287 | puts largest_product_in_series(NUMBER)
288 | ```
289 |
290 | I think I'm still too close to this solution to offer much objective criticism,
291 | so though I'll touch on a few concerns later, for the most part, we'll leave
292 | criticism to future-Danny to worry about. So, let's start by seeing how the
293 | updated solution fares in regard to some specific points that were brought up
294 | while dissecting my 2009 solution. After that, we'll look at some new goodness
295 | it brings to the table. Like the 2009 solution, I won't explain exactly what's
296 | going on, but hopefully the discussion below and included comments will suffice
297 | to convey the intention of the code.
298 |
299 | #### Lessons learned
300 |
301 | Here's a brief rundown of a few of the concerns I raised about the 2009 solution
302 | and how those concerns have fared in the 2015 solution:
303 |
304 | - Spacing is kind of funny in that you might not think about it if it's there,
305 | but if it's missing you'll definitely notice. Whether you noticed the
306 | additional white space or not, hopefully you'll agree that the use of
307 | consistent white space makes this solution much more readable than its
308 | counterpart.
309 |
310 | - Variable names, like white space, can be a little funny too given how personal
311 | and subjective they tend to be. Whether you think the variable names used in
312 | the updated solution are great, too short, too long, or just a little off,
313 | hopefully we can all agree they are a significant improvement over the
314 | variable names of the 2009 solution.
315 |
316 | - In terms of rigidity and over-specificity, the 2015 solution is much more
317 | flexible and generic. It has no dependency on the length of the number given,
318 | meaning the provided number could be 1,000 digits long or 10,000 digits long.
319 | Though it still needs to know how long a run of digits should be tested, it is
320 | not hardcoded to a certain length. A default length of 13 is used, but this
321 | can easily be overridden by invoking the **largest_product_in_series** method
322 | with a specific value for **adjacency_length**. This means that we could
323 | answer both the original 5-digit version of the question and the updated
324 | 13-digit version of the question with one algorithm.
325 |
326 | - Because the solutions are so different, any discussion in terms of the number
327 | of loops is somewhat moot, however the loop used in the 2015 solution does
328 | have one characteristic that I'd previously suggested could be desirable: it
329 | does not depend upon knowing the length of **NUMBER**. Instead, it iterates
330 | over every character in the String derived from **NUMBER**, **series**, using
331 | String#each_char. In this case, we still know **series** comes from the full
332 | **NUMBER**, so we're not a lot closer to a solution that would work for true
333 | streams of numbers, but the length agnostic nature of the loop is a step in
334 | the right direction.
335 |
336 | - One other big improvement included in the updated solution that we didn't
337 | mention in terms of the 2009 solution is the addition of comments. There are
338 | two flavors of comments in the updated solution that help provide clarity to
339 | the solution. First, the problem description is included as a comment at the
340 | head of the solution. This is really handy for someone else looking at the
341 | code or for coming back to the code six years later. Second, comments
342 | explaining some of the solution's logic have been added making it easier for a
343 | reader to understand what is going on and why those decisions were made.
344 |
345 | #### An alternate approach
346 |
347 | Beyond the better coding practices exhibited by the 2015 solution, the solution
348 | also leverages a better approach to solving the problem. Better can be somewhat
349 | subjective, so I should be clear that in this case I think the 2015 solution is
350 | superior because the algorithm is more efficient and offers a performance
351 | improvement of about an order of magnitude while still using about the same
352 | amount of memory. The concept for the alternate approach emerged from two
353 | seemingly unrelated notions, each of which I thought could be useful
354 | independently to squeeze some extra performance out of the algorithm. As it
355 | turns out, they weren't completely independent notions and one is actually much
356 | easier to implement when built on top of the other.
357 |
358 | The first idea for optimization revolved around a means to more efficiently
359 | calculate the new product each iteration. While the 2009 solution calculated the
360 | new product each iteration by performing 12 multiplications, I reasoned that
361 | since we're really only changing two numbers each iteration (the digit going out
362 | of focus and the digit coming into focus), it should be possible to calculate
363 | the new product with only two operations (divide out the digit going out of
364 | focus, and multiply in the digit coming into focus). The only situation where
365 | this would be complicated is when a zero was encountered because a zero would
366 | effectively destroy our partial product when it got multiplied in, not to
367 | mention trying to divide by zero later would also be a fatal error. A better
368 | means of handling zeros would be required to calculate products in this manner
369 | and that's just what the second idea offered.
370 |
371 | The second notion I had for optimizing the algorithm stemmed from removing the
372 | extraneous work that was being performed in the iteration in which a zero was
373 | encountered and the 12 subsequent iterations after. Because zero multiplied by
374 | any other number is always going to be zero, there were effectively 13
375 | iterations for every zero where the algorithm would do all the work despite the
376 | fact that the answer was guaranteed to be zero. It seemed to me that there had
377 | to be a way to avoid this extraneous effort and actually use zeros as a way to
378 | speed up the calculation. As it turns out, handling zeros is pretty easy because
379 | all that needs to be done when a zero is encountered is reset the partial
380 | product to its initial value, 1, and move on.
381 |
382 | With zeros taken care of, the more efficient means of calculating the product is
383 | simplified to keeping a queue of the factors of the partial product. Then, each
384 | iteration the digit going out of focus is removed from the queue and divided
385 | out of the partial product and the number coming into focus is added to the
386 | queue and multiplied into the partial product. One final bit of housekeeping
387 | that is required is that when a zero is encountered, the queue of factors must
388 | be reset as well.
389 |
390 | #### A faster Char#to_i
391 |
392 | One final bit of hackery (of debatable merit) is the means by which the updated
393 | solution turns the String form of a digit into its Integer form. Though
394 | **String#to_i** is the obvious candidate for this conversion, I wondered if
395 | there might be a faster way since this problem has little need for error
396 | checking or converting large strings of digits. If Ruby had a **Char** class
397 | for single characters, **Char#to_i** would likely have a different performance
398 | profile than **String#to_i**, and a **Char#to_i** style approach was more what
399 | I was looking for.
400 |
401 | One way I had seen this done for individual numbers in other languages was to
402 | take the ordinal, or character code, of an ASCII number and subtract from it the
403 | ordinal for the character "0" to get the Integer equivalent of the character.
404 | This is exactly what the updated solution does using **String#ord**. In each of
405 | my trials, I found the **String#ord** trick to be 25-30% faster than
406 | **String#to_i**. Whether using this trick is a good idea or not (given that this
407 | method makes no checks to verify that the provided character is a number) is a
408 | whole other blog post. In this particular case, I thought the approach novel and
409 | performant enough to utilize it.
410 |
411 | #### Still a Ruby journeyman: A few concerns
412 |
413 | Before concluding this post, I want to mention a few concerns that have come to
414 | mind as I've spent some time analyzing the updated solution. Most stem from
415 | tradeoffs or implementation details. I can't help but wonder if a few of these
416 | concerns are going to be the reasons future-Danny gives for this solution being
417 | mind-blowingly awful in its own way.
418 |
419 | - Did I put way too much effort into the updated solution? 2009 for all of its
420 | shortcomings was much more pragmatic in that it was all about getting the
421 | correct solution and moving on. The goals of the 2009 solution and the 2015
422 | solution are clearly different, so maybe I put exactly the right amount of
423 | time into the updated solution. I suspect it's something only future-Danny
424 | will be able to make a ruling on.
425 |
426 | - Should the solution include more/any error handling? The use of the
427 | **String#ord** trick certainly opens up opportunities for misuse. But even
428 | that hack aside, what happens when the number provided is shorter than the
429 | adjacency length? Currently it does a correct thing and returns zero, but
430 | should that raise an error instead? Is additional error handling worth the
431 | time?
432 |
433 | - Why the focus on performance? Is performance really critical for this problem
434 | or is the focus on performance more to provide some concrete metric of how the
435 | efficiency of my programming has improved over the last 6 years? The
436 | **String#ord** trick is nice, but is it really worth the extra complexity,
437 | confusion, and possible bugs? What benefit might a simpler, less efficient
438 | solution offer?
439 |
440 | - Should the **String#ord** trick be extracted into a method to make it easier
441 | to substitute a different means of converting a digit character into its
442 | Integer form?
443 |
444 | - Why convert **NUMBER** to a String? For all the focus on performance, this is
445 | likely not the most efficient option. If **NUMBER** can remain a Bignum and
446 | each of the digits could be extracted from it in Integer form, would that be a
447 | more performant solution? Would it be a simpler solution?
448 |
449 | - Why the long method format? Sandi Metz would likely argue for smaller methods,
450 | as would Martin Fowler. The long method was partly due to performance concerns
451 | and partly because **Replace Method With Method Object** seemed excessive by
452 | the time it made sense. That said, should this method be broken up into
453 | smaller methods encapsulated in a class of some sort?
454 |
455 | ## Happily ever after?
456 |
457 | Though my exploration of Ruby, and the many other concepts secretly embodied by
458 | the set of problems at Project Euler, didn't pay off in an obvious way at the
459 | time I was focusing on them, I'm happy to have begun my career with Ruby
460 | struggling to write efficient algorithms. Though a friend of mine, a Gopher
461 | through and through, would argue that all Ruby is struggling to write efficient
462 | algorithms, this is a sentiment I've never shared. Perhaps, our disagreement on
463 | the subject stems from my beginnings with Ruby where any algorithmic
464 | inefficiencies were almost always my own and not some fault of the language.
465 | Though there is certainly an argument to be made for using the right tool for
466 | the job, at least in the part of the stack I tend to work in, I have yet to come
467 | across a situation where Ruby was clearly inappropriate. But maybe that's just
468 | me defending an old friend.
469 |
470 | In the end, I'm glad I've held on to my old Project Euler solutions because
471 | though I wouldn't land my first Rails job until late 2011 and I'd spend two more
472 | years on the Microsoft stack dabbling in C# and relational concepts in MSSQL,
473 | and though, for a time, Ruby and I would talk less often, given our history
474 | together, it's nice to be able to look all the way back to the beginning of my
475 | time with Ruby. It helps me to understand that, frankly, I hope to always be
476 | writing code that is four years away from being mind-blowingly awful. If this
477 | stops being the case then I've stopped learning or I've stopped caring and
478 | either way, that'd be pretty sad.
479 |
480 | [^1]: I would **never** talk about another person's code in these terms, especially if that person was as junior as I was when I wrote these scripts. In the words of the [Ten Commandments of Egoless Programming](http://blog.stephenwyattbush.com/2012/04/07/dad-and-the-ten-commandments-of-egoless-programming), "Treat people who know less than you with respect, deference, and patience." I hope you too will follow this advice and save harsher criticisms for your own work.
481 |
--------------------------------------------------------------------------------
/lib/blog_snippets/articles/tail_call_optimization_in_ruby_internals/README.md:
--------------------------------------------------------------------------------
1 | In [my last post](http://blog.tdg5.com/tail-call-optimization-ruby-background/),
2 | I began an exploration of tail call optimization in Ruby with some
3 | [background on tail call optimization and its little known existence and usage
4 | in Ruby](http://blog.tdg5.com/tail-call-optimization-ruby-background/).
5 | In this post, we'll continue that exploration at a much lower level, moving out
6 | of the Ruby layer and descending to whatever depths are necessary to get to the
7 | bottom of how the Ruby VM implements tail call optimization internally.
8 |
9 | A lot of what follows wouldn't be possible without [Pat Shaughnessy's Ruby Under
10 | a Microscope](http://patshaughnessy.net/ruby-under-a-microscope) (and a healthy
11 | dose of [K & R](https://en.wikipedia.org/wiki/The_C_Programming_Language)). If
12 | you find you enjoy the conceptual level of this article and you're interested in
13 | more, I'd highly recommend [Ruby Under a Microscope](http://patshaughnessy.net/ruby-under-a-microscope).
14 | I found it an enjoyable, empowering, fascinating, and approachable introduction
15 | to the internals of Ruby. If you're curious about the book, but you're still
16 | unsure about it, I'd encourage you to check out [Ruby Rogues #146, a book club
17 | episode featuring Ruby Under a Microscope](http://devchat.tv/ruby-rogues/146-rr-book-club-ruby-under-a-microscope-with-pat-shaughnessy)
18 | with guest appearances by the author, [Pat Shaughnessy](http://patshaughnessy.net/),
19 | and [Aaron Patterson](http://tenderlovemaking.com/), of Ruby and Rails fame, and
20 | who also wrote the foreword of the book. It's an enjoyable episode that
21 | definitely helped guide my decision to read the book.
22 |
23 | So, getting on to the subject of today's post. Hold on to your butts.
24 |
25 | ## Revisiting our tail recursive Guinea pig
26 | In [my last post](http://blog.tdg5.com/tail-call-optimization-ruby-background/),
27 | we discovered a tail recursive function in [the Ruby test suite](https://github.com/ruby/ruby/blob/fcf6fa8781fe236a9761ad5d75fa1b87f1afeea2/test/ruby/test_optimization.rb#L213),
28 | which we extracted (with a few tweaks) to demonstrate tail call optimization in
29 | Ruby. We'll need our Guinea pig again for today's exercise, so allow me to
30 | introduce her one more time:
31 |
32 | ```ruby
33 | code = <<-CODE
34 | class Factorial
35 | def self.fact_helper(n, res)
36 | n == 1 ? res : fact_helper(n - 1, n * res)
37 | end
38 |
39 | def self.fact(n)
40 | fact_helper(n, 1)
41 | end
42 | end
43 | CODE
44 | options = {
45 | tailcall_optimization: true,
46 | trace_instruction: false,
47 | }
48 | RubyVM::InstructionSequence.new(code, nil, nil, nil, options).eval
49 | ```
50 |
51 | I won't go into the details again, but suffice it to say that this code snippet
52 | will add a **Factorial** class with a tail call optimized **fact** method to our
53 | environment. Our journey begins with this class method.
54 |
55 | ## Initial descent
56 | With our tail recursive Guinea pig revived, we can begin our descent into the
57 | internals of Ruby's implementation of tail call optimization. A month ago I
58 | wouldn't have known where to begin such a quest, but this is where some of the
59 | background and methods employed in [Ruby Under a Microscope](http://patshaughnessy.net/ruby-under-a-microscope)
60 | will be of great utility.
61 |
62 | One method that [Ruby Under a Microscope](http://patshaughnessy.net/ruby-under-a-microscope)
63 | uses to great effect is using [**RubyVM::InstructionSequence#disasm**](http://www.ruby-doc.org/core-2.2.0/RubyVM/InstructionSequence.html#method-c-disasm)
64 | to disassemble Ruby code into the underlying YARV instructions that the Ruby VM
65 | will actually execute at runtime. Using this technique we should be able to
66 | disassemble both a tail call optimized version and an unoptimized version of our
67 | **Factorial.fact** method and compare the instruction sequences for differences.
68 |
69 | Before we continue, let's rewind for a second and discuss YARV. YARV, which
70 | stands for Yet Another Ruby Virtual Machine, is a stack-oriented VM internal to
71 | Ruby that is responsible for compiling your Ruby code into low-level bytecode
72 | instructions (called YARV instructions) and executing those instructions. YARV
73 | was introduced in Ruby 1.9 to improve performance over Ruby 1.8's direct
74 | traversal and interpretation of the Abstract Syntax Tree generated by parsing a
75 | Ruby program. For more insight into how Ruby executes your code, you can
76 | check out an excerpt from [Ruby Under a Microscope](http://patshaughnessy.net/ruby-under-a-microscope),
77 | [How Ruby Executes Your Code by Pat Shaughnessy](http://patshaughnessy.net/2012/6/29/how-ruby-executes-your-code).
78 |
79 | Back to our regularly scheduled broadcast.
80 |
81 | To facilitate comparing the YARV instructions of the tail call optimized and
82 | unoptimized versions of our factorial function, I've tweaked our Guinea pig
83 | script to disassemble both versions of the function and **puts** them to STDOUT.
84 | Here's the resulting script:
85 |
86 | ```ruby
87 | code = <<-CODE
88 | class Factorial
89 | def self.fact_helper(n, res)
90 | n == 1 ? res : fact_helper(n - 1, n * res)
91 | end
92 |
93 | def self.fact(n)
94 | fact_helper(n, 1)
95 | end
96 | end
97 | CODE
98 |
99 | {
100 | 'unoptimized' => { :tailcall_optimization => false, :trace_instruction => false },
101 | 'tail call optimized' => { :tailcall_optimization => true, :trace_instruction => false },
102 | }.each do |identifier, compile_options|
103 | instruction_sequence = RubyVM::InstructionSequence.new(code, nil, nil, nil, compile_options)
104 | puts "#{identifier}:\n#{instruction_sequence.disasm}"
105 | end
106 | ```
107 |
108 | There are two things here worth making note of. First, I've chosen to disable
109 | the trace instruction for both versions to avoid unnecessary differences between
110 | the two instruction sequences that don't actually pertain to how Ruby implements
111 | tail call optimization internally. Second, though it is not explicit in this
112 | script, I am running MRI Ruby 2.2.0 locally, so all of the YARV instructions and
113 | C code that we'll look at are specific to MRI Ruby 2.2.0 and may be different
114 | from other versions.
115 |
116 | You can view [the YARV instructions of the unoptimized Factorial class here](https://github.com/tdg5/blog_snippets/blob/60b19663b0c9a34117b47665045ba66679584e14/lib/blog_snippets/tail_call_optimization_in_ruby_internals/fact_disasm.txt)
117 | and [the YARV instructions of the tail call optimized Factorial class here](https://github.com/tdg5/blog_snippets/blob/60b19663b0c9a34117b47665045ba66679584e14/lib/blog_snippets/tail_call_optimization_in_ruby_internals/fact_tco_disasm.txt).
118 |
119 | A vimdiff of the two instruction sequences with changed lines highlighted in
120 | purple and the actual changes highlighted in red looks like so:
121 |
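122 | [![vimdiff of the unoptimized and tail call optimized YARV instructions](https://tdg5.s3.amazonaws.com/blog/wp-content/uploads/2014/02/tco_diff.jpg)](https://tdg5.s3.amazonaws.com/blog/wp-content/uploads/2014/02/tco_diff.jpg)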
123 |
124 | Oh no! Disaster! It seems that our initial descent is somewhat of a failure.
125 | Other than the addition of a **TAILCALL** flag to a few of the
126 | **opt_send_without_block** instructions, the YARV instructions for both the
127 | unoptimized version and the tail call optimized version are **exactly the
128 | same**. What gives?
129 |
130 | From here it seems like our only logical course of action is to descend even
131 | further and look at the C code that makes up those YARV instructions with the
132 | hope that the **TAILCALL** flag is really all that's needed to transform an
133 | unoptimized call into a tail call optimized call.
134 |
135 | ## Descending into the C
136 | We begin our journey into Ruby's C internals where our YARV instructions left
137 | us, with the **opt_send_without_block** instruction. Hopefully, we can find
138 | something in the implementation of that instruction that will help us find
139 | our way to where Ruby implements tail call optimization internally.
140 |
141 | As discussed in [Ruby Under a Microscope](http://patshaughnessy.net/ruby-under-a-microscope),
142 | the definitions that are used during the Ruby build process to generate the C
143 | code for all the YARV instructions live in the Ruby source in [insns.def](https://github.com/ruby/ruby/blob/6c0a375c58e99d1f5f1c9b9754d1bb87f1646f61/insns.def).
144 | With a little grepping, we can find the definition of **opt_send_without_block**
145 | around [line 1047 of insns.def](https://github.com/ruby/ruby/blob/6c0a375c58e99d1f5f1c9b9754d1bb87f1646f61/insns.def#L1047):
146 |
147 | ```c
148 | DEFINE_INSN
149 | opt_send_without_block
150 | (CALL_INFO ci)
151 | (...)
152 | (VALUE val) // inc += -ci->orig_argc;
153 | {
154 | ci->argc = ci->orig_argc;
155 | vm_search_method(ci, ci->recv = TOPN(ci->argc));
156 | CALL_METHOD(ci);
157 | }
158 | ```
159 |
160 | As you've almost certainly noticed, this isn't quite C. Rather, during the Ruby
161 | build process this definition is used to generate the actual C code for the
162 | **opt_send_without_block** instruction. [You can view the fully generated C code
163 | for **opt_send_without_block** in all its monstrous glory here](https://github.com/tdg5/blog_snippets/blob/2a9e48ccc10082d37c821e3b838f223597a0d7b6/lib/blog_snippets/tail_call_optimization_in_ruby_internals/opt_send_without_block.vm.inc).
164 |
165 | Luckily, for our purposes, we don't have to go quite to that extreme and can
166 | operate at the instruction definition level. One mutation I will make before we
167 | continue is to expand the **CALL_METHOD** macro and remove some noise added to
168 | facilitate the macro. That brings us to the following:
169 |
170 | ```c
171 | ci->argc = ci->orig_argc;
172 | vm_search_method(ci, ci->recv = TOPN(ci->argc));
173 | VALUE v = (*(ci)->call)(th, GET_CFP(), (ci));
174 | if (v == Qundef) {
175 | RESTORE_REGS();
176 | NEXT_INSN();
177 | }
178 | else {
179 | val = v;
180 | }
181 | ```
182 |
183 | OK, so what in the name of Neptune is going on here? Well, the first thing to
184 | notice is there's no sign of tail call optimization here, so the question for
185 | now is, where to next?
186 |
187 | In this case, the **ci** variable is of most interest to our particular quest.
188 | The **ci** variable references a **rb_call_info_t** struct which encapsulates a
189 | variety of data about a method call including, among other things, the receiver
190 | of the call, how many arguments the call takes, and a reference to the C
191 | function that should actually be executed by the call. It's this final reference,
192 | **ci->call**, that we're most interested in, as we hope to find some trace of
193 | tail call optimization therein.
194 |
195 | From the code above we can ascertain that when the Ruby VM executes a method
196 | call, it invokes the function pointed to by the **rb_call_info_t** struct's
197 | **call** field with the current thread (**th**), the current frame pointer
198 | (result of **GET_CFP**), and the **rb_call_info_t** struct itself (**ci**) for
199 | arguments.
200 |
201 | This is definitely a step in the right direction, but since we have no insight
202 | into the origins of the function pointed to by the **rb_call_info_t** struct's
203 | **call** pointer, we'll need to step backward before we can step forward.
204 | Luckily for us, we literally only need to take one step backward to the previous
205 | line where **vm_search_method** is invoked.
206 |
207 | At this point, rather than drill into every call
208 | that is made on the way to our goal, let's speed things up a bit. We'll still
209 | walk through each step, but we'll be more brief and skip the code snippets
210 | until we get a whiff of tail call optimization. That said, I've collected [the
211 | source for each step of the way from **CALL_METHOD** to the internals of Ruby's
212 | tail call optimization into one file](https://github.com/tdg5/blog_snippets/blob/12cb499a95ced517ee9f70febfa9472e2d055d71/lib/blog_snippets/tail_call_optimization_in_ruby_internals/from_call_method_to_tco.c)
213 | for your viewing pleasure.
214 |
215 | Take a deep breath...
216 |
217 | - The call to [**vm_search_method**](https://github.com/tdg5/blog_snippets/blob/12cb499a95ced517ee9f70febfa9472e2d055d71/lib/blog_snippets/tail_call_optimization_in_ruby_internals/from_call_method_to_tco.c#L2)
218 | is where the value of [**ci->call** is set, and it is set to reference another
219 | function, **vm_call_general**](https://github.com/tdg5/blog_snippets/blob/12cb499a95ced517ee9f70febfa9472e2d055d71/lib/blog_snippets/tail_call_optimization_in_ruby_internals/from_call_method_to_tco.c#L15).
220 |
221 | - [**vm_call_general**](https://github.com/tdg5/blog_snippets/blob/12cb499a95ced517ee9f70febfa9472e2d055d71/lib/blog_snippets/tail_call_optimization_in_ruby_internals/from_call_method_to_tco.c#L24)
222 | when called [invokes and returns the result of another method, **vm_call_method**](https://github.com/tdg5/blog_snippets/blob/12cb499a95ced517ee9f70febfa9472e2d055d71/lib/blog_snippets/tail_call_optimization_in_ruby_internals/from_call_method_to_tco.c#L26).
223 |
224 | - [**vm_call_method**](https://github.com/tdg5/blog_snippets/blob/12cb499a95ced517ee9f70febfa9472e2d055d71/lib/blog_snippets/tail_call_optimization_in_ruby_internals/from_call_method_to_tco.c#L31)
225 | at about 155 lines, is a monster of a function that handles every type of
226 | method invocation that the Ruby VM supports. It'd be pretty easy to get lost in
227 | this method, but we are fortunate in that we know we are dealing with an
228 | instruction sequence method type because we got to this point from a YARV
229 | instruction. This allows us to jump right to the portion of the
230 | switch statement that deals with instruction sequence type methods. In which
231 | case, [**vm_call_method** returns the result of yet another function invocation **vm_call_iseq_setup**](https://github.com/tdg5/blog_snippets/blob/12cb499a95ced517ee9f70febfa9472e2d055d71/lib/blog_snippets/tail_call_optimization_in_ruby_internals/from_call_method_to_tco.c#L45).
232 |
233 | (If you're beginning to wonder if this rabbit hole of a descent has a bottom,
234 | don't worry, we're almost there.)
235 |
236 | - [**vm_call_iseq_setup**](https://github.com/tdg5/blog_snippets/blob/12cb499a95ced517ee9f70febfa9472e2d055d71/lib/blog_snippets/tail_call_optimization_in_ruby_internals/from_call_method_to_tco.c#L210)
237 | is a two-liner that sets up the callee of the method and then [returns the
238 | result of another function invocation, **vm_call_iseq_setup_2**](https://github.com/tdg5/blog_snippets/blob/12cb499a95ced517ee9f70febfa9472e2d055d71/lib/blog_snippets/tail_call_optimization_in_ruby_internals/from_call_method_to_tco.c#L213).
239 |
240 | - [**vm_call_iseq_setup_2**](https://github.com/tdg5/blog_snippets/blob/12cb499a95ced517ee9f70febfa9472e2d055d71/lib/blog_snippets/tail_call_optimization_in_ruby_internals/from_call_method_to_tco.c#L218)
241 | is where we finally get our first whiff of tail call optimization. In fact, the
242 | only purpose of **vm_call_iseq_setup_2** is to check if tail call optimization
243 | is enabled and if so [it calls yet another function, **vm_call_iseq_setup_tailcall**](https://github.com/tdg5/blog_snippets/blob/12cb499a95ced517ee9f70febfa9472e2d055d71/lib/blog_snippets/tail_call_optimization_in_ruby_internals/from_call_method_to_tco.c#L224).
244 |
245 | (**So close!** But, while we're here, it's worth noting that normally [when tail
246 | call optimization is not enabled, **vm_call_iseq_setup_2** will call
247 | **vm_call_iseq_setup_normal**](https://github.com/tdg5/blog_snippets/blob/12cb499a95ced517ee9f70febfa9472e2d055d71/lib/blog_snippets/tail_call_optimization_in_ruby_internals/from_call_method_to_tco.c#L221)
248 | instead of **vm_call_iseq_setup_tailcall**. We'll come back to this alternative
249 | path in a moment.)
250 |
251 | - One look at [**vm_call_iseq_setup_tailcall**](https://github.com/tdg5/blog_snippets/blob/12cb499a95ced517ee9f70febfa9472e2d055d71/lib/blog_snippets/tail_call_optimization_in_ruby_internals/from_call_method_to_tco.c#L252)
252 | and it's obvious that we've found what we've been searching for, the heart of
253 | Ruby's support for tail call optimization.
254 |
255 | Success! Well, sort of, we still need to grok what's going on here, and come to
256 | think of it, where the hell are we? Let's take a look at what's going on inside
257 | **vm_call_iseq_setup_tailcall** and see if we can find our bearings and see how
258 | this call translates into the goodness of tail call optimization.
259 |
260 | ## Just when you were starting to think it was turtles all the way down
261 | Though we could consider **vm_call_iseq_setup_tailcall** on its own, we would
262 | probably do better to use the same strategy that we employed earlier and compare
263 | the unoptimized version to the tail call optimized version, and see what is
264 | different between the two. It didn't work for us last time, but maybe we'll have
265 | better luck this time around.
266 |
267 | We've established that the tail optimized version can be found in
268 | **vm_call_iseq_setup_tailcall**, and if it wasn't obvious from its name or from
269 | my making a point of mentioning it during our descent, the unoptimized version
270 | can be found in [**vm_call_iseq_setup_normal**](https://github.com/tdg5/blog_snippets/blob/12cb499a95ced517ee9f70febfa9472e2d055d71/lib/blog_snippets/tail_call_optimization_in_ruby_internals/from_call_method_to_tco.c#L221).
271 | Looking at both methods at a high level, it looks like we're still in the
272 | process of making the method call, as both of these functions seem to be
273 | preparing Ruby's internal stack prior to pushing a new frame onto the call
274 | stack.
275 |
276 | Here's a side-by-side vimdiff highlighting the differences between the two
277 | functions, though I should warn you that I made a couple of minor adjustments to
278 | **vm_call_iseq_setup_normal** to suppress irrelevant differences:
279 |
280 | [](http://tdg5.s3.amazonaws.com/blog/wp-content/uploads/2014/02/vm_call_iseq_setup_diff.jpg)
281 |
282 | Compared to the extremely minimal differences in our initial diff, I'm much
283 | more optimistic that we'll find what we're looking for in this larger change
284 | set. Let's start with **vm_call_iseq_setup_normal** since it is the shorter and
285 | more typical of the two functions.
286 |
287 | ## vm_call_iseq_setup_normal
288 | ```c
289 | VALUE *argv = cfp->sp - ci->argc;
290 | ```
291 |
292 | **vm_call_iseq_setup_normal** begins by creating a pointer to the position on
293 | the stack where the argument vector (**argv**) for the next iteration of the
294 | recursive call begins. This is achieved by taking the current stack frame's
295 | stack pointer (**cfp->sp**) and moving backward down the stack the appropriate
296 | number of elements, as determined by our old friend the call info struct
297 | (**rb_call_info_t**) and its argument count field (**ci->argc**).
298 |
299 | ```c
300 | rb_iseq_t *iseq = ci->me->def->body.iseq;
301 | ```
302 |
303 | **vm_call_iseq_setup_normal** then continues by creating a pointer to the
304 | **rb_iseq_t** struct identifying and encapsulating data about the instruction
305 | sequence that will be invoked by this call.
306 |
307 | ```c
308 | VALUE *sp = argv + iseq->param.size;
309 | ```
310 |
311 | **vm_call_iseq_setup_normal** next creates a new pointer (**sp**) and points it
312 | to where it calculates the end of the argument vector (**argv**) to be using the
313 | value returned by **iseq->param.size**, a field related to the instruction
314 | sequence indicating how many parameters the instruction sequence takes.
315 |
316 | It may seem strange that the VM determines the beginning of **argv** by descending
317 | **ci->argc** elements from the top of the stack and then later finds the end of
318 | **argv** by ascending **iseq->param.size** elements up the stack, however the use
319 | of **iseq->param.size** allows the VM to allocate extra space on the stack in
320 | situations that use special types of arguments. In this case however, our Guinea
321 | pig function uses only simple arguments so **ci->argc** and **iseq->param.size**
322 | are equal. This brings us right back to where we started at the top of the stack.
323 |
324 | ```c
325 | for (i = 0; i < iseq->local_size - iseq->param.size; i++) {
326 | *sp++ = Qnil;
327 | }
328 | ```
329 |
330 | This next segment is responsible for allocating and clearing out space on the
331 | stack for local variables and special variables that will be required to execute
332 | the method call. In this case, our Guinea pig function doesn't use any local
333 | variables so no space is needed for those, but the VM does need to allocate a
334 | spot on the stack for special variables. That said, though the VM allocates a
335 | spot on the stack for special variables, our function doesn't actually use any
336 | of Ruby's special variables[^1], so that spot on the stack will remain nil.
337 |
338 | ```c
339 | vm_push_frame(th, iseq, VM_FRAME_MAGIC_METHOD,
340 | ci->recv, ci->defined_class, VM_ENVVAL_BLOCK_PTR(ci->blockptr),
341 | iseq->iseq_encoded + ci->aux.opt_pc, sp, 0, ci->me, iseq->stack_max);
342 | ```
343 |
344 | For our particular intentions we don't need to get into the nitty-gritty details
345 | of this function invocation, but suffice it to say this call is responsible for
346 | pushing a new frame on to the stack for executing the method call. This new
347 | frame is the next iteration of our recursive function.
348 |
349 | ```c
350 | cfp->sp = argv - 1 /* recv */;
351 | ```
352 |
353 | This last bit of logic sets the current frame's stack pointer (**cfp->sp**) to
354 | point to the position on the stack just before the beginning of the argument
355 | vector (**argv - 1**). When this line is executed, that position on the stack is
356 | occupied by the receiver of the next iteration of our function call. This may
357 | seem a little strange, but this assignment is preparing the current stack frame
358 | for when it resumes execution after the completion of the frame we've just
359 | pushed on to the stack. When the current frame resumes, it can assume the
360 | arguments further up the stack have already been consumed and should continue
361 | from further down the stack. Though it's not obvious, we'll see in a minute that
362 | this behavior is important for supporting tail call optimization.
363 |
364 | Whew, one down. Now let's take a look at how Ruby handles things differently in
365 | the tail call optimized case.
366 |
367 | ## vm_call_iseq_setup_tailcall
368 |
369 | ```c
370 | VALUE *argv = cfp->sp - ci->argc;
371 | rb_iseq_t *iseq = ci->me->def->body.iseq;
372 | ```
373 |
374 | **vm_call_iseq_setup_tailcall** starts exactly the same as its counterpart: It
375 | creates a pointer to the beginning of the argument vector (**argv**) of the next
376 | iteration of our recursive function and extracts a reference to the instruction
377 | sequence struct from the call info struct.
378 |
379 | ```c
380 | VALUE *src_argv = argv;
381 | VALUE *sp_orig, *sp;
382 | VALUE finish_flag = VM_FRAME_TYPE_FINISH_P(cfp) ? VM_FRAME_FLAG_FINISH : 0;
383 | ```
384 |
385 | Though the functions start the same, **vm_call_iseq_setup_tailcall** soon
386 | distinguishes itself with the allocation of a number of additional variables.
387 | First, a new pointer (**src_argv**) is created pointing to the beginning of the
388 | argument vector (**argv**). Next, two pointers (**sp_orig** and **sp**) are
389 | allocated, but not assigned. Finally, a fourth variable (**finish_flag**) is
390 | allocated and conditionally assigned.
391 |
392 | The final variable, **finish_flag**, is used to allow tail call optimization of
393 | special types of stack frames called **finish frames**. Since we're working with
394 | normal method frames, the **finish_flag** variable can be safely ignored.
395 |
396 | ```c
397 | cfp = th->cfp = RUBY_VM_PREVIOUS_CONTROL_FRAME(th->cfp);
398 | ```
399 |
400 | This is where the cleverness of tail call optimization begins to surface.
401 | Whereas the normal recursive strategy continues to accumulate frame after frame,
402 | this line begins to demonstrate how an optimized tail recursive call can avoid
403 | doing so.
404 |
405 | The secret sauce behind the success of **vm_call_iseq_setup_tailcall**, and tail
406 | call optimization in general, is that each iteration actually removes itself
407 | from the stack, as part of invoking the next iteration. Since the nature of
408 | recursion can make discussion difficult, it's worth taking a moment here for
409 | clarity.
410 |
411 | The beginning of **vm_call_iseq_setup_tailcall**, places us at the point in the
412 | sequence of events where the current frame, iteration n of
413 | **Factorial.fact_helper**, is preparing the stack for the recursive invocation
414 | of iteration n+1 of **Factorial.fact_helper**. Iteration n, after storing a
415 | reference to the argument vector intended for iteration n+1, pops the current
416 | stack frame (itself) off of the call stack, effectively removing itself from the
417 | stack and giving the appearance that **Factorial.fact** is the call in the stack
418 | before iteration n+1 of **Factorial.fact_helper**.
419 |
420 | In terms of another metaphor, if you think of the factorial calculation as
421 | exercise and the call stack as distance traveled, tail call optimization is kind
422 | of like a hamster (or Guinea pig) running on a hamster wheel. Though both the
423 | hamster and the recursive call are running in place, they both still make
424 | progress on the work they are performing. This analogy may also elucidate why
425 | tail recursion can be thought of as a special kind of loop construct.
426 |
427 | Returning our focus to **vm_call_iseq_setup_tailcall**, after popping the
428 | current frame from the call stack, **vm_call_iseq_setup_tailcall** then updates
429 | the thread's current frame pointer (**th->cfp**) and the **cfp** variable to
430 | point at the stack frame prior to the invocation of our tail recursive function,
431 | **Factorial.fact**.
432 |
433 | Though this mechanism allows tail call optimization to avoid the stack overflows
434 | inherent to its counterpart, we will see in a moment that it also has other
435 | benefits.
436 |
437 | ```c
438 | RUBY_VM_CHECK_INTS(th);
439 | ```
440 |
441 | This line handles a little extra bookkeeping that tail call optimization in Ruby
442 | incurs. Usually, when Ruby switches from one stack frame to another, it takes a
443 | moment to check for pending interrupts. However, since the stack frame was
444 | manually popped off of the call stack, the check for interrupts must also be
445 | handled manually.
446 |
447 | ```c
448 | sp_orig = sp = cfp->sp;
449 | ```
450 |
451 | Though it is pretty clear that this line assigns the **sp_orig** and **sp**
452 | variables to the value stored in the current frame's stack pointer (**cfp->sp**)
453 | field, keep in mind that **cfp** now refers to the call to **Factorial.fact**.
454 |
455 | As you'll recall from the normal setup function, before the first invocation of
456 | **Factorial.fact_helper**, the previous frame (**Factorial.fact**) would have
457 | rewound its stack pointer to the position on the stack that it should resume
458 | execution from, which would have been the point on the stack right before the
459 | arguments consumed by the first iteration of **Factorial.fact_helper**. This
460 | behavior benefits tail call optimization in a few ways.
461 |
462 | First, because the function call that just ended is exactly the same as the one
463 | that's being set up, it can be assumed that there's enough room on the stack for
464 | the call being prepared. This means that the stack pointer from the call prior
465 | to our tail optimized call (**cfp->sp**) can be used as the starting position
466 | for the new stack (**sp**) that's being assembled.
467 |
468 | Second, because the character of the stack is likely consistent for each
469 | recursive call, less overhead is required when setting up the stack. For
470 | example, earlier I mentioned that the Ruby VM allocates a spot on the stack for
471 | special variables that might be used by the function, but that since the
472 | function doesn't use any special variables, that field remains nil. Because of
473 | the alignment of values on the stack from iteration to iteration, that nil field is
474 | actually only assigned on the first iteration and on every other iteration the
475 | assignment can be skipped because the value is already nil.
476 |
477 | The final benefit that comes from being able to reuse the stack pointer from the
478 | stack frame prior to our tail optimized call (**cfp->sp**) is that that same
479 | pointer also doubles as a pointer to the place on the stack that our current
480 | frame's stack pointer (**cfp->sp**) will need to be rewound to later. To
481 | facilitate this usage a reference is set aside in **sp_orig** for later use.
482 |
483 | ```c
484 | sp[0] = ci->recv;
485 | sp++;
486 | ```
487 |
488 | With this line, **vm_call_iseq_setup_tailcall** begins to rebuild the stack for
489 | the next iteration of the recursive call. To achieve this, it first pushes the
490 | receiver of the call (**ci->recv**) into the position at the head of the stack
491 | (**sp[0]**), and increments the stack pointer to the next position.
492 |
493 | ```c
494 | for (i=0; i < iseq->param.size; i++) {
495 | *sp++ = src_argv[i];
496 | }
497 | ```
498 |
499 | Next, the function continues by pushing each of the arguments for the next
500 | iteration onto the stack. This is where it becomes clear why a reference to the
501 | next iteration's argument vector is needed, as the **cfp** pointer was replaced,
502 | and without this reference (**src_argv**) there'd be no consistent means by
503 | which to access those arguments.
504 |
505 | This loop is also responsible for the behavior I alluded to above where each
506 | argument is written to a consistent position on the stack with each iteration.
507 |
508 |
509 | ```c
510 | for (i = 0; i < iseq->local_size - iseq->param.size; i++) {
511 | *sp++ = Qnil;
512 | }
513 | ```
514 |
515 | Consistent with the normal setup function, the tail call optimized setup function
516 | also reserves and resets additional space on the stack for the method call as
517 | required.
518 |
519 | ```c
520 | vm_push_frame(th, iseq, VM_FRAME_MAGIC_METHOD | finish_flag,
521 | ci->recv, ci->defined_class, VM_ENVVAL_BLOCK_PTR(ci->blockptr),
522 | iseq->iseq_encoded + ci->aux.opt_pc, sp, 0, ci->me, iseq->stack_max);
523 | ```
524 |
525 | The process of pushing a new frame on to the stack is almost exactly the same as
526 | in the normal setup function, except for one slight difference: The bitwise
527 | logic related to the **finish_flag** variable is added to allow tail call
528 | optimization to be performed on **finish frames** as we briefly discussed
529 | earlier.
530 |
531 | ```c
532 | cfp->sp = sp_orig;
533 | ```
534 |
535 | Last but not least, after pushing the new frame on to the stack, the setup
536 | function sets the current frame pointer's stack pointer (**cfp->sp**) to the
537 | point on the stack that it should resume from. In this case, that position
538 | matches the original position of the frame's stack pointer which was tucked away
539 | in **sp_orig** for later use.
540 |
541 | At this point we're back in sync with **vm_call_iseq_setup_normal**, but whereas
542 | **vm_call_iseq_setup_normal** would have picked up another stack frame, after
543 | some minor stack shuffling, **vm_call_iseq_setup_tailcall** leaves us right back
544 | where we started, but one step closer to the solution to our factorial
545 | calculation.
546 |
547 | ## The bends
548 | Wow. I don't know about you, but I didn't expect the bottom to be quite so far
549 | down there. Though I'm eager to come back up for air, as are you I'm sure, it's
550 | worth deferring our ascent a moment to reflect on what we found in the depths.
551 |
552 | Ruby's implementation of tail call optimization emerges from the Ruby VM's
553 | stack-oriented nature and ability to discard the current stack frame as it
554 | prepares the next frame for execution. Given this design it becomes more clear
555 | why tail call optimization is handled by Ruby on the C side instead of on the
556 | YARV side since method call setup is below the conceptual level at which YARV
557 | tends to work.
558 |
559 | In the end, there's a satirical humor in that we had to go to such depths to
560 | understand the facilities that allow the Ruby VM to handle tail recursive
561 | functions like treading water at the top of the stack.
562 |
563 | It's been a long journey, but I hope you learned something along the way, I know
564 | I certainly did. Thanks for reading!
565 |
566 | (I swear my next post will be shorter!)
567 |
568 | [^1]: Ruby's special **$** variables are out of the scope of this article, but you can see where the [parser defines the various special variables here](https://github.com/ruby/ruby/blob/17a65c320d9ce3bce3d7fe0177d74bf78314b8fa/parse.y#L7606).
569 |
--------------------------------------------------------------------------------
/lib/blog_snippets/articles/eager_boolean_operators/README.md:
--------------------------------------------------------------------------------
1 | 
2 | In relaying the story of eager Boolean operators, it is best to begin with their
3 | more ubiquitous siblings, short-circuiting logical Boolean operators. This is
4 | perhaps best achieved with an example:
5 |
6 | ```ruby
7 | true || Seriously(this(is(valid(Ruby!))))
8 | # => true
9 |
10 | false && 0/0
11 | # => false
12 | ```
13 |
14 | In Ruby, and many other common programming languages,[^1] the Boolean operators
15 | used for chaining together logical expressions are designed to minimize the
16 | amount of work required to determine the outcome of a logical expression. More
17 | specifically, when determining the outcome of a logical expression as few of
18 | the statements in the expression will be evaluated as possible. In the previous
19 | example, this notion, known as [short-circuit evaluation](https://en.wikipedia.org/wiki/Short-circuit_evaluation),
20 | is exploited to include some very bad code in a manner that renders that bad
21 | code completely innocuous.
22 |
23 | In the first example, the short-circuiting behavior of the **||** Boolean
24 | operator, representing a [logical **OR** or logical disjunction](https://en.wikipedia.org/wiki/Logical_disjunction)
25 | operation, prevents a series of undefined methods from causing a fatal
26 | **NoMethodError** exception. This code can safely be executed because when the
27 | first argument of an **OR** operation is **true** then the overall value of the
28 | expression must also be **true**. Put more simply, **true OR _anything_** will
29 | always result in **true**. Given this logical maxim, at runtime the program does
30 | not need to execute the right-hand side of the expression and can move on
31 | without executing the explosive code.
32 |
33 | Similarly, in the second example, the short-circuiting behavior of the **&&**
34 | Boolean operator, representing a [logical **AND** or logical conjunction](https://en.wikipedia.org/wiki/Logical_conjunction)
35 | operation, prevents a fatal **ZeroDivisionError** exception. This code can
36 | safely be executed because when the first argument of an **AND** operation is
37 | **false** then the overall value of the expression must also be **false**. In
38 | simpler terms, **false AND _anything_** will always result in **false**. Given
39 | this basic tenet of Boolean logic, at runtime the program can decide the
40 | outcome of the logical expression without executing the subversive right-hand
41 | side of the expression.
42 |
43 | It's interesting to note that, because of their short-circuiting behavior, the
44 | **||** and **&&** Boolean operators are more than just logical operators,
45 | they actually also function as control structures. To demonstrate this,
46 | though the previous example used Boolean operators, it could just as easily
47 | have been written with more traditional flow control structures like **if** or
48 | **unless**:
49 |
50 | ```ruby
51 | # The true result is lost, but we weren't storing it anyway, so no problemo.
52 | Seriously(this(is(valid(Ruby!)))) unless true
53 | # => nil
54 |
55 | # Again, the result of false is lost, but for this example that's okay.
56 | 0/0 if false
57 | # => nil
58 | ```
59 |
60 | Eager Boolean operators come into play when someone inevitably asks the
61 | question, "what if we don't want to short-circuit?"
62 |
63 | ## Eager Boolean Operators
64 |
65 | As their name suggests, eager Boolean operators are logical operators that do
66 | not short-circuit. Instead, even when the outcome of a logical expression is
67 | determined, they continue to execute the logical expression until it has been
68 | fully evaluated. If we changed our example of short-circuiting Boolean operators
69 | to use eager Boolean operators instead, we'd no longer be safe from that
70 | sinister code. Here it is again as such with a couple of other tweaks:
71 |
72 | ```ruby
73 | begin
74 | true | Seriously(this(is(valid(Ruby!))))
75 | rescue NoMethodError => e
76 | e.class
77 | end
78 | # => NoMethodError
79 |
80 | begin
81 | false & 0/0
82 | rescue ZeroDivisionError => e
83 | e.class
84 | end
85 | # => ZeroDivisionError
86 | ```
87 |
88 | In the first example above, I've modified the earlier example to replace the
89 | **||** Boolean operator with an alternative Boolean operator included in Ruby
90 | that offers eager evaluation of logical **OR** expressions, **|**. Though more
91 | commonly used for bitwise operations, when used with **true**, **false**, or
92 | **nil**, the **|** operator functions similarly to its counterpart, **||**,
93 | except without the short-circuiting behavior. Evidence of this eager evaluation
94 | behavior can be seen above in that the outcome of the **begin** block is not
95 | true, as would be the case if **|** were a short-circuiting operator, but it is
96 | instead the exception class we would expect to be raised if the right-hand side
97 | of the logical expression had been evaluated.
98 |
99 | Similarly, in the second example above, I've modified the earlier example and
100 | replaced the **&&** Boolean operator with Ruby's eager Boolean **AND**
101 | operator, **&**. Also more commonly used in bitwise expressions, when used with
102 | **true**, **false**, or **nil**, the **&** operator behaves similarly to its
103 | short-circuiting cousin, **&&**, except that it eagerly evaluates the right-hand
104 | side of the logical expression even if the overall outcome of the expression has
105 | already been determined. Once again, this behavior can be seen in that the
106 | result of the **begin** block is the **ZeroDivisionError** class, which would
107 | only be the case if the right-hand side of the logical expression had been
108 | evaluated.
109 |
110 | Though this example helps demonstrate the eager evaluation properties of the
111 | **|** and **&** Boolean operators, given its explosive nature, it doesn't offer
112 | much insight into how eager Boolean operators might be useful. Having addressed
113 | the question of "what if we don't want to short-circuit?", let us consider
114 | another question that may actually be a better answer to the question than the
115 | one I've just outlined: "why wouldn't you want to short-circuit?"
116 |
117 | ## Bitwise digression
118 |
119 | Before we look at a handful of examples of eager Boolean operators, I'd like to
120 | digress for a moment for a brief discussion of bitwise Boolean operators.
121 | Bitwise Boolean operators are operators like **&** and **|** that perform
122 | operations on Boolean values as though those Boolean values were bits or binary
123 | 0s and 1s, where **false** and **nil** are both 0 and **true** is 1. For
124 | example, consider the following truth table for the **&** bitwise operation that
125 | demonstrates the equivalence of the two operations.
126 |
127 | | Truth of & | nil ( 0 ) | false ( 0 ) | true ( 1 ) |
128 | |------------|-------------|-------------|-------------|
129 | |nil ( 0 ) | false ( 0 ) | false ( 0 ) | false ( 0 ) |
130 | |false ( 0 ) | false ( 0 ) | false ( 0 ) | false ( 0 ) |
131 | |true ( 1 ) | false ( 0 ) | false ( 0 ) | true ( 1 ) |
132 |
133 | One behavior of bitwise Boolean operators worth noting is that they always
134 | return a Boolean value. Even if the second argument to a bitwise Boolean
135 | operator is truthy or falsy, or even if the first argument to the bitwise
136 | Boolean operator is falsy, as is the case with **nil**, the result of the
137 | expression will still be a Boolean value. This is in contrast to their logical
138 | Boolean counterparts who are more than content to return a truthy or falsy value
139 | in place of a strict Boolean value.
140 |
141 | This behavior can be useful at times, but can certainly come as a surprise to
142 | those who are more familiar with the more ubiquitous logical Boolean operators
143 | and their penchant for returning truthy and falsy values. The behavior of
144 | bitwise Boolean operators can also surprise the unaware in that unlike the
145 | logical Boolean operators which can be invoked with any two values, the bitwise
146 | Boolean operators must be invoked with either **true**, **false**, or **nil** on
147 | the left-hand side of the expression, otherwise, an error or other unexpected
148 | behavior will occur.
149 |
150 | In terms of eager Boolean operators, the bitwise Boolean operators are important
151 | because the eager Boolean operators are a sort of subset of the bitwise Boolean
152 | operators. The **&** and **|** operators are both bitwise Boolean operators, but
153 | in the cases of **true | _anything_** and **false & _anything_** they are also
154 | eager Boolean operators. If this is unclear, the following examples may help.
155 |
156 | ## Eager Boolean Operators in Practice
157 |
158 | Let's look at a couple of examples of eager Boolean operators in practice. After
159 | we've considered a couple of examples, perhaps we'll be better prepared to take
160 | a step back and get more clarity on what aspects or behaviors of eager
161 | evaluation are exploited by these examples in the name of utility. I've done
162 | what I can to try to find examples of eager Boolean operators out in the wild,
163 | but I've not had enormous success. To that end, I've tried to evaluate and order
164 | the examples below in terms of utility. Some examples are mine, some come from
165 | more popular libraries.
166 |
167 | ### Enumerable#eager_all?
168 |
169 | The first example is far and away the best use-case I've found for both bitwise
170 | and eager Boolean operators. The example below uses the
171 | bitwise **AND** operator, **&**, to create a version of
172 | [**Enumerable#all?**](http://ruby-doc.org/core-2.2.0/Enumerable.html#method-i-all-3F)
173 | that is guaranteed to evaluate all elements in a collection. This is different
174 | from the normal behavior of **Enumerable#all?** in that **Enumerable#all?**
175 | normally discontinues evaluation of the collection as soon as any element in the
176 | collection returns **false** for the provided block.
177 |
178 | ```ruby
179 | module Enumerable
180 | def eager_all?
181 | inject(true) do |result, item|
182 | result & (block_given? ? yield(item) : item)
183 | end
184 | end
185 | end
186 | ```
187 |
188 | This example leverages the **&** operator to ensure that the right-hand side of
189 | the logical expression is always evaluated. This behavior is combined with
190 | [**Enumerable#inject**](http://ruby-doc.org/core-2.2.0/Enumerable.html#method-i-inject)
191 | to ensure that all elements of the collection are evaluated, ultimately
192 | accumulating to the correct result.
193 |
194 | The astute among you may have noticed that this example could alternatively have
195 | used the short-circuiting **&&** Boolean operator by flipping the operands like
196 | so:
197 |
198 | ```ruby
199 | module Enumerable
200 | def alternative_eager_all?
201 | inject(true) do |result, item|
202 | (block_given? ? yield(item) : item) && result
203 | end
204 | end
205 | end
206 | ```
207 |
208 | Though this is true, at runtime this alternative approach draws attention to the
209 | bitwise nature of the **&** operator as compared to its short-circuiting cousin,
210 | **&&**, a difference in nature which I think in this case gives the eager
211 | Boolean operator the edge. The bitwise nature I refer to is, as I mentioned
212 | before and as is demonstrated below, eager Boolean operators will always return
213 | **true** or **false** while the short-circuiting Boolean operators could return
214 | any object depending on the operator and the arguments given to it. We don't
215 | have to worry about *any* object in the alternative example since the result of
216 | the yield is combined with **true** or **false** using **&&**, but we do have
217 | to worry about one other object, **nil**. Because of the short-circuiting nature
218 | of **&&**, if the result of the **yield** is **nil**, the call to
219 | **alternative_eager_all?** will also return **nil** as demonstrated below:
220 |
221 | ```ruby
222 | [false, nil].eager_all?
223 | # => false
224 |
225 | [false, nil].alternative_eager_all?
226 | # => nil
227 | ```
228 |
229 | Given that **nil** is also falsy, this isn't really a problem, but I think it
230 | does make **alternative_eager_all?** less robust than it could be.
231 |
232 | Another way the **nil** case could be handled without resorting to using an
233 | eager Boolean operator is by double negating the result of the **inject** call
234 | to ensure that a Boolean is returned. That would look like this:
235 |
236 | ```ruby
237 | module Enumerable
238 | def alternative_eager_all?
239 | !!inject(true) do |result, item|
240 | (block_given? ? yield(item) : item) && result
241 | end
242 | end
243 | end
244 | ```
245 |
246 | Though the practice of double negation is pretty common, as it turns out, the
247 | coercive nature of the bitwise Boolean operators is actually slightly faster
248 | than the more idiomatic double negation. Consider this benchmark generated using
249 | the [benchmark-ips gem](https://github.com/evanphx/benchmark-ips):
250 |
251 | ```ruby
252 | require "benchmark/ips"
253 |
254 | Benchmark.ips do |bm|
255 | bm.config(:time => 20, :warmup => 5)
256 |
257 | bm.report("Double negate") { !!(true && :a) }
258 | bm.report("Logical bit-wise coerce") { true & :a }
259 | end
260 |
261 | # Calculating --------------------------------------------
262 | # Double negate 138.008k i/100ms
263 | # Logical bit-wise coerce 139.350k i/100ms
264 | # --------------------------------------------------------
265 | # Double negate 7.262M (± 1.0%) i/s - 36.434M
266 | # Logical bit-wise coerce 7.825M (± 1.3%) i/s - 39.157M
267 | # --------------------------------------------------------
268 | ```
269 |
270 | The difference in performance between the two approaches is pretty negligible
271 | and certainly isn't substantial enough to merit choosing bitwise Boolean
272 | coercion over double negation. Keep in mind also that the bitwise coercion (if
273 | you want to call it that) to **true** or **false** is not without its downside.
274 | As I mentioned before, the coercive behavior of eager Boolean operators may
275 | come as a surprise for developers who are more familiar with the behavior of the
276 | more common short-circuiting logical Boolean operators.
277 |
278 | ### Bringing *before_suite* type behavior to Minitest
279 |
280 | The next example is a bit of questionable code of mine from a few years ago. In
281 | this example, I use the **&** eager Boolean operator in an attempt to emulate
282 | behavior similar to **RSpec's #before_suite** hook in a **Minitest** test case
283 | seeing as **Minitest** does not offer a similar behavior.
284 |
285 | ```ruby
286 | class SomeTest < Minitest::TestCase
287 | setup { self.class.one_time_setup }
288 |
289 | def self.one_time_setup
290 | return if @setup_complete & @setup_complete ||= true
291 | # Some expensive or non-idempotent setup
292 | end
293 |
294 | def test_something
295 | # ...
296 | end
297 | end
298 | ```
299 |
300 | At the time, I thought this was clever, probably because of its condensed
301 | nature, but a few years later and I can see that this code is excessively tricky
302 | and has obvious, though minor, inefficiencies. This example exploits two tricks
303 | to create a sort of switch that doesn't fire the first time it's evaluated, but
304 | will fire on all subsequent evaluations.
305 |
306 | The first trick in this example takes advantage of the fact that accessing a
307 | nonexistent instance variable will never result in an error. The second trick
308 | takes advantage of the **&** operator to ensure that even when the
309 | **@setup_complete** instance variable is **nil**, a second statement is
310 | evaluated that will set **@setup_complete** to **true**, while still returning
311 | a falsy value (**nil & true** is **false**) to the **if** statement. These two tricks allow for the described
312 | behavior as more concisely demonstrated below:
313 |
314 | ```ruby
315 | def first_time_only
316 | return if @not_first_time & @not_first_time ||= true
317 | "Hello world!"
318 | end
319 |
320 | first_time_only
321 | # => "Hello world!"
322 |
323 | first_time_only
324 | # => nil
325 | ```
326 |
327 | The inefficiency of this approach that I referenced earlier is that the
328 | **@not_first_time** variable is going to be evaluated twice every time the
329 | **first_time_only** method is invoked, once on each of the left- and right-hand
330 | sides of the **&** operator. Since this evaluation is cheap, it's not the end
331 | of the world, but it starts to raise a question that has been nagging me as I've
332 | become more familiar with bitwise and eager Boolean operators: When is chaining
333 | logical expressions using eager Boolean operators a better choice than just
334 | splitting the expression into two statements?
335 |
336 | In terms of the **first_time_only** example above, the method could be rewritten
337 | like so by splitting the logical expression into two parts instead of relying on
338 | the tricky behavior of the **&** operator:
339 |
340 | ```ruby
341 | def first_time_only
342 | return if @not_first_time
343 | @not_first_time = true
344 | "Hello world!"
345 | end
346 | ```
347 |
348 | ## Examples from the real world
349 |
350 | I've led with two of my own examples not because of my acute egomania, but
351 | because frankly, I couldn't find many examples of bitwise Boolean operators,
352 | much less eager Boolean operators out there in the wild. Maybe there was a flaw
353 | in the regular expression I used to grep through the wealth of gems I've
354 | accumulated or maybe I've missed some genius examples in the noise of numerical
355 | bitwise expressions and Array intersections, I don't know.
356 |
357 | In the end, I was only able to find 4 examples, and unfortunately, three of
358 | those four were similar enough (two were exactly the same!) to make it really
359 | only worth mentioning one. Making matters worse, I'm not convinced any of the
360 | examples are using eager or bitwise Boolean operators in an effective way. But
361 | again, maybe I'm missing something. You be the judge.
362 |
363 | ### RubySpec: Three flavors of tainted?
364 |
365 | The three very similar examples I mentioned above come from the now defunct
366 | [RubySpec](https://github.com/rubyspec/rubyspec) project. Each occurs while
367 | testing whether a **String** has become tainted following a slice operation
368 | [[1]](https://github.com/rubyspec/rubyspec/blob/38b775a32293ce7ec5bdadaa7e70422fb5dc3a68/core/string/slice_spec.rb#L436)
369 | [[2]](https://github.com/rubyspec/rubyspec/blob/38b775a32293ce7ec5bdadaa7e70422fb5dc3a68/core/string/shared/slice.rb#L419)
370 | or a [concatenation using the **+** operator](https://github.com/rubyspec/rubyspec/blob/324c37bb67ea51f197954a37a2c71878eeadea01/core/string/plus_spec.rb#L41).
371 | The example testing concatenation with **+** is the shortest of the bunch, so
372 | let's have a look.
373 |
374 | ```ruby
375 | it "taints the result when self or other is tainted" do
376 | strs = ["", "OK", StringSpecs::MyString.new(""), StringSpecs::MyString.new("OK")]
377 | strs += strs.map { |s| s.dup.taint }
378 |
379 | strs.each do |str|
380 | strs.each do |other|
381 | (str + other).tainted?.should == (str.tainted? | other.tainted?)
382 | end
383 | end
384 | end
385 | ```
386 |
387 | In this example, a few instances of the **String** class and their tainted alter
388 | egos are created and then each of the instances is concatenated with each of the
389 | other instances using the **+** operator. For each concatenation produced, the
390 | result is tested to ensure that it is considered tainted if either of its
391 | parents were tainted. During the test to determine if a result **String** should
392 | be tainted or not, we find our bitwise Boolean friend, the **|** operator. But
393 | what advantage does the **|** operator offer in this situation over its
394 | short-circuiting counterpart, **||**?
395 |
396 | When **str.tainted?** is **true**, the result of the parenthetical expression
397 | will be **true**. However, keep in mind that **other.tainted?** will still be
398 | evaluated, though the result will be discarded. Unless there is some hidden side
399 | effect of calling **other.tainted?** at this point in the test, this seems like
400 | extraneous work to me. If there is a side effect to calling **other.tainted?**
401 | at this point in the test, that's a whole other problem because it seems quite
402 | possible that whatever that side effect is, it could have impacted the outcome
403 | of **(str + other).tainted?**, in which case, who knows what's really being
404 | tested. All this taken into account, I'm inclined to believe that
405 | short-circuiting would be a desirable alternative in this case.
406 |
407 | Conversely, when **str.tainted?** is **false**, the result of the parenthetical
408 | expression depends entirely on the outcome of **other.tainted?**. This may seem
409 | good in that when **other.tainted?** is **true**, the parenthetical expression
410 | will be **true** and when **other.tainted?** is **false**, the parenthetical
411 | expression will be **false**. However, as we discussed earlier, the eager
412 | Boolean operators only return **true** or **false** unlike their
413 | short-circuiting counterparts. This means that **other.tainted?** could return
414 | **:wtf?** or **nil** and the parenthetical expression would evaluate to **true**
415 | or **false**, respectively. Perhaps this coercion to **true** or **false** was
416 | the goal in choosing **|** over **||**, but in a test, particularly a test aimed
417 | at describing how the language itself should work, this seems like a bad idea to
418 | me.
419 |
420 | Overall, it seems like **||** would be a much better choice here than **|**, as
421 | it ensures the minimal amount of evaluation is performed while also ensuring
422 | that the output values of both **str.tainted?** and **other.tainted?** are
423 | tested for validity.
424 |
425 | ### Ruby: k-nucleotide benchmark
426 |
427 | The final example we'll look at is a Ruby implementation of the
428 | [k-nucleotide benchmark](http://benchmarksgame.alioth.debian.org/u32/performance.php?test=knucleotide#about).
429 | Unchanged since it was added to the Ruby source tree in 2007,
430 | [bm_so_k_nucleotide.rb](https://github.com/ruby/ruby/blob/75feee0968c9345e7ffd2bda9835fcd60b4c0880/benchmark/bm_so_k_nucleotide.rb#L40)
431 | utilizes the eager Boolean operator **&** to read lines from a file until a line
432 | is encountered that starts with ">".
433 |
434 | ```ruby
435 | while (line !~ /^>/) & line do
436 | seq << line.chomp
437 | line = input.gets
438 | end
439 | ```
440 |
441 | The purpose of this code is fairly straightforward, however what is less clear,
442 | is the utility of taking the eager logical conjunction (**&**) of **(line !~ /^>/)**
443 | and **line**.
444 |
445 | When the **!~** operation results in **false**, the right-hand
446 | side of the expression will be evaluated and the result discarded. It's
447 | important to keep in mind that this will only happen once because the result of
448 | **false** will end the loop, but more generally speaking, in circumstances
449 | similar to this there's no reason to waste CPU time extraneously evaluating the
450 | right-hand side of the expression. We can be pretty confident that this
451 | operation is wasteful because the value of **line** has no impact on the outcome
452 | of the logical expression and since we know that **line** is a reference to an
453 | object and not a method call, we know that the evaluation of **line** should not
454 | cause any side effects that might be worth preserving. Again though, since the
455 | eager evaluation is only going to happen once for this loop, it's really not of
456 | great concern.
457 |
458 | The case when the **!~** expression evaluates to **true** is a little trickier.
459 | One would think that when the left-hand side of the expression evaluates to
460 | **true**, there would be no point in evaluating **line** as we might expect that
461 | the value of **line** is a **String** that will be coerced into **true** by
462 | **&**. However, the **!~** operator is defined for more than just instances of
463 | **String**. In fact, **true**, **false**, **nil**, and anything that inherits
464 | from **Object** all implement the complement method to **!~**, **=~**, and by
465 | default they all return a value of **nil** for **=~**. This means that in most
466 | cases the **!~** operator will be negating **nil** which means the left-hand
467 | side is going to evaluate to **true** in a lot of cases we might not expect.
468 |
469 | In reality though, I suspect that the real reason the right-hand side of the
470 | expression is included is as a guard against **line** having a value of **nil**.
471 | If this is the case, then the only reason to choose **&** over **&&** would be
472 | the ability of **&** to coerce truthy values to **true**. If the result of the
473 | expression were being stored, this might make sense, however, since the result
474 | of the expression is being used as the condition for a **while** loop, it seems
475 | unlikely that this coercion would yield any perceivable benefit. As such, I
476 | think **&&** would be a better choice here because it is more familiar to most
477 | programmers and it will still guard against **nil** values.
478 |
479 | In the event that a value of **true** is easier for the **while** statement to
480 | consume than other truthy values, we can always flip the condition around like
481 | so:
482 |
483 | ```ruby
484 | while line && (line !~ /^>/) do
485 | # ...
486 | end
487 | ```
488 |
489 | This arrangement has the added benefit of removing the need for the parentheses
490 | and short-circuiting the **!~** operation in situations where **line** is falsy.
491 |
492 | But why stop there? Why explicitly guard against **nil** and **false** at all?
493 | Especially when every other **Object** out in the Ruby universe is going to slip
494 | right past this check, resulting in a **NoMethodError** when the program
495 | attempts to call **chomp** on an object that doesn't support **chomp**. When it
496 | comes down to it, the condition of this **while** loop is pretty inadequate.
497 |
498 | A lot of the problem with the condition comes from the negation of the **=~**
499 | operation. What if we could avoid that? Given the regular expression of
500 | **/^>/**, it would seem that we're on the lookout for any line that starts with
501 | ">". But, what if, instead, we changed the condition such that it were **true**
502 | as long as a line started with anything other than ">"? This can be achieved by
503 | modifying the regular expression and would change the **while** loop to look
504 | like so:
505 |
506 | ```ruby
507 | while line =~ /^[^>]/ do
508 | # ...
509 | end
510 | ```
511 |
512 | Though the regular expression is more complex, I think the whole expression is
513 | much easier to reason about without the negation, extra logical expression, and
514 | parentheses.
515 |
516 | I've gotten a little off topic here, so we should move on, but before we do so,
517 | here are a few benchmarks generated using the [benchmark-ips gem](https://github.com/evanphx/benchmark-ips)
518 | for the **&**, **&&**, and altered **Regexp** versions of the **while**
519 | loop when run in the actual context of the nucleotide benchmark:
520 |
521 | ```ruby
522 | # Calculating ----------------------------------------------
523 | # & 2.000 i/100ms
524 | # && 2.000 i/100ms
525 | # Alternate Regexp 3.000 i/100ms
526 | # ----------------------------------------------------------
527 | # & 27.538 (± 3.6%) i/s - 550.000
528 | # && 28.092 (± 3.6%) i/s - 562.000
529 | # Alternate Regexp 29.000 (± 3.4%) i/s - 582.000
530 | # ----------------------------------------------------------
531 | ```
532 |
533 | Very minor performance differences, but another case where bitwise Boolean
534 | operators don't seem to be the best choice for the job.
535 |
536 | ## Optimization by branch avoidance
537 |
538 | Having been through a few examples of eager Boolean operators in Ruby, I imagine
539 | your opinions on the matter are starting to coalesce; I know mine certainly
540 | are. Though I started this article to get a better understanding of when and
541 | why one might want to use eager Boolean operators, the more research I've done,
542 | the more the question for me has become "Why would I ever want to use bitwise or
543 | eager Boolean operators?"
544 |
545 | If you looked at the [list of programming languages that support both short-circuiting and eager Boolean operators](https://en.wikipedia.org/wiki/Short-circuit_evaluation#Support_in_common_programming_languages)
546 | I referenced earlier, you may have noticed that quite a few languages support
547 | both types of operators. This seems like a clue that there is a strong
548 | reason to have both types of operators. However, though perhaps my Google-fu
549 | simply failed me, I really couldn't find a strong argument for using eager Boolean operators.
550 |
551 | The best argument I came across that we haven't already discussed in some form
552 | comes from [a Stack Overflow question asking about the difference between the
553 | **||** operator and the **|** operator](https://stackoverflow.com/questions/7101992/why-do-we-usually-use-not-what-is-the-difference/7105382#7105382).
554 | All the way down 8 or 9 answers in is [an answer from Peter Lawrey](http://stackoverflow.com/a/7105382/1169710)
555 | that I think has some merit. Peter writes:
556 |
557 | > Maybe use [eager Boolean operators] when you have very simple boolean
558 | > expressions and the cost of short cutting (i.e. a branch) is greater than the
559 | > time you save by not evaluating the later expressions.
560 |
561 | I was certainly intrigued by this idea, especially since one of the commenters
562 | on Peter's answer claimed to have actually come across this behavior on some
563 | CPUs.
564 |
565 | I could see this type of behavior pretty easily existing in a lower level
566 | language like C, but I had reservations about whether or not something that must
567 | be a pretty minor micro-optimization could bubble all the way up into a higher
568 | level language like Ruby. To find out, I put together the following benchmark,
569 | again making use of the [benchmark-ips gem](https://github.com/evanphx/benchmark-ips):
570 |
571 | ```ruby
572 | require "benchmark/ips"
573 |
574 | Benchmark.ips do |bm|
575 | bm.config(:time => 20, :warmup => 5)
576 |
577 | bm.report(";") { true ; true }
578 | bm.report("&&") { true && true }
579 | bm.report("&") { true & true }
580 | end
581 | ```
582 |
583 | The goal of this benchmark is to use the simplest case possible to get an idea
584 | of the cost of branching compared to a more strict eager evaluation alternative.
585 | To this end, both the **&&** and **&** operators are benchmarked. In addition,
586 | to provide a baseline, the benchmarks also include a version that simply
587 | evaluates **true** twice to ensure a benchmark that includes no branching or
588 | other silly business. I found the results surprising:
589 |
590 | ```ruby
591 | # Calculating -------------------------
592 | # ; 131.478k i/100ms
593 | # && 128.222k i/100ms
594 | # & 126.305k i/100ms
595 | # -------------------------------------
596 | # ; 9.346M (± 3.4%) i/s - 186.699M
597 | # && 8.867M (± 3.2%) i/s - 177.075M
598 | # & 7.812M (± 2.6%) i/s - 156.113M
599 | # -------------------------------------
600 | ```
601 |
602 | I wasn't surprised to find that **&** wasn't faster than **&&**, but what did
603 | surprise me was how much slower **&** actually was compared to **&&**,
604 | especially in a case where I expected there to be a fairly negligible
605 | difference. It's pretty clear from this benchmark that, at least in Ruby, any
606 | branching that's avoided by using the **&** operator is insignificant in
607 | comparison to other overhead. But what could that other overhead be? Though it
608 | may surprise you, that overhead is a method call. *Say what?*
609 |
610 | ## Holy method calls, Batman!
611 |
612 | As it turns out, in the case of Boolean values, bitwise operators like **&** and
613 | **|** aren't so much operators as they are methods on **TrueClass**,
614 | **FalseClass**, and **NilClass**! Consider for example the C source of the
615 | bitwise **|** method on **TrueClass**:
616 |
617 | ```c
618 | static VALUE
619 | true_or(VALUE obj, VALUE obj2)
620 | {
621 | return Qtrue;
622 | }
623 | ```
624 |
625 | [View on GitHub](https://github.com/ruby/ruby/blob/16294913f71b8a38526096cf6458340b19b45f9f/object.c#L1247)
626 |
627 | Thankfully, this is one of the simplest examples of Ruby's C source you'll come
628 | across. Though it's simple to read, the nuance of what is going on here is a
629 | little more complicated. The **true_or** method is simply a method that takes
630 | two arguments (actually only one really since the first argument will always be
631 | the **true** singleton), and regardless of what those arguments are, returns
632 | **true**. What may not be completely obvious from this code is how this method
633 | implementation leads to the eager evaluation of the right-hand side of a logical
634 | expression.
635 |
636 | Throughout this article we've treated **|** like a primitive operator; perhaps
637 | if we treat it more like a method call, it will make it more obvious how this
638 | simple method equates to eager evaluation. Let's consider something along the
639 | lines of the simplest possible case and while we're at it, let's see if
640 | **||** is also implemented as a method on **TrueClass**. Let's see what happens
641 | if we try to use **Object#send**:
642 |
643 | ```ruby
644 | true.send("||", true)
645 | # => NoMethodError: undefined method `||' for true:TrueClass
646 |
647 | true.send("|", true)
648 | # => true
649 | ```
650 |
651 | Interesting! So we've learned that **||** is not a method, but must be a more
652 | primitive operator. Additionally, we can see much more clearly now that **|** is
653 | definitely a method of **TrueClass**.
654 |
655 | With some closer examination, this example should also help make it clear how
656 | implementing **TrueClass#|** as a method call leads to eager evaluation. Though
657 | the argument we passed to **TrueClass#|** in the example above was a primitive
658 | **true** value, it could have been any arbitrary Ruby expression. Unlike **||**
659 | which could completely ignore the right-hand side of the expression when the
660 | left-hand side of the operation is **true**, **TrueClass#|** cannot skip the
661 | right-hand side of the expression because it is a method call. In fact, before
662 | **TrueClass#|** is invoked, the RubyVM has already evaluated the right-hand side
663 | of the expression, reducing it to the value that will be used as the argument to
664 | **TrueClass#|**.
665 |
666 | So, that's the magic behind one of the eager bitwise Boolean operators. What
667 | about one of the other bitwise Boolean operators? How is that implemented? Is it
668 | also a method call? As it turns out, yes. Consider the implementation of
669 | **TrueClass#&**:
670 |
671 | ```c
672 | static VALUE
673 | true_and(VALUE obj, VALUE obj2)
674 | {
675 | return RTEST(obj2)?Qtrue:Qfalse;
676 | }
677 | ```
678 |
679 | [View on GitHub](https://github.com/ruby/ruby/blob/16294913f71b8a38526096cf6458340b19b45f9f/object.c#L1225)
680 |
681 | Thankfully, this method is also pretty easy to read. It's a little more
682 | complicated than **TrueClass#|**, but it's pretty easy to see that the method
683 | evaluates the **RTEST** macro on **obj2** and returns **true** or **false**
684 | depending on the outcome of that evaluation. I won't go into the inner workings
685 | of **RTEST**, but you can view [the C source for the **RTEST** macro here](https://github.com/ruby/ruby/blob/01195a202cb9fcc6ddb6cf793868e4c7d85292dc/include/ruby/ruby.h#L422)
686 | if you're interested. Basically, **RTEST** uses a couple of numeric bitwise
687 | operations to determine if its argument is **false** or **nil** and if not
688 | returns **true**, which in turn causes **true_and** to do the same.
689 |
690 | Okay, so given all that, it should make more sense that using a bitwise/eager
691 | Boolean operator would be slower than a more primitive operator. Unfortunately
692 | though, slower execution is not the only drawback of these method-based
693 | bitwise Boolean operators.
694 |
695 | ## Inconsistent precedence
696 |
697 | The fundamentally different nature of the method-based bitwise Boolean
698 | operators and the more primitive logical Boolean operators is unfortunately not
699 | without consequence. The overhead of a method call is only one consequence.
700 | Another consequence is that the bitwise Boolean operators have a different
701 | precedence than their logical cousins.
702 |
703 | I won't get into the nature of [precedence, or order of operations,](https://en.wikipedia.org/wiki/Order_of_operations)
704 | in this article, but I will offer these examples for your consideration:
705 |
706 | ```ruby
707 | true || 1 && 3
708 | # => true
709 |
710 | true | 1 && 3
711 | # => 3
712 |
713 | # wtf?
714 | # `true || 1 && 3` evaluates like `true || (1 && 3)` while
715 | # `true | 1 && 3` evaluates like `(true | 1) && 3`
716 |
717 |
718 | false && true ^ true
719 | # => false
720 |
721 | false & true ^ true
722 | # => true
723 |
724 | # wtf?
725 | # `false && true ^ true` evaluates like `false && (true ^ true)` while
726 | # `false & true ^ true` evaluates like `(false & true) ^ true`
727 | ```
728 |
729 | As if the bitwise Boolean operators didn't have enough going against them, the
730 | differences in operator precedence reek too much of a 4-hour debugging session
731 | for my taste.
732 |
733 | ## The case against bitwise Boolean operators
734 |
735 | Though I started this article with an agenda for finding a use-case appropriate
736 | for eager Boolean operators, the search for such a use-case has ultimately led
737 | me to the opposite end of the spectrum. Where once I sought to bring light to
738 | eager Boolean operators, I now find myself at odds with the whole family of
739 | bitwise Boolean operators. We've been through many of the arguments against, but
740 | here they are again, in summary:
741 |
742 | - Rare usage in community code suggests limited understanding and familiarity
743 | - The primary benefit of eager evaluation is side effects.
744 | - Side effects make the code harder to debug, harder to reason about, and
745 | harder to test.
746 | - Errors encountered during eager evaluation occur before assignment operations
747 | - Even if errors during eager evaluation are caught, the value of the logical
748 | expression is lost.[^2]
749 | - Bitwise Boolean operators have too many differences from their logical
750 | counterparts.
751 | - Return values are converted to Booleans
752 | - Operator precedence is different
753 | - Operators are implemented as method calls, which are about 10% slower
754 | - Can only be invoked on **true**, **false**, or **nil**
755 |
756 | With such an abundance of arguments against, arguments in favor had better
757 | be significant in length or benefit. Unfortunately, they're not.
758 |
759 | - Conversion of return values to Booleans is slightly faster than double negation.
760 | - Eager evaluation?
761 | - Maybe useful in irb?
762 |
763 | I didn't expect to find so many reasons not to use eager or bitwise Boolean
764 | operators, but maybe that's part of the reason I had so much trouble finding
765 | examples of bitwise Boolean operators at large. With the evidence laid out
766 | before you, I hope you will join me in continuing to never use any of the
767 | bitwise Boolean operators in Ruby without a comment and a damn good reason.
768 |
769 | Thanks for reading!
770 |
771 | *Have I missed something? Do you know of an example of bitwise and/or eager
772 | Boolean operators being used effectively? Have I got it all wrong? Leave me a
773 | comment and let me know! I'd love to hear your feedback and/or find a
774 | legitimate reason to utilize the family of bitwise Boolean operators.*
775 |
776 | [^1]: [Short-circuit evaluation - Support in common programming languages](https://en.wikipedia.org/wiki/Short-circuit_evaluation#Support_in_common_programming_languages)
777 | [^2]: [Gist: Errors during eager evaluation cause result of logical expression to be lost](https://gist.github.com/tdg5/12eccaae6132e72c0490)
778 |
--------------------------------------------------------------------------------