├── html └── .gitkeep ├── .rspec ├── README.md ├── lib ├── blog_snippets.rb └── blog_snippets │ ├── version.rb │ ├── articles │ ├── attr_optimizations.rb │ ├── attr_optimizations │ │ ├── minimalist_attrs.rb │ │ └── excessive_attrs.rb │ ├── tail_call_optimization_in_ruby_internals │ │ ├── fact.rb │ │ ├── fib.rb │ │ ├── opt_send_without_block.vm.inc │ │ ├── fact_disasm.txt │ │ ├── fact_tco_disasm.txt │ │ ├── tail_optimized_reload.rb │ │ ├── fib_tco_disasm.txt │ │ ├── fib_disasm.txt │ │ ├── from_call_method_to_tco.c │ │ └── README.md │ ├── on_the_road_from_ruby_journeyman_to_ruby_master │ │ ├── int_from_ord_diff_benchmark.rb │ │ └── README.md │ ├── tuning_dd_block_size │ │ ├── dd_obs_test.sh │ │ ├── dd_ibs_test.sh │ │ └── README.md │ ├── eager_boolean_operators │ │ ├── notes.txt │ │ └── README.md │ ├── introducing_the_tco_method_gem │ │ └── README.md │ ├── tail_call_optimization_in_ruby_background │ │ └── README.md │ └── module_factory_for_dependency_management │ │ └── README.md │ ├── markdown_to_html_transformer.rb │ └── renderers │ └── wordpress_html_renderer.rb ├── test ├── test_helper.rb ├── concerns │ ├── coverage.rb │ └── test_case.rb └── unit │ ├── blog_snippets_test.rb │ └── markdown_to_html_transformer_test.rb ├── .travis.yml ├── script ├── wp-console └── update_remote_revision ├── Guardfile ├── Gemfile ├── .gitignore ├── blog_snippets.gemspec ├── Rakefile ├── LICENSE └── notes.txt /html/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.rspec: -------------------------------------------------------------------------------- 1 | --color 2 | --require spec_helper 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Blog Snippets 2 | 3 | Code snippets from my blog (http://blog.tdg5.com) 4 | 
-------------------------------------------------------------------------------- /lib/blog_snippets.rb: -------------------------------------------------------------------------------- 1 | require "blog_snippets/version" 2 | 3 | module BlogSnippets 4 | end 5 | -------------------------------------------------------------------------------- /lib/blog_snippets/version.rb: -------------------------------------------------------------------------------- 1 | module BlogSnippets 2 | VERSION = "0.0.1".freeze 3 | end 4 | -------------------------------------------------------------------------------- /test/test_helper.rb: -------------------------------------------------------------------------------- 1 | require "concerns/coverage" if ENV["CI"] 2 | require "minitest/autorun" 3 | require "mocha/setup" 4 | require "blog_snippets" 5 | require "concerns/test_case" 6 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: ruby 2 | 3 | rvm: 4 | - 1.9.3 5 | - 2.0.0 6 | - 2.1.0 7 | - 2.2.0 8 | - jruby-19mode 9 | - jruby-head 10 | - rbx-19mode 11 | - rbx-2 12 | -------------------------------------------------------------------------------- /lib/blog_snippets/articles/attr_optimizations.rb: -------------------------------------------------------------------------------- 1 | require "blog_snippets/articles/attr_optimizations/minimalist_attrs" 2 | require "blog_snippets/articles/attr_optimizations/excessive_attrs" 3 | -------------------------------------------------------------------------------- /test/concerns/coverage.rb: -------------------------------------------------------------------------------- 1 | require "simplecov" 2 | require "coveralls" 3 | SimpleCov.formatter = Coveralls::SimpleCov::Formatter 4 | SimpleCov.root(File.expand_path("../../lib", __FILE__)) 5 | SimpleCov.start 6 | 
-------------------------------------------------------------------------------- /script/wp-console: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | 3 | require "ruby-wpdb" 4 | require "pry" 5 | 6 | db_config = YAML.load(File.read(File.expand_path("../../config/database.yml", __FILE__))) 7 | WPDB.init(db_config["database_url"], db_config["wp_prefix"]) 8 | 9 | Pry.start 10 | -------------------------------------------------------------------------------- /lib/blog_snippets/articles/attr_optimizations/minimalist_attrs.rb: -------------------------------------------------------------------------------- 1 | module BlogSnippets 2 | module AttrOptimizations 3 | class MinimalistAttrs 4 | attr_accessor :accessor 5 | attr_reader :reader 6 | attr_writer :writer 7 | end 8 | end 9 | end 10 | -------------------------------------------------------------------------------- /test/concerns/test_case.rb: -------------------------------------------------------------------------------- 1 | # Use alternate shoulda-style DSL for tests 2 | class BlogSnippets::TestCase < Minitest::Spec 3 | class << self 4 | alias :setup :before 5 | alias :teardown :after 6 | alias :context :describe 7 | alias :should :it 8 | end 9 | end 10 | -------------------------------------------------------------------------------- /Guardfile: -------------------------------------------------------------------------------- 1 | guard(:minitest, :all_after_pass => false, :all_on_start => false) do 2 | watch(%r{^lib/blog_snippets\.rb$}) { "test" } 3 | watch(%r{^lib/blog_snippets/(.+)\.rb$}) { |m| "test/unit/#{m[1]}_test.rb" } 4 | watch(%r{^test/.+_test\.rb$}) 5 | watch(%r{^(?:test/test_helper|test/concerns/)(.*)\.rb$}) { "test" } 6 | end 7 | -------------------------------------------------------------------------------- /test/unit/blog_snippets_test.rb: -------------------------------------------------------------------------------- 1 | require 
"test_helper" 2 | 3 | class BlogSnippetsTest < BlogSnippets::TestCase 4 | Subject = BlogSnippets 5 | 6 | subject { Subject } 7 | 8 | context Subject.name do 9 | should "be defined" do 10 | assert defined?(subject), "Expected #{subject.name} to be defined!" 11 | end 12 | end 13 | end 14 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source "https://rubygems.org" 2 | 3 | gemspec 4 | 5 | gem "pry" 6 | gem "redcarpet", :platform => %w[mri] 7 | gem "ruby-wpdb", :git => "https://github.com/tdg5-wordpress/ruby-wpdb.git", :branch => :master 8 | 9 | group :test do 10 | gem "coveralls", :require => false 11 | gem "guard" 12 | gem "guard-minitest" 13 | gem "minitest", ">= 3.0" 14 | gem "mocha" 15 | gem "simplecov", :require => false 16 | end 17 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | Gemfile.lock 2 | 3 | *.gem 4 | *.rbc 5 | /.config 6 | /coverage/ 7 | /html/ 8 | /InstalledFiles 9 | /pkg/ 10 | /spec/reports/ 11 | /test/tmp/ 12 | /test/version_tmp/ 13 | /tmp/ 14 | 15 | ## Documentation cache and generated files: 16 | /.yardoc/ 17 | /_yardoc/ 18 | /doc/ 19 | /rdoc/ 20 | 21 | ## Environment normalisation: 22 | /.bundle/ 23 | /lib/bundler/man/ 24 | 25 | ## Random 26 | /src.html 27 | config/database.yml 28 | -------------------------------------------------------------------------------- /lib/blog_snippets/articles/attr_optimizations/excessive_attrs.rb: -------------------------------------------------------------------------------- 1 | module BlogSnippets 2 | module AttrOptimizations 3 | class ExcessiveAttrs 4 | def accessor 5 | @accessor 6 | end 7 | 8 | def accessor=(value) 9 | @accessor = value 10 | end 11 | 12 | def reader 13 | @reader 14 | end 15 | 16 | def writer=(value) 17 | @writer = value 18 | end 19 | end 
20 | end 21 | end 22 | -------------------------------------------------------------------------------- /lib/blog_snippets/articles/tail_call_optimization_in_ruby_internals/fact.rb: -------------------------------------------------------------------------------- 1 | code = <<-CODE 2 | class Factorial 3 | def self.fact_helper(n, res) 4 | n == 1 ? res : fact_helper(n - 1, n * res) 5 | end 6 | 7 | def self.fact(n) 8 | fact_helper(n, 1) 9 | end 10 | end 11 | CODE 12 | 13 | { 14 | "normal" => { :tailcall_optimization => false, :trace_instruction => false }, 15 | "tail call optimized" => { :tailcall_optimization => true, :trace_instruction => false }, 16 | }.each do |identifier, compile_options| 17 | instruction_sequence = RubyVM::InstructionSequence.new(code, nil, nil, nil, compile_options) 18 | puts "#{identifier}:\n#{instruction_sequence.disasm}" 19 | end 20 | -------------------------------------------------------------------------------- /blog_snippets.gemspec: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | lib = File.expand_path("../lib", __FILE__) 3 | $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) 4 | require "blog_snippets/version" 5 | 6 | Gem::Specification.new do |spec| 7 | spec.name = "blog_snippets" 8 | spec.version = BlogSnippets::VERSION 9 | spec.authors = ["Danny Guinther"] 10 | spec.email = ["dannyguinther@gmail.com"] 11 | spec.summary = %q{Code snippets from my blog.} 12 | spec.description = %q{Code snippets from my blog: http://blog.tdg5.com} 13 | spec.homepage = "https://github.com/tdg5/blog_snippets" 14 | spec.license = "MIT" 15 | 16 | spec.files = `git ls-files -z`.split("\x0") 17 | spec.test_files = spec.files.grep(%r{^(test|spec|features)/}) 18 | spec.require_paths = ["lib"] 19 | 20 | spec.add_development_dependency "bundler", "~> 1.6" 21 | spec.add_development_dependency "rake" 22 | end 23 | -------------------------------------------------------------------------------- /Rakefile: 
-------------------------------------------------------------------------------- 1 | require "bundler/gem_tasks" 2 | require "rake/testtask" 3 | 4 | Rake::TestTask.new do |t| 5 | t.libs << "test" 6 | t.pattern = "test/**/*_test.rb" 7 | end 8 | 9 | task :generate_html, [:source_path] do |tsk, arguments| 10 | require "redcarpet" 11 | require "blog_snippets/renderers/wordpress_html_renderer" 12 | require "blog_snippets/markdown_to_html_transformer" 13 | 14 | source_path = arguments[:source_path] || ENV["SOURCE"] 15 | source_path = File.expand_path(File.join("..", source_path), __FILE__) 16 | raise "#{source_path} does not exist!" unless File.exist?(source_path) 17 | raw_source = File.open(source_path, "r") { |f| f.read } 18 | renderer = BlogSnippets::Renderers::WordpressHTMLRenderer.new 19 | transformer = BlogSnippets::MarkdownToHTMLTransformer.new({ 20 | :parser_class => Redcarpet::Markdown, 21 | :renderer => renderer 22 | }) 23 | html = transformer.transform(raw_source) 24 | puts "---- BEGIN COPY ----\n#{html}\n---- END COPY ----" 25 | end 26 | 27 | task :default => :test 28 | -------------------------------------------------------------------------------- /lib/blog_snippets/articles/tail_call_optimization_in_ruby_internals/fib.rb: -------------------------------------------------------------------------------- 1 | { 2 | "Fib" => { :tailcall_optimization => false, :trace_instruction => false }, 3 | "TCOFib" => { :tailcall_optimization => true, :trace_instruction => false }, 4 | }.each do |class_name, compile_options| 5 | RubyVM::InstructionSequence.compile_option = compile_options 6 | code = <<-CODE 7 | module BlogSnippets 8 | module #{class_name} 9 | def self.acc(i, n, result) 10 | if i == -1 11 | result 12 | else 13 | acc(i - 1, n + result, n) 14 | end 15 | end 16 | 17 | def self.fib(i) 18 | acc(i, 1, 0) 19 | end 20 | end 21 | end 22 | CODE 23 | instruction_sequence = RubyVM::InstructionSequence.new(code) 24 | 25 | puts 
"#{class_name}:\n#{instruction_sequence.disasm}" 26 | instruction_sequence.eval 27 | end 28 | 29 | # Reset compile options 30 | RubyVM::InstructionSequence.compile_option = { :tailcall_optimization => false, :trace_instruction => true } 31 | -------------------------------------------------------------------------------- /lib/blog_snippets/articles/tail_call_optimization_in_ruby_internals/opt_send_without_block.vm.inc: -------------------------------------------------------------------------------- 1 | INSN_ENTRY(opt_send_without_block){ 2 | { 3 | VALUE val; 4 | CALL_INFO ci = (CALL_INFO)GET_OPERAND(1); 5 | 6 | DEBUG_ENTER_INSN("opt_send_without_block"); 7 | ADD_PC(1+1); 8 | PREFETCH(GET_PC()); 9 | #define CURRENT_INSN_opt_send_without_block 1 10 | #define INSN_IS_SC() 0 11 | #define INSN_LABEL(lab) LABEL_opt_send_without_block_##lab 12 | #define LABEL_IS_SC(lab) LABEL_##lab##_##t 13 | COLLECT_USAGE_INSN(BIN(opt_send_without_block)); 14 | COLLECT_USAGE_OPERAND(BIN(opt_send_without_block), 0, ci); 15 | { 16 | ci->argc = ci->orig_argc; 17 | vm_search_method(ci, ci->recv = TOPN(ci->argc)); 18 | CALL_METHOD(ci); 19 | 20 | CHECK_VM_STACK_OVERFLOW_FOR_INSN(REG_CFP, 1); 21 | PUSH(val); 22 | #undef CURRENT_INSN_opt_send_without_block 23 | #undef INSN_IS_SC 24 | #undef INSN_LABEL 25 | #undef LABEL_IS_SC 26 | END_INSN(opt_send_without_block); 27 | } 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2015 Danny Guinther 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining 4 | a copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is 
furnished to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be 12 | included in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /script/update_remote_revision: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | 3 | require "ruby-wpdb" 4 | require "redcarpet" 5 | require "blog_snippets/renderers/wordpress_html_renderer" 6 | require "blog_snippets/markdown_to_html_transformer" 7 | require "yaml" 8 | 9 | source_path = ARGV[1] 10 | raise "#{source_path} does not exist!" 
unless File.exist?(source_path) 11 | 12 | db_config = YAML.load(File.read(File.expand_path("../../config/database.yml", __FILE__))) 13 | WPDB.init(db_config["database_url"], db_config["wp_prefix"]) 14 | 15 | raw_source = File.read(source_path) 16 | renderer = BlogSnippets::Renderers::WordpressHTMLRenderer.new 17 | transformer = BlogSnippets::MarkdownToHTMLTransformer.new({ 18 | :parser_class => Redcarpet::Markdown, 19 | :renderer => renderer 20 | }) 21 | post_content = transformer.transform(raw_source) 22 | 23 | post_id = Integer(ARGV[0]) 24 | post = WPDB::Post.where(:id => post_id).first 25 | last_revision = post.revisions.last 26 | revision = WPDB::Post.new 27 | last_revision.keys.each {|key| revision.send("#{key}=", post.send(key)) } 28 | revision.ID = nil 29 | revision.instance_variable_set(:@new, true) 30 | revision.post_content = post_content 31 | revision.post_modified = Time.now 32 | revision.post_modified_gmt = Time.now.utc 33 | revision.save 34 | -------------------------------------------------------------------------------- /lib/blog_snippets/markdown_to_html_transformer.rb: -------------------------------------------------------------------------------- 1 | module BlogSnippets 2 | class MarkdownToHTMLTransformer 3 | 4 | DEFAULT_MARKDOWN_EXTENSIONS = { 5 | :autolink => true, 6 | :disable_indented_code_blocks => true, 7 | :fenced_code_blocks => true, 8 | :footnotes => true, 9 | :no_intra_emphasis => true, 10 | :space_after_headers => true, 11 | :strikethrough => true, 12 | :tables => true, 13 | :underline => true, 14 | } 15 | 16 | attr_reader :markdown_extensions, :renderer 17 | 18 | def self.default_markdown_extensions 19 | const_get(:DEFAULT_MARKDOWN_EXTENSIONS).dup 20 | end 21 | 22 | def initialize(options = {}) 23 | raise ArgumentError, ":renderer is required!" unless options[:renderer] 24 | raise ArgumentError, ":parser_class is required!" 
unless options[:parser_class] 25 | 26 | @renderer = options[:renderer] 27 | @parser_class = options[:parser_class] 28 | @markdown_extensions = options[:markdown_extensions] || default_markdown_extensions 29 | end 30 | 31 | def parser 32 | @parser ||= parser_class.new(renderer, @markdown_extensions) 33 | end 34 | 35 | def transform(markdown) 36 | parser.render(markdown) 37 | end 38 | 39 | private 40 | 41 | attr_reader :parser_class 42 | 43 | def default_markdown_extensions 44 | self.class.default_markdown_extensions 45 | end 46 | end 47 | end 48 | -------------------------------------------------------------------------------- /lib/blog_snippets/articles/on_the_road_from_ruby_journeyman_to_ruby_master/int_from_ord_diff_benchmark.rb: -------------------------------------------------------------------------------- 1 | require "benchmark/ips" 2 | 3 | NUMBER = "7316717653133062491922511967442657474235534919493496983520312774506326239578318016984801869478851843858615607891129494954595017379583319528532088055111254069874715852386305071569329096329522744304355766896648950445244523161731856403098711121722383113622298934233803081353362766142828064444866452387493035890729629049156044077239071381051585930796086670172427121883998797908792274921901699720888093776657273330010533678812202354218097512545405947522435258490771167055601360483958644670632441572215539753697817977846174064955149290862569321978468622482839722413756570560574902614079729686524145351004748216637048440319989000889524345065854122758866688116427171479924442928230863465674813919123162824586178664583591245665294765456828489128831426076900422421902267105562632111110937054421750694165896040807198403850962455444362981230987879927244284909188845801561660979191338754992005240636899125607176060588611646710940507754100225698315520005593572972571636269561882670428252483600823257530420752963450" 4 | CHARS = NUMBER.each_char.to_a 5 | ZERO_ORD = "0".ord.freeze 6 | ORD_PROC = proc { |char| char.ord - ZERO_ORD } 7 | 8 | 
Benchmark.ips do |bm| 9 | bm.report("String#to_i") { CHARS.each(&:to_i) } 10 | bm.report("String#ord - ZERO_ORD") { CHARS.each(&ORD_PROC) } 11 | end 12 | 13 | # Calculating ----------------------------------------- 14 | # String#to_i 836.000 i/100ms 15 | # String#ord - ZERO_ORD 1.083k i/100ms 16 | # ----------------------------------------------------- 17 | # String#to_i 8.473k (± 1.2%) i/s - 42.636k 18 | # String#ord - ZERO_ORD 10.859k (± 1.4%) i/s - 55.233k 19 | -------------------------------------------------------------------------------- /lib/blog_snippets/articles/tuning_dd_block_size/dd_obs_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Since we're dealing with dd, abort if any errors occur 4 | set -e 5 | 6 | TEST_FILE=${1:-dd_obs_testfile} 7 | TEST_FILE_EXISTS=0 8 | if [ -e "$TEST_FILE" ]; then TEST_FILE_EXISTS=1; fi 9 | TEST_FILE_SIZE=134217728 10 | 11 | if [ $EUID -ne 0 ]; then 12 | echo "NOTE: Kernel cache will not be cleared between tests without sudo. This will likely cause inaccurate results." 1>&2 13 | fi 14 | 15 | # Header 16 | PRINTF_FORMAT="%8s : %s\n" 17 | printf "$PRINTF_FORMAT" 'block size' 'transfer rate' 18 | 19 | # Block sizes of 512b 1K 2K 4K 8K 16K 32K 64K 128K 256K 512K 1M 2M 4M 8M 16M 32M 64M 20 | for BLOCK_SIZE in 512 1024 2048 4096 8192 16384 32768 65536 131072 262144 524288 1048576 2097152 4194304 8388608 16777216 33554432 67108864 21 | do 22 | # Calculate number of segments required to copy 23 | COUNT=$(($TEST_FILE_SIZE / $BLOCK_SIZE)) 24 | 25 | if [ $COUNT -le 0 ]; then 26 | echo "Block size of $BLOCK_SIZE estimated to require $COUNT blocks, aborting further tests." 
27 | break 28 | fi 29 | 30 | # Clear kernel cache to ensure more accurate test 31 | [ $EUID -eq 0 ] && [ -e /proc/sys/vm/drop_caches ] && echo 3 > /proc/sys/vm/drop_caches 32 | 33 | # Create a test file with the specified block size 34 | DD_RESULT=$(dd if=/dev/zero of=$TEST_FILE bs=$BLOCK_SIZE count=$COUNT conv=fsync 2>&1 1>/dev/null) 35 | 36 | # Extract the transfer rate from dd's STDERR output 37 | TRANSFER_RATE=$(echo $DD_RESULT | \grep --only-matching -E '[0-9.]+ ([MGk]?B|bytes)/s(ec)?') 38 | 39 | # Clean up the test file if we created one 40 | if [ $TEST_FILE_EXISTS -ne 0 ]; then rm $TEST_FILE; fi 41 | 42 | # Output the result 43 | printf "$PRINTF_FORMAT" "$BLOCK_SIZE" "$TRANSFER_RATE" 44 | done 45 | -------------------------------------------------------------------------------- /lib/blog_snippets/articles/tuning_dd_block_size/dd_ibs_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Since we're dealing with dd, abort if any errors occur 4 | set -e 5 | 6 | TEST_FILE=${1:-dd_ibs_testfile} 7 | if [ -e "$TEST_FILE" ]; then TEST_FILE_EXISTS=$?; fi 8 | TEST_FILE_SIZE=134217728 9 | 10 | # Exit if file exists 11 | if [ -e $TEST_FILE ]; then 12 | echo "Test file $TEST_FILE exists, aborting." 13 | exit 1 14 | fi 15 | TEST_FILE_EXISTS=1 16 | 17 | if [ $EUID -ne 0 ]; then 18 | echo "NOTE: Kernel cache will not be cleared between tests without sudo. This will likely cause inaccurate results." 1>&2 19 | fi 20 | 21 | # Create test file 22 | echo 'Generating test file...' 
23 | BLOCK_SIZE=65536 24 | COUNT=$(($TEST_FILE_SIZE / $BLOCK_SIZE)) 25 | dd if=/dev/urandom of=$TEST_FILE bs=$BLOCK_SIZE count=$COUNT conv=fsync > /dev/null 2>&1 26 | 27 | # Header 28 | PRINTF_FORMAT="%8s : %s\n" 29 | printf "$PRINTF_FORMAT" 'block size' 'transfer rate' 30 | 31 | # Block sizes of 512b 1K 2K 4K 8K 16K 32K 64K 128K 256K 512K 1M 2M 4M 8M 16M 32M 64M 32 | for BLOCK_SIZE in 512 1024 2048 4096 8192 16384 32768 65536 131072 262144 524288 1048576 2097152 4194304 8388608 16777216 33554432 67108864 33 | do 34 | # Clear kernel cache to ensure more accurate test 35 | [ $EUID -eq 0 ] && [ -e /proc/sys/vm/drop_caches ] && echo 3 > /proc/sys/vm/drop_caches 36 | 37 | # Read test file out to /dev/null with specified block size 38 | DD_RESULT=$(dd if=$TEST_FILE of=/dev/null bs=$BLOCK_SIZE 2>&1 1>/dev/null) 39 | 40 | # Extract transfer rate 41 | TRANSFER_RATE=$(echo $DD_RESULT | \grep --only-matching -E '[0-9.]+ ([MGk]?B|bytes)/s(ec)?') 42 | 43 | printf "$PRINTF_FORMAT" "$BLOCK_SIZE" "$TRANSFER_RATE" 44 | done 45 | 46 | # Clean up the test file if we created one 47 | if [ $TEST_FILE_EXISTS -ne 0 ]; then rm $TEST_FILE; fi 48 | -------------------------------------------------------------------------------- /lib/blog_snippets/articles/tail_call_optimization_in_ruby_internals/fact_disasm.txt: -------------------------------------------------------------------------------- 1 | == disasm: @>========== 2 | 0000 putspecialobject 3 ( 1) 3 | 0002 putnil 4 | 0003 defineclass :Factorial, , 0 5 | 0007 leave 6 | == disasm: @>=== 7 | 0000 putspecialobject 1 ( 2) 8 | 0002 putself 9 | 0003 putobject :fact_helper 10 | 0005 putiseq fact_helper 11 | 0007 opt_send_without_block 12 | 0009 pop 13 | 0010 putspecialobject 1 ( 6) 14 | 0012 putself 15 | 0013 putobject :fact 16 | 0015 putiseq fact 17 | 0017 opt_send_without_block 18 | 0019 leave 19 | == disasm: >========= 20 | local table (size: 3, argc: 2 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1]) 21 | [ 3] n [ 2] res 22 
| 0000 getlocal_OP__WC__0 3 ( 3) 23 | 0002 putobject_OP_INT2FIX_O_1_C_ 24 | 0003 opt_eq 25 | 0005 branchunless 11 26 | 0007 getlocal_OP__WC__0 2 27 | 0009 leave 28 | 0010 pop 29 | 0011 putself 30 | 0012 getlocal_OP__WC__0 3 31 | 0014 putobject_OP_INT2FIX_O_1_C_ 32 | 0015 opt_minus 33 | 0017 getlocal_OP__WC__0 3 34 | 0019 getlocal_OP__WC__0 2 35 | 0021 opt_mult 36 | 0023 opt_send_without_block 37 | 0025 leave 38 | == disasm: >================ 39 | local table (size: 2, argc: 1 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1]) 40 | [ 2] n 41 | 0000 putself ( 7) 42 | 0001 getlocal_OP__WC__0 2 43 | 0003 putobject_OP_INT2FIX_O_1_C_ 44 | 0004 opt_send_without_block 45 | 0006 leave 46 | -------------------------------------------------------------------------------- /lib/blog_snippets/articles/tail_call_optimization_in_ruby_internals/fact_tco_disasm.txt: -------------------------------------------------------------------------------- 1 | == disasm: @>========== 2 | 0000 putspecialobject 3 ( 1) 3 | 0002 putnil 4 | 0003 defineclass :Factorial, , 0 5 | 0007 leave 6 | == disasm: @>=== 7 | 0000 putspecialobject 1 ( 2) 8 | 0002 putself 9 | 0003 putobject :fact_helper 10 | 0005 putiseq fact_helper 11 | 0007 opt_send_without_block 12 | 0009 pop 13 | 0010 putspecialobject 1 ( 6) 14 | 0012 putself 15 | 0013 putobject :fact 16 | 0015 putiseq fact 17 | 0017 opt_send_without_block 18 | 0019 leave 19 | == disasm: >========= 20 | local table (size: 3, argc: 2 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1]) 21 | [ 3] n [ 2] res 22 | 0000 getlocal_OP__WC__0 3 ( 3) 23 | 0002 putobject_OP_INT2FIX_O_1_C_ 24 | 0003 opt_eq 25 | 0005 branchunless 11 26 | 0007 getlocal_OP__WC__0 2 27 | 0009 leave 28 | 0010 pop 29 | 0011 putself 30 | 0012 getlocal_OP__WC__0 3 31 | 0014 putobject_OP_INT2FIX_O_1_C_ 32 | 0015 opt_minus 33 | 0017 getlocal_OP__WC__0 3 34 | 0019 getlocal_OP__WC__0 2 35 | 0021 opt_mult 36 | 0023 opt_send_without_block 37 | 0025 leave 38 | == disasm: 
>================ 39 | local table (size: 2, argc: 1 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1]) 40 | [ 2] n 41 | 0000 putself ( 7) 42 | 0001 getlocal_OP__WC__0 2 43 | 0003 putobject_OP_INT2FIX_O_1_C_ 44 | 0004 opt_send_without_block 45 | 0006 leave 46 | -------------------------------------------------------------------------------- /lib/blog_snippets/articles/tail_call_optimization_in_ruby_internals/tail_optimized_reload.rb: -------------------------------------------------------------------------------- 1 | # This script demonstrates that any file loaded after a change to 2 | # RubyVM::InstructionSequence.compile_option will be compiled with the new 3 | # compile options. Rather than do this with two scripts, this script is hacked 4 | # together such that this can be demonstrated with one file that reloads itself 5 | # the first time it is loaded. 6 | 7 | # Flag indicating whether this is the first time time this file has been loaded. 8 | $first_load = true if $first_load.nil? 9 | 10 | # We can actually turn on tailcall optimization here without affecting how the 11 | # script is loaded the first time because the RubyVM::InstructionSequence object 12 | # that is used to compile the file the first time has already been created and 13 | # as such won't be affected by changing the global compile option. 14 | RubyVM::InstructionSequence.compile_option = { 15 | tailcall_optimization: true, 16 | trace_instruction: false, 17 | } 18 | 19 | # Declare classes to facilitate #instance_eval later 20 | class FirstLoadFactorial; end 21 | class ReloadedFactorial; end 22 | 23 | # On the first load, extend FirstLoadFactorial, 24 | # on the second load, extend ReloadedFactorial. 25 | klass = $first_load ? 
FirstLoadFactorial : ReloadedFactorial 26 | 27 | # Tail recursive factorial adapted from 28 | # https://github.com/ruby/ruby/blob/fcf6fa8781fe236a9761ad5d75fa1b87f1afeea2/test/ruby/test_optimization.rb#L213 29 | klass.instance_eval do 30 | def self.fact_helper(n, res) 31 | n == 1 ? res : fact_helper(n - 1, n * res) 32 | end 33 | 34 | def self.fact(n) 35 | fact_helper(n, 1) 36 | end 37 | end 38 | 39 | # This check avoids calculating the factorial twice; ReloadedFactorial will only 40 | # respond to :fact after the file has been reloaded. 41 | if ReloadedFactorial.respond_to?(:fact) 42 | begin 43 | puts "FirstLoadFactorial: #{FirstLoadFactorial.fact(50000).to_s.length}" 44 | rescue SystemStackError 45 | puts "FirstLoadFactorial: stack level too deep" 46 | end 47 | 48 | puts "ReloadedFactorial: #{ReloadedFactorial.fact(50000).to_s.length}" 49 | end 50 | 51 | # Reload the file on the first load only. 52 | if $first_load 53 | $first_load = false 54 | load __FILE__ 55 | end 56 | 57 | # $ ruby tail_optimized_reload.rb 58 | # FirstLoadFactorial: stack level too deep 59 | # ReloadedFactorial: 213237 60 | -------------------------------------------------------------------------------- /notes.txt: -------------------------------------------------------------------------------- 1 | Coercion to Boolean compared to lazy evaluation counterpart 2 | > false | :WTF? 3 | => true 4 | > true & :WTF? 5 | => true 6 | Even with very large object on RHS, no efficiency gained by coercion 7 | Because of method call implementation? 
8 | Other than tricky coercion, only gain is that it is ever so slightly faster 9 | than double negation: 10 | require 'benchmark/ips' 11 | 12 | Benchmark.ips do |bm| 13 | bm.report("Double negate") { !!(true && :a) } 14 | 15 | bm.report("Logical bit-wise coerce") { true & :a } 16 | end 17 | 18 | # Calculating -------------------------------------------- 19 | # Double negate 138.008k i/100ms 20 | # Logical bit-wise coerce 139.350k i/100ms 21 | # -------------------------------------------------------- 22 | # Double negate 7.262M (± 1.0%) i/s - 36.434M 23 | # Logical bit-wise coerce 7.825M (± 1.3%) i/s - 39.157M 24 | # -------------------------------------------------------- 25 | 26 | 27 | 28 | "Maybe use when you have very simple boolean expressions and the cost 29 | of short cutting (i.e. a branch) is greater than the time you save by 30 | not evaluating the later expressions." 31 | http://stackoverflow.com/a/7105382/1169710 32 | Secretly method calls in Ruby! 33 | Doesn't seem to apply in Ruby. Branching always cheaper than a method call. 34 | 35 | 36 | Operator precedence: 37 | > true || 1 && 3 38 | => true 39 | > true || (1 && 3) 40 | => true 41 | 42 | > true | 1 && 3 43 | => 3 44 | > (true | 1) && 3 45 | => 3 46 | 47 | 48 | > false && true ^ true 49 | => false 50 | > false && (true ^ true) 51 | => false 52 | 53 | > false & true ^ true 54 | => true 55 | > (false && true) ^ true 56 | => true 57 | 58 | Seems like they'd mostly be used for their side effects which is bad 59 | 60 | Only works consistently for falsy values and true. Truthy values explosive! 
61 | 62 | Examples: 63 | https://github.com/ruby/ruby/blob/75feee0968c9345e7ffd2bda9835fcd60b4c0880/benchmark/bm_so_k_nucleotide.rb#L40 64 | https://github.com/rubyspec/rubyspec/blob/38b775a32293ce7ec5bdadaa7e70422fb5dc3a68/core/string/slice_spec.rb#L436 65 | https://github.com/rubyspec/rubyspec/blob/38b775a32293ce7ec5bdadaa7e70422fb5dc3a68/core/string/shared/slice.rb#L419 66 | https://github.com/rubyspec/rubyspec/blob/324c37bb67ea51f197954a37a2c71878eeadea01/core/string/plus_spec.rb#L41 67 | -------------------------------------------------------------------------------- /lib/blog_snippets/articles/tail_call_optimization_in_ruby_internals/fib_tco_disasm.txt: -------------------------------------------------------------------------------- 1 | == disasm: @>========== 2 | 0000 putspecialobject 3 ( 1) 3 | 0002 putnil 4 | 0003 defineclass :BlogSnippets, , 2 5 | 0007 leave 6 | == disasm: @> 7 | 0000 putspecialobject 3 ( 2) 8 | 0002 putnil 9 | 0003 defineclass :TCOFib, , 2 10 | 0007 leave 11 | == disasm: @>===== 12 | 0000 putspecialobject 1 ( 3) 13 | 0002 putself 14 | 0003 putobject :acc 15 | 0005 putiseq acc 16 | 0007 opt_send_simple 17 | 0009 pop 18 | 0010 putspecialobject 1 ( 11) 19 | 0012 putself 20 | 0013 putobject :fib 21 | 0015 putiseq fib 22 | 0017 opt_send_simple 23 | 0019 leave 24 | == disasm: >================= 25 | local table (size: 4, argc: 3 [opts: 0, rest: -1, post: 0, block: -1, keyword: 0@5] s1) 26 | [ 4] i [ 3] n [ 2] result 27 | 0000 getlocal_OP__WC__0 4 ( 4) 28 | 0002 putobject -1 29 | 0004 opt_eq 30 | 0006 branchunless 12 31 | 0008 getlocal_OP__WC__0 2 ( 5) 32 | 0010 leave ( 4) 33 | 0011 pop 34 | 0012 putself ( 7) 35 | 0013 getlocal_OP__WC__0 4 36 | 0015 putobject_OP_INT2FIX_O_1_C_ 37 | 0016 opt_minus 38 | 0018 getlocal_OP__WC__0 3 39 | 0020 getlocal_OP__WC__0 2 40 | 0022 opt_plus 41 | 0024 getlocal_OP__WC__0 3 42 | 0026 opt_send_simple 43 | 0028 leave 44 | == disasm: >================= 45 | local table (size: 2, argc: 1 [opts: 0, rest: -1, post: 0, 
block: -1, keyword: 0@3] s1) 46 | [ 2] i 47 | 0000 putself ( 12) 48 | 0001 getlocal_OP__WC__0 2 49 | 0003 putobject_OP_INT2FIX_O_1_C_ 50 | 0004 putobject_OP_INT2FIX_O_0_C_ 51 | 0005 opt_send_simple 52 | 0007 leave 53 | -------------------------------------------------------------------------------- /lib/blog_snippets/articles/tail_call_optimization_in_ruby_internals/fib_disasm.txt: -------------------------------------------------------------------------------- 1 | == disasm: @>========== [44/385] 2 | 0000 putspecialobject 3 ( 1) 3 | 0002 putnil 4 | 0003 defineclass :BlogSnippets, , 2 5 | 0007 leave 6 | == disasm: @> 7 | 0000 putspecialobject 3 ( 2) 8 | 0002 putnil 9 | 0003 defineclass :Fib, , 2 10 | 0007 leave 11 | == disasm: @>======== 12 | 0000 putspecialobject 1 ( 3) 13 | 0002 putself 14 | 0003 putobject :acc 15 | 0005 putiseq acc 16 | 0007 opt_send_simple 17 | 0009 pop 18 | 0010 putspecialobject 1 ( 11) 19 | 0012 putself 20 | 0013 putobject :fib 21 | 0015 putiseq fib 22 | 0017 opt_send_simple 23 | 0019 leave 24 | == disasm: >================= 25 | local table (size: 4, argc: 3 [opts: 0, rest: -1, post: 0, block: -1, keyword: 0@5] s1) 26 | [ 4] i [ 3] n [ 2] result 27 | 0000 getlocal_OP__WC__0 4 ( 4) 28 | 0002 putobject -1 29 | 0004 opt_eq 30 | 0006 branchunless 12 31 | 0008 getlocal_OP__WC__0 2 ( 5) 32 | 0010 leave ( 4) 33 | 0011 pop 34 | 0012 putself ( 7) 35 | 0013 getlocal_OP__WC__0 4 36 | 0015 putobject_OP_INT2FIX_O_1_C_ 37 | 0016 opt_minus 38 | 0018 getlocal_OP__WC__0 3 39 | 0020 getlocal_OP__WC__0 2 40 | 0022 opt_plus 41 | 0024 getlocal_OP__WC__0 3 42 | 0026 opt_send_simple 43 | 0028 leave 44 | == disasm: >================= 45 | local table (size: 2, argc: 1 [opts: 0, rest: -1, post: 0, block: -1, keyword: 0@3] s1) 46 | [ 2] i 47 | 0000 putself ( 12) 48 | 0001 getlocal_OP__WC__0 2 49 | 0003 putobject_OP_INT2FIX_O_1_C_ 50 | 0004 putobject_OP_INT2FIX_O_0_C_ 51 | 0005 opt_send_simple 52 | 0007 leave 53 | 
-------------------------------------------------------------------------------- /lib/blog_snippets/articles/eager_boolean_operators/notes.txt: -------------------------------------------------------------------------------- 1 | Coercion to Boolean compared to lazy evaluation counterpart 2 | > false | :WTF? 3 | => true 4 | > true & :WTF? 5 | => true 6 | Even with very large object on RHS, no efficiency gained by coercion 7 | Because of method call implementation? 8 | Other than tricky coercion, only gain is that it is ever so slightly faster 9 | than double negation: 10 | require 'benchmark/ips' 11 | 12 | Benchmark.ips do |bm| 13 | bm.report("Double negate") { !!(true && :a) } 14 | 15 | bm.report("Logical bit-wise coerce") { true & :a } 16 | end 17 | 18 | # Calculating -------------------------------------------- 19 | # Double negate 138.008k i/100ms 20 | # Logical bit-wise coerce 139.350k i/100ms 21 | # -------------------------------------------------------- 22 | # Double negate 7.262M (± 1.0%) i/s - 36.434M 23 | # Logical bit-wise coerce 7.825M (± 1.3%) i/s - 39.157M 24 | # -------------------------------------------------------- 25 | 26 | 27 | 28 | "Maybe use when you have very simple boolean expressions and the cost 29 | of short cutting (i.e. a branch) is greater than the time you save by 30 | not evaluating the later expressions." 31 | http://stackoverflow.com/a/7105382/1169710 32 | Secretly method calls in Ruby! 33 | Doesn't seem to apply in Ruby. Branching always cheaper than a method call. 
34 | 35 | 36 | Operator precedence: 37 | > true || 1 && 3 38 | => true 39 | > true || (1 && 3) 40 | => true 41 | 42 | > true | 1 && 3 43 | => 3 44 | > (true | 1) && 3 45 | => 3 46 | 47 | 48 | > false && true ^ true 49 | => false 50 | > false && (true ^ true) 51 | => false 52 | 53 | > false & true ^ true 54 | => true 55 | > (false && true) ^ true 56 | => true 57 | 58 | Seems like they'd mostly be used for their side effects which is bad 59 | 60 | The console is the only somewhat reasonable use case I can think of. 61 | 62 | Only works consistently for falsy values and true. Truthy values explosive! 63 | 64 | Examples: 65 | https://github.com/ruby/ruby/blob/75feee0968c9345e7ffd2bda9835fcd60b4c0880/benchmark/bm_so_k_nucleotide.rb#L40 66 | https://github.com/rubyspec/rubyspec/blob/38b775a32293ce7ec5bdadaa7e70422fb5dc3a68/core/string/slice_spec.rb#L436 67 | https://github.com/rubyspec/rubyspec/blob/38b775a32293ce7ec5bdadaa7e70422fb5dc3a68/core/string/shared/slice.rb#L419 68 | https://github.com/rubyspec/rubyspec/blob/324c37bb67ea51f197954a37a2c71878eeadea01/core/string/plus_spec.rb#L41 69 | 70 | 71 | Method execution behavior means errors on the RHS, prevent the value from the 72 | LHS from being stored: 73 | 74 | or_result = nil 75 | begin 76 | or_result = true | Seriously(this(is(valid(Ruby!)))) 77 | rescue NameError 78 | puts "NameError :(" 79 | end 80 | or_result 81 | # Name Error :( 82 | # => nil 83 | 84 | and_result = nil 85 | begin 86 | and_result = false & 0/0 87 | rescue ZeroDivisionError 88 | puts "ZeroDivisionError :(" 89 | end 90 | and_result 91 | # ZeroDivisionError :( 92 | # => nil 93 | -------------------------------------------------------------------------------- /test/unit/markdown_to_html_transformer_test.rb: -------------------------------------------------------------------------------- 1 | require "test_helper" 2 | require "blog_snippets/markdown_to_html_transformer" 3 | 4 | class MarkdownToHTMLTransformerTest < BlogSnippets::TestCase 5 | Subject = 
BlogSnippets::MarkdownToHTMLTransformer 6 | 7 | subject { Subject } 8 | 9 | context "::default_markdown_extensions" do 10 | should "return expected defaults" do 11 | expected = { 12 | :autolink => true, 13 | :disable_indented_code_blocks => true, 14 | :fenced_code_blocks => true, 15 | :footnotes => true, 16 | :no_intra_emphasis => true, 17 | :space_after_headers => true, 18 | :strikethrough => true, 19 | :tables => true, 20 | :underline => true, 21 | } 22 | assert_equal expected, subject.default_markdown_extensions 23 | end 24 | 25 | should "return a new Hash instance each call" do 26 | first_defaults = subject.default_markdown_extensions 27 | second_defaults = subject.default_markdown_extensions 28 | refute_equal first_defaults.object_id, second_defaults.object_id 29 | end 30 | end 31 | 32 | context "#initialize" do 33 | [:parser_class, :renderer].each do |required_opt| 34 | should "raise unless #{required_opt} option is given" do 35 | assert_raises(ArgumentError) do 36 | opts = default_initialization_options 37 | opts.delete(required_opt) 38 | subject.new(opts) 39 | end 40 | end 41 | end 42 | 43 | should "assign given :renderer to #renderer" do 44 | instance = subject.new(default_initialization_options) 45 | assert_equal renderer, instance.renderer 46 | end 47 | 48 | should "take a Hash of Markdown extensions" do 49 | exts = { :tables => true } 50 | opts = default_initialization_options.merge(:markdown_extensions => exts) 51 | instance = subject.new(opts) 52 | assert_equal exts, instance.markdown_extensions 53 | end 54 | 55 | should "use default Markdown extensions if none given" do 56 | opts = default_initialization_options 57 | opts.delete(:markdown_extensions) 58 | instance = subject.new(opts) 59 | assert_equal subject.default_markdown_extensions, instance.markdown_extensions 60 | end 61 | 62 | should "assign :markdown_extensions to #markdown_extensions" do 63 | exts = { :tables => true } 64 | opts = default_initialization_options.merge(:markdown_extensions => 
exts) 65 | instance = subject.new(opts) 66 | assert_equal exts, instance.markdown_extensions 67 | end 68 | end 69 | 70 | context "instance_methods" do 71 | subject { Subject.new(default_initialization_options) } 72 | 73 | context "#parser" do 74 | should "initialize an instance of parser_class with renderer and markdown extensions" do 75 | parser_class.expects(:new).with(subject.renderer, subject.markdown_extensions) 76 | subject.parser 77 | end 78 | end 79 | 80 | context "#transform" do 81 | should "invoke parser#render with given markdown" do 82 | markdown = "# Hello World!" 83 | subject.expects(:parser).returns(mck = mock) 84 | mck.expects(:render).with(markdown) 85 | subject.transform(markdown) 86 | end 87 | end 88 | end 89 | 90 | def default_initialization_options 91 | { 92 | :parser_class => parser_class, 93 | :renderer => renderer, 94 | } 95 | end 96 | 97 | def parser_class 98 | @parser_class ||= mock 99 | end 100 | 101 | def renderer 102 | @renderer ||= mock 103 | end 104 | end 105 | -------------------------------------------------------------------------------- /lib/blog_snippets/renderers/wordpress_html_renderer.rb: -------------------------------------------------------------------------------- 1 | require "English" 2 | require "json" 3 | require "redcarpet" 4 | 5 | module BlogSnippets 6 | module Renderers 7 | class WordpressHTMLRenderer < Redcarpet::Render::HTML 8 | UNTARGETED_LINK = /^(?:mailto:|#)/.freeze 9 | # http://rubular.com/r/apmHqN4joc 10 | HEADER_MATCHER = /(?
[1-6])[^>]+id="(?[^"]+)".*?>.*?<\/h\k>)/.freeze 11 | INDENTATION_TOKEN = "__WORDPRESS_HTML_RENDERER_INDENTATION__".freeze 12 | NEW_LINE_TOKEN = "__WORDPRESS_HTML_RENDERER_NEW_LINE__".freeze 13 | 14 | def initialize(options = nil) 15 | super(@options = options || default_options) 16 | end 17 | 18 | # Can't call super due to C-extension design, so fake it so we can 19 | # customize it. 20 | def link(link, title, content) 21 | element = %Q[#{content}]) 28 | element 29 | end 30 | 31 | def block_code(code, language_or_attributes) 32 | # Replace line breaks with new-line token 33 | code.gsub!(/\n/, NEW_LINE_TOKEN) 34 | code.gsub!(/ /, INDENTATION_TOKEN) 35 | 36 | # Extract code tag attributes 37 | code_attrs = code_attributes(language_or_attributes) 38 | code_attrs &&= " #{code_attrs}" 39 | 40 | # Can't call super due to C-extension design, so fake it. 41 | [ 42 | "[code#{code_attrs}]", 43 | NEW_LINE_TOKEN, 44 | code, 45 | "[/code]\n", 46 | ].join 47 | end 48 | 49 | def postprocess(document) 50 | remove_new_lines_and_white_space_runs!(document) 51 | replace_tokens!(document) 52 | add_header_links!(document) 53 | end 54 | 55 | private 56 | 57 | def add_header_links!(document) 58 | document.gsub!(HEADER_MATCHER) do |match| 59 | match_data = $LAST_MATCH_INFO 60 | match[0..-6] + 61 | %Q|| + 62 | "" 63 | end 64 | document 65 | end 66 | 67 | def link_attributes(link) 68 | return {} unless attrs = @options[:link_attributes] 69 | link_attrs = attrs.dup 70 | link_attrs.delete("target") if UNTARGETED_LINK === link 71 | link_attrs 72 | end 73 | 74 | def code_attributes(lang_or_attrs) 75 | return "language=\"#{lang_or_attrs}\"" unless /[, :]/ === lang_or_attrs 76 | 77 | # Curly braces are omitted for some reason, so restore them. 
78 | attr_json = JSON.parse("{#{lang_or_attrs}}") 79 | attr_json.map { |key, value| "#{key}=\"#{value}\"" }.join(" ") 80 | end 81 | 82 | def default_options 83 | { 84 | :link_attributes => { 85 | "target" => "_blank", 86 | }, 87 | :with_toc_data => true, 88 | } 89 | end 90 | 91 | def remove_new_lines_and_white_space_runs!(document) 92 | # Remove line breaks; HTML should handle breaking lines 93 | document.gsub!(/\n/, " ") 94 | # Removing line breaks may have introduced white space runs; zap 'em. 95 | # http://rubular.com/r/aaVCG1Wlep 96 | document.gsub!(/(?<=[^\s])\s{2,}/, " ") 97 | document 98 | end 99 | 100 | def replace_tokens!(document) 101 | # Replace tokens with desired characters 102 | document.gsub!(/#{NEW_LINE_TOKEN}/, "\n") 103 | document.gsub!(/#{INDENTATION_TOKEN}/, " ") 104 | document 105 | end 106 | end 107 | end 108 | end 109 | -------------------------------------------------------------------------------- /lib/blog_snippets/articles/introducing_the_tco_method_gem/README.md: -------------------------------------------------------------------------------- 1 | ![tco_method](https://s3.amazonaws.com/tdg5/blog/wp-content/uploads/2015/03/15014139/tco_method.jpg "tco_method") 2 | 3 | Earlier this week I published a gem intended to help simplify the process of 4 | compiling Ruby code with tail call optimization enabled in MRI Ruby. The gem, 5 | [tco_method](https://rubygems.org/gems/tco_method), builds on my recent research 6 | into the [internals of Ruby's implementation of tail call optimization](http://blog.tdg5.com/tail-call-optimization-ruby-deep-dive/) 7 | and the ideas presented in [Nithin Bekal's article *Tail Optimization in Ruby*](http://nithinbekal.com/posts/ruby-tco/). 
8 | 9 | The gem aims to ease the process of compiling select Ruby code with tail call 10 | optimization by providing a helper method, [**TCOMethod.tco_eval**](http://www.rubydoc.info/gems/tco_method/TCOMethod/Mixin:tco_eval), 11 | for evaluating code with tail call optimization enabled and a mix-in, 12 | [**TCOMethod::Mixin**](http://www.rubydoc.info/gems/tco_method/TCOMethod/Mixin), 13 | for adding annotations to Classes and/or Modules for annotating singleton or 14 | instance methods that should be compiled with tail call optimization enabled. 15 | You can see what each of these approaches would look like below. 16 | 17 | ## TCOMethod.eval 18 | 19 | ```ruby 20 | TCOMethod.tco_eval(<<-CODE) 21 | module MyFactorial 22 | def self.factorial(n, acc = 1) 23 | n <= 1 ? acc : factorial(n - 1, n * acc) 24 | end 25 | end 26 | CODE 27 | 28 | MyFactorial.factorial(10_000).to_s.length 29 | # => 35660 30 | ``` 31 | 32 | Though not as powerful as Ruby's native **eval** method, **TCOMethod.tco_eval** provides 33 | easy access to the full power of Ruby with the added benefit of tail call 34 | optimization. The major downside to using **tco_eval** is that code must be 35 | provided as a String. Also, unlike Ruby's standard **eval** method, **tco_eval** 36 | currently cannot take a binding for the evaluation which can make it awkward 37 | at times to connect code that's being compiled with tail optimization to 38 | other application code compiled by Ruby's primary compilation process. 39 | 40 | All that said, I view **tco_eval** as more of a starting point than a solution. 41 | It inches the door a little wider for the Ruby community to play with tail call 42 | optimization and get a better sense of how and when it might be useful. I think 43 | this is an exciting opportunity that Nithin Bekal's work with TCO method 44 | decorators began to explore and, as we'll see momentarily, the 45 | **TCOMethod::Mixin** continues to test the waters of. 
46 | 47 | Beyond the opportunity it offers the Ruby community, I'm also excited because 48 | the [tco_method gem](https://rubygems.org/gems/tco_method) seems like a great 49 | opportunity to dig into Ruby's C extensions and see how extending the gem to 50 | interface with Ruby's C code more directly could extend the abilities of the gem 51 | while further simplifying access to tail call optimization in Ruby. 52 | 53 | ## TCOMethod::Mixin#tco_method 54 | 55 | ```ruby 56 | class MyFibonacci 57 | extend TCOMethod::Mixin 58 | 59 | def fibonacci(index, back_one = 1, back_two = 0) 60 | index < 1 ? back_two : fibonacci(index - 1, back_one + back_two, back_one) 61 | end 62 | tco_method :fibonacci 63 | end 64 | 65 | puts MyFibonacci.new.fibonacci(10_000).to_s.length 66 | # => 2090 67 | ``` 68 | 69 | The **TCOMethod::Mixin** module provides annotations at the Class and Module 70 | level allowing a developer access to some of the niceties of tail call 71 | optimization, but without the awkwardness that comes from String literal code or 72 | heredocs. In the style of some of Ruby's other class annotations like 73 | **private_class_method** or **module_function**, the **tco_module_method**, 74 | **tco_class_method**, and eponymous *tco_method** annotation for instance 75 | methods, allow a user to annotate a previously defined method indicating that 76 | the specified method should be recompiled with tail call optimization enabled. 77 | 78 | Currently these helper methods are little more than nicely wrapped hacks that 79 | use some trickery to redefine the specified method with tail call optimization 80 | enabled. 
More specifically, the helper annotations will: 81 | 82 | - find the method identified by the given argument 83 | - retrieve the source for that method using the [method_source 84 | gem](https://github.com/banister/method_source) 85 | - generate a redefinition expression from the method source that 86 | reopens the defining Module or Class and redefines the method 87 | - pass the generated redefinition expression to **TCOMethod.tco_eval**, 88 | effectively overriding the previously defined method with the new tail call 89 | optimized version 90 | 91 | While this works in most situations, there are quite a few [pitfalls and 92 | gotchas](https://github.com/tdg5/tco_method/tree/6241e57f8bb8478e2ef2286d4cc6e463c0198e61#gotchas) 93 | that come from this approach. 94 | 95 | For one, this approach only works for methods defined using the **def** keyword. 96 | Though in some cases methods defined using **define_method** could be redefined 97 | correctly, given that **define_method** takes a block that maintains a closure 98 | with the definition context, there's no foolproof way to ensure that all methods 99 | defined using **define_method** could be reevaluated with tail call optimization 100 | enabled because of references to the closure context. 101 | 102 | Another gotcha worth mentioning is that because the current implementation 103 | relies on reopening the parent Module or Class, the helper methods won't work on 104 | anonymous Classes or Modules because they cannot be reopened by name. With more 105 | hacking there are ways to get around this limitation, but, at present, I don't 106 | think more hacking is the path forward and something more along the lines of a C 107 | extension is the right way to address these issues. 
108 | 109 | ## Interesting problems 110 | 111 | As I said before, I think the [tco_method gem](https://rubygems.org/gems/tco_method) 112 | is a starting point, not a solution, and I'm excited by the various 113 | opportunities and challenges it presents. Though I am definitely interested in 114 | learning more about Ruby's C extension support, the [tco_method gem](https://rubygems.org/gems/tco_method) 115 | has already presented some interesting problems despite its current primitive 116 | and hacky nature. 117 | 118 | For example, in order to test that a recursive factorial method would no longer 119 | encounter a stack overflow after being recompiled with tail call optimization 120 | enabled, I first had to devise a means of ensuring that that method would 121 | have encountered a stack overflow without tail call optimization enabled and at 122 | what point that stack overflow would have occurred. To achieve this, I wrote a 123 | test helper that performs [a binary search to discover how many stack frames a 124 | recursive function can allocate before a stack overflow is 125 | encountered](https://github.com/tdg5/tco_method/blob/c28895742e18e9d87393c97435db99e4b71c5fa3/test/test_helpers/stack_busters/factorial_stack_buster.rb#L25). 126 | 127 | Though my current solution could use some refactoring, I thought this was a fun 128 | and interesting problem to solve. Though I don't find binary search particularly 129 | interesting on its own, I found this particular case interesting because the 130 | expensive nature of the **raise**/**rescue** cycle in Ruby introduces a sort of 131 | penalty to the process such that the process will be much quicker if the point 132 | of overflow can be discovered while causing as few **SystemStackError** 133 | exceptions as possible. 
I think this detail makes the binary search more 134 | interesting because there's more to it than just finding the desired result in as few 135 | operations as possible, there are also other considerations to keep in mind that 136 | could totally change how the utility of the search is assessed. In fact, given 137 | this behavior, a binary search may not be the best approach at all. 138 | 139 | For now, I've taken the approach of using one binary search to find a point of 140 | overflow, then using a second binary search to find the exact point at which the 141 | recursive function begins to exceed the system stack between the last successful 142 | invocation and the overflowing invocation. 143 | 144 | I haven't tried to do much research on this particular type of problem yet, but 145 | I'm excited to revisit this search function at some point in the future and see 146 | what other ideas are out there for me to throw at the problem. 147 | 148 | **Update:** After discussing the peculiarities of this approach with my coworker 149 | Matt Bittarelli, he suggested a couple of alternatives to the binary search 150 | approach that seemed intriguing and simpler. The first idea was simply to [force 151 | a **SystemStackError** and check the length of the exception's backtrace from the 152 | **rescue** context to determine the maximum stack 153 | depth](https://github.com/tdg5/tco_method/commit/e2e7f30314fd3d0e1b2d138328d7deeb31e7bd96). 154 | Though this approach works in Ruby 2.2, [it does not work in Ruby 2.0 or Ruby 155 | 2.1](https://travis-ci.org/tdg5/tco_method/builds/54811953). The other idea Matt 156 | had was that maybe a **SystemStackError** wasn't necessary at all if a block 157 | could be used to monitor how the stack depth changed from iteration to 158 | iteration. 
Though a little mind bending, I was able to [use a recursive method 159 | that yields to a block to monitor how the stack depth changes and using that 160 | information determine whether the method had been compiled with tail call 161 | optimization enabled](https://github.com/tdg5/tco_method/commit/c2963276376f7705b2fb1b6b582d88f07954c02f). 162 | Though the means of determining if a method is compiled with tail call 163 | optimization has changed since I initially wrote this article, I think all three 164 | of the above approaches are interesting and I expect more interesting problems 165 | will emerge as work on this gem continues. Thanks again to Matt Bittarelli for 166 | his insights into the problem! 167 | 168 | ## Test drive 169 | 170 | Because tail recursive functions can typically be restated in other ways that 171 | don't require tail call optimization, I'm still on the fence as to whether TCO 172 | provides any real value other than expanding the expressiveness of the Ruby 173 | language. As such, I encourage you to take the [tco_method gem](https://rubygems.org/gems/tco_method) 174 | for a test drive and explore the opportunities it presents. If you do take 175 | it for a test drive, drop me a line to let me know how it went. I'd be 176 | interested to hear about your experiences both with tail call optimization in 177 | Ruby-land and with the API offered by the [tco_method gem](https://rubygems.org/gems/tco_method). 178 | Contributions are also always welcome! 179 | 180 | [View the tco_method gem on RubyGems](https://rubygems.org/gems/tco_method) 181 | [View the tco_method gem on GitHub](https://github.com/tdg5/tco_method) 182 | 183 | As always, thanks for reading! 
184 | -------------------------------------------------------------------------------- /lib/blog_snippets/articles/tail_call_optimization_in_ruby_background/README.md: -------------------------------------------------------------------------------- 1 | Back in November, care of [/r/ruby](https://www.reddit.com/r/ruby), I came 2 | across [a blog post by Nithin Bekal, Tail Call Optimization in 3 | Ruby](http://nithinbekal.com/posts/ruby-tco/), demonstrating Ruby's built-in 4 | support for tail call optimization and I have to admit, my mind was a little 5 | blown. 6 | 7 | It's not that I have a specific need for tail call optimization. In fact, 8 | I can't think of even a single situation where I would have done 9 | things differently if I'd known the VM supported it. But, I guess I was 10 | surprised to find that tail call optimization was just hiding somewhere in the 11 | Ruby VM, waiting to be flipped on with a compile flag, or **at runtime**. 12 | 13 | I think it was this ability to just turn it on at any time that blew my mind. 14 | Not just that it was hiding in there somewhere, but that the VM is flexible 15 | enough to swap in the machinery to support tail call optimization whenever you 16 | decide you want it. Pretty awesome. 17 | 18 | With no particular use for tail call optimization, I've just been sitting on the 19 | knowledge, the notion bouncing around in my head. That is, until earlier 20 | this week when I decided I would try to apply some of what I learned from reading 21 | [Pat Shaughnessy's Ruby Under a Microscope](http://patshaughnessy.net/ruby-under-a-microscope) 22 | to better understanding how the Ruby VM can be so flexible when it comes to tail 23 | call optimization. 24 | 25 | Though I think that that will make for an interesting blog post, it's turned into a 26 | bit of an epic. 
So this week, I'm going to begin with a little background on 27 | tail call optimization and hopefully build on what others have already shared 28 | with some of what I've learned about Ruby's implementation of tail call 29 | optimization while trudging through Ruby's depths. Then, in my next post, with 30 | the stage already set, we can get into the internals of how the Ruby VM makes 31 | tail call optimization happen at runtime. 32 | 33 | Let's get started! 34 | 35 | ## A little background on tail call optimization 36 | [Nithin's article](http://nithinbekal.com/posts/ruby-tco/) does a great job of 37 | explaining tail recursive functions and tail call optimization, so if you're a 38 | little iffy on either subject, I'd recommend reading that before you continue 39 | with this post. The [Tail call entry in Wikipedia](https://en.wikipedia.org/wiki/Tail_call) 40 | is also a useful resource for even more depth on the subject. 41 | 42 | To summarize, tail call optimization, or tail call elimination as it is also 43 | known, is a special feature of some kinds of tail recursive functions that 44 | allows for the tail call to be implemented without adding a new stack frame to 45 | the call stack. This allows for more efficient tail calls while also 46 | allowing the size of the stack to remain constant which in turn allows recursion 47 | to be used in situations that might otherwise encounter a stack overflow without 48 | tail call optimization. 49 | 50 | ## Ruby and tail call optimization 51 | Starting with Ruby 1.9.2, the Ruby VM offers built-in, though experimental, 52 | support for tail call optimization. That said, there are other ways of achieving 53 | tail call optimization without enabling it in the VM. 
[Magnus Holm offers a 54 | couple of other hacks for achieving tail call optimization in Ruby in his blog post 55 | Tailin' Ruby](http://timelessrepo.com/tailin-ruby), which is worth the read 56 | just for the innovative ways he attempts to solve the problem, even if you're 57 | fine to use the Ruby VM's implementation of tail call optimization. Maybe it's 58 | just because I haven't had an itch that I needed tail call optimization to 59 | scratch, but using **redo** to emulate tail call optimization in a performant 60 | fashion is pretty damn clever. 61 | 62 | Now, although support for tail call optimization is built into the VM, because 63 | of its experimental nature it isn't enabled by default and must be turned on 64 | either with a flag when compiling Ruby or by configuring 65 | **RubyVM::InstructionSequence** at runtime with special compile options. There 66 | was some talk of [enabling tail call optimization by default around the time 67 | that Ruby 2.0 was released](https://bugs.ruby-lang.org/issues/6602), however 68 | this hasn't come to be for a number of reasons: Primary concerns were that tail 69 | call optimization makes it difficult to implement **set_trace_func** and also 70 | causes backtrace weirdness due to the absence of a new stack frame. 71 | 72 | Now that we have a little background on tail call optimization in Ruby, let's 73 | take a look at an example of a tail recursive, tail call optimizable function. 74 | 75 | ## A tail recursive Guinea pig 76 | In order for us to take Ruby's implementation of tail call optimization for a 77 | test drive and to help us get to the bottom of Ruby's implementation of tail 78 | call optimization in my next post, we'll first need a tail recursive function to 79 | be the subject of our experiments. As it turns out, we can actually extract such 80 | a subject from the Ruby source code itself. 
81 | 82 | Depending on your feelings about the recent debate regarding how Ruby is 83 | tested[^1][^2], it may surprise you to learn that our Guinea pig comes directly 84 | from Ruby's built-in test suite. After all, though tail call optimization may 85 | not be enabled by default, and though it may only be experimental at this time, 86 | it's not unreasonable to think that there'd be a test for it somewhere. That 87 | somewhere is among a handful of other tests for various optimizations to the 88 | Ruby VM in the Ruby source at [test/ruby/test_optimization.rb](https://github.com/ruby/ruby/blob/fcf6fa8781fe236a9761ad5d75fa1b87f1afeea2/test/ruby/test_optimization.rb#L213). 89 | 90 | The test that is home to our Guinea pig is somewhat unremarkable, so though 91 | you're welcome to review the full contents of the test, for our purposes I've 92 | extracted the tail recursive factorial function used by the test with some 93 | refactoring to, among other things, isolate the HEREDOC and make it work outside 94 | of the test: 95 | 96 | ```ruby 97 | code = <<-CODE 98 | class Factorial 99 | def self.fact_helper(n, res) 100 | n == 1 ? res : fact_helper(n - 1, n * res) 101 | end 102 | 103 | def self.fact(n) 104 | fact_helper(n, 1) 105 | end 106 | end 107 | CODE 108 | options = { 109 | tailcall_optimization: true, 110 | trace_instruction: false, 111 | } 112 | RubyVM::InstructionSequence.new(code, nil, nil, nil, options).eval 113 | ``` 114 | 115 | The tail recursive method of interest above is the **fact_helper** method. It 116 | should hopefully be pretty obvious that **fact_helper** is tail recursive given 117 | that, in all but the base case, the final action of the method is the invocation 118 | of the itself with primitive values. Other than the tail recursive nature 119 | of this function, there are a couple of other things going on here that are worth 120 | noting. 
121 | 122 | First, as I alluded to before in regard to tail call optimization not being 123 | enabled by default, currently it is not possible to turn on tail call 124 | optimization without also disabling the **set_trace_func** capabilities of the VM. 125 | This can be seen above in the option to **RubyVM::InstructionSequence** setting 126 | **trace_instruction** to false. 127 | 128 | Second, this example demonstrates the best strategy of enabling tail call 129 | optimization that I have come across so far. I say this because the other 130 | examples I've referenced have all enabled tail call optimization by changing 131 | **RubyVM::InstructionSequence.compile_option**, effectively enabling tail call 132 | optimization globally. 133 | 134 | Though at least one source suggested that the modified compile options would only be 135 | applied to code directly compiled with **RubyVM::InstructionSequence**, this is 136 | incorrect. In fact, any files loaded after the change to 137 | **RubyVM::InstructionSequence.compile_option** will be compiled with tail call 138 | optimization enabled. This can be verified by running the following contrived 139 | test script that adapts our Guinea pig both to evidence the global nature of 140 | **RubyVM::InstructionSequence.compile_option** and to demonstrate the utility of 141 | tail call optimization. 142 | 143 | ```ruby 144 | # Flag indicating whether this is the first time time this file has been loaded 145 | $first_load = true if $first_load.nil? 146 | 147 | # Declare classes to facilitate #instance_eval later 148 | class FirstLoadFactorial; end 149 | class ReloadedFactorial; end 150 | 151 | # On the first load, extend FirstLoadFactorial, 152 | # On the second load, extend ReloadedFactorial. 153 | klass = $first_load ? 
FirstLoadFactorial : ReloadedFactorial 154 | 155 | # Tail recursive factorial adapted from 156 | # https://github.com/ruby/ruby/blob/fcf6fa8781fe236a9761ad5d75fa1b87f1afeea2/test/ruby/test_optimization.rb#L213 157 | klass.instance_eval do 158 | def self.fact_helper(n, res) 159 | n == 1 ? res : fact_helper(n - 1, n * res) 160 | end 161 | 162 | def self.fact(n) 163 | fact_helper(n, 1) 164 | end 165 | end 166 | 167 | # Turn on tailcall optimization 168 | RubyVM::InstructionSequence.compile_option = { 169 | tailcall_optimization: true, 170 | trace_instruction: false, 171 | } 172 | 173 | # This check avoids calculating the factorial twice; ReloadedFactorial will only 174 | # respond to :fact after the file has been reloaded. 175 | if ReloadedFactorial.respond_to?(:fact) 176 | begin 177 | puts "FirstLoadFactorial: #{FirstLoadFactorial.fact(50000).to_s.length}" 178 | rescue SystemStackError 179 | puts 'FirstLoadFactorial: stack level too deep' 180 | end 181 | 182 | # 50000! is 213,237 digits long, so display just the length of the calculation 183 | puts "ReloadedFactorial: #{ReloadedFactorial.fact(50000).to_s.length}" 184 | end 185 | 186 | # Reload the file on the first load only 187 | if $first_load 188 | $first_load = false 189 | load __FILE__ 190 | end 191 | 192 | # $ ruby tail_optimized_reload.rb 193 | # FirstLoadFactorial: stack level too deep 194 | # ReloadedFactorial: 213237 195 | ``` 196 | 197 | [View on GitHub](https://github.com/tdg5/blog_snippets/blob/8cdc800e711f5270754e352b9f3458d7e429b87d/lib/blog_snippets/tail_call_optimization_in_ruby_internals/tail_optimized_reload.rb) 198 | 199 | Since tail call optimization is still an experimental feature, if you're going 200 | to use tail call optimization in production code or in code that could become 201 | production code, the strategy demonstrated by the Ruby core test of creating a 202 | new **RubyVM::InstructionSequence** object that can be used to load/compile tail 203 | call optimized code without affecting 
other code compiled by the VM later is 204 | absolutely the right way to go. 205 | 206 | ## End Part I 207 | That does it for our initial foray into tail call optimization in Ruby. I hope 208 | you've found something here today worth the price of admission. Stay tuned for 209 | my next post in which we'll take our tail recursive Guinea pig for a deep dive into the 210 | internals of Ruby, all the way from the Ruby source, through the YARV instructions 211 | just below the surface, down deep into the C weeds in search of the source 212 | of Ruby's tail call optimization implementation. It'll certainly be an 213 | interesting ride. 214 | 215 | [^1]: http://rubini.us/2014/12/31/matz-s-ruby-developers-don-t-use-rubyspec/ 216 | [^2]: https://gist.github.com/nateberkopec/11dbcf0ee7f2c08450ea 217 | -------------------------------------------------------------------------------- /lib/blog_snippets/articles/tail_call_optimization_in_ruby_internals/from_call_method_to_tco.c: -------------------------------------------------------------------------------- 1 | static void 2 | vm_search_method(rb_call_info_t *ci, VALUE recv) 3 | { 4 | VALUE klass = CLASS_OF(recv); 5 | 6 | #if OPT_INLINE_METHOD_CACHE 7 | if (LIKELY(GET_GLOBAL_METHOD_STATE() == ci->method_state && RCLASS_SERIAL(klass) == ci->class_serial)) { 8 | /* cache hit! 
*/ 9 | return; 10 | } 11 | #endif 12 | 13 | ci->me = rb_method_entry(klass, ci->mid, &ci->defined_class); 14 | ci->klass = klass; 15 | ci->call = vm_call_general; 16 | #if OPT_INLINE_METHOD_CACHE 17 | ci->method_state = GET_GLOBAL_METHOD_STATE(); 18 | ci->class_serial = RCLASS_SERIAL(klass); 19 | #endif 20 | } 21 | 22 | 23 | static VALUE 24 | vm_call_general(rb_thread_t *th, rb_control_frame_t *reg_cfp, rb_call_info_t *ci) 25 | { 26 | return vm_call_method(th, reg_cfp, ci); 27 | } 28 | 29 | 30 | VALUE 31 | vm_call_method(rb_thread_t *th, rb_control_frame_t *cfp, rb_call_info_t *ci) 32 | { 33 | int enable_fastpath = 1; 34 | rb_call_info_t ci_temp; 35 | 36 | start_method_dispatch: 37 | if (ci->me != 0) { 38 | if ((ci->me->flag == 0)) { 39 | VALUE klass; 40 | 41 | normal_method_dispatch: 42 | switch (ci->me->def->type) { 43 | case VM_METHOD_TYPE_ISEQ:{ 44 | CI_SET_FASTPATH(ci, vm_call_iseq_setup, enable_fastpath); 45 | return vm_call_iseq_setup(th, cfp, ci); 46 | } 47 | case VM_METHOD_TYPE_NOTIMPLEMENTED: 48 | case VM_METHOD_TYPE_CFUNC: 49 | CI_SET_FASTPATH(ci, vm_call_cfunc, enable_fastpath); 50 | return vm_call_cfunc(th, cfp, ci); 51 | case VM_METHOD_TYPE_ATTRSET:{ 52 | CALLER_SETUP_ARG(cfp, ci); 53 | rb_check_arity(ci->argc, 1, 1); 54 | ci->aux.index = 0; 55 | CI_SET_FASTPATH(ci, vm_call_attrset, enable_fastpath && !(ci->flag & VM_CALL_ARGS_SPLAT)); 56 | return vm_call_attrset(th, cfp, ci); 57 | } 58 | case VM_METHOD_TYPE_IVAR:{ 59 | CALLER_SETUP_ARG(cfp, ci); 60 | rb_check_arity(ci->argc, 0, 0); 61 | ci->aux.index = 0; 62 | CI_SET_FASTPATH(ci, vm_call_ivar, enable_fastpath && !(ci->flag & VM_CALL_ARGS_SPLAT)); 63 | return vm_call_ivar(th, cfp, ci); 64 | } 65 | case VM_METHOD_TYPE_MISSING:{ 66 | ci->aux.missing_reason = 0; 67 | CI_SET_FASTPATH(ci, vm_call_method_missing, enable_fastpath); 68 | return vm_call_method_missing(th, cfp, ci); 69 | } 70 | case VM_METHOD_TYPE_BMETHOD:{ 71 | CI_SET_FASTPATH(ci, vm_call_bmethod, enable_fastpath); 72 | return 
vm_call_bmethod(th, cfp, ci); 73 | } 74 | case VM_METHOD_TYPE_ZSUPER:{ 75 | klass = ci->me->klass; 76 | klass = RCLASS_ORIGIN(klass); 77 | zsuper_method_dispatch: 78 | klass = RCLASS_SUPER(klass); 79 | if (!klass) { 80 | ci->me = 0; 81 | goto start_method_dispatch; 82 | } 83 | ci_temp = *ci; 84 | ci = &ci_temp; 85 | 86 | ci->me = rb_method_entry(klass, ci->mid, &ci->defined_class); 87 | 88 | if (ci->me != 0) { 89 | goto normal_method_dispatch; 90 | } 91 | else { 92 | goto start_method_dispatch; 93 | } 94 | } 95 | case VM_METHOD_TYPE_OPTIMIZED:{ 96 | switch (ci->me->def->body.optimize_type) { 97 | case OPTIMIZED_METHOD_TYPE_SEND: 98 | CI_SET_FASTPATH(ci, vm_call_opt_send, enable_fastpath); 99 | return vm_call_opt_send(th, cfp, ci); 100 | case OPTIMIZED_METHOD_TYPE_CALL: 101 | CI_SET_FASTPATH(ci, vm_call_opt_call, enable_fastpath); 102 | return vm_call_opt_call(th, cfp, ci); 103 | default: 104 | rb_bug("vm_call_method: unsupported optimized method type (%d)", 105 | ci->me->def->body.optimize_type); 106 | } 107 | break; 108 | } 109 | case VM_METHOD_TYPE_UNDEF: 110 | break; 111 | case VM_METHOD_TYPE_REFINED:{ 112 | NODE *cref = rb_vm_get_cref(cfp->iseq, cfp->ep); 113 | VALUE refinements = cref ? 
cref->nd_refinements : Qnil; 114 | VALUE refinement, defined_class; 115 | rb_method_entry_t *me; 116 | 117 | refinement = find_refinement(refinements, 118 | ci->defined_class); 119 | if (NIL_P(refinement)) { 120 | goto no_refinement_dispatch; 121 | } 122 | me = rb_method_entry(refinement, ci->mid, &defined_class); 123 | if (me) { 124 | if (ci->call == vm_call_super_method) { 125 | rb_control_frame_t *top_cfp = current_method_entry(th, cfp); 126 | if (top_cfp->me && 127 | rb_method_definition_eq(me->def, top_cfp->me->def)) { 128 | goto no_refinement_dispatch; 129 | } 130 | } 131 | ci->me = me; 132 | ci->defined_class = defined_class; 133 | if (me->def->type != VM_METHOD_TYPE_REFINED) { 134 | goto start_method_dispatch; 135 | } 136 | } 137 | 138 | no_refinement_dispatch: 139 | if (ci->me->def->body.orig_me) { 140 | ci->me = ci->me->def->body.orig_me; 141 | if (UNDEFINED_METHOD_ENTRY_P(ci->me)) { 142 | ci->me = 0; 143 | } 144 | goto start_method_dispatch; 145 | } 146 | else { 147 | klass = ci->me->klass; 148 | goto zsuper_method_dispatch; 149 | } 150 | } 151 | } 152 | rb_bug("vm_call_method: unsupported method type (%d)", ci->me->def->type); 153 | } 154 | else { 155 | int noex_safe; 156 | if (!(ci->flag & VM_CALL_FCALL) && (ci->me->flag & NOEX_MASK) & NOEX_PRIVATE) { 157 | int stat = NOEX_PRIVATE; 158 | 159 | if (ci->flag & VM_CALL_VCALL) { 160 | stat |= NOEX_VCALL; 161 | } 162 | ci->aux.missing_reason = stat; 163 | CI_SET_FASTPATH(ci, vm_call_method_missing, 1); 164 | return vm_call_method_missing(th, cfp, ci); 165 | } 166 | else if (!(ci->flag & VM_CALL_OPT_SEND) && (ci->me->flag & NOEX_MASK) & NOEX_PROTECTED) { 167 | enable_fastpath = 0; 168 | if (!rb_obj_is_kind_of(cfp->self, ci->defined_class)) { 169 | ci->aux.missing_reason = NOEX_PROTECTED; 170 | return vm_call_method_missing(th, cfp, ci); 171 | } 172 | else { 173 | goto normal_method_dispatch; 174 | } 175 | } 176 | else if ((noex_safe = NOEX_SAFE(ci->me->flag)) > th->safe_level && (noex_safe > 2)) { 177 | 
rb_raise(rb_eSecurityError, "calling insecure method: %"PRIsVALUE, rb_id2str(ci->mid)); 178 | } 179 | else { 180 | goto normal_method_dispatch; 181 | } 182 | } 183 | } 184 | else { 185 | /* method missing */ 186 | int stat = 0; 187 | if (ci->flag & VM_CALL_VCALL) { 188 | stat |= NOEX_VCALL; 189 | } 190 | if (ci->flag & VM_CALL_SUPER) { 191 | stat |= NOEX_SUPER; 192 | } 193 | if (ci->mid == idMethodMissing) { 194 | rb_control_frame_t *reg_cfp = cfp; 195 | VALUE *argv = STACK_ADDR_FROM_TOP(ci->argc); 196 | rb_raise_method_missing(th, ci->argc, argv, ci->recv, stat); 197 | } 198 | else { 199 | ci->aux.missing_reason = stat; 200 | CI_SET_FASTPATH(ci, vm_call_method_missing, 1); 201 | return vm_call_method_missing(th, cfp, ci); 202 | } 203 | } 204 | 205 | rb_bug("vm_call_method: unreachable"); 206 | } 207 | 208 | 209 | static VALUE 210 | vm_call_iseq_setup(rb_thread_t *th, rb_control_frame_t *cfp, rb_call_info_t *ci) 211 | { 212 | vm_callee_setup_arg(th, ci, ci->me->def->body.iseq, cfp->sp - ci->argc); 213 | return vm_call_iseq_setup_2(th, cfp, ci); 214 | } 215 | 216 | 217 | static VALUE 218 | vm_call_iseq_setup_2(rb_thread_t *th, rb_control_frame_t *cfp, rb_call_info_t *ci) 219 | { 220 | if (LIKELY(!(ci->flag & VM_CALL_TAILCALL))) { 221 | return vm_call_iseq_setup_normal(th, cfp, ci); 222 | } 223 | else { 224 | return vm_call_iseq_setup_tailcall(th, cfp, ci); 225 | } 226 | } 227 | 228 | 229 | static inline VALUE 230 | vm_call_iseq_setup_normal(rb_thread_t *th, rb_control_frame_t *cfp, rb_call_info_t *ci) 231 | { 232 | int i, local_size; 233 | VALUE *argv = cfp->sp - ci->argc; 234 | rb_iseq_t *iseq = ci->me->def->body.iseq; 235 | VALUE *sp = argv + iseq->param.size; 236 | 237 | /* clear local variables (arg_size...local_size) */ 238 | for (i = iseq->param.size, local_size = iseq->local_size; i < local_size; i++) { 239 | *sp++ = Qnil; 240 | } 241 | 242 | vm_push_frame(th, iseq, VM_FRAME_MAGIC_METHOD, ci->recv, ci->defined_class, 243 | VM_ENVVAL_BLOCK_PTR(ci->blockptr), 
244 | iseq->iseq_encoded + ci->aux.opt_pc, sp, 0, ci->me, iseq->stack_max); 245 | 246 | cfp->sp = argv - 1 /* recv */; 247 | return Qundef; 248 | } 249 | 250 | 251 | static inline VALUE 252 | vm_call_iseq_setup_tailcall(rb_thread_t *th, rb_control_frame_t *cfp, rb_call_info_t *ci) 253 | { 254 | int i; 255 | VALUE *argv = cfp->sp - ci->argc; 256 | rb_iseq_t *iseq = ci->me->def->body.iseq; 257 | VALUE *src_argv = argv; 258 | VALUE *sp_orig, *sp; 259 | VALUE finish_flag = VM_FRAME_TYPE_FINISH_P(cfp) ? VM_FRAME_FLAG_FINISH : 0; 260 | 261 | cfp = th->cfp = RUBY_VM_PREVIOUS_CONTROL_FRAME(th->cfp); /* pop cf */ 262 | 263 | RUBY_VM_CHECK_INTS(th); 264 | 265 | sp_orig = sp = cfp->sp; 266 | 267 | /* push self */ 268 | sp[0] = ci->recv; 269 | sp++; 270 | 271 | /* copy arguments */ 272 | for (i=0; i < iseq->param.size; i++) { 273 | *sp++ = src_argv[i]; 274 | } 275 | 276 | /* clear local variables */ 277 | for (i = 0; i < iseq->local_size - iseq->param.size; i++) { 278 | *sp++ = Qnil; 279 | } 280 | 281 | vm_push_frame(th, iseq, VM_FRAME_MAGIC_METHOD | finish_flag, 282 | ci->recv, ci->defined_class, VM_ENVVAL_BLOCK_PTR(ci->blockptr), 283 | iseq->iseq_encoded + ci->aux.opt_pc, sp, 0, ci->me, iseq->stack_max); 284 | 285 | cfp->sp = sp_orig; 286 | return Qundef; 287 | } 288 | -------------------------------------------------------------------------------- /lib/blog_snippets/articles/tuning_dd_block_size/README.md: -------------------------------------------------------------------------------- 1 | Though I wouldn't call myself a dd expert, I have had my fair share of occasions 2 | to yield the might that is dd. 
From my first job after college using 3 | [KNOPPIX](http://www.knopper.net/knoppix/index-en.html) and dd to rescue NFL 4 | game footage from dying HDDs on behalf of NFL video coordinators, to using dd 5 | this past summer to move [my girlfriend's](http://alilallovertheplace.com/) OSX 6 | installation over to a faster SSD, dd has been an invaluable tool in my Unix 7 | arsenal for almost 10 years. 8 | 9 | Maybe it's because everyone focuses on getting the **of** (output file) argument 10 | right, or maybe there's more to it, but in my time with dd, one aspect of dd's 11 | usage that I've found often overlooked relates to dd's three block size 12 | arguments, **ibs** (input block size), **obs** (output block size), and the all 13 | encompassing **bs** (input and output block size). Don't get me wrong, making 14 | sure you've determined the correct **of** argument is of paramount importance, 15 | but once you've got that nailed down, there's more to be done than breathe a 16 | giant sigh of relief. The various block size arguments that dd takes will be the 17 | deciding factor between whether the copy completes in a day or in two hours. 18 | 19 | ## A little background on block size 20 | A **block** in terms of dd as explained by Wikipedia: 21 | > A block is a unit measuring the number of bytes that are read, written, or 22 | > converted at one time.[^1] 23 | 24 | As such, the various block size arguments tell dd how many sectors should be 25 | copied at once, whether for input, output, or both. By default, most versions of 26 | dd will use a block size of 512 bytes for both input and output.[^2] This may have 27 | been fine pre-1999 when most hard drives had a sector size of 512 bytes, but 28 | in recent years most hard drives have a sector size of at least 4KB (4096 29 | bytes). 
This change may seem inconsequential but can lead to enormous 30 | inefficiencies when combined with the fact that these days many typical consumer 31 | hard drives have more than a terabyte of capacity. When dealing with a terabyte 32 | or more of data, you **really** want to make sure you choose an optimal block 33 | size. 34 | 35 | There's a useful, though pretty dated, [message in the archive of the Eugene, 36 | Oregon Linux User's Group (Eug-Lug) that offers some perspective on optimal 37 | block sizes for dd](http://www.mail-archive.com/eug-lug@efn.org/msg12073.html) 38 | that can be useful as a jumping off point for your own tests or in those 39 | situations where testing different block sizes isn't feasible. 40 | The findings presented in the message show that for the author's particular 41 | hardware, a block size of about 64K was pretty close to optimal. 42 | 43 | That's nice advice, but without more context it's somewhat meaningless, so let's 44 | perform a few experiments. 45 | 46 | ## Science! 47 | As an example of the impact that an inefficient/optimal block size can have, 48 | I've run a few tests for your consideration. These results are all specific to 49 | my hardware, and though they may offer a rule-of-thumb for similar situations, 50 | it's important to keep in mind that there is no universally correct block size; 51 | what is optimal for one situation may be terribly inefficient for another. To 52 | that end, the tests below are meant to provide a simple example of the benefits 53 | of optimizing the block size used by dd; they are not intended to accurately 54 | replicate real world copy scenarios. 55 | 56 | For simplicity, we will be reading data from */dev/zero*, which should be able 57 | to churn out zeros at a much, much faster rate than we can actually write them, which, 58 | in turn, means that these examples are actually testing optimal output block 59 | sizes and are, more or less, ignoring input block size entirely. 
Optimizing input 60 | block sizing is left as an exercise for the reader and should be easy enough to 61 | achieve by reading data from the desired disk and writing it out to */dev/null*. 62 | 63 | On with the experiments! 64 | 65 | Let's start off with a few tests writing out to a HDD: 66 | 67 | - Reading from */dev/zero* and writing out to a HDD with the default block size 68 | of 512 bytes yields a throughput of 10.9 MB/s. At that rate, writing 1TB of 69 | data would take about 96,200 seconds or just north of 26 hours. 70 | 71 | - Reading from */dev/zero* and writing out to a HDD with the Eug-Lug suggested 72 | block size of 64K yields a throughput of 108 MB/s. At that rate, writing 1TB 73 | of data would take 9,709 seconds or about 2.7 hours to complete. This is a 74 | huge improvement, nearly an order of magnitude, over the default block size of 75 | 512 bytes. 76 | 77 | - Reading from */dev/zero* and writing out to a HDD with a more 78 | optimal block size of 512K yields a throughput of 131 MB/s. At that rate, 79 | writing 1TB of data would take about 8,004 seconds or about 2.2 hours. Though 80 | not as pronounced a difference, this is even faster than the Eug-Lug 81 | suggestion and is more than a full order of magnitude faster than the default 82 | block size of 512 bytes. 83 | 84 | Let's switch gears and try a couple of experiments writing out to a SSD: 85 | 86 | - Reading from */dev/zero* and writing out to a SSD with the default block size 87 | of 512 bytes yields a throughput of 39.6 MB/s. At that rate writing 1TB of 88 | data would take about 26,479 seconds or about 7.4 hours. 89 | 90 | - Reading from */dev/zero* and writing out to a SSD with the Eug-Lug suggested 91 | block size of 64K yields a throughput of 266 MB/s. At that rate, writing 1TB 92 | of data would take about 3,942 seconds or about 1.1 hours. Once again, this 93 | is a huge improvement, nearly an order of magnitude faster than the default 94 | block size of 512 bytes. 
95 | 96 | - Reading from */dev/zero* and writing out to a SSD with a more 97 | optimal block size of 256K yields a throughput of 280 MB/s. At that rate, 98 | writing 1TB of data would take about 3,744 seconds or about 1 hour. Once 99 | again this is faster than both the Eug-Lug suggestion and the default, though 100 | not as much of an improvement as in the HDD case. 101 | 102 | Let's switch gears one last time and try a few experiments writing out to RAM: 103 | 104 | - Reading from */dev/zero* and writing out to RAM with the default block size 105 | of 512 bytes yields a throughput of 221 MB/s. At that rate, writing 1TB of 106 | data would take about 4,745 seconds or about 1.3 hours. 107 | 108 | - Reading from */dev/zero* and writing out to RAM with the Eug-Lug suggested 109 | block size of 64K yields a throughput of 1,433 MB/s. At that rate, writing 1TB 110 | of data would take about 731 seconds or about 12 minutes to complete the 111 | transfer. Once again, this is a huge improvement, nearly an order of 112 | magnitude faster than the default block size. 113 | 114 | - Reading from */dev/zero* and writing out to RAM with a more 115 | optimal block size of 256K yields a throughput of 1,536 MB/s. At that rate, 116 | writing 1TB of data would take about 682 seconds or about 11 minutes. This is 117 | once again faster than the default and the Eug-Lug suggestion, but once 118 | again, pretty comparable to the Eug-Lug suggestion. 119 | 120 | These experiments should help illustrate that depending on the type, 121 | manufacturer, and state of the source and destination media, optimal block sizes 122 | can vary wildly. This should also help demonstrate that on modern hardware the 123 | default block size of 512 bytes tends to be horribly inefficient. That said, 124 | though not always the most optimal, the Eug-Lug suggested block size of 64K can 125 | be a somewhat reliable option for a more modern default. 
126 | 127 | ## A pair of scripts to find more optimal block sizes 128 | Because of the wild variance in optimal block sizing, I've written a couple of 129 | scripts to test a range of different input and output block size options for use 130 | prior to starting any large copies with dd. However, before we discuss the 131 | scripts, **be warned that both of these scripts use dd behind the scenes, so it's 132 | important to use caution when running either script so as to avoid summoning 133 | dd's alter ego, disk destroyer.**[^3] The scripts are short enough that I 134 | encourage you to read both scripts before using either one of them so you have a 135 | better understanding of what is going on behind the scenes. That said, first 136 | we'll look at a script for determining an optimal output block size. 137 | 138 | ### dd_obs_test.sh 139 | 140 | Let's just jump straight into the script: 141 | 142 | ```bash 143 | #!/bin/bash 144 | 145 | # Since we're dealing with dd, abort if any errors occur 146 | set -e 147 | 148 | TEST_FILE=${1:-dd_obs_testfile} 149 | [ -e "$TEST_FILE" ]; TEST_FILE_EXISTS=$? 150 | TEST_FILE_SIZE=134217728 151 | 152 | # Header 153 | PRINTF_FORMAT="%8s : %s\n" 154 | printf "$PRINTF_FORMAT" 'block size' 'transfer rate' 155 | 156 | # Block sizes of 512b 1K 2K 4K 8K 16K 32K 64K 128K 256K 512K 1M 2M 4M 8M 16M 32M 64M 157 | for BLOCK_SIZE in 512 1024 2048 4096 8192 16384 32768 65536 131072 262144 524288 1048576 2097152 4194304 8388608 16777216 33554432 67108864 158 | do 159 | # Calculate number of segments required to copy 160 | COUNT=$(($TEST_FILE_SIZE / $BLOCK_SIZE)) 161 | 162 | if [ $COUNT -le 0 ]; then 163 | echo "Block size of $BLOCK_SIZE estimated to require $COUNT blocks, aborting further tests." 
164 | break 165 | fi 166 | 167 | # Create a test file with the specified block size 168 | DD_RESULT=$(dd if=/dev/zero of=$TEST_FILE bs=$BLOCK_SIZE count=$COUNT 2>&1 1>/dev/null) 169 | 170 | # Extract the transfer rate from dd's STDERR output 171 | TRANSFER_RATE=$(echo $DD_RESULT | \grep --only-matching -E '[0-9.]+ ([MGk]?B|bytes)/s(ec)?') 172 | 173 | # Clean up the test file if we created one 174 | [ $TEST_FILE_EXISTS -ne 0 ] && rm $TEST_FILE 175 | 176 | # Output the result 177 | printf "$PRINTF_FORMAT" "$BLOCK_SIZE" "$TRANSFER_RATE" 178 | done 179 | ``` 180 | 181 | [View on GitHub](https://github.com/tdg5/blog_snippets/blob/master/lib/blog_snippets/articles/tuning_dd_block_size/dd_obs_test.sh) 182 | 183 | As you can see, the script is a pretty basic for-loop that uses dd to create a 184 | test file of 128MB using a variety of block sizes, from the default of 512 185 | bytes, all the way up to 64M. There are a few extra arguments to the dd command 186 | to make writing out a 128M file easy and there's also some grepping to pull out 187 | the transfer rate, but otherwise, that's pretty much all there is to it. 188 | 189 | By default the command will create a test file named *dd_obs_testfile* in the 190 | current directory. 
Alternatively, you can provide a path to a custom test file 191 | by providing a path after the script name: 192 | 193 | ```bash 194 | $ ./dd_obs_test.sh /path/to/disk/or/test_file 195 | ``` 196 | 197 | The output of the script is a list of the tested block sizes and their respective transfer 198 | rates like so: 199 | 200 | ```bash 201 | $ ./dd_obs_test.sh /dev/null 202 | 512: 1.4 GB/s 203 | 1K: 2.6 GB/s 204 | 2K: 4.3 GB/s 205 | 4K: 6.5 GB/s 206 | 8K: 7.8 GB/s 207 | 16K: 9.0 GB/s 208 | 32K: 8.1 GB/s 209 | 64K: 7.6 GB/s 210 | 128K: 9.8 GB/s 211 | 256K: 7.9 GB/s 212 | 512K: 9.7 GB/s 213 | 1M: 12.8 GB/s 214 | 2M: 8.8 GB/s 215 | 4M: 7.2 GB/s 216 | 8M: 7.3 GB/s 217 | 16M: 5.5 GB/s 218 | 32M: 6.4 GB/s 219 | 64M: 4.0 GB/s 220 | ``` 221 | 222 | Wow, I guess [*/dev/null* really is 223 | web-scale.](https://www.youtube.com/watch?v=b2F-DItXtZs&t=1m42s) 224 | 225 | ### dd_ibs_test.sh 226 | Now let's look at a similar script for determining an optimal input block size. 227 | We can follow pretty much the same pattern except for a couple of key 228 | differences: instead of reading from */dev/zero* and writing out the test 229 | file, this script reads from */dev/urandom* to create a test file of random bits 230 | and then uses dd to copy that test file to */dev/null* using a variety of 231 | different block sizes. Since this script creates the test file at the path you 232 | specify, you will want to be careful not to accidentally overwrite an existing 233 | file by pointing the script at an existing path. 234 | 235 | Here's the script: 236 | 237 | ```bash 238 | #!/bin/bash 239 | 240 | # Since we're dealing with dd, abort if any errors occur 241 | set -e 242 | 243 | TEST_FILE=${1:-dd_ibs_testfile} 244 | [ -e "$TEST_FILE" ]; TEST_FILE_EXISTS=$? 245 | TEST_FILE_SIZE=134217728 246 | 247 | # Exit if file exists 248 | if [ -e $TEST_FILE ]; then 249 | echo "Test file $TEST_FILE exists, aborting." 
250 | exit 1 251 | fi 252 | 253 | # Create test file 254 | echo 'Generating test file...' 255 | BLOCK_SIZE=65536 256 | COUNT=$(($TEST_FILE_SIZE / $BLOCK_SIZE)) 257 | dd if=/dev/urandom of=$TEST_FILE bs=$BLOCK_SIZE count=$COUNT > /dev/null 2>&1 258 | 259 | # Header 260 | PRINTF_FORMAT="%8s : %s\n" 261 | printf "$PRINTF_FORMAT" 'block size' 'transfer rate' 262 | 263 | # Block sizes of 512b 1K 2K 4K 8K 16K 32K 64K 128K 256K 512K 1M 2M 4M 8M 16M 32M 64M 264 | for BLOCK_SIZE in 512 1024 2048 4096 8192 16384 32768 65536 131072 262144 524288 1048576 2097152 4194304 8388608 16777216 33554432 67108864 265 | do 266 | # Read test file out to /dev/null with specified block size 267 | DD_RESULT=$(dd if=$TEST_FILE of=/dev/null bs=$BLOCK_SIZE 2>&1 1>/dev/null) 268 | 269 | # Extract transfer rate 270 | TRANSFER_RATE=$(echo $DD_RESULT | \grep --only-matching -E '[0-9.]+ ([MGk]?B|bytes)/s(ec)?') 271 | 272 | printf "$PRINTF_FORMAT" "$BLOCK_SIZE" "$TRANSFER_RATE" 273 | done 274 | 275 | # Clean up the test file if we created one 276 | [ $TEST_FILE_EXISTS -ne 0 ] && rm $TEST_FILE 277 | ``` 278 | 279 | [View on GitHub](https://github.com/tdg5/blog_snippets/blob/master/lib/blog_snippets/articles/tuning_dd_block_size/dd_ibs_test.sh) 280 | 281 | Similar to the *dd_obs_test.sh* script, this script will create a default test 282 | file named *dd_ibs_testfile* but you can also provide the script with a path 283 | argument to test input block sizes on different devices: 284 | 285 | ```bash 286 | $ ./dd_ibs_test.sh /path/to/disk/test_file 287 | ``` 288 | 289 | Again, it is important to remember that the script will try to overwrite the 290 | test file and later will remove the file after it has been written, so use 291 | extreme caution to avoid blowing away something you didn't mean to destroy. It 292 | is likely that you will need to tweak this script to meet your particular use 293 | case. 
294 | 295 | Also like *dd_obs_test.sh*, the output of this script is a list of the tested 296 | block sizes and their respective transfer rates like so: 297 | 298 | ```bash 299 | $ ./dd_ibs_test.sh 300 | 512: 1.1 GB/s 301 | 1K: 1.8 GB/s 302 | 2K: 3.0 GB/s 303 | 4K: 4.2 GB/s 304 | 8K: 5.1 GB/s 305 | 16K: 5.7 GB/s 306 | 32K: 5.4 GB/s 307 | 64K: 5.8 GB/s 308 | 128K: 6.3 GB/s 309 | 256K: 5.4 GB/s 310 | 512K: 5.8 GB/s 311 | 1M: 5.8 GB/s 312 | 2M: 5.3 GB/s 313 | 4M: 5.0 GB/s 314 | 8M: 4.9 GB/s 315 | 16M: 4.5 GB/s 316 | 32M: 4.4 GB/s 317 | 64M: 3.5 GB/s 318 | ``` 319 | 320 | In the above example it can be seen that an input block size of 128K is optimal 321 | for my particular setup. 322 | 323 | ## The end 324 | I hope this post has given you some insight into tuning dd's block size 325 | arguments and maybe even saved you a day spent transferring blocks 512 bytes at 326 | a time. 327 | 328 | Thanks for reading! 329 | 330 | [^1]: ["A block is a unit measuring the number of bytes that are read, written, or converted at one time."](https://en.wikipedia.org/wiki/Dd_(Unix)#Block_size) 331 | [^2]: [**dd's** ibs (input block size) and obs (output block size) arguments both default to 512 bytes](http://man7.org/linux/man-pages/man1/dd.1.html) 332 | [^3]: ["Some people believe dd means "Destroy Disk" or "Delete Data" because if it is misused, a partition or output file can be trashed very quickly."](http://www.codecoffee.com/tipsforlinux/articles/036.html) 333 | -------------------------------------------------------------------------------- /lib/blog_snippets/articles/module_factory_for_dependency_management/README.md: -------------------------------------------------------------------------------- 1 | At last year's RubyConf in San Diego, [Craig Buchek](https://twitter.com/craigbuchek) 2 | gave a presentation entitled [Ruby Idioms You're Not Using Yet](https://www.youtube.com/watch?v=hc_wtllfKtQ), 3 | focusing on some of Ruby's under-utilized and emerging idioms. 
In this post 4 | we'll discuss one of those idioms, an idiom Craig appropriately calls **Module 5 | Factory**. In particular, we'll explore using a Module Factory as a pattern 6 | for dependency and load-order management. 7 | 8 | ## Hey! Who you callin' an idiom? 9 | 10 | For those unfamiliar with idioms or, more likely, unfamiliar with what idioms 11 | refer to in the context of a programming language, Craig presents a number of 12 | different perspectives, my favorite of which is: 13 | 14 | > A style or form of expression that is characteristic of a particular person, 15 | > type of art, etc.[^1] 16 | 17 | Craig also offers his own perspective, which I think helps clarify and distill 18 | this concept further: 19 | 20 | > A way in which we normally express ourselves in a language. 21 | 22 | Though I think this definition captures the idea nicely, I think there's a pearl 23 | of enlightenment to be found in reducing the concept down to its roots: 24 | 25 | > Late Latin idioma, idiomat-, from Greek, from idiousthai, to make one's own, 26 | > from idios, own, personal, private.[^2] 27 | 28 | I find this etymology charming because while formal definitions tend to focus on 29 | existing patterns of language belonging to specific communities and cultures, 30 | the origin of the word hints at a deeper essence that leads ultimately to the 31 | cradle of all idiomatic expression: idioms are an emergent behavior of the 32 | efforts of individuals and communities to make a language their own. 33 | 34 | ## Idioms in Ruby 35 | 36 | In terms of Ruby, let's take a look at a couple of concrete examples of common 37 | Ruby idioms juxtaposed with their less idiomatic counterparts to give ourselves 38 | some grounding. Hopefully you'll agree that within each example, each variation 39 | gets further and further from how you'd expect to see an idea expressed in Ruby. 
40 | 41 | ###### Conditional assignment: 42 | ```ruby 43 | # Idiomatic Ruby 44 | a ||= b 45 | 46 | # Less idiomatic 47 | a || a = b 48 | 49 | # And lastly, please don't do this 50 | a = b if a == nil || a == false 51 | ``` 52 | 53 | ###### Sequential iteration 54 | ```ruby 55 | # Idiomatic Ruby 56 | 5.times { |i| puts i } 57 | 58 | # Less idiomatic, though more performant 59 | i = 0 60 | while i < 5 61 | puts i 62 | i += 1 63 | end 64 | 65 | # And finally, the dreaded `for` statement 66 | for i in 0..4 67 | puts i 68 | end 69 | ``` 70 | 71 | Hopefully, these examples give you a good idea of idioms in Ruby, but if not, 72 | I'd encourage you to watch [Ruby Idioms You're Not Using Yet](https://www.youtube.com/watch?v=hc_wtllfKtQ), 73 | as it provides more examples which may help to further elucidate the concept. 74 | 75 | On with the show! 76 | 77 | ## Module Factory: An Introduction 78 | 79 | The Module Factory pattern as described in the presentation constitutes the use 80 | of some variety of [Factory Method](https://en.wikipedia.org/wiki/Factory_method_pattern) 81 | in place of a reference to a concrete Module when calling **extend** or 82 | **include** from a Class or a Module. This is a fairly technical description, so 83 | let's take a look at the example the presentation uses to demonstrate this 84 | pattern. 
This example comes from the README for the [Virtus gem](https://rubygems.org/gems/virtus): 85 | 86 | ```ruby 87 | class User 88 | include Virtus.model(:constructor => false, :mass_assignment => false) 89 | end 90 | ``` 91 | 92 | [View on GitHub](https://github.com/solnic/virtus/blob/e648e2fe771d715179bddb7b0df9b0169a295ae3/README.md#cherry-picking-extensions) 93 | 94 | Though it may be unclear what is going on here, if we trust that neither the 95 | [Virtus docs](https://github.com/solnic/virtus/blob/e648e2fe771d715179bddb7b0df9b0169a295ae3/README.md#cherry-picking-extensions) 96 | nor the Ruby docs for [Module#include](http://www.ruby-doc.org/core-2.2.0/Module.html#method-i-include) 97 | contain an error, we can use a little deduction to piece together what's going 98 | on: 99 | 100 | - Though the Ruby docs aren't totally explicit about it, **Module#include** 101 | will raise an error unless given one or more Modules. From this we can infer 102 | that **Virtus.model** must be returning one or more Modules. 103 | - A little trial and error in irb further uncovers that though 104 | **Module#include** supports being invoked with multiple Modules, these Modules 105 | cannot be provided in an Array, but must be normal method arguments (or in the 106 | case of an Array, must be exploded with the [splat operator](https://endofline.wordpress.com/2011/01/21/the-strange-ruby-splat/#calling_methods) 107 | into normal method arguments). Since the Virtus docs don't use the splat 108 | operator, we can further narrow our inference to deduce that **Virtus.model** 109 | must be returning a single module. 110 | 111 | Now that we have a clearer understanding of what's going on in this example, it 112 | becomes easier to see how it fulfills our definition of a Module Factory: 113 | Instead of referencing a concrete Module, **Module#include** is invoked with the 114 | result of invoking the **Virtus.model** method. 
Furthermore, we've deduced that 115 | **Virtus.model** must return a Module of some sort and given the arguments it 116 | takes, it's safe to assume there's some sort of factory logic going on inside. 117 | In fact, this Module Factory allows the including class to cherry-pick a subset 118 | of Virtus' model extensions and include only those selected modules. 119 | 120 | Alright! Not so bad, right? Now that we've got one Module Factory under our 121 | belt, let's take a look at how the Module Factory pattern can help with 122 | dependency management and load ordering. 123 | 124 | ## A job for refactoring 125 | 126 | In order to provide some context for our discussion, let's start with some example 127 | code that I think could benefit from a refactoring to use the Module Factory 128 | pattern. For the sake of brevity, this code is non-functional and skips many of 129 | the details that don't impact our particular interests. That said, the code 130 | below should have a familiar flavor to anyone who has worked with an 131 | asynchronous job framework in the past, such as 132 | [Resque](https://github.com/resque/resque), 133 | [Sidekiq](https://github.com/mperham/sidekiq), 134 | [Backburner](https://github.com/nesquena/backburner), or 135 | [Rails' ActiveJob](https://github.com/rails/rails/tree/master/activejob). 136 | 137 | The example code outlines the skeleton of a job class that performs some 138 | undefined unit of work. For those unfamiliar with any of the job frameworks I 139 | mentioned above, the typical usage pattern for such a framework tends to involve 140 | subclassing a class provided by the job framework which encapsulates and handles 141 | most of the required behaviors of a job. In the example below, this role is 142 | filled by the fictitious class **JobFramework::Job**. 
143 | 144 | Generally, by subclassing a class like **JobFramework::Job**, 145 | the subclass agrees to an interface contract that typically requires the 146 | subclass to implement a **perform** method at the instance level. This pattern 147 | is also followed in the example below, as can be seen by the **perform** 148 | instance method on the **ImportantJob** class. 149 | 150 | One final point worth discussing before getting into the example is that the job 151 | classes provided by many job frameworks tend to provide an **around_perform** 152 | method hook or similar functionality to allow for adding middleware-type 153 | behavior around job execution in a generic, unobtrusive way. The example below 154 | also borrows this pattern, however it can be inferred that **JobFramework::Job** 155 | provides this behavior in a very naive manner that relies heavily upon the class 156 | hierarchy and repeated calls to **super**. 157 | 158 | OK, that should be enough background, on to the example! 159 | 160 | **important_job.rb** 161 | 162 | ```ruby 163 | class ImportantJob < JobFramework::Job 164 | # NineLives must be included before ExceptionNotification, 165 | # otherwise up to nine alert emails will be sent per failed 166 | # job and in many cases, exception notifications will be 167 | # sent when the job didn't actually fail! 
168 | include NineLives 169 | include ExceptionNotification 170 | 171 | def perform(*args) 172 | # Important work 173 | end 174 | end 175 | ``` 176 | 177 | **job_extensions.rb** 178 | 179 | ```ruby 180 | module NineLives 181 | def around_perform(*args) 182 | retry_count = 0 183 | begin 184 | super 185 | rescue TransientError 186 | if retry_count < 9 187 | retry_count += 1 188 | retry 189 | else 190 | raise 191 | end 192 | end 193 | end 194 | end 195 | 196 | module ExceptionNotification 197 | def around_perform(*args) 198 | super 199 | rescue 200 | # dispatch an email notification of the exception 201 | end 202 | end 203 | ``` 204 | 205 | Here's a quick rundown of what we can expect the lifetime of an execution of the 206 | **ImportantJob** class to look like: 207 | 208 | 1. Some code somewhere else in the codebase calls **ImportantJob.perform**. 209 | This class level **perform** method is provided by **JobFramework::Job** as a 210 | convenience method to enqueue an **ImportantJob** to be completed 211 | asynchronously. 212 | 2. Elsewhere, a worker process, also typically running code provided by the job 213 | framework, pops the job off of the job queue and instantiates a new instance 214 | of the **ImportantJob** class with the provided arguments. The internals of 215 | the worker process then take steps to execute the job which causes the 216 | **around_perform** method of the instance to be executed. Normally, the 217 | invocation of **around_perform** would simply cause **ImportantJob#perform** 218 | to be executed, however, since we've overwritten **around_perform** a couple 219 | of times, the behavior in the example is not so simple. The first version of 220 | **around_perform** that will be executed, perhaps counterintuitively, is the 221 | version from the last module we included in **ImportantJob**, **ExceptionNotification.around_perform**. 222 | 3. 
**ExceptionNotification.around_perform** immediately calls 223 | **super**, but includes a rescue block that catches any errors that bubble up 224 | and, hypothetically, dispatches email alerts about those exceptions. The 225 | invocation of **super** triggers the **around_perform** method from the first 226 | module we included in **ImportantJob**, **NineLives#around_perform**. 227 | 4. **NineLives#around_perform** is more involved, but its goals 228 | are pretty simple: Similar to **ExceptionNotification.around_perform**, it 229 | calls **super** almost immediately but adds some special error handling that 230 | catches errors of the **TransientError** class. The error handling will retry 231 | the call to **super** up to 9 times if the **TransientError** exception 232 | continues to occur. After 9 times, the error will be raised up to 233 | **ExceptionNotification** at which point an email should be dispatched. The 234 | call to **super** this time around invokes the original **around_perform** 235 | method, **JobFramework::Job#around_perform**, which as we discussed earlier, 236 | invokes **ImportantJob#perform**. 237 | 238 | Now that we've got a solid understanding of the example job, let's see how using 239 | the Module Factory pattern could benefit this class. 240 | 241 | ## What's wrong with a well written comment? 242 | 243 | You may already have an intuition for where we should begin our refactoring to 244 | introduce a Module Factory, but if you don't that's fine too. Personally, I'm 245 | inclined to start with the very first line of the **ImportantJob** class. No, 246 | not **include NineLives**. The honking four line comment that explains why the 247 | **NineLives** module must be included before the **ExceptionNotification** 248 | module. 
In a small enough codebase, the current form of **ImportantJob** might 249 | be fine, but if that codebase is likely to grow, or if the codebase is already 250 | of reasonable size, I'd argue that the comment and the rigid load-order are bad 251 | news. 252 | 253 | You may have your own arguments for or against the current implementation, but 254 | here are my arguments against: 255 | 256 | - That whopper of a comment is going to be repeated in every other job class 257 | that uses both the NineLives and ExceptionNotification modules (and if it's 258 | not, it should be). Trust me, I've seen it happen. Not only is this a 259 | violation of [DRY](https://en.wikipedia.org/wiki/Don%27t_repeat_yourself), 260 | but because it's a comment it's pretty likely to mutate and/or deteriorate 261 | with each subsequent duplication. Eventually this leads to a situation 262 | where a newcomer to the code base doesn't know which version of the comment is 263 | accurate, or, alternatively, you end up with some job classes that tag 264 | **include NineLives** simply with "Must be included before 265 | ExceptionNotification" and no additional explanation. After this reduction, 266 | the comment starts to disappear entirely. 267 | - Without the comment, there is no other clue that there is a load-order 268 | dependency between these two modules. Obviously, this is why the comment was 269 | added, but a comment can't help the situation where a job class that already 270 | includes **NineLives** now needs to include **ExceptionNotification**, or vice 271 | versa. If the dev making the change is lucky enough to have seen the comment 272 | elsewhere in the codebase, or another dev happens to catch the issue in a code 273 | review, maybe you can avoid a Spam dinner, but if not, it's 274 | Spam-a-lam-a-ding-dong until the next deploy goes out. 275 | - What happens when another load-order dependency is added with the inclusion of 276 | a new module? 
Another giant comment in every class that needs some combination 277 | of the three modules? One giant comment that tries to encompass all the 278 | permutations in a generic fashion? How would you feel if the purpose of the 279 | **ImportantJob** class was to perform a payment on a loan and the newly 280 | included module was added to lower someone's credit score every time an 281 | exception bubbled out of **NineLives#around_perform**? It's a bit of a 282 | stretch, but don't think that financial systems are immune to these 283 | situations, and I certainly hope they're using a better design than repeated 284 | comments. 285 | 286 | One could certainly make the argument for handling this issue by introducing 287 | another module to encapsulate the load-order dependency, but in my experience 288 | that doesn't actually solve any of these problems, but instead, it just moves 289 | the problems into other parts of the codebase or mutates them into slightly 290 | different issues. 291 | 292 | While we could explore alternative solutions for handling this situation all 293 | day, let's move on and get an idea of how a Module Factory could be used to 294 | address all of the concerns I've raised. 295 | 296 | ## A Module Factory for job extensions 297 | 298 | Before we look at how we might go about implementing a Module Factory to address 299 | the issues I raised above, let's take a look at what **ImportantJob** might look 300 | like after we refactored it to use a Module Factory. 301 | 302 | ```ruby 303 | class ImportantJob < JobFramework::Job 304 | include JobExtensions.select(:exception_notification, :nine_lives) 305 | 306 | def perform(*args) 307 | # Important work 308 | end 309 | end 310 | ``` 311 | 312 | We have to make some assumptions for now, but hopefully you'll agree that this 313 | is already a significant improvement. 
314 | 315 | We can't yet make a determination on the ultimate fate of the comment because 316 | it's no longer included in **ImportantJob**, but this by itself is a good sign. 317 | Realistically, I don't think there was ever hope of going completely comment 318 | free, but, at least for the moment, things have a much DRYer feeling. 319 | 320 | Otherwise, there's still no hint that a load-order dependency exists somewhere, 321 | but given the order of the arguments to **JobExtensions.select**, we can hope it 322 | doesn't matter anymore. If the order of the arguments truly doesn't matter, then 323 | this also helps the situation where someone wants to add **ExceptionNotification** 324 | to a class that already includes **NineLives**, as it seems like they could just 325 | add the snake-cased name of the extension to the list of selected extensions and 326 | continue on their way. The same applies for any new extension that might be 327 | added in the future. In fact, the use of the snake-cased names actually involves 328 | less coupling than the original version because though the snake-cased names 329 | match the module names in this case, there really is no need for the module name 330 | and the snake-cased name passed to the factory method to match. This means that 331 | the module implementing :nine_lives could change to an entirely different module 332 | with fewer repercussions to the codebase. 333 | 334 | So far, so good. So what kind of sorcery is required to make this interface 335 | possible? Behold! The **JobExtensions** module: 336 | 337 | ```ruby 338 | module JobExtensions 339 | def self.select(*selected_extensions) 340 | Module.new do 341 | # NineLives must be included before ExceptionNotification, 342 | # otherwise up to nine alert emails will be sent per failed 343 | # job and in many cases, exception notifications will be 344 | # sent when the job didn't actually fail! 
345 | if selected_extensions.include?(:nine_lives) 346 | include NineLives 347 | end 348 | if selected_extensions.include?(:exception_notification) 349 | include ExceptionNotification 350 | end 351 | end 352 | end 353 | end 354 | ``` 355 | 356 | Maybe a little magical, but certainly not sorcery, in fact it looks a lot like 357 | we took the comment and includes from the former version of **ImportantJob**, 358 | added some conditional logic, and wrapped all that in a **Module.new** block. 359 | What's going on here? 360 | 361 | I suspect I don't need to explain the internals of the block, but **Module.new** 362 | is definitely worth taking a closer look at on its own. 363 | 364 | [Module.new](http://www.ruby-doc.org/core-2.2.0/Module.html#method-c-new), is 365 | the more metaprogramming-friendly version of your standard module declaration 366 | using the **module** keyword. In fact, when used with a block, it's even more 367 | similar to a standard module declaration than might be obvious because in the 368 | context of the block the target of **self** is the module being constructed. 369 | This behavior is what allows us to make normal calls to **include** without 370 | having to use an explicit receiver or having to call **send**. 371 | 372 | For our particular purposes, **Module.new** does offer one advantage over the 373 | **module** keyword worth mentioning. Because **Module.new** uses a block, a 374 | closure is created that allows us to reach outside of the block and access the 375 | list of **selected_extensions** while building the new module. Access to this 376 | list is crucial to our Module Factory's ability to build a customized module on 377 | demand. Without access to the list we'd have to figure out another way to 378 | assemble the desired module, which is certainly doable, but would be less 379 | pleasant to look at and would require using **send** to circumvent the generated 380 | module's public access rules. 
381 | 382 | Other than the call to **Module.new**, I expect everything else in this factory 383 | method should make sense. We've found our missing comment and can be fairly 384 | confident that in this form it's unlikely to be repeated. If it is repeated in 385 | the future, it will likely be a modified version that documents the load-order 386 | gotchas of a different extension that this Module Factory supports. While there 387 | is probably a better way to document the specifics of this particular load-order 388 | requirement, I'm much less concerned with many similar comments documenting 389 | similar behavior inside a particular method than I am with the same spread all 390 | across the codebase in any number of unaffiliated jobs. 391 | 392 | ## Before you get too excited: A couple of trade offs 393 | 394 | Though the Module Factory we've built certainly helps deal with handling the 395 | load-order logic in a DRY fashion, there are a couple of potential trade offs 396 | that I should mention. These issues can be addressed, but I won't go into great 397 | detail about how to address them. The good news, though, is that both trade offs 398 | are solved by pretty much the same code. 399 | 400 | The first trade off is that generating a module dynamically like we did above 401 | produces a more anonymous module than you might be used to seeing if you usually 402 | create modules using the **module** keyword. For example, here's the fictitious 403 | ancestry of the **ImportantJob** class: 404 | 405 | ```ruby 406 | ImportantJob.ancestors 407 | # => [ 408 | # ImportantJob, #<Module:0x007fa0fe8a2b38>, 409 | # JobFramework::Job, Object, Kernel, BasicObject 410 | # ] 411 | ``` 412 | 413 | That funky Module between **ImportantJob** and **JobFramework::Job** is our 414 | generated module. Though we've handled the load-order issue in a more robust 415 | fashion, we've obscured the class hierarchy which makes it harder to find 416 | information about the class via interrogation or examination. 
417 | 418 | To get some insight into the second trade off introduced by the Module Factory 419 | pattern, let's pretend we've created another job class, **ReallyImportantJob**, 420 | that is an exact duplicate of **ImportantJob**, except named differently. What 421 | does the class hierarchy for **ReallyImportantJob** look like? 422 | 423 | ```ruby 424 | ReallyImportantJob.ancestors 425 | # => [ 426 | # ReallyImportantJob, #<Module:0x007fa0fe8b1c70>, 427 | # JobFramework::Job, Object, Kernel, BasicObject 428 | # ] 429 | ``` 430 | 431 | What may not be clear from this output is that though the two job classes are 432 | made up of the exact same code and modules, each generates its own special 433 | module when the **JobExtensions.select** factory method is called. This can be 434 | seen in the output above in that each of the generated modules is identified 435 | by a different memory address. This might not be the end of the world in a small 436 | codebase, but it should make it clear that every class is going to generate its 437 | own version of the module, even if one matching the requested requirements 438 | already exists. This is obviously inefficient in terms of time and memory, but 439 | it also adds another complication to understanding a class by interrogation or 440 | inspection because though another dev might expect the class hierarchies of 441 | **ImportantJob** and **ReallyImportantJob** to include the same modules, they 442 | don't, but they do, but they don't. 443 | 444 | So what's the solution? Well, it turns out both issues can be solved by dealing 445 | with some naming issues. In terms of the first trade off, the anonymous module, 446 | Ruby uses an anonymous name because we never assigned the module to a constant. 447 | This is one of the implicit benefits of the **module** keyword: you assign the 448 | module to a constant at inception. 
So, if we can come up with a way to generate 449 | a name for the generated module, all we need to do is assign a constant with the 450 | generated name to point to the generated module and Ruby will use that name to 451 | refer to the generated module. 452 | 453 | Though it's not obvious, generating a name also helps us to address the second 454 | trade off of generating a new module every time the factory method is invoked. A 455 | name helps solve this problem because if we can generate a name that uniquely 456 | identifies the contents of a generated module and assign the appropriate 457 | constant, we can also check that constant in the future before generating a new 458 | module. If the constant is defined, we return the previously generated module, 459 | if not, we generate a new module and assign it to the constant. 460 | 461 | In terms of our example job, the actual implementation is left to the reader as 462 | an exercise, but generating a name that uniquely identifies each generated 463 | module could be as simple as creating a string from the sorted, title-cased 464 | collection of extensions that are used in the module being named. Title casing 465 | is important for readability, consistency, and so Ruby will accept the name as a 466 | constant.[^3] Sorting is also important because, at least in the case of our 467 | example, we don't want the order of the arguments to change the name of the 468 | class being created because whether **:exception_notification** is passed in before 469 | **:nine_lives**, or vice versa, both invocations should generate and refer to 470 | the same module. This naming pattern still has some problems because it is still 471 | unclear what the module does, but it is at least a little better than the module 472 | being identified by its raw memory address. 
473 | 474 | ## Closing thoughts 475 | 476 | Though it may not feel like it, this post has really only scratched the surface 477 | of the power and potential of the Module Factory pattern. Though we've discussed 478 | how it can be used to improve code readability, maintainability, reliability, and 479 | flexibility, there's really a lot more opportunity out there. And so, rather 480 | than summarize what we've covered in this post, I'll leave you to ponder these 481 | possibilities: 482 | 483 | - As evidenced by [Kernel#Array](http://www.ruby-doc.org/core-2.2.0/Kernel.html#method-i-Array) 484 | and [Kernel#Integer](http://www.ruby-doc.org/core-2.2.0/Kernel.html#method-i-Integer) 485 | Ruby doesn't require method names to start with a lowercase letter. How might 486 | a method with a title-cased name be used to complement the Module Factory 487 | pattern? Are there trade offs that come with this type of naming convention? 488 | - Ruby method names don't need to be words at all, take for example 489 | [Hash::[]](http://ruby-doc.org/core-2.1.5/Hash.html#method-c-5B-5D). How might 490 | an operator style of method name pair with the Module Factory pattern? 491 | - How else could the power of a method call be leveraged for Module Factory 492 | awesomeness? What magic could be yielded (pun intended!) by a factory method 493 | that takes a block? How might keyword arguments, Hash arguments, or splat 494 | arguments be leveraged in combination with a Module Factory? 495 | - If you've ever used a framework that uses dependency injection like 496 | Javascript's AngularJS, then the examples above may have caused your Spidey 497 | sense to tingle. How might the Module Factory pattern be used for dependency 498 | injection in Ruby? 
499 | 500 | [^1]: Source: [Merriam-Webster](http://www.merriam-webster.com/dictionary/idiom) 501 | [^2]: Source [thefreedictionary.com](http://www.thefreedictionary.com/idiom) 502 | [^3]: A third-party library like [**ActiveSupport**](https://rubygems.org/gems/activesupport) can make the work of title casing the string trivial. 503 | -------------------------------------------------------------------------------- /lib/blog_snippets/articles/on_the_road_from_ruby_journeyman_to_ruby_master/README.md: -------------------------------------------------------------------------------- 1 | Mind-blowingly awful are really the only words that come to mind to describe my 2 | first bunch of Ruby scripts.[^1] Sure, this is probably unfair and over-critical 3 | given that Ruby, algorithms, and the whole shebang were all new to me at the 4 | time, but *damn*. There are so many decisions I can't even begin to comprehend 5 | or defend today. 6 | 7 | I imagine few Ruby devs still have their first scripts available to reflect on. 8 | This may be for the best, yet, as I looked over a few of my early scripts this 9 | past weekend, I began to ponder the value of occasionally revisiting old code 10 | samples to better gauge one's progress and get a periodic sense of perspective. 11 | Similarly, I also found myself contemplating the value of occasionally taking a 12 | step away from production code to draw a new line in the sand recording one's 13 | state as a developer in that moment; A coded testament of one's values, whether 14 | in terms of syntax, tradeoffs, or any number of other metrics; a mile marker 15 | somewhere along the road from Ruby journeyman to Ruby master. 16 | 17 | To that end, in this article I'll be sharing and discussing one of those early 18 | scripts. From there, I'll also leave behind a new mile marker by taking a stab 19 | at how I might solve the same problem today. 
With any luck, we'll all learn 20 | something along the way, and if not, it seems like I'll be back to rant about 21 | the inferior quality of my past work in no time. For now though, onward! 22 | 23 | ## When Danny met Ruby... 24 | 25 | Back in 2009, at the encouragement of my stepfather who thought the future had 26 | great things in store for Ruby and Rails (boy, was he wrong!), I began to 27 | explore the Ruby programming language by trying to solve a few of the math heavy 28 | programming problems over at [Project Euler](https://projecteuler.net/). Up 29 | until this point, I'd only ever done any "programming" in Basic and Visual 30 | Basic, as these were the focus of the programming courses taught at my high 31 | school. I'd argue that I got pretty advanced in my usage of Visual Basic, going 32 | so far as to develop a reasonable grasp on the Win32 API, but given my present 33 | distaste for my early Ruby code, I can only imagine that my earlier VB code must 34 | have been transcendently awful. In VB, I'd only ever written small utilities and 35 | weak attempts at games, so using Ruby to efficiently solve what were essentially 36 | math problems was new territory for me. 37 | 38 | For each problem that I attempted, I followed two rules. First, and obviously, 39 | the computed solution had to be correct. Second, the script had to run to 40 | completion in less than one minute. I don't remember if the second rule was 41 | stipulated from the beginning or if my naive tendency toward brute-force 42 | solutions prompted my stepfather to introduce the rule, but I definitely 43 | remember struggling to get my scripts to run in less than a minute at various 44 | times. For anyone getting started with this type of endeavor, it's definitely a 45 | great constraint to have in place. That said, the problem we're going to look at 46 | today isn't one of those long running problems, in fact, even my early attempts 47 | at solving the problem take less than a second to run. 
Let's have a look, shall 48 | we? 49 | 50 | ## Problem 8: Largest Product in a Series 51 | 52 | Though it's not the first problem I solved, [Problem 8: Largest Product in a 53 | Series](https://projecteuler.net/problem=8), seems like a problem of sufficient 54 | complexity to merit a bit of discussion. For your convenience here is the full 55 | text of the question: 56 | 57 | >The four adjacent digits in the 1000-digit number that have the greatest 58 | >product are 9 × 9 × 8 × 9 = 5832. 59 | > 60 | > 73167176531330624919225119674426574742355349194934 61 | > 96983520312774506326239578318016984801869478851843 62 | > 85861560789112949495459501737958331952853208805511 63 | > 12540698747158523863050715693290963295227443043557 64 | > 66896648950445244523161731856403098711121722383113 65 | > 62229893423380308135336276614282806444486645238749 66 | > 30358907296290491560440772390713810515859307960866 67 | > 70172427121883998797908792274921901699720888093776 68 | > 65727333001053367881220235421809751254540594752243 69 | > 52584907711670556013604839586446706324415722155397 70 | > 53697817977846174064955149290862569321978468622482 71 | > 83972241375657056057490261407972968652414535100474 72 | > 82166370484403199890008895243450658541227588666881 73 | > 16427171479924442928230863465674813919123162824586 74 | > 17866458359124566529476545682848912883142607690042 75 | > 24219022671055626321111109370544217506941658960408 76 | > 07198403850962455444362981230987879927244284909188 77 | > 84580156166097919133875499200524063689912560717606 78 | > 05886116467109405077541002256983155200055935729725 79 | > 71636269561882670428252483600823257530420752963450 80 | > 81 | > Find the thirteen adjacent digits in the 1000-digit number that have the 82 | > greatest product. What is the value of this product? 83 | 84 | It's worth noting that the requirements of the problem were modified in 2014 to 85 | encourage more programmatic solutions to the exercise. 
More specifically, the 86 | question originally asked for the largest product of not 13 adjacent digits but 87 | of just 5 adjacent digits in the 1000-digit number. A minor difference, but one 88 | that will, at the very least, help better explain at least one of the decisions 89 | I made in my 2009 solution. 90 | 91 | To that end, a modified version of my 2009 solution appears below. The solution 92 | has been modified from its original form in two ways. First, as necessitated by 93 | the change in the problem requirements, the solution has been extended, in a 94 | manner consistent with the original solution, to handle runs of 13 digits. 95 | Second, rather than repeat the 1000-digit number, we will assume it is stored in 96 | the constant NUMBER as a Bignum. I won't explain the solution, but hopefully my 97 | discussion of it should help fill in any gaps in understanding. Instead, I'll 98 | jump right into my thoughts on the shortcomings of this script. 99 | 100 | ### 2009 Edition 101 | 102 | ```ruby 103 | a=NUMBER.to_s 104 | big = 0 105 | for i in 1..(987) 106 | su=a[i,1].to_i*a[i+1,1].to_i*a[i+2,1].to_i*a[i+3,1].to_i*a[i+4,1].to_i*a[i+5,1].to_i* 107 | a[i+6,1].to_i*a[i+7,1].to_i*a[i+8,1].to_i*a[i+9,1].to_i*a[i+10,1].to_i*a[i+11,1].to_i* 108 | a[i+12,1].to_i 109 | if su>big 110 | big=su 111 | end 112 | end 113 | puts big 114 | ``` 115 | 116 | #### Where's the whitespace? 117 | 118 | The first thing that strikes me about this script, and many of the others I've 119 | reviewed from this period, is the omission of optional spaces. This is one of 120 | those situations where I can't even begin to understand what I was thinking. 121 | Given that I do add optional spaces in at least one place, we can rule out the 122 | possibility that my spacebar was broken. This being the case, I'm inclined to 123 | believe I simply wasn't thinking about it, but it seems so blatantly obvious to 124 | me now that I find this hard to believe. 
125 | 126 | It is certainly possible that I had no notion of (or concern for) readability. 127 | It's also possible that my mental parser was in a sufficiently unformed, 128 | immature, or plastic state that the omission of optional spaces felt as readable 129 | to me then as when optional spaces were included. This seems a bit unfathomable 130 | now, but that's really all I can come up with. 131 | 132 | In the JavaScript world, you will sometimes see libraries that achieve some feat 133 | in less than 1KB or some other very minimal file size. In JavaScript, where 134 | libraries are typically transmitted over the wire to web browsers across the 135 | world, this type of optimization can be desirable to reduce the size of the 136 | payload being transmitted (though it really should be the job of a minifier). 137 | But in Ruby, where libraries typically live on the server, there is no benefit 138 | to this type of optimization as far as I'm aware. If there is a benefit to this 139 | approach that I am unaware of, I can assure you it's not what I was striving for 140 | at the time. 141 | 142 | #### Hmm, seems like a loop might help... 143 | 144 | Next on my list of grievances is the ginormous series of substring accesses of 145 | the form **a[i+n, 1]**. First, let's get it out of the way that the second 146 | argument to **String#[]** is totally useless here, being as it is that the 147 | default behavior is to return the 1-character substring located at the index 148 | given by the first argument. Normally, this might be an excusable offense, but 149 | given that this snippet could benefit from some serious DRYing, it's a little 150 | more intolerable because the extraneous argument would have to be removed in 13 151 | different places. 152 | 153 | Given that this seems like an obvious situation for a loop of some sort, why the 154 | no loop? 
In this particular case, I do have some recollection of my thinking, 155 | and I'm fairly certain that forgoing a loop was a conscious decision. If you'll 156 | recall, the problem at the time was concerned with 5 consecutive digits instead 157 | of 13 which made the repeated code a little more manageable and perhaps even 158 | tolerable. 159 | 160 | At the time, I may have hoped to gain some performance by skipping the loop and 161 | retrieving each element directly, though this concept seems like it would have 162 | been too advanced for my thinking at the time. Instead, I'm inclined to believe 163 | that I may have chosen five direct accesses because it was easier for me at the 164 | time than setting up a loop, though I'm not sure. Though skipping the loop is a 165 | teensy bit faster, it's clearly not DRY and it also hardcodes an implicit over 166 | specification into the solution that makes it very difficult to change the 167 | length of the series of adjacent digits that should be tested. As such, to 168 | update the code to test a series of 13 digits, I had to more than double the 169 | number of element accesses, moving the code even further from the goals of DRY. 170 | 171 | If it's not already clear, using a simple loop would have been a better choice. 172 | Though insignificantly slower, a simple loop would make the code much DRYer 173 | while also enabling the solution to be more generic. This would better prepare 174 | the solution to handle any number of adjacent digits while also making the code 175 | easier to read, follow, and understand. Generality definitely wasn't something 176 | that was on my mind in solving this problem as we'll see again in a moment. 177 | 178 | #### Maybe one loop was a better choice... 
179 | 180 | Though we can hopefully agree that it seems like a loop would have been a better 181 | choice in the situation above, there are enough problems with that loop already 182 | used that it starts to seem like utilizing another loop might not have been a 183 | good idea. The loop already in use is a **for loop** operating over a range of 184 | Integers that allows for traversing the vector of digits. There are a number of 185 | things about this loop that are less than ideal, some more obvious than others. 186 | 187 | One thing that may stick out to more experienced Rubyists is the choice of a 188 | **for loop** over other alternatives. Though not technically wrong, the **for 189 | loop** is not commonly seen in Ruby and typically more idiomatic loop primitives 190 | are used instead. Another thing that may stick out to more experienced Rubyists 191 | is the unnecessary use of parentheses around the terminal Integer or upper bound 192 | of the Range expression. Again, not wrong per se, but certainly an indicator of 193 | my noob status and perhaps an indicator that I didn't fully grok the Range 194 | expression and perhaps thought I was calling a dot method on the Integer class, 195 | like **Integer#.**, that returned a Range instance when invoked with an Integer 196 | for an argument. Novel perhaps, but wrong. 197 | 198 | Returning to the topic of generality, the loop also hardcodes **two** more 199 | over-specifications into the solution that make the solution more rigid and less 200 | reuseable. As if this weren't bad enough, the two over-specifications interact 201 | with each other in such a way that it's not obvious what's going on. In fact, 202 | they're both encapsulated in the seemingly random choice of 987 for the upper 203 | bound of the Range. 
Being as astute as you are, I imagine if you were paying 204 | attention to the problem description then you've already surmised that 987 is 205 | none other than 1,000, the length of the input number, minus 13, the length of 206 | the run of adjacent digits we're calculating the product of. This upper bound 207 | makes sure our product calculations don't overflow the length of the provided 208 | number. Duh, right? 209 | 210 | Wrapped up there in one little number are three flavors of weak. First, the 211 | hardcoded reference to 1,000 means we won't be able to reliably use this solution 212 | on a similar problem that features a number that is anything other than exactly 213 | 1,000 digits. Second, the hardcoded reference to 13 means yet another place an 214 | update will be required in order to mutate the solution to handle runs of 215 | lengths other than 13. Finally, both of these facts are obscured by the use of 216 | the precalculated value of 987 for the upper bound of the range. Instead of 217 | hardcoding the value, calculating the upper bound by taking the difference of 218 | the length of **NUMBER** and the desired length of adjacent digits would be 219 | better. Having no reliance on knowing the length of **NUMBER** would be even 220 | better, if possible. 221 | 222 | One final point about the loop before we move on: it's wrong! Given the 223 | magnitude of the wrongness, you may prefer to think of it as a bug, but at the 224 | end of the day, it's just plain old wrong. The problem is that the Range starts 225 | at 1, which translates to index 1 of the stringified **NUMBER**. Starting with 226 | index 1 means that the digit at index 0 is totally ignored, which means that if, 227 | by some chance, the 13 consecutive digits with the largest product were the 228 | first 13 digits, this solution would fail to find the correct product. Whether 229 | you call this a bug or broken, it's bad news. So yeah, maybe one loop was the 230 | way to go.
231 | 232 | #### A final look back at 2009 233 | 234 | Before we look at how I might solve this problem today, I want to make two final 235 | points about my 2009 solution. First, the variable names suck. The only variable 236 | name that comes close to being tolerable is **big**, and even that isn't great. 237 | Finally, a compliment. Despite all of its problems, my 2009 solution does excel 238 | as an example of the lowest of low Ruby newbie code. Certainly, that's a 239 | back-handed compliment, but I really could not have written an example like this 240 | today if I wanted to: it simply would have felt far too contrived. 241 | 242 | With the past firmly behind us, let's take a look at how I might solve this 243 | problem today. 244 | 245 | ### Solution 2015 246 | 247 | ```ruby 248 | # Project Euler #8 - Largest product in a series 249 | # https://projecteuler.net/problem=8 250 | # 251 | # Find the thirteen adjacent digits in the 1000-digit number that have the 252 | # greatest product. What is the value of this product? 253 | 254 | def largest_product_in_series(number, adjacency_length = 13) 255 | series = number.to_s 256 | zero_ord = '0'.ord 257 | factors = [] 258 | largest_product = 0 259 | current_product = 1 260 | series.each_char do |new_factor| 261 | # This String-to-Integer conversion assumes we can trust our input will only 262 | # contain digits. If we can safely assume this, calling String#ord and then 263 | # subtracting the ordinal of the String '0' will work faster than 264 | # String#to_i. 265 | new_factor = new_factor.ord - zero_ord 266 | 267 | # If our new_factor is zero, we know that the product of anything 268 | # currently in our collection of factors will be zero. so, rather than 269 | # work through that, just drop the current set of factors, drop the 270 | # zero, reset our current product, and move on to the next iteration. 271 | if new_factor.zero?
272 | factors.clear 273 | current_product = 1 274 | next 275 | end 276 | 277 | factors << new_factor 278 | current_product *= new_factor 279 | next if factors.length < adjacency_length 280 | 281 | largest_product = current_product if current_product > largest_product 282 | current_product /= factors.shift 283 | end 284 | largest_product 285 | end 286 | 287 | puts largest_product_in_series(NUMBER) 288 | ``` 289 | 290 | I think I'm still too close to this solution to offer much objective criticism, 291 | so though I'll touch on a few concerns later, for the most part, we'll leave 292 | criticism to future-Danny to worry about. So, let's start by seeing how the 293 | updated solution fares in regard to some specific points that were brought up 294 | while dissecting my 2009 solution. After that, we'll look at some new goodness 295 | it brings to the table. Like the 2009 solution, I won't explain exactly what's 296 | going on, but hopefully the discussion below and included comments will suffice 297 | to convey the intention of the code. 298 | 299 | #### Lessons learned 300 | 301 | Here's a brief rundown of a few of the concerns I raised about the 2009 solution 302 | and how those concerns have fared in the 2015 solution: 303 | 304 | - Spacing is kind of funny in that you might not think about it if it's there, 305 | but if it's missing you'll definitely notice. Whether you noticed the 306 | additional white space or not, hopefully you'll agree that the use of 307 | consistent white space makes this solution much more readable than its 308 | counterpart. 309 | 310 | - Variable names, like white space, can be a little funny too given how personal 311 | and subjective they tend to be. Whether you think the variable names used in 312 | the updated solution are great, too short, too long, or just a little off, 313 | hopefully we can all agree they are a significant improvement over the 314 | variable names of the 2009 solution.
315 | 316 | - In terms of rigidity and over-specificity, the 2015 solution is much more 317 | flexible and generic. It has no dependency on the length of the number given, 318 | meaning the provided number could be 1,000 digits long or 10,000 digits long. 319 | Though it still needs to know how long a run of digits should be tested, it is 320 | not hardcoded to a certain length. A default length of 13 is used, but this 321 | can easily be overridden by invoking the **largest_product_in_series** method 322 | with a specific value for **adjacency_length**. This means that we could 323 | answer both the original 5-digit version of the question and the updated 324 | 13-digit version of the question with one algorithm. 325 | 326 | - Because the solutions are so different, any discussion in terms of the number 327 | of loops is somewhat moot, however the loop used in the 2015 solution does 328 | have one characteristic that I'd previously suggested could be desirable: it 329 | does not depend upon knowing the length of **NUMBER**. Instead, it iterates 330 | over every character in the String derived from **NUMBER**, **series**, using 331 | String#each_char. In this case, we still know **series** comes from the full 332 | **NUMBER** so, we're not a lot closer to a solution that would work for true 333 | streams of numbers, but the length agnostic nature of the loop is a step in 334 | the right direction. 335 | 336 | - One other big improvement included in the updated solution that we didn't 337 | mention in terms of the 2009 solution is the addition of comments. There are 338 | two flavors of comments in the updated solution that help provide clarity to 339 | the solution. First, the problem description is included as a comment at the 340 | head of the solution. This is really handy for someone else looking at the 341 | code or for coming back to the code six years later. 
Second, comments 342 | explaining some of the solution's logic have been added making it easier for a 343 | reader to understand what is going on and why those decisions were made. 344 | 345 | #### An alternate approach 346 | 347 | Beyond the better coding practices exhibited by the 2015 solution, the solution 348 | also leverages a better approach to solving the problem. Better can be somewhat 349 | subjective, so I should be clear that in this case I think the 2015 solution is 350 | superior because the algorithm is more efficient and offers a performance 351 | improvement of about an order of magnitude while still using about the same 352 | amount of memory. The concept for the alternate approach emerged from two 353 | seemingly unrelated notions, each of which I thought could be useful 354 | independently to squeeze some extra performance out of the algorithm. As it 355 | turns out, they weren't completely independent notions and one is actually much 356 | easier to implement when built on top of the other. 357 | 358 | The first idea for optimization revolved around a means to more efficiently 359 | calculate the new product each iteration. While the 2009 solution calculated the 360 | new product each iteration by performing 12 multiplications, I reasoned that 361 | since we're really only changing two numbers each iteration (the digit going out 362 | of focus and the digit coming in to focus), it should be possible to calculate 363 | the new product with only two operations (divide out the digit going out of 364 | focus, and multiply in the digit coming into focus). The only situation where 365 | this would be complicated is when a zero was encountered because a zero would 366 | effectively destroy our partial product when it got multiplied in, not to 367 | mention trying to divide by zero later would also be a fatal error. 
A better 368 | means of handling zeros would be required to calculate products in this manner 369 | and that's just what the second idea offered. 370 | 371 | The second notion I had for optimizing the algorithm stemmed from removing the 372 | extraneous work that was being performed in the iteration in which a zero was 373 | encountered and the 12 subsequent iterations after. Because zero multiplied by 374 | any other number is always going to be zero, there were effectively 13 375 | iterations for every zero where the algorithm would do all the work despite the 376 | fact that the answer was guaranteed to be zero. It seemed to me that there had 377 | to be a way to avoid this extraneous effort and actually use zeros as a way to 378 | speed up the calculation. As it turns out, handling zeros is pretty easy because 379 | all that needs to be done when a zero is encountered is reset the partial 380 | product to its initial value, 1, and move on. 381 | 382 | With zeros taken care of, the more efficient means of calculating the product is 383 | simplified to keeping a queue of the factors of the partial product. Then, each 384 | iteration the digit going out of focus is removed from the queue and divided 385 | out of the partial product and the number coming into focus is added to the 386 | queue and multiplied into the partial product. One final bit of housekeeping 387 | that is required is that when a zero is encountered, the queue of factors must 388 | be reset as well. 389 | 390 | #### A faster Char#to_i 391 | 392 | One final bit of hackery (of debatable merit) is the means by which the updated 393 | solution turns the String form of a digit into its Integer form. Though 394 | **String#to_i** is the obvious candidate for this conversion, I wondered if 395 | there might be a faster way since this problem has little need for error 396 | checking or converting large strings of digits.
If Ruby had a **Char** class 397 | for single characters, **Char#to_i** would likely have different performance 398 | characteristics than **String#to_i**, and a **Char#to_i** style approach was more what 399 | I was looking for. 400 | 401 | One way I had seen this done for individual numbers in other languages was to 402 | take the ordinal, or character code, of an ASCII number and subtract from it the 403 | ordinal for the character "0" to get the Integer equivalent of the character. 404 | This is exactly what the updated solution does using **String#ord**. In each of 405 | my trials, I found the **String#ord** trick to be 25-30% faster than 406 | **String#to_i**. Whether using this trick is a good idea or not (given that this 407 | method makes no checks to verify that the provided character is a number) is a 408 | whole other blog post. In this particular case, I thought the approach novel and 409 | performant enough to utilize it. 410 | 411 | #### Still a Ruby journeyman: A few concerns 412 | 413 | Before concluding this post, I want to mention a few concerns that have come to 414 | mind as I've spent some time analyzing the updated solution. Most stem from 415 | tradeoffs or implementation details. I can't help but wonder if a few of these 416 | concerns are going to be the reasons future-Danny gives for this solution being 417 | mind-blowingly awful in its own way. 418 | 419 | - Did I put way too much effort into the updated solution? 2009 for all of its 420 | shortcomings was much more pragmatic in that it was all about getting the 421 | correct solution and moving on. The goals of the 2009 solution and the 2015 422 | solution are clearly different, so maybe I put exactly the right amount of 423 | time into the updated solution. I suspect it's something only future-Danny 424 | will be able to make a ruling on. 425 | 426 | - Should the solution include more/any error handling?
The use of the 427 | **String#ord** trick certainly opens up opportunities for misuse. But even 428 | that hack aside, what happens when the number provided is shorter than the 429 | adjacency length? Currently it does a correct thing and returns zero, but 430 | should that raise an error instead? Is additional error handling worth the 431 | time? 432 | 433 | - Why the focus on performance? Is performance really critical for this problem 434 | or is the focus on performance more to provide some concrete metric of how the 435 | efficiency of my programming has improved over the last 6 years? The 436 | **String#ord** trick is nice, but is it really worth the extra complexity, 437 | confusion, and possible bugs? What benefit might a simpler, less efficient 438 | solution offer? 439 | 440 | - Should the **String#ord** trick be extracted into a method to make it easier 441 | to substitute a different means of converting a digit character into its 442 | Integer form? 443 | 444 | - Why convert **NUMBER** to a String? For all the focus on performance, this is 445 | likely not the most efficient option. If **NUMBER** can remain a Bignum and 446 | each of the digits could be extracted from it in Integer form, would that be a 447 | more performant solution? Would it be a simpler solution? 448 | 449 | - Why the long method format? Sandi Metz would likely argue for smaller methods, 450 | as would Martin Fowler. The long method was partly due to performance concerns 451 | and partly because **Replace Method With Method Object** seemed excessive by 452 | the time it made sense. That said, should this method be broken up into 453 | smaller methods encapsulated in a class of some sort? 454 | 455 | ## Happily ever after? 
456 | 457 | Though my exploration of Ruby, and the many other concepts secretly embodied by 458 | the set of problems at Project Euler, didn't pay off in an obvious way at the 459 | time I was focusing on them, I'm happy to have begun my career with Ruby 460 | struggling to write efficient algorithms. Though a friend of mine, a Gopher 461 | through and through, would argue that all Ruby is struggling to write efficient 462 | algorithms, this is a sentiment I've never shared. Perhaps, our disagreement on 463 | the subject stems from my beginnings with Ruby where any algorithmic 464 | inefficiencies were almost always my own and not some fault of the language. 465 | Though there is certainly an argument to be made for using the right tool for 466 | the job, at least in the part of the stack I tend to work in, I have yet to come 467 | across a situation where Ruby was clearly inappropriate. But maybe that's just 468 | me defending an old friend. 469 | 470 | In the end, I'm glad I've held on to my old Project Euler solutions because 471 | though I wouldn't land my first Rails job until late 2011 and I'd spend two more 472 | years on the Microsoft stack dabbling in C# and relational concepts in MSSQL, 473 | and though, for a time, Ruby and I would talk less often, given our history 474 | together, it's nice to be able to look all the way back to the beginning of my 475 | time with Ruby. It helps me to understand that, frankly, I hope to always be 476 | writing code that is four years away from being mind-blowingly awful. If this 477 | stops being the case then I've stopped learning or I've stopped caring and 478 | either way, that'd be pretty sad. 479 | 480 | [^1]: I would **never** talk about another person's code in these terms, especially if that person was as junior as I was when I wrote these scripts. 
In the words of the [Ten Commandments of Egoless Programming](http://blog.stephenwyattbush.com/2012/04/07/dad-and-the-ten-commandments-of-egoless-programming), "Treat people who know less than you with respect, deference, and patience." I hope you too will follow this advice and save harsher criticisms for your own work. 481 | -------------------------------------------------------------------------------- /lib/blog_snippets/articles/tail_call_optimization_in_ruby_internals/README.md: -------------------------------------------------------------------------------- 1 | In [my last post](http://blog.tdg5.com/tail-call-optimization-ruby-background/), 2 | I began an exploration of tail call optimization in Ruby with some 3 | [background on tail call optimization and its little known existence and usage 4 | in Ruby](http://blog.tdg5.com/tail-call-optimization-ruby-background/). 5 | In this post, we'll continue that exploration at a much lower level, moving out 6 | of the Ruby layer and descending to whatever depths are necessary to get to the 7 | bottom of how the Ruby VM implements tail call optimization internally. 8 | 9 | A lot of what follows wouldn't be possible without [Pat Shaughnessy's Ruby Under 10 | a Microscope](http://patshaughnessy.net/ruby-under-a-microscope) (and a healthy 11 | dose of [K & R](https://en.wikipedia.org/wiki/The_C_Programming_Language)). If 12 | you find you enjoy the conceptual level of this article and you're interested in 13 | more, I'd highly recommend [Ruby Under a Microscope](http://patshaughnessy.net/ruby-under-a-microscope). 14 | I found it an enjoyable, empowering, fascinating, and approachable introduction 15 | to the internals of Ruby. 
If you're curious about the book, but you're still 16 | unsure about it, I'd encourage you to check out [Ruby Rogues #146, a book club 17 | episode featuring Ruby Under a Microscope](http://devchat.tv/ruby-rogues/146-rr-book-club-ruby-under-a-microscope-with-pat-shaughnessy) 18 | with guest appearances by the author, [Pat Shaughnessy](http://patshaughnessy.net/), 19 | and [Aaron Patterson](http://tenderlovemaking.com/), of Ruby and Rails fame, and 20 | who also wrote the foreword of the book. It's an enjoyable episode that 21 | definitely helped guide my decision to read the book. 22 | 23 | So, getting on to the subject of today's post. Hold on to your butts. 24 | 25 | ## Revisiting our tail recursive Guinea pig 26 | In [my last post](http://blog.tdg5.com/tail-call-optimization-ruby-background/), 27 | we discovered a tail recursive function in [the Ruby test suite](https://github.com/ruby/ruby/blob/fcf6fa8781fe236a9761ad5d75fa1b87f1afeea2/test/ruby/test_optimization.rb#L213), 28 | which we extracted (with a few tweaks) to demonstrate tail call optimization in 29 | Ruby. We'll need our Guinea pig again for today's exercise, so allow me to 30 | introduce her one more time: 31 | 32 | ```ruby 33 | code = <<-CODE 34 | class Factorial 35 | def self.fact_helper(n, res) 36 | n == 1 ? res : fact_helper(n - 1, n * res) 37 | end 38 | 39 | def self.fact(n) 40 | fact_helper(n, 1) 41 | end 42 | end 43 | CODE 44 | options = { 45 | tailcall_optimization: true, 46 | trace_instruction: false, 47 | } 48 | RubyVM::InstructionSequence.new(code, nil, nil, nil, options).eval 49 | ``` 50 | 51 | I won't go into the details again, but suffice it to say that this code snippet 52 | will add a **Factorial** class with a tail call optimized **fact** method to our 53 | environment. Our journey begins with this class method. 
53 | 54 | ## Initial descent 55 | With our tail recursive Guinea pig revived, we can begin our descent into the 56 | internals of Ruby's implementation of tail call optimization. A month ago I 57 | wouldn't have known where to begin such a quest, but this is where some of the 58 | background and methods employed in [Ruby Under a Microscope](http://patshaughnessy.net/ruby-under-a-microscope) 59 | will be of great utility. 60 | 61 | One method that [Ruby Under a Microscope](http://patshaughnessy.net/ruby-under-a-microscope) 62 | uses to great effect is using [**RubyVM::InstructionSequence#disasm**](http://www.ruby-doc.org/core-2.2.0/RubyVM/InstructionSequence.html#method-c-disasm) 63 | to disassemble Ruby code into the underlying YARV instructions that the Ruby VM 64 | will actually execute at runtime. Using this technique we should be able to 65 | disassemble both a tail call optimized version and an unoptimized version of our 66 | **Factorial#fact** method and compare the instruction sequences for differences. 67 | 68 | Before we continue, let's rewind for a second and discuss YARV. YARV, which 69 | stands for Yet Another Ruby Virtual Machine, is a stack-oriented VM internal to 70 | Ruby that is responsible for compiling your Ruby code into low-level bytecode 71 | instructions (called YARV instructions) and executing those instructions. YARV 72 | was introduced in Ruby 1.9 to improve performance over Ruby 1.8's direct 73 | traversal and interpretation of the Abstract Syntax Tree generated by parsing a 74 | Ruby program. For more insight into how Ruby executes your code, you can 75 | check out an excerpt from [Ruby Under a Microscope](http://patshaughnessy.net/ruby-under-a-microscope), 76 | [How Ruby Executes Your Code by Pat Shaughnessy](http://patshaughnessy.net/2012/6/29/how-ruby-executes-your-code). 77 | 78 | Back to our regularly scheduled broadcast.
80 | 81 | To facilitate comparing the YARV instructions of the tail call optimized and 82 | unoptimized versions of our factorial function, I've tweaked our Guinea pig 83 | script to disassemble both versions of the function and **puts** them to STDOUT. 84 | Here's the resulting script: 85 | 86 | ```ruby 87 | code = <<-CODE 88 | class Factorial 89 | def self.fact_helper(n, res) 90 | n == 1 ? res : fact_helper(n - 1, n * res) 91 | end 92 | 93 | def self.fact(n) 94 | fact_helper(n, 1) 95 | end 96 | end 97 | CODE 98 | 99 | { 100 | 'unoptimized' => { :tailcall_optimization => false, :trace_instruction => false }, 101 | 'tail call optimized' => { :tailcall_optimization => true, :trace_instruction => false }, 102 | }.each do |identifier, compile_options| 103 | instruction_sequence = RubyVM::InstructionSequence.new(code, nil, nil, nil, compile_options) 104 | puts "#{identifier}:\n#{instruction_sequence.disasm}" 105 | end 106 | ``` 107 | 108 | There are two things here worth making note of. First, I've chosen to disable 109 | the trace instruction for both versions to avoid unnecessary differences between 110 | the two instruction sequences that don't actually pertain to how Ruby implements 111 | tail call optimization internally. Second, though it is not explicit in this 112 | script, I am running MRI Ruby 2.2.0 locally, so all of the YARV instructions and 113 | C code that we'll look at are specific to MRI Ruby 2.2.0 and may be different 114 | from other versions. 115 | 116 | You can view [the YARV instructions of the unoptimized Factorial class here](https://github.com/tdg5/blog_snippets/blob/60b19663b0c9a34117b47665045ba66679584e14/lib/blog_snippets/tail_call_optimization_in_ruby_internals/fact_disasm.txt) 117 | and [the YARV instructions of the tail call optimized Factorial class here](https://github.com/tdg5/blog_snippets/blob/60b19663b0c9a34117b47665045ba66679584e14/lib/blog_snippets/tail_call_optimization_in_ruby_internals/fact_tco_disasm.txt). 
118 | 119 | A vimdiff of the two instruction sequences with changed lines highlighted in 120 | purple and the actual changes highlighted in red looks like so: 121 | 122 | [![Differences between the unoptimized Factorial class and the tail call optimized Factorial class](https://tdg5.s3.amazonaws.com/blog/wp-content/uploads/2014/02/tco_diff.jpg)](https://tdg5.s3.amazonaws.com/blog/wp-content/uploads/2014/02/tco_diff.jpg) 123 | 124 | Oh no! Disaster! It seems that our initial descent is somewhat of a failure. 125 | Other than the addition of a **TAILCALL** flag to a few of the 126 | **opt_send_without_block** instructions, the YARV instructions for both the 127 | unoptimized version and the tail call optimized version are **exactly the 128 | same**. What gives? 129 | 130 | From here it seems like our only logical course of action is to descend even 131 | further and look at the C code that makes up those YARV instructions with the 132 | hope that the **TAILCALL** flag is really all that's needed to transform an 133 | unoptimized call into a tail call optimized call. 134 | 135 | ## Descending into the C 136 | We begin our journey into Ruby's C internals where our YARV instructions left 137 | us, with the **opt_send_without_block** instruction. Hopefully, we can find 138 | something in the implementation of that instruction that will help us find 139 | our way to where Ruby implements tail call optimization internally. 140 | 141 | As discussed in [Ruby Under a Microscope](http://patshaughnessy.net/ruby-under-a-microscope), 142 | the definitions that are used during the Ruby build process to generate the C 143 | code for all the YARV instructions live in the Ruby source in [insns.def](https://github.com/ruby/ruby/blob/6c0a375c58e99d1f5f1c9b9754d1bb87f1646f61/insns.def).
144 | With a little grepping, we can find the definition of **opt_send_without_block** 145 | around [line 1047 of insns.def](https://github.com/ruby/ruby/blob/6c0a375c58e99d1f5f1c9b9754d1bb87f1646f61/insns.def#L1047): 146 | 147 | ```c 148 | DEFINE_INSN 149 | opt_send_without_block 150 | (CALL_INFO ci) 151 | (...) 152 | (VALUE val) // inc += -ci->orig_argc; 153 | { 154 | ci->argc = ci->orig_argc; 155 | vm_search_method(ci, ci->recv = TOPN(ci->argc)); 156 | CALL_METHOD(ci); 157 | } 158 | ``` 159 | 160 | As you've almost certainly noticed, this isn't quite C. Rather, during the Ruby 161 | build process this definition is used to generate the actual C code for the 162 | **opt_send_without_block** instruction. [You can view the fully generated C code 163 | for **opt_send_without_block** in all its monstrous glory here](https://github.com/tdg5/blog_snippets/blob/2a9e48ccc10082d37c821e3b838f223597a0d7b6/lib/blog_snippets/tail_call_optimization_in_ruby_internals/opt_send_without_block.vm.inc). 164 | 165 | Luckily, for our purposes, we don't have to go quite to that extreme and can 166 | operate at the instruction definition level. One mutation I will make before we 167 | continue is to expand the **CALL_METHOD** macro and remove some noise added to 168 | facilitate the macro. That brings us to the following: 169 | 170 | ```c 171 | ci->argc = ci->orig_argc; 172 | vm_search_method(ci, ci->recv = TOPN(ci->argc)); 173 | VALUE v = (*(ci)->call)(th, GET_CFP(), (ci)); 174 | if (v == Qundef) { 175 | RESTORE_REGS(); 176 | NEXT_INSN(); 177 | } 178 | else { 179 | val = v; 180 | } 181 | ``` 182 | 183 | OK, so what in the name of Neptune is going on here? Well, the first thing to 184 | notice is there's no sign of tail call optimization here, so the question for 185 | now is, where to next? 186 | 187 | In this case, the **ci** variable is of most interest to our particular quest. 
188 | The **ci** variable references a **rb_call_info_t** struct which encapsulates a 189 | variety of data about a method call including, among other things, the receiver 190 | of the call, how many arguments the call takes, and a reference to the C 191 | function that should actually be executed by the call. It's this final reference, 192 | **ci->call**, that we're most interested in, as we hope to find some trace of 193 | tail call optimization therein. 194 | 195 | From the code above we can ascertain that when the Ruby VM executes a method 196 | call, it invokes the function pointed to by the **rb_call_info_t** struct's 197 | **call** field with the current thread (**th**), the current frame pointer 198 | (result of **GET_CFP**), and the **rb_call_info_t** struct itself (**ci**) for 199 | arguments. 200 | 201 | This is definitely a step in the right direction, but since we have no insight 202 | into the origins of the function pointed to by the **rb_call_info_t** struct's 203 | **call** pointer, we'll need to step backward before we can step forward. 204 | Luckily for us, we literally only need to take one step backward to the previous 205 | line where **vm_search_method** is invoked. 206 | 207 | At this point, rather than drill into every call 208 | that is made on the way to our goal, let's speed things up a bit. We'll still 209 | walk through each step, but we'll be more brief and skip the code snippets 210 | until we get a whiff of tail call optimization. That said, I've collected [the 211 | source for each step of the way from **CALL_METHOD** to the internals of Ruby's 212 | tail call optimization into one file](https://github.com/tdg5/blog_snippets/blob/12cb499a95ced517ee9f70febfa9472e2d055d71/lib/blog_snippets/tail_call_optimization_in_ruby_internals/from_call_method_to_tco.c) 213 | for your viewing pleasure. 214 | 215 | Take a deep breath...
216 | 217 | - The call to [**vm_search_method**](https://github.com/tdg5/blog_snippets/blob/12cb499a95ced517ee9f70febfa9472e2d055d71/lib/blog_snippets/tail_call_optimization_in_ruby_internals/from_call_method_to_tco.c#L2) 218 | is where the value of [**ci->call** is set, and it is set to reference another 219 | function, **vm_call_general**](https://github.com/tdg5/blog_snippets/blob/12cb499a95ced517ee9f70febfa9472e2d055d71/lib/blog_snippets/tail_call_optimization_in_ruby_internals/from_call_method_to_tco.c#L15). 220 | 221 | - [**vm_call_general**](https://github.com/tdg5/blog_snippets/blob/12cb499a95ced517ee9f70febfa9472e2d055d71/lib/blog_snippets/tail_call_optimization_in_ruby_internals/from_call_method_to_tco.c#L24) 222 | when called [invokes and returns the result of another method, **vm_call_method**](https://github.com/tdg5/blog_snippets/blob/12cb499a95ced517ee9f70febfa9472e2d055d71/lib/blog_snippets/tail_call_optimization_in_ruby_internals/from_call_method_to_tco.c#L26). 223 | 224 | - [**vm_call_method**](https://github.com/tdg5/blog_snippets/blob/12cb499a95ced517ee9f70febfa9472e2d055d71/lib/blog_snippets/tail_call_optimization_in_ruby_internals/from_call_method_to_tco.c#L31) 225 | at about 155 lines, is a monster of a function, that handles every type of 226 | method invocation that the Ruby VM supports. It'd be pretty easy to get lost in 227 | this method, but we are fortunate in that we know we are dealing with an 228 | instruction sequence method type because we got to this point from a YARV 229 | instruction. This allows us to jump right to the portion of the 230 | switch statement that deals with instruction sequence type methods. In which 231 | case, [**vm_call_method** returns the result of yet another function invocation **vm_call_iseq_setup**](https://github.com/tdg5/blog_snippets/blob/12cb499a95ced517ee9f70febfa9472e2d055d71/lib/blog_snippets/tail_call_optimization_in_ruby_internals/from_call_method_to_tco.c#L45). 
232 | 233 | (If you're beginning to wonder if this rabbit hole of a descent has a bottom, 234 | don't worry, we're almost there.) 235 | 236 | - [**vm_call_iseq_setup**](https://github.com/tdg5/blog_snippets/blob/12cb499a95ced517ee9f70febfa9472e2d055d71/lib/blog_snippets/tail_call_optimization_in_ruby_internals/from_call_method_to_tco.c#L210) 237 | is a two-liner that sets up the callee of the method and then [returns the 238 | result of another function invocation, **vm_call_iseq_setup_2**](https://github.com/tdg5/blog_snippets/blob/12cb499a95ced517ee9f70febfa9472e2d055d71/lib/blog_snippets/tail_call_optimization_in_ruby_internals/from_call_method_to_tco.c#L213). 239 | 240 | - [**vm_call_iseq_setup_2**](https://github.com/tdg5/blog_snippets/blob/12cb499a95ced517ee9f70febfa9472e2d055d71/lib/blog_snippets/tail_call_optimization_in_ruby_internals/from_call_method_to_tco.c#L218) 241 | is where we finally get our first whiff of tail call optimization. In fact, the 242 | only purpose of **vm_call_iseq_setup_2** is to check if tail call optimization 243 | is enabled and if so [it calls yet another function, **vm_call_iseq_setup_tailcall**](https://github.com/tdg5/blog_snippets/blob/12cb499a95ced517ee9f70febfa9472e2d055d71/lib/blog_snippets/tail_call_optimization_in_ruby_internals/from_call_method_to_tco.c#L224). 244 | 245 | (**So close!** But, while we're here, it's worth noting that normally [when tail 246 | call optimization is not enabled, **vm_call_iseq_setup_2** will call 247 | **vm_call_iseq_setup_normal**](https://github.com/tdg5/blog_snippets/blob/12cb499a95ced517ee9f70febfa9472e2d055d71/lib/blog_snippets/tail_call_optimization_in_ruby_internals/from_call_method_to_tco.c#L221) 248 | instead of **vm_call_iseq_setup_tailcall**. We'll come back to this alternative 249 | path in a moment.) 
250 | 251 | - One look at [**vm_call_iseq_setup_tailcall**](https://github.com/tdg5/blog_snippets/blob/12cb499a95ced517ee9f70febfa9472e2d055d71/lib/blog_snippets/tail_call_optimization_in_ruby_internals/from_call_method_to_tco.c#L252) 252 | and it's obvious that we've found what we've been searching for, the heart of 253 | Ruby's support for tail call optimization. 254 | 255 | Success! Well, sort of, we still need to grok what's going on here, and come to 256 | think of it, where the hell are we? Let's take a look at what's going on inside 257 | **vm_call_iseq_setup_tailcall** and see if we can find our bearings and see how 258 | this call translates into the goodness of tail call optimization. 259 | 260 | ## Just when you were starting to think it was turtles all the way down 261 | Though we could consider **vm_call_iseq_setup_tailcall** on its own, we would 262 | probably do better to use the same strategy that we employed earlier and compare 263 | the unoptimized version to the tail call optimized version, and see what is 264 | different between the two. It didn't work for us last time, but maybe we'll have 265 | better luck this time around. 266 | 267 | We've established that the tail optimized version can be found in 268 | **vm_call_iseq_setup_tailcall**, and if it wasn't obvious from its name or from 269 | my making a point of mentioning it during our descent, the unoptimized version 270 | can be found in [**vm_call_iseq_setup_normal**](https://github.com/tdg5/blog_snippets/blob/12cb499a95ced517ee9f70febfa9472e2d055d71/lib/blog_snippets/tail_call_optimization_in_ruby_internals/from_call_method_to_tco.c#L221). 271 | Looking at both methods at a high level, it looks like we're still in the 272 | process of making the method call, as both of these functions seem to be 273 | preparing Ruby's internal stack prior to pushing a new frame onto the call 274 | stack. 
275 | 276 | Here's a side-by-side vimdiff highlighting the differences between the two 277 | functions, though I should warn you that I made a couple of minor adjustments to 278 | **vm_call_iseq_setup_normal** to suppress irrelevant differences: 279 | 280 | [![Differences between vm_call_iseq_setup_normal and vm_call_iseq_setup_tailcall](http://tdg5.s3.amazonaws.com/blog/wp-content/uploads/2014/02/vm_call_iseq_setup_diff.jpg)](http://tdg5.s3.amazonaws.com/blog/wp-content/uploads/2014/02/vm_call_iseq_setup_diff.jpg) 281 | 282 | Compared to the extremely minimal differences in our initial diff, I'm much 283 | more optimistic that we'll find what we're looking for in this larger change 284 | set. Let's start with **vm_call_iseq_setup_normal** since it is the shorter and 285 | more typical of the two functions. 286 | 287 | ## vm_call_iseq_setup_normal 288 | ```c 289 | VALUE *argv = cfp->sp - ci->argc; 290 | ``` 291 | 292 | **vm_call_iseq_setup_normal** begins by creating a pointer to the position on 293 | the stack where the argument vector (**argv**) for the next iteration of the 294 | recursive call begins. This is achieved by taking the current stack frame's 295 | stack pointer (**cfp->sp**) and moving backward down the stack the appropriate 296 | number of elements, as determined by our old friend the call info struct 297 | (**rb_call_info_t**) and its argument count field (**ci->argc**). 298 | 299 | ```c 300 | rb_iseq_t *iseq = ci->me->def->body.iseq; 301 | ``` 302 | 303 | **vm_call_iseq_setup_normal** then continues by creating a pointer to the 304 | **rb_iseq_t** struct identifying and encapsulating data about the instruction 305 | sequence that will be invoked by this call.
306 | 307 | ```c 308 | VALUE *sp = argv + iseq->param.size; 309 | ``` 310 | 311 | **vm_call_iseq_setup_normal** next creates a new pointer (**sp**) and points it 312 | to where it calculates the end of the argument vector (**argv**) to be using the 313 | value returned by **iseq->param.size**, a field related to the instruction 314 | sequence indicating how many parameters the instruction sequence takes. 315 | 316 | It may seem strange that the VM determines the beginning of **argv** by descending 317 | **ci->argc** elements from the top of the stack and then later finds the end of 318 | **argv** by ascending **iseq->param.size** elements up the stack, however the use 319 | of **iseq->param.size** allows the VM to allocate extra space on the stack in 320 | situations that use special types of arguments. In this case however, our Guinea 321 | pig function uses only simple arguments so **ci->argc** and **iseq->param.size** 322 | are equal. This brings us right back to where we started at the top of the stack. 323 | 324 | ```c 325 | for (i = 0; i < iseq->local_size - iseq->param.size; i++) { 326 | *sp++ = Qnil; 327 | } 328 | ``` 329 | 330 | This next segment is responsible for allocating and clearing out space on the 331 | stack for local variables and special variables that will be required to execute 332 | the method call. In this case, our Guinea pig function doesn't use any local 333 | variables so no space is needed for those, but the VM does need to allocate a 334 | spot on the stack for special variables. That said, though the VM allocates a 335 | spot on the stack for special variables, our function doesn't actually use any 336 | of Ruby's special variables[^1], so that spot on the stack will remain nil. 
337 | 338 | ```c 339 | vm_push_frame(th, iseq, VM_FRAME_MAGIC_METHOD, 340 | ci->recv, ci->defined_class, VM_ENVVAL_BLOCK_PTR(ci->blockptr), 341 | iseq->iseq_encoded + ci->aux.opt_pc, sp, 0, ci->me, iseq->stack_max); 342 | ``` 343 | 344 | For our particular intentions we don't need to get into the nitty-gritty details 345 | of this function invocation, but suffice it to say this call is responsible for 346 | pushing a new frame on to the stack for executing the method call. This new 347 | frame is the next iteration of our recursive function. 348 | 349 | ```c 350 | cfp->sp = argv - 1 /* recv */; 351 | ``` 352 | 353 | This last bit of logic sets the current frame's stack pointer (**cfp->sp**) to 354 | point to the position on the stack just before the beginning of the argument 355 | vector (**argv - 1**). When this line is executed, that position on the stack is 356 | occupied by the receiver of the next iteration of our function call. This may 357 | seem a little strange, but this assignment is preparing the current stack frame 358 | for when it resumes execution after the completion of the frame we've just 359 | pushed on to the stack. When the current frame resumes, it can assume the 360 | arguments further up the stack have already been consumed and should continue 361 | from further down the stack. Though it's not obvious, we'll see in a minute that 362 | this behavior is important for supporting tail call optimization. 363 | 364 | Whew, one down. Now let's take a look at how Ruby handles things differently in 365 | the tail call optimized case. 
366 | 367 | ## vm_call_iseq_setup_tailcall 368 | 369 | ```c 370 | VALUE *argv = cfp->sp - ci->argc; 371 | rb_iseq_t *iseq = ci->me->def->body.iseq; 372 | ``` 373 | 374 | **vm_call_iseq_setup_tailcall** starts exactly the same as its counterpart: It 375 | creates a pointer to the beginning of the argument vector (**argv**) of the next 376 | iteration of our recursive function and extracts a reference to the instruction 377 | sequence struct from the call info struct. 378 | 379 | ```c 380 | VALUE *src_argv = argv; 381 | VALUE *sp_orig, *sp; 382 | VALUE finish_flag = VM_FRAME_TYPE_FINISH_P(cfp) ? VM_FRAME_FLAG_FINISH : 0; 383 | ``` 384 | 385 | Though the functions start the same, **vm_call_iseq_setup_tailcall** soon 386 | distinguishes itself with the allocation of a number of additional variables. 387 | First, a new pointer (**src_argv**) is created pointing to the beginning of the 388 | argument vector (**argv**). Next, two pointers (**sp_orig** and **sp**) are 389 | allocated, but not assigned. Finally, a fourth variable (**finish_flag**) is 390 | allocated and conditionally assigned. 391 | 392 | The final variable, **finish_flag**, is used to allow tail call optimization of 393 | special types of stack frames called **finish frames**. Since we're working with 394 | normal method frames, the **finish_flag** variable can be safely ignored. 395 | 396 | ```c 397 | cfp = th->cfp = RUBY_VM_PREVIOUS_CONTROL_FRAME(th->cfp); 398 | ``` 399 | 400 | This is where the cleverness of tail call optimization begins to surface. 401 | Whereas the normal recursive strategy continues to accumulate frame after frame, 402 | this line begins to demonstrate how an optimized tail recursive call can avoid 403 | doing so. 404 | 405 | The secret sauce behind the success of **vm_call_iseq_setup_tailcall**, and tail 406 | call optimization in general, is that each iteration actually removes itself 407 | from the stack, as part of invoking the next iteration. 
Since the nature of 408 | recursion can make discussion difficult, it's worth taking a moment here for 409 | clarity. 410 | 411 | The beginning of **vm_call_iseq_setup_tailcall**, places us at the point in the 412 | sequence of events where the current frame, iteration n of 413 | **Factorial.fact_helper**, is preparing the stack for the recursive invocation 414 | of iteration n+1 of **Factorial.fact_helper**. Iteration n, after storing a 415 | reference to the argument vector intended for iteration n+1, pops the current 416 | stack frame (itself) off of the call stack, effectively removing itself from the 417 | stack and giving the appearance that **Factorial.fact** is the call in the stack 418 | before iteration n+1 of **Factorial.fact_helper**. 419 | 420 | In terms of another metaphor, if you think of the factorial calculation as 421 | exercise and the call stack as distance traveled, tail call optimization is kind 422 | of like a hamster (or Guinea pig) running on a hamster wheel. Though both the 423 | hamster and the recursive call are running in place, they both still make 424 | progress on the work they are performing. This analogy may also elucidate why 425 | tail recursion can be thought of as a special kind of loop construct. 426 | 427 | Returning our focus to **vm_call_iseq_setup_tailcall**, after popping the 428 | current frame from the call stack, **vm_call_iseq_setup_tailcall** then updates 429 | the thread's current frame pointer (**th->cfp**) and the **cfp** variable to 430 | point at the stack frame prior to the invocation of our tail recursive function, 431 | **Factorial.fact**. 432 | 433 | Though this mechanism allows tail call optimization to avoid the stack overflows 434 | inherent to its counterpart, we will see in a moment that it also has other 435 | benefits. 436 | 437 | ```c 438 | RUBY_VM_CHECK_INTS(th); 439 | ``` 440 | 441 | This line handles a little extra bookkeeping that tail call optimization in Ruby 442 | incurs. 
Usually, when Ruby switches from one stack frame to another, it takes a 443 | moment to check for pending interrupts. However, since the stack frame was 444 | manually popped off of the call stack, the check for interrupts must also be 445 | handled manually. 446 | 447 | ```c 448 | sp_orig = sp = cfp->sp; 449 | ``` 450 | 451 | Though it is pretty clear that this line assigns the **sp_orig** and **sp** 452 | variables to the value stored in the current frame's stack pointer (**cfp->sp**) 453 | field, keep in mind that **cfp** now refers to the call to **Factorial.fact**. 454 | 455 | As you'll recall from the normal setup function, before the first invocation of 456 | **Factorial.fact_helper**, the previous frame (**Factorial.fact**) would have 457 | rewound its stack pointer to the position on the stack that it should resume 458 | execution from, which would have been the point on the stack right before the 459 | arguments consumed by the first iteration of **Factorial.fact_helper**. This 460 | behavior benefits tail call optimization in a few ways. 461 | 462 | First, because the function call that just ended is exactly the same as the one 463 | that's being set up, it can be assumed that there's enough room on the stack for 464 | the call being prepared. This means that the stack pointer from the call prior 465 | to our tail optimized call (**cfp->sp**) can be used as the starting position 466 | for the new stack (**sp**) that's being assembled. 467 | 468 | Second, because the character of the stack is likely consistent for each 469 | recursive call, less overhead is required when setting up the stack. For 470 | example, earlier I mentioned that the Ruby VM allocates a spot on the stack for 471 | special variables that might be used by the function, but that since the 472 | function doesn't use any special variables, that field remains nil.
Because of 473 | the alignment of values on the stack from iteration to iteration, that nil field is 474 | actually only assigned on the first iteration and on every other iteration the 475 | assignment can be skipped because the value is already nil. 476 | 477 | The final benefit that comes from being able to reuse the stack pointer from the 478 | stack frame prior to our tail optimized call (**cfp->sp**) is that that same 479 | pointer also doubles as a pointer to the place on the stack that our current 480 | frame's stack pointer (**cfp->sp**) will need to be rewound later. To 481 | facilitate this usage a reference is set aside in **sp_orig** for later use. 482 | 483 | ```c 484 | sp[0] = ci->recv; 485 | sp++; 486 | ``` 487 | 488 | With this line, **vm_call_iseq_setup_tailcall** begins to rebuild the stack for 489 | the next iteration of the recursive call. To achieve this, it first pushes the 490 | receiver of the call (**ci->recv**) into the position at the head of the stack 491 | (**sp[0]**), and increments the stack pointer to the next position. 492 | 493 | ```c 494 | for (i=0; i < iseq->param.size; i++) { 495 | *sp++ = src_argv[i]; 496 | } 497 | ``` 498 | 499 | Next, the function continues by pushing each of the arguments for the next 500 | iteration onto the stack. This is where it becomes clear why a reference to the 501 | next iteration's argument vector is needed, as the **cfp** pointer was replaced, 502 | and without this reference (**src_argv**) there'd be no consistent means by 503 | which to access those arguments. 504 | 505 | This loop is also responsible for the behavior I alluded to above where each 506 | argument is written to a consistent position on the stack with each iteration.
507 | 508 | 509 | ```c 510 | for (i = 0; i < iseq->local_size - iseq->param.size; i++) { 511 | *sp++ = Qnil; 512 | } 513 | ``` 514 | 515 | Consistent with the normal setup function, the tail call optimized setup function 516 | also reserves and resets additional space on the stack for the method call as 517 | required. 518 | 519 | ```c 520 | vm_push_frame(th, iseq, VM_FRAME_MAGIC_METHOD | finish_flag, 521 | ci->recv, ci->defined_class, VM_ENVVAL_BLOCK_PTR(ci->blockptr), 522 | iseq->iseq_encoded + ci->aux.opt_pc, sp, 0, ci->me, iseq->stack_max); 523 | ``` 524 | 525 | The process of pushing a new frame on to the stack is almost exactly the same as 526 | in the normal setup function, except for one slight difference: The bitwise 527 | logic related to the **finish_flag** variable is added to allow tail call 528 | optimization to be performed on **finish frames** as we briefly discussed 529 | earlier. 530 | 531 | ```c 532 | cfp->sp = sp_orig; 533 | ``` 534 | 535 | Last but not least, after pushing the new frame on to the stack, the setup 536 | function sets the current frame pointer's stack pointer (**cfp->sp**) to the 537 | point on the stack that it should resume from. In this case, that position 538 | matches the original position of the frame's stack pointer which was tucked away 539 | in **sp_orig** for later use. 540 | 541 | At this point we're back in sync with **vm_call_iseq_setup_normal**, but whereas 542 | **vm_call_iseq_setup_normal** would have picked up another stack frame, after 543 | some minor stack shuffling, **vm_call_iseq_setup_tailcall** leaves us right back 544 | where we started, but one step closer to the solution to our factorial 545 | calculation. 546 | 547 | ## The bends 548 | Wow. I don't know about you, but I didn't expect the bottom to be quite so far 549 | down there. Though I'm eager to come back up for air, as are you I'm sure, it's 550 | worth deferring our ascent a moment to reflect on what we found in the depths. 
551 | 552 | Ruby's implementation of tail call optimization emerges from the Ruby VM's 553 | stack-oriented nature and ability to discard the current stack frame as it 554 | prepares the next frame for execution. Given this design it becomes more clear 555 | why tail call optimization is handled by Ruby on the C side instead of on the 556 | YARV side since method call setup is below the conceptual level at which YARV 557 | tends to work. 558 | 559 | In the end, there's a satirical humor in that we had to go to such depths to 560 | understand the facilities that allow the Ruby VM to handle tail recursive 561 | functions like treading water at the top of the stack. 562 | 563 | It's been a long journey, but I hope you learned something along the way, I know 564 | I certainly did. Thanks for reading! 565 | 566 | (I swear my next post will be shorter!) 567 | 568 | [^1]: Ruby's special **$** variables are out of the scope of this article, but you can see where the [parser defines the various special variables here](https://github.com/ruby/ruby/blob/17a65c320d9ce3bce3d7fe0177d74bf78314b8fa/parse.y#L7606). 569 | -------------------------------------------------------------------------------- /lib/blog_snippets/articles/eager_boolean_operators/README.md: -------------------------------------------------------------------------------- 1 | ![I'm going to continue never washing this cheek again](https://s3.amazonaws.com/tdg5/blog/wp-content/uploads/2015/02/24024043/never_washing_this_cheek_again.jpg) 2 | In relaying the story of eager Boolean operators, it is best to begin with their 3 | more ubiquitous siblings, short-circuiting logical Boolean operators. 
This is 4 | perhaps best achieved with an example: 5 | 6 | ```ruby 7 | true || Seriously(this(is(valid(Ruby!)))) 8 | # => true 9 | 10 | false && 0/0 11 | # => false 12 | ``` 13 | 14 | In Ruby, and many other common programming languages,[^1] the Boolean operators 15 | used for chaining together logical expressions are designed to minimize the 16 | amount of work required to determine the outcome of a logical expression. More 17 | specifically, when determining the outcome of a logical expression as few of 18 | the statements in the expression will be evaluated as possible. In the previous 19 | example, this notion, known as [short-circuit evaluation](https://en.wikipedia.org/wiki/Short-circuit_evaluation), 20 | is exploited to include some very bad code in a manner that renders that bad 21 | code completely innocuous. 22 | 23 | In the first example, the short-circuiting behavior of the **||** Boolean 24 | operator, representing a [logical **OR** or logical disjunction](https://en.wikipedia.org/wiki/Logical_disjunction) 25 | operation, prevents a series of undefined methods from causing a fatal 26 | **NoMethodError** exception. This code can safely be executed because when the 27 | first argument of an **OR** operation is **true** then the overall value of the 28 | expression must also be **true**. Put more simply, **true OR _anything_** will 29 | always result in **true**. Given this logical maxim, at runtime the program does 30 | not need to execute the right-hand side of the expression and can move on 31 | without executing the explosive code. 32 | 33 | Similarly, in the second example, the short-circuiting behavior of the **&&** 34 | Boolean operator, representing a [logical **AND** or logical conjunction](https://en.wikipedia.org/wiki/Logical_conjunction) 35 | operation, prevents a fatal **ZeroDivisionError** exception. 
This code can 36 | safely be executed because when the first argument of an **AND** operation is 37 | **false** then the overall value of the expression must also be **false**. In 38 | simpler terms, **false AND _anything_** will always result in **false**. Given 39 | this basic tenet of Boolean logic, at runtime the program can decide the 40 | outcome of the logical expression without executing the subversive right-hand 41 | side of the expression. 42 | 43 | It's interesting to note that, because of their short-circuiting behavior, the 44 | **||** and **&&** Boolean operators are more than just logical operators, 45 | they actually also function as control structures. To demonstrate this, 46 | though the previous example used Boolean operators, it could just as easily 47 | have been written with more traditional flow control structures like **if** or 48 | **unless**: 49 | 50 | ```ruby 51 | # The true result is lost, but we weren't storing it anyway, so no problemo. 52 | Seriously(this(is(valid(Ruby!)))) unless true 53 | # => nil 54 | 55 | # Again, the result of false is lost, but for this example that's okay. 56 | 0/0 if false 57 | # => nil 58 | ``` 59 | 60 | Eager Boolean operators come into play when someone inevitably asks the 61 | question, "what if we don't want to short-circuit?" 62 | 63 | ## Eager Boolean Operators 64 | 65 | As their name suggests, eager Boolean operators are logical operators that do 66 | not short-circuit. Instead, even when the outcome of a logical expression is 67 | determined, they continue to execute the logical expression until it has been 68 | fully evaluated. If we changed our example of short-circuiting Boolean operators 69 | to use eager Boolean operators instead, we'd no longer be safe from that 70 | sinister code.
Here it is again as such with a couple of other tweaks: 71 | 72 | ```ruby 73 | begin 74 | true | Seriously(this(is(valid(Ruby!)))) 75 | rescue NoMethodError => e 76 | e.class 77 | end 78 | # => NoMethodError 79 | 80 | begin 81 | false & 0/0 82 | rescue ZeroDivisionError => e 83 | e.class 84 | end 85 | # => ZeroDivisionError 86 | ``` 87 | 88 | In the first example above, I've modified the earlier example to replace the 89 | **||** Boolean operator with an alternative Boolean operator included in Ruby 90 | that offers eager evaluation of logical **OR** expressions, **|**. Though more 91 | commonly used for bitwise operations, when used with **true**, **false**, or 92 | **nil**, the **|** operator functions similarly to its counterpart, **||**, 93 | except without the short-circuiting behavior. Evidence of this eager evaluation 94 | behavior can be seen above in that the outcome of the **begin** block is not 95 | true, as would be the case if **|** were a short circuiting operator, but it is 96 | instead the exception class we would expect to be raised if the right-hand side 97 | of the logical expression had been evaluated. 98 | 99 | Similarly, in the second example above, I've modified the earlier example and 100 | replaced the **&&** Boolean operator with Ruby's eager Boolean **AND** 101 | operator, **&**. Also more commonly used in bitwise expressions, when used with 102 | **true**, **false**, or **nil**, the **&** operator behaves similarly to its 103 | short-circuiting cousin, **&&**, except that it eagerly evaluates the right-hand 104 | side of the logical expression even if the overall outcome of the expression has 105 | already been determined. Once again, this behavior can be seen in that the 106 | result of the **begin** block is the **ZeroDivisionError** class, which would 107 | only be the case if the right-hand side of the logical expression had been 108 | evaluated. 
109 | 110 | Though this example helps demonstrate the eager evaluation properties of the 111 | **|** and **&** Boolean operators, given its explosive nature, it doesn't offer 112 | much insight into how eager Boolean operators might be useful. Having addressed 113 | the question of "what if we don't want to short-circuit?", let us consider 114 | another question that may actually be a better answer to the question than the 115 | one I've just outlined: "why wouldn't you want to short-circuit?" 116 | 117 | ## Bitwise digression 118 | 119 | Before we look at a handful of examples of eager Boolean operators, I'd like to 120 | digress for a moment for a brief discussion of bitwise Boolean operators. 121 | Bitwise Boolean operators are operators like **&** and **|** that perform 122 | operations on Boolean values as though those Boolean values were bits or binary 123 | 0s and 1s, where **false** and **nil** are both 0 and **true** is 1. For 124 | example, consider the following truth table for the **&** bitwise operation that 125 | demonstrates the equivalence of the two operations. 126 | 127 | | Truth of & | nil ( 0 ) | false ( 0 ) | true ( 1 ) | 128 | |------------|-------------|-------------|-------------| 129 | |nil ( 0 ) | false ( 0 ) | false ( 0 ) | false ( 0 ) | 130 | |false ( 0 ) | false ( 0 ) | false ( 0 ) | false ( 0 ) | 131 | |true ( 1 ) | false ( 0 ) | false ( 0 ) | true ( 1 ) | 132 | 133 | One behavior of bitwise Boolean operators worth noting is that they always 134 | return a Boolean value. Even if the second argument to a bitwise Boolean 135 | operator is truthy or falsy, or even if the first argument to the bitwise 136 | Boolean operator is falsy, as is the case with **nil**, the result of the 137 | expression will still be a Boolean value. This is in contrast to their logical 138 | Boolean counterparts who are more than content to return a truthy or falsy value 139 | in place of a strict Boolean value. 
140 | 141 | This behavior can be useful at times, but can certainly come as a surprise to 142 | those who are more familiar with the more ubiquitous logical Boolean operators 143 | and their penchant for returning truthy and falsy values. The behavior of 144 | bitwise Boolean operators can also surprise the unaware in that unlike the 145 | logical Boolean operators which can be invoked with any two values, the bitwise 146 | Boolean operators must be invoked with either **true**, **false**, or **nil** on 147 | the left-hand side of the expression, otherwise, an error or other unexpected 148 | behavior will occur. 149 | 150 | In terms of eager Boolean operators, the bitwise Boolean operators are important 151 | because the eager Boolean operators are a sort of subset of the bitwise Boolean 152 | operators. The **&** and **|** operators are both bitwise Boolean operators, but 153 | in the cases of **true | _anything_** and **false & _anything_** they are also 154 | eager Boolean operators. If this is unclear, the following examples may help. 155 | 156 | ## Eager Boolean Operators in Practice 157 | 158 | Let's look at a couple of examples of eager Boolean operators in practice. After 159 | we've considered a couple of examples, perhaps we'll be better prepared to take 160 | a step back and get more clarity on what aspects or behaviors of eager 161 | evaluation are exploited by these examples in the name of utility. I've done 162 | what I can to try to find examples of eager Boolean operators out in the wild, 163 | but I've not had enormous success. To that end, I've tried to evaluate and order 164 | the examples below in terms of utility. Some examples are mine, some come from 165 | more popular libraries. 166 | 167 | ### Enumerable#eager_all? 168 | 169 | The first example is far and away the best use-case I've found for both bitwise 170 | and eager Boolean operators that I've come across. 
The example below uses the 171 | bitwise **AND** operator, **&**, to create a version of 172 | [**Enumerable#all?**](http://ruby-doc.org/core-2.2.0/Enumerable.html#method-i-all-3F) 173 | that is guaranteed to evaluate all elements in a collection. This is different 174 | from the normal behavior of **Enumerable#all?** in that **Enumerable#all?** 175 | normally discontinues evaluation of the collection as soon as any element in the 176 | collection returns **false** for the provided block. 177 | 178 | ```ruby 179 | module Enumerable 180 | def eager_all? 181 | inject(true) do |result, item| 182 | result & (block_given? ? yield(item) : item) 183 | end 184 | end 185 | end 186 | ``` 187 | 188 | This example leverages the **&** operator to ensure that the right-hand side of 189 | the logical expression is always evaluated. This behavior is combined with 190 | [**Enumerable#inject**](http://ruby-doc.org/core-2.2.0/Enumerable.html#method-i-inject) 191 | to ensure that all elements of the collection are evaluated, ultimately 192 | accumulating to the correct result. 193 | 194 | The astute among you may have noticed that this example could alternatively have 195 | used the short-circuiting **&&** Boolean operator by flipping the operands like 196 | so: 197 | 198 | ```ruby 199 | module Enumerable 200 | def alternative_eager_all? 201 | inject(true) do |result, item| 202 | (block_given? ? yield(item) : item) && result 203 | end 204 | end 205 | end 206 | ``` 207 | 208 | Though this is true, at runtime this alternative approach draws attention to the 209 | bitwise nature of the **&** operator as compared to its short-circuiting cousin, 210 | **&&**, a difference in nature which I think in this case gives the eager 211 | Boolean operator the edge. 
The bitwise nature I refer to is, as I mentioned 212 | before and as is demonstrated below, that eager Boolean operators will always return 213 | **true** or **false** while the short-circuiting Boolean operators could return 214 | any object depending on the operator and the arguments given to it. We don't 215 | have to worry about *any* object in the alternative example since the result of 216 | the yield is combined with **true** or **false** using **&&**, but we do have 217 | to worry about one other object, **nil**. Because of the short-circuiting nature 218 | of **&&**, if the result of the **yield** is **nil**, the result of the call to 219 | **alternative_eager_all?** will also result in **nil** as demonstrated below: 220 | 221 | ```ruby 222 | [false, nil].eager_all? 223 | # => false 224 | 225 | [false, nil].alternative_eager_all? 226 | # => nil 227 | ``` 228 | 229 | Given that **nil** is also falsy, this isn't really a problem, but I think it 230 | does make **alternative_eager_all?** less robust than it could be. 231 | 232 | Another way the **nil** case could be handled without resorting to using an 233 | eager Boolean operator is by double negating the result of the **inject** call 234 | to ensure that a Boolean is returned. That would look like this: 235 | 236 | ```ruby 237 | module Enumerable 238 | def alternative_eager_all? 239 | !!inject(true) do |result, item| 240 | (block_given? ? yield(item) : item) && result 241 | end 242 | end 243 | end 244 | ``` 245 | 246 | Though the practice of double negation is pretty common, as it turns out, the 247 | coercive nature of the bitwise Boolean operators is actually slightly faster 248 | than the more idiomatic double negation.
Consider this benchmark generated using 249 | the [benchmark-ips gem](https://github.com/evanphx/benchmark-ips): 250 | 251 | ```ruby 252 | require "benchmark/ips" 253 | 254 | Benchmark.ips do |bm| 255 | bm.config(:time => 20, :warmup => 5) 256 | 257 | bm.report("Double negate") { !!(true && :a) } 258 | bm.report("Logical bit-wise coerce") { true & :a } 259 | end 260 | 261 | # Calculating -------------------------------------------- 262 | # Double negate 138.008k i/100ms 263 | # Logical bit-wise coerce 139.350k i/100ms 264 | # -------------------------------------------------------- 265 | # Double negate 7.262M (± 1.0%) i/s - 36.434M 266 | # Logical bit-wise coerce 7.825M (± 1.3%) i/s - 39.157M 267 | # -------------------------------------------------------- 268 | ``` 269 | 270 | The difference in performance between the two approaches is pretty negligible 271 | and certainly isn't substantial enough to merit choosing bitwise Boolean 272 | coercion over double negation. Keep in mind also that the bitwise coercion (if 273 | you want to call it that) to **true** or **false** is not without its downside. 274 | As I mentioned before, the coercive behavior of eager Boolean operators may 275 | come as a surprise for developers who are more familiar with the behavior of the 276 | more common short-circuiting logical Boolean operators. 277 | 278 | ### Bringing *before_suite* type behavior to Minitest 279 | 280 | The next example is a bit of questionable code of mine from a few years ago. In 281 | this example, I use the **&** eager Boolean operator in an attempt to emulate 282 | behavior similar to **RSpec's #before_suite** hook in a **Minitest** test case 283 | seeing as **Minitest** does not offer a similar behavior. 
284 | 285 | ```ruby 286 | class SomeTest < Minitest::TestCase 287 | setup { self.class.one_time_setup } 288 | 289 | def self.one_time_setup 290 | return if @setup_complete & @setup_complete ||= true 291 | # Some expensive or non-idempotent setup 292 | end 293 | 294 | def test_something 295 | # ... 296 | end 297 | end 298 | ``` 299 | 300 | At the time, I thought this was clever, probably because of its condensed 301 | nature, but a few years later and I can see that this code is excessively tricky 302 | and has obvious, though minor, inefficiencies. This example exploits two tricks 303 | to create a sort of switch that doesn't fire the first time it's evaluated, but 304 | will fire on all subsequent evaluations. 305 | 306 | The first trick in this example takes advantage of the fact that accessing a 307 | nonexistent instance variable will never result in an error. The second trick 308 | takes advantage of the **&** operator to ensure that even when the 309 | **@setup_complete** instance variable is **nil**, a second statement is 310 | evaluated that will set **@setup_complete** to true, while still returning 311 | **nil** to the **if** statement. These two tricks allow for the described 312 | behavior as more concisely demonstrated below: 313 | 314 | ```ruby 315 | def first_time_only 316 | return if @not_first_time & @not_first_time ||= true 317 | "Hello world!" 318 | end 319 | 320 | first_time_only 321 | # => "Hello world!" 322 | 323 | first_time_only 324 | # => nil 325 | ``` 326 | 327 | The inefficiency of this approach that I referenced earlier is that the 328 | **@not_first_time** variable is going to be evaluated twice every time the 329 | **first_time_only** method is invoked, once on both the left and right hand 330 | sides of the **&** operator. 
Since this evaluation is cheap, it's not the end 331 | of the world, but it starts to beg a question that has been nagging me as I've 332 | become more familiar with bitwise and eager Boolean operators: When is chaining 333 | logical expressions using eager Boolean operators a better choice than just 334 | splitting the expression into two statements? 335 | 336 | In terms of the **first_time_only** example above, the method could be rewritten 337 | like so by splitting the logical expression into two parts instead of relying on 338 | the tricky behavior of the **&** operator: 339 | 340 | ```ruby 341 | def first_time_only 342 | return if @not_first_time 343 | @not_first_time = true 344 | "Hello world!" 345 | end 346 | ``` 347 | 348 | ## Examples from the real world 349 | 350 | I've led with two of my own examples not because of my acute egomania, but 351 | because frankly, I couldn't find many examples of bitwise Boolean operators, 352 | much less eager Boolean operators out there in the wild. Maybe there was a flaw 353 | in the regular expression I used to grep through the wealth of gems I've 354 | accumulated or maybe I've missed some genius examples in the noise of numerical 355 | bitwise expressions and Array intersections, I don't know. 356 | 357 | In the end, I was only able to find 4 examples, and unfortunately, three of 358 | those four were similar enough (two were exactly the same!) to make it really 359 | only worth mentioning one. Making matters worse, I'm not convinced any of the 360 | examples are using eager or bitwise Boolean operators in an effective way. But 361 | again, maybe I'm missing something. You be the judge. 362 | 363 | ### RubySpec: Three flavors of tainted? 364 | 365 | The three very similar examples I mentioned above come from the now defunct 366 | [RubySpec](https://github.com/rubyspec/rubyspec) project. 
Each occurs while 367 | testing whether a **String** has become tainted following a slice operation 368 | [[1]](https://github.com/rubyspec/rubyspec/blob/38b775a32293ce7ec5bdadaa7e70422fb5dc3a68/core/string/slice_spec.rb#L436) 369 | [[2]](https://github.com/rubyspec/rubyspec/blob/38b775a32293ce7ec5bdadaa7e70422fb5dc3a68/core/string/shared/slice.rb#L419) 370 | or a [concatenation using the **+** operator](https://github.com/rubyspec/rubyspec/blob/324c37bb67ea51f197954a37a2c71878eeadea01/core/string/plus_spec.rb#L41). 371 | The example testing concatenation with **+** is the shortest of the bunch, so 372 | let's have a look. 373 | 374 | ```ruby 375 | it "taints the result when self or other is tainted" do 376 | strs = ["", "OK", StringSpecs::MyString.new(""), StringSpecs::MyString.new("OK")] 377 | strs += strs.map { |s| s.dup.taint } 378 | 379 | strs.each do |str| 380 | strs.each do |other| 381 | (str + other).tainted?.should == (str.tainted? | other.tainted?) 382 | end 383 | end 384 | end 385 | ``` 386 | 387 | In this example, a few instances of the **String** class and their tainted alter 388 | egos are created and then each of the instances is concatenated with each of the 389 | other instances using the **+** operator. For each concatenation produced, the 390 | result is tested to ensure that it is considered tainted if either of its 391 | parents were tainted. During the test to determine if a result **String** should 392 | be tainted or not, we find our bitwise Boolean friend, the **|** operator. But 393 | what advantage does the **|** operator offer in this situation over its 394 | short-circuiting counterpart, **||**? 395 | 396 | When **str.tainted?** is **true**, the result of the parenthetical expression will 397 | be **true**, however, keep in mind that **other.tainted?** will still be 398 | evaluated, though the result will be discarded.
Unless there is some hidden side 399 | effect of calling **other.tainted?** at this point in the test, this seems like 400 | extraneous work to me. If there is a side effect to calling **other.tainted?** 401 | at this point in the test, that's a whole other problem because it seems quite 402 | possible that whatever that side effect is, it could have impacted the outcome 403 | of **(str + other).tainted?**, in which case, who knows what's really being 404 | tested. All this taken into account, I'm inclined to believe that 405 | short-circuiting would be a desirable alternative in this case. 406 | 407 | Conversely, when **str.tainted?** is **false**, the result of the parenthetical 408 | expression depends entirely on the outcome of **other.tainted?**. This may seem 409 | good in that when **other.tainted?** is **true**, the parenthetical expression 410 | will be **true** and when **other.tainted?** is **false**, the parenthetical 411 | expression will be **false**. However, as we discussed earlier, the eager 412 | Boolean operators only return **true** or **false** unlike their 413 | short-circuiting counterparts. This means that **other.tainted?** could return 414 | **:wtf?** or **nil** and the parenthetical expression would evaluate to **true** 415 | or **false**, respectively. Perhaps this coercion to **true** or **false** was 416 | the goal in choosing **|** over **||**, but in a test, particularly a test aimed 417 | at describing how the language itself should work, this seems like a bad idea to 418 | me. 419 | 420 | Overall, it seems like **||** would be a much better choice here than **|**, as 421 | it ensures the minimal amount of evaluation is performed while also ensuring 422 | that the output values of both **str.tainted?** and **other.tainted?** are 423 | tested for validity.
424 | 425 | ### Ruby: k-nucleotide benchmark 426 | 427 | The final example we'll look at is a Ruby implementation of the 428 | [k-nucleotide benchmark](http://benchmarksgame.alioth.debian.org/u32/performance.php?test=knucleotide#about). 429 | Unchanged since it was added to the Ruby source tree in 2007, 430 | [bm_so_k_nucleotide.rb](https://github.com/ruby/ruby/blob/75feee0968c9345e7ffd2bda9835fcd60b4c0880/benchmark/bm_so_k_nucleotide.rb#L40) 431 | utilizes the eager Boolean operator **&** to read lines from a file until a line 432 | is encountered that starts with ">". 433 | 434 | ```ruby 435 | while (line !~ /^>/) & line do 436 | seq << line.chomp 437 | line = input.gets 438 | end 439 | ``` 440 | 441 | The purpose of this code is fairly straightforward, however what is less clear, 442 | is the utility of taking the eager logical conjunction (**&**) of **(line !~ /^>/)** 443 | and **line**. 444 | 445 | When the result of the **!~** operation results in **false**, the right-hand 446 | side of the expression will be evaluated and the result discarded. It's 447 | important to keep in mind that this will only happen once because the result of 448 | **false** will end the loop, but more generally speaking, in circumstances 449 | similar to this there's no reason to waste CPU time extraneously evaluating the 450 | right-hand side of the expression. We can be pretty confident that this 451 | operation is wasteful because the value of **line** has no impact on the outcome 452 | of the logical expression and since we know that **line** is a reference to an 453 | object and not a method call, we know that the evaluation of **line** should not 454 | cause any side effects that might be worth preserving. Again though, since the 455 | eager evaluation is only going to happen once for this loop, it's really not of 456 | great concern. 457 | 458 | The case when the **!~** expression evaluates to **true** is a little trickier.
459 | One would think that when the left-hand side of the expression evaluates to 460 | **true**, there would be no point in evaluating **line** as we might expect that 461 | the value of **line** is a **String** that will be coerced into **true** by 462 | **&**. However, the **!~** operator is defined for more than just instances of 463 | **String**. In fact, **true**, **false**, **nil**, and anything that inherits 464 | from **Object** all implement the complement method to **!~**, **=~**, and by 465 | default they all return a value of **nil** for **=~**. This means that in most 466 | cases the **!~** operator will be negating **nil** which means the left-hand 467 | side is going to evaluate to **true** in a lot of cases we might not expect. 468 | 469 | In reality though, I suspect that the real reason the right-hand side of the 470 | expression is included is as a guard against **line** having a value of **nil**. 471 | If this is the case, then the only reason to choose **&** over **&&** would be 472 | the ability of **&** to coerce truthy values to **true**. If the result of the 473 | expression were being stored, this might make sense, however, since the result 474 | of the expression is being used as the condition for a **while** loop, it seems 475 | unlikely that this coercion would yield any perceivable benefit. As such, I 476 | think **&&** would be a better choice here because it is more familiar to most 477 | programmers and it will still guard against **nil** values. 478 | 479 | In the event that a value of **true** is easier for the **while** statement to 480 | consume than other truthy values, we can always flip the condition around like 481 | so: 482 | 483 | ```ruby 484 | while line && (line !~ /^>/) do 485 | # ... 486 | end 487 | ``` 488 | 489 | This arrangement has the added benefit of removing the need for the parentheses 490 | and short-circuiting the **!~** operation in situations where **line** is falsy. 491 | 492 | But why stop there?
Why explicitly guard against **nil** and **false** at all? 493 | Especially when every other **Object** out in the Ruby universe is going to slip 494 | right past this check, resulting in a **NoMethodError** when the program 495 | attempts to call **chomp** on an object that doesn't support **chomp**. When it 496 | comes down to it, the condition of this **while** loop is pretty inadequate. 497 | 498 | A lot of the problem with the condition comes from the negation of the **=~** 499 | operation, what if we could avoid that? Given the regular expression of 500 | **/^>/**, it would seem that we're on the lookout for any line that starts with 501 | ">". But, what if, instead, we changed the condition such that it were **true** 502 | as long as a line started with anything other than ">"? This can be achieved by 503 | modifying the regular expression and would change the **while** loop to look 504 | like so: 505 | 506 | ```ruby 507 | while line =~ /^[^>]/ do 508 | # ... 509 | end 510 | ``` 511 | 512 | Though the regular expression is more complex, I think the whole expression is 513 | much easier to reason about without the negation, extra logical expression, and 514 | parentheses. 
515 | 516 | I've gotten a little off topic here, so we should move on, but before we do so, 517 | here are a few benchmarks generated using the [benchmark-ips gem](https://github.com/evanphx/benchmark-ips) 518 | for the **&**, **&&**, and altered **Regexp** versions of the **while** 519 | loop when run in the actual context of the nucleotide benchmark: 520 | 521 | ```ruby 522 | # Calculating ---------------------------------------------- 523 | # & 2.000 i/100ms 524 | # && 2.000 i/100ms 525 | # Alternate Regexp 3.000 i/100ms 526 | # ---------------------------------------------------------- 527 | # & 27.538 (± 3.6%) i/s - 550.000 528 | # && 28.092 (± 3.6%) i/s - 562.000 529 | # Alternate Regexp 29.000 (± 3.4%) i/s - 582.000 530 | # ---------------------------------------------------------- 531 | ``` 532 | 533 | Very minor performance differences, but another case where bitwise Boolean 534 | operators don't seem to be the best choice for the job. 535 | 536 | ## Optimization by branch avoidance 537 | 538 | Having been through a few examples of eager Boolean operators in Ruby, I imagine 539 | your opinions on the matter are starting to coalesce, I know mine certainly 540 | are. Though I started this article to get a better understanding of when and 541 | why one might want to use eager Boolean operators, the more research I've done, 542 | the more the question for me has become "Why would I ever want to use bitwise or 543 | eager Boolean operators?" 544 | 545 | If you looked at the [list of programming languages that support both short-circuiting and eager Boolean operators](https://en.wikipedia.org/wiki/Short-circuit_evaluation#Support_in_common_programming_languages) 546 | I referenced earlier, you may have noticed that quite a few languages support 547 | both types of operators. This seems like a clue that there is a strong 548 | reason to have both types of operators.
However, perhaps my Google-fu failed me, 549 | but I really couldn't find a strong argument for using eager Boolean operators. 550 | 551 | The best argument I came across that we haven't already discussed in some form 552 | comes from [a Stack Overflow question asking about the difference between the 553 | **||** operator and the **|** operator](https://stackoverflow.com/questions/7101992/why-do-we-usually-use-not-what-is-the-difference/7105382#7105382). 554 | All the way down 8 or 9 answers in is [an answer from Peter Lawrey](http://stackoverflow.com/a/7105382/1169710) 555 | that I think has some merit. Peter writes: 556 | 557 | > Maybe use [eager Boolean operators] when you have very simple boolean 558 | > expressions and the cost of short cutting (i.e. a branch) is greater than the 559 | > time you save by not evaluating the later expressions. 560 | 561 | I was certainly intrigued by this idea, especially since one of the commenters 562 | on Peter's answer claimed to have actually come across this behavior on some 563 | CPUs. 564 | 565 | I could see this type of behavior pretty easily existing in a lower level 566 | language like C, but I had reservations about whether or not something that must 567 | be a pretty minor micro-optimization could bubble all the way up into a higher 568 | level language like Ruby. To find out, I put together the following benchmark, 569 | again making use of the [benchmark-ips gem](https://github.com/evanphx/benchmark-ips): 570 | 571 | ```ruby 572 | require "benchmark/ips" 573 | 574 | Benchmark.ips do |bm| 575 | bm.config(:time => 20, :warmup => 5) 576 | 577 | bm.report(";") { true ; true } 578 | bm.report("&&") { true && true } 579 | bm.report("&") { true & true } 580 | end 581 | ``` 582 | 583 | The goal of this benchmark is to use the simplest case possible to get an idea 584 | of the cost of branching compared to a more strict eager evaluation alternative. 585 | To this end, both the **&&** and **&** operators are benchmarked. 
In addition, 586 | to provide a baseline, the benchmarks also include a version that simply 587 | evaluates **true** twice to ensure a benchmark that includes no branching or 588 | other silly business. I found the results surprising: 589 | 590 | ```ruby 591 | # Calculating ------------------------- 592 | # ; 131.478k i/100ms 593 | # && 128.222k i/100ms 594 | # & 126.305k i/100ms 595 | # ------------------------------------- 596 | # ; 9.346M (± 3.4%) i/s - 186.699M 597 | # && 8.867M (± 3.2%) i/s - 177.075M 598 | # & 7.812M (± 2.6%) i/s - 156.113M 599 | # ------------------------------------- 600 | ``` 601 | 602 | I wasn't surprised to find that **&** wasn't faster than **&&**, but what did 603 | surprise me was how much slower **&** actually was compared to **&&**, 604 | especially in a case where I expected there to be a fairly negligible 605 | difference. It's pretty clear from this benchmark that, at least in Ruby, any 606 | branching that's avoided by using the **&** operator is insignificant in 607 | comparison to other overhead. But what could that other overhead be? Though it 608 | may surprise you, that overhead is a method call. *Say what?* 609 | 610 | ## Holy method calls, Batman! 611 | 612 | As it turns out, in the case of Boolean values, bitwise operators like **&** and 613 | **|** aren't so much operators as they are methods on **TrueClass**, 614 | **FalseClass**, and **NilClass**! Consider for example the C source of the 615 | bitwise **|** method on **TrueClass**: 616 | 617 | ```c 618 | static VALUE 619 | true_or(VALUE obj, VALUE obj2) 620 | { 621 | return Qtrue; 622 | } 623 | ``` 624 | 625 | [View on GitHub](https://github.com/ruby/ruby/blob/16294913f71b8a38526096cf6458340b19b45f9f/object.c#L1247) 626 | 627 | Thankfully, this is one of the simplest examples of Ruby's C source you'll come 628 | across. Though it's simple to read, the nuance of what is going on here is a 629 | little more complicated. 
The **true_or** method is simply a method that takes 630 | two arguments (actually only one really since the first argument will always be 631 | the **true** singleton), and regardless of what those arguments are, returns 632 | **true**. What may not be completely obvious from this code is how this method 633 | implementation leads to the eager evaluation of the right-hand side of a logical 634 | expression. 635 | 636 | Throughout this article we've treated **|** like a primitive operator, perhaps 637 | if we treat it more like a method call, it will make it more obvious how this 638 | simple method equates to eager evaluation. Let's consider something along the 639 | lines of the simplest possible case and while we're at it, let's see if 640 | **||** is also implemented as a method on **TrueClass**. Let's see what happens 641 | if we try to use **Object#send**: 642 | 643 | ```ruby 644 | true.send("||", true) 645 | # => NoMethodError: undefined method `||' for true:TrueClass 646 | 647 | true.send("|", true) 648 | # => true 649 | ``` 650 | 651 | Interesting! So we've learned that **||** is not a method, but must be a more 652 | primitive operator. Additionally, we can see much more clearly now that **|** is 653 | definitely a method of **TrueClass**. 654 | 655 | With some closer examination, this example should also help make it clear how 656 | implementing **TrueClass#|** as a method call leads to eager evaluation. Though 657 | the argument we passed to **TrueClass#|** in the example above was a primitive 658 | **true** value, it could have been any arbitrary Ruby expression. Unlike **||** 659 | which could completely ignore the right-hand side of the expression when the 660 | left-hand side of the operation is **true**, **TrueClass#|** cannot skip the 661 | right-hand side of the expression because it is a method call. 
In fact, before 662 | **TrueClass#|** is invoked, the RubyVM has already evaluated the right-hand side 663 | of the expression, reducing it to the value that will be used as the argument to 664 | **TrueClass#|**. 665 | 666 | So, that's the magic behind one of the eager bitwise Boolean operators, what 667 | about another of the eager bitwise Boolean operators? How is that implemented? Is it 668 | also a method call? As it turns out, yes. Consider the implementation of 669 | **TrueClass#&**: 670 | 671 | ```c 672 | static VALUE 673 | true_and(VALUE obj, VALUE obj2) 674 | { 675 | return RTEST(obj2)?Qtrue:Qfalse; 676 | } 677 | ``` 678 | 679 | [View on GitHub](https://github.com/ruby/ruby/blob/16294913f71b8a38526096cf6458340b19b45f9f/object.c#L1225) 680 | 681 | Thankfully, this method is also pretty easy to read. It's a little more 682 | complicated than **TrueClass#|**, but it's pretty easy to see that the method 683 | evaluates the **RTEST** macro on **obj2** and returns **true** or **false** 684 | depending on the outcome of that evaluation. I won't go into the inner workings 685 | of **RTEST**, but you can view [the C source for the **RTEST** macro here](https://github.com/ruby/ruby/blob/01195a202cb9fcc6ddb6cf793868e4c7d85292dc/include/ruby/ruby.h#L422) 686 | if you're interested. Basically, **RTEST** uses a couple of numeric bitwise 687 | operations to determine if its argument is **false** or **nil** and if not 688 | returns **true**, which in turn causes **true_and** to do the same. 689 | 690 | Okay, so given all that, it should make more sense that using a bitwise/eager 691 | Boolean operator would be slower than a more primitive operator. Unfortunately 692 | though, slower execution is not the only drawback of these method-based 693 | bitwise Boolean operators.
694 | 695 | ## Inconsistent precedence 696 | 697 | The fundamentally different nature of the method-based bitwise Boolean 698 | operators and the more primitive logical Boolean operators is unfortunately not 699 | without consequence. The overhead of a method call is only one consequence. 700 | Another consequence is that the bitwise Boolean operators have a different 701 | precedence than their logical cousins. 702 | 703 | I won't get into the nature of [precedence, or order of operations,](https://en.wikipedia.org/wiki/Order_of_operations) 704 | in this article, but I will offer these examples for your consideration: 705 | 706 | ```ruby 707 | true || 1 && 3 708 | # => true 709 | 710 | true | 1 && 3 711 | # => 3 712 | 713 | # wtf? 714 | # `true || 1 && 3` evaluates like `true || (1 && 3)` while 715 | # `true | 1 && 3` evaluates like `(true | 1) && 3` 716 | 717 | 718 | false && true ^ true 719 | # => false 720 | 721 | false & true ^ true 722 | # => true 723 | 724 | # wtf? 725 | # `false && true ^ true` evaluates like `false && (true ^ true)` while 726 | # `false & true ^ true` evaluates like `(false & true) ^ true` 727 | ``` 728 | 729 | As if the bitwise Boolean operators didn't have enough going against them, the 730 | differences in operator precedence reek too much of a 4-hour debugging session 731 | for my taste. 732 | 733 | ## The case against bitwise Boolean operators 734 | 735 | Though I started this article with an agenda for finding a use-case appropriate 736 | for eager Boolean operators, the search for such a use-case has ultimately led 737 | me to the opposite end of the spectrum. Where once I sought to bring light to 738 | eager Boolean operators, I now find myself at odds with the whole family of 739 | bitwise Boolean operators.
We've been through many of the arguments against, but 740 | here they are again, in summary: 741 | 742 | - Rare usage in community code suggests limited understanding and familiarity 743 | - The primary benefit of eager evaluation is side effects. 744 | - Side effects make the code harder to debug, harder to reason about, and 745 | harder to test. 746 | - Errors encountered during eager evaluation occur before assignment operations 747 | - Even if errors during eager evaluation are caught, the value of the logical 748 | expression is lost.[^2] 749 | - Bitwise Boolean operators have too many differences from their logical 750 | counterparts. 751 | - Return values are converted to Booleans 752 | - Operator precedence is different 753 | - Operators are implemented as method calls, which are about 10% slower 754 | - Can only be invoked on **true**, **false**, or **nil** 755 | 756 | With such an abundance of arguments against, arguments in favor had better 757 | be significant in length or benefit. Unfortunately, they're not. 758 | 759 | - Conversion of return values to Booleans slightly faster than double negation. 760 | - Eager evaluation? 761 | - Maybe useful in irb? 762 | 763 | I didn't expect to find so many reasons not to use eager or bitwise Boolean 764 | operators, but maybe that's part of the reason I had so much trouble finding 765 | examples of bitwise Boolean operators at large. With the evidence laid out 766 | before you, I hope you will join me in continuing to never use any of the 767 | bitwise Boolean operators in Ruby without a comment and a damn good reason. 768 | 769 | Thanks for reading! 770 | 771 | *Have I missed something? Do you know of an example of bitwise and/or eager 772 | Boolean operators being used effectively? Have I got it all wrong? Leave me a 773 | comment and let me know! 
I'd love to hear your feedback and/or find a 774 | legitimate reason to utilize the family of bitwise Boolean operators.* 775 | 776 | [^1]: [Short-circuit evaluation - Support in common programming languages](https://en.wikipedia.org/wiki/Short-circuit_evaluation#Support_in_common_programming_languages) 777 | [^2]: [Gist: Errors during eager evaluation cause result of logical expression to be lost](https://gist.github.com/tdg5/12eccaae6132e72c0490) 778 | --------------------------------------------------------------------------------