├── html └── .gitkeep ├── .rspec ├── README.md ├── lib ├── blog_snippets.rb └── blog_snippets │ ├── version.rb │ ├── articles │ ├── attr_optimizations.rb │ ├── attr_optimizations │ │ ├── minimalist_attrs.rb │ │ └── excessive_attrs.rb │ ├── tail_call_optimization_in_ruby_internals │ │ ├── fact.rb │ │ ├── fib.rb │ │ ├── opt_send_without_block.vm.inc │ │ ├── fact_disasm.txt │ │ ├── fact_tco_disasm.txt │ │ ├── tail_optimized_reload.rb │ │ ├── fib_tco_disasm.txt │ │ ├── fib_disasm.txt │ │ ├── from_call_method_to_tco.c │ │ └── README.md │ ├── on_the_road_from_ruby_journeyman_to_ruby_master │ │ ├── int_from_ord_diff_benchmark.rb │ │ └── README.md │ ├── tuning_dd_block_size │ │ ├── dd_obs_test.sh │ │ ├── dd_ibs_test.sh │ │ └── README.md │ ├── eager_boolean_operators │ │ ├── notes.txt │ │ └── README.md │ ├── introducing_the_tco_method_gem │ │ └── README.md │ ├── tail_call_optimization_in_ruby_background │ │ └── README.md │ └── module_factory_for_dependency_management │ │ └── README.md │ ├── markdown_to_html_transformer.rb │ └── renderers │ └── wordpress_html_renderer.rb ├── test ├── test_helper.rb ├── concerns │ ├── coverage.rb │ └── test_case.rb └── unit │ ├── blog_snippets_test.rb │ └── markdown_to_html_transformer_test.rb ├── .travis.yml ├── script ├── wp-console └── update_remote_revision ├── Guardfile ├── Gemfile ├── .gitignore ├── blog_snippets.gemspec ├── Rakefile ├── LICENSE └── notes.txt /html/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.rspec: -------------------------------------------------------------------------------- 1 | --color 2 | --require spec_helper 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Blog Snippets 2 | 3 | Code snippets from my blog (http://blog.tdg5.com) 4 | 
-------------------------------------------------------------------------------- /lib/blog_snippets.rb: -------------------------------------------------------------------------------- 1 | require "blog_snippets/version" 2 | 3 | module BlogSnippets 4 | end 5 | -------------------------------------------------------------------------------- /lib/blog_snippets/version.rb: -------------------------------------------------------------------------------- 1 | module BlogSnippets 2 | VERSION = "0.0.1".freeze 3 | end 4 | -------------------------------------------------------------------------------- /test/test_helper.rb: -------------------------------------------------------------------------------- 1 | require "concerns/coverage" if ENV["CI"] 2 | require "minitest/autorun" 3 | require "mocha/setup" 4 | require "blog_snippets" 5 | require "concerns/test_case" 6 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: ruby 2 | 3 | rvm: 4 | - 1.9.3 5 | - 2.0.0 6 | - 2.1.0 7 | - 2.2.0 8 | - jruby-19mode 9 | - jruby-head 10 | - rbx-19mode 11 | - rbx-2 12 | -------------------------------------------------------------------------------- /lib/blog_snippets/articles/attr_optimizations.rb: -------------------------------------------------------------------------------- 1 | require "blog_snippets/articles/attr_optimizations/minimalist_attrs" 2 | require "blog_snippets/articles/attr_optimizations/excessive_attrs" 3 | -------------------------------------------------------------------------------- /test/concerns/coverage.rb: -------------------------------------------------------------------------------- 1 | require "simplecov" 2 | require "coveralls" 3 | SimpleCov.formatter = Coveralls::SimpleCov::Formatter 4 | SimpleCov.root(File.expand_path("../../lib", __FILE__)) 5 | SimpleCov.start 6 | 
-------------------------------------------------------------------------------- /script/wp-console: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | 3 | require "ruby-wpdb" 4 | require "pry" 5 | 6 | db_config = YAML.load(File.read(File.expand_path("../../config/database.yml", __FILE__))) 7 | WPDB.init(db_config["database_url"], db_config["wp_prefix"]) 8 | 9 | Pry.start 10 | -------------------------------------------------------------------------------- /lib/blog_snippets/articles/attr_optimizations/minimalist_attrs.rb: -------------------------------------------------------------------------------- 1 | module BlogSnippets 2 | module AttrOptimizations 3 | class MinimalistAttrs 4 | attr_accessor :accessor 5 | attr_reader :reader 6 | attr_writer :writer 7 | end 8 | end 9 | end 10 | -------------------------------------------------------------------------------- /test/concerns/test_case.rb: -------------------------------------------------------------------------------- 1 | # Use alternate shoulda-style DSL for tests 2 | class BlogSnippets::TestCase < Minitest::Spec 3 | class << self 4 | alias :setup :before 5 | alias :teardown :after 6 | alias :context :describe 7 | alias :should :it 8 | end 9 | end 10 | -------------------------------------------------------------------------------- /Guardfile: -------------------------------------------------------------------------------- 1 | guard(:minitest, :all_after_pass => false, :all_on_start => false) do 2 | watch(%r{^lib/blog_snippets\.rb$}) { "test" } 3 | watch(%r{^lib/blog_snippets/(.+)\.rb$}) { |m| "test/unit/#{m[1]}_test.rb" } 4 | watch(%r{^test/.+_test\.rb$}) 5 | watch(%r{^(?:test/test_helper|test/concerns/)(.*)\.rb$}) { "test" } 6 | end 7 | -------------------------------------------------------------------------------- /test/unit/blog_snippets_test.rb: -------------------------------------------------------------------------------- 1 | require 
"test_helper" 2 | 3 | class BlogSnippetsTest < BlogSnippets::TestCase 4 | Subject = BlogSnippets 5 | 6 | subject { Subject } 7 | 8 | context Subject.name do 9 | should "be defined" do 10 | assert defined?(subject), "Expected #{subject.name} to be defined!" 11 | end 12 | end 13 | end 14 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source "https://rubygems.org" 2 | 3 | gemspec 4 | 5 | gem "pry" 6 | gem "redcarpet", :platform => %w[mri] 7 | gem "ruby-wpdb", :git => "https://github.com/tdg5-wordpress/ruby-wpdb.git", :branch => :master 8 | 9 | group :test do 10 | gem "coveralls", :require => false 11 | gem "guard" 12 | gem "guard-minitest" 13 | gem "minitest", ">= 3.0" 14 | gem "mocha" 15 | gem "simplecov", :require => false 16 | end 17 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | Gemfile.lock 2 | 3 | *.gem 4 | *.rbc 5 | /.config 6 | /coverage/ 7 | /html/ 8 | /InstalledFiles 9 | /pkg/ 10 | /spec/reports/ 11 | /test/tmp/ 12 | /test/version_tmp/ 13 | /tmp/ 14 | 15 | ## Documentation cache and generated files: 16 | /.yardoc/ 17 | /_yardoc/ 18 | /doc/ 19 | /rdoc/ 20 | 21 | ## Environment normalisation: 22 | /.bundle/ 23 | /lib/bundler/man/ 24 | 25 | ## Random 26 | /src.html 27 | config/database.yml 28 | -------------------------------------------------------------------------------- /lib/blog_snippets/articles/attr_optimizations/excessive_attrs.rb: -------------------------------------------------------------------------------- 1 | module BlogSnippets 2 | module AttrOptimizations 3 | class ExcessiveAttrs 4 | def accessor 5 | @accessor 6 | end 7 | 8 | def accessor=(value) 9 | @accessor = value 10 | end 11 | 12 | def reader 13 | @reader 14 | end 15 | 16 | def writer=(value) 17 | @writer = value 18 | end 19 | end 
20 | end 21 | end 22 | -------------------------------------------------------------------------------- /lib/blog_snippets/articles/tail_call_optimization_in_ruby_internals/fact.rb: -------------------------------------------------------------------------------- 1 | code = <<-CODE 2 | class Factorial 3 | def self.fact_helper(n, res) 4 | n == 1 ? res : fact_helper(n - 1, n * res) 5 | end 6 | 7 | def self.fact(n) 8 | fact_helper(n, 1) 9 | end 10 | end 11 | CODE 12 | 13 | { 14 | "normal" => { :tailcall_optimization => false, :trace_instruction => false }, 15 | "tail call optimized" => { :tailcall_optimization => true, :trace_instruction => false }, 16 | }.each do |identifier, compile_options| 17 | instruction_sequence = RubyVM::InstructionSequence.new(code, nil, nil, nil, compile_options) 18 | puts "#{identifier}:\n#{instruction_sequence.disasm}" 19 | end 20 | -------------------------------------------------------------------------------- /blog_snippets.gemspec: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | lib = File.expand_path("../lib", __FILE__) 3 | $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) 4 | require "blog_snippets/version" 5 | 6 | Gem::Specification.new do |spec| 7 | spec.name = "blog_snippets" 8 | spec.version = BlogSnippets::VERSION 9 | spec.authors = ["Danny Guinther"] 10 | spec.email = ["dannyguinther@gmail.com"] 11 | spec.summary = %q{Code snippets from my blog.} 12 | spec.description = %q{Code snippets from my blog: http://blog.tdg5.com} 13 | spec.homepage = "https://github.com/tdg5/blog_snippets" 14 | spec.license = "MIT" 15 | 16 | spec.files = `git ls-files -z`.split("\x0") 17 | spec.test_files = spec.files.grep(%r{^(test|spec|features)/}) 18 | spec.require_paths = ["lib"] 19 | 20 | spec.add_development_dependency "bundler", "~> 1.6" 21 | spec.add_development_dependency "rake" 22 | end 23 | -------------------------------------------------------------------------------- /Rakefile: 
-------------------------------------------------------------------------------- 1 | require "bundler/gem_tasks" 2 | require "rake/testtask" 3 | 4 | Rake::TestTask.new do |t| 5 | t.libs << "test" 6 | t.pattern = "test/**/*_test.rb" 7 | end 8 | 9 | task :generate_html, [:source_path] do |tsk, arguments| 10 | require "redcarpet" 11 | require "blog_snippets/renderers/wordpress_html_renderer" 12 | require "blog_snippets/markdown_to_html_transformer" 13 | 14 | source_path = arguments[:source_path] || ENV["SOURCE"] 15 | source_path = File.expand_path(File.join("..", source_path), __FILE__) 16 | raise "#{source_path} does not exist!" unless File.exist?(source_path) 17 | raw_source = File.open(source_path, "r") { |f| f.read } 18 | renderer = BlogSnippets::Renderers::WordpressHTMLRenderer.new 19 | transformer = BlogSnippets::MarkdownToHTMLTransformer.new({ 20 | :parser_class => Redcarpet::Markdown, 21 | :renderer => renderer 22 | }) 23 | html = transformer.transform(raw_source) 24 | puts "---- BEGIN COPY ----\n#{html}\n---- END COPY ----" 25 | end 26 | 27 | task :default => :test 28 | -------------------------------------------------------------------------------- /lib/blog_snippets/articles/tail_call_optimization_in_ruby_internals/fib.rb: -------------------------------------------------------------------------------- 1 | { 2 | "Fib" => { :tailcall_optimization => false, :trace_instruction => false }, 3 | "TCOFib" => { :tailcall_optimization => true, :trace_instruction => false }, 4 | }.each do |class_name, compile_options| 5 | RubyVM::InstructionSequence.compile_option = compile_options 6 | code = <<-CODE 7 | module BlogSnippets 8 | module #{class_name} 9 | def self.acc(i, n, result) 10 | if i == -1 11 | result 12 | else 13 | acc(i - 1, n + result, n) 14 | end 15 | end 16 | 17 | def self.fib(i) 18 | acc(i, 1, 0) 19 | end 20 | end 21 | end 22 | CODE 23 | instruction_sequence = RubyVM::InstructionSequence.new(code) 24 | 25 | puts 
"#{class_name}:\n#{instruction_sequence.disasm}" 26 | instruction_sequence.eval 27 | end 28 | 29 | # Reset compile options 30 | RubyVM::InstructionSequence.compile_option = { :tailcall_optimization => false, :trace_instruction => true } 31 | -------------------------------------------------------------------------------- /lib/blog_snippets/articles/tail_call_optimization_in_ruby_internals/opt_send_without_block.vm.inc: -------------------------------------------------------------------------------- 1 | INSN_ENTRY(opt_send_without_block){ 2 | { 3 | VALUE val; 4 | CALL_INFO ci = (CALL_INFO)GET_OPERAND(1); 5 | 6 | DEBUG_ENTER_INSN("opt_send_without_block"); 7 | ADD_PC(1+1); 8 | PREFETCH(GET_PC()); 9 | #define CURRENT_INSN_opt_send_without_block 1 10 | #define INSN_IS_SC() 0 11 | #define INSN_LABEL(lab) LABEL_opt_send_without_block_##lab 12 | #define LABEL_IS_SC(lab) LABEL_##lab##_##t 13 | COLLECT_USAGE_INSN(BIN(opt_send_without_block)); 14 | COLLECT_USAGE_OPERAND(BIN(opt_send_without_block), 0, ci); 15 | { 16 | ci->argc = ci->orig_argc; 17 | vm_search_method(ci, ci->recv = TOPN(ci->argc)); 18 | CALL_METHOD(ci); 19 | 20 | CHECK_VM_STACK_OVERFLOW_FOR_INSN(REG_CFP, 1); 21 | PUSH(val); 22 | #undef CURRENT_INSN_opt_send_without_block 23 | #undef INSN_IS_SC 24 | #undef INSN_LABEL 25 | #undef LABEL_IS_SC 26 | END_INSN(opt_send_without_block); 27 | } 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2015 Danny Guinther 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining 4 | a copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is 
furnished to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be 12 | included in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /script/update_remote_revision: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | 3 | require "ruby-wpdb" 4 | require "redcarpet" 5 | require "blog_snippets/renderers/wordpress_html_renderer" 6 | require "blog_snippets/markdown_to_html_transformer" 7 | require "yaml" 8 | 9 | source_path = ARGV[1] 10 | raise "#{source_path} does not exist!" 
unless File.exist?(source_path) 11 | 12 | db_config = YAML.load(File.read(File.expand_path("../../config/database.yml", __FILE__))) 13 | WPDB.init(db_config["database_url"], db_config["wp_prefix"]) 14 | 15 | raw_source = File.read(source_path) 16 | renderer = BlogSnippets::Renderers::WordpressHTMLRenderer.new 17 | transformer = BlogSnippets::MarkdownToHTMLTransformer.new({ 18 | :parser_class => Redcarpet::Markdown, 19 | :renderer => renderer 20 | }) 21 | post_content = transformer.transform(raw_source) 22 | 23 | post_id = Integer(ARGV[0]) 24 | post = WPDB::Post.where(:id => post_id).first 25 | last_revision = post.revisions.last 26 | revision = WPDB::Post.new 27 | last_revision.keys.each {|key| revision.send("#{key}=", post.send(key)) } 28 | revision.ID = nil 29 | revision.instance_variable_set(:@new, true) 30 | revision.post_content = post_content 31 | revision.post_modified = Time.now 32 | revision.post_modified_gmt = Time.now.utc 33 | revision.save 34 | -------------------------------------------------------------------------------- /lib/blog_snippets/markdown_to_html_transformer.rb: -------------------------------------------------------------------------------- 1 | module BlogSnippets 2 | class MarkdownToHTMLTransformer 3 | 4 | DEFAULT_MARKDOWN_EXTENSIONS = { 5 | :autolink => true, 6 | :disable_indented_code_blocks => true, 7 | :fenced_code_blocks => true, 8 | :footnotes => true, 9 | :no_intra_emphasis => true, 10 | :space_after_headers => true, 11 | :strikethrough => true, 12 | :tables => true, 13 | :underline => true, 14 | } 15 | 16 | attr_reader :markdown_extensions, :renderer 17 | 18 | def self.default_markdown_extensions 19 | const_get(:DEFAULT_MARKDOWN_EXTENSIONS).dup 20 | end 21 | 22 | def initialize(options = {}) 23 | raise ArgumentError, ":renderer is required!" unless options[:renderer] 24 | raise ArgumentError, ":parser_class is required!" 
unless options[:parser_class] 25 | 26 | @renderer = options[:renderer] 27 | @parser_class = options[:parser_class] 28 | @markdown_extensions = options[:markdown_extensions] || default_markdown_extensions 29 | end 30 | 31 | def parser 32 | @parser ||= parser_class.new(renderer, @markdown_extensions) 33 | end 34 | 35 | def transform(markdown) 36 | parser.render(markdown) 37 | end 38 | 39 | private 40 | 41 | attr_reader :parser_class 42 | 43 | def default_markdown_extensions 44 | self.class.default_markdown_extensions 45 | end 46 | end 47 | end 48 | -------------------------------------------------------------------------------- /lib/blog_snippets/articles/on_the_road_from_ruby_journeyman_to_ruby_master/int_from_ord_diff_benchmark.rb: -------------------------------------------------------------------------------- 1 | require "benchmark/ips" 2 | 3 | NUMBER = "7316717653133062491922511967442657474235534919493496983520312774506326239578318016984801869478851843858615607891129494954595017379583319528532088055111254069874715852386305071569329096329522744304355766896648950445244523161731856403098711121722383113622298934233803081353362766142828064444866452387493035890729629049156044077239071381051585930796086670172427121883998797908792274921901699720888093776657273330010533678812202354218097512545405947522435258490771167055601360483958644670632441572215539753697817977846174064955149290862569321978468622482839722413756570560574902614079729686524145351004748216637048440319989000889524345065854122758866688116427171479924442928230863465674813919123162824586178664583591245665294765456828489128831426076900422421902267105562632111110937054421750694165896040807198403850962455444362981230987879927244284909188845801561660979191338754992005240636899125607176060588611646710940507754100225698315520005593572972571636269561882670428252483600823257530420752963450" 4 | CHARS = NUMBER.each_char.to_a 5 | ZERO_ORD = "0".ord.freeze 6 | ORD_PROC = proc { |char| char.ord - ZERO_ORD } 7 | 8 | 
Benchmark.ips do |bm| 9 | bm.report("String#to_i") { CHARS.each(&:to_i) } 10 | bm.report("String#ord - ZERO_ORD") { CHARS.each(&ORD_PROC) } 11 | end 12 | 13 | # Calculating ----------------------------------------- 14 | # String#to_i 836.000 i/100ms 15 | # String#ord - ZERO_ORD 1.083k i/100ms 16 | # ----------------------------------------------------- 17 | # String#to_i 8.473k (± 1.2%) i/s - 42.636k 18 | # String#ord - ZERO_ORD 10.859k (± 1.4%) i/s - 55.233k 19 | -------------------------------------------------------------------------------- /lib/blog_snippets/articles/tuning_dd_block_size/dd_obs_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Since we're dealing with dd, abort if any errors occur 4 | set -e 5 | 6 | TEST_FILE=${1:-dd_obs_testfile} 7 | TEST_FILE_EXISTS=0 8 | if [ -e "$TEST_FILE" ]; then TEST_FILE_EXISTS=1; fi 9 | TEST_FILE_SIZE=134217728 10 | 11 | if [ $EUID -ne 0 ]; then 12 | echo "NOTE: Kernel cache will not be cleared between tests without sudo. This will likely cause inaccurate results." 1>&2 13 | fi 14 | 15 | # Header 16 | PRINTF_FORMAT="%8s : %s\n" 17 | printf "$PRINTF_FORMAT" 'block size' 'transfer rate' 18 | 19 | # Block sizes of 512b 1K 2K 4K 8K 16K 32K 64K 128K 256K 512K 1M 2M 4M 8M 16M 32M 64M 20 | for BLOCK_SIZE in 512 1024 2048 4096 8192 16384 32768 65536 131072 262144 524288 1048576 2097152 4194304 8388608 16777216 33554432 67108864 21 | do 22 | # Calculate number of segments required to copy 23 | COUNT=$(($TEST_FILE_SIZE / $BLOCK_SIZE)) 24 | 25 | if [ $COUNT -le 0 ]; then 26 | echo "Block size of $BLOCK_SIZE estimated to require $COUNT blocks, aborting further tests." 
27 | break 28 | fi 29 | 30 | # Clear kernel cache to ensure more accurate test 31 | [ $EUID -eq 0 ] && [ -e /proc/sys/vm/drop_caches ] && echo 3 > /proc/sys/vm/drop_caches 32 | 33 | # Create a test file with the specified block size 34 | DD_RESULT=$(dd if=/dev/zero of=$TEST_FILE bs=$BLOCK_SIZE count=$COUNT conv=fsync 2>&1 1>/dev/null) 35 | 36 | # Extract the transfer rate from dd's STDERR output 37 | TRANSFER_RATE=$(echo $DD_RESULT | \grep --only-matching -E '[0-9.]+ ([MGk]?B|bytes)/s(ec)?') 38 | 39 | # Clean up the test file if we created one 40 | if [ $TEST_FILE_EXISTS -ne 0 ]; then rm $TEST_FILE; fi 41 | 42 | # Output the result 43 | printf "$PRINTF_FORMAT" "$BLOCK_SIZE" "$TRANSFER_RATE" 44 | done 45 | -------------------------------------------------------------------------------- /lib/blog_snippets/articles/tuning_dd_block_size/dd_ibs_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Since we're dealing with dd, abort if any errors occur 4 | set -e 5 | 6 | TEST_FILE=${1:-dd_ibs_testfile} 7 | if [ -e "$TEST_FILE" ]; then TEST_FILE_EXISTS=$?; fi 8 | TEST_FILE_SIZE=134217728 9 | 10 | # Exit if file exists 11 | if [ -e $TEST_FILE ]; then 12 | echo "Test file $TEST_FILE exists, aborting." 13 | exit 1 14 | fi 15 | TEST_FILE_EXISTS=1 16 | 17 | if [ $EUID -ne 0 ]; then 18 | echo "NOTE: Kernel cache will not be cleared between tests without sudo. This will likely cause inaccurate results." 1>&2 19 | fi 20 | 21 | # Create test file 22 | echo 'Generating test file...' 
23 | BLOCK_SIZE=65536 24 | COUNT=$(($TEST_FILE_SIZE / $BLOCK_SIZE)) 25 | dd if=/dev/urandom of=$TEST_FILE bs=$BLOCK_SIZE count=$COUNT conv=fsync > /dev/null 2>&1 26 | 27 | # Header 28 | PRINTF_FORMAT="%8s : %s\n" 29 | printf "$PRINTF_FORMAT" 'block size' 'transfer rate' 30 | 31 | # Block sizes of 512b 1K 2K 4K 8K 16K 32K 64K 128K 256K 512K 1M 2M 4M 8M 16M 32M 64M 32 | for BLOCK_SIZE in 512 1024 2048 4096 8192 16384 32768 65536 131072 262144 524288 1048576 2097152 4194304 8388608 16777216 33554432 67108864 33 | do 34 | # Clear kernel cache to ensure more accurate test 35 | [ $EUID -eq 0 ] && [ -e /proc/sys/vm/drop_caches ] && echo 3 > /proc/sys/vm/drop_caches 36 | 37 | # Read test file out to /dev/null with specified block size 38 | DD_RESULT=$(dd if=$TEST_FILE of=/dev/null bs=$BLOCK_SIZE 2>&1 1>/dev/null) 39 | 40 | # Extract transfer rate 41 | TRANSFER_RATE=$(echo $DD_RESULT | \grep --only-matching -E '[0-9.]+ ([MGk]?B|bytes)/s(ec)?') 42 | 43 | printf "$PRINTF_FORMAT" "$BLOCK_SIZE" "$TRANSFER_RATE" 44 | done 45 | 46 | # Clean up the test file if we created one 47 | if [ $TEST_FILE_EXISTS -ne 0 ]; then rm $TEST_FILE; fi 48 | -------------------------------------------------------------------------------- /lib/blog_snippets/articles/tail_call_optimization_in_ruby_internals/fact_disasm.txt: -------------------------------------------------------------------------------- 1 | == disasm: @>========== 2 | 0000 putspecialobject 3 ( 1) 3 | 0002 putnil 4 | 0003 defineclass :Factorial, , 0 5 | 0007 leave 6 | == disasm: @>=== 7 | 0000 putspecialobject 1 ( 2) 8 | 0002 putself 9 | 0003 putobject :fact_helper 10 | 0005 putiseq fact_helper 11 | 0007 opt_send_without_block 12 | 0009 pop 13 | 0010 putspecialobject 1 ( 6) 14 | 0012 putself 15 | 0013 putobject :fact 16 | 0015 putiseq fact 17 | 0017 opt_send_without_block 18 | 0019 leave 19 | == disasm: >========= 20 | local table (size: 3, argc: 2 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1]) 21 | [ 3] n [ 2] res 22 
| 0000 getlocal_OP__WC__0 3 ( 3) 23 | 0002 putobject_OP_INT2FIX_O_1_C_ 24 | 0003 opt_eq 25 | 0005 branchunless 11 26 | 0007 getlocal_OP__WC__0 2 27 | 0009 leave 28 | 0010 pop 29 | 0011 putself 30 | 0012 getlocal_OP__WC__0 3 31 | 0014 putobject_OP_INT2FIX_O_1_C_ 32 | 0015 opt_minus 33 | 0017 getlocal_OP__WC__0 3 34 | 0019 getlocal_OP__WC__0 2 35 | 0021 opt_mult 36 | 0023 opt_send_without_block 37 | 0025 leave 38 | == disasm: >================ 39 | local table (size: 2, argc: 1 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1]) 40 | [ 2] n 41 | 0000 putself ( 7) 42 | 0001 getlocal_OP__WC__0 2 43 | 0003 putobject_OP_INT2FIX_O_1_C_ 44 | 0004 opt_send_without_block 45 | 0006 leave 46 | -------------------------------------------------------------------------------- /lib/blog_snippets/articles/tail_call_optimization_in_ruby_internals/fact_tco_disasm.txt: -------------------------------------------------------------------------------- 1 | == disasm: @>========== 2 | 0000 putspecialobject 3 ( 1) 3 | 0002 putnil 4 | 0003 defineclass :Factorial, , 0 5 | 0007 leave 6 | == disasm: @>=== 7 | 0000 putspecialobject 1 ( 2) 8 | 0002 putself 9 | 0003 putobject :fact_helper 10 | 0005 putiseq fact_helper 11 | 0007 opt_send_without_block 12 | 0009 pop 13 | 0010 putspecialobject 1 ( 6) 14 | 0012 putself 15 | 0013 putobject :fact 16 | 0015 putiseq fact 17 | 0017 opt_send_without_block 18 | 0019 leave 19 | == disasm: >========= 20 | local table (size: 3, argc: 2 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1]) 21 | [ 3] n [ 2] res 22 | 0000 getlocal_OP__WC__0 3 ( 3) 23 | 0002 putobject_OP_INT2FIX_O_1_C_ 24 | 0003 opt_eq 25 | 0005 branchunless 11 26 | 0007 getlocal_OP__WC__0 2 27 | 0009 leave 28 | 0010 pop 29 | 0011 putself 30 | 0012 getlocal_OP__WC__0 3 31 | 0014 putobject_OP_INT2FIX_O_1_C_ 32 | 0015 opt_minus 33 | 0017 getlocal_OP__WC__0 3 34 | 0019 getlocal_OP__WC__0 2 35 | 0021 opt_mult 36 | 0023 opt_send_without_block 37 | 0025 leave 38 | == disasm: 
>================ 39 | local table (size: 2, argc: 1 [opts: 0, rest: -1, post: 0, block: -1, kw: -1@-1, kwrest: -1]) 40 | [ 2] n 41 | 0000 putself ( 7) 42 | 0001 getlocal_OP__WC__0 2 43 | 0003 putobject_OP_INT2FIX_O_1_C_ 44 | 0004 opt_send_without_block 45 | 0006 leave 46 | -------------------------------------------------------------------------------- /lib/blog_snippets/articles/tail_call_optimization_in_ruby_internals/tail_optimized_reload.rb: -------------------------------------------------------------------------------- 1 | # This script demonstrates that any file loaded after a change to 2 | # RubyVM::InstructionSequence.compile_option will be compiled with the new 3 | # compile options. Rather than do this with two scripts, this script is hacked 4 | # together such that this can be demonstrated with one file that reloads itself 5 | # the first time it is loaded. 6 | 7 | # Flag indicating whether this is the first time time this file has been loaded. 8 | $first_load = true if $first_load.nil? 9 | 10 | # We can actually turn on tailcall optimization here without affecting how the 11 | # script is loaded the first time because the RubyVM::InstructionSequence object 12 | # that is used to compile the file the first time has already been created and 13 | # as such won't be affected by changing the global compile option. 14 | RubyVM::InstructionSequence.compile_option = { 15 | tailcall_optimization: true, 16 | trace_instruction: false, 17 | } 18 | 19 | # Declare classes to facilitate #instance_eval later 20 | class FirstLoadFactorial; end 21 | class ReloadedFactorial; end 22 | 23 | # On the first load, extend FirstLoadFactorial, 24 | # on the second load, extend ReloadedFactorial. 25 | klass = $first_load ? 
FirstLoadFactorial : ReloadedFactorial 26 | 27 | # Tail recursive factorial adapted from 28 | # https://github.com/ruby/ruby/blob/fcf6fa8781fe236a9761ad5d75fa1b87f1afeea2/test/ruby/test_optimization.rb#L213 29 | klass.instance_eval do 30 | def self.fact_helper(n, res) 31 | n == 1 ? res : fact_helper(n - 1, n * res) 32 | end 33 | 34 | def self.fact(n) 35 | fact_helper(n, 1) 36 | end 37 | end 38 | 39 | # This check avoids calculating the factorial twice; ReloadedFactorial will only 40 | # respond to :fact after the file has been reloaded. 41 | if ReloadedFactorial.respond_to?(:fact) 42 | begin 43 | puts "FirstLoadFactorial: #{FirstLoadFactorial.fact(50000).to_s.length}" 44 | rescue SystemStackError 45 | puts "FirstLoadFactorial: stack level too deep" 46 | end 47 | 48 | puts "ReloadedFactorial: #{ReloadedFactorial.fact(50000).to_s.length}" 49 | end 50 | 51 | # Reload the file on the first load only. 52 | if $first_load 53 | $first_load = false 54 | load __FILE__ 55 | end 56 | 57 | # $ ruby tail_optimized_reload.rb 58 | # FirstLoadFactorial: stack level too deep 59 | # ReloadedFactorial: 213237 60 | -------------------------------------------------------------------------------- /notes.txt: -------------------------------------------------------------------------------- 1 | Coercion to Boolean compared to lazy evaluation counterpart 2 | > false | :WTF? 3 | => true 4 | > true & :WTF? 5 | => true 6 | Even with very large object on RHS, no efficiency gained by coercion 7 | Because of method call implementation? 
8 | Other than tricky coercion, only gain is that it is ever so slightly faster 9 | than double negation: 10 | require 'benchmark/ips' 11 | 12 | Benchmark.ips do |bm| 13 | bm.report("Double negate") { !!(true && :a) } 14 | 15 | bm.report("Logical bit-wise coerce") { true & :a } 16 | end 17 | 18 | # Calculating -------------------------------------------- 19 | # Double negate 138.008k i/100ms 20 | # Logical bit-wise coerce 139.350k i/100ms 21 | # -------------------------------------------------------- 22 | # Double negate 7.262M (± 1.0%) i/s - 36.434M 23 | # Logical bit-wise coerce 7.825M (± 1.3%) i/s - 39.157M 24 | # -------------------------------------------------------- 25 | 26 | 27 | 28 | "Maybe use when you have very simple boolean expressions and the cost 29 | of short cutting (i.e. a branch) is greater than the time you save by 30 | not evaluating the later expressions." 31 | http://stackoverflow.com/a/7105382/1169710 32 | Secretly method calls in Ruby! 33 | Doesn't seem to apply in Ruby. Branching always cheaper than a method call. 34 | 35 | 36 | Operator precedence: 37 | > true || 1 && 3 38 | => true 39 | > true || (1 && 3) 40 | => true 41 | 42 | > true | 1 && 3 43 | => 3 44 | > (true | 1) && 3 45 | => 3 46 | 47 | 48 | > false && true ^ true 49 | => false 50 | > false && (true ^ true) 51 | => false 52 | 53 | > false & true ^ true 54 | => true 55 | > (false && true) ^ true 56 | => true 57 | 58 | Seems like they'd mostly be used for their side effects which is bad 59 | 60 | Only works consistently for falsy values and true. Truthy values explosive! 
61 | 62 | Examples: 63 | https://github.com/ruby/ruby/blob/75feee0968c9345e7ffd2bda9835fcd60b4c0880/benchmark/bm_so_k_nucleotide.rb#L40 64 | https://github.com/rubyspec/rubyspec/blob/38b775a32293ce7ec5bdadaa7e70422fb5dc3a68/core/string/slice_spec.rb#L436 65 | https://github.com/rubyspec/rubyspec/blob/38b775a32293ce7ec5bdadaa7e70422fb5dc3a68/core/string/shared/slice.rb#L419 66 | https://github.com/rubyspec/rubyspec/blob/324c37bb67ea51f197954a37a2c71878eeadea01/core/string/plus_spec.rb#L41 67 | -------------------------------------------------------------------------------- /lib/blog_snippets/articles/tail_call_optimization_in_ruby_internals/fib_tco_disasm.txt: -------------------------------------------------------------------------------- 1 | == disasm: @>========== 2 | 0000 putspecialobject 3 ( 1) 3 | 0002 putnil 4 | 0003 defineclass :BlogSnippets, , 2 5 | 0007 leave 6 | == disasm: @> 7 | 0000 putspecialobject 3 ( 2) 8 | 0002 putnil 9 | 0003 defineclass :TCOFib, , 2 10 | 0007 leave 11 | == disasm: @>===== 12 | 0000 putspecialobject 1 ( 3) 13 | 0002 putself 14 | 0003 putobject :acc 15 | 0005 putiseq acc 16 | 0007 opt_send_simple 17 | 0009 pop 18 | 0010 putspecialobject 1 ( 11) 19 | 0012 putself 20 | 0013 putobject :fib 21 | 0015 putiseq fib 22 | 0017 opt_send_simple 23 | 0019 leave 24 | == disasm: >================= 25 | local table (size: 4, argc: 3 [opts: 0, rest: -1, post: 0, block: -1, keyword: 0@5] s1) 26 | [ 4] i [ 3] n [ 2] result 27 | 0000 getlocal_OP__WC__0 4 ( 4) 28 | 0002 putobject -1 29 | 0004 opt_eq 30 | 0006 branchunless 12 31 | 0008 getlocal_OP__WC__0 2 ( 5) 32 | 0010 leave ( 4) 33 | 0011 pop 34 | 0012 putself ( 7) 35 | 0013 getlocal_OP__WC__0 4 36 | 0015 putobject_OP_INT2FIX_O_1_C_ 37 | 0016 opt_minus 38 | 0018 getlocal_OP__WC__0 3 39 | 0020 getlocal_OP__WC__0 2 40 | 0022 opt_plus 41 | 0024 getlocal_OP__WC__0 3 42 | 0026 opt_send_simple 43 | 0028 leave 44 | == disasm: >================= 45 | local table (size: 2, argc: 1 [opts: 0, rest: -1, post: 0, 
block: -1, keyword: 0@3] s1) 46 | [ 2] i 47 | 0000 putself ( 12) 48 | 0001 getlocal_OP__WC__0 2 49 | 0003 putobject_OP_INT2FIX_O_1_C_ 50 | 0004 putobject_OP_INT2FIX_O_0_C_ 51 | 0005 opt_send_simple 52 | 0007 leave 53 | -------------------------------------------------------------------------------- /lib/blog_snippets/articles/tail_call_optimization_in_ruby_internals/fib_disasm.txt: -------------------------------------------------------------------------------- 1 | == disasm: @>========== [44/385] 2 | 0000 putspecialobject 3 ( 1) 3 | 0002 putnil 4 | 0003 defineclass :BlogSnippets, , 2 5 | 0007 leave 6 | == disasm: @> 7 | 0000 putspecialobject 3 ( 2) 8 | 0002 putnil 9 | 0003 defineclass :Fib, , 2 10 | 0007 leave 11 | == disasm: @>======== 12 | 0000 putspecialobject 1 ( 3) 13 | 0002 putself 14 | 0003 putobject :acc 15 | 0005 putiseq acc 16 | 0007 opt_send_simple 17 | 0009 pop 18 | 0010 putspecialobject 1 ( 11) 19 | 0012 putself 20 | 0013 putobject :fib 21 | 0015 putiseq fib 22 | 0017 opt_send_simple 23 | 0019 leave 24 | == disasm: >================= 25 | local table (size: 4, argc: 3 [opts: 0, rest: -1, post: 0, block: -1, keyword: 0@5] s1) 26 | [ 4] i [ 3] n [ 2] result 27 | 0000 getlocal_OP__WC__0 4 ( 4) 28 | 0002 putobject -1 29 | 0004 opt_eq 30 | 0006 branchunless 12 31 | 0008 getlocal_OP__WC__0 2 ( 5) 32 | 0010 leave ( 4) 33 | 0011 pop 34 | 0012 putself ( 7) 35 | 0013 getlocal_OP__WC__0 4 36 | 0015 putobject_OP_INT2FIX_O_1_C_ 37 | 0016 opt_minus 38 | 0018 getlocal_OP__WC__0 3 39 | 0020 getlocal_OP__WC__0 2 40 | 0022 opt_plus 41 | 0024 getlocal_OP__WC__0 3 42 | 0026 opt_send_simple 43 | 0028 leave 44 | == disasm: >================= 45 | local table (size: 2, argc: 1 [opts: 0, rest: -1, post: 0, block: -1, keyword: 0@3] s1) 46 | [ 2] i 47 | 0000 putself ( 12) 48 | 0001 getlocal_OP__WC__0 2 49 | 0003 putobject_OP_INT2FIX_O_1_C_ 50 | 0004 putobject_OP_INT2FIX_O_0_C_ 51 | 0005 opt_send_simple 52 | 0007 leave 53 | 
-------------------------------------------------------------------------------- /lib/blog_snippets/articles/eager_boolean_operators/notes.txt: -------------------------------------------------------------------------------- 1 | Coercion to Boolean compared to lazy evaluation counterpart 2 | > false | :WTF? 3 | => true 4 | > true & :WTF? 5 | => true 6 | Even with very large object on RHS, no efficiency gained by coercion 7 | Because of method call implementation? 8 | Other than tricky coercion, only gain is that it is ever so slightly faster 9 | than double negation: 10 | require 'benchmark/ips' 11 | 12 | Benchmark.ips do |bm| 13 | bm.report("Double negate") { !!(true && :a) } 14 | 15 | bm.report("Logical bit-wise coerce") { true & :a } 16 | end 17 | 18 | # Calculating -------------------------------------------- 19 | # Double negate 138.008k i/100ms 20 | # Logical bit-wise coerce 139.350k i/100ms 21 | # -------------------------------------------------------- 22 | # Double negate 7.262M (± 1.0%) i/s - 36.434M 23 | # Logical bit-wise coerce 7.825M (± 1.3%) i/s - 39.157M 24 | # -------------------------------------------------------- 25 | 26 | 27 | 28 | "Maybe use when you have very simple boolean expressions and the cost 29 | of short cutting (i.e. a branch) is greater than the time you save by 30 | not evaluating the later expressions." 31 | http://stackoverflow.com/a/7105382/1169710 32 | Secretly method calls in Ruby! 33 | Doesn't seem to apply in Ruby. Branching always cheaper than a method call. 
34 | 35 | 36 | Operator precedence: 37 | > true || 1 && 3 38 | => true 39 | > true || (1 && 3) 40 | => true 41 | 42 | > true | 1 && 3 43 | => 3 44 | > (true | 1) && 3 45 | => 3 46 | 47 | 48 | > false && true ^ true 49 | => false 50 | > false && (true ^ true) 51 | => false 52 | 53 | > false & true ^ true 54 | => true 55 | > (false && true) ^ true 56 | => true 57 | 58 | Seems like they'd mostly be used for their side effects which is bad 59 | 60 | The console is the only somewhat reasonable use case I can think of. 61 | 62 | Only works consistently for falsy values and true. Truthy values explosive! 63 | 64 | Examples: 65 | https://github.com/ruby/ruby/blob/75feee0968c9345e7ffd2bda9835fcd60b4c0880/benchmark/bm_so_k_nucleotide.rb#L40 66 | https://github.com/rubyspec/rubyspec/blob/38b775a32293ce7ec5bdadaa7e70422fb5dc3a68/core/string/slice_spec.rb#L436 67 | https://github.com/rubyspec/rubyspec/blob/38b775a32293ce7ec5bdadaa7e70422fb5dc3a68/core/string/shared/slice.rb#L419 68 | https://github.com/rubyspec/rubyspec/blob/324c37bb67ea51f197954a37a2c71878eeadea01/core/string/plus_spec.rb#L41 69 | 70 | 71 | Method execution behavior means errors on the RHS, prevent the value from the 72 | LHS from being stored: 73 | 74 | or_result = nil 75 | begin 76 | or_result = true | Seriously(this(is(valid(Ruby!)))) 77 | rescue NameError 78 | puts "NameError :(" 79 | end 80 | or_result 81 | # Name Error :( 82 | # => nil 83 | 84 | and_result = nil 85 | begin 86 | and_result = false & 0/0 87 | rescue ZeroDivisionError 88 | puts "ZeroDivisionError :(" 89 | end 90 | and_result 91 | # ZeroDivisionError :( 92 | # => nil 93 | -------------------------------------------------------------------------------- /test/unit/markdown_to_html_transformer_test.rb: -------------------------------------------------------------------------------- 1 | require "test_helper" 2 | require "blog_snippets/markdown_to_html_transformer" 3 | 4 | class MarkdownToHTMLTransformerTest < BlogSnippets::TestCase 5 | Subject = 
BlogSnippets::MarkdownToHTMLTransformer 6 | 7 | subject { Subject } 8 | 9 | context "::default_markdown_extensions" do 10 | should "return expected defaults" do 11 | expected = { 12 | :autolink => true, 13 | :disable_indented_code_blocks => true, 14 | :fenced_code_blocks => true, 15 | :footnotes => true, 16 | :no_intra_emphasis => true, 17 | :space_after_headers => true, 18 | :strikethrough => true, 19 | :tables => true, 20 | :underline => true, 21 | } 22 | assert_equal expected, subject.default_markdown_extensions 23 | end 24 | 25 | should "return a new Hash instance each call" do 26 | first_defaults = subject.default_markdown_extensions 27 | second_defaults = subject.default_markdown_extensions 28 | refute_equal first_defaults.object_id, second_defaults.object_id 29 | end 30 | end 31 | 32 | context "#initialize" do 33 | [:parser_class, :renderer].each do |required_opt| 34 | should "raise unless #{required_opt} option is given" do 35 | assert_raises(ArgumentError) do 36 | opts = default_initialization_options 37 | opts.delete(required_opt) 38 | subject.new(opts) 39 | end 40 | end 41 | end 42 | 43 | should "assign given :renderer to #renderer" do 44 | instance = subject.new(default_initialization_options) 45 | assert_equal renderer, instance.renderer 46 | end 47 | 48 | should "take a Hash of Markdown extensions" do 49 | exts = { :tables => true } 50 | opts = default_initialization_options.merge(:markdown_extensions => exts) 51 | instance = subject.new(opts) 52 | assert_equal exts, instance.markdown_extensions 53 | end 54 | 55 | should "use default Markdown extensions if none given" do 56 | opts = default_initialization_options 57 | opts.delete(:markdown_extensions) 58 | instance = subject.new(opts) 59 | assert_equal subject.default_markdown_extensions, instance.markdown_extensions 60 | end 61 | 62 | should "assign :markdown_extensions to #markdown_extensions" do 63 | exts = { :tables => true } 64 | opts = default_initialization_options.merge(:markdown_extensions => 
exts) 65 | instance = subject.new(opts) 66 | assert_equal exts, instance.markdown_extensions 67 | end 68 | end 69 | 70 | context "instance_methods" do 71 | subject { Subject.new(default_initialization_options) } 72 | 73 | context "#parser" do 74 | should "initialize an instance of parser_class with renderer and markdown extensions" do 75 | parser_class.expects(:new).with(subject.renderer, subject.markdown_extensions) 76 | subject.parser 77 | end 78 | end 79 | 80 | context "#transform" do 81 | should "invoke parser#render with given markdown" do 82 | markdown = "# Hello World!" 83 | subject.expects(:parser).returns(mck = mock) 84 | mck.expects(:render).with(markdown) 85 | subject.transform(markdown) 86 | end 87 | end 88 | end 89 | 90 | def default_initialization_options 91 | { 92 | :parser_class => parser_class, 93 | :renderer => renderer, 94 | } 95 | end 96 | 97 | def parser_class 98 | @parser_class ||= mock 99 | end 100 | 101 | def renderer 102 | @renderer ||= mock 103 | end 104 | end 105 | -------------------------------------------------------------------------------- /lib/blog_snippets/renderers/wordpress_html_renderer.rb: -------------------------------------------------------------------------------- 1 | require "English" 2 | require "json" 3 | require "redcarpet" 4 | 5 | module BlogSnippets 6 | module Renderers 7 | class WordpressHTMLRenderer < Redcarpet::Render::HTML 8 | UNTARGETED_LINK = /^(?:mailto:|#)/.freeze 9 | # http://rubular.com/r/apmHqN4joc 10 | HEADER_MATCHER = /(?
[1-6])[^>]+id="(?[^"]+)".*?>.*?<\/h\k>)/.freeze 11 | INDENTATION_TOKEN = "__WORDPRESS_HTML_RENDERER_INDENTATION__".freeze 12 | NEW_LINE_TOKEN = "__WORDPRESS_HTML_RENDERER_NEW_LINE__".freeze 13 | 14 | def initialize(options = nil) 15 | super(@options = options || default_options) 16 | end 17 | 18 | # Can't call super due to C-extension design, so fake it so we can 19 | # customize it. 20 | def link(link, title, content) 21 | element = %Q[#{content}]) 28 | element 29 | end 30 | 31 | def block_code(code, language_or_attributes) 32 | # Replace line breaks with new-line token 33 | code.gsub!(/\n/, NEW_LINE_TOKEN) 34 | code.gsub!(/ /, INDENTATION_TOKEN) 35 | 36 | # Extract code tag attributes 37 | code_attrs = code_attributes(language_or_attributes) 38 | code_attrs &&= " #{code_attrs}" 39 | 40 | # Can't call super due to C-extension design, so fake it. 41 | [ 42 | "[code#{code_attrs}]", 43 | NEW_LINE_TOKEN, 44 | code, 45 | "[/code]\n", 46 | ].join 47 | end 48 | 49 | def postprocess(document) 50 | remove_new_lines_and_white_space_runs!(document) 51 | replace_tokens!(document) 52 | add_header_links!(document) 53 | end 54 | 55 | private 56 | 57 | def add_header_links!(document) 58 | document.gsub!(HEADER_MATCHER) do |match| 59 | match_data = $LAST_MATCH_INFO 60 | match[0..-6] + 61 | %Q|| + 62 | "" 63 | end 64 | document 65 | end 66 | 67 | def link_attributes(link) 68 | return {} unless attrs = @options[:link_attributes] 69 | link_attrs = attrs.dup 70 | link_attrs.delete("target") if UNTARGETED_LINK === link 71 | link_attrs 72 | end 73 | 74 | def code_attributes(lang_or_attrs) 75 | return "language=\"#{lang_or_attrs}\"" unless /[, :]/ === lang_or_attrs 76 | 77 | # Curly braces are omitted for some reason, so restore them. 
78 | attr_json = JSON.parse("{#{lang_or_attrs}}") 79 | attr_json.map { |key, value| "#{key}=\"#{value}\"" }.join(" ") 80 | end 81 | 82 | def default_options 83 | { 84 | :link_attributes => { 85 | "target" => "_blank", 86 | }, 87 | :with_toc_data => true, 88 | } 89 | end 90 | 91 | def remove_new_lines_and_white_space_runs!(document) 92 | # Remove line breaks; HTML should handle breaking lines 93 | document.gsub!(/\n/, " ") 94 | # Removing line breaks may have introduced white space runs; zap 'em. 95 | # http://rubular.com/r/aaVCG1Wlep 96 | document.gsub!(/(?<=[^\s])\s{2,}/, " ") 97 | document 98 | end 99 | 100 | def replace_tokens!(document) 101 | # Replace tokens with desired characters 102 | document.gsub!(/#{NEW_LINE_TOKEN}/, "\n") 103 | document.gsub!(/#{INDENTATION_TOKEN}/, " ") 104 | document 105 | end 106 | end 107 | end 108 | end 109 | -------------------------------------------------------------------------------- /lib/blog_snippets/articles/introducing_the_tco_method_gem/README.md: -------------------------------------------------------------------------------- 1 | ![tco_method](https://s3.amazonaws.com/tdg5/blog/wp-content/uploads/2015/03/15014139/tco_method.jpg "tco_method") 2 | 3 | Earlier this week I published a gem intended to help simplify the process of 4 | compiling Ruby code with tail call optimization enabled in MRI Ruby. The gem, 5 | [tco_method](https://rubygems.org/gems/tco_method), builds on my recent research 6 | into the [internals of Ruby's implementation of tail call optimization](http://blog.tdg5.com/tail-call-optimization-ruby-deep-dive/) 7 | and the ideas presented in [Nithin Bekal's article *Tail Optimization in Ruby*](http://nithinbekal.com/posts/ruby-tco/). 
8 | 9 | The gem aims to ease the process of compiling select Ruby code with tail call 10 | optimization by providing a helper method, [**TCOMethod.tco_eval**](http://www.rubydoc.info/gems/tco_method/TCOMethod/Mixin:tco_eval), 11 | for evaluating code with tail call optimization enabled and a mix-in, 12 | [**TCOMethod::Mixin**](http://www.rubydoc.info/gems/tco_method/TCOMethod/Mixin), 13 | for adding annotations to Classes and/or Modules for annotating singleton or 14 | instance methods that should be compiled with tail call optimization enabled. 15 | You can see what each of these approaches would look like below. 16 | 17 | ## TCOMethod.eval 18 | 19 | ```ruby 20 | TCOMethod.tco_eval(<<-CODE) 21 | module MyFactorial 22 | def self.factorial(n, acc = 1) 23 | n <= 1 ? acc : factorial(n - 1, n * acc) 24 | end 25 | end 26 | CODE 27 | 28 | MyFactorial.factorial(10_000).to_s.length 29 | # => 35660 30 | ``` 31 | 32 | Though not as powerful as Ruby's native **eval** method, **TCOMethod.tco_eval** provides 33 | easy access to the full power of Ruby with the added benefit of tail call 34 | optimization. The major downside to using **tco_eval** is that code must be 35 | provided as a String. Also, unlike Ruby's standard **eval** method, **tco_eval** 36 | currently cannot take a binding for the evaluation which can make it awkward 37 | at times to connect code that's being compiled with tail optimization to 38 | other application code compiled by Ruby's primary compilation process. 39 | 40 | All that said, I view **tco_eval** as more of a starting point than a solution. 41 | It inches the door a little wider for the Ruby community to play with tail call 42 | optimization and get a better sense of how and when it might be useful. I think 43 | this is an exciting opportunity that Nithin Bekal's work with TCO method 44 | decorators began to explore and, as we'll see momentarily, the 45 | **TCOMethod::Mixin** continues to test the waters of. 
46 | 47 | Beyond the opportunity it offers the Ruby community, I'm also excited because 48 | the [tco_method gem](https://rubygems.org/gems/tco_method) seems like a great 49 | opportunity to dig into Ruby's C extensions and see how extending the gem to 50 | interface with Ruby's C code more directly could extend the abilities of the gem 51 | while further simplifying access to tail call optimization in Ruby. 52 | 53 | ## TCOMethod::Mixin#tco_method 54 | 55 | ```ruby 56 | class MyFibonacci 57 | extend TCOMethod::Mixin 58 | 59 | def fibonacci(index, back_one = 1, back_two = 0) 60 | index < 1 ? back_two : fibonacci(index - 1, back_one + back_two, back_one) 61 | end 62 | tco_method :fibonacci 63 | end 64 | 65 | puts MyFibonacci.new.fibonacci(10_000).to_s.length 66 | # => 2090 67 | ``` 68 | 69 | The **TCOMethod::Mixin** module provides annotations at the Class and Module 70 | level allowing a developer access to some of the niceties of tail call 71 | optimization, but without the awkwardness that comes from String literal code or 72 | heredocs. In the style of some of Ruby's other class annotations like 73 | **private_class_method** or **module_function**, the **tco_module_method**, 74 | **tco_class_method**, and eponymous *tco_method** annotation for instance 75 | methods, allow a user to annotate a previously defined method indicating that 76 | the specified method should be recompiled with tail call optimization enabled. 77 | 78 | Currently these helper methods are little more than nicely wrapped hacks that 79 | use some trickery to redefine the specified method with tail call optimization 80 | enabled. 
More specifically, the helper annotations will: 81 | 82 | - find the method identified by the given argument 83 | - retrieve the source for that method using the [method_source 84 | gem](https://github.com/banister/method_source) 85 | - generate a redefinition expression from the method source that 86 | reopens the defining Module or Class and redefines the method 87 | - pass the generated redefinition expression to **TCOMethod.tco_eval**, 88 | effectively overriding the previously defined method with the new tail call 89 | optimized version 90 | 91 | While this works in most situations, there are quite a few [pitfalls and 92 | gotchas](https://github.com/tdg5/tco_method/tree/6241e57f8bb8478e2ef2286d4cc6e463c0198e61#gotchas) 93 | that come from this approach. 94 | 95 | For one, this approach only works for methods defined using the **def** keyword. 96 | Though in some cases methods defined using **define_method** could be redefined 97 | correctly, given that **define_method** takes a block that maintains a closure 98 | with the definition context, there's no foolproof way to ensure that all methods 99 | defined using **define_method** could be reevaluated with tail call optimization 100 | enabled because of references to the closure context. 101 | 102 | Another gotcha worth mentioning is that because the current implementation 103 | relies on reopening the parent Module or Class, the helper methods won't work on 104 | anonymous Classes or Modules because they cannot be reopened by name. With more 105 | hacking there are ways to get around this limitation, but, at present, I don't 106 | think more hacking is the path forward and something more along the lines of a C 107 | extension is the right way to address these issues. 
108 | 109 | ## Interesting problems 110 | 111 | As I said before, I think the [tco_method gem](https://rubygems.org/gems/tco_method) 112 | is a starting point, not a solution, and I'm excited by the various 113 | opportunities and challenges it presents. Though I am definitely interested in 114 | learning more about Ruby's C extension support, the [tco_method gem](https://rubygems.org/gems/tco_method) 115 | has already presented some interesting problems despite its current primitive 116 | and hacky nature. 117 | 118 | For example, in order to test that a recursive factorial method would no longer 119 | encounter a stack overflow after being recompiled with tail call optimization 120 | enabled, I first had to devise a means of ensuring that that method would 121 | have encountered a stack overflow without tail call optimization enabled and at 122 | what point that stack overflow would have occurred. To achieve this, I wrote a 123 | test helper that performs [a binary search to discover how many stack frames a 124 | recursive function can allocate before a stack overflow is 125 | encountered](https://github.com/tdg5/tco_method/blob/c28895742e18e9d87393c97435db99e4b71c5fa3/test/test_helpers/stack_busters/factorial_stack_buster.rb#L25). 126 | 127 | Though my current solution could use some refactoring, I thought this was a fun 128 | and interesting problem to solve. Though I don't find binary search particularly 129 | interesting on its own, I found this particular case interesting because the 130 | expensive nature of the **raise**/**rescue** cycle in Ruby introduces a sort of 131 | penalty to the process such that the process will be much quicker if the point 132 | of overflow can be discovered while causing as few **SystemStackError** 133 | exceptions as possible. 
I think this detail makes the binary search more 134 | interesting because there's more to it than just finding the desired result in as few 135 | operations as possible, there are also other considerations to keep in mind that 136 | could totally change how the utility of the search is assessed. In fact, given 137 | this behavior, a binary search may not be the best approach at all. 138 | 139 | For now, I've taken the approach of using one binary search to find a point of 140 | overflow, then using a second binary search to find the exact point at which the 141 | recursive function begins to exceed the system stack between the last successful 142 | invocation and the overflowing invocation. 143 | 144 | I haven't tried to do much research on this particular type of problem yet, but 145 | I'm excited to revisit this search function at some point in the future and see 146 | what other ideas are out there for me to throw at the problem. 147 | 148 | **Update:** After discussing the peculiarities of this approach with my coworker 149 | Matt Bittarelli, he suggested a couple of alternatives to the binary search 150 | approach that seemed intriguing and simpler. The first idea was simply to [force 151 | a **SystemStackError** and check the length of the exception's backtrace from the 152 | **rescue** context to determine the maximum stack 153 | depth](https://github.com/tdg5/tco_method/commit/e2e7f30314fd3d0e1b2d138328d7deeb31e7bd96). 154 | Though this approach works in Ruby 2.2, [it does not work in Ruby 2.0 or Ruby 155 | 2.1](https://travis-ci.org/tdg5/tco_method/builds/54811953). The other idea Matt 156 | had was that maybe a **SystemStackError** wasn't necessary at all if a block 157 | could be used to monitor how the stack depth changed from iteration to 158 | iteration. 
Though a little mind bending, I was able to [use a recursive method 159 | that yields to a block to monitor how the stack depth changes and using that 160 | information determine whether the method had been compiled with tail call 161 | optimization enabled](https://github.com/tdg5/tco_method/commit/c2963276376f7705b2fb1b6b582d88f07954c02f). 162 | Though the means of determining if a method is compiled with tail call 163 | optimization has changed since I initially wrote this article, I think all three 164 | of the above approaches are interesting and I expect more interesting problems 165 | will emerge as work on this gem continues. Thanks again to Matt Bittarelli for 166 | his insights into the problem! 167 | 168 | ## Test drive 169 | 170 | Because tail recursive functions can typically be restated in other ways that 171 | don't require tail call optimization, I'm still on the fence as to whether TCO 172 | provides any real value other than expanding the expressiveness of the Ruby 173 | language. As such, I encourage you to take the [tco_method gem](https://rubygems.org/gems/tco_method) 174 | for a test drive and explore the opportunities it presents. If you do take 175 | it for a test drive, drop me a line to let me know how it went. I'd be 176 | interested to hear about your experiences both with tail call optimization in 177 | Ruby-land and with the API offered by the [tco_method gem](https://rubygems.org/gems/tco_method). 178 | Contributions are also always welcome! 179 | 180 | [View the tco_method gem on RubyGems](https://rubygems.org/gems/tco_method) 181 | [View the tco_method gem on GitHub](https://github.com/tdg5/tco_method) 182 | 183 | As always, thanks for reading! 
184 | -------------------------------------------------------------------------------- /lib/blog_snippets/articles/tail_call_optimization_in_ruby_background/README.md: -------------------------------------------------------------------------------- 1 | Back in November, care of [/r/ruby](https://www.reddit.com/r/ruby), I came 2 | across [a blog post by Nithin Bekal, Tail Call Optimization in 3 | Ruby](http://nithinbekal.com/posts/ruby-tco/), demonstrating Ruby's built-in 4 | support for tail call optimization and I have to admit, my mind was a little 5 | blown. 6 | 7 | It's not that I have a specific need for tail call optimization. In fact, 8 | I can't think of even a single situation where I would have done 9 | things differently if I'd known the VM supported it. But, I guess I was 10 | surprised to find that tail call optimization was just hiding somewhere in the 11 | Ruby VM, waiting to be flipped on with a compile flag, or **at runtime**. 12 | 13 | I think it was this ability to just turn it on at any time that blew my mind. 14 | Not just that it was hiding in there somewhere, but that the VM is flexible 15 | enough to swap in the machinery to support tail call optimization whenever you 16 | decide you want it. Pretty awesome. 17 | 18 | With no particular use for tail call optimization, I've just been sitting on the 19 | knowledge, the notion bouncing around in my head. That is, until earlier 20 | this week when I decided I would try to apply some of what I learned from reading 21 | [Pat Shaughnessy's Ruby Under a Microscope](http://patshaughnessy.net/ruby-under-a-microscope) 22 | to better understanding how the Ruby VM can be so flexible when it comes to tail 23 | call optimization. 24 | 25 | Though I think that that will make for an interesting blog post, it's turned into a 26 | bit of an epic. 
So this week, I'm going to begin with a little background on 27 | tail call optimization and hopefully build on what others have already shared 28 | with some of what I've learned about Ruby's implementation of tail call 29 | optimization while trudging through Ruby's depths. Then, in my next post, with 30 | the stage already set, we can get into the internals of how the Ruby VM makes 31 | tail call optimization happen at runtime. 32 | 33 | Let's get started! 34 | 35 | ## A little background on tail call optimization 36 | [Nithin's article](http://nithinbekal.com/posts/ruby-tco/) does a great job of 37 | explaining tail recursive functions and tail call optimization, so if you're a 38 | little iffy on either subject, I'd recommend reading that before you continue 39 | with this post. The [Tail call entry in Wikipedia](https://en.wikipedia.org/wiki/Tail_call) 40 | is also a useful resource for even more depth on the subject. 41 | 42 | To summarize, tail call optimization, or tail call elimination as it is also 43 | known, is a special feature of some kinds of tail recursive functions that 44 | allows for the tail call to be implemented without adding a new stack frame to 45 | the call stack. This allows for more efficient tail calls while also 46 | allowing the size of the stack to remain constant which in turn allows recursion 47 | to be used in situations that might otherwise encounter a stack overflow without 48 | tail call optimization. 49 | 50 | ## Ruby and tail call optimization 51 | Starting with Ruby 1.9.2, the Ruby VM offers built-in, though experimental, 52 | support for tail call optimization. That said, there are other ways of achieving 53 | tail call optimization without enabling it in the VM. 
[Magnus Holm offers a 54 | couple of other hacks for achieving tail call optimization in Ruby in his blog post 55 | Tailin' Ruby](http://timelessrepo.com/tailin-ruby), which is worth the read 56 | just for the innovative ways he attempts to solve the problem, even if you're 57 | fine to use the Ruby VM's implementation of tail call optimization. Maybe it's 58 | just because I haven't had an itch that I needed tail call optimization to 59 | scratch, but using **redo** to emulate tail call optimization in a performant 60 | fashion is pretty damn clever. 61 | 62 | Now, although support for tail call optimization is built into the VM, because 63 | of its experimental nature it isn't enabled by default and must be turned on 64 | either with a flag when compiling Ruby or by configuring 65 | **RubyVM::InstructionSequence** at runtime with special compile options. There 66 | was some talk of [enabling tail call optimization by default around the time 67 | that Ruby 2.0 was released](https://bugs.ruby-lang.org/issues/6602), however 68 | this hasn't come to be for a number of reasons: Primary concerns were that tail 69 | call optimization makes it difficult to implement **set_trace_func** and also 70 | causes backtrace weirdness due to the absence of a new stack frame. 71 | 72 | Now that we have a little background on tail call optimization in Ruby, let's 73 | take a look at an example of a tail recursive, tail call optimizable function. 74 | 75 | ## A tail recursive Guinea pig 76 | In order for us to take Ruby's implementation of tail call optimization for a 77 | test drive and to help us get to the bottom of Ruby's implementation of tail 78 | call optimization in my next post, we'll first need a tail recursive function to 79 | be the subject of our experiments. As it turns out, we can actually extract such 80 | a subject from the Ruby source code itself. 
81 | 82 | Depending on your feelings about the recent debate regarding how Ruby is 83 | tested[^1][^2], it may surprise you to learn that our Guinea pig comes directly 84 | from Ruby's built-in test suite. After all, though tail call optimization may 85 | not be enabled by default, and though it may only be experimental at this time, 86 | it's not unreasonable to think that there'd be a test for it somewhere. That 87 | somewhere is among a handful of other tests for various optimizations to the 88 | Ruby VM in the Ruby source at [test/ruby/test_optimization.rb](https://github.com/ruby/ruby/blob/fcf6fa8781fe236a9761ad5d75fa1b87f1afeea2/test/ruby/test_optimization.rb#L213). 89 | 90 | The test that is home to our Guinea pig is somewhat unremarkable, so though 91 | you're welcome to review the full contents of the test, for our purposes I've 92 | extracted the tail recursive factorial function used by the test with some 93 | refactoring to, among other things, isolate the HEREDOC and make it work outside 94 | of the test: 95 | 96 | ```ruby 97 | code = <<-CODE 98 | class Factorial 99 | def self.fact_helper(n, res) 100 | n == 1 ? res : fact_helper(n - 1, n * res) 101 | end 102 | 103 | def self.fact(n) 104 | fact_helper(n, 1) 105 | end 106 | end 107 | CODE 108 | options = { 109 | tailcall_optimization: true, 110 | trace_instruction: false, 111 | } 112 | RubyVM::InstructionSequence.new(code, nil, nil, nil, options).eval 113 | ``` 114 | 115 | The tail recursive method of interest above is the **fact_helper** method. It 116 | should hopefully be pretty obvious that **fact_helper** is tail recursive given 117 | that, in all but the base case, the final action of the method is the invocation 118 | of the itself with primitive values. Other than the tail recursive nature 119 | of this function, there are a couple of other things going on here that are worth 120 | noting. 
121 | 122 | First, as I alluded to before in regard to tail call optimization not being 123 | enabled by default, currently it is not possible to turn on tail call 124 | optimization without also disabling the **set_trace_func** capabilities of the VM. 125 | This can be seen above in the option to **RubyVM::InstructionSequence** setting 126 | **trace_instruction** to false. 127 | 128 | Second, this example demonstrates the best strategy of enabling tail call 129 | optimization that I have come across so far. I say this because the other 130 | examples I've referenced have all enabled tail call optimization by changing 131 | **RubyVM::InstructionSequence.compile_option**, effectively enabling tail call 132 | optimization globally. 133 | 134 | Though at least one source suggested that the modified compile options would only be 135 | applied to code directly compiled with **RubyVM::InstructionSequence**, this is 136 | incorrect. In fact, any files loaded after the change to 137 | **RubyVM::InstructionSequence.compile_option** will be compiled with tail call 138 | optimization enabled. This can be verified by running the following contrived 139 | test script that adapts our Guinea pig both to evidence the global nature of 140 | **RubyVM::InstructionSequence.compile_option** and to demonstrate the utility of 141 | tail call optimization. 142 | 143 | ```ruby 144 | # Flag indicating whether this is the first time time this file has been loaded 145 | $first_load = true if $first_load.nil? 146 | 147 | # Declare classes to facilitate #instance_eval later 148 | class FirstLoadFactorial; end 149 | class ReloadedFactorial; end 150 | 151 | # On the first load, extend FirstLoadFactorial, 152 | # On the second load, extend ReloadedFactorial. 153 | klass = $first_load ? 
FirstLoadFactorial : ReloadedFactorial 154 | 155 | # Tail recursive factorial adapted from 156 | # https://github.com/ruby/ruby/blob/fcf6fa8781fe236a9761ad5d75fa1b87f1afeea2/test/ruby/test_optimization.rb#L213 157 | klass.instance_eval do 158 | def self.fact_helper(n, res) 159 | n == 1 ? res : fact_helper(n - 1, n * res) 160 | end 161 | 162 | def self.fact(n) 163 | fact_helper(n, 1) 164 | end 165 | end 166 | 167 | # Turn on tailcall optimization 168 | RubyVM::InstructionSequence.compile_option = { 169 | tailcall_optimization: true, 170 | trace_instruction: false, 171 | } 172 | 173 | # This check avoids calculating the factorial twice; ReloadedFactorial will only 174 | # respond to :fact after the file has been reloaded. 175 | if ReloadedFactorial.respond_to?(:fact) 176 | begin 177 | puts "FirstLoadFactorial: #{FirstLoadFactorial.fact(50000).to_s.length}" 178 | rescue SystemStackError 179 | puts 'FirstLoadFactorial: stack level too deep' 180 | end 181 | 182 | # 50000! is 213,237 digits long, so display just the length of the calculation 183 | puts "ReloadedFactorial: #{ReloadedFactorial.fact(50000).to_s.length}" 184 | end 185 | 186 | # Reload the file on the first load only 187 | if $first_load 188 | $first_load = false 189 | load __FILE__ 190 | end 191 | 192 | # $ ruby tail_optimized_reload.rb 193 | # FirstLoadFactorial: stack level too deep 194 | # ReloadedFactorial: 213237 195 | ``` 196 | 197 | [View on GitHub](https://github.com/tdg5/blog_snippets/blob/8cdc800e711f5270754e352b9f3458d7e429b87d/lib/blog_snippets/tail_call_optimization_in_ruby_internals/tail_optimized_reload.rb) 198 | 199 | Since tail call optimization is still an experimental feature, if you're going 200 | to use tail call optimization in production code or in code that could become 201 | production code, the strategy demonstrated by the Ruby core test of creating a 202 | new **RubyVM::InstructionSequence** object that can be used to load/compile tail 203 | call optimized code without affecting 
other code compiled by the VM later is 204 | absolutely the right way to go. 205 | 206 | ## End Part I 207 | That does it for our initial foray into tail call optimization in Ruby. I hope 208 | you've found something here today worth the price of admission. Stay tuned for 209 | my next post in which we'll take our tail recursive Guinea pig for a deep dive into the 210 | internals of Ruby, all the way from the Ruby source, through the YARV instructions 211 | just below the surface, down deep into the C weeds in search of the source 212 | of Ruby's tail call optimization implementation. It'll certainly be an 213 | interesting ride. 214 | 215 | [^1]: http://rubini.us/2014/12/31/matz-s-ruby-developers-don-t-use-rubyspec/ 216 | [^2]: https://gist.github.com/nateberkopec/11dbcf0ee7f2c08450ea 217 | -------------------------------------------------------------------------------- /lib/blog_snippets/articles/tail_call_optimization_in_ruby_internals/from_call_method_to_tco.c: -------------------------------------------------------------------------------- 1 | static void 2 | vm_search_method(rb_call_info_t *ci, VALUE recv) 3 | { 4 | VALUE klass = CLASS_OF(recv); 5 | 6 | #if OPT_INLINE_METHOD_CACHE 7 | if (LIKELY(GET_GLOBAL_METHOD_STATE() == ci->method_state && RCLASS_SERIAL(klass) == ci->class_serial)) { 8 | /* cache hit! 
*/ 9 | return; 10 | } 11 | #endif 12 | 13 | ci->me = rb_method_entry(klass, ci->mid, &ci->defined_class); 14 | ci->klass = klass; 15 | ci->call = vm_call_general; 16 | #if OPT_INLINE_METHOD_CACHE 17 | ci->method_state = GET_GLOBAL_METHOD_STATE(); 18 | ci->class_serial = RCLASS_SERIAL(klass); 19 | #endif 20 | } 21 | 22 | 23 | static VALUE 24 | vm_call_general(rb_thread_t *th, rb_control_frame_t *reg_cfp, rb_call_info_t *ci) 25 | { 26 | return vm_call_method(th, reg_cfp, ci); 27 | } 28 | 29 | 30 | VALUE 31 | vm_call_method(rb_thread_t *th, rb_control_frame_t *cfp, rb_call_info_t *ci) 32 | { 33 | int enable_fastpath = 1; 34 | rb_call_info_t ci_temp; 35 | 36 | start_method_dispatch: 37 | if (ci->me != 0) { 38 | if ((ci->me->flag == 0)) { 39 | VALUE klass; 40 | 41 | normal_method_dispatch: 42 | switch (ci->me->def->type) { 43 | case VM_METHOD_TYPE_ISEQ:{ 44 | CI_SET_FASTPATH(ci, vm_call_iseq_setup, enable_fastpath); 45 | return vm_call_iseq_setup(th, cfp, ci); 46 | } 47 | case VM_METHOD_TYPE_NOTIMPLEMENTED: 48 | case VM_METHOD_TYPE_CFUNC: 49 | CI_SET_FASTPATH(ci, vm_call_cfunc, enable_fastpath); 50 | return vm_call_cfunc(th, cfp, ci); 51 | case VM_METHOD_TYPE_ATTRSET:{ 52 | CALLER_SETUP_ARG(cfp, ci); 53 | rb_check_arity(ci->argc, 1, 1); 54 | ci->aux.index = 0; 55 | CI_SET_FASTPATH(ci, vm_call_attrset, enable_fastpath && !(ci->flag & VM_CALL_ARGS_SPLAT)); 56 | return vm_call_attrset(th, cfp, ci); 57 | } 58 | case VM_METHOD_TYPE_IVAR:{ 59 | CALLER_SETUP_ARG(cfp, ci); 60 | rb_check_arity(ci->argc, 0, 0); 61 | ci->aux.index = 0; 62 | CI_SET_FASTPATH(ci, vm_call_ivar, enable_fastpath && !(ci->flag & VM_CALL_ARGS_SPLAT)); 63 | return vm_call_ivar(th, cfp, ci); 64 | } 65 | case VM_METHOD_TYPE_MISSING:{ 66 | ci->aux.missing_reason = 0; 67 | CI_SET_FASTPATH(ci, vm_call_method_missing, enable_fastpath); 68 | return vm_call_method_missing(th, cfp, ci); 69 | } 70 | case VM_METHOD_TYPE_BMETHOD:{ 71 | CI_SET_FASTPATH(ci, vm_call_bmethod, enable_fastpath); 72 | return 
vm_call_bmethod(th, cfp, ci); 73 | } 74 | case VM_METHOD_TYPE_ZSUPER:{ 75 | klass = ci->me->klass; 76 | klass = RCLASS_ORIGIN(klass); 77 | zsuper_method_dispatch: 78 | klass = RCLASS_SUPER(klass); 79 | if (!klass) { 80 | ci->me = 0; 81 | goto start_method_dispatch; 82 | } 83 | ci_temp = *ci; 84 | ci = &ci_temp; 85 | 86 | ci->me = rb_method_entry(klass, ci->mid, &ci->defined_class); 87 | 88 | if (ci->me != 0) { 89 | goto normal_method_dispatch; 90 | } 91 | else { 92 | goto start_method_dispatch; 93 | } 94 | } 95 | case VM_METHOD_TYPE_OPTIMIZED:{ 96 | switch (ci->me->def->body.optimize_type) { 97 | case OPTIMIZED_METHOD_TYPE_SEND: 98 | CI_SET_FASTPATH(ci, vm_call_opt_send, enable_fastpath); 99 | return vm_call_opt_send(th, cfp, ci); 100 | case OPTIMIZED_METHOD_TYPE_CALL: 101 | CI_SET_FASTPATH(ci, vm_call_opt_call, enable_fastpath); 102 | return vm_call_opt_call(th, cfp, ci); 103 | default: 104 | rb_bug("vm_call_method: unsupported optimized method type (%d)", 105 | ci->me->def->body.optimize_type); 106 | } 107 | break; 108 | } 109 | case VM_METHOD_TYPE_UNDEF: 110 | break; 111 | case VM_METHOD_TYPE_REFINED:{ 112 | NODE *cref = rb_vm_get_cref(cfp->iseq, cfp->ep); 113 | VALUE refinements = cref ? 
cref->nd_refinements : Qnil; 114 | VALUE refinement, defined_class; 115 | rb_method_entry_t *me; 116 | 117 | refinement = find_refinement(refinements, 118 | ci->defined_class); 119 | if (NIL_P(refinement)) { 120 | goto no_refinement_dispatch; 121 | } 122 | me = rb_method_entry(refinement, ci->mid, &defined_class); 123 | if (me) { 124 | if (ci->call == vm_call_super_method) { 125 | rb_control_frame_t *top_cfp = current_method_entry(th, cfp); 126 | if (top_cfp->me && 127 | rb_method_definition_eq(me->def, top_cfp->me->def)) { 128 | goto no_refinement_dispatch; 129 | } 130 | } 131 | ci->me = me; 132 | ci->defined_class = defined_class; 133 | if (me->def->type != VM_METHOD_TYPE_REFINED) { 134 | goto start_method_dispatch; 135 | } 136 | } 137 | 138 | no_refinement_dispatch: 139 | if (ci->me->def->body.orig_me) { 140 | ci->me = ci->me->def->body.orig_me; 141 | if (UNDEFINED_METHOD_ENTRY_P(ci->me)) { 142 | ci->me = 0; 143 | } 144 | goto start_method_dispatch; 145 | } 146 | else { 147 | klass = ci->me->klass; 148 | goto zsuper_method_dispatch; 149 | } 150 | } 151 | } 152 | rb_bug("vm_call_method: unsupported method type (%d)", ci->me->def->type); 153 | } 154 | else { 155 | int noex_safe; 156 | if (!(ci->flag & VM_CALL_FCALL) && (ci->me->flag & NOEX_MASK) & NOEX_PRIVATE) { 157 | int stat = NOEX_PRIVATE; 158 | 159 | if (ci->flag & VM_CALL_VCALL) { 160 | stat |= NOEX_VCALL; 161 | } 162 | ci->aux.missing_reason = stat; 163 | CI_SET_FASTPATH(ci, vm_call_method_missing, 1); 164 | return vm_call_method_missing(th, cfp, ci); 165 | } 166 | else if (!(ci->flag & VM_CALL_OPT_SEND) && (ci->me->flag & NOEX_MASK) & NOEX_PROTECTED) { 167 | enable_fastpath = 0; 168 | if (!rb_obj_is_kind_of(cfp->self, ci->defined_class)) { 169 | ci->aux.missing_reason = NOEX_PROTECTED; 170 | return vm_call_method_missing(th, cfp, ci); 171 | } 172 | else { 173 | goto normal_method_dispatch; 174 | } 175 | } 176 | else if ((noex_safe = NOEX_SAFE(ci->me->flag)) > th->safe_level && (noex_safe > 2)) { 177 | 
rb_raise(rb_eSecurityError, "calling insecure method: %"PRIsVALUE, rb_id2str(ci->mid)); 178 | } 179 | else { 180 | goto normal_method_dispatch; 181 | } 182 | } 183 | } 184 | else { 185 | /* method missing */ 186 | int stat = 0; 187 | if (ci->flag & VM_CALL_VCALL) { 188 | stat |= NOEX_VCALL; 189 | } 190 | if (ci->flag & VM_CALL_SUPER) { 191 | stat |= NOEX_SUPER; 192 | } 193 | if (ci->mid == idMethodMissing) { 194 | rb_control_frame_t *reg_cfp = cfp; 195 | VALUE *argv = STACK_ADDR_FROM_TOP(ci->argc); 196 | rb_raise_method_missing(th, ci->argc, argv, ci->recv, stat); 197 | } 198 | else { 199 | ci->aux.missing_reason = stat; 200 | CI_SET_FASTPATH(ci, vm_call_method_missing, 1); 201 | return vm_call_method_missing(th, cfp, ci); 202 | } 203 | } 204 | 205 | rb_bug("vm_call_method: unreachable"); 206 | } 207 | 208 | 209 | static VALUE 210 | vm_call_iseq_setup(rb_thread_t *th, rb_control_frame_t *cfp, rb_call_info_t *ci) 211 | { 212 | vm_callee_setup_arg(th, ci, ci->me->def->body.iseq, cfp->sp - ci->argc); 213 | return vm_call_iseq_setup_2(th, cfp, ci); 214 | } 215 | 216 | 217 | static VALUE 218 | vm_call_iseq_setup_2(rb_thread_t *th, rb_control_frame_t *cfp, rb_call_info_t *ci) 219 | { 220 | if (LIKELY(!(ci->flag & VM_CALL_TAILCALL))) { 221 | return vm_call_iseq_setup_normal(th, cfp, ci); 222 | } 223 | else { 224 | return vm_call_iseq_setup_tailcall(th, cfp, ci); 225 | } 226 | } 227 | 228 | 229 | static inline VALUE 230 | vm_call_iseq_setup_normal(rb_thread_t *th, rb_control_frame_t *cfp, rb_call_info_t *ci) 231 | { 232 | int i, local_size; 233 | VALUE *argv = cfp->sp - ci->argc; 234 | rb_iseq_t *iseq = ci->me->def->body.iseq; 235 | VALUE *sp = argv + iseq->param.size; 236 | 237 | /* clear local variables (arg_size...local_size) */ 238 | for (i = iseq->param.size, local_size = iseq->local_size; i < local_size; i++) { 239 | *sp++ = Qnil; 240 | } 241 | 242 | vm_push_frame(th, iseq, VM_FRAME_MAGIC_METHOD, ci->recv, ci->defined_class, 243 | VM_ENVVAL_BLOCK_PTR(ci->blockptr), 
244 | iseq->iseq_encoded + ci->aux.opt_pc, sp, 0, ci->me, iseq->stack_max); 245 | 246 | cfp->sp = argv - 1 /* recv */; 247 | return Qundef; 248 | } 249 | 250 | 251 | static inline VALUE 252 | vm_call_iseq_setup_tailcall(rb_thread_t *th, rb_control_frame_t *cfp, rb_call_info_t *ci) 253 | { 254 | int i; 255 | VALUE *argv = cfp->sp - ci->argc; 256 | rb_iseq_t *iseq = ci->me->def->body.iseq; 257 | VALUE *src_argv = argv; 258 | VALUE *sp_orig, *sp; 259 | VALUE finish_flag = VM_FRAME_TYPE_FINISH_P(cfp) ? VM_FRAME_FLAG_FINISH : 0; 260 | 261 | cfp = th->cfp = RUBY_VM_PREVIOUS_CONTROL_FRAME(th->cfp); /* pop cf */ 262 | 263 | RUBY_VM_CHECK_INTS(th); 264 | 265 | sp_orig = sp = cfp->sp; 266 | 267 | /* push self */ 268 | sp[0] = ci->recv; 269 | sp++; 270 | 271 | /* copy arguments */ 272 | for (i=0; i < iseq->param.size; i++) { 273 | *sp++ = src_argv[i]; 274 | } 275 | 276 | /* clear local variables */ 277 | for (i = 0; i < iseq->local_size - iseq->param.size; i++) { 278 | *sp++ = Qnil; 279 | } 280 | 281 | vm_push_frame(th, iseq, VM_FRAME_MAGIC_METHOD | finish_flag, 282 | ci->recv, ci->defined_class, VM_ENVVAL_BLOCK_PTR(ci->blockptr), 283 | iseq->iseq_encoded + ci->aux.opt_pc, sp, 0, ci->me, iseq->stack_max); 284 | 285 | cfp->sp = sp_orig; 286 | return Qundef; 287 | } 288 | -------------------------------------------------------------------------------- /lib/blog_snippets/articles/tuning_dd_block_size/README.md: -------------------------------------------------------------------------------- 1 | Though I wouldn't call myself a dd expert, I have had my fair share of occasions 2 | to yield the might that is dd. 
From my first job after college using 3 | [KNOPPIX](http://www.knopper.net/knoppix/index-en.html) and dd to rescue NFL 4 | game footage from dying HDDs on behalf of NFL video coordinators, to using dd 5 | this past summer to move [my girlfriend's](http://alilallovertheplace.com/) OSX 6 | installation over to a faster SSD, dd has been an invaluable tool in my Unix 7 | arsenal for almost 10 years. 8 | 9 | Maybe it's because everyone focuses on getting the **of** (output file) argument 10 | right, or maybe there's more to it, but in my time with dd, one aspect of dd's 11 | usage that I've found often overlooked relates to dd's three block size 12 | arguments, **ibs** (input block size), **obs** (output block size), and the all 13 | encompassing **bs** (input and output block size). Don't get me wrong, making 14 | sure you've determined the correct **of** argument is of paramount importance, 15 | but once you've got that nailed down, there's more to be done than breathe a 16 | giant sigh of relief. The various block size arguments that dd takes will be the 17 | deciding factor between whether the copy completes in a day or in two hours. 18 | 19 | ## A little background on block size 20 | A **block** in terms of dd as explained by Wikipedia: 21 | > A block is a unit measuring the number of bytes that are read, written, or 22 | > converted at one time.[^1] 23 | 24 | As such, the various block size arguments tell dd how many sectors should be 25 | copied at once, whether for input, output, or both. By default, most versions of 26 | dd will use a block size of 512 bytes for both input and output.[^2] This may have 27 | been fine pre-1999 when most hard drives had a sector size of 512 bytes, but 28 | in recent years most hard drives have a sector size of at least 4KB (4096 29 | bytes). 
This change may seem inconsequential but can lead to enormous 30 | inefficiencies when combined with the fact that these days many typical consumer 31 | hard drives have more than a terabyte of capacity. When dealing with a terabyte 32 | or more of data, you **really** want to make sure you choose an optimal block 33 | size. 34 | 35 | There's a useful, though pretty dated, [message in the archive of the Eugene, 36 | Oregon Linux User's Group (Eug-Lug) that offers some perspective on optimal 37 | block sizes for dd](http://www.mail-archive.com/eug-lug@efn.org/msg12073.html) 38 | that can be useful as a jumping off point for your own tests or in those 39 | situations where testing different block sizes isn't feasible. 40 | The findings presented in the message show that for the author's particular 41 | hardware, a block size of about 64K was pretty close to optimal. 42 | 43 | That's nice advice, but without more context it's somewhat meaningless, so let's 44 | perform a few experiments. 45 | 46 | ## Science! 47 | As an example of the impact that an inefficient/optimal block size can have, 48 | I've run a few tests for your consideration. These results are all specific to 49 | my hardware, and though they may offer a rule-of-thumb for similar situations, 50 | it's important to keep in mind that there is no universally correct block size; 51 | what is optimal for one situation may be terribly inefficient for another. To 52 | that end, the tests below are meant to provide a simple example of the benefits 53 | of optimizing the block size used by dd; they are not intended to accurately 54 | replicate real world copy scenarios. 55 | 56 | For simplicity, we will be reading data from */dev/zero*, which should be able 57 | to churn out zeros at a much, much faster rate than we can actually write them, which, 58 | in turn, means that these examples are actually testing optimal output block 59 | sizes and are, more or less, ignoring input block size entirely. 
Optimizing input 60 | block sizing is left as an exercise for the reader and should be easy enough to 61 | achieve by reading data from the desired disk and writing it out to */dev/null*. 62 | 63 | On with the experiments! 64 | 65 | Let's start off with a few tests writing out to a HDD: 66 | 67 | - Reading from */dev/zero* and writing out to a HDD with the default block size 68 | of 512 bytes yields a throughput of 10.9 MB/s. At that rate, writing 1TB of 69 | data would take about 96,200 seconds or just north of 26 hours. 70 | 71 | - Reading from */dev/zero* and writing out to a HDD with the Eug-Lug suggested 72 | block size of 64K yields a throughput of 108 MB/s. At that rate, writing 1TB 73 | of data would take 9,709 seconds or about 2.7 hours to complete. This is a 74 | huge improvement, nearly an order of magnitude, over the default block size of 75 | 512 bytes. 76 | 77 | - Reading from */dev/zero* and writing out to a HDD with a more 78 | optimal block size of 512K yields a throughput of 131 MB/s. At that rate, 79 | writing 1TB of data would take about 8,004 seconds or about 2.2 hours. Though 80 | not as pronounced a difference, this is even faster than the Eug-Lug 81 | suggestion and is more than a full order of magnitude faster than the default 82 | block size of 512 bytes. 83 | 84 | Let's switch gears and try a couple of experiments writing out to a SSD: 85 | 86 | - Reading from */dev/zero* and writing out to a SSD with the default block size 87 | of 512 bytes yields a throughput of 39.6 MB/s. At that rate writing 1TB of 88 | data would take about 26,479 seconds or about 7.4 hours. 89 | 90 | - Reading from */dev/zero* and writing out to a SSD with the Eug-Lug suggested 91 | block size of 64K yields a throughput of 266 MB/s. At that rate, writing 1TB 92 | of data would take about 3,942 seconds or about 1.1 hours. Once again, this 93 | is a huge improvement, nearly an order of magnitude faster than the default 94 | block size of 512 bytes. 
95 | 96 | - Reading from */dev/zero* and writing out to a SSD with a more 97 | optimal block size of 256K yields a throughput of 280 MB/s. At that rate, 98 | writing 1TB of data would take about 3,744 seconds or about 1 hour. Once 99 | again this is faster than both the Eug-Lug suggestion and the default, though 100 | not as much of an improvement as in the HDD case. 101 | 102 | Let's switch gears one last time and try a few experiments writing out to RAM: 103 | 104 | - Reading from */dev/zero* and writing out to RAM with the default block size 105 | of 512 bytes yields a throughput of 221 MB/s. At that rate, writing 1TB of 106 | data would take about 4,745 seconds or about 1.3 hours. 107 | 108 | - Reading from */dev/zero* and writing out to RAM with the Eug-Lug suggested 109 | block size of 64K yields a throughput of 1,433 MB/s. At that rate, writing 1TB 110 | of data would take about 731 seconds or about 12 minutes to complete the 111 | transfer. Once again, this is a huge improvement, nearly an order of 112 | magnitude faster than the default block size. 113 | 114 | - Reading from */dev/zero* and writing out to RAM with a more 115 | optimal block size of 256K yields a throughput of 1,536 MB/s. At that rate, 116 | writing 1TB of data would take about 682 seconds or about 11 minutes. This is 117 | once again faster than the default and the Eug-Lug suggestion, but once 118 | again, pretty comparable to the Eug-Lug suggestion. 119 | 120 | These experiments should help illustrate that depending on the type, 121 | manufacturer, and state of the source and destination media, optimal block sizes 122 | can vary wildly. This should also help demonstrate that on modern hardware the 123 | default block size of 512 bytes tends to be horribly inefficient. That said, 124 | though not always the most optimal, the Eug-Lug suggested block size of 64K can 125 | be a somewhat reliable option for a more modern default. 
126 | 127 | ## A pair of scripts to find more optimal block sizes 128 | Because of the wild variance in optimal block sizing, I've written a couple of 129 | scripts to test a range of different input and output block size options for use 130 | prior to starting any large copies with dd. However, before we discuss the 131 | scripts, **be warned that both of these scripts use dd behind the scenes, so it's 132 | important to use caution when running either script so as to avoid summoning 133 | dd's alter ego, disk destroyer.**[^3] The scripts are short enough that I 134 | encourage you to read both scripts before using either one of them so you have a 135 | better understanding of what is going on behind the scenes. That said, first 136 | we'll look at a script for determining an optimal output block size. 137 | 138 | ### dd_obs_test.sh 139 | 140 | Let's just jump straight into the script: 141 | 142 | ```bash 143 | #!/bin/bash 144 | 145 | # Since we're dealing with dd, abort if any errors occur 146 | set -e 147 | 148 | TEST_FILE=${1:-dd_obs_testfile} 149 | [ -e "$TEST_FILE" ]; TEST_FILE_EXISTS=$? 150 | TEST_FILE_SIZE=134217728 151 | 152 | # Header 153 | PRINTF_FORMAT="%8s : %s\n" 154 | printf "$PRINTF_FORMAT" 'block size' 'transfer rate' 155 | 156 | # Block sizes of 512b 1K 2K 4K 8K 16K 32K 64K 128K 256K 512K 1M 2M 4M 8M 16M 32M 64M 157 | for BLOCK_SIZE in 512 1024 2048 4096 8192 16384 32768 65536 131072 262144 524288 1048576 2097152 4194304 8388608 16777216 33554432 67108864 158 | do 159 | # Calculate number of segments required to copy 160 | COUNT=$(($TEST_FILE_SIZE / $BLOCK_SIZE)) 161 | 162 | if [ $COUNT -le 0 ]; then 163 | echo "Block size of $BLOCK_SIZE estimated to require $COUNT blocks, aborting further tests." 
164 | break 165 | fi 166 | 167 | # Create a test file with the specified block size 168 | DD_RESULT=$(dd if=/dev/zero of=$TEST_FILE bs=$BLOCK_SIZE count=$COUNT 2>&1 1>/dev/null) 169 | 170 | # Extract the transfer rate from dd's STDERR output 171 | TRANSFER_RATE=$(echo $DD_RESULT | \grep --only-matching -E '[0-9.]+ ([MGk]?B|bytes)/s(ec)?') 172 | 173 | # Clean up the test file if we created one 174 | [ $TEST_FILE_EXISTS -ne 0 ] && rm $TEST_FILE 175 | 176 | # Output the result 177 | printf "$PRINTF_FORMAT" "$BLOCK_SIZE" "$TRANSFER_RATE" 178 | done 179 | ``` 180 | 181 | [View on GitHub](https://github.com/tdg5/blog_snippets/blob/master/lib/blog_snippets/articles/tuning_dd_block_size/dd_obs_test.sh) 182 | 183 | As you can see, the script is a pretty basic for-loop that uses dd to create a 184 | test file of 128MB using a variety of block sizes, from the default of 512 185 | bytes, all the way up to 64M. There are a few extra arguments to the dd command 186 | to make writing out a 128M file easy and there's also some grepping to pull out 187 | the transfer rate, but otherwise, that's pretty much all there is to it. 188 | 189 | By default the command will create a test file named *dd_obs_testfile* in the 190 | current directory. 
Alternatively, you can provide a path to a custom test file 191 | by providing a path after the script name: 192 | 193 | ```bash 194 | $ ./dd_obs_test.sh /path/to/disk/or/test_file 195 | ``` 196 | 197 | The output of the script is a list of the tested block sizes and their respective transfer 198 | rates like so: 199 | 200 | ```bash 201 | $ ./dd_obs_test.sh /dev/null 202 | 512: 1.4 GB/s 203 | 1K: 2.6 GB/s 204 | 2K: 4.3 GB/s 205 | 4K: 6.5 GB/s 206 | 8K: 7.8 GB/s 207 | 16K: 9.0 GB/s 208 | 32K: 8.1 GB/s 209 | 64K: 7.6 GB/s 210 | 128K: 9.8 GB/s 211 | 256K: 7.9 GB/s 212 | 512K: 9.7 GB/s 213 | 1M: 12.8 GB/s 214 | 2M: 8.8 GB/s 215 | 4M: 7.2 GB/s 216 | 8M: 7.3 GB/s 217 | 16M: 5.5 GB/s 218 | 32M: 6.4 GB/s 219 | 64M: 4.0 GB/s 220 | ``` 221 | 222 | Wow, I guess [*/dev/null* really is 223 | web-scale.](https://www.youtube.com/watch?v=b2F-DItXtZs&t=1m42s) 224 | 225 | ### dd_ibs_test.sh 226 | Now let's look at a similar script for determining an optimal input block size. 227 | We can follow pretty much the same pattern except for a couple of key 228 | differences: instead of reading from */dev/zero* and writing out the test 229 | file, this script reads from */dev/urandom* to create a test file of random bits 230 | and then uses dd to copy that test file to */dev/null* using a variety of 231 | different block sizes. Since this script creates the test file at the path you 232 | specify, you will want to be careful not to accidentally overwrite an existing 233 | file by pointing the script at an existing path. 234 | 235 | Here's the script: 236 | 237 | ```bash 238 | #!/bin/bash 239 | 240 | # Since we're dealing with dd, abort if any errors occur 241 | set -e 242 | 243 | TEST_FILE=${1:-dd_ibs_testfile} 244 | [ -e "$TEST_FILE" ]; TEST_FILE_EXISTS=$? 245 | TEST_FILE_SIZE=134217728 246 | 247 | # Exit if file exists 248 | if [ -e $TEST_FILE ]; then 249 | echo "Test file $TEST_FILE exists, aborting." 
250 | exit 1 251 | fi 252 | 253 | # Create test file 254 | echo 'Generating test file...' 255 | BLOCK_SIZE=65536 256 | COUNT=$(($TEST_FILE_SIZE / $BLOCK_SIZE)) 257 | dd if=/dev/urandom of=$TEST_FILE bs=$BLOCK_SIZE count=$COUNT > /dev/null 2>&1 258 | 259 | # Header 260 | PRINTF_FORMAT="%8s : %s\n" 261 | printf "$PRINTF_FORMAT" 'block size' 'transfer rate' 262 | 263 | # Block sizes of 512b 1K 2K 4K 8K 16K 32K 64K 128K 256K 512K 1M 2M 4M 8M 16M 32M 64M 264 | for BLOCK_SIZE in 512 1024 2048 4096 8192 16384 32768 65536 131072 262144 524288 1048576 2097152 4194304 8388608 16777216 33554432 67108864 265 | do 266 | # Read test file out to /dev/null with specified block size 267 | DD_RESULT=$(dd if=$TEST_FILE of=/dev/null bs=$BLOCK_SIZE 2>&1 1>/dev/null) 268 | 269 | # Extract transfer rate 270 | TRANSFER_RATE=$(echo $DD_RESULT | \grep --only-matching -E '[0-9.]+ ([MGk]?B|bytes)/s(ec)?') 271 | 272 | printf "$PRINTF_FORMAT" "$BLOCK_SIZE" "$TRANSFER_RATE" 273 | done 274 | 275 | # Clean up the test file if we created one 276 | [ $TEST_FILE_EXISTS -ne 0 ] && rm $TEST_FILE 277 | ``` 278 | 279 | [View on GitHub](https://github.com/tdg5/blog_snippets/blob/master/lib/blog_snippets/articles/tuning_dd_block_size/dd_ibs_test.sh) 280 | 281 | Similar to the *dd_obs_test.sh* script, this script will create a default test 282 | file named *dd_ibs_testfile* but you can also provide the script with a path 283 | argument to test input block sizes on different devices: 284 | 285 | ```bash 286 | $ ./dd_ibs_test.sh /path/to/disk/test_file 287 | ``` 288 | 289 | Again, it is important to remember that the script will try to overwrite the 290 | test file and later will remove the file after it has been written, so use 291 | extreme caution to avoid blowing away something you didn't mean to destroy. It 292 | is likely that you will need to tweak this script to meet your particular use 293 | case. 
294 | 295 | Also like *dd_obs_test.sh*, the output of this script is a list of the tested 296 | block sizes and their respective transfer rates like so: 297 | 298 | ```bash 299 | $ ./dd_ibs_test.sh 300 | 512: 1.1 GB/s 301 | 1K: 1.8 GB/s 302 | 2K: 3.0 GB/s 303 | 4K: 4.2 GB/s 304 | 8K: 5.1 GB/s 305 | 16K: 5.7 GB/s 306 | 32K: 5.4 GB/s 307 | 64K: 5.8 GB/s 308 | 128K: 6.3 GB/s 309 | 256K: 5.4 GB/s 310 | 512K: 5.8 GB/s 311 | 1M: 5.8 GB/s 312 | 2M: 5.3 GB/s 313 | 4M: 5.0 GB/s 314 | 8M: 4.9 GB/s 315 | 16M: 4.5 GB/s 316 | 32M: 4.4 GB/s 317 | 64M: 3.5 GB/s 318 | ``` 319 | 320 | In the above example it can be seen that an input block size of 128K is optimal 321 | for my particular setup. 322 | 323 | ## The end 324 | I hope this post has given you some insight into tuning dd's block size 325 | arguments and maybe even saved you a day spent transferring blocks 512 bytes at 326 | a time. 327 | 328 | Thanks for reading! 329 | 330 | [^1]: ["A block is a unit measuring the number of bytes that are read, written, or converted at one time."](https://en.wikipedia.org/wiki/Dd_(Unix)#Block_size) 331 | [^2]: [**dd's** ibs (input block size) and obs (output block size) arguments both default to 512 bytes](http://man7.org/linux/man-pages/man1/dd.1.html) 332 | [^3]: ["Some people believe dd means "Destroy Disk" or "Delete Data" because if it is misused, a partition or output file can be trashed very quickly."](http://www.codecoffee.com/tipsforlinux/articles/036.html) 333 | -------------------------------------------------------------------------------- /lib/blog_snippets/articles/module_factory_for_dependency_management/README.md: -------------------------------------------------------------------------------- 1 | At last year's RubyConf in San Diego, [Craig Buchek](https://twitter.com/craigbuchek) 2 | gave a presentation entitled [Ruby Idioms You're Not Using Yet](https://www.youtube.com/watch?v=hc_wtllfKtQ), 3 | focusing on some of Ruby's under-utilized and emerging idioms. 
In this post 4 | we'll discuss one of those idioms, an idiom Craig appropriately calls **Module 5 | Factory**. In particular, we'll explore using a Module Factory as a pattern 6 | for dependency and load-order management. 7 | 8 | ## Hey! Who you callin' an idiom? 9 | 10 | For those unfamiliar with idioms or, more likely, unfamiliar with what idioms 11 | refer to in the context of a programming language, Craig presents a number of 12 | different perspectives, my favorite of which is: 13 | 14 | > A style or form of expression that is characteristic of a particular person, 15 | > type of art, etc.[^1] 16 | 17 | Craig also offers his own perspective, which I think helps clarify and distill 18 | this concept further: 19 | 20 | > A way in which we normally express ourselves in a language. 21 | 22 | Though I think this definition captures the idea nicely, I think there's a pearl 23 | of enlightenment to be found in reducing the concept down to its roots: 24 | 25 | > Late Latin idioma, idiomat-, from Greek, from idiousthai, to make one's own, 26 | > from idios, own, personal, private.[^2] 27 | 28 | I find this etymology charming because while formal definitions tend to focus on 29 | existing patterns of language belonging to specific communities and cultures, 30 | the origin of the word hints at a deeper essence that leads ultimately to the 31 | cradle of all idiomatic expression: idioms are an emergent behavior of the 32 | efforts of individuals and communities to make a language their own. 33 | 34 | ## Idioms in Ruby 35 | 36 | In terms of Ruby, let's take a look at a couple of concrete examples of common 37 | Ruby idioms juxtaposed with their less idiomatic counterparts to give ourselves 38 | some grounding. Hopefully you'll agree that within each example, each variation 39 | gets further and further from how you'd expect to see an idea expressed in Ruby. 
40 | 41 | ###### Conditional assignment: 42 | ```ruby 43 | # Idiomatic Ruby 44 | a ||= b 45 | 46 | # Less idiomatic 47 | a || a = b 48 | 49 | # And lastly, please don't do this 50 | a = b if a == nil || a == false 51 | ``` 52 | 53 | ###### Sequential iteration 54 | ```ruby 55 | # Idiomatic Ruby 56 | 5.times { |i| puts i } 57 | 58 | # Less idiomatic, though more performant 59 | i = 0 60 | while i < 5 61 | puts i 62 | i += 1 63 | end 64 | 65 | # And finally, the dreaded `for` statement 66 | for i in 0..4 67 | puts i 68 | end 69 | ``` 70 | 71 | Hopefully, these examples give you a good idea of idioms in Ruby, but if not, 72 | I'd encourage you to watch [Ruby Idioms You're Not Using Yet](https://www.youtube.com/watch?v=hc_wtllfKtQ), 73 | as it provides more examples which may help to further elucidate the concept. 74 | 75 | On with the show! 76 | 77 | ## Module Factory: An Introduction 78 | 79 | The Module Factory pattern as described in the presentation constitutes the use 80 | of some variety of [Factory Method](https://en.wikipedia.org/wiki/Factory_method_pattern) 81 | in place of a reference to a concrete Module when calling **extend** or 82 | **include** from a Class or a Module. This is a fairly technical description, so 83 | let's take a look at the example the presentation uses to demonstrate this 84 | pattern. 
This example comes from the README for the [Virtus gem](https://rubygems.org/gems/virtus): 85 | 86 | ```ruby 87 | class User 88 | include Virtus.model(:constructor => false, :mass_assignment => false) 89 | end 90 | ``` 91 | 92 | [View on GitHub](https://github.com/solnic/virtus/blob/e648e2fe771d715179bddb7b0df9b0169a295ae3/README.md#cherry-picking-extensions) 93 | 94 | Though it may be unclear what is going on here, if we trust that neither the 95 | [Virtus docs](https://github.com/solnic/virtus/blob/e648e2fe771d715179bddb7b0df9b0169a295ae3/README.md#cherry-picking-extensions) 96 | nor the Ruby docs for [Module#include](http://www.ruby-doc.org/core-2.2.0/Module.html#method-i-include) 97 | contain an error, we can use a little deduction to piece together what's going 98 | on: 99 | 100 | - Though the Ruby docs aren't totally explicit about it, **Module#include** 101 | will raise an error unless given one or more Modules. From this we can infer 102 | that **Virtus.model** must be returning one or more Modules. 103 | - A little trial and error in irb further uncovers that though 104 | **Module#include** supports being invoked with multiple Modules, these Modules 105 | cannot be provided in an Array, but must be normal method arguments (or in the 106 | case of an Array, must be exploded with the [splat operator](https://endofline.wordpress.com/2011/01/21/the-strange-ruby-splat/#calling_methods) 107 | into normal method arguments). Since the Virtus docs don't use the splat 108 | operator, we can further narrow our inference to deduce that **Virtus.model** 109 | must be returning a single module. 110 | 111 | Now that we have a clearer understanding of what's going on in this example, it 112 | becomes easier to see how it fulfills our definition of a Module Factory: 113 | Instead of referencing a concrete Module, **Module#include** is invoked with the 114 | result of invoking the **Virtus.model** method. 
Furthermore, we've deduced that 115 | **Virtus.model** must return a Module of some sort and given the arguments it 116 | takes, it's safe to assume there's some sort of factory logic going on inside. 117 | In fact, this Module Factory allows the including class to cherry-pick a subset 118 | of Virtus' model extensions and include only those selected modules. 119 | 120 | Alright! Not so bad, right? Now that we've got one Module Factory under our 121 | belt, let's take a look at how the Module Factory pattern can help with 122 | dependency management and load ordering. 123 | 124 | ## A job for refactoring 125 | 126 | In order to provide some context for our discussion, let's start with some example 127 | code that I think could benefit from a refactoring to use the Module Factory 128 | pattern. For the sake of brevity, this code is non-functional and skips many of 129 | the details that don't impact our particular interests. That said, the code 130 | below should have a familiar flavor to anyone who has worked with an 131 | asynchronous job framework in the past, such as 132 | [Resque](https://github.com/resque/resque), 133 | [Sidekiq](https://github.com/mperham/sidekiq), 134 | [Backburner](https://github.com/nesquena/backburner), or 135 | [Rails' ActiveJob](https://github.com/rails/rails/tree/master/activejob). 136 | 137 | The example code outlines the skeleton of a job class that performs some 138 | undefined unit of work. For those unfamiliar with any of the job frameworks I 139 | mentioned above, the typical usage pattern for such a framework tends to involve 140 | subclassing a class provided by the job framework which encapsulates and handles 141 | most of the required behaviors of a job. In the example below, this role is 142 | filled by the fictitious class **JobFramework::Job**. 
143 | 144 | Generally, by subclassing a class like **JobFramework::Job**, 145 | the subclass agrees to an interface contract that typically requires the 146 | subclass to implement a **perform** method at the instance level. This pattern 147 | is also followed in the example below, as can be seen by the **perform** 148 | instance method on the **ImportantJob** class. 149 | 150 | One final point worth discussing before getting into the example is that the job 151 | classes provided by many job frameworks tend to provide an **around_perform** 152 | method hook or similar functionality to allow for adding middleware-type 153 | behavior around job execution in a generic, unobtrusive way. The example below 154 | also borrows this pattern, however it can be inferred that **JobFramework::Job** 155 | provides this behavior in a very naive manner that relies heavily upon the class 156 | hierarchy and repeated calls to **super**. 157 | 158 | OK, that should be enough background, on to the example! 159 | 160 | **important_job.rb** 161 | 162 | ```ruby 163 | class ImportantJob < JobFramework::Job 164 | # NineLives must be included before ExceptionNotification, 165 | # otherwise up to nine alert emails will be sent per failed 166 | # job and in many cases, exception notifications will be 167 | # sent when the job didn't actually fail! 
168 | include NineLives 169 | include ExceptionNotification 170 | 171 | def perform(*args) 172 | # Important work 173 | end 174 | end 175 | ``` 176 | 177 | **job_extensions.rb** 178 | 179 | ```ruby 180 | module NineLives 181 | def around_perform(*args) 182 | retry_count = 0 183 | begin 184 | super 185 | rescue TransientError 186 | if retry_count < 9 187 | retry_count += 1 188 | retry 189 | else 190 | raise 191 | end 192 | end 193 | end 194 | end 195 | 196 | module ExceptionNotification 197 | def around_perform(*args) 198 | super 199 | rescue 200 | # dispatch an email notification of the exception 201 | end 202 | end 203 | ``` 204 | 205 | Here's a quick rundown of what we can expect the lifetime of an execution of the 206 | **ImportantJob** class to look like: 207 | 208 | 1. Some code somewhere else in the codebase calls **ImportantJob.perform**. 209 | This class level **perform** method is provided by **JobFramework::Job** as a 210 | convenience method to enqueue an **ImportantJob** to be completed 211 | asynchronously. 212 | 2. Elsewhere, a worker process, also typically running code provided by the job 213 | framework, pops the job off of the job queue and instantiates a new instance 214 | of the **ImportantJob** class with the provided arguments. The internals of 215 | the worker process then take steps to execute the job which causes the 216 | **around_perform** method of the instance to be executed. Normally, the 217 | invocation of **around_perform** would simply cause **ImportantJob#perform** 218 | to be executed, however, since we've overwritten **around_perform** a couple 219 | of times, the behavior in the example is not so simple. The first version of 220 | **around_perform** that will be executed, perhaps counterintuitively, is the 221 | version from the last module we included in **ImportantJob**, **ExceptionNotification.around_perform**. 222 | 3. 
**ExceptionNotification.around_perform** immediately calls 223 | **super**, but includes a rescue block that catches any errors that bubble up 224 | and, hypothetically, dispatches email alerts about those exceptions. The 225 | invocation of **super** triggers the **around_perform** method from the first 226 | module we included in **ImportantJob**, **NineLives#around_perform**. 227 | 4. **NineLives#around_perform** is more involved, but its goals 228 | are pretty simple: Similar to **ExceptionNotification.around_perform**, it 229 | calls **super** almost immediately but adds some special error handling that 230 | catches errors of the **TransientError** class. The error handling will retry 231 | the call to **super** up to 9 times if the **TransientError** exception 232 | continues to occur. After 9 times, the error will be raised up to 233 | **ExceptionNotification** at which point an email should be dispatched. The 234 | call to **super** this time around invokes the original **around_perform** 235 | method, **JobFramework::Job#around_perform**, which as we discussed earlier, 236 | invokes **ImportantJob#perform**. 237 | 238 | Now that we've got a solid understanding of the example job, let's see how using 239 | the Module Factory pattern could benefit this class. 240 | 241 | ## What's wrong with a well written comment? 242 | 243 | You may already have an intuition for where we should begin our refactoring to 244 | introduce a Module Factory, but if you don't that's fine too. Personally, I'm 245 | inclined to start with the very first line of the **ImportantJob** class. No, 246 | not **include NineLives**. The honking four line comment that explains why the 247 | **NineLives** module must be included before the **ExceptionNotification** 248 | module. 
In a small enough codebase, the current form of **ImportantJob** might 249 | be fine, but if that codebase is likely to grow, or if the codebase is already 250 | of reasonable size, I'd argue that the comment and the rigid load-order are bad 251 | news. 252 | 253 | You may have your own arguments for or against the current implementation, but 254 | here are my arguments against: 255 | 256 | - That whopper of a comment is going to be repeated in every other job class 257 | that uses both the NineLives and ExceptionNotification modules (and if it's 258 | not, it should be). Trust me, I've seen it happen. Not only is this a 259 | violation of [DRY](https://en.wikipedia.org/wiki/Don%27t_repeat_yourself), 260 | but because it's a comment it's pretty likely to mutate and/or deteriorate 261 | with each subsequent duplication. Eventually this leads to a situation 262 | where a newcomer to the code base doesn't know which version of the comment is 263 | accurate, or, alternatively, you end up with some job classes that tag 264 | **include NineLives** simply with "Must be included before 265 | ExceptionNotification" and no additional explanation. After this reduction, 266 | the comment starts to disappear entirely. 267 | - Without the comment, there is no other clue that there is a load-order 268 | dependency between these two modules. Obviously, this is why the comment was 269 | added, but a comment can't help the situation where a job class that already 270 | includes **NineLives** now needs to include **ExceptionNotification**, or vice 271 | versa. If the dev making the change is lucky enough to have seen the comment 272 | elsewhere in the codebase, or another dev happens to catch the issue in a code 273 | review, maybe you can avoid a Spam dinner, but if not, it's 274 | Spam-a-lam-a-ding-dong until the next deploy goes out. 275 | - What happens when another load-order dependency is added with the inclusion of 276 | a new module? 
Another giant comment in every class that needs some combination 277 | of the three modules? One giant comment that tries to encompass all the 278 | permutations in a generic fashion? How would you feel if the purpose of the 279 | **ImportantJob** class was to perform a payment on a loan and the newly 280 | included module was added to lower someone's credit score every time an 281 | exception bubbled out of **NineLives#around_perform**? It's a bit of a 282 | stretch, but don't think that financial systems are immune to these 283 | situations, and I certainly hope they're using a better design than repeated 284 | comments. 285 | 286 | One could certainly make the argument for handling this issue by introducing 287 | another module to encapsulate the load-order dependency, but in my experience 288 | that doesn't actually solve any of these problems, but instead, it just moves 289 | the problems into other parts of the codebase or mutates them into slightly 290 | different issues. 291 | 292 | While we could explore alternative solutions for handling this situation all 293 | day, let's move on and get an idea of how a Module Factory could be used to 294 | address all of the concerns I've raised. 295 | 296 | ## A Module Factory for job extensions 297 | 298 | Before we look at how we might go about implementing a Module Factory to address 299 | the issues I raised above, let's take a look at what **ImportantJob** might look 300 | like after we refactored it to use a Module Factory. 301 | 302 | ```ruby 303 | class ImportantJob < JobFramework::Job 304 | include JobExtensions.select(:exception_notification, :nine_lives) 305 | 306 | def perform(*args) 307 | # Important work 308 | end 309 | end 310 | ``` 311 | 312 | We have to make some assumptions for now, but hopefully you'll agree that this 313 | is already a significant improvement. 
314 | 315 | We can't yet make a determination on the ultimate fate of the comment because 316 | it's no longer included in **ImportantJob**, but this by itself is a good sign. 317 | Realistically, I don't think there was ever hope of going completely comment 318 | free, but, at least for the moment, things have a much DRYer feeling. 319 | 320 | Otherwise, there's still no hint that a load-order dependency exists somewhere, 321 | but given the order of the arguments to **JobExtensions.select**, we can hope it 322 | doesn't matter anymore. If the order of the arguments truly doesn't matter, then 323 | this also helps the situation where someone wants to add **ExceptionNotification** 324 | to a class that already includes **NineLives**, as it seems like they could just 325 | add the snake-cased name of the extension to the list of selected extensions and 326 | continue on their way. The same applies for any new extension that might be 327 | added in the future. In fact, the use of the snake-cased names actually involves 328 | less coupling than the original version because though the snake-cased names 329 | match the module names in this case, there really is no need for the module name 330 | and the snake-cased name passed to the factory method to match. This means that 331 | the module implementing :nine_lives could change to an entirely different module 332 | with fewer repercussions to the codebase. 333 | 334 | So far, so good. So what kind of sorcery is required to make this interface 335 | possible? Behold! The **JobExtensions** module: 336 | 337 | ```ruby 338 | module JobExtensions 339 | def self.select(*selected_extensions) 340 | Module.new do 341 | # NineLives must be included before ExceptionNotification, 342 | # otherwise up to nine alert emails will be sent per failed 343 | # job and in many cases, exception notifications will be 344 | # sent when the job didn't actually fail! 
345 | if selected_extensions.include?(:nine_lives) 346 | include NineLives 347 | end 348 | if selected_extensions.include?(:exception_notification) 349 | include ExceptionNotification 350 | end 351 | end 352 | end 353 | end 354 | ``` 355 | 356 | Maybe a little magical, but certainly not sorcery, in fact it looks a lot like 357 | we took the comment and includes from the former version of **ImportantJob**, 358 | added some conditional logic, and wrapped all that in a **Module.new** block. 359 | What's going on here? 360 | 361 | I suspect I don't need to explain the internals of the block, but **Module.new** 362 | is definitely worth taking a closer look at on its own. 363 | 364 | [Module.new](http://www.ruby-doc.org/core-2.2.0/Module.html#method-c-new), is 365 | the more metaprogramming-friendly version of your standard module declaration 366 | using the **module** keyword. In fact, when used with a block, it's even more 367 | similar to a standard module declaration than might be obvious because in the 368 | context of the block the target of **self** is the module being constructed. 369 | This behavior is what allows us to make normal calls to **include** without 370 | having to use an explicit receiver or having to call **send**. 371 | 372 | For our particular purposes, **Module.new** does offer one advantage over the 373 | **module** keyword worth mentioning. Because **Module.new** uses a block, a 374 | closure is created that allows us to reach outside of the block and access the 375 | list of **selected_extensions** while building the new module. Access to this 376 | list is crucial to our Module Factory's ability to build a customized module on 377 | demand. Without access to the list we'd have to figure out another way to 378 | assemble the desired module, which is certainly doable, but would be less 379 | pleasant to look at and would require using **send** to circumvent the generated 380 | module's public access rules. 
381 | 382 | Other than the call to **Module.new**, I expect everything else in this factory 383 | method should make sense. We've found our missing comment and can be fairly 384 | confident that in this form it's unlikely to be repeated. If it is repeated in 385 | the future, it will likely be a modified version that documents the load-order 386 | gotchas of a different extension that this Module Factory supports. While there 387 | is probably a better way to document the specifics of this particular load-order 388 | requirement, I'm much less concerned with many similar comments documenting 389 | similar behavior inside a particular method than I am with the same spread all 390 | across the codebase in any number of unaffiliated jobs. 391 | 392 | ## Before you get too excited: A couple of trade offs 393 | 394 | Though the Module Factory we've built certainly helps deal with handling the 395 | load-order logic in a DRY fashion, there are a couple of potential trade offs 396 | that I should mention. These issues can be addressed, but I won't go into great 397 | detail about how to address them. The good news, though, is that both trade offs 398 | are solved by pretty much the same code. 399 | 400 | The first trade off is that generating a module dynamically like we did above 401 | produces a more anonymous module than you might be used to seeing if you usually 402 | create modules using the **module** keyword. For example, here's the fictitious 403 | ancestry of the **ImportantJob** class: 404 | 405 | ```ruby 406 | ImportantJob.ancestors 407 | # => [ 408 | # ImportantJob, #<Module:0x007fa0fe8a2b38>, 409 | # JobFramework::Job, Object, Kernel, BasicObject 410 | # ] 411 | ``` 412 | 413 | That funky Module between **ImportantJob** and **JobFramework::Job** is our 414 | generated module. Though we've handled the load-order issue in a more robust 415 | fashion, we've obscured the class hierarchy which makes it harder to find 416 | information about the class via interrogation or examination. 
417 | 418 | To get some insight into the second trade off introduced by the Module Factory 419 | pattern, let's pretend we've created another job class, **ReallyImportantJob**, 420 | that is an exact duplicate of **ImportantJob**, except named differently. What 421 | does the class hierarchy for **ReallyImportantJob** look like? 422 | 423 | ```ruby 424 | ReallyImportantJob.ancestors 425 | # => [ 426 | # ReallyImportantJob, #<Module:0x007fa0fe8b1c70>, 427 | # JobFramework::Job, Object, Kernel, BasicObject 428 | # ] 429 | ``` 430 | 431 | What may not be clear from this output is that though the two job classes are 432 | made up of the exact same code and modules, each generates its own special 433 | module when the **JobExtensions.select** factory method is called. This can be 434 | seen in the output above in that each of the generated modules is identified 435 | by a different memory address. This might not be the end of the world in a small 436 | codebase, but it should make it clear that every class is going to generate its 437 | own version of the module, even if one matching the requested requirements 438 | already exists. This is obviously inefficient in terms of time and memory, but 439 | it also adds another complication to understanding a class by interrogation or 440 | inspection because though another dev might expect the class hierarchies of 441 | **ImportantJob** and **ReallyImportantJob** to include the same modules, they 442 | don't, but they do, but they don't. 443 | 444 | So what's the solution? Well, it turns out both issues can be solved by dealing 445 | with some naming issues. In terms of the first trade off, the anonymous module, 446 | Ruby uses an anonymous name because we never assigned the module to a constant. 447 | This is one of the implicit benefits of the **module** keyword: you assign the 448 | module to a constant at inception. 
So, if we can come up with a way to generate 449 | a name for the generated module, all we need to do is assign a constant with the 450 | generated name to point to the generated module and Ruby will use that name to 451 | refer to the generated module. 452 | 453 | Though it's not obvious, generating a name also helps us to address the second 454 | trade off of generating a new module every time the factory method is invoked. A 455 | name helps solve this problem because if we can generate a name that uniquely 456 | identifies the contents of a generated module and assign the appropriate 457 | constant, we can also check that constant in the future before generating a new 458 | module. If the constant is defined, we return the previously generated module, 459 | if not, we generate a new module and assign it to the constant. 460 | 461 | In terms of our example job, the actual implementation is left to the reader as 462 | an exercise, but generating a name that uniquely identifies each generated 463 | module could be as simple as creating a string from the sorted, title-cased 464 | collection of extensions that are used in the module being named. Title casing 465 | is important for readability, consistency, and so Ruby will accept the name as a 466 | constant.[^3] Sorting is also important because, at least in the case of our 467 | example, we don't want the order of the arguments to change the name of the 468 | class being created because whether **:exception_notification** is passed in before 469 | **:nine_lives**, or vice versa, both invocations should generate and refer to 470 | the same module. This naming pattern still has some problems because it is still 471 | unclear what the module does, but it is at least a little better than the module 472 | being identified by its raw memory address. 
473 | 474 | ## Closing thoughts 475 | 476 | Though it may not feel like it, this post has really only scratched the surface 477 | of the power and potential of the Module Factory pattern. Though we've discussed 478 | how it can be used to improve code readability, maintainability, reliability, and 479 | flexibility, there's really a lot more opportunity out there. And so, rather 480 | than summarize what we've covered in this post, I'll leave you to ponder these 481 | possibilities: 482 | 483 | - As evidenced by [Kernel#Array](http://www.ruby-doc.org/core-2.2.0/Kernel.html#method-i-Array) 484 | and [Kernel#Integer](http://www.ruby-doc.org/core-2.2.0/Kernel.html#method-i-Integer) 485 | Ruby doesn't require method names to start with a lowercase letter. How might 486 | a method with a title-cased name be used to complement the Module Factory 487 | pattern? Are there trade offs that come with this type of naming convention? 488 | - Ruby method names don't need to be words at all, take for example 489 | [Hash::[]](http://ruby-doc.org/core-2.1.5/Hash.html#method-c-5B-5D). How might 490 | an operator style of method name pair with the Module Factory pattern? 491 | - How else could the power of a method call be leveraged for Module Factory 492 | awesomeness? What magic could be yielded (pun intended!) by a factory method 493 | that takes a block? How might keyword arguments, Hash arguments, or splat 494 | arguments be leveraged in combination with a Module Factory? 495 | - If you've ever used a framework that uses dependency injection like 496 | Javascript's AngularJS, then the examples above may have caused your Spidey 497 | sense to tingle. How might the Module Factory pattern be used for dependency 498 | injection in Ruby? 
499 | 500 | [^1]: Source: [Merriam-Webster](http://www.merriam-webster.com/dictionary/idiom) 501 | [^2]: Source [thefreedictionary.com](http://www.thefreedictionary.com/idiom) 502 | [^3]: A third-party library like [**ActiveSupport**](https://rubygems.org/gems/activesupport) can make the work of title casing the string trivial. 503 | -------------------------------------------------------------------------------- /lib/blog_snippets/articles/on_the_road_from_ruby_journeyman_to_ruby_master/README.md: -------------------------------------------------------------------------------- 1 | Mind-blowingly awful are really the only words that come to mind to describe my 2 | first bunch of Ruby scripts.[^1] Sure, this is probably unfair and over-critical 3 | given that Ruby, algorithms, and the whole shebang were all new to me at the 4 | time, but *damn*. There are so many decisions I can't even begin to comprehend 5 | or defend today. 6 | 7 | I imagine few Ruby devs still have their first scripts available to reflect on. 8 | This may be for the best, yet, as I looked over a few of my early scripts this 9 | past weekend, I began to ponder the value of occasionally revisiting old code 10 | samples to better gauge one's progress and get a periodic sense of perspective. 11 | Similarly, I also found myself contemplating the value of occasionally taking a 12 | step away from production code to draw a new line in the sand recording one's 13 | state as a developer in that moment; A coded testament of one's values, whether 14 | in terms of syntax, tradeoffs, or any number of other metrics; a mile marker 15 | somewhere along the road from Ruby journeyman to Ruby master. 16 | 17 | To that end, in this article I'll be sharing and discussing one of those early 18 | scripts. From there, I'll also leave behind a new mile marker by taking a stab 19 | at how I might solve the same problem today. 
With any luck, we'll all learn 20 | something along the way, and if not, it seems like I'll be back to rant about 21 | the inferior quality of my past work in no time. For now though, onward! 22 | 23 | ## When Danny met Ruby... 24 | 25 | Back in 2009, at the encouragement of my stepfather who thought the future had 26 | great things in store for Ruby and Rails (boy, was he wrong!), I began to 27 | explore the Ruby programming language by trying to solve a few of the math heavy 28 | programming problems over at [Project Euler](https://projecteuler.net/). Up 29 | until this point, I'd only ever done any "programming" in Basic and Visual 30 | Basic, as these were the focus of the programming courses taught at my high 31 | school. I'd argue that I got pretty advanced in my usage of Visual Basic, going 32 | so far as to develop a reasonable grasp on the Win32 API, but given my present 33 | distaste for my early Ruby code, I can only imagine that my earlier VB code must 34 | have been transcendently awful. In VB, I'd only ever written small utilities and 35 | weak attempts at games, so using Ruby to efficiently solve what were essentially 36 | math problems was new territory for me. 37 | 38 | For each problem that I attempted, I followed two rules. First, and obviously, 39 | the computed solution had to be correct. Second, the script had to run to 40 | completion in less than one minute. I don't remember if the second rule was 41 | stipulated from the beginning or if my naive tendency toward brute-force 42 | solutions prompted my stepfather to introduce the rule, but I definitely 43 | remember struggling to get my scripts to run in less than a minute at various 44 | times. For anyone getting started with this type of endeavor, it's definitely a 45 | great constraint to have in place. That said, the problem we're going to look at 46 | today isn't one of those long running problems, in fact, even my early attempts 47 | at solving the problem take less than a second to run. 
Let's have a look, shall 48 | we? 49 | 50 | ## Problem 8: Largest Product in a Series 51 | 52 | Though it's not the first problem I solved, [Problem 8: Largest Product in a 53 | Series](https://projecteuler.net/problem=8), seems like a problem of sufficient 54 | complexity to merit a bit of discussion. For your convenience here is the full 55 | text of the question: 56 | 57 | >The four adjacent digits in the 1000-digit number that have the greatest 58 | >product are 9 × 9 × 8 × 9 = 5832. 59 | > 60 | > 73167176531330624919225119674426574742355349194934 61 | > 96983520312774506326239578318016984801869478851843 62 | > 85861560789112949495459501737958331952853208805511 63 | > 12540698747158523863050715693290963295227443043557 64 | > 66896648950445244523161731856403098711121722383113 65 | > 62229893423380308135336276614282806444486645238749 66 | > 30358907296290491560440772390713810515859307960866 67 | > 70172427121883998797908792274921901699720888093776 68 | > 65727333001053367881220235421809751254540594752243 69 | > 52584907711670556013604839586446706324415722155397 70 | > 53697817977846174064955149290862569321978468622482 71 | > 83972241375657056057490261407972968652414535100474 72 | > 82166370484403199890008895243450658541227588666881 73 | > 16427171479924442928230863465674813919123162824586 74 | > 17866458359124566529476545682848912883142607690042 75 | > 24219022671055626321111109370544217506941658960408 76 | > 07198403850962455444362981230987879927244284909188 77 | > 84580156166097919133875499200524063689912560717606 78 | > 05886116467109405077541002256983155200055935729725 79 | > 71636269561882670428252483600823257530420752963450 80 | > 81 | > Find the thirteen adjacent digits in the 1000-digit number that have the 82 | > greatest product. What is the value of this product? 83 | 84 | It's worth noting that the requirements of the problem were modified in 2014 to 85 | encourage more programmatic solutions to the exercise. 
More specifically, the 86 | question originally asked for the largest product of not 13 adjacent digits but 87 | of just 5 adjacent digits in the 1000-digit number. A minor difference, but one 88 | that will, at the very least, help better explain at least one of the decisions 89 | I made in my 2009 solution. 90 | 91 | To that end, a modified version of my 2009 solution appears below. The solution 92 | has been modified from its original form in two ways. First, as necessitated by 93 | the change in the problem requirements, the solution has been extended, in a 94 | manner consistent with the original solution, to handle runs of 13 digits. 95 | Second, rather than repeat the 1000-digit number, we will assume it is stored in 96 | the constant NUMBER as a Bignum. I won't explain the solution, but hopefully my 97 | discussion of it should help fill in any gaps in understanding. Instead, I'll 98 | jump right into my thoughts on the shortcomings of this script. 99 | 100 | ### 2009 Edition 101 | 102 | ```ruby 103 | a=NUMBER.to_s 104 | big = 0 105 | for i in 1..(987) 106 | su=a[i,1].to_i*a[i+1,1].to_i*a[i+2,1].to_i*a[i+3,1].to_i*a[i+4,1].to_i*a[i+5,1].to_i* 107 | a[i+6,1].to_i*a[i+7,1].to_i*a[i+8,1].to_i*a[i+9,1].to_i*a[i+10,1].to_i*a[i+11,1].to_i* 108 | a[i+12,1].to_i 109 | if su>big 110 | big=su 111 | end 112 | end 113 | puts big 114 | ``` 115 | 116 | #### Where's the whitespace? 117 | 118 | The first thing that strikes me about this script, and many of the others I've 119 | reviewed from this period, is the omission of optional spaces. This is one of 120 | those situations where I can't even begin to understand what I was thinking. 121 | Given that I do add optional spaces in at least one place, we can rule out the 122 | possibility that my spacebar was broken. This being the case, I'm inclined to 123 | believe I simply wasn't thinking about it, but it seems so blatantly obvious to 124 | me now that I find this hard to believe. 
125 | 126 | It is certainly possible that I had no notion of (or concern for) readability. 127 | It's also possible that my mental parser was in a sufficiently unformed, 128 | immature, or plastic state that the omission of optional spaces felt as readable 129 | to me then as when optional spaces were included. This seems a bit unfathomable 130 | now, but that's really all I can come up with. 131 | 132 | In the JavaScript world, you will sometimes see libraries that achieve some feat 133 | in less than 1KB or some other very minimal file size. In JavaScript, where 134 | libraries are typically transmitted over the wire to web browsers across the 135 | world, this type of optimization can be desirable to reduce the size of the 136 | payload being transmitted (though it really should be the job of a minifier). 137 | But in Ruby, where libraries typically live on the server, there is no benefit 138 | to this type of optimization as far as I'm aware. If there is a benefit to this 139 | approach that I am unaware of, I can assure you it's not what I was striving for 140 | at the time. 141 | 142 | #### Hmm, seems like a loop might help... 143 | 144 | Next on my list of grievances is the ginormous series of substring accesses of 145 | the form **a[i+n, 1]**. First, let's get it out of the way that the second 146 | argument to **String#[]** is totally useless here, being as it is that the 147 | default behavior is to return the 1-character substring located at the index 148 | given by the first argument. Normally, this might be an excusable offense, but 149 | given that this snippet could benefit from some serious DRYing, it's a little 150 | more intolerable because the extraneous argument would have to be removed in 13 151 | different places. 152 | 153 | Given that this seems like an obvious situation for a loop of some sort, why the 154 | no loop? 
In this particular case, I do have some recollection of my thinking, 155 | and I'm fairly certain that forgoing a loop was a conscious decision. If you'll 156 | recall, the problem at the time was concerned with 5 consecutive digits instead 157 | of 13 which made the repeated code a little more manageable and perhaps even 158 | tolerable. 159 | 160 | At the time, I may have hoped to gain some performance by skipping the loop and 161 | retrieving each element directly, though this concept seems like it would have 162 | been too advanced for my thinking at the time. Instead, I'm inclined to believe 163 | that I may have chosen five direct accesses because it was easier for me at the 164 | time than setting up a loop, though I'm not sure. Though skipping the loop is a 165 | teensy bit faster, it's clearly not DRY and it also hardcodes an implicit over 166 | specification into the solution that makes it very difficult to change the 167 | length of the series of adjacent digits that should be tested. As such, to 168 | update the code to test a series of 13 digits, I had to more than double the 169 | number of element accesses, moving the code even further from the goals of DRY. 170 | 171 | If it's not already clear, using a simple loop would have been a better choice. 172 | Though insignificantly slower, a simple loop would make the code much DRYer 173 | while also enabling the solution to be more generic. This would better prepare 174 | the solution to handle any number of adjacent digits while also making the code 175 | easier to read, follow, and understand. Generality definitely wasn't something 176 | that was on my mind in solving this problem as we'll see again in a moment. 177 | 178 | #### Maybe one loop was a better choice... 
179 | 180 | Though we can hopefully agree that it seems like a loop would have been a better 181 | choice in the situation above, there are enough problems with that loop already 182 | used that it starts to seem like utilizing another loop might not have been a 183 | good idea. The loop already in use is a **for loop** operating over a range of 184 | Integers that allows for traversing the vector of digits. There are a number of 185 | things about this loop that are less than ideal, some more obvious than others. 186 | 187 | One thing that may stick out to more experienced Rubyists is the choice of a 188 | **for loop** over other alternatives. Though not technically wrong, the **for 189 | loop** is not commonly seen in Ruby and typically more idiomatic loop primitives 190 | are used instead. Another thing that may stick out to more experienced Rubyists 191 | is the unnecessary use of parentheses around the terminal Integer or upper bound 192 | of the Range expression. Again, not wrong per se, but certainly an indicator of 193 | my noob status and perhaps an indicator that I didn't fully grok the Range 194 | expression and perhaps thought I was calling a dot method on the Integer class, 195 | like **Integer#.**, that returned a Range instance when invoked with an Integer 196 | for an argument. Novel perhaps, but wrong. 197 | 198 | Returning to the topic of generality, the loop also hardcodes **two** more 199 | over-specifications into the solution that make the solution more rigid and less 200 | reuseable. As if this weren't bad enough, the two over-specifications interact 201 | with each other in such a way that it's not obvious what's going on. In fact, 202 | they're both encapsulated in the seemingly random choice of 987 for the upper 203 | bound of the Range. 
Being as astute as you are, I imagine if you were paying 204 | attention to the problem description then you've already surmised that 987 is 205 | none other than 1,000, the length of the input number, minus 13, the length of 206 | the run of adjacent digits we're calculating the product of. This upper bound 207 | makes sure our product calculations don't overflow the length of the provided 208 | number. Duh, right? 209 | 210 | Wrapped up there in one little number are three flavors of weak. First, the 211 | hardcoded reference to 1,000 means we won't be able to reliably use this solution 212 | on a similar problem that features a number that is anything other than exactly 213 | 1,000 digits. Second, the hardcoded reference to 13 means yet another place an 214 | update will be required in order to mutate the solution to handle runs of 215 | lengths other than 13. Finally, both of these facts are obscured by the use of 216 | the precalculated value of 987 for the upper bound of the range. Instead of 217 | hardcoding the value, calculating the upper bound by taking the difference of 218 | the length of **NUMBER** and the desired length of adjacent digits would be 219 | better. Having no reliance on knowing the length of **NUMBER** would be even 220 | better, if possible. 221 | 222 | One final point about the loop before we move on: it's wrong! Given the 223 | magnitude of the wrongness, you may prefer to think of it as a bug, but at the 224 | end of the day, it's just plain old wrong. The problem is that the Range starts 225 | at 1, which translates to index 1 of the stringified **NUMBER**. Starting with 226 | index 1 means that the digit at index 0 is totally ignored, which means that if, 227 | by some chance, the 13 consecutive digits with the largest product were the 228 | first 13 digits, this solution would fail to find the correct product. Whether 229 | you call this a bug or broken, it's bad news. So yeah, maybe one loop was the 230 | way to go.
231 | 232 | #### A final look back at 2009 233 | 234 | Before we look at how I might solve this problem today, I want to make two final 235 | points about my 2009 solution. First, the variable names suck. The only variable 236 | name that comes close to being tolerable is **big**, and even that isn't great. 237 | Finally, a compliment. Despite all of its problems, my 2009 solution does excel 238 | as an example of the lowest of low Ruby newbie code. Certainly, that's a 239 | back-handed compliment, but I really could not have written an example like this 240 | today if I wanted to: it simply would have felt far too contrived. 241 | 242 | With the past firmly behind us, let's take a look at how I might solve this 243 | problem today. 244 | 245 | ### Solution 2015 246 | 247 | ```ruby 248 | # Project Euler #8 - Largest product in a series 249 | # https://projecteuler.net/problem=8 250 | # 251 | # Find the thirteen adjacent digits in the 1000-digit number that have the 252 | # greatest product. What is the value of this product? 253 | 254 | def largest_product_in_series(number, adjacency_length = 13) 255 | series = number.to_s 256 | zero_ord = '0'.ord 257 | factors = [] 258 | largest_product = 0 259 | current_product = 1 260 | series.each_char do |new_factor| 261 | # This String-to-Integer conversion assumes we can trust our input will only 262 | # contain digits. If we can safely assume this, calling String#ord and then 263 | # subtracting the ordinal of the String '0' will work faster than 264 | # String#to_i. 265 | new_factor = new_factor.ord - zero_ord 266 | 267 | # If our new_factor is zero, we know that the product of anything 268 | # currently in our collection of factors will be zero. so, rather than 269 | # work through that, just drop the current set of factors, drop the 270 | # zero, reset our current product, and move on to the next iteration. 271 | if new_factor.zero?
272 | factors.clear 273 | current_product = 1 274 | next 275 | end 276 | 277 | factors << new_factor 278 | current_product *= new_factor 279 | next if factors.length < adjacency_length 280 | 281 | largest_product = current_product if current_product > largest_product 282 | current_product /= factors.shift 283 | end 284 | largest_product 285 | end 286 | 287 | puts largest_product_in_series(NUMBER) 288 | ``` 289 | 290 | I think I'm still too close to this solution to offer much objective criticism, 291 | so though I'll touch on a few concerns later, for the most part, we'll leave 292 | criticism to future-Danny to worry about. So, let's start by seeing how the 293 | updated solution fares in regard to some specific points that were brought up 294 | while dissecting my 2009 solution. After that, we'll look at some new goodness 295 | it brings to the table. Like the 2009 solution, I won't explain exactly what's 296 | going on, but hopefully the discussion below and included comments will suffice 297 | to convey the intention of the code. 298 | 299 | #### Lessons learned 300 | 301 | Here's a brief rundown of a few of the concerns I raised about the 2009 solution 302 | and how those concerns have fared in the 2015 solution: 303 | 304 | - Spacing is kind of funny in that you might not think about it if it's there, 305 | but if it's missing you'll definitely notice. Whether you noticed the 306 | additional white space or not, hopefully you'll agree that the use of 307 | consistent white space makes this solution much more readable than its 308 | counterpart. 309 | 310 | - Variable names, like white space, can be a little funny too given how personal 311 | and subjective they tend to be. Whether you think the variable names used in 312 | the updated solution are great, too short, too long, or just a little off, 313 | hopefully we can all agree they are a significant improvement over the 314 | variable names of the 2009 solution.
315 | 316 | - In terms of rigidity and over-specificity, the 2015 solution is much more 317 | flexible and generic. It has no dependency on the length of the number given, 318 | meaning the provided number could be 1,000 digits long or 10,000 digits long. 319 | Though it still needs to know how long a run of digits should be tested, it is 320 | not hardcoded to a certain length. A default length of 13 is used, but this 321 | can easily be overridden by invoking the **largest_product_in_series** method 322 | with a specific value for **adjacency_length**. This means that we could 323 | answer both the original 5-digit version of the question and the updated 324 | 13-digit version of the question with one algorithm. 325 | 326 | - Because the solutions are so different, any discussion in terms of the number 327 | of loops is somewhat moot, however the loop used in the 2015 solution does 328 | have one characteristic that I'd previously suggested could be desirable: it 329 | does not depend upon knowing the length of **NUMBER**. Instead, it iterates 330 | over every character in the String derived from **NUMBER**, **series**, using 331 | String#each_char. In this case, we still know **series** comes from the full 332 | **NUMBER** so, we're not a lot closer to a solution that would work for true 333 | streams of numbers, but the length agnostic nature of the loop is a step in 334 | the right direction. 335 | 336 | - One other big improvement included in the updated solution that we didn't 337 | mention in terms of the 2009 solution is the addition of comments. There are 338 | two flavors of comments in the updated solution that help provide clarity to 339 | the solution. First, the problem description is included as a comment at the 340 | head of the solution. This is really handy for someone else looking at the 341 | code or for coming back to the code six years later. 
Second, comments 342 | explaining some of the solution's logic have been added making it easier for a 343 | reader to understand what is going on and why those decisions were made. 344 | 345 | #### An alternate approach 346 | 347 | Beyond the better coding practices exhibited by the 2015 solution, the solution 348 | also leverages a better approach to solving the problem. Better can be somewhat 349 | subjective, so I should be clear that in this case I think the 2015 solution is 350 | superior because the algorithm is more efficient and offers a performance 351 | improvement of about an order of magnitude while still using about the same 352 | amount of memory. The concept for the alternate approach emerged from two 353 | seemingly unrelated notions, each of which I thought could be useful 354 | independently to squeeze some extra performance out of the algorithm. As it 355 | turns out, they weren't completely independent notions and one is actually much 356 | easier to implement when built on top of the other. 357 | 358 | The first idea for optimization revolved around a means to more efficiently 359 | calculate the new product each iteration. While the 2009 solution calculated the 360 | new product each iteration by performing 12 multiplications, I reasoned that 361 | since we're really only changing two numbers each iteration (the digit going out 362 | of focus and the digit coming in to focus), it should be possible to calculate 363 | the new product with only two operations (divide out the digit going out of 364 | focus, and multiply in the digit coming into focus). The only situation where 365 | this would be complicated is when a zero was encountered because a zero would 366 | effectively destroy our partial product when it got multiplied in, not to 367 | mention trying to divide by zero later would also be a fatal error. 
A better 368 | means of handling zeros would be required to calculate products in this manner 369 | and that's just what the second idea offered. 370 | 371 | The second notion I had for optimizing the algorithm stemmed from removing the 372 | extraneous work that was being performed in the iteration in which a zero was 373 | encountered and the 12 subsequent iterations after. Because zero multiplied by 374 | any other number is always going to be zero, there were effectively 13 375 | iterations for every zero where the algorithm would do all the work despite the 376 | fact that the answer was guaranteed to be zero. It seemed to me that there had 377 | to be a way to avoid this extraneous effort and actually use zeros as a way to 378 | speed up the calculation. As it turns out, handling zeros is pretty easy because 379 | all that needs to be done when a zero is encountered is reset the partial 380 | product to its initial value, 1, and move on. 381 | 382 | With zeros taken care of, the more efficient means of calculating the product is 383 | simplified to keeping a queue of the factors of the partial product. Then, each 384 | iteration the digit going out of focus is removed from the queue and divided 385 | out of the partial product and the number coming into focus is added to the 386 | queue and multiplied into the partial product. One final bit of housekeeping 387 | that is required is that when a zero is encountered, the queue of factors must 388 | be reset as well. 389 | 390 | #### A faster Char#to_i 391 | 392 | One final bit of hackery (of debatable merit) is the means by which the updated 393 | solution turns the String form of a digit into its Integer form. Though 394 | **String#to_i** is the obvious candidate for this conversion, I wondered if 395 | there might be a faster way since this problem has little need for error 396 | checking or converting large strings of digits.
If Ruby had a **Char** class 397 | for single characters, **Char#to_i** would likely have different performance 398 | characteristics than **String#to_i**, and a **Char#to_i** style approach was more what 399 | I was looking for. 400 | 401 | One way I had seen this done for individual numbers in other languages was to 402 | take the ordinal, or character code, of an ASCII number and subtract from it the 403 | ordinal for the character "0" to get the Integer equivalent of the character. 404 | This is exactly what the updated solution does using **String#ord**. In each of 405 | my trials, I found the **String#ord** trick to be 25-30% faster than 406 | **String#to_i**. Whether using this trick is a good idea or not (given that this 407 | method makes no checks to verify that the provided character is a number) is a 408 | whole other blog post. In this particular case, I thought the approach novel and 409 | performant enough to utilize it. 410 | 411 | #### Still a Ruby journeyman: A few concerns 412 | 413 | Before concluding this post, I want to mention a few concerns that have come to 414 | mind as I've spent some time analyzing the updated solution. Most stem from 415 | tradeoffs or implementation details. I can't help but wonder if a few of these 416 | concerns are going to be the reasons future-Danny gives for this solution being 417 | mind-blowingly awful in its own way. 418 | 419 | - Did I put way too much effort into the updated solution? 2009 for all of its 420 | shortcomings was much more pragmatic in that it was all about getting the 421 | correct solution and moving on. The goals of the 2009 solution and the 2015 422 | solution are clearly different, so maybe I put exactly the right amount of 423 | time into the updated solution. I suspect it's something only future-Danny 424 | will be able to make a ruling on. 425 | 426 | - Should the solution include more/any error handling?
The use of the 427 | **String#ord** trick certainly opens up opportunities for misuse. But even 428 | that hack aside, what happens when the number provided is shorter than the 429 | adjacency length? Currently it does a correct thing and returns zero, but 430 | should that raise an error instead? Is additional error handling worth the 431 | time? 432 | 433 | - Why the focus on performance? Is performance really critical for this problem 434 | or is the focus on performance more to provide some concrete metric of how the 435 | efficiency of my programming has improved over the last 6 years? The 436 | **String#ord** trick is nice, but is it really worth the extra complexity, 437 | confusion, and possible bugs? What benefit might a simpler, less efficient 438 | solution offer? 439 | 440 | - Should the **String#ord** trick be extracted into a method to make it easier 441 | to substitute a different means of converting a digit character into its 442 | Integer form? 443 | 444 | - Why convert **NUMBER** to a String? For all the focus on performance, this is 445 | likely not the most efficient option. If **NUMBER** can remain a Bignum and 446 | each of the digits could be extracted from it in Integer form, would that be a 447 | more performant solution? Would it be a simpler solution? 448 | 449 | - Why the long method format? Sandi Metz would likely argue for smaller methods, 450 | as would Martin Fowler. The long method was partly due to performance concerns 451 | and partly because **Replace Method With Method Object** seemed excessive by 452 | the time it made sense. That said, should this method be broken up into 453 | smaller methods encapsulated in a class of some sort? 454 | 455 | ## Happily ever after? 
456 | 457 | Though my exploration of Ruby, and the many other concepts secretly embodied by 458 | the set of problems at Project Euler, didn't pay off in an obvious way at the 459 | time I was focusing on them, I'm happy to have begun my career with Ruby 460 | struggling to write efficient algorithms. Though a friend of mine, a Gopher 461 | through and through, would argue that all Ruby is struggling to write efficient 462 | algorithms, this is a sentiment I've never shared. Perhaps, our disagreement on 463 | the subject stems from my beginnings with Ruby where any algorithmic 464 | inefficiencies were almost always my own and not some fault of the language. 465 | Though there is certainly an argument to be made for using the right tool for 466 | the job, at least in the part of the stack I tend to work in, I have yet to come 467 | across a situation where Ruby was clearly inappropriate. But maybe that's just 468 | me defending an old friend. 469 | 470 | In the end, I'm glad I've held on to my old Project Euler solutions because 471 | though I wouldn't land my first Rails job until late 2011 and I'd spend two more 472 | years on the Microsoft stack dabbling in C# and relational concepts in MSSQL, 473 | and though, for a time, Ruby and I would talk less often, given our history 474 | together, it's nice to be able to look all the way back to the beginning of my 475 | time with Ruby. It helps me to understand that, frankly, I hope to always be 476 | writing code that is four years away from being mind-blowingly awful. If this 477 | stops being the case then I've stopped learning or I've stopped caring and 478 | either way, that'd be pretty sad. 479 | 480 | [^1]: I would **never** talk about another person's code in these terms, especially if that person was as junior as I was when I wrote these scripts. 
In the words of the [Ten Commandments of Egoless Programming](http://blog.stephenwyattbush.com/2012/04/07/dad-and-the-ten-commandments-of-egoless-programming), "Treat people who know less than you with respect, deference, and patience." I hope you too will follow this advice and save harsher criticisms for your own work. 481 | -------------------------------------------------------------------------------- /lib/blog_snippets/articles/tail_call_optimization_in_ruby_internals/README.md: -------------------------------------------------------------------------------- 1 | In [my last post](http://blog.tdg5.com/tail-call-optimization-ruby-background/), 2 | I began an exploration of tail call optimization in Ruby with some 3 | [background on tail call optimization and its little known existence and usage 4 | in Ruby](http://blog.tdg5.com/tail-call-optimization-ruby-background/). 5 | In this post, we'll continue that exploration at a much lower level, moving out 6 | of the Ruby layer and descending to whatever depths are necessary to get to the 7 | bottom of how the Ruby VM implements tail call optimization internally. 8 | 9 | A lot of what follows wouldn't be possible without [Pat Shaughnessy's Ruby Under 10 | a Microscope](http://patshaughnessy.net/ruby-under-a-microscope) (and a healthy 11 | dose of [K & R](https://en.wikipedia.org/wiki/The_C_Programming_Language)). If 12 | you find you enjoy the conceptual level of this article and you're interested in 13 | more, I'd highly recommend [Ruby Under a Microscope](http://patshaughnessy.net/ruby-under-a-microscope). 14 | I found it an enjoyable, empowering, fascinating, and approachable introduction 15 | to the internals of Ruby. 
If you're curious about the book, but you're still 16 | unsure about it, I'd encourage you to check out [Ruby Rogues #146, a book club 17 | episode featuring Ruby Under a Microscope](http://devchat.tv/ruby-rogues/146-rr-book-club-ruby-under-a-microscope-with-pat-shaughnessy) 18 | with guest appearances by the author, [Pat Shaughnessy](http://patshaughnessy.net/), 19 | and [Aaron Patterson](http://tenderlovemaking.com/), of Ruby and Rails fame, and 20 | who also wrote the foreword of the book. It's an enjoyable episode that 21 | definitely helped guide my decision to read the book. 22 | 23 | So, getting on to the subject of today's post. Hold on to your butts. 24 | 25 | ## Revisiting our tail recursive Guinea pig 26 | In [my last post](http://blog.tdg5.com/tail-call-optimization-ruby-background/), 27 | we discovered a tail recursive function in [the Ruby test suite](https://github.com/ruby/ruby/blob/fcf6fa8781fe236a9761ad5d75fa1b87f1afeea2/test/ruby/test_optimization.rb#L213), 28 | which we extracted (with a few tweaks) to demonstrate tail call optimization in 29 | Ruby. We'll need our Guinea pig again for today's exercise, so allow me to 30 | introduce her one more time: 31 | 32 | ```ruby 33 | code = <<-CODE 34 | class Factorial 35 | def self.fact_helper(n, res) 36 | n == 1 ? res : fact_helper(n - 1, n * res) 37 | end 38 | 39 | def self.fact(n) 40 | fact_helper(n, 1) 41 | end 42 | end 43 | CODE 44 | options = { 45 | tailcall_optimization: true, 46 | trace_instruction: false, 47 | } 48 | RubyVM::InstructionSequence.new(code, nil, nil, nil, options).eval 49 | ``` 50 | 51 | I won't go into the details again, but suffice it to say that this code snippet 52 | will add a **Factorial** class with a tail call optimized **fact** method to our 53 | environment. Our journey begins with this class method. 
53 | 54 | ## Initial descent 55 | With our tail recursive Guinea pig revived, we can begin our descent into the 56 | internals of Ruby's implementation of tail call optimization. A month ago I 57 | wouldn't have known where to begin such a quest, but this is where some of the 58 | background and methods employed in [Ruby Under a Microscope](http://patshaughnessy.net/ruby-under-a-microscope) 59 | will be of great utility. 60 | 61 | One method that [Ruby Under a Microscope](http://patshaughnessy.net/ruby-under-a-microscope) 62 | uses to great effect is using [**RubyVM::InstructionSequence#disasm**](http://www.ruby-doc.org/core-2.2.0/RubyVM/InstructionSequence.html#method-c-disasm) 63 | to disassemble Ruby code into the underlying YARV instructions that the Ruby VM 64 | will actually execute at runtime. Using this technique we should be able to 65 | disassemble both a tail call optimized version and an unoptimized version of our 66 | **Factorial#fact** method and compare the instruction sequences for differences. 67 | 68 | Before we continue, let's rewind for a second and discuss YARV. YARV, which 69 | stands for Yet Another Ruby Virtual Machine, is a stack-oriented VM internal to 70 | Ruby that is responsible for compiling your Ruby code into low-level bytecode 71 | instructions (called YARV instructions) and executing those instructions. YARV 72 | was introduced in Ruby 1.9 to improve performance over Ruby 1.8's direct 73 | traversal and interpretation of the Abstract Syntax Tree generated by parsing a 74 | Ruby program. For more insight into how Ruby executes your code, you can 75 | check out an excerpt from [Ruby Under a Microscope](http://patshaughnessy.net/ruby-under-a-microscope), 76 | [How Ruby Executes Your Code by Pat Shaughnessy](http://patshaughnessy.net/2012/6/29/how-ruby-executes-your-code). 77 | 78 | Back to our regularly scheduled broadcast.
80 | 81 | To facilitate comparing the YARV instructions of the tail call optimized and 82 | unoptimized versions of our factorial function, I've tweaked our Guinea pig 83 | script to disassemble both versions of the function and **puts** them to STDOUT. 84 | Here's the resulting script: 85 | 86 | ```ruby 87 | code = <<-CODE 88 | class Factorial 89 | def self.fact_helper(n, res) 90 | n == 1 ? res : fact_helper(n - 1, n * res) 91 | end 92 | 93 | def self.fact(n) 94 | fact_helper(n, 1) 95 | end 96 | end 97 | CODE 98 | 99 | { 100 | 'unoptimized' => { :tailcall_optimization => false, :trace_instruction => false }, 101 | 'tail call optimized' => { :tailcall_optimization => true, :trace_instruction => false }, 102 | }.each do |identifier, compile_options| 103 | instruction_sequence = RubyVM::InstructionSequence.new(code, nil, nil, nil, compile_options) 104 | puts "#{identifier}:\n#{instruction_sequence.disasm}" 105 | end 106 | ``` 107 | 108 | There are two things here worth making note of. First, I've chosen to disable 109 | the trace instruction for both versions to avoid unnecessary differences between 110 | the two instruction sequences that don't actually pertain to how Ruby implements 111 | tail call optimization internally. Second, though it is not explicit in this 112 | script, I am running MRI Ruby 2.2.0 locally, so all of the YARV instructions and 113 | C code that we'll look at are specific to MRI Ruby 2.2.0 and may be different 114 | from other versions. 115 | 116 | You can view [the YARV instructions of the unoptimized Factorial class here](https://github.com/tdg5/blog_snippets/blob/60b19663b0c9a34117b47665045ba66679584e14/lib/blog_snippets/tail_call_optimization_in_ruby_internals/fact_disasm.txt) 117 | and [the YARV instructions of the tail call optimized Factorial class here](https://github.com/tdg5/blog_snippets/blob/60b19663b0c9a34117b47665045ba66679584e14/lib/blog_snippets/tail_call_optimization_in_ruby_internals/fact_tco_disasm.txt). 
118 | 119 | A vimdiff of the two instruction sequences with changed lines highlighted in 120 | purple and the actual changes highlighted in red looks like so: 121 | 122 | [![Differences between the unoptimized Factorial class and the tail call optimized Factorial class](https://tdg5.s3.amazonaws.com/blog/wp-content/uploads/2014/02/tco_diff.jpg)](https://tdg5.s3.amazonaws.com/blog/wp-content/uploads/2014/02/tco_diff.jpg) 123 | 124 | Oh no! Disaster! It seems that our initial descent is somewhat of a failure. 125 | Other than the addition of a **TAILCALL** flag to a few of the 126 | **opt_send_without_block** instructions, the YARV instructions for both the 127 | unoptimized version and the tail call optimized version are **exactly the 128 | same**. What gives? 129 | 130 | From here it seems like our only logical course of action is to descend even 131 | further and look at the C code that makes up those YARV instructions with the 132 | hope that the **TAILCALL** flag is really all that's needed to transform an 133 | unoptimized call into a tail call optimized call. 134 | 135 | ## Descending into the C 136 | We begin our journey into Ruby's C internals where our YARV instructions left 137 | us, with the **opt_send_without_block** instruction. Hopefully, we can find 138 | something in the implementation of that instruction that will help us find 139 | our way to where Ruby implements tail call optimization internally. 140 | 141 | As discussed in [Ruby Under a Microscope](http://patshaughnessy.net/ruby-under-a-microscope), 142 | the definitions that are used during the Ruby build process to generate the C 143 | code for all the YARV instructions live in the Ruby source in [insns.def](https://github.com/ruby/ruby/blob/6c0a375c58e99d1f5f1c9b9754d1bb87f1646f61/insns.def).
144 | With a little grepping, we can find the definition of **opt_send_without_block** 145 | around [line 1047 of insns.def](https://github.com/ruby/ruby/blob/6c0a375c58e99d1f5f1c9b9754d1bb87f1646f61/insns.def#L1047): 146 | 147 | ```c 148 | DEFINE_INSN 149 | opt_send_without_block 150 | (CALL_INFO ci) 151 | (...) 152 | (VALUE val) // inc += -ci->orig_argc; 153 | { 154 | ci->argc = ci->orig_argc; 155 | vm_search_method(ci, ci->recv = TOPN(ci->argc)); 156 | CALL_METHOD(ci); 157 | } 158 | ``` 159 | 160 | As you've almost certainly noticed, this isn't quite C. Rather, during the Ruby 161 | build process this definition is used to generate the actual C code for the 162 | **opt_send_without_block** instruction. [You can view the fully generated C code 163 | for **opt_send_without_block** in all its monstrous glory here](https://github.com/tdg5/blog_snippets/blob/2a9e48ccc10082d37c821e3b838f223597a0d7b6/lib/blog_snippets/tail_call_optimization_in_ruby_internals/opt_send_without_block.vm.inc). 164 | 165 | Luckily, for our purposes, we don't have to go quite to that extreme and can 166 | operate at the instruction definition level. One mutation I will make before we 167 | continue is to expand the **CALL_METHOD** macro and remove some noise added to 168 | facilitate the macro. That brings us to the following: 169 | 170 | ```c 171 | ci->argc = ci->orig_argc; 172 | vm_search_method(ci, ci->recv = TOPN(ci->argc)); 173 | VALUE v = (*(ci)->call)(th, GET_CFP(), (ci)); 174 | if (v == Qundef) { 175 | RESTORE_REGS(); 176 | NEXT_INSN(); 177 | } 178 | else { 179 | val = v; 180 | } 181 | ``` 182 | 183 | OK, so what in the name of Neptune is going on here? Well, the first thing to 184 | notice is there's no sign of tail call optimization here, so the question for 185 | now is, where to next? 186 | 187 | In this case, the **ci** variable is of most interest to our particular quest. 
188 | The **ci** variable references a **rb_call_info_t** struct which encapsulates a 189 | variety of data about a method call including, among other things, the receiver 190 | of the call, how many arguments the call takes, and a reference to the C 191 | function that should actually be executed by the call. It's this final reference, 192 | **ci->call**, that we're most interested in, as we hope to find some trace of 193 | tail call optimization therein. 194 | 195 | From the code above we can ascertain that when the Ruby VM executes a method 196 | call, it invokes the function pointed to by the **rb_call_info_t** struct's 197 | **call** field with the current thread (**th**), the current frame pointer 198 | (result of **GET_CFP**), and the **rb_call_info_t** struct itself (**ci**) for 199 | arguments. 200 | 201 | This is definitely a step in the right direction, but since we have no insight 202 | into the origins of the function pointed to by the **rb_call_info_t** struct's 203 | **call** pointer, we'll need to step backward before we can step forward. 204 | Luckily for us, we literally only need to take one step backward to the previous 205 | line where **vm_search_method** is invoked. 206 | 207 | At this point, rather than drill into every call 208 | that is made on the way to our goal, let's speed things up a bit. We'll still 209 | walk through each step, but we'll be more brief and skip the code snippets 210 | until we get a whiff of tail call optimization. That said, I've collected [the 211 | source for each step of the way from **CALL_METHOD** to the internals of Ruby's 212 | tail call optimization into one file](https://github.com/tdg5/blog_snippets/blob/12cb499a95ced517ee9f70febfa9472e2d055d71/lib/blog_snippets/tail_call_optimization_in_ruby_internals/from_call_method_to_tco.c) 213 | for your viewing pleasure. 214 | 215 | Take a deep breath...
216 | 217 | - The call to [**vm_search_method**](https://github.com/tdg5/blog_snippets/blob/12cb499a95ced517ee9f70febfa9472e2d055d71/lib/blog_snippets/tail_call_optimization_in_ruby_internals/from_call_method_to_tco.c#L2) 218 | is where the value of [**ci->call** is set, and it is set to reference another 219 | function, **vm_call_general**](https://github.com/tdg5/blog_snippets/blob/12cb499a95ced517ee9f70febfa9472e2d055d71/lib/blog_snippets/tail_call_optimization_in_ruby_internals/from_call_method_to_tco.c#L15). 220 | 221 | - [**vm_call_general**](https://github.com/tdg5/blog_snippets/blob/12cb499a95ced517ee9f70febfa9472e2d055d71/lib/blog_snippets/tail_call_optimization_in_ruby_internals/from_call_method_to_tco.c#L24) 222 | when called [invokes and returns the result of another method, **vm_call_method**](https://github.com/tdg5/blog_snippets/blob/12cb499a95ced517ee9f70febfa9472e2d055d71/lib/blog_snippets/tail_call_optimization_in_ruby_internals/from_call_method_to_tco.c#L26). 223 | 224 | - [**vm_call_method**](https://github.com/tdg5/blog_snippets/blob/12cb499a95ced517ee9f70febfa9472e2d055d71/lib/blog_snippets/tail_call_optimization_in_ruby_internals/from_call_method_to_tco.c#L31) 225 | at about 155 lines, is a monster of a function, that handles every type of 226 | method invocation that the Ruby VM supports. It'd be pretty easy to get lost in 227 | this method, but we are fortunate in that we know we are dealing with an 228 | instruction sequence method type because we got to this point from a YARV 229 | instruction. This allows us to jump right to the portion of the 230 | switch statement that deals with instruction sequence type methods. In which 231 | case, [**vm_call_method** returns the result of yet another function invocation **vm_call_iseq_setup**](https://github.com/tdg5/blog_snippets/blob/12cb499a95ced517ee9f70febfa9472e2d055d71/lib/blog_snippets/tail_call_optimization_in_ruby_internals/from_call_method_to_tco.c#L45). 
232 | 233 | (If you're beginning to wonder if this rabbit hole of a descent has a bottom, 234 | don't worry, we're almost there.) 235 | 236 | - [**vm_call_iseq_setup**](https://github.com/tdg5/blog_snippets/blob/12cb499a95ced517ee9f70febfa9472e2d055d71/lib/blog_snippets/tail_call_optimization_in_ruby_internals/from_call_method_to_tco.c#L210) 237 | is a two-liner that sets up the callee of the method and then [returns the 238 | result of another function invocation, **vm_call_iseq_setup_2**](https://github.com/tdg5/blog_snippets/blob/12cb499a95ced517ee9f70febfa9472e2d055d71/lib/blog_snippets/tail_call_optimization_in_ruby_internals/from_call_method_to_tco.c#L213). 239 | 240 | - [**vm_call_iseq_setup_2**](https://github.com/tdg5/blog_snippets/blob/12cb499a95ced517ee9f70febfa9472e2d055d71/lib/blog_snippets/tail_call_optimization_in_ruby_internals/from_call_method_to_tco.c#L218) 241 | is where we finally get our first whiff of tail call optimization. In fact, the 242 | only purpose of **vm_call_iseq_setup_2** is to check if tail call optimization 243 | is enabled and if so [it calls yet another function, **vm_call_iseq_setup_tailcall**](https://github.com/tdg5/blog_snippets/blob/12cb499a95ced517ee9f70febfa9472e2d055d71/lib/blog_snippets/tail_call_optimization_in_ruby_internals/from_call_method_to_tco.c#L224). 244 | 245 | (**So close!** But, while we're here, it's worth noting that normally [when tail 246 | call optimization is not enabled, **vm_call_iseq_setup_2** will call 247 | **vm_call_iseq_setup_normal**](https://github.com/tdg5/blog_snippets/blob/12cb499a95ced517ee9f70febfa9472e2d055d71/lib/blog_snippets/tail_call_optimization_in_ruby_internals/from_call_method_to_tco.c#L221) 248 | instead of **vm_call_iseq_setup_tailcall**. We'll come back to this alternative 249 | path in a moment.) 
250 | 251 | - One look at [**vm_call_iseq_setup_tailcall**](https://github.com/tdg5/blog_snippets/blob/12cb499a95ced517ee9f70febfa9472e2d055d71/lib/blog_snippets/tail_call_optimization_in_ruby_internals/from_call_method_to_tco.c#L252) 252 | and it's obvious that we've found what we've been searching for, the heart of 253 | Ruby's support for tail call optimization. 254 | 255 | Success! Well, sort of, we still need to grok what's going on here, and come to 256 | think of it, where the hell are we? Let's take a look at what's going on inside 257 | **vm_call_iseq_setup_tailcall** and see if we can find our bearings and see how 258 | this call translates into the goodness of tail call optimization. 259 | 260 | ## Just when you were starting to think it was turtles all the way down 261 | Though we could consider **vm_call_iseq_setup_tailcall** on its own, we would 262 | probably do better to use the same strategy that we employed earlier and compare 263 | the unoptimized version to the tail call optimized version, and see what is 264 | different between the two. It didn't work for us last time, but maybe we'll have 265 | better luck this time around. 266 | 267 | We've established that the tail optimized version can be found in 268 | **vm_call_iseq_setup_tailcall**, and if it wasn't obvious from its name or from 269 | my making a point of mentioning it during our descent, the unoptimized version 270 | can be found in [**vm_call_iseq_setup_normal**](https://github.com/tdg5/blog_snippets/blob/12cb499a95ced517ee9f70febfa9472e2d055d71/lib/blog_snippets/tail_call_optimization_in_ruby_internals/from_call_method_to_tco.c#L221). 271 | Looking at both methods at a high level, it looks like we're still in the 272 | process of making the method call, as both of these functions seem to be 273 | preparing Ruby's internal stack prior to pushing a new frame onto the call 274 | stack. 
275 | 276 | Here's a side-by-side vimdiff highlighting the differences between the two 277 | functions, though I should warn you that I made a couple of minor adjustments to 278 | **vm_call_iseq_setup_normal** to suppress irrelevant differences: 279 | 280 | [![Differences between vm_call_iseq_setup_normal and vm_call_iseq_setup_tailcall](http://tdg5.s3.amazonaws.com/blog/wp-content/uploads/2014/02/vm_call_iseq_setup_diff.jpg)](http://tdg5.s3.amazonaws.com/blog/wp-content/uploads/2014/02/vm_call_iseq_setup_diff.jpg) 281 | 282 | Compared to the extremely minimal differences in our initial diff, I'm much 283 | more optimistic that we'll find what we're looking for in this larger change 284 | set. Let's start with **vm_call_iseq_setup_normal** since it is the shorter and 285 | more typical of the two functions. 286 | 287 | ## vm_call_iseq_setup_normal 288 | ```c 289 | VALUE *argv = cfp->sp - ci->argc; 290 | ``` 291 | 292 | **vm_call_iseq_setup_normal** begins by creating a pointer to the position on 293 | the stack where the argument vector (**argv**) for the next iteration of the 294 | recursive call begins. This is achieved by taking the current stack frame's 295 | stack pointer (**cfp->sp**) and moving backward down the stack the appropriate 296 | number of elements, as determined by our old friend the call info struct 297 | (**rb_call_info_t**) and its argument count field (**ci->argc**). 298 | 299 | ```c 300 | rb_iseq_t *iseq = ci->me->def->body.iseq; 301 | ``` 302 | 303 | **vm_call_iseq_setup_normal** then continues by creating a pointer to the 304 | **rb_iseq_t** struct identifying and encapsulating data about the instruction 305 | sequence that will be invoked by this call.
306 | 307 | ```c 308 | VALUE *sp = argv + iseq->param.size; 309 | ``` 310 | 311 | **vm_call_iseq_setup_normal** next creates a new pointer (**sp**) and points it 312 | to where it calculates the end of the argument vector (**argv**) to be using the 313 | value returned by **iseq->param.size**, a field related to the instruction 314 | sequence indicating how many parameters the instruction sequence takes. 315 | 316 | It may seem strange that the VM determines the beginning of **argv** by descending 317 | **ci->argc** elements from the top of the stack and then later finds the end of 318 | **argv** by ascending **iseq->param.size** elements up the stack, however the use 319 | of **iseq->param.size** allows the VM to allocate extra space on the stack in 320 | situations that use special types of arguments. In this case however, our Guinea 321 | pig function uses only simple arguments so **ci->argc** and **iseq->param.size** 322 | are equal. This brings us right back to where we started at the top of the stack. 323 | 324 | ```c 325 | for (i = 0; i < iseq->local_size - iseq->param.size; i++) { 326 | *sp++ = Qnil; 327 | } 328 | ``` 329 | 330 | This next segment is responsible for allocating and clearing out space on the 331 | stack for local variables and special variables that will be required to execute 332 | the method call. In this case, our Guinea pig function doesn't use any local 333 | variables so no space is needed for those, but the VM does need to allocate a 334 | spot on the stack for special variables. That said, though the VM allocates a 335 | spot on the stack for special variables, our function doesn't actually use any 336 | of Ruby's special variables[^1], so that spot on the stack will remain nil. 
337 | 338 | ```c 339 | vm_push_frame(th, iseq, VM_FRAME_MAGIC_METHOD, 340 | ci->recv, ci->defined_class, VM_ENVVAL_BLOCK_PTR(ci->blockptr), 341 | iseq->iseq_encoded + ci->aux.opt_pc, sp, 0, ci->me, iseq->stack_max); 342 | ``` 343 | 344 | For our particular intentions we don't need to get into the nitty-gritty details 345 | of this function invocation, but suffice it to say this call is responsible for 346 | pushing a new frame on to the stack for executing the method call. This new 347 | frame is the next iteration of our recursive function. 348 | 349 | ```c 350 | cfp->sp = argv - 1 /* recv */; 351 | ``` 352 | 353 | This last bit of logic sets the current frame's stack pointer (**cfp->sp**) to 354 | point to the position on the stack just before the beginning of the argument 355 | vector (**argv - 1**). When this line is executed, that position on the stack is 356 | occupied by the receiver of the next iteration of our function call. This may 357 | seem a little strange, but this assignment is preparing the current stack frame 358 | for when it resumes execution after the completion of the frame we've just 359 | pushed on to the stack. When the current frame resumes, it can assume the 360 | arguments further up the stack have already been consumed and should continue 361 | from further down the stack. Though it's not obvious, we'll see in a minute that 362 | this behavior is important for supporting tail call optimization. 363 | 364 | Whew, one down. Now let's take a look at how Ruby handles things differently in 365 | the tail call optimized case. 
366 | 367 | ## vm_call_iseq_setup_tailcall 368 | 369 | ```c 370 | VALUE *argv = cfp->sp - ci->argc; 371 | rb_iseq_t *iseq = ci->me->def->body.iseq; 372 | ``` 373 | 374 | **vm_call_iseq_setup_tailcall** starts exactly the same as its counterpart: It 375 | creates a pointer to the beginning of the argument vector (**argv**) of the next 376 | iteration of our recursive function and extracts a reference to the instruction 377 | sequence struct from the call info struct. 378 | 379 | ```c 380 | VALUE *src_argv = argv; 381 | VALUE *sp_orig, *sp; 382 | VALUE finish_flag = VM_FRAME_TYPE_FINISH_P(cfp) ? VM_FRAME_FLAG_FINISH : 0; 383 | ``` 384 | 385 | Though the functions start the same, **vm_call_iseq_setup_tailcall** soon 386 | distinguishes itself with the allocation of a number of additional variables. 387 | First, a new pointer (**src_argv**) is created pointing to the beginning of the 388 | argument vector (**argv**). Next, two pointers (**sp_orig** and **sp**) are 389 | allocated, but not assigned. Finally, a fourth variable (**finish_flag**) is 390 | allocated and conditionally assigned. 391 | 392 | The final variable, **finish_flag**, is used to allow tail call optimization of 393 | special types of stack frames called **finish frames**. Since we're working with 394 | normal method frames, the **finish_flag** variable can be safely ignored. 395 | 396 | ```c 397 | cfp = th->cfp = RUBY_VM_PREVIOUS_CONTROL_FRAME(th->cfp); 398 | ``` 399 | 400 | This is where the cleverness of tail call optimization begins to surface. 401 | Whereas the normal recursive strategy continues to accumulate frame after frame, 402 | this line begins to demonstrate how an optimized tail recursive call can avoid 403 | doing so. 404 | 405 | The secret sauce behind the success of **vm_call_iseq_setup_tailcall**, and tail 406 | call optimization in general, is that each iteration actually removes itself 407 | from the stack, as part of invoking the next iteration. 
Since the nature of 408 | recursion can make discussion difficult, it's worth taking a moment here for 409 | clarity. 410 | 411 | The beginning of **vm_call_iseq_setup_tailcall**, places us at the point in the 412 | sequence of events where the current frame, iteration n of 413 | **Factorial.fact_helper**, is preparing the stack for the recursive invocation 414 | of iteration n+1 of **Factorial.fact_helper**. Iteration n, after storing a 415 | reference to the argument vector intended for iteration n+1, pops the current 416 | stack frame (itself) off of the call stack, effectively removing itself from the 417 | stack and giving the appearance that **Factorial.fact** is the call in the stack 418 | before iteration n+1 of **Factorial.fact_helper**. 419 | 420 | In terms of another metaphor, if you think of the factorial calculation as 421 | exercise and the call stack as distance traveled, tail call optimization is kind 422 | of like a hamster (or Guinea pig) running on a hamster wheel. Though both the 423 | hamster and the recursive call are running in place, they both still make 424 | progress on the work they are performing. This analogy may also elucidate why 425 | tail recursion can be thought of as a special kind of loop construct. 426 | 427 | Returning our focus to **vm_call_iseq_setup_tailcall**, after popping the 428 | current frame from the call stack, **vm_call_iseq_setup_tailcall** then updates 429 | the thread's current frame pointer (**th->cfp**) and the **cfp** variable to 430 | point at the stack frame prior to the invocation of our tail recursive function, 431 | **Factorial.fact**. 432 | 433 | Though this mechanism allows tail call optimization to avoid the stack overflows 434 | inherent to its counterpart, we will see in a moment that it also has other 435 | benefits. 436 | 437 | ```c 438 | RUBY_VM_CHECK_INTS(th); 439 | ``` 440 | 441 | This line handles a little extra bookkeeping that tail call optimization in Ruby 442 | incurs. 
Usually, when Ruby switches from one stack frame to another, it takes a 443 | moment to check for pending interrupts. However, since the stack frame was 444 | manually popped off of the call stack, the check for interrupts must also be 445 | handled manually. 446 | 447 | ```c 448 | sp_orig = sp = cfp->sp; 449 | ``` 450 | 451 | Though it is pretty clear that this line assigns the **sp_orig** and **sp** 452 | variables to the value stored in the current frame's stack pointer (**cfp->sp**) 453 | field, keep in mind that **cfp** now refers to the call to **Factorial.fact**. 454 | 455 | As you'll recall from the normal setup function, before the first invocation of 456 | **Factorial.fact_helper**, the previous frame (**Factorial.fact**) would have 457 | rewound its stack pointer to the position on the stack that it should resume 458 | execution from, which would have been the point on the stack right before the 459 | arguments consumed by the first iteration of **Factorial.fact_helper**. This 460 | behavior benefits tail call optimization in a few ways. 461 | 462 | First, because the function call that just ended is exactly the same as the one 463 | that's being set up, it can be assumed that there's enough room on the stack for 464 | the call being prepared. This means that the stack pointer from the call prior 465 | to our tail optimized call (**cfp->sp**) can be used as the starting position 466 | for the new stack (**sp**) that's being assembled. 467 | 468 | Second, because the character of the stack is likely consistent for each 469 | recursive call, less overhead is required when setting up the stack. For 470 | example, earlier I mentioned that the Ruby VM allocates a spot on the stack for 471 | special variables that might be used by the function, but that since the 472 | function doesn't use any special variables, that field remains nil.
Because of 473 | the alignment of values on the stack from iteration to iteration, that nil field is 474 | actually only assigned on the first iteration and on every other iteration the 475 | assignment can be skipped because the value is already nil. 476 | 477 | The final benefit that comes from being able to reuse the stack pointer from the 478 | stack frame prior to our tail optimized call (**cfp->sp**) is that that same 479 | pointer also doubles as a pointer to the place on the stack that our current 480 | frame's stack pointer (**cfp->sp**) will need to be rewound later. To 481 | facilitate this usage a reference is set aside in **sp_orig** for later use. 482 | 483 | ```c 484 | sp[0] = ci->recv; 485 | sp++; 486 | ``` 487 | 488 | With this line, **vm_call_iseq_setup_tailcall** begins to rebuild the stack for 489 | the next iteration of the recursive call. To achieve this, it first pushes the 490 | receiver of the call (**ci->recv**) into the position at the head of the stack 491 | (**sp[0]**), and increments the stack pointer to the next position. 492 | 493 | ```c 494 | for (i=0; i < iseq->param.size; i++) { 495 | *sp++ = src_argv[i]; 496 | } 497 | ``` 498 | 499 | Next, the function continues by pushing each of the arguments for the next 500 | iteration onto the stack. This is where it becomes clear why a reference to the 501 | next iteration's argument vector is needed, as the **cfp** pointer was replaced, 502 | and without this reference (**src_argv**) there'd be no consistent means by 503 | which to access those arguments. 504 | 505 | This loop is also responsible for the behavior I alluded to above where each 506 | argument is written to a consistent position on the stack with each iteration.
507 | 508 | 509 | ```c 510 | for (i = 0; i < iseq->local_size - iseq->param.size; i++) { 511 | *sp++ = Qnil; 512 | } 513 | ``` 514 | 515 | Consistent with the normal setup function, the tail call optimized setup function 516 | also reserves and resets additional space on the stack for the method call as 517 | required. 518 | 519 | ```c 520 | vm_push_frame(th, iseq, VM_FRAME_MAGIC_METHOD | finish_flag, 521 | ci->recv, ci->defined_class, VM_ENVVAL_BLOCK_PTR(ci->blockptr), 522 | iseq->iseq_encoded + ci->aux.opt_pc, sp, 0, ci->me, iseq->stack_max); 523 | ``` 524 | 525 | The process of pushing a new frame on to the stack is almost exactly the same as 526 | in the normal setup function, except for one slight difference: The bitwise 527 | logic related to the **finish_flag** variable is added to allow tail call 528 | optimization to be performed on **finish frames** as we briefly discussed 529 | earlier. 530 | 531 | ```c 532 | cfp->sp = sp_orig; 533 | ``` 534 | 535 | Last but not least, after pushing the new frame on to the stack, the setup 536 | function sets the current frame pointer's stack pointer (**cfp->sp**) to the 537 | point on the stack that it should resume from. In this case, that position 538 | matches the original position of the frame's stack pointer which was tucked away 539 | in **sp_orig** for later use. 540 | 541 | At this point we're back in sync with **vm_call_iseq_setup_normal**, but whereas 542 | **vm_call_iseq_setup_normal** would have picked up another stack frame, after 543 | some minor stack shuffling, **vm_call_iseq_setup_tailcall** leaves us right back 544 | where we started, but one step closer to the solution to our factorial 545 | calculation. 546 | 547 | ## The bends 548 | Wow. I don't know about you, but I didn't expect the bottom to be quite so far 549 | down there. Though I'm eager to come back up for air, as are you I'm sure, it's 550 | worth deferring our ascent a moment to reflect on what we found in the depths. 
551 | 552 | Ruby's implementation of tail call optimization emerges from the Ruby VM's 553 | stack-oriented nature and ability to discard the current stack frame as it 554 | prepares the next frame for execution. Given this design it becomes more clear 555 | why tail call optimization is handled by Ruby on the C side instead of on the 556 | YARV side since method call setup is below the conceptual level at which YARV 557 | tends to work. 558 | 559 | In the end, there's a satirical humor in that we had to go to such depths to 560 | understand the facilities that allow the Ruby VM to handle tail recursive 561 | functions like treading water at the top of the stack. 562 | 563 | It's been a long journey, but I hope you learned something along the way, I know 564 | I certainly did. Thanks for reading! 565 | 566 | (I swear my next post will be shorter!) 567 | 568 | [^1]: Ruby's special **$** variables are out of the scope of this article, but you can see where the [parser defines the various special variables here](https://github.com/ruby/ruby/blob/17a65c320d9ce3bce3d7fe0177d74bf78314b8fa/parse.y#L7606). 569 | -------------------------------------------------------------------------------- /lib/blog_snippets/articles/eager_boolean_operators/README.md: -------------------------------------------------------------------------------- 1 | ![I'm going to continue never washing this cheek again](https://s3.amazonaws.com/tdg5/blog/wp-content/uploads/2015/02/24024043/never_washing_this_cheek_again.jpg) 2 | In relaying the story of eager Boolean operators, it is best to begin with their 3 | more ubiquitous siblings, short-circuiting logical Boolean operators. 
This is 4 | perhaps best achieved with an example: 5 | 6 | ```ruby 7 | true || Seriously(this(is(valid(Ruby!)))) 8 | # => true 9 | 10 | false && 0/0 11 | # => false 12 | ``` 13 | 14 | In Ruby, and many other common programming languages,[^1] the Boolean operators 15 | used for chaining together logical expressions are designed to minimize the 16 | amount of work required to determine the outcome of a logical expression. More 17 | specifically, when determining the outcome of a logical expression as few of 18 | the statements in the expression will be evaluated as possible. In the previous 19 | example, this notion, known as [short-circuit evaluation](https://en.wikipedia.org/wiki/Short-circuit_evaluation), 20 | is exploited to include some very bad code in a manner that renders that bad 21 | code completely innocuous. 22 | 23 | In the first example, the short-circuiting behavior of the **||** Boolean 24 | operator, representing a [logical **OR** or logical disjunction](https://en.wikipedia.org/wiki/Logical_disjunction) 25 | operation, prevents a series of undefined methods from causing a fatal 26 | **NoMethodError** exception. This code can safely be executed because when the 27 | first argument of an **OR** operation is **true** then the overall value of the 28 | expression must also be **true**. Put more simply, **true OR _anything_** will 29 | always result in **true**. Given this logical maxim, at runtime the program does 30 | not need to execute the right-hand side of the expression and can move on 31 | without executing the explosive code. 32 | 33 | Similarly, in the second example, the short-circuiting behavior of the **&&** 34 | Boolean operator, representing a [logical **AND** or logical conjunction](https://en.wikipedia.org/wiki/Logical_conjunction) 35 | operation, prevents a fatal **ZeroDivisionError** exception. 
This code can 36 | safely be executed because when the first argument of an **AND** operation is 37 | **false** then the overall value of the expression must also be **false**. In 38 | simpler terms, **false AND _anything_** will always result in **false**. Given 39 | this basic tenet of Boolean logic, at runtime the program can decide the 40 | outcome of the logical expression without executing the subversive right-hand 41 | side of the expression. 42 | 43 | It's interesting to note that, because of their short-circuiting behavior, the 44 | **||** and **&&** Boolean operators are more than just logical operators, 45 | they actually also function as control structures. To demonstrate this, 46 | though the previous example used Boolean operators, it could just as easily 47 | have been written with more traditional flow control structures like **if** or 48 | **unless**: 49 | 50 | ```ruby 51 | # The true result is lost, but we weren't storing it anyway, so no problemo. 52 | Seriously(this(is(valid(Ruby!)))) unless true 53 | # => nil 54 | 55 | # Again, the result of false is lost, but for this example that's okay. 56 | 0/0 if false 57 | # => nil 58 | ``` 59 | 60 | Eager Boolean operators come into play when someone inevitably asks the 61 | question, "what if we don't want to short-circuit?" 62 | 63 | ## Eager Boolean Operators 64 | 65 | As their name suggests, eager Boolean operators are logical operators that do 66 | not short-circuit. Instead, even when the outcome of a logical expression is 67 | determined, they continue to execute the logical expression until it has been 68 | fully evaluated. If we changed our example of short-circuiting Boolean operators 69 | to use eager Boolean operators instead, we'd no longer be safe from that 70 | sinister code.
Here it is again as such with a couple of other tweaks: 71 | 72 | ```ruby 73 | begin 74 | true | Seriously(this(is(valid(Ruby!)))) 75 | rescue NoMethodError => e 76 | e.class 77 | end 78 | # => NoMethodError 79 | 80 | begin 81 | false & 0/0 82 | rescue ZeroDivisionError => e 83 | e.class 84 | end 85 | # => ZeroDivisionError 86 | ``` 87 | 88 | In the first example above, I've modified the earlier example to replace the 89 | **||** Boolean operator with an alternative Boolean operator included in Ruby 90 | that offers eager evaluation of logical **OR** expressions, **|**. Though more 91 | commonly used for bitwise operations, when used with **true**, **false**, or 92 | **nil**, the **|** operator functions similarly to its counterpart, **||**, 93 | except without the short-circuiting behavior. Evidence of this eager evaluation 94 | behavior can be seen above in that the outcome of the **begin** block is not 95 | true, as would be the case if **|** were a short circuiting operator, but it is 96 | instead the exception class we would expect to be raised if the right-hand side 97 | of the logical expression had been evaluated. 98 | 99 | Similarly, in the second example above, I've modified the earlier example and 100 | replaced the **&&** Boolean operator with Ruby's eager Boolean **AND** 101 | operator, **&**. Also more commonly used in bitwise expressions, when used with 102 | **true**, **false**, or **nil**, the **&** operator behaves similarly to its 103 | short-circuiting cousin, **&&**, except that it eagerly evaluates the right-hand 104 | side of the logical expression even if the overall outcome of the expression has 105 | already been determined. Once again, this behavior can be seen in that the 106 | result of the **begin** block is the **ZeroDivisionError** class, which would 107 | only be the case if the right-hand side of the logical expression had been 108 | evaluated. 
109 | 110 | Though this example helps demonstrate the eager evaluation properties of the 111 | **|** and **&** Boolean operators, given its explosive nature, it doesn't offer 112 | much insight into how eager Boolean operators might be useful. Having addressed 113 | the question of "what if we don't want to short-circuit?", let us consider 114 | another question that may actually be a better answer to the question than the 115 | one I've just outlined: "why wouldn't you want to short-circuit?" 116 | 117 | ## Bitwise digression 118 | 119 | Before we look at a handful of examples of eager Boolean operators, I'd like to 120 | digress for a moment for a brief discussion of bitwise Boolean operators. 121 | Bitwise Boolean operators are operators like **&** and **|** that perform 122 | operations on Boolean values as though those Boolean values were bits or binary 123 | 0s and 1s, where **false** and **nil** are both 0 and **true** is 1. For 124 | example, consider the following truth table for the **&** bitwise operation that 125 | demonstrates the equivalence of the two operations. 126 | 127 | | Truth of & | nil ( 0 ) | false ( 0 ) | true ( 1 ) | 128 | |------------|-------------|-------------|-------------| 129 | |nil ( 0 ) | false ( 0 ) | false ( 0 ) | false ( 0 ) | 130 | |false ( 0 ) | false ( 0 ) | false ( 0 ) | false ( 0 ) | 131 | |true ( 1 ) | false ( 0 ) | false ( 0 ) | true ( 1 ) | 132 | 133 | One behavior of bitwise Boolean operators worth noting is that they always 134 | return a Boolean value. Even if the second argument to a bitwise Boolean 135 | operator is truthy or falsy, or even if the first argument to the bitwise 136 | Boolean operator is falsy, as is the case with **nil**, the result of the 137 | expression will still be a Boolean value. This is in contrast to their logical 138 | Boolean counterparts who are more than content to return a truthy or falsy value 139 | in place of a strict Boolean value. 
140 | 141 | This behavior can be useful at times, but can certainly come as a surprise to 142 | those who are more familiar with the more ubiquitous logical Boolean operators 143 | and their penchant for returning truthy and falsy values. The behavior of 144 | bitwise Boolean operators can also surprise the unaware in that unlike the 145 | logical Boolean operators which can be invoked with any two values, the bitwise 146 | Boolean operators must be invoked with either **true**, **false**, or **nil** on 147 | the left-hand side of the expression, otherwise, an error or other unexpected 148 | behavior will occur. 149 | 150 | In terms of eager Boolean operators, the bitwise Boolean operators are important 151 | because the eager Boolean operators are a sort of subset of the bitwise Boolean 152 | operators. The **&** and **|** operators are both bitwise Boolean operators, but 153 | in the cases of **true | _anything_** and **false & _anything_** they are also 154 | eager Boolean operators. If this is unclear, the following examples may help. 155 | 156 | ## Eager Boolean Operators in Practice 157 | 158 | Let's look at a couple of examples of eager Boolean operators in practice. After 159 | we've considered a couple of examples, perhaps we'll be better prepared to take 160 | a step back and get more clarity on what aspects or behaviors of eager 161 | evaluation are exploited by these examples in the name of utility. I've done 162 | what I can to try to find examples of eager Boolean operators out in the wild, 163 | but I've not had enormous success. To that end, I've tried to evaluate and order 164 | the examples below in terms of utility. Some examples are mine, some come from 165 | more popular libraries. 166 | 167 | ### Enumerable#eager_all? 168 | 169 | The first example is far and away the best use-case I've found for both bitwise 170 | and eager Boolean operators that I've come across. 
The example below uses the 171 | bitwise **AND** operator, **&**, to create a version of 172 | [**Enumerable#all?**](http://ruby-doc.org/core-2.2.0/Enumerable.html#method-i-all-3F) 173 | that is guaranteed to evaluate all elements in a collection. This is different 174 | from the normal behavior of **Enumerable#all?** in that **Enumerable#all?** 175 | normally discontinues evaluation of the collection as soon as any element in the 176 | collection returns **false** for the provided block. 177 | 178 | ```ruby 179 | module Enumerable 180 | def eager_all? 181 | inject(true) do |result, item| 182 | result & (block_given? ? yield(item) : item) 183 | end 184 | end 185 | end 186 | ``` 187 | 188 | This example leverages the **&** operator to ensure that the right-hand side of 189 | the logical expression is always evaluated. This behavior is combined with 190 | [**Enumerable#inject**](http://ruby-doc.org/core-2.2.0/Enumerable.html#method-i-inject) 191 | to ensure that all elements of the collection are evaluated, ultimately 192 | accumulating to the correct result. 193 | 194 | The astute among you may have noticed that this example could alternatively have 195 | used the short-circuiting **&&** Boolean operator by flipping the operands like 196 | so: 197 | 198 | ```ruby 199 | module Enumerable 200 | def alternative_eager_all? 201 | inject(true) do |result, item| 202 | (block_given? ? yield(item) : item) && result 203 | end 204 | end 205 | end 206 | ``` 207 | 208 | Though this is true, at runtime this alternative approach draws attention to the 209 | bitwise nature of the **&** operator as compared to its short-circuiting cousin, 210 | **&&**, a difference in nature which I think in this case gives the eager 211 | Boolean operator the edge. 
The bitwise nature I refer to is, as I mentioned 212 | before and as is demonstrated below, that eager Boolean operators will always return 213 | **true** or **false** while the short-circuiting Boolean operators could return 214 | any object depending on the operator and the arguments given to it. We don't 215 | have to worry about *any* object in the alternative example since the result of 216 | the yield is combined with **true** or **false** using **&&**, but we do have 217 | to worry about one other object, **nil**. Because of the short-circuiting nature 218 | of **&&**, if the result of the **yield** is **nil**, the result of the call to 219 | **alternative_eager_all?** will also result in **nil** as demonstrated below: 220 | 221 | ```ruby 222 | [false, nil].eager_all? 223 | # => false 224 | 225 | [false, nil].alternative_eager_all? 226 | # => nil 227 | ``` 228 | 229 | Given that **nil** is also falsy, this isn't really a problem, but I think it 230 | does make **alternative_eager_all?** less robust than it could be. 231 | 232 | Another way the **nil** case could be handled without resorting to using an 233 | eager Boolean operator is by double negating the result of the **inject** call 234 | to ensure that a Boolean is returned. That would look like this: 235 | 236 | ```ruby 237 | module Enumerable 238 | def alternative_eager_all? 239 | !!inject(true) do |result, item| 240 | (block_given? ? yield(item) : item) && result 241 | end 242 | end 243 | end 244 | ``` 245 | 246 | Though the practice of double negation is pretty common, as it turns out, the 247 | coercive nature of the bitwise Boolean operators is actually slightly faster 248 | than the more idiomatic double negation.
Consider this benchmark generated using 249 | the [benchmark-ips gem](https://github.com/evanphx/benchmark-ips): 250 | 251 | ```ruby 252 | require "benchmark/ips" 253 | 254 | Benchmark.ips do |bm| 255 | bm.config(:time => 20, :warmup => 5) 256 | 257 | bm.report("Double negate") { !!(true && :a) } 258 | bm.report("Logical bit-wise coerce") { true & :a } 259 | end 260 | 261 | # Calculating -------------------------------------------- 262 | # Double negate 138.008k i/100ms 263 | # Logical bit-wise coerce 139.350k i/100ms 264 | # -------------------------------------------------------- 265 | # Double negate 7.262M (± 1.0%) i/s - 36.434M 266 | # Logical bit-wise coerce 7.825M (± 1.3%) i/s - 39.157M 267 | # -------------------------------------------------------- 268 | ``` 269 | 270 | The difference in performance between the two approaches is pretty negligible 271 | and certainly isn't substantial enough to merit choosing bitwise Boolean 272 | coercion over double negation. Keep in mind also that the bitwise coercion (if 273 | you want to call it that) to **true** or **false** is not without its downside. 274 | As I mentioned before, the coercive behavior of eager Boolean operators may 275 | come as a surprise for developers who are more familiar with the behavior of the 276 | more common short-circuiting logical Boolean operators. 277 | 278 | ### Bringing *before_suite* type behavior to Minitest 279 | 280 | The next example is a bit of questionable code of mine from a few years ago. In 281 | this example, I use the **&** eager Boolean operator in an attempt to emulate 282 | behavior similar to **RSpec's #before_suite** hook in a **Minitest** test case 283 | seeing as **Minitest** does not offer a similar behavior. 
284 | 285 | ```ruby 286 | class SomeTest < Minitest::TestCase 287 | setup { self.class.one_time_setup } 288 | 289 | def self.one_time_setup 290 | return if @setup_complete & @setup_complete ||= true 291 | # Some expensive or non-idempotent setup 292 | end 293 | 294 | def test_something 295 | # ... 296 | end 297 | end 298 | ``` 299 | 300 | At the time, I thought this was clever, probably because of its condensed 301 | nature, but a few years later and I can see that this code is excessively tricky 302 | and has obvious, though minor, inefficiencies. This example exploits two tricks 303 | to create a sort of switch that doesn't fire the first time it's evaluated, but 304 | will fire on all subsequent evaluations. 305 | 306 | The first trick in this example takes advantage of the fact that accessing a 307 | nonexistent instance variable will never result in an error. The second trick 308 | takes advantage of the **&** operator to ensure that even when the 309 | **@setup_complete** instance variable is **nil**, a second statement is 310 | evaluated that will set **@setup_complete** to true, while still returning 311 | **nil** to the **if** statement. These two tricks allow for the described 312 | behavior as more concisely demonstrated below: 313 | 314 | ```ruby 315 | def first_time_only 316 | return if @not_first_time & @not_first_time ||= true 317 | "Hello world!" 318 | end 319 | 320 | first_time_only 321 | # => "Hello world!" 322 | 323 | first_time_only 324 | # => nil 325 | ``` 326 | 327 | The inefficiency of this approach that I referenced earlier is that the 328 | **@not_first_time** variable is going to be evaluated twice every time the 329 | **first_time_only** method is invoked, once on both the left and right hand 330 | sides of the **&** operator. 
Since this evaluation is cheap, it's not the end 331 | of the world, but it starts to beg a question that has been nagging me as I've 332 | become more familiar with bitwise and eager Boolean operators: When is chaining 333 | logical expressions using eager Boolean operators a better choice than just 334 | splitting the expression into two statements? 335 | 336 | In terms of the **first_time_only** example above, the method could be rewritten 337 | like so by splitting the logical expression into two parts instead of relying on 338 | the tricky behavior of the **&** operator: 339 | 340 | ```ruby 341 | def first_time_only 342 | return if @not_first_time 343 | @not_first_time = true 344 | "Hello world!" 345 | end 346 | ``` 347 | 348 | ## Examples from the real world 349 | 350 | I've led with two of my own examples not because of my acute egomania, but 351 | because frankly, I couldn't find many examples of bitwise Boolean operators, 352 | much less eager Boolean operators out there in the wild. Maybe there was a flaw 353 | in the regular expression I used to grep through the wealth of gems I've 354 | accumulated or maybe I've missed some genius examples in the noise of numerical 355 | bitwise expressions and Array intersections, I don't know. 356 | 357 | In the end, I was only able to find 4 examples, and unfortunately, three of 358 | those four were similar enough (two were exactly the same!) to make it really 359 | only worth mentioning one. Making matters worse, I'm not convinced any of the 360 | examples are using eager or bitwise Boolean operators in an effective way. But 361 | again, maybe I'm missing something. You be the judge. 362 | 363 | ### RubySpec: Three flavors of tainted? 364 | 365 | The three very similar examples I mentioned above come from the now defunct 366 | [RubySpec](https://github.com/rubyspec/rubyspec) project. 
Each occurs while 367 | testing whether a **String** has become tainted following a slice operation 368 | [[1]](https://github.com/rubyspec/rubyspec/blob/38b775a32293ce7ec5bdadaa7e70422fb5dc3a68/core/string/slice_spec.rb#L436) 369 | [[2]](https://github.com/rubyspec/rubyspec/blob/38b775a32293ce7ec5bdadaa7e70422fb5dc3a68/core/string/shared/slice.rb#L419) 370 | or a [concatenation using the **+** operator](https://github.com/rubyspec/rubyspec/blob/324c37bb67ea51f197954a37a2c71878eeadea01/core/string/plus_spec.rb#L41). 371 | The example testing concatenation with **+** is the shortest of the bunch, so 372 | let's have a look. 373 | 374 | ```ruby 375 | it "taints the result when self or other is tainted" do 376 | strs = ["", "OK", StringSpecs::MyString.new(""), StringSpecs::MyString.new("OK")] 377 | strs += strs.map { |s| s.dup.taint } 378 | 379 | strs.each do |str| 380 | strs.each do |other| 381 | (str + other).tainted?.should == (str.tainted? | other.tainted?) 382 | end 383 | end 384 | end 385 | ``` 386 | 387 | In this example, a few instances of the **String** class and their tainted alter 388 | egos are created and then each of the instances is concatenated with each of the 389 | other instances using the **+** operator. For each concatenation produced, the 390 | result is tested to ensure that it is considered tainted if either of its 391 | parents were tainted. During the test to determine if a result **String** should 392 | be tainted or not, we find our bitwise Boolean friend, the **|** operator. But 393 | what advantage does the **|** operator offer in this situation over its 394 | short-circuiting counterpart, **||**? 395 | 396 | When **str.tainted?** is **true**, the result of the parenthetical expression will 397 | be **true**, however, keep in mind that **other.tainted?** will still be 398 | evaluated, though the result will be discarded.
Unless there is some hidden side 399 | effect of calling **other.tainted?** at this point in the test, this seems like 400 | extraneous work to me. If there is a side effect to calling **other.tainted?** 401 | at this point in the test, that's a whole other problem because it seems quite 402 | possible that whatever that side effect is, it could have impacted the outcome 403 | of **(str + other).tainted?**, in which case, who knows what's really being 404 | tested. All this taken into account, I'm inclined to believe that 405 | short-circuiting would be a desirable alternative in this case. 406 | 407 | Conversely, when **str.tainted?** is **false**, the result of the parenthetical 408 | expression depends entirely on the outcome of **other.tainted?**. This may seem 409 | good in that when **other.tainted?** is **true**, the parenthetical expression 410 | will be **true** and when **other.tainted?** is **false**, the parenthetical 411 | expression will be **false**. However, as we discussed earlier, the eager 412 | Boolean operators only return **true** or **false** unlike their 413 | short-circuiting counterparts. This means that **other.tainted?** could return 414 | **:wtf?** or **nil** and the parenthetical expression would evaluate to **true** 415 | or **false**, respectively. Perhaps this coercion to **true** or **false** was 416 | the goal in choosing **|** over **||**, but in a test, particularly a test aimed 417 | at describing how the language itself should work, this seems like a bad idea to 418 | me. 419 | 420 | Overall, it seems like **||** would be a much better choice here than **|**, as 421 | it ensures the minimal amount of evaluation is performed while also ensuring 422 | that the output values of both **str.tainted?** and **other.tainted?** are 423 | tested for validity.
424 | 425 | ### Ruby: k-nucleotide benchmark 426 | 427 | The final example we'll look at is a Ruby implementation of the 428 | [k-nucleotide benchmark](http://benchmarksgame.alioth.debian.org/u32/performance.php?test=knucleotide#about). 429 | Unchanged since it was added to the Ruby source tree in 2007, 430 | [bm_so_k_nucleotide.rb](https://github.com/ruby/ruby/blob/75feee0968c9345e7ffd2bda9835fcd60b4c0880/benchmark/bm_so_k_nucleotide.rb#L40) 431 | utilizes the eager Boolean operator **&** to read lines from a file until a line 432 | is encountered that starts with ">". 433 | 434 | ```ruby 435 | while (line !~ /^>/) & line do 436 | seq << line.chomp 437 | line = input.gets 438 | end 439 | ``` 440 | 441 | The purpose of this code is fairly straightforward, however what is less clear, 442 | is the utility of taking the eager logical conjunction (**&**) of **(line !~ /^>/)** 443 | and **line**. 444 | 445 | When the result of the **!~** operation results in **false**, the right-hand 446 | side of the expression will be evaluated and the result discarded. It's 447 | important to keep in mind that this will only happen once because the result of 448 | **false** will end the loop, but more generally speaking, in circumstances 449 | similar to this there's no reason to waste CPU time extraneously evaluating the 450 | right-hand side of the expression. We can be pretty confident that this 451 | operation is wasteful because the value of **line** has no impact on the outcome 452 | of the logical expression and since we know that **line** is a reference to an 453 | object and not a method call, we know that the evaluation of **line** should not 454 | cause any side effects that might be worth preserving. Again though, since the 455 | eager evaluation is only going to happen once for this loop, it's really not of 456 | great concern. 457 | 458 | The case when the **!~** expression evaluates to **true** is a little trickier.
459 | One would think that when the left-hand side of the expression evaluates to 460 | **true**, there would be no point in evaluating **line** as we might expect that 461 | the value of **line** is a **String** that will be coerced into **true** by 462 | **&**. However, the **!~** operator is defined for more than just instances of 463 | **String**. In fact, **true**, **false**, **nil**, and anything that inherits 464 | from **Object** all implement the complement method to **!~**, **=~**, and by 465 | default they all return a value of **nil** for **=~**. This means that in most 466 | cases the **!~** operator will be negating **nil** which means the left-hand 467 | side is going to evaluate to **true** in a lot of cases we might not expect. 468 | 469 | In reality though, I suspect that the real reason the right-hand side of the 470 | expression is included is as a guard against **line** having a value of **nil**. 471 | If this is the case, then the only reason to choose **&** over **&&** would be 472 | the ability of **&** to coerce truthy values to **true**. If the result of the 473 | expression were being stored, this might make sense, however, since the result 474 | of the expression is being used as the condition for a **while** loop, it seems 475 | unlikely that this coercion would yield any perceivable benefit. As such, I 476 | think **&&** would be a better choice here because it is more familiar to most 477 | programmers and it will still guard against **nil** values. 478 | 479 | In the event that a value of **true** is easier for the **while** statement to 480 | consume than other truthy values, we can always flip the condition around like 481 | so: 482 | 483 | ```ruby 484 | while line && (line !~ /^>/) do 485 | # ... 486 | end 487 | ``` 488 | 489 | This arrangement has the added benefit of removing the need for the parentheses 490 | and short-circuiting the **!~** operation in situations where **line** is falsy. 491 | 492 | But why stop there?
Why explicitly guard against **nil** and **false** at all? 493 | Especially when every other **Object** out in the Ruby universe is going to slip 494 | right past this check, resulting in a **NoMethodError** when the program 495 | attempts to call **chomp** on an object that doesn't support **chomp**. When it 496 | comes down to it, the condition of this **while** loop is pretty inadequate. 497 | 498 | A lot of the problem with the condition comes from the negation of the **=~** 499 | operation, what if we could avoid that? Given the regular expression of 500 | **/^>/**, it would seem that we're on the lookout for any line that starts with 501 | ">". But, what if, instead, we changed the condition such that it were **true** 502 | as long as a line started with anything other than ">"? This can be achieved by 503 | modifying the regular expression and would change the **while** loop to look 504 | like so: 505 | 506 | ```ruby 507 | while line =~ /^[^>]/ do 508 | # ... 509 | end 510 | ``` 511 | 512 | Though the regular expression is more complex, I think the whole expression is 513 | much easier to reason about without the negation, extra logical expression, and 514 | parentheses. 
515 | 516 | I've gotten a little off topic here, so we should move on, but before we do so, 517 | here are a few benchmarks generated using the [benchmark-ips gem](https://github.com/evanphx/benchmark-ips) 518 | for the **&**, **&&**, and altered **Regexp** versions of the **while** 519 | loop when run in the actual context of the nucleotide benchmark: 520 | 521 | ```ruby 522 | # Calculating ---------------------------------------------- 523 | # & 2.000 i/100ms 524 | # && 2.000 i/100ms 525 | # Alternate Regexp 3.000 i/100ms 526 | # ---------------------------------------------------------- 527 | # & 27.538 (± 3.6%) i/s - 550.000 528 | # && 28.092 (± 3.6%) i/s - 562.000 529 | # Alternate Regexp 29.000 (± 3.4%) i/s - 582.000 530 | # ---------------------------------------------------------- 531 | ``` 532 | 533 | Very minor performance differences, but another case where bitwise Boolean 534 | operators don't seem to be the best choice for the job. 535 | 536 | ## Optimization by branch avoidance 537 | 538 | Having been through a few examples of eager Boolean operators in Ruby, I imagine 539 | your opinions on the matter are starting to coalesce, I know mine certainly 540 | are. Though I started this article to get a better understanding of when and 541 | why one might want to use eager Boolean operators, the more research I've done, 542 | the more the question for me has become "Why would I ever want to use bitwise or 543 | eager Boolean operators?" 544 | 545 | If you looked at the [list of programming languages that support both short-circuiting and eager Boolean operators](https://en.wikipedia.org/wiki/Short-circuit_evaluation#Support_in_common_programming_languages) 546 | I referenced earlier, you may have noticed that quite a few languages support 547 | both types of operators. This seems like a clue that there is a strong 548 | reason to have both types of operators.
However, perhaps my Google-fu failed me, 549 | but I really couldn't find a strong argument for using eager Boolean operators. 550 | 551 | The best argument I came across that we haven't already discussed in some form 552 | comes from [a Stack Overflow question asking about the difference between the 553 | **||** operator and the **|** operator](https://stackoverflow.com/questions/7101992/why-do-we-usually-use-not-what-is-the-difference/7105382#7105382). 554 | All the way down 8 or 9 answers in is [an answer from Peter Lawrey](http://stackoverflow.com/a/7105382/1169710) 555 | that I think has some merit. Peter writes: 556 | 557 | > Maybe use [eager Boolean operators] when you have very simple boolean 558 | > expressions and the cost of short cutting (i.e. a branch) is greater than the 559 | > time you save by not evaluating the later expressions. 560 | 561 | I was certainly intrigued by this idea, especially since one of the commenters 562 | on Peter's answer claimed to have actually come across this behavior on some 563 | CPUs. 564 | 565 | I could see this type of behavior pretty easily existing in a lower level 566 | language like C, but I had reservations about whether or not something that must 567 | be a pretty minor micro-optimization could bubble all the way up into a higher 568 | level language like Ruby. To find out, I put together the following benchmark, 569 | again making use of the [benchmark-ips gem](https://github.com/evanphx/benchmark-ips): 570 | 571 | ```ruby 572 | require "benchmark/ips" 573 | 574 | Benchmark.ips do |bm| 575 | bm.config(:time => 20, :warmup => 5) 576 | 577 | bm.report(";") { true ; true } 578 | bm.report("&&") { true && true } 579 | bm.report("&") { true & true } 580 | end 581 | ``` 582 | 583 | The goal of this benchmark is to use the simplest case possible to get an idea 584 | of the cost of branching compared to a more strict eager evaluation alternative. 585 | To this end, both the **&&** and **&** operators are benchmarked. 
In addition, 586 | to provide a baseline, the benchmarks also include a version that simply 587 | evaluates **true** twice to ensure a benchmark that includes no branching or 588 | other silly business. I found the results surprising: 589 | 590 | ```ruby 591 | # Calculating ------------------------- 592 | # ; 131.478k i/100ms 593 | # && 128.222k i/100ms 594 | # & 126.305k i/100ms 595 | # ------------------------------------- 596 | # ; 9.346M (± 3.4%) i/s - 186.699M 597 | # && 8.867M (± 3.2%) i/s - 177.075M 598 | # & 7.812M (± 2.6%) i/s - 156.113M 599 | # ------------------------------------- 600 | ``` 601 | 602 | I wasn't surprised to find that **&** wasn't faster than **&&**, but what did 603 | surprise me was how much slower **&** actually was compared to **&&**, 604 | especially in a case where I expected there to be a fairly negligible 605 | difference. It's pretty clear from this benchmark that, at least in Ruby, any 606 | branching that's avoided by using the **&** operator is insignificant in 607 | comparison to other overhead. But what could that other overhead be? Though it 608 | may surprise you, that overhead is a method call. *Say what?* 609 | 610 | ## Holy method calls, Batman! 611 | 612 | As it turns out, in the case of Boolean values, bitwise operators like **&** and 613 | **|** aren't so much operators as they are methods on **TrueClass**, 614 | **FalseClass**, and **NilClass**! Consider for example the C source of the 615 | bitwise **|** method on **TrueClass**: 616 | 617 | ```c 618 | static VALUE 619 | true_or(VALUE obj, VALUE obj2) 620 | { 621 | return Qtrue; 622 | } 623 | ``` 624 | 625 | [View on GitHub](https://github.com/ruby/ruby/blob/16294913f71b8a38526096cf6458340b19b45f9f/object.c#L1247) 626 | 627 | Thankfully, this is one of the simplest examples of Ruby's C source you'll come 628 | across. Though it's simple to read, the nuance of what is going on here is a 629 | little more complicated. 
The **true_or** method is simply a method that takes 630 | two arguments (actually only one really since the first argument will always be 631 | the **true** singleton), and regardless of what those arguments are, returns 632 | **true**. What may not be completely obvious from this code is how this method 633 | implementation leads to the eager evaluation of the right-hand side of a logical 634 | expression. 635 | 636 | Throughout this article we've treated **|** like a primitive operator, perhaps 637 | if we treat it more like a method call, it will make it more obvious how this 638 | simple method equates to eager evaluation. Let's consider something along the 639 | lines of the simplest possible case and while we're at it, let's see if 640 | **||** is also implemented as a method on **TrueClass**. Let's see what happens 641 | if we try to use **Object#send**: 642 | 643 | ```ruby 644 | true.send("||", true) 645 | # => NoMethodError: undefined method `||' for true:TrueClass 646 | 647 | true.send("|", true) 648 | # => true 649 | ``` 650 | 651 | Interesting! So we've learned that **||** is not a method, but must be a more 652 | primitive operator. Additionally, we can see much more clearly now that **|** is 653 | definitely a method of **TrueClass**. 654 | 655 | With some closer examination, this example should also help make it clear how 656 | implementing **TrueClass#|** as a method call leads to eager evaluation. Though 657 | the argument we passed to **TrueClass#|** in the example above was a primitive 658 | **true** value, it could have been any arbitrary Ruby expression. Unlike **||** 659 | which could completely ignore the right-hand side of the expression when the 660 | left-hand side of the operation is **true**, **TrueClass#|** cannot skip the 661 | right-hand side of the expression because it is a method call. 
In fact, before 662 | **TrueClass#|** is invoked, the RubyVM has already evaluated the right-hand side 663 | of the expression, reducing it to the value that will be used as the argument to 664 | **TrueClass#|**. 665 | 666 | So, that's the magic behind one of the eager bitwise Boolean operators, what 667 | about another of the eager bitwise Boolean operators? How is that implemented? Is it 668 | also a method call? As it turns out, yes. Consider the implementation of 669 | **TrueClass#&**: 670 | 671 | ```c 672 | static VALUE 673 | true_and(VALUE obj, VALUE obj2) 674 | { 675 | return RTEST(obj2)?Qtrue:Qfalse; 676 | } 677 | ``` 678 | 679 | [View on GitHub](https://github.com/ruby/ruby/blob/16294913f71b8a38526096cf6458340b19b45f9f/object.c#L1225) 680 | 681 | Thankfully, this method is also pretty easy to read. It's a little more 682 | complicated than **TrueClass#|**, but it's pretty easy to see that the method 683 | evaluates the **RTEST** macro on **obj2** and returns **true** or **false** 684 | depending on the outcome of that evaluation. I won't go into the inner workings 685 | of **RTEST**, but you can view [the C source for the **RTEST** macro here](https://github.com/ruby/ruby/blob/01195a202cb9fcc6ddb6cf793868e4c7d85292dc/include/ruby/ruby.h#L422) 686 | if you're interested. Basically, **RTEST** uses a couple of numeric bitwise 687 | operations to determine if its argument is **false** or **nil** and if not 688 | returns **true**, which in turn causes **true_and** to do the same. 689 | 690 | Okay, so given all that, it should make more sense that using a bitwise/eager 691 | Boolean operator would be slower than a more primitive operator. Unfortunately 692 | though, slower execution is not the only drawback of these method-based 693 | bitwise Boolean operators.
694 | 695 | ## Inconsistent precedence 696 | 697 | The fundamentally different nature of the method-based bitwise Boolean 698 | operators and the more primitive logical Boolean operators is unfortunately not 699 | without consequence. The overhead of a method call is only one consequence. 700 | Another consequence is that the bitwise Boolean operators have a different 701 | precedence than their logical cousins. 702 | 703 | I won't get into the nature of [precedence, or order of operations,](https://en.wikipedia.org/wiki/Order_of_operations) 704 | in this article, but I will offer these examples for your consideration: 705 | 706 | ```ruby 707 | true || 1 && 3 708 | # => true 709 | 710 | true | 1 && 3 711 | # => 3 712 | 713 | # wtf? 714 | # `true || 1 && 3` evaluates like `true || (1 && 3)` while 715 | # `true | 1 && 3` evaluates like `(true | 1) && 3` 716 | 717 | 718 | false && true ^ true 719 | # => false 720 | 721 | false & true ^ true 722 | # => true 723 | 724 | # wtf? 725 | # `false && true ^ true` evaluates like `false && (true ^ true)` while 726 | # `false & true ^ true` evaluates like `(false & true) ^ true` 727 | ``` 728 | 729 | As if the bitwise Boolean operators didn't have enough going against them, the 730 | differences in operator precedence reek too much of a 4-hour debugging session 731 | for my taste. 732 | 733 | ## The case against bitwise Boolean operators 734 | 735 | Though I started this article with an agenda for finding a use-case appropriate 736 | for eager Boolean operators, the search for such a use-case has ultimately led 737 | me to the opposite end of the spectrum. Where once I sought to bring light to 738 | eager Boolean operators, I now find myself at odds with the whole family of 739 | bitwise Boolean operators.
We've been through many of the arguments against, but 740 | here they are again, in summary: 741 | 742 | - Rare usage in community code suggests limited understanding and familiarity 743 | - The primary benefit of eager evaluation is side effects. 744 | - Side effects make the code harder to debug, harder to reason about, and 745 | harder to test. 746 | - Errors encountered during eager evaluation occur before assignment operations 747 | - Even if errors during eager evaluation are caught, the value of the logical 748 | expression is lost.[^2] 749 | - Bitwise Boolean operators have too many differences from their logical 750 | counterparts. 751 | - Return values are converted to Booleans 752 | - Operator precedence is different 753 | - Operators are implemented as method calls, which are about 10% slower 754 | - Can only be invoked on **true**, **false**, or **nil** 755 | 756 | With such an abundance of arguments against, arguments in favor had better 757 | be significant in length or benefit. Unfortunately, they're not. 758 | 759 | - Conversion of return values to Booleans slightly faster than double negation. 760 | - Eager evaluation? 761 | - Maybe useful in irb? 762 | 763 | I didn't expect to find so many reasons not to use eager or bitwise Boolean 764 | operators, but maybe that's part of the reason I had so much trouble finding 765 | examples of bitwise Boolean operators at large. With the evidence laid out 766 | before you, I hope you will join me in continuing to never use any of the 767 | bitwise Boolean operators in Ruby without a comment and a damn good reason. 768 | 769 | Thanks for reading! 770 | 771 | *Have I missed something? Do you know of an example of bitwise and/or eager 772 | Boolean operators being used effectively? Have I got it all wrong? Leave me a 773 | comment and let me know! 
I'd love to hear your feedback and/or find a 774 | legitimate reason to utilize the family of bitwise Boolean operators.* 775 | 776 | [^1]: [Short-circuit evaluation - Support in common programming languages](https://en.wikipedia.org/wiki/Short-circuit_evaluation#Support_in_common_programming_languages) 777 | [^2]: [Gist: Errors during eager evaluation cause result of logical expression to be lost](https://gist.github.com/tdg5/12eccaae6132e72c0490) 778 | --------------------------------------------------------------------------------