├── .document ├── .gitignore ├── .travis.yml ├── Gemfile ├── Gemfile.lock ├── History.txt ├── LICENSE ├── README.md ├── Rakefile ├── VERSION ├── bench ├── bench.rb ├── decode_bench.rb ├── encode_bench.rb ├── memsize.rb └── results.txt ├── bert.gemspec ├── ext └── bert │ └── c │ ├── decode.c │ └── extconf.rb ├── gemfiles ├── mochilo-v1 └── mochilo-v2 ├── lib ├── bert.rb └── bert │ ├── bert.rb │ ├── decode.rb │ ├── decoder.rb │ ├── encode.rb │ ├── encoder.rb │ └── types.rb └── test ├── bert_test.rb ├── decoder_test.rb ├── encoder_test.rb └── test_helper.rb /.document: -------------------------------------------------------------------------------- 1 | README.rdoc 2 | lib/**/*.rb 3 | bin/* 4 | features/**/*.feature 5 | LICENSE 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.sw? 2 | .DS_Store 3 | coverage 4 | rdoc 5 | pkg 6 | ext/bert/c/Makefile 7 | ext/bert/c/*.bundle 8 | ext/bert/c/*.o 9 | /vendor/ 10 | /.bundle/ 11 | *.so 12 | /bin/ 13 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: ruby 2 | rvm: 3 | - 2.3.3 4 | - 2.4.0 5 | gemfiles: 6 | - gemfiles/mochilo-v1 7 | - gemfiles/mochilo-v2 8 | env: 9 | - BERT_TEST_IMPL=Ruby 10 | - BERT_TEST_IMPL=C 11 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | # A sample Gemfile 3 | source "https://rubygems.org" 4 | 5 | gemspec 6 | 7 | gem "mochilo", :git => "https://github.com/brianmario/mochilo", :ref => "master" 8 | -------------------------------------------------------------------------------- /Gemfile.lock: -------------------------------------------------------------------------------- 1 | 
GIT 2 | remote: https://github.com/brianmario/mochilo 3 | revision: 8c75eea34bdde298733432b988c8d7f07f56e311 4 | ref: master 5 | specs: 6 | mochilo (2.1) 7 | 8 | PATH 9 | remote: . 10 | specs: 11 | bert (1.1.10) 12 | mochilo (>= 1.3, != 2.0) 13 | 14 | GEM 15 | remote: https://rubygems.org/ 16 | specs: 17 | git (1.3.0) 18 | power_assert (0.4.1) 19 | rake (13.0.1) 20 | rake-compiler (0.9.9) 21 | rake 22 | test-unit (3.2.3) 23 | power_assert 24 | thoughtbot-shoulda (2.11.1) 25 | yajl-ruby (1.3.1) 26 | 27 | PLATFORMS 28 | ruby 29 | 30 | DEPENDENCIES 31 | bert! 32 | git 33 | mochilo! 34 | rake 35 | rake-compiler (~> 0.9.0) 36 | test-unit 37 | thoughtbot-shoulda 38 | yajl-ruby 39 | 40 | BUNDLED WITH 41 | 1.14.6 42 | -------------------------------------------------------------------------------- /History.txt: -------------------------------------------------------------------------------- 1 | = 1.1.6 / 2012-05-25 2 | * Bug fixes 3 | * Better handling of utf-8 characters 4 | 5 | = 1.1.5 / 2011-12-09 6 | * Bug fixes 7 | * Faster and more secure C BERT decoder 8 | * Fix for encoding of negative bignums 9 | * Ruby 1.9 compatibility 10 | 11 | = 1.1.2 / 2010-02-08 12 | * Bug fixes 13 | * Fix bignum handling on 256 byte boundary 14 | * Remove unnecessary rubygems require 15 | 16 | = 1.1.1 / 2010-01-12 17 | * Bug fixes 18 | * require 'stringio' 19 | * Fix number encoding problem on 32 bit arch 20 | 21 | = 1.1.0 / 2009-10-27 22 | * Major Changes 23 | * Remove reliance on Erlectricity. 24 | * Bug fixes 25 | * Fix unsigned int problem in C decoder 26 | * Fix stack overflow segfault in C binary decoder for > 8MB binaries 27 | * Optimize C bytelist decoder 28 | * Fix bignum encoding 29 | 30 | = 1.0.0 / 2009-10-19 31 | * No changes. Production ready! 
32 | 33 | = 0.2.0 / 2009-10-15 34 | * Major changes 35 | * Use {time, MegaSecs, Secs, Microsecs} for time serialization 36 | * Use array of options for regex serialization 37 | * Tests 38 | * Add roundtrip tests 39 | 40 | = 0.1.0 / 2009-10-08 41 | * Birthday! 42 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2009 Tom Preston-Werner 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining 4 | a copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be 12 | included in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | BERT 2 | ==== 3 | 4 | A BERT (Binary ERlang Term) serialization library for Ruby. It can 5 | encode Ruby objects into BERT format and decode BERT binaries into Ruby 6 | objects. 
7 | 8 | See the BERT specification at [bert-rpc.org](http://bert-rpc.org). 9 | 10 | Instances of the following Ruby classes will be automatically converted to the 11 | proper simple BERT type: 12 | 13 | * Integer 14 | * Float 15 | * Symbol 16 | * Array 17 | * String 18 | 19 | Instances of the following Ruby classes will be automatically converted to the 20 | proper complex BERT type: 21 | 22 | * NilClass 23 | * TrueClass 24 | * FalseClass 25 | * Hash 26 | * Time 27 | * Regexp 28 | 29 | To designate tuples, simply prefix an Array literal with a `t` or use the 30 | BERT::Tuple class: 31 | 32 | t[:foo, [1, 2, 3]] 33 | BERT::Tuple[:foo, [1, 2, 3]] 34 | 35 | Both of these will be converted to (in Erlang syntax): 36 | 37 | {foo, [1, 2, 3]} 38 | 39 | 40 | Installation 41 | ------------ 42 | 43 | gem install bert -s http://gemcutter.org 44 | 45 | 46 | Usage 47 | ----- 48 | 49 | require 'bert' 50 | 51 | bert = BERT.encode(t[:user, {:name => 'TPW', :nick => 'mojombo'}]) 52 | # => "\203h\002d\000\004userh\003d\000\004bertd\000\004dictl\000\000\ 53 | 000\002h\002d\000\004namem\000\000\000\003TPWh\002d\000\004nickm\ 54 | 000\000\000\amojomboj" 55 | 56 | BERT.decode(bert) 57 | # => t[:user, {:name=>"TPW", :nick=>"mojombo"}] 58 | 59 | 60 | 61 | Note on Patches/Pull Requests 62 | ----------------------------- 63 | 64 | * Fork the project. 65 | * Make your feature addition or bug fix. 66 | * Add tests for it. This is important so I don't break it in a 67 | future version unintentionally. 68 | * Commit, do not mess with rakefile, version, or history. 69 | (if you want to have your own version, that is fine but 70 | bump version in a commit by itself I can ignore when I pull) 71 | * Send me a pull request. Bonus points for topic branches. 72 | 73 | 74 | Copyright 75 | --------- 76 | 77 | Copyright (c) 2009 Tom Preston-Werner. See LICENSE for details. 
78 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | require 'rubygems' 2 | require 'rake' 3 | 4 | require 'rake/extensiontask' 5 | 6 | gemspec = Gem::Specification::load(File.expand_path('../bert.gemspec', __FILE__)) 7 | 8 | Rake::ExtensionTask.new(gemspec) do |ext| 9 | ext.name = 'decode' 10 | ext.ext_dir = 'ext/bert/c' 11 | ext.lib_dir = 'ext/bert/c' 12 | end 13 | 14 | require 'rake/testtask' 15 | Rake::TestTask.new(:runtests) do |test| 16 | test.libs << 'lib' << 'test' 17 | test.pattern = 'test/**/*_test.rb' 18 | test.verbose = true 19 | end 20 | 21 | task :default => [:compile, :test] 22 | 23 | Rake::TestTask.new do |t| 24 | t.libs << 'lib' << 'test' 25 | t.pattern = 'test/**/*_test.rb' 26 | t.verbose = false 27 | t.warning = true 28 | end 29 | 30 | begin 31 | require 'rcov/rcovtask' 32 | Rcov::RcovTask.new do |test| 33 | test.libs << 'test' 34 | test.pattern = 'test/**/*_test.rb' 35 | test.verbose = true 36 | end 37 | rescue LoadError 38 | task :rcov do 39 | abort "RCov is not available. 
In order to run rcov, you must: sudo gem install spicycode-rcov" 40 | end 41 | end 42 | 43 | task :default => :test 44 | 45 | require 'rdoc/task' 46 | Rake::RDocTask.new do |rdoc| 47 | if File.exist?('VERSION') 48 | version = File.read('VERSION') 49 | else 50 | version = "" 51 | end 52 | 53 | rdoc.rdoc_dir = 'rdoc' 54 | rdoc.title = "bert #{version}" 55 | rdoc.rdoc_files.include('README*') 56 | rdoc.rdoc_files.include('lib/**/*.rb') 57 | end 58 | 59 | task :console do 60 | exec('irb -I lib -rbert') 61 | end 62 | -------------------------------------------------------------------------------- /VERSION: -------------------------------------------------------------------------------- 1 | 1.1.10 2 | -------------------------------------------------------------------------------- /bench/bench.rb: -------------------------------------------------------------------------------- 1 | $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib')) 2 | 3 | require 'rubygems' 4 | require 'bert' 5 | require 'json' 6 | require 'yajl' 7 | require 'benchmark' 8 | 9 | ITER = 1_000 10 | LONG_ITER = 100 11 | 12 | tiny = t[:ok, :awesome] 13 | small = t[:ok, :answers, [42] * 42] 14 | large = ["abc" * 1000] * 100 15 | complex = [42, {:foo => 'bac' * 100}, t[(1..100).to_a]] * 10 16 | long_array = {:a => ["a", :a, Time.now, /a/]*1000} 17 | 18 | Benchmark.bm(30) do |bench| 19 | [:v1, :v2, :v3, :v4].each do |v| 20 | unless BERT.supports?(v) 21 | puts "SKIP #{v} (unsupported)" 22 | next 23 | end 24 | BERT::Encode.version = v 25 | bench.report("BERT #{v} tiny") {ITER.times {BERT.decode(BERT.encode(tiny))}} 26 | bench.report("BERT #{v} small") {ITER.times {BERT.decode(BERT.encode(small))}} 27 | bench.report("BERT #{v} large") {ITER.times {BERT.decode(BERT.encode(large))}} 28 | bench.report("BERT #{v} complex") {ITER.times {BERT.decode(BERT.encode(complex))}} 29 | bench.report("BERT #{v} long array") {LONG_ITER.times {BERT.decode(BERT.encode(long_array))}} 30 | end 31 | 32 | 
bench.report("JSON tiny") {ITER.times {JSON.load(JSON.dump(tiny))}} 33 | bench.report("JSON small") {ITER.times {JSON.load(JSON.dump(small))}} 34 | bench.report("JSON large") {ITER.times {JSON.load(JSON.dump(large))}} 35 | bench.report("JSON complex") {ITER.times {JSON.load(JSON.dump(complex))}} 36 | bench.report("JSON long array") {LONG_ITER.times {JSON.load(JSON.dump(long_array))}} 37 | 38 | bench.report("YAJL tiny") {ITER.times {Yajl::Parser.parse(Yajl::Encoder.encode(tiny))}} 39 | bench.report("YAJL small") {ITER.times {Yajl::Parser.parse(Yajl::Encoder.encode(small))}} 40 | bench.report("YAJL large") {ITER.times {Yajl::Parser.parse(Yajl::Encoder.encode(large))}} 41 | bench.report("YAJL complex") {ITER.times {Yajl::Parser.parse(Yajl::Encoder.encode(complex))}} 42 | bench.report("YAJL long array") {LONG_ITER.times {Yajl::Parser.parse(Yajl::Encoder.encode(long_array))}} 43 | 44 | bench.report("Ruby tiny") {ITER.times {Marshal.load(Marshal.dump(tiny))}} 45 | bench.report("Ruby small") {ITER.times {Marshal.load(Marshal.dump(small))}} 46 | bench.report("Ruby large") {ITER.times {Marshal.load(Marshal.dump(large))}} 47 | bench.report("Ruby complex") {ITER.times {Marshal.load(Marshal.dump(complex))}} 48 | bench.report("Ruby long array") {LONG_ITER.times {Marshal.load(Marshal.dump(long_array))}} 49 | end 50 | -------------------------------------------------------------------------------- /bench/decode_bench.rb: -------------------------------------------------------------------------------- 1 | $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib')) 2 | 3 | require 'rubygems' 4 | require 'json' 5 | require 'yajl' 6 | require 'benchmark' 7 | 8 | ITER = 1_000 9 | 10 | def setup 11 | tiny = t[:ok, :awesome] 12 | small = t[:ok, :answers, [42] * 42] 13 | large = ["abc" * 1000] * 100 14 | complex = [42, {:foo => 'bac' * 100}, t[(1..100).to_a]] * 10 15 | 16 | $tiny_encoded_bert = BERT.encode(tiny) 17 | $small_encoded_bert = BERT.encode(small) 18 | 
$large_encoded_bert = BERT.encode(large) 19 | $complex_encoded_bert = BERT.encode(complex) 20 | 21 | $tiny_encoded_json = JSON.dump(tiny) 22 | $small_encoded_json = JSON.dump(small) 23 | $large_encoded_json = JSON.dump(large) 24 | $complex_encoded_json = JSON.dump(complex) 25 | 26 | $tiny_encoded_yajl = Yajl::Encoder.encode(tiny) 27 | $small_encoded_yajl = Yajl::Encoder.encode(small) 28 | $large_encoded_yajl = Yajl::Encoder.encode(large) 29 | $complex_encoded_yajl = Yajl::Encoder.encode(complex) 30 | 31 | $tiny_encoded_ruby = Marshal.dump(tiny) 32 | $small_encoded_ruby = Marshal.dump(small) 33 | $large_encoded_ruby = Marshal.dump(large) 34 | $complex_encoded_ruby = Marshal.dump(complex) 35 | end 36 | 37 | Benchmark.bm(13) do |bench| 38 | pid = fork do 39 | require 'bert' 40 | raise "Could not load C extension" unless BERT::Decode.impl == 'C' 41 | setup 42 | puts "BERT C Extension Decoder" 43 | bench.report("BERT tiny") {ITER.times {BERT.decode($tiny_encoded_bert)}} 44 | bench.report("BERT small") {ITER.times {BERT.decode($small_encoded_bert)}} 45 | bench.report("BERT large") {ITER.times {BERT.decode($large_encoded_bert)}} 46 | bench.report("BERT complex") {ITER.times {BERT.decode($complex_encoded_bert)}} 47 | puts 48 | end 49 | Process.waitpid(pid) 50 | 51 | pid = fork do 52 | Dir.chdir(File.join(File.dirname(__FILE__), *%w[.. 
ext bert c])) do 53 | ['*.bundle', '*.o'].each { |pat| `rm -f #{pat}` } 54 | end 55 | require 'bert' 56 | raise "Not using Ruby decoder" unless BERT::Decode.impl == 'Ruby' 57 | setup 58 | puts "BERT Pure Ruby Decoder" 59 | bench.report("BERT tiny") {ITER.times {BERT.decode($tiny_encoded_bert)}} 60 | bench.report("BERT small") {ITER.times {BERT.decode($small_encoded_bert)}} 61 | bench.report("BERT large") {ITER.times {BERT.decode($large_encoded_bert)}} 62 | bench.report("BERT complex") {ITER.times {BERT.decode($complex_encoded_bert)}} 63 | puts 64 | end 65 | Process.waitpid(pid) 66 | 67 | require 'bert' 68 | setup 69 | 70 | bench.report("JSON tiny") {ITER.times {JSON.load($tiny_encoded_json)}} 71 | bench.report("JSON small") {ITER.times {JSON.load($small_encoded_json)}} 72 | bench.report("JSON large") {ITER.times {JSON.load($large_encoded_json)}} 73 | bench.report("JSON complex") {ITER.times {JSON.load($complex_encoded_json)}} 74 | puts 75 | 76 | bench.report("YAJL tiny") {ITER.times {Yajl::Parser.parse($tiny_encoded_yajl)}} 77 | bench.report("YAJL small") {ITER.times {Yajl::Parser.parse($small_encoded_yajl)}} 78 | bench.report("YAJL large") {ITER.times {Yajl::Parser.parse($large_encoded_yajl)}} 79 | bench.report("YAJL complex") {ITER.times {Yajl::Parser.parse($complex_encoded_yajl)}} 80 | puts 81 | 82 | bench.report("Ruby tiny") {ITER.times {Marshal.load($tiny_encoded_ruby)}} 83 | bench.report("Ruby small") {ITER.times {Marshal.load($small_encoded_ruby)}} 84 | bench.report("Ruby large") {ITER.times {Marshal.load($large_encoded_ruby)}} 85 | bench.report("Ruby complex") {ITER.times {Marshal.load($complex_encoded_ruby)}} 86 | end 87 | -------------------------------------------------------------------------------- /bench/encode_bench.rb: -------------------------------------------------------------------------------- 1 | $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib')) 2 | 3 | require 'rubygems' 4 | require 'bert' 5 | require 'json' 6 | require 
'yajl' 7 | require 'benchmark' 8 | 9 | ITER = 1_000 10 | 11 | tiny = t[:ok, :awesome] 12 | small = t[:ok, :answers, [42] * 42] 13 | large = ["abc" * 1000] * 100 14 | complex = [42, {:foo => 'bac' * 100}, t[(1..100).to_a]] * 10 15 | 16 | Benchmark.bm(13) do |bench| 17 | bench.report("BERT tiny") {ITER.times {BERT.encode(tiny)}} 18 | bench.report("BERT small") {ITER.times {BERT.encode(small)}} 19 | bench.report("BERT large") {ITER.times {BERT.encode(large)}} 20 | bench.report("BERT complex") {ITER.times {BERT.encode(complex)}} 21 | puts 22 | bench.report("JSON tiny") {ITER.times {JSON.dump(tiny)}} 23 | bench.report("JSON small") {ITER.times {JSON.dump(small)}} 24 | bench.report("JSON large") {ITER.times {JSON.dump(large)}} 25 | bench.report("JSON complex") {ITER.times {JSON.dump(complex)}} 26 | puts 27 | bench.report("JSON tiny") {ITER.times {Yajl::Encoder.encode(tiny)}} 28 | bench.report("JSON small") {ITER.times {Yajl::Encoder.encode(small)}} 29 | bench.report("JSON large") {ITER.times {Yajl::Encoder.encode(large)}} 30 | bench.report("JSON complex") {ITER.times {Yajl::Encoder.encode(complex)}} 31 | puts 32 | bench.report("Ruby tiny") {ITER.times {Marshal.dump(tiny)}} 33 | bench.report("Ruby small") {ITER.times {Marshal.dump(small)}} 34 | bench.report("Ruby large") {ITER.times {Marshal.dump(large)}} 35 | bench.report("Ruby complex") {ITER.times {Marshal.dump(complex)}} 36 | end -------------------------------------------------------------------------------- /bench/memsize.rb: -------------------------------------------------------------------------------- 1 | require 'bert' 2 | require 'objspace' 3 | 4 | large = ["abc" * 1000] * 10000 5 | 6 | def write_berp(output, ruby) 7 | data = BERT.encode(ruby) 8 | output.write([data.bytesize].pack("N")) 9 | output.write(data) 10 | end 11 | 12 | def write_berp2(output, ruby) 13 | data = BERT.encode_to_buffer(ruby) 14 | output.write([data.bytesize].pack("N")) 15 | data.write_to output 16 | end 17 | 18 | socket = File.open 
File::NULL, 'wb' do |f| 19 | GC.start; GC.start; GC.start; GC.disable 20 | 21 | before = ObjectSpace.memsize_of_all(String) 22 | write_berp f, large 23 | p :ORIGINAL => ObjectSpace.memsize_of_all(String) - before 24 | 25 | GC.start; GC.start; GC.start; GC.disable 26 | before = ObjectSpace.memsize_of_all(String) 27 | write_berp2 f, large 28 | p :NEW => ObjectSpace.memsize_of_all(String) - before 29 | end 30 | -------------------------------------------------------------------------------- /bench/results.txt: -------------------------------------------------------------------------------- 1 | user system total real 2 | 3 | Comparisons 4 | 5 | JSON tiny 0.020000 0.000000 0.020000 ( 0.017486) 6 | JSON small 0.070000 0.000000 0.070000 ( 0.080681) 7 | JSON large 15.260000 0.600000 15.860000 ( 16.427857) 8 | JSON complex 1.470000 0.010000 1.480000 ( 1.558230) 9 | 10 | YAJL tiny 0.010000 0.000000 0.010000 ( 0.015537) 11 | YAJL small 0.050000 0.000000 0.050000 ( 0.061879) 12 | YAJL large 3.610000 0.800000 4.410000 ( 4.675255) 13 | YAJL complex 1.030000 0.000000 1.030000 ( 1.066871) 14 | 15 | Ruby tiny 0.010000 0.000000 0.010000 ( 0.007117) 16 | Ruby small 0.020000 0.000000 0.020000 ( 0.015964) 17 | Ruby large 0.040000 0.000000 0.040000 ( 0.042695) 18 | Ruby complex 0.040000 0.000000 0.040000 ( 0.048395) 19 | 20 | Ruby encoder / Ruby decoder 21 | 41503465479e8762916d6997d91639f0d7308a13 22 | 23 | BERT tiny 0.090000 0.000000 0.090000 ( 0.092357) 24 | BERT small 0.830000 0.000000 0.830000 ( 0.853270) 25 | BERT large 4.190000 0.620000 4.810000 ( 4.959149) 26 | BERT complex 19.380000 0.080000 19.460000 ( 20.402862) 27 | 28 | Simple C decoder / Ruby encoder 29 | 41503465479e8762916d6997d91639f0d7308a13 30 | 31 | BERT tiny 0.030000 0.000000 0.030000 ( 0.033826) 32 | BERT small 0.390000 0.010000 0.400000 ( 0.413229) 33 | BERT large 2.270000 0.550000 2.820000 ( 3.029141) 34 | BERT complex 8.680000 0.040000 8.720000 ( 9.097990) 35 | 36 | Smarter Ruby decoder 37 | 38 | BERT tiny 
0.070000 0.000000 0.070000 ( 0.075155) 39 | BERT small 0.810000 0.010000 0.820000 ( 0.831905) 40 | BERT large 4.340000 0.600000 4.940000 ( 5.064875) 41 | BERT complex 18.460000 0.070000 18.530000 ( 19.096184) 42 | 43 | Smarter C decoder 44 | 45 | BERT tiny 0.030000 0.000000 0.030000 ( 0.035685) 46 | BERT small 0.350000 0.010000 0.360000 ( 0.358929) 47 | BERT large 2.410000 0.560000 2.970000 ( 3.056593) 48 | BERT complex 7.910000 0.040000 7.950000 ( 8.236641) 49 | 50 | Smart C Decoder only 51 | 52 | BERT tiny 0.000000 0.000000 0.000000 ( 0.001820) 53 | BERT small 0.000000 0.000000 0.000000 ( 0.003859) 54 | BERT large 0.430000 0.010000 0.440000 ( 0.499631) 55 | BERT complex 0.080000 0.010000 0.090000 ( 0.086992) 56 | -------------------------------------------------------------------------------- /bert.gemspec: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | if ENV['DEVELOPMENT'] 4 | VERSION = `git describe --tags`.strip.gsub('-', '.')[1..-1] 5 | else 6 | VERSION = File.read('VERSION') 7 | end 8 | 9 | Gem::Specification.new do |s| 10 | s.name = "bert" 11 | s.version = VERSION 12 | s.summary = %Q{BERT Serializiation for Ruby} 13 | s.description = %Q{BERT Serializiation for Ruby} 14 | s.license = "MIT" 15 | s.email = "tom@mojombo.com" 16 | s.homepage = "http://github.com/github/bert" 17 | s.authors = ["Tom Preston-Werner"] 18 | 19 | s.files = `git ls-files`.split("\n") 20 | s.require_paths = ["lib", "ext"] 21 | 22 | s.extensions = ["ext/bert/c/extconf.rb"] 23 | 24 | s.add_dependency "mochilo", ">= 1.3", "!= 2.0" 25 | 26 | s.add_development_dependency "thoughtbot-shoulda" 27 | s.add_development_dependency "git" 28 | s.add_development_dependency "rake" 29 | s.add_development_dependency "rake-compiler", "~> 0.9.0" 30 | s.add_development_dependency "yajl-ruby" 31 | s.add_development_dependency "test-unit" 32 | end 33 | -------------------------------------------------------------------------------- 
/ext/bert/c/decode.c: -------------------------------------------------------------------------------- 1 | #include "ruby.h" 2 | #include "ruby/encoding.h" 3 | #include 4 | #include 5 | 6 | #define ERL_SMALL_INT 97 7 | #define ERL_INT 98 8 | #define ERL_FLOAT 99 9 | #define ERL_ATOM 100 10 | #define ERL_SMALL_TUPLE 104 11 | #define ERL_LARGE_TUPLE 105 12 | #define ERL_NIL 106 13 | #define ERL_STRING 107 14 | #define ERL_LIST 108 15 | #define ERL_BIN 109 16 | #define ERL_SMALL_BIGNUM 110 17 | #define ERL_LARGE_BIGNUM 111 18 | 19 | /* These two types are specific to version 2 of the protocol. They diverge 20 | * from Erlang, but allow us to pass string encodings across the wire. */ 21 | #define ERLEXT_ENC_STRING 112 22 | #define ERLEXT_UNICODE_STRING 113 23 | 24 | /* Protocol version constants. */ 25 | #define ERL_VERSION 131 26 | #define ERL_VERSION2 132 27 | #define MOCHILO_VERSION1 133 28 | #define MOCHILO_VERSION2 134 29 | 30 | #define BERT_VALID_TYPE(t) ((t) >= ERL_SMALL_INT && (t) <= ERLEXT_UNICODE_STRING) 31 | #define BERT_TYPE_OFFSET (ERL_SMALL_INT) 32 | 33 | static VALUE rb_mBERT; 34 | static VALUE rb_cDecode; 35 | static VALUE id_unpack_unsafe; 36 | static VALUE id_unpack; 37 | static VALUE id_byteslice; 38 | 39 | struct bert_buf { 40 | const uint8_t *data; 41 | const uint8_t *start; 42 | const uint8_t *end; 43 | VALUE rb_buf; 44 | }; 45 | 46 | static VALUE bert_read_invalid(struct bert_buf *buf); 47 | 48 | static VALUE bert_read_sint(struct bert_buf *buf); 49 | static VALUE bert_read_int(struct bert_buf *buf); 50 | static VALUE bert_read_float(struct bert_buf *buf); 51 | static VALUE bert_read_atom(struct bert_buf *buf); 52 | static VALUE bert_read_stuple(struct bert_buf *buf); 53 | static VALUE bert_read_ltuple(struct bert_buf *buf); 54 | static VALUE bert_read_nil(struct bert_buf *buf); 55 | static VALUE bert_read_string(struct bert_buf *buf); 56 | static VALUE bert_read_list(struct bert_buf *buf); 57 | static VALUE bert_read_bin(struct bert_buf *buf); 
58 | static VALUE bert_read_enc_string(struct bert_buf *buf); 59 | static VALUE bert_read_unicode_string(struct bert_buf *buf); 60 | static VALUE bert_read_sbignum(struct bert_buf *buf); 61 | static VALUE bert_read_lbignum(struct bert_buf *buf); 62 | 63 | typedef VALUE (*bert_ptr)(struct bert_buf *buf); 64 | static bert_ptr bert_callbacks[] = { 65 | &bert_read_sint, 66 | &bert_read_int, 67 | &bert_read_float, 68 | &bert_read_atom, 69 | &bert_read_invalid, 70 | &bert_read_invalid, 71 | &bert_read_invalid, 72 | &bert_read_stuple, 73 | &bert_read_ltuple, 74 | &bert_read_nil, 75 | &bert_read_string, 76 | &bert_read_list, 77 | &bert_read_bin, 78 | &bert_read_sbignum, 79 | &bert_read_lbignum, 80 | &bert_read_enc_string, 81 | &bert_read_unicode_string 82 | }; 83 | 84 | static inline uint8_t bert_buf_read8(struct bert_buf *buf) 85 | { 86 | return *buf->data++; 87 | } 88 | 89 | static inline uint16_t bert_buf_read16(struct bert_buf *buf) 90 | { 91 | /* Note that this will trigger -Wcast-align and throw a 92 | * bus error on platforms where unaligned reads are not 93 | * allowed. Also note that this is not breaking any 94 | * strict aliasing rules. */ 95 | uint16_t short_val = *(uint16_t *)buf->data; 96 | buf->data += sizeof(uint16_t); 97 | return ntohs(short_val); 98 | } 99 | 100 | static inline uint32_t bert_buf_read32(struct bert_buf *buf) 101 | { 102 | /* Note that this will trigger -Wcast-align and throw a 103 | * bus error on platforms where unaligned reads are not 104 | * allowed. Also note that this is not breaking any 105 | * strict aliasing rules. 
*/ 106 | uint32_t long_val = *(uint32_t *)buf->data; 107 | buf->data += sizeof(uint32_t); 108 | return ntohl(long_val); 109 | } 110 | 111 | static inline void bert_buf_ensure(struct bert_buf *buf, size_t size) 112 | { 113 | if (buf->data + size > buf->end) 114 | rb_raise(rb_eEOFError, "Unexpected end of BERT stream"); 115 | } 116 | 117 | static VALUE bert_read(struct bert_buf *buf) 118 | { 119 | uint8_t type; 120 | 121 | bert_buf_ensure(buf, 1); 122 | type = bert_buf_read8(buf); 123 | 124 | if (!BERT_VALID_TYPE(type)) 125 | rb_raise(rb_eRuntimeError, "Invalid tag '%d' for term", type); 126 | 127 | return bert_callbacks[type - BERT_TYPE_OFFSET](buf); 128 | } 129 | 130 | static VALUE bert_read_dict(struct bert_buf *buf) 131 | { 132 | uint8_t type; 133 | uint32_t length = 0, i; 134 | VALUE rb_dict; 135 | 136 | bert_buf_ensure(buf, 1); 137 | type = bert_buf_read8(buf); 138 | 139 | if (type != ERL_LIST && type != ERL_NIL) 140 | rb_raise(rb_eTypeError, "Invalid dict spec, not an erlang list"); 141 | 142 | if (type == ERL_LIST) { 143 | bert_buf_ensure(buf, 4); 144 | length = bert_buf_read32(buf); 145 | } 146 | 147 | rb_dict = rb_hash_new(); 148 | 149 | for (i = 0; i < length; ++i) { 150 | VALUE key, val; 151 | bert_buf_ensure(buf, 2); 152 | 153 | if (bert_buf_read8(buf) != ERL_SMALL_TUPLE || bert_buf_read8(buf) != 2) 154 | rb_raise(rb_eTypeError, "Invalid dict tuple"); 155 | 156 | key = bert_read(buf); 157 | val = bert_read(buf); 158 | 159 | rb_hash_aset(rb_dict, key, val); 160 | } 161 | 162 | if (type == ERL_LIST) { 163 | /* read and discard the one-byte list tail */ 164 | bert_buf_ensure(buf, 1); 165 | (void)bert_buf_read8(buf); 166 | } 167 | 168 | return rb_dict; 169 | } 170 | 171 | static inline void bert_ensure_arity(uint32_t arity, uint32_t expected) 172 | { 173 | if (arity != expected) 174 | rb_raise(rb_eTypeError, "Invalid tuple arity for complex type"); 175 | } 176 | 177 | static VALUE bert_read_complex(struct bert_buf *buf, uint32_t arity) 178 | { 179 | VALUE rb_type; 180 
| ID id_type; 181 | 182 | rb_type = bert_read(buf); 183 | Check_Type(rb_type, T_SYMBOL); 184 | 185 | id_type = SYM2ID(rb_type); 186 | 187 | if (id_type == rb_intern("nil")) { 188 | bert_ensure_arity(arity, 2); 189 | return Qnil; 190 | 191 | } else if (id_type == rb_intern("true")) { 192 | bert_ensure_arity(arity, 2); 193 | return Qtrue; 194 | 195 | } else if (id_type == rb_intern("false")) { 196 | bert_ensure_arity(arity, 2); 197 | return Qfalse; 198 | 199 | } else if (id_type == rb_intern("time")) { 200 | VALUE rb_megasecs, rb_secs, rb_microsecs, rb_stamp, rb_msecs; 201 | 202 | bert_ensure_arity(arity, 5); 203 | 204 | rb_megasecs = bert_read(buf); 205 | rb_secs = bert_read(buf); 206 | rb_microsecs = bert_read(buf); 207 | 208 | rb_msecs = rb_funcall(rb_megasecs, rb_intern("*"), 1, INT2NUM(1000000)); 209 | rb_stamp = rb_funcall(rb_msecs, rb_intern("+"), 1, rb_secs); 210 | 211 | return rb_funcall(rb_cTime, rb_intern("at"), 2, rb_stamp, rb_microsecs); 212 | 213 | } else if (id_type == rb_intern("regex")) { 214 | VALUE rb_source, rb_opts; 215 | int flags = 0; 216 | 217 | bert_ensure_arity(arity, 4); 218 | 219 | rb_source = bert_read(buf); 220 | rb_opts = bert_read(buf); 221 | 222 | Check_Type(rb_source, T_STRING); 223 | Check_Type(rb_opts, T_ARRAY); 224 | 225 | if (rb_ary_includes(rb_opts, ID2SYM(rb_intern("caseless")))) 226 | flags = flags | 1; 227 | 228 | if (rb_ary_includes(rb_opts, ID2SYM(rb_intern("extended")))) 229 | flags = flags | 2; 230 | 231 | if (rb_ary_includes(rb_opts, ID2SYM(rb_intern("multiline")))) 232 | flags = flags | 4; 233 | 234 | return rb_funcall(rb_cRegexp, rb_intern("new"), 2, rb_source, INT2NUM(flags)); 235 | 236 | } else if (id_type == rb_intern("dict")) { 237 | bert_ensure_arity(arity, 3); 238 | return bert_read_dict(buf); 239 | } 240 | 241 | rb_raise(rb_eTypeError, "Invalid tag for complex value"); 242 | return Qnil; 243 | } 244 | 245 | static VALUE bert_read_tuple(struct bert_buf *buf, uint32_t arity) 246 | { 247 | VALUE cTuple; 248 | 249 | 
cTuple = rb_const_get(rb_mBERT, rb_intern("Tuple")); 250 | 251 | if (arity > 0) { 252 | VALUE rb_tag = bert_read(buf); 253 | 254 | if (TYPE(rb_tag) == T_SYMBOL && SYM2ID(rb_tag) == rb_intern("bert")) { 255 | return bert_read_complex(buf, arity); 256 | } else { 257 | uint32_t i; 258 | VALUE rb_tuple; 259 | 260 | rb_tuple = rb_funcall(cTuple, rb_intern("new"), 1, INT2NUM(arity)); 261 | rb_ary_store(rb_tuple, 0, rb_tag); 262 | 263 | for(i = 1; i < arity; ++i) 264 | rb_ary_store(rb_tuple, i, bert_read(buf)); 265 | 266 | return rb_tuple; 267 | } 268 | } 269 | 270 | return rb_funcall(cTuple, rb_intern("new"), 0); 271 | } 272 | 273 | static VALUE bert_read_stuple(struct bert_buf *buf) 274 | { 275 | bert_buf_ensure(buf, 1); 276 | return bert_read_tuple(buf, bert_buf_read8(buf)); 277 | } 278 | 279 | static VALUE bert_read_ltuple(struct bert_buf *buf) 280 | { 281 | bert_buf_ensure(buf, 4); 282 | return bert_read_tuple(buf, bert_buf_read32(buf)); 283 | } 284 | 285 | static VALUE bert_read_list(struct bert_buf *buf) 286 | { 287 | uint32_t i, length; 288 | VALUE rb_list; 289 | 290 | bert_buf_ensure(buf, 4); 291 | length = bert_buf_read32(buf); 292 | rb_list = rb_ary_new2(length); 293 | 294 | for(i = 0; i < length; ++i) 295 | rb_ary_store(rb_list, i, bert_read(buf)); 296 | 297 | /* read and discard the one-byte list tail */ 298 | bert_buf_ensure(buf, 1); 299 | (void)bert_buf_read8(buf); 300 | 301 | return rb_list; 302 | } 303 | 304 | static VALUE bert_read_bin(struct bert_buf *buf) 305 | { 306 | uint32_t length; 307 | VALUE rb_bin; 308 | 309 | bert_buf_ensure(buf, 4); 310 | length = bert_buf_read32(buf); 311 | 312 | bert_buf_ensure(buf, length); 313 | rb_bin = rb_str_substr(buf->rb_buf, buf->data - buf->start, length); 314 | buf->data += length; 315 | 316 | return rb_bin; 317 | } 318 | 319 | static VALUE bert_read_unicode_string(struct bert_buf *buf) 320 | { 321 | VALUE rb_str; 322 | 323 | rb_str = bert_read_bin(buf); 324 | rb_enc_associate(rb_str, rb_utf8_encoding()); 325 | 326 | 
return rb_str; 327 | } 328 | 329 | static VALUE bert_read_enc_string(struct bert_buf *buf) 330 | { 331 | uint8_t type; 332 | VALUE rb_bin, enc; 333 | 334 | rb_bin = bert_read_bin(buf); 335 | 336 | bert_buf_ensure(buf, 1); 337 | type = bert_buf_read8(buf); 338 | if (ERL_BIN != type) 339 | rb_raise(rb_eRuntimeError, "Invalid tag '%d' for term", type); 340 | 341 | enc = bert_read_bin(buf); 342 | rb_enc_associate(rb_bin, rb_find_encoding(enc)); 343 | 344 | return rb_bin; 345 | } 346 | 347 | static VALUE bert_read_string(struct bert_buf *buf) 348 | { 349 | uint16_t i, length; 350 | VALUE rb_string; 351 | 352 | bert_buf_ensure(buf, 2); 353 | length = bert_buf_read16(buf); 354 | 355 | bert_buf_ensure(buf, length); 356 | rb_string = rb_ary_new2(length); 357 | 358 | for (i = 0; i < length; ++i) 359 | rb_ary_store(rb_string, i, INT2FIX(buf->data[i])); 360 | 361 | buf->data += length; 362 | return rb_string; 363 | } 364 | 365 | static VALUE bert_read_atom(struct bert_buf *buf) 366 | { 367 | VALUE rb_atom; 368 | uint32_t atom_len; 369 | 370 | bert_buf_ensure(buf, 2); 371 | atom_len = bert_buf_read16(buf); 372 | 373 | /* Instead of trying to build the symbol 374 | * from here, just create a Ruby string 375 | * and internalize it. 
this will be faster for 376 | * unique symbols */ 377 | bert_buf_ensure(buf, atom_len); 378 | rb_atom = rb_str_new((char *)buf->data, atom_len); 379 | buf->data += atom_len; 380 | 381 | return rb_str_intern(rb_atom); 382 | } 383 | 384 | static VALUE bert_read_sint(struct bert_buf *buf) 385 | { 386 | bert_buf_ensure(buf, 1); 387 | return INT2FIX((uint8_t)bert_buf_read8(buf)); 388 | } 389 | 390 | static VALUE bert_read_int(struct bert_buf *buf) 391 | { 392 | bert_buf_ensure(buf, 4); 393 | return LONG2NUM((int32_t)bert_buf_read32(buf)); 394 | } 395 | 396 | static VALUE bert_buf_tobignum(struct bert_buf *buf, uint8_t sign, uint32_t bin_digits) 397 | { 398 | #ifdef BERT_FAST_BIGNUM 399 | uint32_t *bin_buf = NULL; 400 | VALUE rb_num; 401 | uint32_t round_size; 402 | 403 | bert_buf_ensure(buf, bin_digits); 404 | 405 | /* Hack: ensure that we have at least a full word 406 | * of extra padding for the actual string, so Ruby 407 | * cannot guess the sign of the bigint from the MSB */ 408 | round_size = 4 + ((bin_digits + 3) & ~3); 409 | bin_buf = xmalloc(round_size); 410 | 411 | memcpy(bin_buf, buf->data, bin_digits); 412 | memset((char *)bin_buf + bin_digits, 0x0, round_size - bin_digits); 413 | 414 | /* Make Ruby unpack the string internally. 415 | * this is significantly faster than adding 416 | * the bytes one by one */ 417 | rb_num = rb_big_unpack(bin_buf, round_size / 4); 418 | 419 | /* Enfore sign. So fast! */ 420 | RBIGNUM_SET_SIGN(rb_num, !sign); 421 | 422 | free(bin_buf); 423 | return rb_num; 424 | #else 425 | /** 426 | * Slower bignum serialization; convert to a base16 427 | * string and then let ruby parse it internally. 
428 | * 429 | * We're shipping with this by default because 430 | * `rb_big_unpack` is not trustworthy 431 | */ 432 | static const char to_hex[] = "0123456789abcdef"; 433 | char *num_str = NULL, *ptr; 434 | VALUE rb_num; 435 | int32_t i; 436 | 437 | bert_buf_ensure(buf, bin_digits); 438 | 439 | /* 2 digits per byte + sign + trailing null */ 440 | num_str = ptr = xmalloc((bin_digits * 2) + 2); 441 | 442 | *ptr++ = sign ? '-' : '+'; 443 | 444 | for (i = (int32_t)bin_digits - 1; i >= 0; --i) { 445 | uint8_t val = buf->data[i]; 446 | *ptr++ = to_hex[val >> 4]; 447 | *ptr++ = to_hex[val & 0xf]; 448 | } 449 | 450 | *ptr = 0; 451 | buf->data += bin_digits; 452 | 453 | rb_num = rb_cstr_to_inum(num_str, 16, 1); 454 | free(num_str); 455 | 456 | return rb_num; 457 | #endif 458 | } 459 | 460 | VALUE bert_read_sbignum(struct bert_buf *buf) 461 | { 462 | uint8_t sign, bin_digits; 463 | 464 | bert_buf_ensure(buf, 2); 465 | 466 | bin_digits = bert_buf_read8(buf); 467 | sign = bert_buf_read8(buf); 468 | 469 | return bert_buf_tobignum(buf, sign, (uint32_t)bin_digits); 470 | } 471 | 472 | VALUE bert_read_lbignum(struct bert_buf *buf) 473 | { 474 | uint32_t bin_digits; 475 | uint8_t sign; 476 | 477 | bert_buf_ensure(buf, 5); 478 | 479 | bin_digits = bert_buf_read32(buf); 480 | sign = bert_buf_read8(buf); 481 | 482 | return bert_buf_tobignum(buf, sign, bin_digits); 483 | } 484 | 485 | /* 486 | * ------------------- 487 | * |1 | 31 | 488 | * |99 | Float String| 489 | * ------------------- 490 | * 491 | * A float is stored in string format. the format used in sprintf 492 | * to format the float is "%.20e" (there are more bytes allocated 493 | * than necessary). To unpack the float use sscanf with format "%lf". 
494 | */ 495 | static VALUE bert_read_float(struct bert_buf *buf) 496 | { 497 | VALUE rb_float; 498 | 499 | bert_buf_ensure(buf, 31); 500 | 501 | rb_float = rb_str_new((char *)buf->data, 31); 502 | buf->data += 31; 503 | 504 | return rb_funcall(rb_float, rb_intern("to_f"), 0); 505 | } 506 | 507 | static VALUE bert_read_nil(struct bert_buf *buf) 508 | { 509 | return rb_ary_new2(0); 510 | } 511 | 512 | static VALUE bert_read_invalid(struct bert_buf *buf) 513 | { 514 | rb_raise(rb_eTypeError, "Invalid object tag in BERT stream"); 515 | return Qnil; 516 | } 517 | 518 | static int supports(const char *version) 519 | { 520 | return RTEST(rb_funcall(rb_mBERT, rb_intern("supports?"), 1, ID2SYM(rb_intern(version)))); 521 | } 522 | 523 | static VALUE rb_bert_decode(VALUE klass, VALUE rb_string) 524 | { 525 | struct bert_buf buf; 526 | uint8_t proto_version; 527 | const char *str; 528 | size_t size; 529 | VALUE substr, cMochilo; 530 | 531 | Check_Type(rb_string, T_STRING); 532 | str = RSTRING_PTR(rb_string); 533 | size = RSTRING_LEN(rb_string); 534 | 535 | cMochilo = rb_const_get(rb_cObject, rb_intern("Mochilo")); 536 | 537 | buf.data = (uint8_t *)str; 538 | buf.start = buf.data; 539 | buf.rb_buf = rb_string; 540 | buf.end = buf.data + size; 541 | 542 | bert_buf_ensure(&buf, 1); 543 | 544 | proto_version = bert_buf_read8(&buf); 545 | if (proto_version == ERL_VERSION || proto_version == ERL_VERSION2) { 546 | return bert_read(&buf); 547 | } else if (proto_version == MOCHILO_VERSION1) { 548 | if (supports("v3")) { 549 | substr = rb_funcall(rb_string, id_byteslice, 2, INT2NUM(1), INT2NUM(size - 1)); 550 | return rb_funcall(cMochilo, id_unpack_unsafe, 1, substr); 551 | } else { 552 | rb_raise(rb_eTypeError, "v3 stream cannot be decoded"); 553 | } 554 | } else if (proto_version == MOCHILO_VERSION2) { 555 | if (supports("v4")) { 556 | substr = rb_funcall(rb_string, id_byteslice, 2, INT2NUM(1), INT2NUM(size - 1)); 557 | return rb_funcall(cMochilo, id_unpack, 1, substr); 558 | } else 
{ 559 | rb_raise(rb_eTypeError, "v4 stream cannot be decoded"); 560 | } 561 | } else { 562 | rb_raise(rb_eTypeError, "Invalid magic value (%d) for BERT string", proto_version); 563 | } 564 | } 565 | 566 | static VALUE rb_bert_impl(VALUE klass) 567 | { 568 | return rb_str_new("C", 1); 569 | } 570 | 571 | void Init_decode() 572 | { 573 | rb_mBERT = rb_define_module("BERT"); 574 | 575 | rb_require("mochilo"); 576 | id_unpack_unsafe = rb_intern("unpack_unsafe"); 577 | id_unpack = rb_intern("unpack"); 578 | id_byteslice = rb_intern("byteslice"); 579 | 580 | rb_cDecode = rb_define_class_under(rb_mBERT, "Decode", rb_cObject); 581 | rb_define_singleton_method(rb_cDecode, "decode", rb_bert_decode, 1); 582 | rb_define_singleton_method(rb_cDecode, "impl", rb_bert_impl, 0); 583 | } 584 | -------------------------------------------------------------------------------- /ext/bert/c/extconf.rb: -------------------------------------------------------------------------------- 1 | # Loads mkmf which is used to make makefiles for Ruby extensions 2 | require 'mkmf' 3 | 4 | # warnings save lives 5 | $CFLAGS << " -Wall " 6 | 7 | # Give it a name 8 | extension_name = 'decode' 9 | 10 | # The destination 11 | dir_config(extension_name) 12 | 13 | # Do the work 14 | create_makefile(extension_name) -------------------------------------------------------------------------------- /gemfiles/mochilo-v1: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | # A sample Gemfile 3 | source "https://rubygems.org" 4 | 5 | gemspec 6 | 7 | gem "mochilo", :git => "https://github.com/brianmario/mochilo", :ref => "v1-release" 8 | -------------------------------------------------------------------------------- /gemfiles/mochilo-v2: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | # A sample Gemfile 3 | source "https://rubygems.org" 4 | 5 | gemspec 6 | 7 | gem "mochilo", :git 
# Global method for specifying that an array should be encoded as a tuple,
# e.g. t[:ok, 1].
def t
  BERT::Tuple
end

module BERT
  # Reports whether protocol version +v+ can be used in this process.
  #
  # :v1 and :v2 are always available.  :v3 is available when Mochilo
  # responds to +pack_unsafe+, :v4 when it does not.  Any other value is
  # unsupported.
  def self.supports?(v)
    return true if %i[v1 v2].include?(v)
    return Mochilo.respond_to?(:pack_unsafe) if :v3 == v
    return !Mochilo.respond_to?(:pack_unsafe) if :v4 == v

    false
  end

  # Encodes +ruby+ and returns the result of Encoder.encode.
  def self.encode(ruby)
    Encoder.encode(ruby)
  end

  # Encodes +ruby+ and returns the result of Encoder.encode_to_buffer.
  def self.encode_to_buffer(ruby)
    Encoder.encode_to_buffer(ruby)
  end

  # Decodes the BERT string +bert+ via Decoder.decode.
  def self.decode(bert)
    Decoder.decode(bert)
  end

  # Renders the bytes of +str+ in Erlang binary notation, e.g. "<<131,97,1>>".
  def self.ebin(str)
    "<<#{str.bytes.join(',')}>>"
  end

  # An Array subclass marking data that must be encoded as a tuple.
  class Tuple < Array
    # Same as Array#inspect with a leading "t", mirroring the t[...] helper.
    def inspect
      't' + super
    end
  end
end
require "mochilo"

module BERT
  # Pure-Ruby BERT decoder.
  #
  # Decode.decode dispatches on the first byte of the input: Mochilo-based
  # streams (VERSION_3 / VERSION_4) are handed to Mochilo, while MAGIC and
  # VERSION_2 streams are parsed term by term by an instance of this class
  # reading from an IO-like object.
  class Decode
    attr_accessor :in
    include Types

    # Names this implementation.
    def self.impl
      'Ruby'
    end

    # Decodes the BERT-encoded +string+ into a Ruby object.
    #
    # Raises RuntimeError when the leading version byte is unknown, when the
    # required Mochilo flavour is not available, or when the term data is
    # malformed.
    def self.decode(string)
      header = string.getbyte(0)

      case header
      when VERSION_4
        raise "v4 stream cannot be decoded" unless BERT.supports?(:v4)
        Mochilo.unpack(string.byteslice(1, string.bytesize - 1))
      when VERSION_3
        raise "v3 stream cannot be decoded" unless BERT.supports?(:v3)
        Mochilo.unpack_unsafe(string.byteslice(1, string.bytesize - 1))
      when MAGIC, VERSION_2
        io = StringIO.new(string)
        io.set_encoding('binary') if io.respond_to?(:set_encoding)
        io.getbyte # skip the version byte inspected above
        new(io).read_any
      else
        fail("Bad Magic")
      end
    end

    # +ins+ is the IO-like object (anything responding to +read+) that the
    # term data is pulled from.
    def initialize(ins)
      @in = ins
      @peeked = +""
    end

    # Reads the next term, converting t[:bert, ...] tuples to Ruby objects.
    def read_any
      read_any_raw
    end

    # Looks at the next tag byte without consuming it and calls the matching
    # reader; each reader consumes and re-checks its own tag.
    def read_any_raw
      case peek_1
      when ATOM then read_atom
      when SMALL_INT then read_small_int
      when INT then read_int
      when SMALL_BIGNUM then read_small_bignum
      when LARGE_BIGNUM then read_large_bignum
      when FLOAT then read_float
      when SMALL_TUPLE then read_small_tuple
      when LARGE_TUPLE then read_large_tuple
      when NIL then read_nil
      when STRING then read_erl_string
      when LIST then read_list
      when BIN then read_bin
      when ENC_STRING then read_enc_string
      when UNICODE_STRING then read_unicode_string
      else
        fail("Unknown term tag: #{peek_1}")
      end
    end

    # Consumes and returns exactly +length+ bytes, serving previously peeked
    # bytes first.
    #
    # Raises RuntimeError when the input ends early.  Without this check a
    # truncated stream would either yield silently shortened data or fail
    # later with an unrelated TypeError/NoMethodError.
    def read(length)
      if length < @peeked.length
        result = @peeked[0...length]
        @peeked = @peeked[length..-1]
        length = 0
      else
        result = @peeked
        @peeked = +''
        length -= result.length
      end

      if length > 0
        chunk = @in.read(length)
        fail("Unexpected end of BERT data") if chunk.nil? || chunk.bytesize < length
        result << chunk
      end
      result
    end

    # Returns up to +length+ upcoming bytes without consuming them; fewer are
    # returned when the input ends first.
    def peek(length)
      if length <= @peeked.length
        @peeked[0...length]
      else
        read_bytes = @in.read(length - @peeked.length)
        @peeked << read_bytes if read_bytes
        @peeked
      end
    end

    # Next byte as an Integer, not consumed (nil at end of input).
    def peek_1
      peek(1).unpack("C").first
    end

    # Next two bytes as a big-endian unsigned Integer, not consumed.
    def peek_2
      peek(2).unpack("n").first
    end

    # Consumes one byte and returns it as an Integer.
    def read_1
      read(1).unpack("C").first
    end

    # Consumes two bytes as a big-endian unsigned Integer.
    def read_2
      read(2).unpack("n").first
    end

    # Consumes four bytes as a big-endian unsigned Integer.
    def read_4
      read(4).unpack("N").first
    end

    # Consumes +length+ raw bytes.
    def read_string(length)
      read(length)
    end

    # Atom: tag, two-byte length, name.  Returned as a Symbol.
    def read_atom
      fail("Invalid Type, not an atom") unless read_1 == ATOM
      length = read_2
      # String#to_sym handles the empty string, so no special case is needed.
      read_string(length).to_sym
    end

    # Small integer: tag plus one unsigned byte.
    def read_small_int
      fail("Invalid Type, not a small int") unless read_1 == SMALL_INT
      read_1
    end

    # Integer: tag plus four bytes, interpreted as a signed 32-bit value.
    def read_int
      fail("Invalid Type, not an int") unless read_1 == INT
      value = read_4
      negative = (value >> 31)[0] == 1
      value = (value - (1 << 32)) if negative
      value
    end

    # Small bignum: tag, one-byte digit count, sign byte, digits.
    def read_small_bignum
      fail("Invalid Type, not a small bignum") unless read_1 == SMALL_BIGNUM
      size = read_1
      sign = read_1
      read_bignum_value(size, sign)
    end

    # Large bignum: tag, four-byte digit count, sign byte, digits.
    def read_large_bignum
      fail("Invalid Type, not a large bignum") unless read_1 == LARGE_BIGNUM
      size = read_4
      sign = read_1
      read_bignum_value(size, sign)
    end

    # Float: tag plus a fixed 31-byte textual representation.
    def read_float
      fail("Invalid Type, not a float") unless read_1 == FLOAT
      string_value = read_string(31)
      string_value.to_f
    end

    # Small tuple: tag, one-byte arity, elements.
    def read_small_tuple
      fail("Invalid Type, not a small tuple") unless read_1 == SMALL_TUPLE
      read_tuple(read_1)
    end

    # Large tuple: tag, four-byte arity, elements.
    def read_large_tuple
      fail("Invalid Type, not a large tuple") unless read_1 == LARGE_TUPLE
      read_tuple(read_4)
    end

    # Reads +arity+ elements.  A tuple whose first element is :bert is a
    # complex type and is converted; anything else becomes a Tuple.
    def read_tuple(arity)
      if arity > 0
        tag = read_any_raw
        if tag == :bert
          read_complex_type(arity)
        else
          tuple = Tuple.new(arity)
          tuple[0] = tag
          (arity - 1).times { |i| tuple[i + 1] = read_any_raw }
          tuple
        end
      else
        Tuple.new
      end
    end

    # Converts the remainder of a t[:bert, type, ...] tuple to a Ruby object.
    # An unrecognised type yields nil.
    # NOTE(review): +arity+ is not consulted, so the remaining elements of an
    # unrecognised type are left unread - confirm whether that is intended.
    def read_complex_type(arity)
      case read_any_raw
      when :nil
        nil
      when :true
        true
      when :false
        false
      when :time
        megaseconds = read_any_raw
        seconds = read_any_raw
        microseconds = read_any_raw
        Time.at(megaseconds * 1_000_000 + seconds, microseconds)
      when :regex
        source = read_any_raw
        opts = read_any_raw
        options = 0
        options |= Regexp::EXTENDED if opts.include?(:extended)
        options |= Regexp::IGNORECASE if opts.include?(:caseless)
        options |= Regexp::MULTILINE if opts.include?(:multiline)
        Regexp.new(source, options)
      when :dict
        read_dict
      else
        nil
      end
    end

    # Reads the pair list of a dict (a LIST of two-element tuples, or NIL
    # for an empty dict) into a Hash.
    def read_dict
      type = read_1
      fail("Invalid dict spec, not an erlang list") unless [LIST, NIL].include?(type)
      length = type == LIST ? read_4 : 0
      hash = {}
      length.times do
        pair = read_any_raw
        hash[pair[0]] = pair[1]
      end
      read_1 if type == LIST # list tail byte
      hash
    end

    # Nil term: decodes to an empty Array.
    def read_nil
      fail("Invalid Type, not a nil list") unless read_1 == NIL
      []
    end

    # Unicode string: tag, four-byte length, bytes tagged as UTF-8.
    def read_unicode_string
      fail("Invalid Type, not a unicode string") unless read_1 == UNICODE_STRING
      length = read_4
      str = read_string(length)
      str.force_encoding "UTF-8"
      str
    end

    # Erlang string: tag, two-byte length, bytes returned as an Array of
    # Integers.
    def read_erl_string
      fail("Invalid Type, not an erlang string") unless read_1 == STRING
      length = read_2
      read_string(length).unpack('C' * length)
    end

    # List: tag, four-byte length, elements, one tail byte.
    def read_list
      fail("Invalid Type, not an erlang list") unless read_1 == LIST
      length = read_4
      list = Array.new(length) { read_any_raw }
      read_1 # list tail byte
      list
    end

    # Binary: tag, four-byte length, raw bytes.
    def read_bin
      fail("Invalid Type, not an erlang binary") unless read_1 == BIN
      length = read_4
      read_string(length)
    end

    # Raises RuntimeError with +str+ as the message.
    def fail(str)
      raise str
    end

    private

    # Encoded string: tag, four-byte length and payload, followed by a BIN
    # term naming the payload's encoding.
    def read_enc_string
      fail("Invalid Type, not an encoded string") unless read_1 == ENC_STRING
      length = read_4
      x = read_string(length)

      fail("Invalid Type, not an erlang binary") unless read_1 == BIN
      length = read_4
      x.force_encoding read_string(length)
      x
    end

    # Reads +size+ little-endian magnitude bytes and applies +sign+
    # (non-zero means negative).  Shared by both bignum readers.
    def read_bignum_value(size, sign)
      magnitude = 0
      read_string(size).each_byte.with_index do |byte, index|
        magnitude += byte << (8 * index)
      end
      sign != 0 ? -magnitude : magnitude
    end
  end
end
module BERT
  class Decoder
    # Decode a BERT into a Ruby object.
    #   +bert+ is the BERT String
    #
    # Returns a Ruby object
    def self.decode(bert)
      Decode.decode(bert)
    end
  end
end

require "mochilo"

module BERT
  # Serialises simple Ruby values (symbols, strings, integers, floats,
  # tuples and arrays) to the BERT wire format.
  #
  # Encode itself writes the original format (MAGIC header).  The nested
  # V2 class adds encoding-aware strings, while V3 and V4 delegate the whole
  # payload to Mochilo.  Encode.version selects which one encode_data uses.
  class Encode
    include Types

    # Version 2 of the format: strings carry their encoding.
    class V2 < Encode
      # UTF-8 / US-ASCII strings become unicode strings, binary strings stay
      # plain binaries, everything else is written with its encoding name.
      def write_binary(data)
        enc = data.encoding
        case enc
        when ::Encoding::UTF_8, ::Encoding::US_ASCII
          write_unicode_string data
        when ::Encoding::ASCII_8BIT
          super
        else
          write_enc_string data
        end
      end

      private

      def write_unicode_string(data)
        write_1 UNICODE_STRING
        write_4 data.bytesize
        write_string data
      end

      # Payload first, then a BIN term holding the encoding's name.
      def write_enc_string(data)
        write_1 ENC_STRING
        write_4 data.bytesize
        write_string data
        enc = data.encoding.name
        write_1 BIN
        write_4 enc.bytesize
        write_string enc
      end

      def version_header
        VERSION_2
      end
    end

    # Version 3: version byte followed by Mochilo.pack_unsafe output.
    class V3
      def initialize(out)
        @out = out
      end

      attr_reader :out

      def write_any(obj)
        out.write(version_header.chr)
        out.write(Mochilo.pack_unsafe(obj))
      end

      private

      def version_header
        BERT::Encode::VERSION_3
      end
    end

    # Version 4: version byte followed by Mochilo.pack output.
    class V4
      def initialize(out)
        @out = out
      end

      attr_reader :out

      def write_any(obj)
        out.write(version_header.chr)
        out.write(Mochilo.pack(obj))
      end

      private

      def version_header
        BERT::Encode::VERSION_4
      end
    end

    class << self
      # Format version used by encode_data (:v1, :v2, :v3 or :v4).
      attr_accessor :version
    end
    self.version = :v1

    attr_accessor :out

    # +out+ is any object responding to +write+.
    def initialize(out)
      self.out = out
    end

    # Write-only sink that collects the encoder's output as binary chunks.
    class Buffer
      def initialize
        @buf = []
      end

      # Appends a binary copy of +str+.
      def write(str)
        @buf << str.b
      end

      # Writes every collected chunk to +io+, in order.
      def write_to(io)
        @buf.each { |x| io.write x }
      end

      def to_s
        @buf.join("")
      end

      # Total number of bytes collected so far; 0 for an empty buffer.
      # (A seedless inject would return nil for an empty buffer.)
      def bytesize
        @buf.inject(0) { |total, chunk| total + chunk.bytesize }
      end
    end

    # Encodes +data+ into a fresh Buffer and returns the buffer.
    def self.encode_to_buffer(data)
      io = Buffer.new
      encode_data data, io
      io
    end

    # Encodes +data+ and returns the result as a String.
    def self.encode(data)
      encode_to_buffer(data).to_s
    end

    # Encodes +data+ onto +io+ using the encoder selected by +version+.
    # Raises RuntimeError when BERT.supports? rejects that version.
    def self.encode_data(data, io)
      fail "Cannot encode with requested version (#{version})" unless BERT.supports?(version)
      encoder =
        case version
        when :v4
          V4.new(io)
        when :v3
          V3.new(io)
        when :v2
          V2.new(io)
        else
          new(io)
        end
      encoder.write_any(data)
    end

    # Writes the version header followed by +obj+.
    def write_any(obj)
      write_1 version_header
      write_any_raw obj
    end

    # Writes +obj+ without a header; raises for unsupported classes.
    def write_any_raw(obj)
      case obj
      when Symbol then write_symbol(obj)
      when String then write_binary(obj)
      when Integer then write_integer(obj)
      when Float then write_float(obj)
      when Tuple then write_tuple(obj)
      when Array then write_list(obj)
      else
        fail(obj)
      end
    end

    # Writes a single byte given as an Integer.
    def write_1(byte)
      out.write(byte.chr)
    end

    # Writes a big-endian 16-bit unsigned value.
    def write_2(short)
      out.write([short].pack("n"))
    end

    # Writes a big-endian 32-bit value.
    def write_4(long)
      out.write([long].pack("N"))
    end

    def write_string(string)
      out.write(string)
    end

    # Writes true/false as the atoms :true / :false.
    def write_boolean(bool)
      write_symbol(bool.to_s.to_sym)
    end

    # Atom: tag, two-byte length, name.
    def write_symbol(sym)
      fail(sym) unless sym.is_a?(Symbol)
      data = sym.to_s
      # The length field is 16 bits wide; a longer name would wrap around
      # silently and corrupt everything that follows it in the stream.
      fail(sym) if data.bytesize > 0xFFFF
      write_1 ATOM
      write_2 data.bytesize
      write_string data
    end

    # Picks the smallest integer representation that fits +num+.
    def write_integer(num)
      if num >= 0 && num < 256
        write_1 SMALL_INT
        write_1 num
      elsif num <= MAX_INT && num >= MIN_INT
        write_1 INT
        write_4 num
      else
        write_bignum num
      end
    end

    # Float: tag plus the "%15.15e" rendering padded with NULs to 31 bytes.
    def write_float(float)
      write_1 FLOAT
      write_string format("%15.15e", float).ljust(31, "\000")
    end

    # Bignum: tag, digit count (one or four bytes), sign byte, digits.
    def write_bignum(num)
      # number of bytes needed for the magnitude
      n = (num.abs.bit_length + 7) / 8
      if n < 256
        write_1 SMALL_BIGNUM
        write_1 n
      else
        write_1 LARGE_BIGNUM
        write_4 n
      end
      write_bignum_guts(num)
    end

    # Sign byte (0 positive, 1 negative), then the magnitude, least
    # significant byte first.
    def write_bignum_guts(num)
      write_1(num >= 0 ? 0 : 1)
      num = num.abs
      while num != 0
        write_1 num % 256
        num >>= 8
      end
    end

    # Tuple: small or large tag depending on arity, then the elements.
    def write_tuple(data)
      fail(data) unless data.is_a? Array

      if data.length < 256
        write_1 SMALL_TUPLE
        write_1 data.length
      else
        write_1 LARGE_TUPLE
        write_4 data.length
      end

      data.each { |e| write_any_raw e }
    end

    # List: an empty list is just NIL; otherwise tag, four-byte length,
    # elements and a closing NIL.
    def write_list(data)
      fail(data) unless data.is_a? Array

      # Explicit branch: the early exit must not depend on what the
      # output object's #write happens to return.
      if data.empty?
        write_1 NIL
        return
      end

      write_1 LIST
      write_4 data.length
      data.each { |e| write_any_raw e }
      write_1 NIL
    end

    # Binary: tag, four-byte length, raw bytes.
    def write_binary(data)
      write_1 BIN
      write_4 data.bytesize
      write_string data
    end

    private

    def version_header
      MAGIC
    end

    # Raises RuntimeError naming the object that cannot be encoded.
    def fail(obj)
      raise "Cannot encode to erlang external format: #{obj.inspect}"
    end
  end
end
module BERT
  class Encoder
    # Encode a Ruby object into a BERT.
    #   +ruby+ is the Ruby object
    #
    # Returns a BERT
    def self.encode(ruby)
      Encode.encode(convert(ruby))
    end

    # Same as encode, but returns whatever Encode.encode_to_buffer returns
    # instead of a String.
    def self.encode_to_buffer(ruby)
      Encode.encode_to_buffer(convert(ruby))
    end

    # Convert complex Ruby form in simple Ruby form.
    #   +item+ is the Ruby object to convert
    #
    # Hashes, nil, booleans, Time and Regexp values become t[:bert, ...]
    # tuples; tuples and arrays are converted element by element; anything
    # else is returned untouched.  For the :v3 and :v4 encoders no conversion
    # happens at all.
    #
    # Returns the converted Ruby object
    def self.convert(item)
      return item if %i[v3 v4].include?(Encode.version)

      case item
      when Hash
        t[:bert, :dict, item.map { |key, value| t[convert(key), convert(value)] }]
      when Tuple
        Tuple.new(item.map { |element| convert(element) })
      when Array
        item.map { |element| convert(element) }
      when nil
        t[:bert, :nil]
      when true
        t[:bert, :true]
      when false
        t[:bert, :false]
      when Time
        megaseconds, seconds = item.to_i.divmod(1_000_000)
        t[:bert, :time, megaseconds, seconds, item.usec]
      when Regexp
        flags = {
          :caseless => Regexp::IGNORECASE,
          :extended => Regexp::EXTENDED,
          :multiline => Regexp::MULTILINE
        }
        options = flags.select { |_name, bit| (item.options & bit) > 0 }.keys
        t[:bert, :regex, item.source, options]
      else
        item
      end
    end
  end
end

module BERT
  # Tag bytes, version bytes and integer limits shared by the encoder and
  # the decoder.
  module Types
    # term tags
    SMALL_INT = 97
    INT = 98
    SMALL_BIGNUM = 110
    LARGE_BIGNUM = 111
    FLOAT = 99
    ATOM = 100
    SMALL_TUPLE = 104
    LARGE_TUPLE = 105
    NIL = 106
    STRING = 107
    LIST = 108
    BIN = 109
    ENC_STRING = 112
    UNICODE_STRING = 113
    FUN = 117

    # leading version bytes
    MAGIC = 131
    VERSION_2 = 132
    VERSION_3 = 133
    VERSION_4 = 134

    # range written with the INT tag; anything outside becomes a bignum
    MAX_INT = (1 << 27) - 1
    MIN_INT = -(1 << 27)
  end
end
t[:user, {:name => 'TPW'}, [/cat/i, 9.9], time, nil, true, false, :true, :false] 9 | @bert_v1 = "\203h\td\000\004userh\003d\000\004bertd\000\004dictl\000\000\000\001h\002d\000\004namem\000\000\000\003TPWjl\000\000\000\002h\004d\000\004bertd\000\005regexm\000\000\000\003catl\000\000\000\001d\000\bcaselessjc9.900000000000000e+00\000\000\000\000\000\000\000\000\000\000jh\005d\000\004bertd\000\004timeb\000\000\004\346b\000\016\344\303a\000h\002d\000\004bertd\000\003nilh\002d\000\004bertd\000\004trueh\002d\000\004bertd\000\005falsed\000\004trued\000\005false".b 10 | @ebin_v1 = "<<131,104,9,100,0,4,117,115,101,114,104,3,100,0,4,98,101,114,116,100,0,4,100,105,99,116,108,0,0,0,1,104,2,100,0,4,110,97,109,101,109,0,0,0,3,84,80,87,106,108,0,0,0,2,104,4,100,0,4,98,101,114,116,100,0,5,114,101,103,101,120,109,0,0,0,3,99,97,116,108,0,0,0,1,100,0,8,99,97,115,101,108,101,115,115,106,99,57,46,57,48,48,48,48,48,48,48,48,48,48,48,48,48,48,101,43,48,48,0,0,0,0,0,0,0,0,0,0,106,104,5,100,0,4,98,101,114,116,100,0,4,116,105,109,101,98,0,0,4,230,98,0,14,228,195,97,0,104,2,100,0,4,98,101,114,116,100,0,3,110,105,108,104,2,100,0,4,98,101,114,116,100,0,4,116,114,117,101,104,2,100,0,4,98,101,114,116,100,0,5,102,97,108,115,101,100,0,4,116,114,117,101,100,0,5,102,97,108,115,101>>" 11 | @berts = { 12 | :v2 => "\x84h\td\x00\x04userh\x03d\x00\x04bertd\x00\x04dictl\x00\x00\x00\x01h\x02d\x00\x04nameq\x00\x00\x00\x03TPWjl\x00\x00\x00\x02h\x04d\x00\x04bertd\x00\x05regexq\x00\x00\x00\x03catl\x00\x00\x00\x01d\x00\bcaselessjc9.900000000000000e+00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00jh\x05d\x00\x04bertd\x00\x04timeb\x00\x00\x04\xE6b\x00\x0E\xE4\xC3a\x00h\x02d\x00\x04bertd\x00\x03nilh\x02d\x00\x04bertd\x00\x04trueh\x02d\x00\x04bertd\x00\x05falsed\x00\x04trued\x00\x05false".b, 13 | :v3 => 
"\x85\x99\xD4\x04user\x81\xD4\x04name\xD8\x00\x03\x00TPW\x92\xD5\x00\x03\x00\x00\x00\x01\x01cat\xCB@#\xCC\xCC\xCC\xCC\xCC\xCD\xD6\x00\x00\x00\x00\x4A\xCD\x6A\x43\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xC0\xC3\xC2\xD4\x04true\xD4\x05false".b, 14 | :v4 => "\x86\x99\xC7\x05\xFF\x00user\x81\xC7\x05\xFF\x00name\xA3TPW\x92\xC7\x09\xFF\x01\x00\x00\x00\x01\x01cat\xCB@#\xCC\xCC\xCC\xCC\xCC\xCD\xC7\x15\xFF\x02\x00\x00\x00\x00\x4A\xCD\x6A\x43\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xC0\xC3\xC2\xC7\x05\xFF\x00true\xC7\x06\xFF\x00false".b, 15 | } 16 | @ebins = { 17 | :v2 => "<<132,104,9,100,0,4,117,115,101,114,104,3,100,0,4,98,101,114,116,100,0,4,100,105,99,116,108,0,0,0,1,104,2,100,0,4,110,97,109,101,113,0,0,0,3,84,80,87,106,108,0,0,0,2,104,4,100,0,4,98,101,114,116,100,0,5,114,101,103,101,120,113,0,0,0,3,99,97,116,108,0,0,0,1,100,0,8,99,97,115,101,108,101,115,115,106,99,57,46,57,48,48,48,48,48,48,48,48,48,48,48,48,48,48,101,43,48,48,0,0,0,0,0,0,0,0,0,0,106,104,5,100,0,4,98,101,114,116,100,0,4,116,105,109,101,98,0,0,4,230,98,0,14,228,195,97,0,104,2,100,0,4,98,101,114,116,100,0,3,110,105,108,104,2,100,0,4,98,101,114,116,100,0,4,116,114,117,101,104,2,100,0,4,98,101,114,116,100,0,5,102,97,108,115,101,100,0,4,116,114,117,101,100,0,5,102,97,108,115,101>>", 18 | :v3 => "<<133,153,212,4,117,115,101,114,129,212,4,110,97,109,101,216,0,3,0,84,80,87,146,213,0,3,0,0,0,1,1,99,97,116,203,64,35,204,204,204,204,204,205,214,0,0,0,0,74,205,106,67,0,0,0,0,0,0,0,0,0,0,0,0,192,195,194,212,4,116,114,117,101,212,5,102,97,108,115,101>>", 19 | :v4 => "<<134,153,199,5,255,0,117,115,101,114,129,199,5,255,0,110,97,109,101,163,84,80,87,146,199,9,255,1,0,0,0,1,1,99,97,116,203,64,35,204,204,204,204,204,205,199,21,255,2,0,0,0,0,74,205,106,67,0,0,0,0,0,0,0,0,0,0,0,0,192,195,194,199,5,255,0,116,114,117,101,199,6,255,0,102,97,108,115,101>>", 20 | } 21 | end 22 | 23 | should "support either v3 or v4" do 24 | assert BERT.supports?(:v3) || BERT.supports?(:v4) 25 | end 26 | 27 | [:v2, :v3, :v4].each 
do |v| 28 | context "#{v} encoder" do 29 | setup do 30 | @old_version = BERT::Encode.version 31 | BERT::Encode.version = v 32 | @bert = @berts.fetch(v) 33 | @ebin = @ebins.fetch(v) 34 | end 35 | 36 | teardown do 37 | BERT::Encode.version = @old_version 38 | end 39 | 40 | should "decode new format" do 41 | pend unless BERT.supports?(v) 42 | assert_equal @ruby, BERT.decode(@bert) 43 | end 44 | 45 | should "roundtrip string and maintain encoding" do 46 | pend unless BERT.supports?(v) 47 | str = "日本語".encode 'EUC-JP' 48 | round = BERT.decode(BERT.encode(str)) 49 | assert_equal str, round 50 | assert_equal str.encoding, round.encoding 51 | end 52 | 53 | should "roundtrip binary string" do 54 | pend unless BERT.supports?(v) 55 | str = "日本語".b 56 | round = BERT.decode(BERT.encode(str)) 57 | assert_equal str, round 58 | assert_equal str.encoding, round.encoding 59 | end 60 | 61 | should "encode" do 62 | pend unless BERT.supports?(v) 63 | assert_equal @bert, BERT.encode(@ruby) 64 | end 65 | 66 | should "roundtrip obj" do 67 | pend unless BERT.supports?(v) 68 | assert_equal @ruby, BERT.decode(BERT.encode(@ruby)) 69 | end 70 | 71 | should "encode with buffer" do 72 | pend unless BERT.supports?(v) 73 | buf = BERT.encode_to_buffer(@ruby) 74 | io = StringIO.new 75 | io.set_encoding 'binary' 76 | buf.write_to io 77 | assert_equal @bert, io.string 78 | end 79 | 80 | should "ebin" do 81 | pend unless BERT.supports?(v) 82 | assert_equal @ebin, BERT.ebin(@bert) 83 | end 84 | 85 | should "raise on encode when unsupported" do 86 | pend if BERT.supports?(v) 87 | assert_raises do 88 | BERT.encode_to_buffer(@ruby) 89 | end 90 | end 91 | 92 | should "raise on decode when unsupported" do 93 | pend if BERT.supports?(v) 94 | assert_raises do 95 | BERT.decode(@bert) 96 | end 97 | end 98 | end 99 | end 100 | 101 | should "decode the old format" do 102 | assert_equal @ruby, BERT.decode(@bert_v1) 103 | end 104 | 105 | should "ebin" do 106 | assert_equal @ebin_v1, BERT.ebin(@bert_v1) 107 | end 108 
| 109 | should "do roundtrips" do 110 | dd = [] 111 | dd << 1 112 | dd << 1.0 113 | dd << :a 114 | dd << t[] 115 | dd << t[:a] 116 | dd << t[:a, :b] 117 | dd << t[t[:a, 1], t[:b, 2]] 118 | dd << [] 119 | dd << [:a] 120 | dd << [:a, 1] 121 | dd << [[:a, 1], [:b, 2]] 122 | dd << "a" 123 | 124 | dd << nil 125 | dd << true 126 | dd << false 127 | dd << {} 128 | dd << {:a => 1} 129 | dd << {:a => 1, :b => 2} 130 | dd << Time.at(1484112000, 123456) 131 | dd << /^c(a)t$/i 132 | 133 | dd << 178 134 | dd << 256**256 - 1 135 | 136 | dd << :true 137 | dd << :false 138 | dd << :nil 139 | 140 | dd.each do |d| 141 | assert_equal d, BERT.decode(BERT.encode(d)) 142 | end 143 | end 144 | 145 | # should "let me inspect it" do 146 | # puts 147 | # p @ruby 148 | # ruby2 = BERT.decode(@bert) 149 | # p ruby2 150 | # bert2 = BERT.encode(ruby2) 151 | # ruby3 = BERT.decode(bert2) 152 | # p ruby3 153 | # end 154 | end 155 | end 156 | -------------------------------------------------------------------------------- /test/decoder_test.rb: -------------------------------------------------------------------------------- 1 | require 'test_helper' 2 | 3 | class DecoderTest < Test::Unit::TestCase 4 | BERT_NIL = [131,104,2,100,0,4,98,101,114,116,100,0,3,110,105,108].pack('c*') 5 | BERT_TRUE = [131,104,2,100,0,4,98,101,114,116,100,0,4,116,114,117,101].pack('c*') 6 | BERT_FALSE = [131,104,2,100,0,4,98,101,114,116,100,0,5,102,97,108,115,101].pack('c*') 7 | 8 | context "BERT Decoder complex type converter" do 9 | should "convert nil" do 10 | assert_equal nil, BERT::Decoder.decode(BERT_NIL) 11 | end 12 | 13 | should "convert nested nil" do 14 | bert = [131,108,0,0,0,2,104,2,100,0,4,98,101,114,116,100,0,3,110,105, 15 | 108,108,0,0,0,1,104,2,100,0,4,98,101,114,116,100,0,3,110,105, 16 | 108,106,106].pack('c*') 17 | assert_equal [nil, [nil]], BERT::Decoder.decode(bert) 18 | end 19 | 20 | should "convert hashes" do 21 | bert = [131,104,3,100,0,4,98,101,114,116,100,0,4,100,105,99,116,108, 22 | 
0,0,0,1,104,2,100,0,3,102,111,111,109,0,0,0,3,98,97,114, 23 | 106].pack('c*') 24 | after = {:foo => 'bar'} 25 | assert_equal after, BERT::Decoder.decode(bert) 26 | end 27 | 28 | should "convert empty hashes" do 29 | bert = [131,104,3,100,0,4,98,101,114,116,100,0,4,100,105,99,116, 30 | 106].pack('c*') 31 | after = {} 32 | assert_equal after, BERT::Decoder.decode(bert) 33 | end 34 | 35 | should "convert nested hashes" do 36 | bert = [131,104,3,100,0,4,98,101,114,116,100,0,4,100,105,99,116,108,0, 37 | 0,0,1,104,2,100,0,3,102,111,111,104,3,100,0,4,98,101,114,116, 38 | 100,0,4,100,105,99,116,108,0,0,0,1,104,2,100,0,3,98,97,122,109, 39 | 0,0,0,3,98,97,114,106,106].pack('c*') 40 | after = {:foo => {:baz => 'bar'}} 41 | assert_equal after, BERT::Decoder.decode(bert) 42 | end 43 | 44 | should "convert true" do 45 | assert_equal true, BERT::Decoder.decode(BERT_TRUE) 46 | end 47 | 48 | should "convert false" do 49 | assert_equal false, BERT::Decoder.decode(BERT_FALSE) 50 | end 51 | 52 | should "convert times" do 53 | bert = [131,104,5,100,0,4,98,101,114,116,100,0,4,116,105,109,101,98,0, 54 | 0,4,230,98,0,14,228,195,97,0].pack('c*') 55 | after = Time.at(1254976067) 56 | assert_equal after, BERT::Decoder.decode(bert) 57 | end 58 | 59 | should "convert regexen" do 60 | bert = [131,104,4,100,0,4,98,101,114,116,100,0,5,114,101,103,101,120, 61 | 109,0,0,0,7,94,99,40,97,41,116,36,108,0,0,0,2,100,0,8,99,97, 62 | 115,101,108,101,115,115,100,0,8,101,120,116,101,110,100,101, 63 | 100,106].pack('c*') 64 | after = /^c(a)t$/ix 65 | assert_equal after, BERT::Decoder.decode(bert) 66 | end 67 | 68 | should "leave other stuff alone" do 69 | bert = [131,108,0,0,0,3,97,1,99,50,46,48,48,48,48,48,48,48,48,48,48,48, 70 | 48,48,48,48,48,48,48,48,48,101,43,48,48,0,0,0,0,0,108,0,0,0,2, 71 | 100,0,3,102,111,111,109,0,0,0,3,98,97,114,106,106].pack('c*') 72 | after = [1, 2.0, [:foo, 'bar']] 73 | assert_equal after, BERT::Decoder.decode(bert) 74 | end 75 | 76 | should "handle bignums" do 77 | bert = 
[131,110,8,0,0,0,232,137,4,35,199,138].pack('c*') 78 | assert_equal 10_000_000_000_000_000_000, BERT::Decoder.decode(bert) 79 | end 80 | 81 | should "handle bytelists" do 82 | bert = [131,104,3,100,0,3,102,111,111,107,0,2,97,97,100,0,3,98,97,114].pack('c*') 83 | assert_equal t[:foo, [97, 97], :bar], BERT::Decoder.decode(bert) 84 | end 85 | 86 | should "handle massive binaries" do 87 | bert = [131,109,0,128,0,0].pack('c*') + ('a' * (8 * 1024 * 1024)) 88 | assert_equal (8 * 1024 * 1024), BERT::Decoder.decode(bert).size 89 | end 90 | end 91 | end 92 | -------------------------------------------------------------------------------- /test/encoder_test.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | require 'test_helper' 4 | 5 | class EncoderTest < Test::Unit::TestCase 6 | context "BERT Encoder complex type converter" do 7 | should "convert nil" do 8 | assert_equal [:bert, :nil], BERT::Encoder.convert(nil) 9 | end 10 | 11 | should "convert nested nil" do 12 | before = [nil, [nil]] 13 | after = [[:bert, :nil], [[:bert, :nil]]] 14 | assert_equal after, BERT::Encoder.convert(before) 15 | end 16 | 17 | should "convert hashes" do 18 | before = {:foo => 'bar'} 19 | after = [:bert, :dict, [[:foo, 'bar']]] 20 | assert_equal after, BERT::Encoder.convert(before) 21 | end 22 | 23 | should "convert nested hashes" do 24 | before = {:foo => {:baz => 'bar'}} 25 | after = [:bert, :dict, [[:foo, [:bert, :dict, [[:baz, "bar"]]]]]] 26 | assert_equal after, BERT::Encoder.convert(before) 27 | end 28 | 29 | should "convert hash to tuple with array of tuples" do 30 | arr = BERT::Encoder.convert({:foo => 'bar'}) 31 | assert arr.is_a?(Array) 32 | assert arr[2].is_a?(Array) 33 | assert arr[2][0].is_a?(Array) 34 | end 35 | 36 | should "convert tuple to array" do 37 | arr = BERT::Encoder.convert(t[:foo, 2]) 38 | assert arr.is_a?(Array) 39 | end 40 | 41 | should "convert array to erl list" do 42 | list = BERT::Encoder.convert([1, 2]) 
43 | assert list.is_a?(Array) 44 | end 45 | 46 | should "convert an array in a tuple" do 47 | arrtup = BERT::Encoder.convert(t[:foo, [1, 2]]) 48 | assert arrtup.is_a?(Array) 49 | assert arrtup[1].is_a?(Array) 50 | end 51 | 52 | should "convert true" do 53 | before = true 54 | after = [:bert, :true] 55 | assert_equal after, BERT::Encoder.convert(before) 56 | end 57 | 58 | should "convert false" do 59 | before = false 60 | after = [:bert, :false] 61 | assert_equal after, BERT::Encoder.convert(before) 62 | end 63 | 64 | should "convert times" do 65 | before = Time.at(1254976067) 66 | after = [:bert, :time, 1254, 976067, 0] 67 | assert_equal after, BERT::Encoder.convert(before) 68 | end 69 | 70 | should "convert regexen" do 71 | before = /^c(a)t$/ix 72 | after = [:bert, :regex, '^c(a)t$', [:caseless, :extended]] 73 | assert_equal after, BERT::Encoder.convert(before) 74 | end 75 | 76 | should "properly convert types" do 77 | ruby = t[:user, {:name => 'TPW'}, [/cat/i, 9.9], nil, true, false, :true, :false] 78 | cruby = BERT::Encoder.convert(ruby) 79 | assert cruby.instance_of?(BERT::Tuple) 80 | assert cruby[0].instance_of?(Symbol) 81 | assert cruby[1].instance_of?(BERT::Tuple) 82 | end 83 | 84 | should 'handle utf8 strings' do 85 | bert = [131, 109, 0, 0, 0, 5, 195, 169, 116, 195, 169].pack('C*') 86 | assert_equal bert, BERT::Encoder.encode("été") 87 | end 88 | 89 | should 'handle utf8 symbols' do 90 | bert = [131, 100, 0, 5, 195, 169, 116, 195, 169].pack('C*') 91 | assert_equal bert, BERT::Encoder.encode(:'été') 92 | end 93 | 94 | should "handle bignums" do 95 | bert = [131,110,8,0,0,0,232,137,4,35,199,138].pack('c*') 96 | assert_equal bert, BERT::Encoder.encode(10_000_000_000_000_000_000) 97 | 98 | bert = [131,110,8,1,0,0,232,137,4,35,199,138].pack('c*') 99 | assert_equal bert, BERT::Encoder.encode(-10_000_000_000_000_000_000) 100 | end 101 | 102 | context "v2" do 103 | setup do 104 | @old_version = BERT::Encode.version 105 | BERT::Encode.version = :v2 106 | end 107 | 
108 | teardown do 109 | BERT::Encode.version = @old_version 110 | end 111 | 112 | should 'handle utf8 strings' do 113 | bert = [132, 113, 0, 0, 0, 5, 195, 169, 116, 195, 169].pack('C*') 114 | assert_equal bert, BERT::Encoder.encode("été") 115 | end 116 | 117 | should 'handle utf8 symbols' do 118 | bert = [132, 100, 0, 5, 195, 169, 116, 195, 169].pack('C*') 119 | assert_equal bert, BERT::Encoder.encode(:'été') 120 | end 121 | 122 | should "handle bignums" do 123 | bert = [132,110,8,0,0,0,232,137,4,35,199,138].pack('c*') 124 | assert_equal bert, BERT::Encoder.encode(10_000_000_000_000_000_000) 125 | 126 | bert = [132,110,8,1,0,0,232,137,4,35,199,138].pack('c*') 127 | assert_equal bert, BERT::Encoder.encode(-10_000_000_000_000_000_000) 128 | end 129 | end 130 | 131 | should "leave other stuff alone" do 132 | before = [1, 2.0, [:foo, 'bar']] 133 | assert_equal before, BERT::Encoder.convert(before) 134 | end 135 | end 136 | end 137 | -------------------------------------------------------------------------------- /test/test_helper.rb: -------------------------------------------------------------------------------- 1 | require 'rubygems' 2 | require 'test/unit' 3 | require 'shoulda' 4 | 5 | $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib')) 6 | $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'ext', 'bert', 'c')) 7 | 8 | load 'bert.rb' 9 | 10 | if ENV.key?("BERT_TEST_IMPL") && ENV["BERT_TEST_IMPL"] != BERT::Decode.impl 11 | raise "Incorrect implementation loaded for value of BERT_TEST_IMPL environment variable! " + 12 | "Wanted #{ENV["BERT_TEST_IMPL"]}, but loaded #{BERT::Decode.impl}." 13 | end 14 | 15 | puts "Using #{BERT::Decode.impl} implementation." 16 | --------------------------------------------------------------------------------