├── .gitignore ├── .travis.yml ├── CHANGELOG.md ├── Gemfile ├── LICENSE ├── README.md ├── Rakefile ├── ci └── jruby_download_commons_compress.sh ├── lib ├── .rspec ├── core_ext │ └── io.rb ├── rbzip2.rb └── rbzip2 │ ├── adapter.rb │ ├── ffi.rb │ ├── ffi │ ├── compressor.rb │ ├── constants.rb │ ├── decompressor.rb │ └── errors.rb │ ├── io.rb │ ├── java.rb │ ├── java │ ├── compressor.rb │ └── decompressor.rb │ ├── ruby.rb │ ├── ruby │ ├── compressor.rb │ ├── constants.rb │ ├── crc.rb │ ├── decompressor.rb │ ├── input_data.rb │ └── output_data.rb │ └── version.rb ├── rbzip2.gemspec └── spec ├── common ├── compressor_spec.rb └── decompressor_spec.rb ├── ffi ├── compressor_spec.rb └── decompressor_spec.rb ├── fixtures ├── big_test.bz2 ├── big_test.java.bz2 ├── big_test.ruby.bz2 ├── big_test.txt ├── test.bz2 └── test.txt ├── helper.rb ├── java ├── compressor_spec.rb └── decompressor_spec.rb └── ruby ├── compressor_spec.rb └── decompressor_spec.rb /.gitignore: -------------------------------------------------------------------------------- 1 | .yardoc/ 2 | Gemfile.lock 3 | doc/ 4 | pkg/ 5 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | before_install: gem install bundler 2 | before_script: source ./ci/jruby_download_commons_compress.sh 3 | cache: bundler 4 | rvm: 5 | - 1.8.7 6 | - 1.9.2 7 | - 1.9.3 8 | - 2.0 9 | - 2.1 10 | - 2.2 11 | - 2.3 12 | - 2.4.1 13 | - jruby-18mode 14 | - jruby-19mode 15 | - jruby-9 16 | - jruby-head 17 | - rbx 18 | - rbx-2 19 | - ruby-head 20 | matrix: 21 | allow_failures: 22 | - rvm: jruby-head 23 | - rvm: rbx 24 | - rvm: rbx-2 25 | - rvm: ruby-head 26 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## Version 0.3.0 / 2017-02-16 2 | 3 | * [FEATURE] De-/compress using libbz2 via FFI 
4 | * [FEATURE] De-/compress using Commons Compress on JRuby 5 | * [FEATURE] Implement getc/gets and putc/puts 6 | * [ENHANCEMENT] Performance improvements for the pure Ruby implementation 7 | 8 | ## Version 0.2.0 / 2011-11-11 9 | 10 | * [FEATURE] Working compression 11 | * [ENHANCEMENT] Improvements for existing code 12 | * [BUGFIX] Various fixes 13 | 14 | ## Version 0.1.0 / 2011-11-09 15 | 16 | * [FEATURE] Working decompression 17 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source 'https://rubygems.org' 2 | 3 | group :development do 4 | gem 'ffi', '~> 1.9.0', :platform => :ruby 5 | gem 'rspec-core', '~> 3.5' 6 | gem 'rspec-expectations', '~> 3.5' 7 | gem 'yard', '~> 0.9' 8 | end 9 | 10 | group :test do 11 | gem 'coveralls', '~> 0.8', :require => false 12 | end 13 | 14 | if Bundler.current_ruby.jruby_1? || Bundler.current_ruby.ruby_1? 15 | gem 'json', '< 2' 16 | gem 'rake', '~> 10.0', :group => :development 17 | gem 'term-ansicolor', '< 1.4' 18 | gem 'tins', '< 1.7' 19 | else 20 | gem 'rake', '~> 12', :group => :development 21 | end 22 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2011-2017, Sebastian Staudt 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without modification, 5 | are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, 8 | this list of conditions and the following disclaimer. 9 | * Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the documentation 11 | and/or other materials provided with the distribution. 
12 | * Neither the name of the author nor the names of its contributors 13 | may be used to endorse or promote products derived from this software 14 | without specific prior written permission. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 17 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 20 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 21 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 22 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 23 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 25 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | RBzip2 2 | ====== 3 | 4 | RBzip2 is a gem providing various implementations of the [bzip2][bzip2] 5 | algorithm used for compression and decompression. Currently, it includes an 6 | [FFI][ffi]-based implementation and a pure Ruby implementation that's slower 7 | but works on any Ruby VM. Additionally, there's a JRuby-specific implementation 8 | that's based on Commons Compress. 9 | 10 | The pure Ruby implementation is based on the code of the 11 | [Apache Commons Compress][commons] project and adds a straight Ruby-like API. 12 | There are no external dependencies like other gems or libraries. Therefore it 13 | will run on any Ruby implementation and the respective operating systems 14 | supported by those implementations. 
15 | 16 | The FFI implementation uses `libbz2` and provides fast performance on 17 | platforms where both `libbz2` and FFI are available. It is derived from this 18 | [Gist by Brian Lopez][gist]. 19 | 20 | The Java-based implementation can use the 21 | [Commons Compress Java library][commons] if it is available in the classpath. 22 | 23 | ## Features 24 | 25 | * Compression of raw data into bzip2 compressed `IO`s (like `File` or 26 | `StringIO`) 27 | * Decompression of bzip2 compressed `IO`s (like `File` or `StringIO`) 28 | 29 | ## Usage 30 | 31 | ```ruby 32 | require 'rbzip2' 33 | ``` 34 | 35 | ### Compression 36 | 37 | ```ruby 38 | data = some_data 39 | file = File.new 'somefile.bz2', 'wb' # open the target file for writing 40 | bz2 = RBzip2.default_adapter::Compressor.new file # wrap the file into the compressor 41 | bz2.write data # write the raw data to the compressor 42 | bz2.close # finish compression (important!) 43 | ``` 44 | 45 | ### Decompression 46 | 47 | ```ruby 48 | file = File.new 'somefile.bz2' # open a compressed file 49 | bz2 = RBzip2.default_adapter::Decompressor.new file # wrap the file into the decompressor 50 | data = bz2.read # read data into a string 51 | ``` 52 | 53 | ## Future plans 54 | 55 | * Simple decompression of strings 56 | * Simple creation of compressed files 57 | * Two-way compressed IO that will (de)compress as you read/write 58 | 59 | ## Installation 60 | 61 | To install RBzip2 as a Ruby gem use the following command: 62 | 63 | ```sh 64 | $ gem install rbzip2 65 | ``` 66 | 67 | To use it as a dependency managed by Bundler add the following to your 68 | `Gemfile`: 69 | 70 | ```ruby 71 | gem 'rbzip2' 72 | ``` 73 | 74 | If you want to use the FFI implementation on any non-JRuby VM, be sure to also 75 | install the `ffi` gem. 76 | 77 | ## Performance 78 | 79 | The `bzip2-ruby` gem is a Ruby binding to `libbz2` and offers the best performance, 80 | but it is only available for MRI < 2.0.0 and Rubinius. 
81 | 82 | The FFI implementation binds to `libbz2` as well and has almost the same 83 | performance as `bzip2-ruby`. 84 | 85 | The Java implementation uses a native Java library and is slower by a factor of 86 | about 2/10 while compressing/decompressing. 87 | 88 | The pure Ruby implementation of RBzip2 is inherently slower than `bzip2-ruby`. 89 | Currently, this is a plain port of Apache Commons' Java code to Ruby and no 90 | effort has been made to optimize it. That's why the Ruby implementation of 91 | RBzip2 is slower by a factor of about 130/100 while compressing/decompressing 92 | (on Ruby 1.9.3). Ruby 1.8.7 is even slower. 93 | 94 | ## License 95 | 96 | This code is free software; you can redistribute it and/or modify it under the 97 | terms of the new BSD License. A copy of this license can be found in the 98 | included LICENSE file. 99 | 100 | ## Credits 101 | 102 | * Sebastian Staudt -- koraktor(at)gmail.com 103 | * Brian Lopez -- seniorlopez(at)gmail.com 104 | 105 | ## See Also 106 | 107 | * [Documentation](http://rubydoc.info/gems/rbzip2) 108 | * [GitHub project page](https://github.com/koraktor/rbzip2) 109 | * [bzip2 project page][bzip2] 110 | 111 | [bzip2]: http://bzip.org 112 | [commons]: http://commons.apache.org/compress 113 | [ffi]: https://github.com/ffi/ffi/wiki 114 | [gist]: https://gist.github.com/brianmario/5833373 115 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | # This code is free software; you can redistribute it and/or modify it under 4 | # the terms of the new BSD License. 
5 | # 6 | # Copyright (c) 2011, Sebastian Staudt 7 | 8 | require 'bundler/gem_tasks' 9 | require 'rspec/core/rake_task' 10 | require 'rubygems/package_task' 11 | 12 | task :default => :spec 13 | task :test => :spec 14 | 15 | spec = Gem::Specification.load 'rbzip2.gemspec' 16 | Gem::PackageTask.new(spec) do |pkg| 17 | end 18 | 19 | RSpec::Core::RakeTask.new('spec') do |t| 20 | end 21 | 22 | begin 23 | require 'yard' 24 | 25 | YARD::Rake::YardocTask.new do |yardoc| 26 | yardoc.name = 'doc' 27 | yardoc.files = [ 'lib/**/*.rb', 'LICENSE', 'README.md' ] 28 | yardoc.options = [ '--private', '--title', 'RBzip2 — API Documentation' ] 29 | end 30 | rescue LoadError 31 | desc 'Generate YARD Documentation (not available)' 32 | task :doc do 33 | $stderr.puts 'You need YARD to build the documentation. Install it using `gem install yard`.' 34 | end 35 | end 36 | 37 | desc 'Clean documentation and package directories' 38 | task :clean do 39 | FileUtils.rm_rf 'doc' 40 | FileUtils.rm_rf 'pkg' 41 | end 42 | -------------------------------------------------------------------------------- /ci/jruby_download_commons_compress.sh: -------------------------------------------------------------------------------- 1 | VERSION=1.16.1 2 | JAR_PATH=commons-compress-${VERSION}/commons-compress-${VERSION}.jar 3 | URL=http://archive.apache.org/dist/commons/compress/binaries/commons-compress-${VERSION}-bin.tar.gz 4 | 5 | if `ruby -v | grep -q jruby`; then 6 | mkdir java > /dev/null 2>&1 7 | curl $URL | tar -xz -C java $JAR_PATH 8 | export CLASSPATH=java/${JAR_PATH} 9 | fi 10 | -------------------------------------------------------------------------------- /lib/.rspec: -------------------------------------------------------------------------------- 1 | --colour 2 | -------------------------------------------------------------------------------- /lib/core_ext/io.rb: -------------------------------------------------------------------------------- 1 | # This code is free software; you can redistribute 
# Backport of IO#readbyte for Ruby versions whose IO class does not provide it.
#
# The guard checks for an *instance* method, so the fallback has to be defined
# as an instance method too (the previous `def IO.readbyte` created a class
# method that no IO object could call).
unless IO.method_defined? :readbyte

  class IO

    # Reads a single byte from the stream.
    #
    # @return [Integer] the byte value (0..255)
    # @raise [EOFError] if the stream is at end of file
    def readbyte
      byte = read 1
      raise EOFError, 'end of file reached' if byte.nil?

      # unpack works the same whether String#[] returns an Integer or a String
      byte.unpack('C').first
    end

  end

end
begin
  require 'ffi'
rescue LoadError
  # The ffi gem is optional; RBzip2::FFI.init deals with its absence.
end

# Adapter namespace for the implementation that binds to libbz2 through FFI.
module RBzip2::FFI

  # Library names handed to +ffi_lib+ when binding libbz2.
  LIBBZ2 = %w[bz2 libbz2.so.1]

  # Invoked by RBzip2::Adapter.extended. Tries to bind libbz2 and flags the
  # adapter as unavailable when either the FFI constant or the library
  # cannot be resolved.
  def self.init
    extend ::FFI::Library
    ffi_lib LIBBZ2
  rescue NameError, LoadError
    @@available = false
  end

  extend RBzip2::Adapter

  autoload :Compressor,   'rbzip2/ffi/compressor'
  autoload :Decompressor, 'rbzip2/ffi/decompressor'

  autoload :Error,        'rbzip2/ffi/errors'
  autoload :BufferError,  'rbzip2/ffi/errors'
  autoload :ConfigError,  'rbzip2/ffi/errors'
  autoload :CorruptError, 'rbzip2/ffi/errors'

end

require 'rbzip2/ffi/constants'
# Compressor of the FFI adapter.
#
# Every call to #write compresses the given bytes with libbz2's
# buffer-to-buffer API and writes the result to the wrapped IO.
class RBzip2::FFI::Compressor

  extend ::FFI::Library

  ffi_lib ::RBzip2::FFI::LIBBZ2
  attach_function :BZ2_bzBuffToBuffCompress,
                  [:pointer, :buffer_inout, :pointer, :uint32, :int, :int, :int],
                  :int

  # Compresses a string in memory.
  #
  # @param data [String] the raw bytes to compress
  # @param blksize [Integer] block size, clamped to 1..9
  # @param verbosity [Integer] libbz2 verbosity, clamped to 0..4
  # @param work_factor [Integer] libbz2 work factor, clamped to 0..250
  # @return [String] the compressed bytes
  # @raise [ArgumentError, NoMemoryError, RBzip2::FFI::Error] depending on the
  #   status code returned by libbz2
  def self.compress(data, blksize = RBzip2::FFI::DEFAULT_BLK_SIZE, verbosity = 0, work_factor = 30)
    blksize = 1 if blksize < 1
    blksize = 9 if blksize > 9
    verbosity = 0 if verbosity < 0
    verbosity = 4 if verbosity > 4
    work_factor = 0 if work_factor < 0
    work_factor = 250 if work_factor > 250

    src_len = data.bytesize
    # Source size plus 1% (rounded up) plus 600 bytes, kept as an Integer so
    # that buffer sizes are never passed to FFI as Floats.
    out_len = src_len + (src_len + 99) / 100 + 600
    dst_buf = ::FFI::MemoryPointer.new :char, out_len
    dst_len = ::FFI::MemoryPointer.new :uint32
    dst_len.write_uint out_len

    src_buf = ::FFI::MemoryPointer.new :char, src_len
    src_buf.put_bytes 0, data

    ret = BZ2_bzBuffToBuffCompress dst_buf, dst_len, src_buf, src_len,
                                   blksize, verbosity, work_factor

    case ret
    when RBzip2::FFI::BZ_OK
      dst_buf.read_bytes dst_len.read_uint
    when RBzip2::FFI::BZ_PARAM_ERROR
      raise ArgumentError, 'One of blksize, verbosity or work_factor is out of range'
    when RBzip2::FFI::BZ_MEM_ERROR
      raise NoMemoryError, 'Out of memory'
    when RBzip2::FFI::BZ_OUTBUFF_FULL
      raise RBzip2::FFI::BufferError, "Output buffer isn't large enough"
    when RBzip2::FFI::BZ_CONFIG_ERROR
      raise RBzip2::FFI::ConfigError, 'libbz2 has been mis-compiled'
    else
      raise RBzip2::FFI::Error, "Unhandled error code: #{ret}"
    end
  end

  # @param io [IO] the target the compressed data is written to
  def initialize(io)
    @io = io
  end

  # Flushes the wrapped IO (no-op once closed).
  def flush
    @io.flush unless @io.nil?
  end

  # Flushes and closes the wrapped IO; further writes raise.
  def close
    flush
    unless @io.nil?
      @io.close
      @io = nil
    end
  end

  # Writes a single character.
  #
  # @param int [Numeric, #to_s] a byte value (only the low 8 bits are used)
  #   or an object whose first character is written
  def putc(int)
    if int.is_a? Numeric
      # #write expects a String; handing it the bare Integer used to fail
      # with NoMethodError (Integer#bytesize) inside .compress
      write [int & 0xff].pack('C')
    else
      write int.to_s[0, 1]
    end
  end

  # Writes the line followed by the record separator ($/).
  def puts(line)
    write line + $/
  end

  # Compresses +bytes+ (block size 9) and writes the result to the IO.
  #
  # @raise [RuntimeError] if the compressor has been closed
  def write(bytes)
    raise 'stream closed' if @io.nil?

    @io.write self.class.compress(bytes, 9)
  end

end
# Decompressor of the FFI adapter.
#
# Whole-stream reads go through libbz2's buffer-to-buffer API; length-limited
# reads are only supported for File objects and use libbz2's file API on a
# separate C file handle opened from the File's path.
class RBzip2::FFI::Decompressor

  extend ::FFI::Library

  ffi_lib ::FFI::Platform::LIBC
  attach_function :fopen,
                  [:string, :string],
                  :pointer
  attach_function :fclose,
                  [:pointer],
                  :int

  ffi_lib ::RBzip2::FFI::LIBBZ2
  attach_function :BZ2_bzRead,
                  [:pointer, :pointer, :pointer, :int],
                  :int
  attach_function :BZ2_bzReadClose,
                  [:pointer, :pointer],
                  :void
  attach_function :BZ2_bzReadOpen,
                  [:pointer, :pointer, :int, :int, :pointer, :int],
                  :pointer
  attach_function :BZ2_bzBuffToBuffDecompress,
                  [:pointer, :buffer_inout, :pointer, :uint32, :int, :int],
                  :int

  # Decompresses a string in memory.
  #
  # @param data [String] bzip2 compressed bytes
  # @param factor [Integer] the output buffer is +factor+ times the input size
  # @param small [Integer] passed through to libbz2
  # @param verbosity [Integer] passed through to libbz2
  # @return [String] the decompressed bytes
  # @raise [RBzip2::FFI::BufferError] if the output buffer is too small
  # @raise [RBzip2::FFI::CorruptError] if the data cannot be decoded
  def self.decompress(data, factor = 2, small = 0, verbosity = 0)
    out_len = data.bytesize * factor
    dst_buf = ::FFI::MemoryPointer.new :char, out_len
    dst_len = ::FFI::MemoryPointer.new :uint32
    dst_len.write_uint out_len

    src_buf = ::FFI::MemoryPointer.new :char, data.bytesize
    src_buf.put_bytes 0, data

    ret = BZ2_bzBuffToBuffDecompress dst_buf, dst_len, src_buf, data.bytesize,
                                     small, verbosity

    case ret
    when RBzip2::FFI::BZ_OK
      dst_buf.read_bytes dst_len.read_uint
    when RBzip2::FFI::BZ_PARAM_ERROR
      raise ArgumentError, 'One of small or verbosity is out of range'
    when RBzip2::FFI::BZ_MEM_ERROR
      raise NoMemoryError, 'Out of memory'
    when RBzip2::FFI::BZ_OUTBUFF_FULL
      raise RBzip2::FFI::BufferError, "Output buffer isn't large enough"
    when RBzip2::FFI::BZ_DATA_ERROR, RBzip2::FFI::BZ_DATA_ERROR_MAGIC,
         RBzip2::FFI::BZ_UNEXPECTED_EOF
      raise RBzip2::FFI::CorruptError, 'Compressed data appears to be corrupt or unreadable'
    when RBzip2::FFI::BZ_CONFIG_ERROR
      raise RBzip2::FFI::ConfigError, 'libbz2 has been mis-compiled'
    else
      raise RBzip2::FFI::Error, "Unhandled error code: #{ret}"
    end
  end

  # Reads up to +length+ decompressed bytes through libbz2's file API.
  #
  # @param length [Integer] maximum number of bytes to read
  # @return [String, nil] the bytes actually read, or nil at end of stream
  # @raise [RBzip2::FFI::Error] if libbz2 reports a read error
  def read_file(length)
    return nil if @eof

    open_file if @bz_file.nil?

    error = ::FFI::MemoryPointer.new :int
    dst_buf = ::FFI::MemoryPointer.new :char, length

    # BZ2_bzRead returns how many bytes were really stored in the buffer;
    # only that many bytes of dst_buf are valid.
    count = BZ2_bzRead error, @bz_file, dst_buf, length

    case error.read_int
    when RBzip2::FFI::BZ_OK
      # more data may follow
    when RBzip2::FFI::BZ_STREAM_END
      @eof = true
    else
      raise RBzip2::FFI::Error, "Unhandled error code: #{error.read_int}"
    end

    count > 0 ? dst_buf.read_bytes(count) : nil
  end

  # @param io [IO] the source of compressed data
  def initialize(io)
    @io = io
  end

  # Releases the IO reference (except for $stdin) and any libbz2 file handle.
  def close
    if @io != $stdin
      @io = nil
      @data = nil
    end

    close_file unless @bz_file.nil?
  end

  # Closes the libbz2 reader and the underlying C file handle.
  def close_file
    error = ::FFI::MemoryPointer.new :int
    BZ2_bzReadClose error, @bz_file
    fclose @file
    # forget the handles so that a second #close cannot double-free them
    @bz_file = nil
    @file = nil
    @eof = false
  end

  # Reads a single byte.
  #
  # @return [String, nil] the byte, or nil at end of stream
  def getc
    read 1
  end

  # Reads up to and including the next newline.
  #
  # @return [String, nil] the line, or nil if the stream is already exhausted
  def gets
    line = ''
    loop do
      char = getc
      break if char.nil?
      line << char
      break if char == "\n"
    end
    line.empty? ? nil : line
  end

  # Opens the File's path with fopen and attaches a libbz2 reader to it.
  #
  # @raise [RuntimeError] if the wrapped IO is not a File
  # @raise [RBzip2::FFI::Error] if the file or the bzip2 stream cannot be opened
  def open_file(verbosity = 0, small = 0)
    raise 'IO not a file' unless @io.is_a? File

    small = 0 if small < 0
    verbosity = 0 if verbosity < 0
    verbosity = 4 if verbosity > 4

    error = ::FFI::MemoryPointer.new :int

    @file = fopen @io.path, 'rb'
    if @file.null?
      @file = nil
      raise RBzip2::FFI::Error, "Unable to open #{@io.path}"
    end

    @bz_file = BZ2_bzReadOpen error, @file, verbosity, small, nil, 0
    unless error.read_int == RBzip2::FFI::BZ_OK
      code = error.read_int
      fclose @file
      @file = nil
      @bz_file = nil
      raise RBzip2::FFI::Error, "Unhandled error code: #{code}"
    end
    @eof = false
  end

  # Reads decompressed data.
  #
  # Without +length+ the complete IO is read and decompressed in memory, growing
  # the output buffer until it is large enough. With +length+ the data is read
  # through #read_file, which is only possible for File objects.
  #
  # @param length [Integer, nil] number of bytes to read, or nil for everything
  # @return [String, nil] the decompressed bytes
  # @raise [RuntimeError] if the decompressor has been closed
  # @raise [NotImplementedError] for length-limited reads on non-File IOs
  def read(length = nil)
    raise 'stream closed' if @io.nil?

    if length.nil?
      factor = 4
      compressed_data = @io.read
      data = nil
      while data.nil?
        begin
          data = self.class.decompress compressed_data, factor
        rescue RBzip2::FFI::BufferError
          factor = factor ** 2
        end
      end
    else
      if @io.is_a? File
        data = read_file length
      else
        raise NotImplementedError
      end
    end

    data
  end

  # @return [Integer, nil] the compressed size for StringIO and File sources
  def size
    if @io.is_a? StringIO
      @io.size
    elsif @io.is_a? File
      @io.stat.size
    end
  end

  # Reads the whole stream and returns the size of the decompressed data.
  def uncompressed
    @data = read
    @data.size
  end

  # NOTE(review): this calls #uncompressed, which reads the complete IO.
  def inspect
    "#<#{self.class}: @io=#{@io.inspect} size=#{size} uncompressed=#{uncompressed}>"
  end

end
# Two-way wrapper that routes writes through the default adapter's compressor
# and reads through its decompressor, both created on the same IO.
class RBzip2::IO

  # @param io [IO] the underlying stream
  def initialize(io)
    @io = io
    @compressor = RBzip2.default_adapter::Compressor.new io
    @decompressor = RBzip2.default_adapter::Decompressor.new io
  end

  # Closes the compressor and the decompressor. The decompressor is closed
  # even if closing the compressor raises.
  def close
    @compressor.close
  ensure
    @decompressor.close
  end

  # @return the next character of the decompressed data
  def getc
    @decompressor.getc
  end

  # @return the next line of the decompressed data
  def gets
    @decompressor.gets
  end

  # Compresses and writes a single character.
  def putc(int)
    @compressor.putc int
  end

  # Compresses and writes a line.
  def puts(line)
    @compressor.puts line
  end

  # Reads decompressed data.
  #
  # @param length [Integer, nil] number of bytes to read; when omitted the
  #   decompressor's #read is called without arguments, exactly as before
  def read(length = nil)
    length.nil? ? @decompressor.read : @decompressor.read(length)
  end

  # Compresses and writes +data+.
  def write(data)
    @compressor.write data
  end

end
# --- lib/rbzip2/java/compressor.rb ---

# Compressor of the Java adapter, backed by Commons Compress'
# BZip2CompressorOutputStream.
class RBzip2::Java::Compressor

  # @param io [IO] the target stream; must respond to #to_outputstream
  def initialize(io)
    @io = RBzip2::Java::BZip2CompressorOutputStream.new io.to_outputstream
  end

  # Flushes the Java output stream (no-op once closed).
  def flush
    @io.flush unless @io.nil?
  end

  # Closes the Java output stream. The reference is dropped afterwards so
  # that the 'stream closed' check in #write can actually trigger.
  def close
    return if @io.nil?

    @io.close
    @io = nil
  end

  # Writes a single character.
  #
  # @param int [Numeric, #to_s] a byte value (only the low 8 bits are used)
  #   or an object whose first character is written
  def putc(int)
    if int.is_a? Numeric
      # #write calls String#to_java_bytes, so the byte has to be a String
      write [int & 0xff].pack('C')
    else
      write int.to_s[0, 1]
    end
  end

  # Writes the line followed by the record separator ($/).
  def puts(line)
    write line + $/
  end

  # @param bytes [String] raw data to compress
  # @raise [RuntimeError] if the compressor has been closed
  def write(bytes)
    raise 'stream closed' if @io.nil?

    @io.write bytes.to_java_bytes
  end

end

# --- lib/rbzip2/java/decompressor.rb ---

# Decompressor of the Java adapter, backed by Commons Compress'
# BZip2CompressorInputStream.
class RBzip2::Java::Decompressor

  # Size of the buffer used when reading the stream to its end.
  CHUNK_SIZE = 1024

  # @param io [IO] the compressed source; must respond to #to_inputstream
  def initialize(io)
    @io = io
    @is = RBzip2::Java::BZip2CompressorInputStream.new io.to_inputstream
  end

  # Closes the Java input stream.
  def close
    @is.close
  end

  # Reads a single byte.
  #
  # @return [String, nil] the byte, or nil at end of stream
  def getc
    read 1
  end

  # Reads up to and including the next newline.
  #
  # @return [String, nil] the line, or nil if the stream is already exhausted
  def gets
    line = ''
    loop do
      char = getc
      break if char.nil?
      line << char
      break if char == "\n"
    end
    line.empty? ? nil : line
  end

  # Reads decompressed data.
  #
  # The number returned by the Java stream's read is honoured: -1 marks the
  # end of the stream and a short read does not.
  #
  # @param length [Integer, nil] number of bytes to read, or nil for everything
  # @return [String, nil] the bytes read; nil if +length+ was requested at
  #   end of stream
  def read(length = nil)
    if length.nil?
      data = String.from_java_bytes Java::byte[0].new
      chunk = Java::byte[CHUNK_SIZE].new
      while (bytes_read = @is.read(chunk)) != -1
        data << String.from_java_bytes(chunk)[0, bytes_read]
      end
      data
    else
      buffer = Java::byte[length].new
      offset = 0
      while offset < length
        bytes_read = @is.read buffer, offset, length - offset
        break if bytes_read == -1
        offset += bytes_read
      end
      return nil if offset == 0 && length > 0

      String.from_java_bytes(buffer)[0, offset]
    end
  end

  # @return [Integer, nil] the compressed size for StringIO and File sources
  def size
    if @io.is_a? StringIO
      @io.size
    elsif @io.is_a? File
      @io.stat.size
    end
  end

  # Reads the whole stream and returns the size of the decompressed data.
  def uncompressed
    @data = read
    @data.size
  end

  # NOTE(review): this calls #uncompressed, which reads the complete stream.
  def inspect
    "#<#{self.class}: @io=#{@io.inspect} size=#{size} uncompressed=#{uncompressed}>"
  end

end
[(input_length / 132000) + 1, 9].min : MAX_BLOCK_SIZE 25 | end 26 | 27 | def self.make_code_lengths(len, freq, data, alpha_size, max_len) 28 | heap = data.heap 29 | weight = data.weight 30 | parent = data.parent 31 | 32 | weight[0] = 0 33 | (alpha_size - 1).downto(0) do |i| 34 | weight[i + 1] = (freq[i] == 0 ? 1 : freq[i]) << 8 35 | end 36 | 37 | too_long = true 38 | while too_long 39 | too_long = false 40 | 41 | n_nodes = alpha_size 42 | n_heap = 0 43 | heap[0] = 0 44 | weight[0] = 0 45 | parent[0] = -2 46 | 47 | 1.upto(alpha_size) do |i| 48 | parent[i] = -1 49 | n_heap += 1 50 | heap[n_heap] = i 51 | 52 | zz = n_heap 53 | tmp = heap[zz] 54 | while weight[tmp] < weight[heap[zz >> 1]] 55 | heap[zz] = heap[zz >> 1] 56 | zz >>= 1 57 | end 58 | heap[zz] = tmp 59 | end 60 | 61 | while n_heap > 1 62 | n1 = heap[1] 63 | heap[1] = heap[n_heap] 64 | n_heap -= 1 65 | 66 | zz = 1 67 | tmp = heap[1] 68 | 69 | while true do 70 | yy = zz << 1 71 | 72 | break if yy > n_heap 73 | 74 | yy += 1 if (yy < n_heap) && (weight[heap[yy + 1]] < weight[heap[yy]]) 75 | 76 | break if weight[tmp] < weight[heap[yy]] 77 | 78 | heap[zz] = heap[yy] 79 | zz = yy 80 | end 81 | 82 | heap[zz] = tmp 83 | 84 | n2 = heap[1] 85 | heap[1] = heap[n_heap] 86 | n_heap -= 1 87 | 88 | zz = 1 89 | tmp = heap[1] 90 | 91 | while true do 92 | yy = zz << 1 93 | 94 | break if yy > n_heap 95 | 96 | yy += 1 if (yy < n_heap) && (weight[heap[yy + 1]] < weight[heap[yy]]) 97 | 98 | break if weight[tmp] < weight[heap[yy]] 99 | 100 | heap[zz] = heap[yy] 101 | zz = yy 102 | end 103 | 104 | heap[zz] = tmp 105 | n_nodes += 1 106 | parent[n1] = parent[n2] = n_nodes 107 | 108 | weight_n1 = weight[n1] 109 | weight_n2 = weight[n2] 110 | weight[n_nodes] = ((weight_n1 & 0xffffff00) + 111 | (weight_n2 & 0xffffff00)) | 112 | (1 + (((weight_n1 & 0x000000ff) > 113 | (weight_n2 & 0x000000ff)) ? 
(weight_n1 & 0x000000ff) : 114 | (weight_n2 & 0x000000ff))) 115 | 116 | parent[n_nodes] = -1 117 | n_heap += 1 118 | heap[n_heap] = n_nodes 119 | 120 | zz = n_heap 121 | tmp = heap[zz] 122 | weight_tmp = weight[tmp] 123 | while weight_tmp < weight[heap[zz >> 1]] 124 | heap[zz] = heap[zz >> 1] 125 | zz >>= 1 126 | end 127 | heap[zz] = tmp 128 | end 129 | 130 | 1.upto(alpha_size) do |i| 131 | j = 0 132 | k = i 133 | 134 | while (parent_k = parent[k]) >= 0 135 | k = parent_k 136 | j += 1 137 | end 138 | 139 | len[i - 1] = j 140 | too_long = true if j > max_len 141 | end 142 | 143 | if too_long 144 | 1.upto(alpha_size) do |i| 145 | j = weight[i] >> 8 146 | j = 1 + (j >> 1) 147 | weight[i] = j << 8 148 | end 149 | end 150 | end 151 | end 152 | 153 | def self.med3(a, b, c) 154 | (a < b) ? (b < c ? b : a < c ? c : a) : (b > c ? b : a > c ? c : a) 155 | end 156 | 157 | def self.vswap(fmap, p1, p2, n) 158 | n += p1 159 | while p1 < n 160 | fmap[p1], fmap[p2] = fmap[p2], fmap[p1] 161 | p1 += 1 162 | p2 += 1 163 | end 164 | end 165 | 166 | attr_reader :block_size 167 | 168 | def initialize(io, block_size = MAX_BLOCK_SIZE) 169 | @allowable_block_size = 0 170 | @block_size = block_size 171 | @buff = 0 172 | @combined_crc = 0 173 | @crc = RBzip2::Ruby::CRC.new 174 | @current_char = -1 175 | @io = io 176 | @last = 0 177 | @live = 0 178 | @run_length = 0 179 | 180 | init 181 | end 182 | 183 | def block_sort 184 | @work_limit = WORK_FACTOR * @last 185 | @work_done = 0 186 | @block_randomized = false 187 | @first_attempt = true 188 | main_sort 189 | 190 | if @first_attempt && @work_done > @work_limit 191 | randomize_block 192 | @work_limit = @work_done = 0 193 | @first_attempt = false 194 | main_sort 195 | end 196 | 197 | fmap = @data.fmap 198 | @orig_ptr = -1 199 | (@last + 1).times do |i| 200 | if fmap[i] == 0 201 | @orig_ptr = i 202 | break 203 | end 204 | end 205 | end 206 | 207 | def close 208 | unless @io.nil? 
209 | io_shadow = @io 210 | finish 211 | io_shadow.close 212 | end 213 | end 214 | 215 | def end_block 216 | @block_crc = @crc.final_crc 217 | @combined_crc = (@combined_crc << 1) | (@combined_crc >> 31) 218 | @combined_crc ^= @block_crc 219 | 220 | return if @last == -1 221 | 222 | block_sort 223 | 224 | put_byte 0x31 225 | put_byte 0x41 226 | put_byte 0x59 227 | put_byte 0x26 228 | put_byte 0x53 229 | put_byte 0x59 230 | 231 | put_int @block_crc 232 | 233 | @block_randomized ? w(1, 1) : w(1, 0) 234 | 235 | move_to_front_code_and_send 236 | end 237 | 238 | def end_compression 239 | put_byte 0x17 240 | put_byte 0x72 241 | put_byte 0x45 242 | put_byte 0x38 243 | put_byte 0x50 244 | put_byte 0x90 245 | 246 | put_int @combined_crc 247 | 248 | finished_with_stream 249 | end 250 | 251 | def finish 252 | unless @io.nil? 253 | begin 254 | write_run if @run_length > 0 255 | @current_char = -1 256 | end_block 257 | end_compression 258 | ensure 259 | @io = nil 260 | @data = nil 261 | end 262 | end 263 | end 264 | 265 | def finished_with_stream 266 | while @live > 0 267 | @io.write((@buff >> 24).chr) 268 | @buff <<= 8 269 | @buff &= 0xffffffff 270 | @live -= 8 271 | end 272 | end 273 | 274 | def flush 275 | @io.flush unless @io.nil? 
# Applies the move-to-front transform to the sorted block and encodes runs of
# MTF position 0 as sequences of RUNA/RUNB symbols.
#
# Reads @last and the in_use, block and fmap tables of @data. Fills the
# unseq_to_seq, sfmap and mtf_freq tables of @data and sets @n_in_use (number
# of distinct byte values) and @n_mtf (number of symbols stored in sfmap,
# including the end-of-block symbol).
def generate_mtf_values
  data = @data
  in_use = data.in_use
  block = data.block
  fmap = data.fmap
  sfmap = data.sfmap
  mtf_freq = data.mtf_freq
  unseq_to_seq = data.unseq_to_seq
  yy = data.generate_mtf_values_yy

  # Give every byte value that occurs in the block a dense index.
  n_in_use = 0
  256.times do |i|
    if in_use[i]
      unseq_to_seq[i] = n_in_use
      n_in_use += 1
    end
  end
  @n_in_use = n_in_use

  eob = n_in_use + 1

  # The counters are incremented below, so the counts of the previous block
  # must be cleared first (indices 0..eob inclusive).
  0.upto(eob) { |i| mtf_freq[i] = 0 }

  n_in_use.times { |i| yy[i] = i }

  wr = 0
  z_pend = 0

  0.upto(@last) do |i|
    ll_i = unseq_to_seq[block[fmap[i]]]

    # Find ll_i in the MTF list, shifting the entries in front of it back by
    # one position, and move it to the front.
    tmp = yy[0]
    j = 0
    while ll_i != tmp
      j += 1
      tmp, yy[j] = yy[j], tmp
    end
    yy[0] = tmp

    if j == 0
      z_pend += 1
    else
      if z_pend > 0
        wr = emit_mtf_zero_run sfmap, mtf_freq, wr, z_pend
        z_pend = 0
      end
      sfmap[wr] = j + 1
      wr += 1
      mtf_freq[j + 1] += 1
    end
  end

  wr = emit_mtf_zero_run sfmap, mtf_freq, wr, z_pend if z_pend > 0

  sfmap[wr] = eob
  mtf_freq[eob] += 1
  @n_mtf = wr + 1
end

# Stores a run of +run+ (> 0) zero MTF positions in +sfmap+, starting at index
# +wr+, as RUNA/RUNB symbols and counts them in +mtf_freq+.
#
# @return [Integer] the next free index in +sfmap+
def emit_mtf_zero_run(sfmap, mtf_freq, wr, run)
  run -= 1
  loop do
    symbol = (run & 1) == 0 ? RUNA : RUNB
    sfmap[wr] = symbol
    mtf_freq[symbol] += 1
    wr += 1

    break if run < 2

    run = (run - 2) >> 1
  end
  wr
end
# Prepares the compressor for a new block: restarts the block CRC, marks the
# block as empty (@last = -1), clears the 256 "byte value in use" flags of
# @data and recomputes the number of bytes the block may hold.
def init_block
  @crc.initialize_crc
  @last = -1
  @data.in_use.fill(false, 0, 256)
  @allowable_block_size = @block_size * BASEBLOCKSIZE - 20
end
(ftab[sb + 1] & CLEARMASK) - 1 461 | if hi > lo 462 | main_qsort3 data_shadow, lo, hi, 2 463 | return if @first_attempt && (@work_done > @work_limit) 464 | end 465 | ftab[sb] = ftab_sb | SETMASK 466 | end 467 | end 468 | 469 | 256.times { |j| copy[j] = ftab[(j << 8) + ss] & CLEARMASK } 470 | 471 | hj = ftab[(ss + 1) << 8] & CLEARMASK 472 | (ftab[ss << 8] & CLEARMASK).upto(hj - 1) do |j| 473 | fmap_j = fmap[j] 474 | c1 = block[fmap_j] 475 | unless big_done[c1] 476 | fmap[copy[c1]] = (fmap_j == 0) ? @last : (fmap_j - 1) 477 | copy[c1] += 1 478 | end 479 | end 480 | 481 | 255.downto(0) { |j| ftab[(j << 8) + ss] |= SETMASK } 482 | 483 | big_done[ss] = true 484 | 485 | if i < 255 486 | bb_start = ftab[ss << 8] & CLEARMASK 487 | bb_size = (ftab[(ss + 1) << 8] & CLEARMASK) - bb_start 488 | shifts = 0 489 | 490 | while (bb_size >> shifts) > 65534 491 | shifts += 1 492 | end 493 | 494 | bb_size.times do |j| 495 | a2update = fmap[bb_start + j] 496 | q_val = j >> shifts 497 | quadrant[a2update] = q_val 498 | if a2update < NUM_OVERSHOOT_BYTES 499 | quadrant[a2update + @last + 1] = q_val 500 | end 501 | end 502 | end 503 | end 504 | end 505 | 506 | def main_qsort3(data_shadow, lo_st, hi_st, d_st) 507 | stack_ll = data_shadow.stack_ll 508 | stack_hh = data_shadow.stack_hh 509 | stack_dd = data_shadow.stack_dd 510 | fmap = data_shadow.fmap 511 | block = data_shadow.block 512 | 513 | stack_ll[0] = lo_st 514 | stack_hh[0] = hi_st 515 | stack_dd[0] = d_st 516 | 517 | sp = 1 518 | while (sp -= 1) >= 0 519 | lo = stack_ll[sp] 520 | hi = stack_hh[sp] 521 | d = stack_dd[sp] 522 | 523 | if (hi - lo < SMALL_THRESH) || (d > DEPTH_THRESH) 524 | return if main_simple_sort data_shadow, lo, hi, d 525 | else 526 | d1 = d + 1 527 | med = self.class.med3 block[fmap[lo] + d1], block[fmap[hi] + d1], block[fmap[(lo + hi) >> 1] + d1] 528 | 529 | un_lo = lo 530 | un_hi = hi 531 | lt_lo = lo 532 | gt_hi = hi 533 | 534 | while true 535 | while un_lo <= un_hi 536 | n = block[fmap[un_lo] + d1] - med 537 | 
if n == 0 538 | fmap[un_lo], fmap[lt_lo] = fmap[lt_lo], fmap[un_lo] 539 | un_lo += 1 540 | lt_lo += 1 541 | elsif n < 0 542 | un_lo += 1 543 | else 544 | break 545 | end 546 | end 547 | 548 | while un_lo <= un_hi 549 | n = block[fmap[un_hi] + d1] - med 550 | if n == 0 551 | fmap[un_hi], fmap[gt_hi] = fmap[gt_hi], fmap[un_hi] 552 | un_hi -= 1 553 | gt_hi -= 1 554 | elsif n > 0 555 | un_hi -= 1 556 | else 557 | break 558 | end 559 | end 560 | 561 | if un_lo <= un_hi 562 | fmap[un_lo], fmap[un_hi] = fmap[un_hi], fmap[un_lo] 563 | un_lo += 1 564 | un_hi -= 1 565 | else 566 | break 567 | end 568 | end 569 | 570 | if gt_hi < lt_lo 571 | stack_ll[sp] = lo 572 | stack_hh[sp] = hi 573 | stack_dd[sp] = d1 574 | sp += 1 575 | else 576 | n = ((lt_lo - lo) < (un_lo - lt_lo)) ? (lt_lo - lo) : (un_lo - lt_lo) 577 | self.class.vswap fmap, lo, un_lo - n, n 578 | m = ((hi - gt_hi) < (gt_hi - un_hi)) ? (hi - gt_hi) : (gt_hi - un_hi) 579 | self.class.vswap fmap, un_lo, hi - m + 1, m 580 | 581 | n = lo + un_lo - lt_lo - 1 582 | m = hi - (gt_hi - un_hi) + 1 583 | 584 | stack_ll[sp, 3] = lo, n + 1, m 585 | stack_hh[sp, 3] = n, m - 1, hi 586 | stack_dd[sp, 3] = d, d1, d 587 | sp += 3 588 | end 589 | end 590 | end 591 | end 592 | 593 | def main_simple_sort(data_shadow, lo, hi, d) 594 | big_n = hi - lo + 1 595 | return @first_attempt && (@work_done > @work_limit) if big_n < 2 596 | 597 | hp = 0 598 | while INCS[hp] < big_n 599 | hp += 1 600 | end 601 | 602 | fmap = data_shadow.fmap 603 | quadrant = data_shadow.quadrant 604 | block = data_shadow.block 605 | last_plus_1 = @last + 1 606 | 607 | h = nil 608 | i1 = nil 609 | i2 = nil 610 | j = nil 611 | mj = nil 612 | once_runned = nil 613 | vd = nil 614 | x = nil 615 | 616 | x_loop = lambda do 617 | while x > 0 618 | x -= 4 619 | 620 | if block[i1 + 1] == block[i2 + 1] 621 | if quadrant[i1] == quadrant[i2] 622 | if block[i1 + 2] == block[i2 + 2] 623 | if quadrant[i1 + 1] == quadrant[i2 + 1] 624 | if block[i1 + 3] == block[i2 + 3] 625 | if 
quadrant[i1 + 2] == quadrant[i2 + 2] 626 | if block[i1 + 4] == block[i2 + 4] 627 | if quadrant[i1 + 3] == quadrant[i2 + 3] 628 | i1 -= last_plus_1 if (i1 += 4) >= last_plus_1 629 | i2 -= last_plus_1 if (i2 += 4) >= last_plus_1 630 | @work_done += 1 631 | next 632 | elsif quadrant[i1 + 3] > quadrant[i2 + 3] 633 | return true 634 | else 635 | return false 636 | end 637 | elsif block[i1 + 4] > block[i2 + 4] 638 | return true 639 | else 640 | return false 641 | end 642 | elsif quadrant[i1 + 2] > quadrant[i2 + 2] 643 | return true 644 | else 645 | return false 646 | end 647 | elsif block[i1 + 3] > block[i2 + 3] 648 | return true 649 | else 650 | return false 651 | end 652 | elsif quadrant[i1 + 1] > quadrant[i2 + 1] 653 | return true 654 | else 655 | return false 656 | end 657 | elsif block[i1 + 2] > block[i2 + 2] 658 | return true 659 | else 660 | return false 661 | end 662 | elsif quadrant[i1] > quadrant[i2] 663 | return true 664 | else 665 | return false 666 | end 667 | elsif block[i1 + 1] > block[i2 + 1] 668 | return true 669 | else 670 | return false 671 | end 672 | end 673 | 674 | true 675 | end 676 | 677 | hammer_loop = lambda do 678 | a = 0 679 | 680 | while true do 681 | if once_runned 682 | fmap[j] = a 683 | break if (j -= h) <= mj 684 | else 685 | once_runned = true 686 | end 687 | 688 | a = fmap[j - h] 689 | i1 = a + d 690 | i2 = vd 691 | 692 | if block[i1 + 1] == block[i2 + 1] 693 | if block[i1 + 2] == block[i2 + 2] 694 | if block[i1 + 3] == block[i2 + 3] 695 | if block[i1 + 4] == block[i2 + 4] 696 | if block[i1 + 5] == block[i2 + 5] 697 | if block[i1 += 6] == block[i2 += 6] 698 | x = @last 699 | 700 | break unless x_loop.call 701 | else 702 | if block[i1] > block[i2] 703 | next 704 | else 705 | break 706 | end 707 | end 708 | elsif block[i1 + 5] > block[i2 + 5] 709 | next 710 | else 711 | break 712 | end 713 | elsif block[i1 + 4] > block[i2 + 4] 714 | next 715 | else 716 | break 717 | end 718 | elsif block[i1 + 3] > block[i2 + 3] 719 | next 720 | else 721 | 
break 722 | end 723 | elsif block[i1 + 2] > block[i2 + 2] 724 | next 725 | else 726 | break 727 | end 728 | elsif block[i1 + 1] > block[i2 + 1] 729 | next 730 | else 731 | break 732 | end 733 | end 734 | end 735 | 736 | while (hp -= 1) >= 0 737 | h = INCS[hp] 738 | mj = lo + h - 1 739 | 740 | i = lo + h 741 | while i <= hi 742 | k = 3 743 | while i <= hi && (k -= 1) >= 0 744 | v = fmap[i] 745 | vd = v + d 746 | j = i 747 | 748 | once_runned = false 749 | 750 | hammer_loop.call 751 | 752 | fmap[j] = v 753 | 754 | i += 1 755 | end 756 | end 757 | 758 | break if @first_attempt && i <= hi && @work_done > @work_limit 759 | end 760 | 761 | @first_attempt && @work_done > @work_limit 762 | end 763 | 764 | def move_to_front_code_and_send 765 | w 24, @orig_ptr 766 | generate_mtf_values 767 | send_mtf_values 768 | end 769 | 770 | def putc(int) 771 | if int.is_a? Numeric 772 | put_byte int & 0xff 773 | else 774 | write int.to_s[0].chr 775 | end 776 | end 777 | 778 | def puts(line) 779 | write line + $/ 780 | end 781 | 782 | def put_byte(c) 783 | c = c[0].to_i if c.is_a? String 784 | w 8, c 785 | end 786 | 787 | def put_int(u) 788 | w 8, (u >> 24) & 0xff 789 | w 8, (u >> 16) & 0xff 790 | w 8, (u >> 8) & 0xff 791 | w 8, u & 0xff 792 | end 793 | 794 | def randomize_block 795 | in_use = @data.in_use 796 | block = @data.block 797 | 798 | 256.times { |i| in_use[i] = false } 799 | 800 | r_n_to_go = 0 801 | r_t_pos = 0 802 | j = 1 803 | i = 0 804 | while i <= @last 805 | i = j 806 | 807 | if r_n_to_go == 0 808 | r_n_to_go = RNUMS[r_t_pos] 809 | r_t_pos = 0 if (r_t_pos += 1) == 512 810 | end 811 | 812 | r_n_to_go -= 1 813 | block[j] ^= r_n_to_go == 1 ? 
# Seeds the code lengths of the first +n_groups+ coding tables.
#
# The alphabet 0...alpha_size is split into +n_groups+ consecutive ranges of
# roughly equal total frequency (taken from mtf_freq). Table n_part - 1 gets
# LESSER_ICOST for the symbols of its range and GREATER_ICOST for all others.
#
# @param n_groups [Integer] number of coding tables in use
# @param alpha_size [Integer] number of symbols in the alphabet
def send_mtf_values0(n_groups, alpha_size)
  len = @data.send_mtf_values_len
  mtf_freq = @data.mtf_freq

  rem_f = @n_mtf
  gs = 0

  n_groups.downto(1) do |n_part|
    t_freq = rem_f / n_part
    ge = gs - 1
    a_freq = 0

    # Extend the range until it holds its share of the remaining frequency.
    a = alpha_size - 1
    while a_freq < t_freq && ge < a
      ge += 1
      a_freq += mtf_freq[ge]
    end

    # For every other inner partition give the last symbol back. Its
    # frequency has to be subtracted before ge is decremented, otherwise the
    # frequency of the wrong symbol is removed from the running total.
    if ge > gs && n_part != n_groups && n_part != 1 && (n_groups - n_part).odd?
      a_freq -= mtf_freq[ge]
      ge -= 1
    end

    len_np = len[n_part - 1]
    alpha_size.times do |v|
      len_np[v] = (v >= gs && v <= ge) ? LESSER_ICOST : GREATER_ICOST
    end

    gs = ge + 1
    rem_f -= a_freq
  end
end
len_2 = len[2] 894 | len_3 = len[3] 895 | len_4 = len[4] 896 | len_5 = len[5] 897 | n_selectors = 0 898 | 899 | N_ITERS.times do 900 | (n_groups - 1).downto(0) do |t| 901 | fave[t] = 0 902 | rfreqt = rfreq[t] 903 | (alpha_size - 1).downto(0) { |i| rfreqt[i] = 0 } 904 | end 905 | 906 | n_selectors = 0 907 | 908 | gs = 0 909 | while gs < @n_mtf 910 | ge = [gs + G_SIZE - 1, @n_mtf - 1].min 911 | 912 | if n_groups == N_GROUPS 913 | cost0 = 0 914 | cost1 = 0 915 | cost2 = 0 916 | cost3 = 0 917 | cost4 = 0 918 | cost5 = 0 919 | 920 | gs.upto(ge) do |i| 921 | icv = sfmap[i] 922 | cost0 += len_0[icv] & 0xff 923 | cost1 += len_1[icv] & 0xff 924 | cost2 += len_2[icv] & 0xff 925 | cost3 += len_3[icv] & 0xff 926 | cost4 += len_4[icv] & 0xff 927 | cost5 += len_5[icv] & 0xff 928 | end 929 | 930 | cost[0] = cost0 931 | cost[1] = cost1 932 | cost[2] = cost2 933 | cost[3] = cost3 934 | cost[4] = cost4 935 | cost[5] = cost5 936 | else 937 | (n_groups - 1).downto(0) { |t| cost[t] = 0 } 938 | 939 | gs.upto(ge) do |i| 940 | icv = sfmap[i] 941 | (n_groups - 1).downto(0) { |t| cost[t] += len[t][icv] & 0xff } 942 | end 943 | end 944 | 945 | bt = -1 946 | bc = 999999999 947 | (n_groups - 1).downto(0) do |t| 948 | cost_t = cost[t] 949 | if cost_t < bc 950 | bc = cost_t 951 | bt = t 952 | end 953 | end 954 | 955 | fave[bt] += 1 956 | selector[n_selectors] = bt 957 | n_selectors += 1 958 | 959 | rfreq_bt = rfreq[bt] 960 | gs.upto(ge) { |i| rfreq_bt[sfmap[i]] += 1 } 961 | 962 | gs = ge + 1 963 | end 964 | 965 | n_groups.times do |t| 966 | self.class.make_code_lengths len[t], rfreq[t], @data, alpha_size, 20 967 | end 968 | end 969 | 970 | n_selectors 971 | end 972 | 973 | def send_mtf_values2(n_groups, n_selectors) 974 | data_shadow = @data 975 | pos = data_shadow.send_mtf_values2_pos 976 | 977 | n_groups.times { |i| pos[i] = i } 978 | 979 | n_selectors.times do |i| 980 | ll_i = data_shadow.selector[i] 981 | tmp = pos[0] 982 | j = 0 983 | 984 | while ll_i != tmp 985 | j += 1 986 | tmp, pos[j] = 
# Writes the map of byte values that occur in the current block.
#
# First 16 bits are written, one per range of 16 consecutive byte values,
# telling whether any value of that range is in use. For every range flagged
# this way another 16 bits follow, one per byte value of the range.
def send_mtf_values4
  in_use = @data.in_use
  in_use_16 = @data.send_mtf_values4_in_use_16

  16.times do |i|
    in_use_16[i] = (0...16).any? { |j| in_use[i * 16 + j] }
  end

  16.times { |i| w 1, in_use_16[i] ? 1 : 0 }

  # The bit writer #w already flushes complete bytes before adding a bit, so
  # there is no need to repeat its buffer handling here.
  16.times do |i|
    next unless in_use_16[i]

    16.times { |j| w 1, in_use[i * 16 + j] ? 1 : 0 }
  end
end
# Writes the code lengths of the first +n_groups+ coding tables.
#
# Every table starts with the length of symbol 0 as a 5 bit number. Then, for
# each symbol, the current length is stepped towards the symbol's length with
# the two bit codes 0b10 (increment) and 0b11 (decrement), followed by a single
# 0 bit once the length is reached.
#
# @param n_groups [Integer] number of coding tables to write
# @param alpha_size [Integer] number of symbols per table
def send_mtf_values6(n_groups, alpha_size)
  len = @data.send_mtf_values_len

  n_groups.times do |t|
    len_t = len[t]
    curr = len_t[0] & 0xff
    w 5, curr

    alpha_size.times do |i|
      lti = len_t[i] & 0xff

      while curr < lti
        w 2, 2
        curr += 1
      end

      while curr > lti
        w 2, 3
        curr -= 1
      end

      w 1, 0
    end
  end
end
# Appends the +n+ lowest bits of +v+ to the output bit buffer.
#
# Complete bytes that are pending in the buffer are written to @io first;
# the new bits are then placed directly below the bits still pending.
#
# @param n [Integer] number of bits to append
# @param v [Integer] value holding the bits
def w(n, v)
  until @live < 8
    @io.write(((@buff >> 24) & 0xffffffff).chr)
    @buff = (@buff << 8) & 0xffffffff
    @live -= 8
  end

  @buff |= v << (32 - @live - n)
  @live += n
end
1245 | data_shadow.in_use[run_length_shadow] = true 1246 | block[@last + 2] = ch 1247 | block[@last + 3] = ch 1248 | block[@last + 4] = ch 1249 | block[@last + 5] = ch 1250 | block[@last + 6] = run_length_shadow 1251 | @last += 5 1252 | end 1253 | else 1254 | end_block 1255 | init_block 1256 | write_run 1257 | end 1258 | end 1259 | 1260 | end 1261 | -------------------------------------------------------------------------------- /lib/rbzip2/ruby/constants.rb: -------------------------------------------------------------------------------- 1 | # This code is free software; you can redistribute it and/or modify it under 2 | # the terms of the new BSD License. 3 | # 4 | # Copyright (c) 2011-2013, Sebastian Staudt 5 | 6 | module RBzip2::Ruby::Constants 7 | 8 | BASEBLOCKSIZE = 100000 9 | MAX_ALPHA_SIZE = 258 10 | MAX_CODE_LEN = 23 11 | RUNA = 0 12 | RUNB = 1 13 | N_GROUPS = 6 14 | G_SIZE = 50 15 | N_ITERS = 4 16 | MAX_SELECTORS = (2 + (900000 / G_SIZE)) 17 | NUM_OVERSHOOT_BYTES = 20 18 | 19 | EOF = 0 20 | START_BLOCK_STATE = 1 21 | RAND_PART_A_STATE = 2 22 | RAND_PART_B_STATE = 3 23 | RAND_PART_C_STATE = 4 24 | NO_RAND_PART_A_STATE = 5 25 | NO_RAND_PART_B_STATE = 6 26 | NO_RAND_PART_C_STATE = 7 27 | 28 | RNUMS = [ 29 | 619, 720, 127, 481, 931, 816, 813, 233, 566, 247, 985, 724, 205, 454, 863, 30 | 491, 741, 242, 949, 214, 733, 859, 335, 708, 621, 574, 73, 654, 730, 472, 31 | 419, 436, 278, 496, 867, 210, 399, 680, 480, 51, 878, 465, 811, 169, 869, 32 | 675, 611, 697, 867, 561, 862, 687, 507, 283, 482, 129, 807, 591, 733, 623, 33 | 150, 238, 59, 379, 684, 877, 625, 169, 643, 105, 170, 607, 520, 932, 727, 34 | 476, 693, 425, 174, 647, 73, 122, 335, 530, 442, 853, 695, 249, 445, 515, 35 | 909, 545, 703, 919, 874, 474, 882, 500, 594, 612, 641, 801, 220, 162, 819, 36 | 984, 589, 513, 495, 799, 161, 604, 958, 533, 221, 400, 386, 867, 600, 782, 37 | 382, 596, 414, 171, 516, 375, 682, 485, 911, 276, 98, 553, 163, 354, 666, 38 | 933, 424, 341, 533, 870, 227, 730, 475, 186, 263, 
647, 537, 686, 600, 224, 39 | 469, 68, 770, 919, 190, 373, 294, 822, 808, 206, 184, 943, 795, 384, 383, 40 | 461, 404, 758, 839, 887, 715, 67, 618, 276, 204, 918, 873, 777, 604, 560, 41 | 951, 160, 578, 722, 79, 804, 96, 409, 713, 940, 652, 934, 970, 447, 318, 42 | 353, 859, 672, 112, 785, 645, 863, 803, 350, 139, 93, 354, 99, 820, 908, 43 | 609, 772, 154, 274, 580, 184, 79, 626, 630, 742, 653, 282, 762, 623, 680, 44 | 81, 927, 626, 789, 125, 411, 521, 938, 300, 821, 78, 343, 175, 128, 250, 45 | 170, 774, 972, 275, 999, 639, 495, 78, 352, 126, 857, 956, 358, 619, 580, 46 | 124, 737, 594, 701, 612, 669, 112, 134, 694, 363, 992, 809, 743, 168, 974, 47 | 944, 375, 748, 52, 600, 747, 642, 182, 862, 81, 344, 805, 988, 739, 511, 48 | 655, 814, 334, 249, 515, 897, 955, 664, 981, 649, 113, 974, 459, 893, 228, 49 | 433, 837, 553, 268, 926, 240, 102, 654, 459, 51, 686, 754, 806, 760, 493, 50 | 403, 415, 394, 687, 700, 946, 670, 656, 610, 738, 392, 760, 799, 887, 653, 51 | 978, 321, 576, 617, 626, 502, 894, 679, 243, 440, 680, 879, 194, 572, 640, 52 | 724, 926, 56, 204, 700, 707, 151, 457, 449, 797, 195, 791, 558, 945, 679, 53 | 297, 59, 87, 824, 713, 663, 412, 693, 342, 606, 134, 108, 571, 364, 631, 54 | 212, 174, 643, 304, 329, 343, 97, 430, 751, 497, 314, 983, 374, 822, 928, 55 | 140, 206, 73, 263, 980, 736, 876, 478, 430, 305, 170, 514, 364, 692, 829, 56 | 82, 855, 953, 676, 246, 369, 970, 294, 750, 807, 827, 150, 790, 288, 923, 57 | 804, 378, 215, 828, 592, 281, 565, 555, 710, 82, 896, 831, 547, 261, 524, 58 | 462, 293, 465, 502, 56, 661, 821, 976, 991, 658, 869, 905, 758, 745, 193, 59 | 768, 550, 608, 933, 378, 286, 215, 979, 792, 961, 61, 688, 793, 644, 986, 60 | 403, 106, 366, 905, 644, 372, 567, 466, 434, 645, 210, 389, 550, 919, 135, 61 | 780, 773, 635, 389, 707, 100, 626, 958, 165, 504, 920, 176, 193, 713, 857, 62 | 265, 203, 50, 668, 108, 645, 990, 626, 197, 510, 357, 358, 850, 858, 364, 63 | 936, 638 64 | ] 65 | 66 | MIN_BLOCK_SIZE = 1 67 | MAX_BLOCK_SIZE = 9 68 
| SETMASK = (1 << 21) 69 | CLEARMASK = (~SETMASK) 70 | GREATER_ICOST = 15 71 | LESSER_ICOST = 0 72 | SMALL_THRESH = 20 73 | DEPTH_THRESH = 10 74 | WORK_FACTOR = 30 75 | QSORT_STACK_SIZE = 1000 76 | 77 | INCS = [ 78 | 1, 4, 13, 40, 121, 364, 1093, 3280, 9841, 29524, 88573, 265720, 797161, 79 | 2391484 80 | ] 81 | 82 | end 83 | -------------------------------------------------------------------------------- /lib/rbzip2/ruby/crc.rb: -------------------------------------------------------------------------------- 1 | # This code is free software; you can redistribute it and/or modify it under 2 | # the terms of the new BSD License. 3 | # 4 | # Copyright (c) 2011-2017, Sebastian Staudt 5 | 6 | class RBzip2::Ruby::CRC 7 | 8 | CRC32_TABLE = [ 9 | 0x00000000, 0x04c11db7, 0x09823b6e, 0x0d4326d9, 0x130476dc, 0x17c56b6b, 10 | 0x1a864db2, 0x1e475005, 0x2608edb8, 0x22c9f00f, 0x2f8ad6d6, 0x2b4bcb61, 11 | 0x350c9b64, 0x31cd86d3, 0x3c8ea00a, 0x384fbdbd, 0x4c11db70, 0x48d0c6c7, 12 | 0x4593e01e, 0x4152fda9, 0x5f15adac, 0x5bd4b01b, 0x569796c2, 0x52568b75, 13 | 0x6a1936c8, 0x6ed82b7f, 0x639b0da6, 0x675a1011, 0x791d4014, 0x7ddc5da3, 14 | 0x709f7b7a, 0x745e66cd, 0x9823b6e0, 0x9ce2ab57, 0x91a18d8e, 0x95609039, 15 | 0x8b27c03c, 0x8fe6dd8b, 0x82a5fb52, 0x8664e6e5, 0xbe2b5b58, 0xbaea46ef, 16 | 0xb7a96036, 0xb3687d81, 0xad2f2d84, 0xa9ee3033, 0xa4ad16ea, 0xa06c0b5d, 17 | 0xd4326d90, 0xd0f37027, 0xddb056fe, 0xd9714b49, 0xc7361b4c, 0xc3f706fb, 18 | 0xceb42022, 0xca753d95, 0xf23a8028, 0xf6fb9d9f, 0xfbb8bb46, 0xff79a6f1, 19 | 0xe13ef6f4, 0xe5ffeb43, 0xe8bccd9a, 0xec7dd02d, 0x34867077, 0x30476dc0, 20 | 0x3d044b19, 0x39c556ae, 0x278206ab, 0x23431b1c, 0x2e003dc5, 0x2ac12072, 21 | 0x128e9dcf, 0x164f8078, 0x1b0ca6a1, 0x1fcdbb16, 0x018aeb13, 0x054bf6a4, 22 | 0x0808d07d, 0x0cc9cdca, 0x7897ab07, 0x7c56b6b0, 0x71159069, 0x75d48dde, 23 | 0x6b93dddb, 0x6f52c06c, 0x6211e6b5, 0x66d0fb02, 0x5e9f46bf, 0x5a5e5b08, 24 | 0x571d7dd1, 0x53dc6066, 0x4d9b3063, 0x495a2dd4, 0x44190b0d, 0x40d816ba, 25 | 0xaca5c697, 
0xa864db20, 0xa527fdf9, 0xa1e6e04e, 0xbfa1b04b, 0xbb60adfc, 26 | 0xb6238b25, 0xb2e29692, 0x8aad2b2f, 0x8e6c3698, 0x832f1041, 0x87ee0df6, 27 | 0x99a95df3, 0x9d684044, 0x902b669d, 0x94ea7b2a, 0xe0b41de7, 0xe4750050, 28 | 0xe9362689, 0xedf73b3e, 0xf3b06b3b, 0xf771768c, 0xfa325055, 0xfef34de2, 29 | 0xc6bcf05f, 0xc27dede8, 0xcf3ecb31, 0xcbffd686, 0xd5b88683, 0xd1799b34, 30 | 0xdc3abded, 0xd8fba05a, 0x690ce0ee, 0x6dcdfd59, 0x608edb80, 0x644fc637, 31 | 0x7a089632, 0x7ec98b85, 0x738aad5c, 0x774bb0eb, 0x4f040d56, 0x4bc510e1, 32 | 0x46863638, 0x42472b8f, 0x5c007b8a, 0x58c1663d, 0x558240e4, 0x51435d53, 33 | 0x251d3b9e, 0x21dc2629, 0x2c9f00f0, 0x285e1d47, 0x36194d42, 0x32d850f5, 34 | 0x3f9b762c, 0x3b5a6b9b, 0x0315d626, 0x07d4cb91, 0x0a97ed48, 0x0e56f0ff, 35 | 0x1011a0fa, 0x14d0bd4d, 0x19939b94, 0x1d528623, 0xf12f560e, 0xf5ee4bb9, 36 | 0xf8ad6d60, 0xfc6c70d7, 0xe22b20d2, 0xe6ea3d65, 0xeba91bbc, 0xef68060b, 37 | 0xd727bbb6, 0xd3e6a601, 0xdea580d8, 0xda649d6f, 0xc423cd6a, 0xc0e2d0dd, 38 | 0xcda1f604, 0xc960ebb3, 0xbd3e8d7e, 0xb9ff90c9, 0xb4bcb610, 0xb07daba7, 39 | 0xae3afba2, 0xaafbe615, 0xa7b8c0cc, 0xa379dd7b, 0x9b3660c6, 0x9ff77d71, 40 | 0x92b45ba8, 0x9675461f, 0x8832161a, 0x8cf30bad, 0x81b02d74, 0x857130c3, 41 | 0x5d8a9099, 0x594b8d2e, 0x5408abf7, 0x50c9b640, 0x4e8ee645, 0x4a4ffbf2, 42 | 0x470cdd2b, 0x43cdc09c, 0x7b827d21, 0x7f436096, 0x7200464f, 0x76c15bf8, 43 | 0x68860bfd, 0x6c47164a, 0x61043093, 0x65c52d24, 0x119b4be9, 0x155a565e, 44 | 0x18197087, 0x1cd86d30, 0x029f3d35, 0x065e2082, 0x0b1d065b, 0x0fdc1bec, 45 | 0x3793a651, 0x3352bbe6, 0x3e119d3f, 0x3ad08088, 0x2497d08d, 0x2056cd3a, 46 | 0x2d15ebe3, 0x29d4f654, 0xc5a92679, 0xc1683bce, 0xcc2b1d17, 0xc8ea00a0, 47 | 0xd6ad50a5, 0xd26c4d12, 0xdf2f6bcb, 0xdbee767c, 0xe3a1cbc1, 0xe760d676, 48 | 0xea23f0af, 0xeee2ed18, 0xf0a5bd1d, 0xf464a0aa, 0xf9278673, 0xfde69bc4, 49 | 0x89b8fd09, 0x8d79e0be, 0x803ac667, 0x84fbdbd0, 0x9abc8bd5, 0x9e7d9662, 50 | 0x933eb0bb, 0x97ffad0c, 0xafb010b1, 0xab710d06, 0xa6322bdf, 0xa2f33668, 51 | 
# Reads decompressed data from the stream.
#
# @param length [Integer, nil] the number of bytes to read; +nil+ reads
#   everything up to the end of the stream
# @return [String, nil] the bytes read. Fewer than +length+ bytes (possibly an
#   empty string) are returned when the stream ends early. A single byte read
#   (+length+ == 1) at the end of the stream returns +nil+.
# @raise [RuntimeError] if the stream has been closed
def read(length = nil)
  raise 'stream closed' if @io.nil?

  if length == 1
    byte = read0
    # read0 signals the end of the stream with -1, which must not be passed
    # on to Integer#chr (it raises a RangeError for negative values).
    return nil if byte < 0

    count 1
    return byte.chr
  end

  data = ''
  if length.nil?
    while (byte = read0) >= 0
      data << byte.chr
    end
  elsif length > 0
    length.times do
      byte = read0
      break if byte < 0

      data << byte.chr
    end
  end

  # Count the bytes of every read, including reads up to the end of stream.
  count data.size
  data
end
# Builds the decoding tables for one Huffman coding group.
#
# The tables are filled in place:
#
# limit      - limit[n] becomes the largest code value that is n bits long;
#              the decoder keeps reading bits while the code read so far is
#              greater than limit[n].
# base       - base[n] becomes the offset subtracted from an n-bit code to
#              obtain its index into perm.
# perm       - receives the symbols ordered by code length.
# length     - code length (in bits) of every symbol.
# min_len    - shortest code length in use.
# max_len    - longest code length in use.
# alpha_size - number of symbols in the alphabet.
def create_decode_tables(limit, base, perm, length, min_len, max_len, alpha_size)
  # Sort the symbols by code length, keeping symbol order within a length.
  pp = 0
  (min_len..max_len).each do |i|
    alpha_size.times do |j|
      if length[j] == i
        perm[pp] = j
        pp += 1
      end
    end
  end

  # Clear the entries left over from the previous block. This has to be a
  # fill: `array[1..MAX_CODE_LEN] = 0` is a splice that replaces the whole
  # range with a single element and shrinks the (reused) table on every call.
  base.fill 0, 1, MAX_CODE_LEN
  limit.fill 0, 1, MAX_CODE_LEN

  # Count the codes of each length (stored one slot up) ...
  alpha_size.times do |i|
    base[length[i] + 1] += 1
  end

  # ... and turn the counts into running totals.
  b = 0
  1.upto(MAX_CODE_LEN - 1) do |i|
    b += base[i]
    base[i] = b
  end

  # Derive the largest code value for every length in use.
  vec = 0
  min_len.upto(max_len) do |i|
    b = base[i]
    nb = base[i + 1]
    vec += nb - b
    limit[i] = vec - 1
    vec = vec << 1
  end

  # Convert the running totals into the offsets used for the perm lookup.
  (min_len + 1).upto(max_len) do |i|
    base[i] = ((limit[i - 1] + 1) << 1) - base[i]
  end
end
# Creates the decoding tables of every coding group from the code lengths
# stored in @data.temp_char_array_2d.
#
# alpha_size - number of symbols in the alphabet.
# groups     - number of coding groups to process.
#
# For each group the shortest and longest code length are determined, the
# group's limit/base/perm tables are built via create_decode_tables and the
# shortest length is recorded in @data.min_lens.
def create_huffman_decoding_tables(alpha_size, groups)
  data = @data
  code_lengths = data.temp_char_array_2d

  groups.times do |group|
    lengths = code_lengths[group]
    shortest = 32
    longest = 0

    0.upto(alpha_size - 1) do |symbol|
      current = lengths[symbol]
      longest = [longest, current].max
      shortest = [shortest, current].min
    end

    create_decode_tables data.limit[group], data.base[group], data.perm[group],
                         lengths, shortest, longest, alpha_size
    data.min_lens[group] = shortest
  end
end
min_lens_zt = min_lens[zt] 382 | else 383 | group_pos -= 1 384 | end 385 | 386 | zn = min_lens_zt 387 | 388 | while @live < zn 389 | thech = @io.readbyte 390 | 391 | raise 'unexpected end of stream' if thech < 0 392 | 393 | @buff = ((@buff << 8) & 0xffffffff) | thech 394 | @live += 8 395 | end 396 | 397 | zvec = ((@buff >> (@live - zn)) & 0xffffffff) & ((1 << zn) - 1) 398 | @live -= zn 399 | 400 | while zvec > limit_zt[zn] 401 | zn += 1 402 | 403 | while @live < 1 404 | thech = @io.readbyte 405 | 406 | raise 'unexpected end of stream' if thech < 0 407 | 408 | @buff = ((@buff << 8) & 0xffffffff) | thech 409 | @live += 8 410 | end 411 | 412 | @live -= 1 413 | zvec = (zvec << 1) | ((@buff >> @live) & 1) 414 | end 415 | 416 | next_sym = perm_zt[zvec - base_zt[zn]] 417 | 418 | n = n << 1 419 | end 420 | 421 | ch = seq_to_unseq[yy[0]] 422 | unzftab[ch & 0xff] += s + 1 423 | 424 | while s >= 0 425 | @last += 1 426 | ll8[@last] = ch 427 | s -= 1 428 | end 429 | 430 | raise 'block overrun' if @last >= limit_last 431 | else 432 | @last += 1 433 | raise 'block overrun' if @last >= limit_last 434 | 435 | tmp = yy[next_sym - 1] 436 | unzftab[seq_to_unseq[tmp]] += 1 437 | ll8[@last] = seq_to_unseq[tmp] 438 | 439 | yy[1, next_sym - 1] = yy[0, next_sym - 1] 440 | yy[0] = tmp 441 | 442 | if group_pos == 0 443 | group_pos = G_SIZE - 1 444 | group_no += 1 445 | zt = selector[group_no] 446 | base_zt = base[zt] 447 | limit_zt = limit[zt] 448 | perm_zt = perm[zt] 449 | min_lens_zt = min_lens[zt] 450 | else 451 | group_pos -= 1 452 | end 453 | 454 | zn = min_lens_zt 455 | 456 | while @live < zn 457 | thech = @io.readbyte 458 | 459 | raise 'unexpected end of stream' if thech < 0 460 | 461 | @buff = ((@buff << 8) & 0xffffffff) | thech 462 | @live += 8 463 | end 464 | zvec = (@buff >> (@live - zn)) & ((1 << zn) - 1) 465 | @live -= zn 466 | 467 | while zvec > limit_zt[zn] 468 | zn += 1 469 | while @live < 1 470 | thech = @io.readbyte 471 | 472 | raise 'unexpected end of stream' if thech < 0 
# Prepares the output of the block that has just been decoded.
#
# Turns the byte counts in @data.unzftab into running totals (cftab), uses
# them to fill the tt table from the decoded bytes in @data.ll8, resets the
# per-block output state and starts producing output through
# setup_rand_part_a or setup_no_rand_part_a.
#
# Does nothing when there is no block data (@data is nil).
# Raises RuntimeError ('stream corrupted') if the block's origin pointer
# does not point into the block.
def setup_block
  return if @data.nil?

  cftab = @data.cftab
  # init_tt only (re)allocates when the current table is too small, so the
  # table is fetched through its reader instead of relying on the return
  # value of init_tt.
  @data.init_tt @last + 1
  tt = @data.tt
  ll8 = @data.ll8

  # cftab[i] = number of bytes in the block with a value below i
  cftab[0] = 0
  cftab[1, 256] = @data.unzftab[0, 256]

  c = 0
  1.upto(256) do |i|
    c += cftab[i]
    cftab[i] = c
  end

  (@last + 1).times do |i|
    cftab_i = ll8[i]
    tt[cftab[cftab_i]] = i
    cftab[cftab_i] += 1
  end

  # Check against the block length rather than tt.size: a table reused from
  # a larger block has stale entries behind @last.
  raise 'stream corrupted' if @orig_ptr < 0 || @orig_ptr > @last

  @su_t_pos = tt[@orig_ptr]
  @su_count = 0
  @su_i2 = 0
  @su_ch2 = 256 # not a byte value, so the first byte never looks like a repeat

  if @block_randomised
    @su_r_n_to_go = 0
    @su_r_t_pos = 0

    setup_rand_part_a
  else
    setup_no_rand_part_a
  end
end
# Produces the next byte of a block that is not randomised.
#
# While bytes are left in the block (@su_i2 <= @last) the byte at the
# current table position becomes the current character, the position
# advances through @data.tt, the CRC is updated and the state switches to
# NO_RAND_PART_B_STATE. Once the block is used up, it is finished via
# end_block and the next block is initialised and set up.
def setup_no_rand_part_a
  if @su_i2 > @last
    @current_state = NO_RAND_PART_A_STATE
    end_block
    init_block
    return setup_block
  end

  @su_ch_prev = @su_ch2
  position = @su_t_pos
  @su_ch2 = @current_char = @data.ll8[position]
  @su_t_pos = @data.tt[position]
  @su_i2 += 1
  @current_state = NO_RAND_PART_B_STATE
  @crc.update_crc @su_ch2
end
# Size of the underlying (compressed) IO in bytes.
#
# Any IO-like object that answers to #size is supported (e.g. StringIO,
# File on current Rubies, Tempfile); a File without #size is measured via
# its File::Stat.
#
# Returns the size as an Integer, or nil when it cannot be determined
# (for instance after the stream has been closed).
def size
  if @io.respond_to? :size
    @io.size
  elsif @io.is_a? File
    @io.stat.size
  end
end
# Makes sure the tt table can hold at least +size+ entries.
#
# The table is only (re)allocated when it does not exist yet or is too
# small; an existing, sufficiently large table is kept as it is.
#
# size - minimum number of entries required.
#
# Returns the table. This is always the current table, also when nothing
# had to be allocated (a bare modifier-if would evaluate to nil then).
def init_tt(size)
  @tt = Array.new(size) if @tt.nil? || @tt.size < size
  @tt
end
module RBzip2

  # Version of the gem. Left untouched if the constant has been defined
  # before this file is evaluated.
  unless const_defined?(:VERSION)
    VERSION = '0.3.0'
  end

end
17 | 18 | s.files = Dir['{lib}/**/*.rb', 'LICENSE', 'Rakefile', 'README.md'] 19 | s.test_files = Dir['{spec}/**/*_spec.rb'] 20 | s.require_paths = [ 'lib' ] 21 | end 22 | -------------------------------------------------------------------------------- /spec/common/compressor_spec.rb: -------------------------------------------------------------------------------- 1 | # This code is free software; you can redistribute it and/or modify it under 2 | # the terms of the new BSD License. 3 | # 4 | # Copyright (c) 2017, Sebastian Staudt 5 | 6 | require 'base64' 7 | 8 | require 'helper' 9 | 10 | shared_examples_for 'a compressor' do 11 | 12 | before do 13 | @io = StringIO.new 14 | @bz2_compressor = described_class.new @io 15 | end 16 | 17 | it 'acts like a standard IO' do 18 | methods = described_class.instance_methods.map &:to_sym 19 | expect(methods).to include(:close, :putc, :puts, :write) 20 | end 21 | 22 | it 'should be able to compress raw data' do 23 | txt_file = fixture 'fixtures/test.txt' 24 | bz2_file = fixture 'fixtures/test.bz2' 25 | @bz2_compressor.write txt_file.read 26 | @bz2_compressor.close 27 | 28 | expect(@io.string).to eq(bz2_file.read) 29 | end 30 | 31 | it 'should be able to compress large raw data' do 32 | txt_file = fixture 'fixtures/big_test.txt' 33 | suffix = '.' 
+ described_class.name.split('::')[1].downcase 34 | suffix = '' if suffix == '.ffi' 35 | bz2_file = fixture "fixtures/big_test#{suffix}.bz2" 36 | @bz2_compressor.write txt_file.read 37 | @bz2_compressor.close 38 | 39 | expect(@io.string).to eq(bz2_file.read) 40 | end 41 | 42 | it 'should be able to compress a single character' do 43 | @bz2_compressor.putc 'T' 44 | @bz2_compressor.putc 'e' 45 | @bz2_compressor.putc 's' 46 | @bz2_compressor.putc 't' 47 | @bz2_compressor.close 48 | 49 | base64_result = Base64.encode64 @io.string 50 | 51 | expect(`echo "#{base64_result}" | base64 --decode | bzcat`).to eq('Test') 52 | end 53 | 54 | it 'should be able to compress a line of text' do 55 | @bz2_compressor.puts 'Test 1' 56 | @bz2_compressor.puts 'Test 2' 57 | @bz2_compressor.close 58 | 59 | base64_result = Base64.encode64 @io.string 60 | 61 | expect(`echo "#{base64_result}" | base64 --decode | bzcat`).to eq("Test 1#{$/}Test 2#{$/}") 62 | end 63 | 64 | after do 65 | @bz2_compressor.close unless @bz2_compressor.nil? 66 | end 67 | 68 | end 69 | -------------------------------------------------------------------------------- /spec/common/decompressor_spec.rb: -------------------------------------------------------------------------------- 1 | # This code is free software; you can redistribute it and/or modify it under 2 | # the terms of the new BSD License. 
3 | # 4 | # Copyright (c) 2017, Sebastian Staudt 5 | 6 | require 'helper' 7 | 8 | shared_examples_for 'a decompressor' do 9 | 10 | it 'acts like a standard IO' do 11 | methods = described_class.instance_methods.map { |m| m.to_sym } 12 | expect(methods).to include(:close, :getc, :gets, :read) 13 | end 14 | 15 | it 'knows its size' do 16 | bz2_file = fixture 'fixtures/test.bz2' 17 | bz2_decompressor = described_class.new bz2_file 18 | 19 | expect(bz2_decompressor.size).to eq(375) 20 | end 21 | 22 | it 'knows the size of the uncompressed data' do 23 | bz2_file = fixture 'fixtures/test.bz2' 24 | bz2_decompressor = described_class.new bz2_file 25 | 26 | expect(bz2_decompressor.uncompressed).to eq(704) 27 | end 28 | 29 | it 'should be able to decompress compressed data' do 30 | bz2_file = fixture 'fixtures/test.bz2' 31 | bz2_decompressor = described_class.new bz2_file 32 | txt_file = fixture 'fixtures/test.txt' 33 | 34 | expect(bz2_decompressor.read).to eq(txt_file.read) 35 | end 36 | 37 | it 'should be able to decompress large compressed data' do 38 | txt_file = fixture 'fixtures/big_test.txt' 39 | bz2_file = fixture 'fixtures/big_test.bz2' 40 | bz2_decompressor = described_class.new bz2_file 41 | 42 | expect(bz2_decompressor.read).to eq(txt_file.read) 43 | end 44 | 45 | it 'should be able to decompress a single character from compressed data' do 46 | bz2_file = fixture 'fixtures/test.bz2' 47 | bz2_decompressor = described_class.new bz2_file 48 | 49 | expect(bz2_decompressor.getc).to eq('T') 50 | expect(bz2_decompressor.getc).to eq('h') 51 | expect(bz2_decompressor.getc).to eq('i') 52 | expect(bz2_decompressor.getc).to eq('s') 53 | expect(bz2_decompressor.getc).to eq(' ') 54 | end 55 | 56 | it 'should be able to decompress a single line from compressed data' do 57 | bz2_file = fixture 'fixtures/test.bz2' 58 | bz2_decompressor = described_class.new bz2_file 59 | 60 | expect(bz2_decompressor.gets).to eq("This is a test fixture for RBzip2.\n") 61 | end 62 | 63 | end 64 | 
-------------------------------------------------------------------------------- /spec/ffi/compressor_spec.rb: -------------------------------------------------------------------------------- 1 | # This code is free software; you can redistribute it and/or modify it under 2 | # the terms of the new BSD License. 3 | # 4 | # Copyright (c) 2013-2017, Sebastian Staudt 5 | 6 | require 'helper' 7 | 8 | describe RBzip2::FFI::Compressor do 9 | 10 | it_behaves_like 'a compressor' 11 | 12 | end 13 | -------------------------------------------------------------------------------- /spec/ffi/decompressor_spec.rb: -------------------------------------------------------------------------------- 1 | # This code is free software; you can redistribute it and/or modify it under 2 | # the terms of the new BSD License. 3 | # 4 | # Copyright (c) 2013-2017, Sebastian Staudt 5 | 6 | require 'helper' 7 | 8 | describe RBzip2::FFI::Decompressor do 9 | 10 | it_behaves_like 'a decompressor' 11 | 12 | end 13 | -------------------------------------------------------------------------------- /spec/fixtures/big_test.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koraktor/rbzip2/f9544ca605ff5ab602192d1dca821f4414a382ef/spec/fixtures/big_test.bz2 -------------------------------------------------------------------------------- /spec/fixtures/big_test.java.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koraktor/rbzip2/f9544ca605ff5ab602192d1dca821f4414a382ef/spec/fixtures/big_test.java.bz2 -------------------------------------------------------------------------------- /spec/fixtures/big_test.ruby.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koraktor/rbzip2/f9544ca605ff5ab602192d1dca821f4414a382ef/spec/fixtures/big_test.ruby.bz2 
-------------------------------------------------------------------------------- /spec/fixtures/test.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/koraktor/rbzip2/f9544ca605ff5ab602192d1dca821f4414a382ef/spec/fixtures/test.bz2 -------------------------------------------------------------------------------- /spec/fixtures/test.txt: -------------------------------------------------------------------------------- 1 | This is a test fixture for RBzip2. 2 | Its contents will be compressed and decompressed to test the functionality. 3 | 4 | Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod 5 | tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At 6 | vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd 7 | gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum 8 | dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor 9 | invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero 10 | eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no 11 | sea takimata sanctus est Lorem ipsum dolor sit amet. 12 | -------------------------------------------------------------------------------- /spec/helper.rb: -------------------------------------------------------------------------------- 1 | # This code is free software; you can redistribute it and/or modify it under 2 | # the terms of the new BSD License. 3 | # 4 | # Copyright (c) 2011-2017, Sebastian Staudt 5 | 6 | require 'rspec/core' 7 | require 'rspec/expectations' 8 | 9 | require 'coveralls' 10 | Coveralls.wear! 11 | 12 | require 'rbzip2' 13 | 14 | include RBzip2 15 | 16 | RSpec.configure do |config| 17 | config.color = true 18 | config.formatter = :documentation 19 | end 20 | 21 | def java? 
# Opens a file below the directory of this helper.
#
# file - path of the file, relative to the helper's directory.
#
# Returns the opened File; closing it is up to the caller.
def fixture(file)
  spec_dir = File.dirname(__FILE__)
  File.new(File.join(spec_dir, file))
end
3 | # 4 | # Copyright (c) 2011-2017, Sebastian Staudt 5 | 6 | require 'helper' 7 | 8 | describe RBzip2::Ruby::Decompressor do 9 | 10 | it_behaves_like 'a decompressor' 11 | 12 | end 13 | --------------------------------------------------------------------------------