├── .gitignore ├── .rspec ├── .travis.yml ├── .yardopts ├── CHANGELOG.md ├── Gemfile ├── README.md ├── Rakefile ├── bzip2-ruby.gemspec ├── ext └── bzip2 │ ├── bzip2.c │ ├── common.c │ ├── common.h │ ├── extconf.rb │ ├── reader.c │ ├── reader.h │ ├── writer.c │ └── writer.h ├── lib ├── bzip2.rb └── bzip2 │ ├── internals.rb │ ├── reader.rb │ ├── version.rb │ └── writer.rb └── spec ├── reader_spec.rb ├── spec_helper.rb └── writer_spec.rb /.gitignore: -------------------------------------------------------------------------------- 1 | _10lines_ 2 | doc 3 | ext/Makefile 4 | *.o 5 | *.so 6 | *.bundle 7 | pkg 8 | .bundle 9 | tmp 10 | Gemfile.lock 11 | -------------------------------------------------------------------------------- /.rspec: -------------------------------------------------------------------------------- 1 | --colour 2 | --format=documentation 3 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: ruby 2 | rvm: 3 | - 1.8.7 4 | - 1.9.2 5 | - 1.9.3 6 | - rbx 7 | - ree 8 | - ruby-head 9 | - rbx-18mode 10 | - rbx-19mode 11 | matrix: 12 | allow_failures: 13 | - rvm: rbx-18mode 14 | - rvm: rbx-19mode 15 | script: 16 | - bundle exec rake 17 | - bundle exec rspec 18 | -------------------------------------------------------------------------------- /.yardopts: -------------------------------------------------------------------------------- 1 | --no-private 2 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## 0.2.7 2010-11-16 2 | 3 | * Add documentation for an overview of the Bzip2 module 4 | * Document the class methods on Bzip2 and get them to show up in yarddoc 5 | * Remove the ConfigError class because searching for it showed no instances of its usage 6 | * Add a Usage section to the README and 
a bit about adding it to a Gemfile 7 | * Improve the reader_spec.rb by making it more resilient in lots of places and a bit more descriptive/terse in 8 | * Add a lot more documentation for the Reader class and also touch up the Writer class a bit 9 | * Make the Writer specs more descriptive by giving them some doc strings. 10 | * Wrap up documentation of the Bzip2::Writer class. 11 | * Add lib/bzip2-ruby.rb so it's not always necessary to specify to require 'bzip2' in Gemfiles and such 12 | * Start documenting the Bzip2::Writer class 13 | * Fix a few compiler warnings 14 | * Removed some dead code 15 | * Fix for ruby 1.9 compatibility. 16 | * Fix segfault when exiting in ruby 1.9 17 | * Follow the newer conventions of rspec 18 | * Migrate to using Bundler instead of Jeweler 19 | * use malloc/free instead of ruby_xmalloc/ruby_xfree 20 | 21 | ## 0.2.6 2009-10-6 22 | 23 | * Updated to support Ruby 1.8.5 24 | 25 | ## 0.2.5 2009-06-07 26 | 27 | * initial conversion of original tests over to rspec 28 | 29 | ## 0.2.4 2009-05-02 30 | 31 | * renamed BZ2 module/namespace to Bzip2 32 | 33 | ## 0.2.3 2009-05-02 34 | 35 | * renamed gem to bzip2-ruby from bz2 36 | * initial conversion to jeweler 37 | * bundling gemspec 38 | * README and file structure organization updates 39 | * updated Init_*() ruby initializer function to match new gem name 40 | 41 | ## 0.2.2 2008-12-22 42 | 43 | * 1 major enhancement: 44 | * Gemify bz2 library from http://moulon.inra.fr/ruby/bz2.html 45 | * All credit goes to Guy Decoux 46 | 47 | ## 0.2.1 48 | 49 | * replaced rb_proc_new() with bz_proc_new() for 1.6 50 | (Thanks "Akinori MUSHA" ) 51 | 52 | ## 0.1.9 53 | 54 | * corrected BZ_FINISH_OK (Thanks Rudi Cilibrasi ) 55 | 56 | ## 0.1.6 57 | 58 | * adapted for 1.8.0 (ihi) 59 | * modified ::new 60 | * BZ2::Writer#finish (same than #flush) 61 | 62 | ## 0.1.5 63 | 64 | * corrected extconf.rb 65 | * added close! 
66 | * replaced close(false) by #finish 67 | * corrected #flush 68 | 69 | ## 0.1.4 70 | 71 | * corrected bz_iv 72 | * #to_io 73 | * corrected ::Reader#close 74 | 75 | ## 0.1.3 76 | 77 | * corrected #lineno 78 | * corrected ::Writer::new(nil) 79 | * taint result 80 | 81 | ## 0.1.2 82 | 83 | * better (???) interface for #read 84 | * finalize for objects which respond to #closed? 85 | 86 | ## 0.1.1 87 | 88 | * better interface for T_FILE 89 | * corrected bug with gc (buf) 90 | * Reader#lineno, Reader#lineno=, Reader#ungets 91 | * corrected Reader#unused 92 | * taint check in #initialize 93 | * BZ2::bzip2, BZ2::bunzip2 94 | * too many exceptions -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source 'https://rubygems.org' 2 | 3 | gemspec 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Ruby C bindings to libbzip2 2 | 3 | ## Installation 4 | 5 | First make sure you’ve got Gemcutter in your sources list: 6 | 7 | `gem sources -a http://gemcutter.org` 8 | 9 | Then go ahead and install it as usual: 10 | 11 | `sudo gem install bzip2-ruby` 12 | 13 | You may need to specify: 14 | 15 | `--with-bz2-dir=` 16 | 17 | Or in a Gemfile 18 | 19 | `gem 'bzip2-ruby'` 20 | 21 | ## Usage 22 | 23 | The full documentation is hosted on [rdoc.info](http://rdoc.info/github/brianmario/bzip2-ruby/master/frames). 
24 | 25 | Here's a quick overview, however: 26 | 27 | ``` ruby 28 | require 'bzip2' 29 | 30 | # Quick shortcuts 31 | data = Bzip2.compress 'string' 32 | Bzip2.uncompress data 33 | 34 | # Creating a bz2 compressed file 35 | writer = Bzip2::Writer.new File.open('file', 'w') 36 | writer << 'data1' 37 | writer.puts 'data2' 38 | writer.print 'data3' 39 | writer.printf '%s', 'data4' 40 | writer.close 41 | 42 | Bzip2::Writer.open('file'){ |f| f << data } 43 | 44 | # Reading a bz2 compressed file 45 | reader = Bzip2::Reader.new File.open('file') 46 | reader.gets # => "data1data2\n" 47 | reader.read # => 'data3data4' 48 | 49 | reader.readline # => raises Bzip2::EOZError 50 | 51 | Bzip2::Reader.open('file'){ |f| puts f.read } 52 | ``` 53 | 54 | ## Copying 55 | 56 | ``` 57 | This extension module is copyrighted free software by Guy Decoux 58 | You can redistribute it and/or modify it under the same term as Ruby. 59 | Guy Decoux 60 | ``` 61 | 62 | ## Modifications from original version 63 | 64 | * Switch to Jeweler 65 | * Renamed BZ2 module/namespace to Bzip2 66 | * Renamed compiled binary from "bz2" to "bzip2" 67 | * Renamed gem from "bz2" to "bzip2-ruby" 68 | * Converted original tests to rspec 69 | * 1.9 compatibility 70 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | require 'bundler' 2 | Bundler::GemHelper.install_tasks 3 | 4 | # rspec 5 | begin 6 | require 'rspec' 7 | require 'rspec/core/rake_task' 8 | 9 | desc "Run all examples with RCov" 10 | RSpec::Core::RakeTask.new('spec:rcov') do |t| 11 | t.rcov = true 12 | end 13 | RSpec::Core::RakeTask.new('spec') do |t| 14 | t.verbose = true 15 | end 16 | 17 | task :default => :spec 18 | rescue LoadError 19 | puts "rspec, or one of its dependencies, is not available. Install it with: sudo gem install rspec" 20 | end 21 | 22 | # rake-compiler 23 | require 'rake' unless defined? 
Rake 24 | 25 | gem 'rake-compiler', '>= 0.7.5' 26 | require "rake/extensiontask" 27 | 28 | Rake::ExtensionTask.new('bzip2') do |ext| 29 | ext.cross_compile = true 30 | ext.cross_platform = ['x86-mingw32', 'x86-mswin32-60'] 31 | 32 | ext.lib_dir = File.join 'lib', 'bzip2' 33 | end 34 | 35 | Rake::Task[:spec].prerequisites << :compile 36 | -------------------------------------------------------------------------------- /bzip2-ruby.gemspec: -------------------------------------------------------------------------------- 1 | require './lib/bzip2/version' 2 | 3 | Gem::Specification.new do |s| 4 | s.name = 'bzip2-ruby' 5 | s.version = Bzip2::VERSION 6 | s.authors = ['Guy Decoux', 'Brian Lopezs'] 7 | s.date = Time.now.utc.strftime("%Y-%m-%d") 8 | s.email = ['seniorlopez@gmail.com'] 9 | s.extensions = ['ext/bzip2/extconf.rb'] 10 | s.files = `git ls-files`.split("\n") 11 | s.homepage = 'http://github.com/brianmario/bzip2-ruby' 12 | s.rdoc_options = ["--charset=UTF-8"] 13 | s.require_paths = ['lib'] 14 | s.rubygems_version = %q{1.4.2} 15 | s.summary = 'Ruby C bindings to libbzip2.' 
16 | s.test_files = `git ls-files spec`.split("\n") 17 | 18 | # tests 19 | s.add_development_dependency 'rake-compiler', ">= 0.7.5" 20 | s.add_development_dependency 'rspec', ">= 2.0.0" 21 | end 22 | -------------------------------------------------------------------------------- /ext/bzip2/bzip2.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "common.h" 5 | #include "reader.h" 6 | #include "writer.h" 7 | 8 | VALUE bz_cWriter, bz_cReader, bz_cInternal; 9 | VALUE bz_eError, bz_eEOZError; 10 | 11 | VALUE bz_internal_ary; 12 | 13 | ID id_new, id_write, id_open, id_flush, id_read; 14 | ID id_closed, id_close, id_str; 15 | 16 | void bz_internal_finalize(VALUE data) { 17 | VALUE elem; 18 | int closed, i; 19 | struct bz_iv *bziv; 20 | struct bz_file *bzf; 21 | 22 | for (i = 0; i < RARRAY_LEN(bz_internal_ary); i++) { 23 | elem = RARRAY_PTR(bz_internal_ary)[i]; 24 | Data_Get_Struct(elem, struct bz_iv, bziv); 25 | if (bziv->bz2) { 26 | RDATA(bziv->bz2)->dfree = free; 27 | if (TYPE(bziv->io) == T_FILE) { 28 | RFILE(bziv->io)->fptr->finalize = bziv->finalize; 29 | } else if (TYPE(bziv->io) == T_DATA) { 30 | RDATA(bziv->io)->dfree = bziv->finalize; 31 | } 32 | Data_Get_Struct(bziv->bz2, struct bz_file, bzf); 33 | closed = bz_writer_internal_flush(bzf); 34 | if (bzf->flags & BZ2_RB_CLOSE) { 35 | bzf->flags &= ~BZ2_RB_CLOSE; 36 | if (!closed && rb_respond_to(bzf->io, id_close)) { 37 | rb_funcall2(bzf->io, id_close, 0, 0); 38 | } 39 | } 40 | } 41 | } 42 | } 43 | 44 | /* 45 | * call-seq: 46 | * compress(str) 47 | * 48 | * Shortcut for compressing just a string. 
49 | * 50 | * Bzip2.uncompress Bzip2.compress('data') # => 'data' 51 | * 52 | * @param [String] str the string to compress 53 | * @return [String] +str+ compressed with bz2 54 | */ 55 | VALUE bz_compress(VALUE self, VALUE str) { 56 | VALUE bz2, argv[1] = {Qnil}; 57 | 58 | str = rb_str_to_str(str); 59 | bz2 = rb_funcall2(bz_cWriter, id_new, 1, argv); 60 | if (OBJ_TAINTED(str)) { 61 | struct bz_file *bzf; 62 | Data_Get_Struct(bz2, struct bz_file, bzf); 63 | OBJ_TAINT(bzf->io); 64 | } 65 | bz_writer_write(bz2, str); 66 | return bz_writer_close(bz2); 67 | } 68 | 69 | /* 70 | * Returns the io stream underlying this stream. If the strem was constructed 71 | * with a file, that is returned. Otherwise, an empty string is returned. 72 | * 73 | * @return [File, String] similar to whatever the stream was constructed with 74 | * @raise [IOError] if the stream has been closed 75 | */ 76 | VALUE bz_to_io(VALUE obj) { 77 | struct bz_file *bzf; 78 | 79 | Get_BZ2(obj, bzf); 80 | return bzf->io; 81 | } 82 | 83 | VALUE bz_str_read(int argc, VALUE *argv, VALUE obj) { 84 | struct bz_str *bzs; 85 | VALUE res, len; 86 | int count; 87 | 88 | Data_Get_Struct(obj, struct bz_str, bzs); 89 | rb_scan_args(argc, argv, "01", &len); 90 | if (NIL_P(len)) { 91 | count = (int) RSTRING_LEN(bzs->str); 92 | } else { 93 | count = NUM2INT(len); 94 | if (count < 0) { 95 | rb_raise(rb_eArgError, "negative length %d given", count); 96 | } 97 | } 98 | if (!count || bzs->pos == -1) { 99 | return Qnil; 100 | } 101 | if ((bzs->pos + count) >= RSTRING_LEN(bzs->str)) { 102 | res = rb_str_new(RSTRING_PTR(bzs->str) + bzs->pos, 103 | RSTRING_LEN(bzs->str) - bzs->pos); 104 | bzs->pos = -1; 105 | } else { 106 | res = rb_str_new(RSTRING_PTR(bzs->str) + bzs->pos, count); 107 | bzs->pos += count; 108 | } 109 | return res; 110 | } 111 | 112 | /* 113 | * call-seq: 114 | * uncompress(data) 115 | * Decompress a string of bz2 compressed data. 
116 | * 117 | * Bzip2.uncompress Bzip2.compress('asdf') # => 'asdf' 118 | * 119 | * @param [String] data bz2 compressed data 120 | * @return [String] +data+ as uncompressed bz2 data 121 | * @raise [Bzip2::Error] if +data+ is not valid bz2 data 122 | */ 123 | VALUE bz_uncompress(VALUE self, VALUE data) { 124 | VALUE bz2, nilv = Qnil, argv[1]; 125 | 126 | argv[0] = rb_str_to_str(data); 127 | bz2 = rb_funcall2(bz_cReader, id_new, 1, argv); 128 | return bz_reader_read(1, &nilv, bz2); 129 | } 130 | 131 | /* 132 | * Internally allocates data, 133 | * 134 | * @see Bzip2::Writer#initialize 135 | * @see Bzip2::Reader#initialize 136 | * @private 137 | */ 138 | VALUE bz_s_new(int argc, VALUE *argv, VALUE obj) { 139 | VALUE res = rb_funcall2(obj, rb_intern("allocate"), 0, 0); 140 | rb_obj_call_init(res, argc, argv); 141 | return res; 142 | } 143 | 144 | void Init_bzip2() { 145 | VALUE bz_mBzip2, bz_mBzip2Singleton; 146 | 147 | bz_internal_ary = rb_ary_new(); 148 | rb_global_variable(&bz_internal_ary); 149 | rb_set_end_proc(bz_internal_finalize, Qnil); 150 | 151 | id_new = rb_intern("new"); 152 | id_write = rb_intern("write"); 153 | id_open = rb_intern("open"); 154 | id_flush = rb_intern("flush"); 155 | id_read = rb_intern("read"); 156 | id_close = rb_intern("close"); 157 | id_closed = rb_intern("closed?"); 158 | id_str = rb_intern("to_str"); 159 | 160 | bz_mBzip2 = rb_define_module("Bzip2"); 161 | bz_eError = rb_define_class_under(bz_mBzip2, "Error", rb_eIOError); 162 | bz_eEOZError = rb_define_class_under(bz_mBzip2, "EOZError", bz_eError); 163 | 164 | bz_mBzip2Singleton = rb_singleton_class(bz_mBzip2); 165 | rb_define_singleton_method(bz_mBzip2, "compress", bz_compress, 1); 166 | rb_define_singleton_method(bz_mBzip2, "uncompress", bz_uncompress, 1); 167 | rb_define_alias(bz_mBzip2Singleton, "bzip2", "compress"); 168 | rb_define_alias(bz_mBzip2Singleton, "decompress", "uncompress"); 169 | rb_define_alias(bz_mBzip2Singleton, "bunzip2", "uncompress"); 170 | 171 | /* 172 | Writer 
173 | */ 174 | bz_cWriter = rb_define_class_under(bz_mBzip2, "Writer", rb_cData); 175 | #if HAVE_RB_DEFINE_ALLOC_FUNC 176 | rb_define_alloc_func(bz_cWriter, bz_writer_s_alloc); 177 | #else 178 | rb_define_singleton_method(bz_cWriter, "allocate", bz_writer_s_alloc, 0); 179 | #endif 180 | rb_define_singleton_method(bz_cWriter, "new", bz_s_new, -1); 181 | rb_define_singleton_method(bz_cWriter, "open", bz_writer_s_open, -1); 182 | rb_define_method(bz_cWriter, "initialize", bz_writer_init, -1); 183 | rb_define_method(bz_cWriter, "write", bz_writer_write, 1); 184 | rb_define_method(bz_cWriter, "putc", bz_writer_putc, 1); 185 | rb_define_method(bz_cWriter, "puts", rb_io_puts, -1); 186 | rb_define_method(bz_cWriter, "print", rb_io_print, -1); 187 | rb_define_method(bz_cWriter, "printf", rb_io_printf, -1); 188 | rb_define_method(bz_cWriter, "<<", rb_io_addstr, 1); 189 | rb_define_method(bz_cWriter, "flush", bz_writer_flush, 0); 190 | rb_define_method(bz_cWriter, "close", bz_writer_close, 0); 191 | rb_define_method(bz_cWriter, "close!", bz_writer_close_bang, 0); 192 | rb_define_method(bz_cWriter, "closed?", bz_writer_closed, 0); 193 | rb_define_method(bz_cWriter, "to_io", bz_to_io, 0); 194 | rb_define_alias(bz_cWriter, "finish", "flush"); 195 | rb_define_alias(bz_cWriter, "closed", "closed?"); 196 | 197 | /* 198 | Reader 199 | */ 200 | bz_cReader = rb_define_class_under(bz_mBzip2, "Reader", rb_cData); 201 | rb_include_module(bz_cReader, rb_mEnumerable); 202 | #if HAVE_RB_DEFINE_ALLOC_FUNC 203 | rb_define_alloc_func(bz_cReader, bz_reader_s_alloc); 204 | #else 205 | rb_define_singleton_method(bz_cReader, "allocate", bz_reader_s_alloc, 0); 206 | #endif 207 | rb_define_singleton_method(bz_cReader, "new", bz_s_new, -1); 208 | rb_define_singleton_method(bz_cReader, "open", bz_reader_s_open, -1); 209 | rb_define_singleton_method(bz_cReader, "foreach", bz_reader_s_foreach, -1); 210 | rb_define_singleton_method(bz_cReader, "readlines", bz_reader_s_readlines, -1); 211 | 
rb_define_method(bz_cReader, "initialize", bz_reader_init, -1); 212 | rb_define_method(bz_cReader, "read", bz_reader_read, -1); 213 | rb_define_method(bz_cReader, "unused", bz_reader_unused, 0); 214 | rb_define_method(bz_cReader, "unused=", bz_reader_set_unused, 1); 215 | rb_define_method(bz_cReader, "ungetc", bz_reader_ungetc, 1); 216 | rb_define_method(bz_cReader, "ungets", bz_reader_ungets, 1); 217 | rb_define_method(bz_cReader, "getc", bz_reader_getc, 0); 218 | rb_define_method(bz_cReader, "gets", bz_reader_gets_m, -1); 219 | rb_define_method(bz_cReader, "readchar", bz_reader_readchar, 0); 220 | rb_define_method(bz_cReader, "readline", bz_reader_readline, -1); 221 | rb_define_method(bz_cReader, "readlines", bz_reader_readlines, -1); 222 | rb_define_method(bz_cReader, "each", bz_reader_each_line, -1); 223 | rb_define_method(bz_cReader, "each_byte", bz_reader_each_byte, 0); 224 | rb_define_method(bz_cReader, "close", bz_reader_close, 0); 225 | rb_define_method(bz_cReader, "close!", bz_reader_close_bang, 0); 226 | rb_define_method(bz_cReader, "finish", bz_reader_finish, 0); 227 | rb_define_method(bz_cReader, "closed?", bz_reader_closed, 0); 228 | rb_define_method(bz_cReader, "eoz?", bz_reader_eoz, 0); 229 | rb_define_method(bz_cReader, "eof?", bz_reader_eof, 0); 230 | rb_define_method(bz_cReader, "lineno", bz_reader_lineno, 0); 231 | rb_define_method(bz_cReader, "lineno=", bz_reader_set_lineno, 1); 232 | rb_define_method(bz_cReader, "to_io", bz_to_io, 0); 233 | rb_define_alias(bz_cReader, "each_line", "each"); 234 | rb_define_alias(bz_cReader, "closed", "closed?"); 235 | rb_define_alias(bz_cReader, "eoz", "eoz?"); 236 | rb_define_alias(bz_cReader, "eof", "eof?"); 237 | 238 | /* 239 | Internal 240 | */ 241 | bz_cInternal = rb_define_class_under(bz_mBzip2, "InternalStr", rb_cData); 242 | #if HAVE_RB_DEFINE_ALLOC_FUNC 243 | rb_undef_alloc_func(bz_cInternal); 244 | #else 245 | rb_undef_method(CLASS_OF(bz_cInternal), "allocate"); 246 | #endif 247 | 
rb_undef_method(CLASS_OF(bz_cInternal), "new"); 248 | rb_undef_method(bz_cInternal, "initialize"); 249 | rb_define_method(bz_cInternal, "read", bz_str_read, -1); 250 | } 251 | -------------------------------------------------------------------------------- /ext/bzip2/common.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "common.h" 5 | 6 | void bz_file_mark(struct bz_file * bzf) { 7 | rb_gc_mark(bzf->io); 8 | rb_gc_mark(bzf->in); 9 | } 10 | 11 | void * bz_malloc(void *opaque, int m, int n) { 12 | return malloc(m * n); 13 | } 14 | 15 | void bz_free(void *opaque, void *p) { 16 | free(p); 17 | } 18 | 19 | VALUE bz_raise(int error) { 20 | VALUE exc; 21 | const char *msg; 22 | 23 | exc = bz_eError; 24 | switch (error) { 25 | case BZ_SEQUENCE_ERROR: 26 | msg = "incorrect sequence"; 27 | break; 28 | case BZ_PARAM_ERROR: 29 | msg = "parameter out of range"; 30 | break; 31 | case BZ_MEM_ERROR: 32 | msg = "not enough memory is available"; 33 | break; 34 | case BZ_DATA_ERROR: 35 | msg = "data integrity error is detected"; 36 | break; 37 | case BZ_DATA_ERROR_MAGIC: 38 | msg = "compressed stream does not start with the correct magic bytes"; 39 | break; 40 | case BZ_IO_ERROR: 41 | msg = "error reading or writing"; 42 | break; 43 | case BZ_UNEXPECTED_EOF: 44 | exc = bz_eEOZError; 45 | msg = "compressed file finishes before the logical end of stream is detected"; 46 | break; 47 | case BZ_OUTBUFF_FULL: 48 | msg = "output buffer full"; 49 | break; 50 | default: 51 | msg = "unknown error"; 52 | exc = bz_eError; 53 | } 54 | rb_raise(exc, "%s", msg); 55 | } 56 | -------------------------------------------------------------------------------- /ext/bzip2/common.h: -------------------------------------------------------------------------------- 1 | #ifndef _RB_BZIP2_COMMON_H_ 2 | #define _RB_BZIP2_COMMON_H_ 3 | 4 | #include 5 | #include 6 | 7 | #ifndef RUBY_19_COMPATIBILITY 8 | # include 9 | # include 10 | 
#else 11 | # include 12 | #endif 13 | 14 | #define BZ2_RB_CLOSE 1 15 | #define BZ2_RB_INTERNAL 2 16 | 17 | #define BZ_RB_BLOCKSIZE 4096 18 | #define DEFAULT_BLOCKS 9 19 | #define ASIZE (1 << CHAR_BIT) 20 | 21 | /* Older versions of Ruby (< 1.8.6) need these */ 22 | #ifndef RSTRING_PTR 23 | # define RSTRING_PTR(s) (RSTRING(s)->ptr) 24 | #endif 25 | #ifndef RSTRING_LEN 26 | # define RSTRING_LEN(s) (RSTRING(s)->len) 27 | #endif 28 | #ifndef RARRAY_PTR 29 | # define RARRAY_PTR(s) (RARRAY(s)->ptr) 30 | #endif 31 | #ifndef RARRAY_LEN 32 | # define RARRAY_LEN(s) (RARRAY(s)->len) 33 | #endif 34 | 35 | struct bz_file { 36 | bz_stream bzs; 37 | VALUE in, io; 38 | char *buf; 39 | unsigned int buflen; 40 | int blocks, work, small; 41 | int flags, lineno, state; 42 | }; 43 | 44 | struct bz_str { 45 | VALUE str; 46 | int pos; 47 | }; 48 | 49 | struct bz_iv { 50 | VALUE bz2, io; 51 | void (*finalize)(); 52 | }; 53 | 54 | #define Get_BZ2(obj, bzf) \ 55 | rb_io_taint_check(obj); \ 56 | Data_Get_Struct(obj, struct bz_file, bzf); \ 57 | if (!RTEST(bzf->io)) { \ 58 | rb_raise(rb_eIOError, "closed IO"); \ 59 | } 60 | 61 | #ifndef ASDFasdf 62 | extern VALUE bz_cWriter, bz_cReader, bz_cInternal; 63 | extern VALUE bz_eError, bz_eEOZError; 64 | 65 | extern VALUE bz_internal_ary; 66 | 67 | extern ID id_new, id_write, id_open, id_flush, id_read; 68 | extern ID id_closed, id_close, id_str; 69 | #endif 70 | 71 | void bz_file_mark(struct bz_file * bzf); 72 | void* bz_malloc(void *opaque, int m, int n); 73 | void bz_free(void *opaque, void *p); 74 | VALUE bz_raise(int err); 75 | 76 | #endif 77 | -------------------------------------------------------------------------------- /ext/bzip2/extconf.rb: -------------------------------------------------------------------------------- 1 | # encoding: UTF-8 2 | require 'mkmf' 3 | dir_config('bz2') 4 | have_header('bzlib.h') 5 | 6 | $CFLAGS << ' -Wall -Wextra -Wno-unused -funroll-loops ' 7 | # $CFLAGS << ' -O0 -ggdb -Wextra' 8 | 9 | if have_library("bz2", 
"BZ2_bzWriteOpen") 10 | if enable_config("shared", true) 11 | $static = nil 12 | end 13 | 14 | if RUBY_VERSION.to_f >= 1.9 15 | $CFLAGS << ' -DRUBY_19_COMPATIBILITY' 16 | end 17 | 18 | create_makefile('bzip2/bzip2') 19 | else 20 | puts "libbz2 not found, maybe try manually specifying --with-bz2-dir to find it?" 21 | end 22 | -------------------------------------------------------------------------------- /ext/bzip2/reader.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "reader.h" 5 | #include "common.h" 6 | 7 | void bz_str_mark(struct bz_str *bzs) { 8 | rb_gc_mark(bzs->str); 9 | } 10 | 11 | struct bz_file * bz_get_bzf(VALUE obj) { 12 | struct bz_file *bzf; 13 | 14 | Get_BZ2(obj, bzf); 15 | if (!bzf->buf) { 16 | if (bzf->state != BZ_OK) { 17 | bz_raise(bzf->state); 18 | } 19 | bzf->state = BZ2_bzDecompressInit(&(bzf->bzs), 0, bzf->small); 20 | if (bzf->state != BZ_OK) { 21 | BZ2_bzDecompressEnd(&(bzf->bzs)); 22 | bz_raise(bzf->state); 23 | } 24 | bzf->buf = ALLOC_N(char, BZ_RB_BLOCKSIZE + 1); 25 | bzf->buflen = BZ_RB_BLOCKSIZE; 26 | bzf->buf[0] = bzf->buf[bzf->buflen] = '\0'; 27 | bzf->bzs.total_out_hi32 = bzf->bzs.total_out_lo32 = 0; 28 | bzf->bzs.next_out = bzf->buf; 29 | bzf->bzs.avail_out = 0; 30 | } 31 | if (bzf->state == BZ_STREAM_END && !bzf->bzs.avail_out) { 32 | return 0; 33 | } 34 | return bzf; 35 | } 36 | 37 | int bz_next_available(struct bz_file *bzf, int in){ 38 | bzf->bzs.next_out = bzf->buf; 39 | bzf->bzs.avail_out = 0; 40 | if (bzf->state == BZ_STREAM_END) { 41 | return BZ_STREAM_END; 42 | } 43 | if (!bzf->bzs.avail_in) { 44 | bzf->in = rb_funcall(bzf->io, id_read, 1, INT2FIX(1024)); 45 | if (TYPE(bzf->in) != T_STRING || RSTRING_LEN(bzf->in) == 0) { 46 | BZ2_bzDecompressEnd(&(bzf->bzs)); 47 | bzf->bzs.avail_out = 0; 48 | bzf->state = BZ_UNEXPECTED_EOF; 49 | bz_raise(bzf->state); 50 | } 51 | bzf->bzs.next_in = RSTRING_PTR(bzf->in); 52 | bzf->bzs.avail_in = (int) 
RSTRING_LEN(bzf->in); 53 | } 54 | if ((bzf->buflen - in) < (BZ_RB_BLOCKSIZE / 2)) { 55 | bzf->buf = REALLOC_N(bzf->buf, char, bzf->buflen+BZ_RB_BLOCKSIZE+1); 56 | bzf->buflen += BZ_RB_BLOCKSIZE; 57 | bzf->buf[bzf->buflen] = '\0'; 58 | } 59 | bzf->bzs.avail_out = bzf->buflen - in; 60 | bzf->bzs.next_out = bzf->buf + in; 61 | bzf->state = BZ2_bzDecompress(&(bzf->bzs)); 62 | if (bzf->state != BZ_OK) { 63 | BZ2_bzDecompressEnd(&(bzf->bzs)); 64 | if (bzf->state != BZ_STREAM_END) { 65 | bzf->bzs.avail_out = 0; 66 | bz_raise(bzf->state); 67 | } 68 | } 69 | bzf->bzs.avail_out = bzf->buflen - bzf->bzs.avail_out; 70 | bzf->bzs.next_out = bzf->buf; 71 | return 0; 72 | } 73 | 74 | VALUE bz_read_until(struct bz_file *bzf, const char *str, int len, int *td1) { 75 | VALUE res; 76 | int total, i, nex = 0; 77 | char *p, *t, *tx, *end, *pend = ((char*) str) + len; 78 | 79 | res = rb_str_new(0, 0); 80 | while (1) { 81 | total = bzf->bzs.avail_out; 82 | if (len == 1) { 83 | tx = memchr(bzf->bzs.next_out, *str, bzf->bzs.avail_out); 84 | if (tx) { 85 | i = (int)(tx - bzf->bzs.next_out + len); 86 | res = rb_str_cat(res, bzf->bzs.next_out, i); 87 | bzf->bzs.next_out += i; 88 | bzf->bzs.avail_out -= i; 89 | return res; 90 | } 91 | } else { 92 | tx = bzf->bzs.next_out; 93 | end = bzf->bzs.next_out + bzf->bzs.avail_out; 94 | while (tx + len <= end) { 95 | for (p = (char*) str, t = tx; p != pend; ++p, ++t) { 96 | if (*p != *t) break; 97 | } 98 | if (p == pend) { 99 | i = (int)(tx - bzf->bzs.next_out + len); 100 | res = rb_str_cat(res, bzf->bzs.next_out, i); 101 | bzf->bzs.next_out += i; 102 | bzf->bzs.avail_out -= i; 103 | return res; 104 | } 105 | if (td1) { 106 | tx += td1[(int)*(tx + len)]; 107 | } else { 108 | tx += 1; 109 | } 110 | } 111 | } 112 | nex = 0; 113 | if (total) { 114 | nex = len - 1; 115 | res = rb_str_cat(res, bzf->bzs.next_out, total - nex); 116 | if (nex) { 117 | MEMMOVE(bzf->buf, bzf->bzs.next_out + total - nex, char, nex); 118 | } 119 | } 120 | if (bz_next_available(bzf, 
nex) == BZ_STREAM_END) { 121 | if (nex) { 122 | res = rb_str_cat(res, bzf->buf, nex); 123 | } 124 | if (RSTRING_LEN(res)) { 125 | return res; 126 | } 127 | return Qnil; 128 | } 129 | } 130 | return Qnil; 131 | } 132 | 133 | /** 134 | * Reads a stream as long as the next character is equal to the specified 135 | * character 136 | * 137 | * Returns the next character in the sequence that's not the same as the one 138 | * given or EOF if it's there until the end of the file. 139 | */ 140 | int bz_read_while(struct bz_file *bzf, char c) { 141 | char *end; 142 | 143 | while (1) { 144 | end = bzf->bzs.next_out + bzf->bzs.avail_out; 145 | while (bzf->bzs.next_out < end) { 146 | if (c != *bzf->bzs.next_out) { 147 | return *bzf->bzs.next_out; 148 | } 149 | ++bzf->bzs.next_out; 150 | --bzf->bzs.avail_out; 151 | } 152 | if (bz_next_available(bzf, 0) == BZ_STREAM_END) { 153 | return EOF; 154 | } 155 | } 156 | return EOF; 157 | } 158 | 159 | /* 160 | * Internally allocates data for a new Reader 161 | * @private 162 | */ 163 | VALUE bz_reader_s_alloc(VALUE obj) { 164 | struct bz_file *bzf; 165 | VALUE res; 166 | res = Data_Make_Struct(obj, struct bz_file, bz_file_mark, free, bzf); 167 | bzf->bzs.bzalloc = bz_malloc; 168 | bzf->bzs.bzfree = bz_free; 169 | bzf->blocks = DEFAULT_BLOCKS; 170 | bzf->state = BZ_OK; 171 | return res; 172 | } 173 | 174 | VALUE bz_reader_close __((VALUE)); 175 | 176 | /* 177 | * call-seq: 178 | * open(filename, &block=nil) -> Bzip2::Reader 179 | * 180 | * @param [String] filename the name of the file to read from 181 | * @yieldparam [Bzip2::Reader] reader the Bzip2::Reader instance 182 | * 183 | * If a block is given, the created Bzip2::Reader instance is yielded to the 184 | * block and will be closed when the block completes. 
It is guaranteed via 185 | * +ensure+ that the reader is closed 186 | * 187 | * If a block is not given, a Bzip2::Reader instance will be returned 188 | * 189 | * Bzip2::Reader.open('file') { |f| puts f.gets } 190 | * 191 | * reader = Bzip2::Reader.open('file') 192 | * puts reader.gets 193 | * reader.close 194 | * 195 | * @return [Bzip2::Reader, nil] 196 | */ 197 | VALUE bz_reader_s_open(int argc, VALUE *argv, VALUE obj) { 198 | VALUE res; 199 | struct bz_file *bzf; 200 | 201 | if (argc < 1) { 202 | rb_raise(rb_eArgError, "invalid number of arguments"); 203 | } 204 | argv[0] = rb_funcall2(rb_mKernel, id_open, 1, argv); 205 | if (NIL_P(argv[0])) { 206 | return Qnil; 207 | } 208 | res = rb_funcall2(obj, id_new, argc, argv); 209 | Data_Get_Struct(res, struct bz_file, bzf); 210 | bzf->flags |= BZ2_RB_CLOSE; 211 | if (rb_block_given_p()) { 212 | return rb_ensure(rb_yield, res, bz_reader_close, res); 213 | } 214 | return res; 215 | } 216 | 217 | /* 218 | * call-seq: 219 | * initialize(io) 220 | * 221 | * Creates a new stream for reading a bzip file or string 222 | * 223 | * @param [File, string, #read] io the source for input data. 
If the source is 224 | * a file or something responding to #read, then data will be read via #read, 225 | * otherwise if the input is a string it will be taken as the literal data 226 | * to decompress 227 | */ 228 | VALUE bz_reader_init(int argc, VALUE *argv, VALUE obj) { 229 | struct bz_file *bzf; 230 | int small = 0; 231 | VALUE a, b; 232 | int internal = 0; 233 | 234 | if (rb_scan_args(argc, argv, "11", &a, &b) == 2) { 235 | small = RTEST(b); 236 | } 237 | rb_io_taint_check(a); 238 | if (OBJ_TAINTED(a)) { 239 | OBJ_TAINT(obj); 240 | } 241 | if (rb_respond_to(a, id_read)) { 242 | if (TYPE(a) == T_FILE) { 243 | #ifndef RUBY_19_COMPATIBILITY 244 | OpenFile *fptr; 245 | #else 246 | rb_io_t *fptr; 247 | #endif 248 | 249 | GetOpenFile(a, fptr); 250 | rb_io_check_readable(fptr); 251 | } else if (rb_respond_to(a, id_closed)) { 252 | VALUE iv = rb_funcall2(a, id_closed, 0, 0); 253 | if (RTEST(iv)) { 254 | rb_raise(rb_eArgError, "closed object"); 255 | } 256 | } 257 | } else { 258 | struct bz_str *bzs; 259 | VALUE res; 260 | 261 | if (!rb_respond_to(a, id_str)) { 262 | rb_raise(rb_eArgError, "first argument must respond to #read"); 263 | } 264 | a = rb_funcall2(a, id_str, 0, 0); 265 | if (TYPE(a) != T_STRING) { 266 | rb_raise(rb_eArgError, "#to_str must return a String"); 267 | } 268 | res = Data_Make_Struct(bz_cInternal, struct bz_str, 269 | bz_str_mark, free, bzs); 270 | bzs->str = a; 271 | a = res; 272 | internal = BZ2_RB_INTERNAL; 273 | } 274 | Data_Get_Struct(obj, struct bz_file, bzf); 275 | bzf->io = a; 276 | bzf->small = small; 277 | bzf->flags |= internal; 278 | return obj; 279 | } 280 | 281 | /* 282 | * call-seq: 283 | * read(len = nil) 284 | * 285 | * Read decompressed data from the stream. 
286 | * 287 | * Bzip2::Reader.new(Bzip2.compress('ab')).read # => "ab" 288 | * Bzip2::Reader.new(Bzip2.compress('ab')).read(1) # => "a" 289 | * 290 | * @return [String, nil] the decompressed data read or +nil+ if eoz has been 291 | * reached 292 | * @param [Integer] len the number of decompressed bytes which should be read. 293 | * If nothing is specified, the entire stream is read 294 | */ 295 | VALUE bz_reader_read(int argc, VALUE *argv, VALUE obj) { 296 | struct bz_file *bzf; 297 | VALUE res, length; 298 | int total; 299 | int n; 300 | 301 | rb_scan_args(argc, argv, "01", &length); 302 | if (NIL_P(length)) { 303 | n = -1; 304 | } else { 305 | n = NUM2INT(length); 306 | if (n < 0) { 307 | rb_raise(rb_eArgError, "negative length %d given", n); 308 | } 309 | } 310 | bzf = bz_get_bzf(obj); 311 | if (!bzf) { 312 | return Qnil; 313 | } 314 | res = rb_str_new(0, 0); 315 | if (OBJ_TAINTED(obj)) { 316 | OBJ_TAINT(res); 317 | } 318 | if (n == 0) { 319 | free(bzf->buf); 320 | return res; 321 | } 322 | while (1) { 323 | total = bzf->bzs.avail_out; 324 | if (n != -1 && (RSTRING_LEN(res) + total) >= n) { 325 | n -= (int) RSTRING_LEN(res); 326 | res = rb_str_cat(res, bzf->bzs.next_out, n); 327 | bzf->bzs.next_out += n; 328 | bzf->bzs.avail_out -= n; 329 | free(bzf->buf); 330 | return res; 331 | } 332 | if (total) { 333 | res = rb_str_cat(res, bzf->bzs.next_out, total); 334 | } 335 | if (bz_next_available(bzf, 0) == BZ_STREAM_END) { 336 | free(bzf->buf); 337 | return res; 338 | } 339 | } 340 | return Qnil; 341 | } 342 | 343 | int bz_getc(VALUE obj) { 344 | VALUE length = INT2FIX(1); 345 | VALUE res = bz_reader_read(1, &length, obj); 346 | if (NIL_P(res) || RSTRING_LEN(res) == 0) { 347 | return EOF; 348 | } 349 | return RSTRING_PTR(res)[0]; 350 | } 351 | 352 | /* 353 | * call-seq: 354 | * ungetc(byte) 355 | * 356 | * "Ungets" a character/byte. This rewinds the stream by 1 character and inserts 357 | * the given character into that position. 
The next read will return the given 358 | * character as the first one read 359 | * 360 | * reader = Bzip2::Reader.new Bzip2.compress('abc') 361 | * reader.getc # => 97 362 | * reader.ungetc 97 # => nil 363 | * reader.getc # => 97 364 | * reader.ungetc 42 # => nil 365 | * reader.getc # => 42 366 | * reader.getc # => 98 367 | * reader.getc # => 99 368 | * reader.ungetc 100 # => nil 369 | * reader.getc # => 100 370 | * 371 | * @param [Integer] byte the byte to 'unget' 372 | * @return [nil] always 373 | */ 374 | VALUE bz_reader_ungetc(VALUE obj, VALUE a) { 375 | struct bz_file *bzf; 376 | int c = NUM2INT(a); 377 | 378 | Get_BZ2(obj, bzf); 379 | if (!bzf->buf) { 380 | bz_raise(BZ_SEQUENCE_ERROR); 381 | } 382 | if (bzf->bzs.avail_out < bzf->buflen) { 383 | bzf->bzs.next_out -= 1; 384 | bzf->bzs.next_out[0] = c; 385 | bzf->bzs.avail_out += 1; 386 | } else { 387 | bzf->buf = REALLOC_N(bzf->buf, char, bzf->buflen + 2); 388 | bzf->buf[bzf->buflen++] = c; 389 | bzf->buf[bzf->buflen] = '\0'; 390 | bzf->bzs.next_out = bzf->buf; 391 | bzf->bzs.avail_out = bzf->buflen; 392 | } 393 | return Qnil; 394 | } 395 | 396 | /* 397 | * call-seq: 398 | * ungets(str) 399 | * 400 | * Equivalently "unget" a string. When called on a string that was just read 401 | * from the stream, this inserts the string back into the stream to br read 402 | * again. 403 | * 404 | * When called with a string which hasn't been read from the stream, it does 405 | * the same thing, and the next read line/data will start from the beginning 406 | * of the given data and the continue on with the rest of the stream. 
407 | * 408 | * reader = Bzip2::Reader.new Bzip2.compress("a\nb") 409 | * reader.gets # => "a\n" 410 | * reader.ungets "a\n" # => nil 411 | * reader.gets # => "a\n" 412 | * reader.ungets "foo" # => nil 413 | * reader.gets # => "foob" 414 | * 415 | * @param [String] str the string to insert back into the stream 416 | * @return [nil] always 417 | */ 418 | VALUE bz_reader_ungets(VALUE obj, VALUE a) { 419 | struct bz_file *bzf; 420 | 421 | Check_Type(a, T_STRING); 422 | Get_BZ2(obj, bzf); 423 | if (!bzf->buf) { 424 | bz_raise(BZ_SEQUENCE_ERROR); 425 | } 426 | if ((bzf->bzs.avail_out + RSTRING_LEN(a)) < bzf->buflen) { 427 | bzf->bzs.next_out -= RSTRING_LEN(a); 428 | MEMCPY(bzf->bzs.next_out, RSTRING_PTR(a), char, RSTRING_LEN(a)); 429 | bzf->bzs.avail_out += (int) RSTRING_LEN(a); 430 | } else { 431 | bzf->buf = REALLOC_N(bzf->buf, char, bzf->buflen + RSTRING_LEN(a) + 1); 432 | MEMCPY(bzf->buf + bzf->buflen, RSTRING_PTR(a), char,RSTRING_LEN(a)); 433 | bzf->buflen += (int) RSTRING_LEN(a); 434 | bzf->buf[bzf->buflen] = '\0'; 435 | bzf->bzs.next_out = bzf->buf; 436 | bzf->bzs.avail_out = bzf->buflen; 437 | } 438 | return Qnil; 439 | } 440 | 441 | VALUE bz_reader_gets(VALUE obj) { 442 | struct bz_file *bzf; 443 | VALUE str = Qnil; 444 | 445 | bzf = bz_get_bzf(obj); 446 | if (bzf) { 447 | str = bz_read_until(bzf, "\n", 1, 0); 448 | if (!NIL_P(str)) { 449 | bzf->lineno++; 450 | OBJ_TAINT(str); 451 | } 452 | } 453 | return str; 454 | } 455 | 456 | VALUE bz_reader_gets_internal(int argc, VALUE *argv, VALUE obj, int *td, int init) { 457 | struct bz_file *bzf; 458 | VALUE rs, res; 459 | const char *rsptr; 460 | int rslen, rspara, *td1; 461 | 462 | rs = rb_rs; 463 | if (argc) { 464 | rb_scan_args(argc, argv, "1", &rs); 465 | if (!NIL_P(rs)) { 466 | Check_Type(rs, T_STRING); 467 | } 468 | } 469 | if (NIL_P(rs)) { 470 | return bz_reader_read(1, &rs, obj); 471 | } 472 | rslen = (int) RSTRING_LEN(rs); 473 | if (rs == rb_default_rs || (rslen == 1 && RSTRING_PTR(rs)[0] == '\n')) { 474 | 
return bz_reader_gets(obj); 475 | } 476 | 477 | if (rslen == 0) { 478 | rsptr = "\n\n"; 479 | rslen = 2; 480 | rspara = 1; 481 | } else { 482 | rsptr = RSTRING_PTR(rs); 483 | rspara = 0; 484 | } 485 | 486 | bzf = bz_get_bzf(obj); 487 | if (!bzf) { 488 | return Qnil; 489 | } 490 | if (rspara) { 491 | bz_read_while(bzf, '\n'); 492 | } 493 | td1 = 0; 494 | if (rslen != 1) { 495 | if (init) { 496 | int i; 497 | 498 | for (i = 0; i < ASIZE; i++) { 499 | td[i] = rslen + 1; 500 | } 501 | for (i = 0; i < rslen; i++) { 502 | td[(int)*(rsptr + i)] = rslen - i; 503 | } 504 | } 505 | td1 = td; 506 | } 507 | 508 | res = bz_read_until(bzf, rsptr, rslen, td1); 509 | if (rspara) { 510 | bz_read_while(bzf, '\n'); 511 | } 512 | 513 | if (!NIL_P(res)) { 514 | bzf->lineno++; 515 | OBJ_TAINT(res); 516 | } 517 | return res; 518 | } 519 | 520 | /* 521 | * Specs were missing for this method originally and playing around with it 522 | * gave some very odd results, so unless you know what you're doing, I wouldn't 523 | * mess around with this... 524 | */ 525 | VALUE bz_reader_set_unused(VALUE obj, VALUE a) { 526 | struct bz_file *bzf; 527 | 528 | Check_Type(a, T_STRING); 529 | Get_BZ2(obj, bzf); 530 | if (!bzf->in) { 531 | bzf->in = rb_str_new(RSTRING_PTR(a), RSTRING_LEN(a)); 532 | } else { 533 | bzf->in = rb_str_cat(bzf->in, RSTRING_PTR(a), RSTRING_LEN(a)); 534 | } 535 | bzf->bzs.next_in = RSTRING_PTR(bzf->in); 536 | bzf->bzs.avail_in = (int) RSTRING_LEN(bzf->in); 537 | return Qnil; 538 | } 539 | 540 | /* 541 | * Reads one character from the stream, returning the byte read. 
542 | * 543 | * reader = Bzip2::Reader.new Bzip2.compress('ab') 544 | * reader.getc # => 97 545 | * reader.getc # => 98 546 | * reader.getc # => nil 547 | * 548 | * @return [Integer, nil] the byte value of the character read or +nil+ if eoz 549 | * has been reached 550 | */ 551 | VALUE bz_reader_getc(VALUE obj) { 552 | VALUE str; 553 | VALUE len = INT2FIX(1); 554 | 555 | str = bz_reader_read(1, &len, obj); 556 | if (NIL_P(str) || RSTRING_LEN(str) == 0) { 557 | return Qnil; 558 | } 559 | return INT2FIX(RSTRING_PTR(str)[0] & 0xff); 560 | } 561 | 562 | void bz_eoz_error() { 563 | rb_raise(bz_eEOZError, "End of Zip component reached"); 564 | } 565 | 566 | /* 567 | * Performs the same as Bzip2::Reader#getc except Bzip2::EOZError is raised if 568 | * eoz has been readhed 569 | * 570 | * @raise [Bzip2::EOZError] if eoz has been reached 571 | */ 572 | VALUE bz_reader_readchar(VALUE obj) { 573 | VALUE res = bz_reader_getc(obj); 574 | 575 | if (NIL_P(res)) { 576 | bz_eoz_error(); 577 | } 578 | return res; 579 | } 580 | 581 | /* 582 | * call-seq: 583 | * gets(sep = "\n") 584 | * 585 | * Reads a line from the stream until the separator is reached. 
This does not 586 | * throw an exception, but rather returns nil if an eoz/eof error occurs 587 | * 588 | * reader = Bzip2::Reader.new Bzip2.compress("a\nb") 589 | * reader.gets # => "a\n" 590 | * reader.gets # => "b" 591 | * reader.gets # => nil 592 | * 593 | * @return [String, nil] the read data or nil if eoz has been reached 594 | * @see Bzip2::Reader#readline 595 | */ 596 | VALUE bz_reader_gets_m(int argc, VALUE *argv, VALUE obj) { 597 | int td[ASIZE]; 598 | VALUE str = bz_reader_gets_internal(argc, argv, obj, td, Qtrue); 599 | 600 | if (!NIL_P(str)) { 601 | rb_lastline_set(str); 602 | } 603 | return str; 604 | } 605 | 606 | /* 607 | * call-seq: 608 | * readline(sep = "\n") 609 | * 610 | * Reads one line from the stream and returns it (including the separator) 611 | * 612 | * reader = Bzip2::Reader.new Bzip2.compress("a\nb") 613 | * reader.readline # => "a\n" 614 | * reader.readline # => "b" 615 | * reader.readline # => raises Bzip2::EOZError 616 | * 617 | * 618 | * @param [String] sep the newline separator character 619 | * @return [String] the read line 620 | * @see Bzip2::Reader.readlines 621 | * @raise [Bzip2::EOZError] if the stream has reached its end 622 | */ 623 | VALUE bz_reader_readline(int argc, VALUE *argv, VALUE obj) { 624 | VALUE res = bz_reader_gets_m(argc, argv, obj); 625 | 626 | if (NIL_P(res)) { 627 | bz_eoz_error(); 628 | } 629 | return res; 630 | } 631 | 632 | /* 633 | * call-seq: 634 | * readlines(sep = "\n") 635 | * 636 | * Reads the lines of the files and returns the result as an array. 
637 | * 638 | * If the stream has reached eoz, then an empty array is returned 639 | * 640 | * @param [String] sep the newline separator character 641 | * @return [Array] an array of lines read 642 | * @see Bzip2::Reader.readlines 643 | */ 644 | VALUE bz_reader_readlines(int argc, VALUE *argv, VALUE obj) { 645 | VALUE line, ary; 646 | int td[ASIZE], in; 647 | 648 | in = Qtrue; 649 | ary = rb_ary_new(); 650 | while (!NIL_P(line = bz_reader_gets_internal(argc, argv, obj, td, in))) { 651 | in = Qfalse; 652 | rb_ary_push(ary, line); 653 | } 654 | return ary; 655 | } 656 | 657 | /* 658 | * call-seq: 659 | * each(sep = "\n", &block) 660 | * 661 | * Iterates over the lines of the stream. 662 | * 663 | * @param [String] sep the byte which separates lines 664 | * @yieldparam [String] line the next line of the file (including the separator 665 | * character) 666 | * @see Bzip2::Reader.foreach 667 | */ 668 | VALUE bz_reader_each_line(int argc, VALUE *argv, VALUE obj) { 669 | VALUE line; 670 | int td[ASIZE], in; 671 | 672 | in = Qtrue; 673 | while (!NIL_P(line = bz_reader_gets_internal(argc, argv, obj, td, in))) { 674 | in = Qfalse; 675 | rb_yield(line); 676 | } 677 | return obj; 678 | } 679 | 680 | /* 681 | * call-seq: 682 | * each_byte(&block) 683 | * 684 | * Iterates over the decompressed bytes of the file. 
685 | * 686 | * Bzip2::Writer.open('file'){ |f| f << 'asdf' } 687 | * reader = Bzip2::Reader.new File.open('file') 688 | * reader.each_byte{ |b| puts "#{b} #{b.chr}" } 689 | * 690 | * # Output: 691 | * # 97 a 692 | * # 115 s 693 | * # 100 d 694 | * # 102 f 695 | * 696 | * @yieldparam [Integer] byte the decompressed bytes of the file 697 | */ 698 | VALUE bz_reader_each_byte(VALUE obj) { 699 | int c; 700 | 701 | while ((c = bz_getc(obj)) != EOF) { 702 | rb_yield(INT2FIX(c & 0xff)); 703 | } 704 | return obj; 705 | } 706 | 707 | /* 708 | * Specs were missing for this method originally and playing around with it 709 | * gave some very odd results, so unless you know what you're doing, I wouldn't 710 | * mess around with this... 711 | */ 712 | VALUE bz_reader_unused(VALUE obj) { 713 | struct bz_file *bzf; 714 | VALUE res; 715 | 716 | Get_BZ2(obj, bzf); 717 | if (!bzf->in || bzf->state != BZ_STREAM_END) { 718 | return Qnil; 719 | } 720 | if (bzf->bzs.avail_in) { 721 | res = rb_tainted_str_new(bzf->bzs.next_in, bzf->bzs.avail_in); 722 | bzf->bzs.avail_in = 0; 723 | } else { 724 | res = rb_tainted_str_new(0, 0); 725 | } 726 | return res; 727 | } 728 | 729 | /* 730 | * Test whether the end of the bzip stream has been reached 731 | * 732 | * @return [Boolean] +true+ if the reader is at the end of the bz stream or 733 | * +false+ otherwise 734 | */ 735 | VALUE bz_reader_eoz(VALUE obj) { 736 | struct bz_file *bzf; 737 | 738 | Get_BZ2(obj, bzf); 739 | if (!bzf->in || !bzf->buf) { 740 | return Qnil; 741 | } 742 | if (bzf->state == BZ_STREAM_END && !bzf->bzs.avail_out) { 743 | return Qtrue; 744 | } 745 | return Qfalse; 746 | } 747 | 748 | /* 749 | * Test whether the bzip stream has reached its end (see Bzip2::Reader#eoz?) 750 | * and then tests that the undlerying IO has also reached an eof 751 | * 752 | * @return [Boolean] +true+ if the stream has reached or +false+ otherwise. 
753 | */ 754 | VALUE bz_reader_eof(VALUE obj) { 755 | struct bz_file *bzf; 756 | VALUE res; 757 | 758 | res = bz_reader_eoz(obj); 759 | if (RTEST(res)) { 760 | Get_BZ2(obj, bzf); 761 | if (bzf->bzs.avail_in) { 762 | res = Qfalse; 763 | } else { 764 | res = bz_reader_getc(obj); 765 | if (NIL_P(res)) { 766 | res = Qtrue; 767 | } else { 768 | bz_reader_ungetc(obj, res); 769 | res = Qfalse; 770 | } 771 | } 772 | } 773 | return res; 774 | } 775 | 776 | /* 777 | * Tests whether this reader has be closed. 778 | * 779 | * @return [Boolean] +true+ if it is or +false+ otherwise. 780 | */ 781 | VALUE bz_reader_closed(VALUE obj) { 782 | struct bz_file *bzf; 783 | 784 | Data_Get_Struct(obj, struct bz_file, bzf); 785 | return RTEST(bzf->io)?Qfalse:Qtrue; 786 | } 787 | 788 | /* 789 | * Closes this reader to disallow further reads. 790 | * 791 | * reader = Bzip2::Reader.new File.open('file') 792 | * reader.close 793 | * 794 | * reader.closed? # => true 795 | * 796 | * @return [File] the io with which the reader was created. 797 | * @raise [IOError] if the stream has already been closed 798 | */ 799 | VALUE bz_reader_close(VALUE obj) { 800 | struct bz_file *bzf; 801 | VALUE res; 802 | 803 | Get_BZ2(obj, bzf); 804 | if (bzf->buf) { 805 | free(bzf->buf); 806 | bzf->buf = 0; 807 | } 808 | if (bzf->state == BZ_OK) { 809 | BZ2_bzDecompressEnd(&(bzf->bzs)); 810 | } 811 | if (bzf->flags & BZ2_RB_CLOSE) { 812 | int closed = 0; 813 | if (rb_respond_to(bzf->io, id_closed)) { 814 | VALUE iv = rb_funcall2(bzf->io, id_closed, 0, 0); 815 | closed = RTEST(iv); 816 | } 817 | if (!closed && rb_respond_to(bzf->io, id_close)) { 818 | rb_funcall2(bzf->io, id_close, 0, 0); 819 | } 820 | } 821 | if (bzf->flags & (BZ2_RB_CLOSE|BZ2_RB_INTERNAL)) { 822 | res = Qnil; 823 | } else { 824 | res = bzf->io; 825 | } 826 | bzf->io = 0; 827 | return res; 828 | } 829 | 830 | /* 831 | * Originally undocument and had no sepcs. 
Appears to call Bzip2::Reader#read 832 | * and then mark the stream as finished, but this didn't work for me... 833 | */ 834 | VALUE bz_reader_finish(VALUE obj) { 835 | struct bz_file *bzf; 836 | 837 | Get_BZ2(obj, bzf); 838 | if (bzf->buf) { 839 | rb_funcall2(obj, id_read, 0, 0); 840 | free(bzf->buf); 841 | } 842 | bzf->buf = 0; 843 | bzf->state = BZ_OK; 844 | return Qnil; 845 | } 846 | 847 | /* 848 | * Originally undocument and had no sepcs. Appears to work nearly the same 849 | * as Bzip2::Reader#close... 850 | */ 851 | VALUE bz_reader_close_bang(VALUE obj) { 852 | struct bz_file *bzf; 853 | int closed; 854 | 855 | Get_BZ2(obj, bzf); 856 | closed = bzf->flags & (BZ2_RB_CLOSE|BZ2_RB_INTERNAL); 857 | bz_reader_close(obj); 858 | if (!closed && rb_respond_to(bzf->io, id_close)) { 859 | if (rb_respond_to(bzf->io, id_closed)) { 860 | closed = RTEST(rb_funcall2(bzf->io, id_closed, 0, 0)); 861 | } 862 | if (!closed) { 863 | rb_funcall2(bzf->io, id_close, 0, 0); 864 | } 865 | } 866 | return Qnil; 867 | } 868 | 869 | struct foreach_arg { 870 | int argc; 871 | VALUE sep; 872 | VALUE obj; 873 | }; 874 | 875 | VALUE bz_reader_foreach_line(struct foreach_arg *arg) { 876 | VALUE str; 877 | int td[ASIZE], in; 878 | 879 | in = Qtrue; 880 | while (!NIL_P(str = bz_reader_gets_internal(arg->argc, &arg->sep, arg->obj, td, in))) { 881 | in = Qfalse; 882 | rb_yield(str); 883 | } 884 | return Qnil; 885 | } 886 | 887 | /* 888 | * call-seq: 889 | * foreach(filename, &block) 890 | * 891 | * Reads a bz2 compressed file and yields each line to the block 892 | * 893 | * Bzip2::Writer.open('file'){ |f| f << "a\n" << "b\n" << "c\n\nd" } 894 | * Bzip2::Reader.foreach('file'){ |l| p l } 895 | * 896 | * # Output: 897 | * # "a\n" 898 | * # "b\n" 899 | * # "c\n" 900 | * # "\n" 901 | * # "d" 902 | * 903 | * @param [String] filename the path to the file to open 904 | * @yieldparam [String] each line of the file 905 | */ 906 | VALUE bz_reader_s_foreach(int argc, VALUE *argv, VALUE obj) { 907 | VALUE 
fname, sep; 908 | struct foreach_arg arg; 909 | struct bz_file *bzf; 910 | 911 | if (!rb_block_given_p()) { 912 | rb_raise(rb_eArgError, "call out of a block"); 913 | } 914 | rb_scan_args(argc, argv, "11", &fname, &sep); 915 | #ifdef SafeStringValue 916 | SafeStringValue(fname); 917 | #else 918 | Check_SafeStr(fname); 919 | #endif 920 | arg.argc = argc - 1; 921 | arg.sep = sep; 922 | arg.obj = rb_funcall2(rb_mKernel, id_open, 1, &fname); 923 | if (NIL_P(arg.obj)) { 924 | return Qnil; 925 | } 926 | arg.obj = rb_funcall2(obj, id_new, 1, &arg.obj); 927 | Data_Get_Struct(arg.obj, struct bz_file, bzf); 928 | bzf->flags |= BZ2_RB_CLOSE; 929 | return rb_ensure(bz_reader_foreach_line, (VALUE)&arg, bz_reader_close, arg.obj); 930 | } 931 | 932 | VALUE bz_reader_i_readlines(struct foreach_arg *arg) { 933 | VALUE str, res; 934 | int td[ASIZE], in; 935 | 936 | in = Qtrue; 937 | res = rb_ary_new(); 938 | while (!NIL_P(str = bz_reader_gets_internal(arg->argc, &arg->sep, arg->obj, td, in))) { 939 | in = Qfalse; 940 | rb_ary_push(res, str); 941 | } 942 | return res; 943 | } 944 | 945 | /* 946 | * call-seq: 947 | * readlines(filename, separator="\n") 948 | * 949 | * Opens the given bz2 compressed file for reading and decompresses the file, 950 | * returning an array of the lines of the file. A line is denoted by the 951 | * separator argument. 
952 | * 953 | * Bzip2::Writer.open('file'){ |f| f << "a\n" << "b\n" << "c\n\nd" } 954 | * 955 | * Bzip2::Reader.readlines('file') # => ["a\n", "b\n", "c\n", "\n", "d"] 956 | * Bzip2::Reader.readlines('file', 'c') # => ["a\nb\nc", "\n\nd"] 957 | * 958 | * @param [String] filename the path to the file to read 959 | * @param [String] separator the character to denote a newline in the file 960 | * @see Bzip2::Reader#readlines 961 | * @return [Array] an array of lines for the file 962 | * @raise [Bzip2::Error] if the file is not a valid bz2 compressed file 963 | */ 964 | VALUE bz_reader_s_readlines(int argc, VALUE *argv, VALUE obj) { 965 | VALUE fname, sep; 966 | struct foreach_arg arg; 967 | struct bz_file *bzf; 968 | 969 | rb_scan_args(argc, argv, "11", &fname, &sep); 970 | #ifdef SafeStringValue 971 | SafeStringValue(fname); 972 | #else 973 | Check_SafeStr(fname); 974 | #endif 975 | arg.argc = argc - 1; 976 | arg.sep = sep; 977 | arg.obj = rb_funcall2(rb_mKernel, id_open, 1, &fname); 978 | if (NIL_P(arg.obj)) { 979 | return Qnil; 980 | } 981 | arg.obj = rb_funcall2(obj, id_new, 1, &arg.obj); 982 | Data_Get_Struct(arg.obj, struct bz_file, bzf); 983 | bzf->flags |= BZ2_RB_CLOSE; 984 | return rb_ensure(bz_reader_i_readlines, (VALUE)&arg, bz_reader_close, arg.obj); 985 | } 986 | 987 | /* 988 | * Returns the current line number that the stream is at. 
This number is based 989 | * on the newline separator being "\n" 990 | * 991 | * reader = Bzip2::Reader.new Bzip2.compress("a\nb") 992 | * reader.lineno # => 0 993 | * reader.readline # => "a\n" 994 | * reader.lineno # => 1 995 | * reader.readline # => "b" 996 | * reader.lineno # => 2 997 | 998 | * @return [Integer] the current line number 999 | */ 1000 | VALUE bz_reader_lineno(VALUE obj) { 1001 | struct bz_file *bzf; 1002 | 1003 | Get_BZ2(obj, bzf); 1004 | return INT2NUM(bzf->lineno); 1005 | } 1006 | 1007 | /* 1008 | * call-seq: 1009 | * lineno=(num) 1010 | * 1011 | * Sets the internal line number count that this stream should be set at 1012 | * 1013 | * reader = Bzip2::Reader.new Bzip2.compress("a\nb") 1014 | * reader.lineno # => 0 1015 | * reader.readline # => "a\n" 1016 | * reader.lineno # => 1 1017 | * reader.lineno = 0 1018 | * reader.readline # => "b" 1019 | * reader.lineno # => 1 1020 | * 1021 | * @note This does not actually rewind or move the stream forward 1022 | * @param [Integer] lineno the line number which the stream should consider 1023 | * being set at 1024 | * @return [Integer] the line number provided 1025 | */ 1026 | VALUE bz_reader_set_lineno(VALUE obj, VALUE lineno) { 1027 | struct bz_file *bzf; 1028 | 1029 | Get_BZ2(obj, bzf); 1030 | bzf->lineno = NUM2INT(lineno); 1031 | return lineno; 1032 | } 1033 | -------------------------------------------------------------------------------- /ext/bzip2/reader.h: -------------------------------------------------------------------------------- 1 | #ifndef _RB_BZIP2_READER_H_ 2 | #define _RB_BZIP2_READER_H_ 3 | 4 | #include 5 | 6 | /* Instance methods */ 7 | VALUE bz_reader_init(int argc, VALUE *argv, VALUE obj); 8 | VALUE bz_reader_read(int argc, VALUE *argv, VALUE obj); 9 | VALUE bz_reader_ungetc(VALUE obj, VALUE a); 10 | VALUE bz_reader_ungets(VALUE obj, VALUE a); 11 | VALUE bz_reader_getc(VALUE obj); 12 | VALUE bz_reader_readchar(VALUE obj); 13 | VALUE bz_reader_gets_m(int argc, VALUE *argv, VALUE 
obj); 14 | VALUE bz_reader_readline(int argc, VALUE *argv, VALUE obj); 15 | VALUE bz_reader_readlines(int argc, VALUE *argv, VALUE obj); 16 | VALUE bz_reader_each_line(int argc, VALUE *argv, VALUE obj); 17 | VALUE bz_reader_each_byte(VALUE obj); 18 | VALUE bz_reader_unused(VALUE obj); 19 | VALUE bz_reader_set_unused(VALUE obj, VALUE a); 20 | VALUE bz_reader_eoz(VALUE obj); 21 | VALUE bz_reader_eof(VALUE obj); 22 | VALUE bz_reader_closed(VALUE obj); 23 | VALUE bz_reader_close(VALUE obj); 24 | VALUE bz_reader_close_bang(VALUE obj); 25 | VALUE bz_reader_finish(VALUE obj); 26 | VALUE bz_reader_lineno(VALUE obj); 27 | VALUE bz_reader_set_lineno(VALUE obj, VALUE lineno); 28 | 29 | /* Class methods */ 30 | VALUE bz_reader_s_alloc(VALUE obj); 31 | VALUE bz_reader_s_open(int argc, VALUE *argv, VALUE obj); 32 | VALUE bz_reader_s_foreach(int argc, VALUE *argv, VALUE obj); 33 | VALUE bz_reader_s_readlines(int argc, VALUE *argv, VALUE obj); 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /ext/bzip2/writer.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "common.h" 4 | #include "writer.h" 5 | 6 | struct bz_iv * bz_find_struct(VALUE obj, void *ptr, int *posp) { 7 | struct bz_iv *bziv; 8 | int i; 9 | 10 | for (i = 0; i < RARRAY_LEN(bz_internal_ary); i++) { 11 | Data_Get_Struct(RARRAY_PTR(bz_internal_ary)[i], struct bz_iv, bziv); 12 | if (ptr) { 13 | #ifndef RUBY_19_COMPATIBILITY 14 | if (TYPE(bziv->io) == T_FILE && RFILE(bziv->io)->fptr == (OpenFile *)ptr) { 15 | #else 16 | if (TYPE(bziv->io) == T_FILE && RFILE(bziv->io)->fptr == (rb_io_t *)ptr) { 17 | #endif 18 | if (posp) { 19 | *posp = i; 20 | } 21 | return bziv; 22 | } else if (TYPE(bziv->io) == T_DATA && DATA_PTR(bziv->io) == ptr) { 23 | if (posp) *posp = i; 24 | return bziv; 25 | } 26 | } else if (bziv->io == obj) { 27 | if (posp) *posp = i; 28 | return bziv; 29 | } 30 | } 31 | if (posp) *posp = 
-1; 32 | return 0; 33 | } 34 | 35 | VALUE bz_str_closed(VALUE obj) { 36 | return Qfalse; 37 | } 38 | 39 | void bz_io_data_finalize(void *ptr) { 40 | struct bz_file *bzf; 41 | struct bz_iv *bziv; 42 | int pos; 43 | 44 | bziv = bz_find_struct(0, ptr, &pos); 45 | if (bziv) { 46 | rb_ary_delete_at(bz_internal_ary, pos); 47 | Data_Get_Struct(bziv->bz2, struct bz_file, bzf); 48 | rb_protect((VALUE (*)(VALUE))bz_writer_internal_flush, (VALUE)bzf, 0); 49 | RDATA(bziv->bz2)->dfree = free; 50 | if (bziv->finalize) { 51 | (*bziv->finalize)(ptr); 52 | } else if (TYPE(bzf->io) == T_FILE) { 53 | #ifndef RUBY_19_COMPATIBILITY 54 | OpenFile *file = (OpenFile *)ptr; 55 | if (file->f) { 56 | fclose(file->f); 57 | file->f = 0; 58 | } 59 | if (file->f2) { 60 | fclose(file->f2); 61 | file->f2 = 0; 62 | } 63 | #else 64 | rb_io_t *file = (rb_io_t *)ptr; 65 | if (file->fd) { 66 | close(file->fd); 67 | 68 | file->fd = 0; 69 | } 70 | if (file->stdio_file) { 71 | fclose(file->stdio_file); 72 | file->stdio_file = 0; 73 | } 74 | #endif 75 | } 76 | } 77 | 78 | } 79 | 80 | int bz_writer_internal_flush(struct bz_file *bzf) { 81 | int closed = 1; 82 | 83 | if (rb_respond_to(bzf->io, id_closed)) { 84 | closed = RTEST(rb_funcall2(bzf->io, id_closed, 0, 0)); 85 | } 86 | if (bzf->buf) { 87 | if (!closed && bzf->state == BZ_OK) { 88 | bzf->bzs.next_in = NULL; 89 | bzf->bzs.avail_in = 0; 90 | do { 91 | bzf->bzs.next_out = bzf->buf; 92 | bzf->bzs.avail_out = bzf->buflen; 93 | bzf->state = BZ2_bzCompress(&(bzf->bzs), BZ_FINISH); 94 | if (bzf->state != BZ_FINISH_OK && bzf->state != BZ_STREAM_END) { 95 | break; 96 | } 97 | if (bzf->bzs.avail_out < bzf->buflen) { 98 | rb_funcall(bzf->io, id_write, 1, rb_str_new(bzf->buf, bzf->buflen - bzf->bzs.avail_out)); 99 | } 100 | } while (bzf->state != BZ_STREAM_END); 101 | } 102 | free(bzf->buf); 103 | bzf->buf = 0; 104 | BZ2_bzCompressEnd(&(bzf->bzs)); 105 | bzf->state = BZ_OK; 106 | if (!closed && rb_respond_to(bzf->io, id_flush)) { 107 | rb_funcall2(bzf->io, 
id_flush, 0, 0); 108 | } 109 | } 110 | return closed; 111 | } 112 | 113 | VALUE bz_writer_internal_close(struct bz_file *bzf) { 114 | struct bz_iv *bziv; 115 | int pos, closed; 116 | VALUE res; 117 | 118 | closed = bz_writer_internal_flush(bzf); 119 | bziv = bz_find_struct(bzf->io, 0, &pos); 120 | if (bziv) { 121 | if (TYPE(bzf->io) == T_FILE) { 122 | RFILE(bzf->io)->fptr->finalize = bziv->finalize; 123 | } else if (TYPE(bziv->io) == T_DATA) { 124 | RDATA(bziv->io)->dfree = bziv->finalize; 125 | } 126 | RDATA(bziv->bz2)->dfree = free; 127 | bziv->bz2 = 0; 128 | rb_ary_delete_at(bz_internal_ary, pos); 129 | } 130 | if (bzf->flags & BZ2_RB_CLOSE) { 131 | bzf->flags &= ~BZ2_RB_CLOSE; 132 | if (!closed && rb_respond_to(bzf->io, id_close)) { 133 | rb_funcall2(bzf->io, id_close, 0, 0); 134 | } 135 | res = Qnil; 136 | } else { 137 | res = bzf->io; 138 | } 139 | bzf->io = Qnil; 140 | return res; 141 | } 142 | 143 | /* 144 | * Closes this writer for further use. The remaining data is compressed and 145 | * flushed. 146 | * 147 | * If the writer was constructed with an io object, that object is returned. 148 | * Otherwise, the actual compressed data is returned 149 | * 150 | * writer = Bzip2::Writer.new File.open('path', 'w') 151 | * writer << 'a' 152 | * writer.close # => # 153 | * 154 | * writer = Bzip2::Writer.new 155 | * writer << 'a' 156 | * writer.close # => "BZh91AY&SY... 157 | */ 158 | VALUE bz_writer_close(VALUE obj) { 159 | struct bz_file *bzf; 160 | VALUE res; 161 | 162 | Get_BZ2(obj, bzf); 163 | res = bz_writer_internal_close(bzf); 164 | #ifndef RUBINIUS 165 | if (!NIL_P(res) && (bzf->flags & BZ2_RB_INTERNAL)) { 166 | RBASIC(res)->klass = rb_cString; 167 | } 168 | #endif 169 | return res; 170 | } 171 | 172 | /* 173 | * Calls Bzip2::Writer#close and then does some more stuff... 
174 | */ 175 | VALUE bz_writer_close_bang(VALUE obj) { 176 | struct bz_file *bzf; 177 | int closed; 178 | 179 | Get_BZ2(obj, bzf); 180 | closed = bzf->flags & (BZ2_RB_INTERNAL|BZ2_RB_CLOSE); 181 | bz_writer_close(obj); 182 | if (!closed && rb_respond_to(bzf->io, id_close)) { 183 | if (rb_respond_to(bzf->io, id_closed)) { 184 | closed = RTEST(rb_funcall2(bzf->io, id_closed, 0, 0)); 185 | } 186 | if (!closed) { 187 | rb_funcall2(bzf->io, id_close, 0, 0); 188 | } 189 | } 190 | return Qnil; 191 | } 192 | 193 | /* 194 | * Tests whether this writer is closed 195 | * 196 | * @return [Boolean] +true+ if the writer is closed or +false+ otherwise 197 | */ 198 | VALUE bz_writer_closed(VALUE obj) { 199 | struct bz_file *bzf; 200 | 201 | Data_Get_Struct(obj, struct bz_file, bzf); 202 | return RTEST(bzf->io)?Qfalse:Qtrue; 203 | } 204 | 205 | void bz_writer_free(struct bz_file *bzf) { 206 | bz_writer_internal_close(bzf); 207 | free(bzf); 208 | } 209 | 210 | /* 211 | * Internally allocates information about a new writer 212 | * @private 213 | */ 214 | VALUE bz_writer_s_alloc(VALUE obj) { 215 | struct bz_file *bzf; 216 | VALUE res; 217 | res = Data_Make_Struct(obj, struct bz_file, bz_file_mark, bz_writer_free, bzf); 218 | bzf->bzs.bzalloc = bz_malloc; 219 | bzf->bzs.bzfree = bz_free; 220 | bzf->blocks = DEFAULT_BLOCKS; 221 | bzf->state = BZ_OK; 222 | return res; 223 | } 224 | 225 | /* 226 | * Flushes all of the data in this stream to the underlying IO. 227 | * 228 | * If this writer was constructed with no underlying io object, the compressed 229 | * data is returned as a string. 
230 | * 231 | * @return [String, nil] 232 | * @raise [IOError] if the stream has been closed 233 | */ 234 | VALUE bz_writer_flush(VALUE obj) { 235 | struct bz_file *bzf; 236 | 237 | Get_BZ2(obj, bzf); 238 | if (bzf->flags & BZ2_RB_INTERNAL) { 239 | return bz_writer_close(obj); 240 | } 241 | bz_writer_internal_flush(bzf); 242 | return Qnil; 243 | } 244 | 245 | /* 246 | * call-seq: 247 | * open(filename, mode='wb', &block=nil) -> Bzip2::Writer 248 | * 249 | * @param [String] filename the name of the file to write to 250 | * @param [String] mode a mode string passed to Kernel#open 251 | * @yieldparam [Bzip2::Writer] writer the Bzip2::Writer instance 252 | * 253 | * If a block is given, the created Bzip2::Writer instance is yielded to the 254 | * block and will be closed when the block completes. It is guaranteed via 255 | * +ensure+ that the writer is closed 256 | * 257 | * If a block is not given, a Bzip2::Writer instance will be returned 258 | * 259 | * Bzip2::Writer.open('file') { |f| f << data } 260 | * 261 | * writer = Bzip2::Writer.open('file') 262 | * writer << data 263 | * writer.close 264 | * 265 | * @return [Bzip2::Writer, nil] 266 | */ 267 | VALUE bz_writer_s_open(int argc, VALUE *argv, VALUE obj) { 268 | VALUE res; 269 | struct bz_file *bzf; 270 | 271 | if (argc < 1) { 272 | rb_raise(rb_eArgError, "invalid number of arguments"); 273 | } 274 | if (argc == 1) { 275 | argv[0] = rb_funcall(rb_mKernel, id_open, 2, argv[0], 276 | rb_str_new2("wb")); 277 | } else { 278 | argv[1] = rb_funcall2(rb_mKernel, id_open, 2, argv); 279 | argv += 1; 280 | argc -= 1; 281 | } 282 | res = rb_funcall2(obj, id_new, argc, argv); 283 | Data_Get_Struct(res, struct bz_file, bzf); 284 | bzf->flags |= BZ2_RB_CLOSE; 285 | if (rb_block_given_p()) { 286 | return rb_ensure(rb_yield, res, bz_writer_close, res); 287 | } 288 | return res; 289 | } 290 | 291 | VALUE bz_str_write(VALUE obj, VALUE str) { 292 | if (TYPE(str) != T_STRING) { 293 | rb_raise(rb_eArgError, "expected a String"); 294 | 
} 295 | if (RSTRING_LEN(str)) { 296 | rb_str_cat(obj, RSTRING_PTR(str), RSTRING_LEN(str)); 297 | } 298 | return str; 299 | } 300 | 301 | /* 302 | * call-seq: 303 | * initialize(io = nil) 304 | * 305 | * @param [File] io the file which to write compressed data to 306 | * 307 | * Creates a new Bzip2::Writer for compressing a stream of data. An optional 308 | * io object (something responding to +write+) can be supplied which data 309 | * will be written to. 310 | * 311 | * If nothing is given, the Bzip2::Writer#flush method can be called to retrieve 312 | * the compressed stream so far. 313 | * 314 | * writer = Bzip2::Writer.new File.open('files.bz2') 315 | * writer << 'a' 316 | * writer << 'b' 317 | * writer.close 318 | * 319 | * writer = Bzip2::Writer.new 320 | * writer << 'abcde' 321 | * writer.flush # => 'abcde' compressed 322 | */ 323 | VALUE bz_writer_init(int argc, VALUE *argv, VALUE obj) { 324 | struct bz_file *bzf; 325 | int blocks = DEFAULT_BLOCKS; 326 | int work = 0; 327 | VALUE a, b, c; 328 | 329 | switch(rb_scan_args(argc, argv, "03", &a, &b, &c)) { 330 | case 3: 331 | work = NUM2INT(c); 332 | /* ... 
*/ 333 | case 2: 334 | blocks = NUM2INT(b); 335 | } 336 | Data_Get_Struct(obj, struct bz_file, bzf); 337 | if (NIL_P(a)) { 338 | a = rb_str_new(0, 0); 339 | rb_define_method(rb_singleton_class(a), "write", bz_str_write, 1); 340 | rb_define_method(rb_singleton_class(a), "closed?", bz_str_closed, 0); 341 | bzf->flags |= BZ2_RB_INTERNAL; 342 | } else { 343 | VALUE iv; 344 | struct bz_iv *bziv; 345 | #ifndef RUBY_19_COMPATIBILITY 346 | OpenFile *fptr; 347 | #else 348 | rb_io_t *fptr; 349 | #endif 350 | 351 | rb_io_taint_check(a); 352 | if (!rb_respond_to(a, id_write)) { 353 | rb_raise(rb_eArgError, "first argument must respond to #write"); 354 | } 355 | if (TYPE(a) == T_FILE) { 356 | GetOpenFile(a, fptr); 357 | rb_io_check_writable(fptr); 358 | } else if (rb_respond_to(a, id_closed)) { 359 | iv = rb_funcall2(a, id_closed, 0, 0); 360 | if (RTEST(iv)) { 361 | rb_raise(rb_eArgError, "closed object"); 362 | } 363 | } 364 | bziv = bz_find_struct(a, 0, 0); 365 | if (bziv) { 366 | if (RTEST(bziv->bz2)) { 367 | rb_raise(rb_eArgError, "invalid data type"); 368 | } 369 | bziv->bz2 = obj; 370 | } else { 371 | iv = Data_Make_Struct(rb_cData, struct bz_iv, 0, free, bziv); 372 | bziv->io = a; 373 | bziv->bz2 = obj; 374 | rb_ary_push(bz_internal_ary, iv); 375 | } 376 | switch (TYPE(a)) { 377 | case T_FILE: 378 | bziv->finalize = RFILE(a)->fptr->finalize; 379 | RFILE(a)->fptr->finalize = (void (*)(struct rb_io_t *, int))bz_io_data_finalize; 380 | break; 381 | case T_DATA: 382 | bziv->finalize = RDATA(a)->dfree; 383 | RDATA(a)->dfree = bz_io_data_finalize; 384 | break; 385 | } 386 | } 387 | bzf->io = a; 388 | bzf->blocks = blocks; 389 | bzf->work = work; 390 | return obj; 391 | } 392 | 393 | /* 394 | * call-seq: 395 | * write(data) 396 | * Actually writes some data into this stream. 
397 | * 398 | * @param [String] data the data to write 399 | * @return [Integer] the length of the data which was written (uncompressed) 400 | * @raise [IOError] if the stream has been closed 401 | */ 402 | VALUE bz_writer_write(VALUE obj, VALUE a) { 403 | struct bz_file *bzf; 404 | int n; 405 | 406 | a = rb_obj_as_string(a); 407 | Get_BZ2(obj, bzf); 408 | if (!bzf->buf) { 409 | if (bzf->state != BZ_OK) { 410 | bz_raise(bzf->state); 411 | } 412 | bzf->state = BZ2_bzCompressInit(&(bzf->bzs), bzf->blocks, 413 | 0, bzf->work); 414 | if (bzf->state != BZ_OK) { 415 | bz_writer_internal_flush(bzf); 416 | bz_raise(bzf->state); 417 | } 418 | bzf->buf = ALLOC_N(char, BZ_RB_BLOCKSIZE + 1); 419 | bzf->buflen = BZ_RB_BLOCKSIZE; 420 | bzf->buf[0] = bzf->buf[bzf->buflen] = '\0'; 421 | } 422 | bzf->bzs.next_in = RSTRING_PTR(a); 423 | bzf->bzs.avail_in = (int) RSTRING_LEN(a); 424 | while (bzf->bzs.avail_in) { 425 | bzf->bzs.next_out = bzf->buf; 426 | bzf->bzs.avail_out = bzf->buflen; 427 | bzf->state = BZ2_bzCompress(&(bzf->bzs), BZ_RUN); 428 | if (bzf->state == BZ_SEQUENCE_ERROR || bzf->state == BZ_PARAM_ERROR) { 429 | bz_writer_internal_flush(bzf); 430 | bz_raise(bzf->state); 431 | } 432 | bzf->state = BZ_OK; 433 | if (bzf->bzs.avail_out < bzf->buflen) { 434 | n = bzf->buflen - bzf->bzs.avail_out; 435 | rb_funcall(bzf->io, id_write, 1, rb_str_new(bzf->buf, n)); 436 | } 437 | } 438 | return INT2NUM(RSTRING_LEN(a)); 439 | } 440 | 441 | /* 442 | * call-seq: 443 | * putc(num) 444 | * 445 | * Write one byte into this stream. 
446 | * @param [Integer, String] num the byte value to write, or a one-character String 447 | * @return [Integer] always 1 448 | * @raise [IOError] if the stream has been closed 449 | */ 450 | VALUE bz_writer_putc(VALUE obj, VALUE a) { 451 | char c = NUM2CHR(a); 452 | return bz_writer_write(obj, rb_str_new(&c, 1)); 453 | } 454 | -------------------------------------------------------------------------------- /ext/bzip2/writer.h: -------------------------------------------------------------------------------- 1 | #ifndef _RB_BZIP2_WRITER_H_ 2 | #define _RB_BZIP2_WRITER_H_ 3 | 4 | #include <ruby.h> 5 | #include "common.h" 6 | 7 | int bz_writer_internal_flush(struct bz_file *bzf); 8 | 9 | /* Instance methods */ 10 | VALUE bz_writer_close(VALUE obj); 11 | VALUE bz_writer_close_bang(VALUE obj); 12 | VALUE bz_writer_closed(VALUE obj); 13 | VALUE bz_writer_flush(VALUE obj); 14 | VALUE bz_writer_init(int argc, VALUE *argv, VALUE obj); 15 | VALUE bz_writer_write(VALUE obj, VALUE a); 16 | VALUE bz_writer_putc(VALUE obj, VALUE a); 17 | 18 | /* Class methods */ 19 | VALUE bz_writer_s_alloc(VALUE obj); 20 | VALUE bz_writer_s_open(int argc, VALUE *argv, VALUE obj); 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- /lib/bzip2.rb: -------------------------------------------------------------------------------- 1 | require 'bzip2/bzip2' 2 | require 'bzip2/version' 3 | 4 | # This is the base module for the +bzip2-ruby+ gem. Beneath it are the classes 5 | # for writing and reading data from bzip2 compressed and uncompressed streams. 
6 | # 7 | # For example usage, see Bzip2::Reader, Bzip2::Writer or the 8 | # {README}[link:docs/file/README.rdoc] 9 | # 10 | # @see Bzip2::Writer 11 | # @see Bzip2::Reader 12 | module Bzip2 13 | end 14 | -------------------------------------------------------------------------------- /lib/bzip2/internals.rb: -------------------------------------------------------------------------------- 1 | # This file is mostly here for documentation purposes, do not require this 2 | 3 | module Bzip2 4 | class << self 5 | alias :bzip2 :compress 6 | alias :bunzip2 :uncompress 7 | alias :decompress :uncompress 8 | end 9 | 10 | # @private 11 | class InternalStr 12 | end 13 | end 14 | -------------------------------------------------------------------------------- /lib/bzip2/reader.rb: -------------------------------------------------------------------------------- 1 | # This file is mostly here for documentation purposes, do not require this 2 | 3 | # 4 | module Bzip2 5 | # Bzip2::Reader is meant to read streams of bz2 compressed bytes. It behaves 6 | # like an IO object with many similar methods. It also includes the Enumerable 7 | # module and each element is a 'line' in the stream. 8 | # 9 | # It can decompress files: 10 | # 11 | # reader = Bzip2::Reader.open('file') 12 | # puts reader.read 13 | # 14 | # reader = Bzip2::Reader.new File.open('file') 15 | # puts reader.gets 16 | # 17 | # And it can also decompress raw strings: 18 | # 19 | # reader = Bzip2::Reader.new compressed_string 20 | # reader = Bzip2::Reader.new Bzip2.compress('compress-me') 21 | class Reader 22 | alias :each_line :each 23 | alias :closed :closed? 24 | alias :eoz :eoz? 25 | alias :eof :eof? 
26 | end 27 | end 28 | -------------------------------------------------------------------------------- /lib/bzip2/version.rb: -------------------------------------------------------------------------------- 1 | module Bzip2 2 | VERSION = "0.2.7" 3 | end -------------------------------------------------------------------------------- /lib/bzip2/writer.rb: -------------------------------------------------------------------------------- 1 | # This file is mostly here for documentation purposes, do not require this 2 | 3 | # 4 | module Bzip2 5 | # A Bzip2::Writer represents a stream which compresses data written to it. 6 | # It can be constructed with another IO object (a File) to which the 7 | # compressed data is written. Otherwise, all data is stored internally as a string and can 8 | # be retrieved via the Bzip2::Writer#flush method 9 | # 10 | # It can write to files: 11 | # 12 | # writer = Bzip2::Writer.open('file') 13 | # writer << data 14 | # writer.close 15 | # 16 | # Bzip2::Writer.open('file'){ |f| f << data } 17 | # 18 | # writer = Bzip2::Writer.new File.open('file', 'w') 19 | # 20 | # And it can output data as a string: 21 | # 22 | # writer = Bzip2::Writer.new 23 | # writer << data 24 | # writer.flush # => data compressed via bz2 25 | # 26 | # @see Bzip2::Writer#initialize The initialize method for examples 27 | class Writer 28 | 29 | alias :finish :flush 30 | alias :closed :closed? 31 | 32 | # Append some data to this buffer, returning the buffer so this method can 33 | # be chained 34 | # 35 | # writer = Bzip2::Writer.new 36 | # writer << 'asdf' << 1 << obj << 'a' 37 | # writer.flush 38 | # 39 | # @param [#to_s] data anything responding to #to_s 40 | # @see IO#<< 41 | def << data 42 | end 43 | 44 | # Adds a number of strings to this buffer. 
A newline is also inserted into 45 | # the buffer after each object 46 | # @see IO#puts 47 | def puts *objs 48 | end 49 | 50 | # Similar to Bzip2::Writer#puts except a newline is not appended after each 51 | # object appended to this buffer 52 | # 53 | # @see IO#print 54 | def print *objs 55 | end 56 | 57 | # Prints data to this buffer with the specified format. 58 | # 59 | # @see Kernel#sprintf 60 | def printf format, *ojbs 61 | end 62 | 63 | end 64 | end 65 | -------------------------------------------------------------------------------- /spec/reader_spec.rb: -------------------------------------------------------------------------------- 1 | # encoding: UTF-8 2 | require 'spec_helper' 3 | 4 | describe Bzip2::Writer do 5 | before(:each) do 6 | @file = "_10lines_" 7 | @data = [ 8 | "00: This is a line\n", 9 | "01: This is a line\n", 10 | "02: This is a line\n", 11 | "03: This is a line\n", 12 | "04: This is a line\n", 13 | "05: This is a line\n", 14 | "06: This is a line\n", 15 | "07: This is a line\n", 16 | "08: This is a line\n", 17 | "09: This is a line\n" 18 | ] 19 | 20 | open("|bzip2 > #{@file}", "w") do |f| 21 | @data.each { |l| f.puts l } 22 | end 23 | end 24 | 25 | after(:each) do 26 | File.delete(@file) if File.exists?(@file) 27 | end 28 | 29 | it "iterate over each line of the file via the foreach method" do 30 | lines = [] 31 | Bzip2::Reader.foreach(@file){ |line| lines << line } 32 | lines.should == @data 33 | 34 | lines.clear 35 | Bzip2::Reader.foreach(@file, nil) do |file| 36 | file.split(/\n/).each{ |line| lines << line + "\n" } 37 | end 38 | lines.should == @data 39 | 40 | count = 0 41 | Bzip2::Reader.foreach(@file, ' ') do |thing| 42 | count += 1 43 | end 44 | count.should == 41 45 | end 46 | 47 | it "returns an array of the lines read via #readlines" do 48 | lines = Bzip2::Reader.readlines(@file) 49 | lines.should == @data 50 | 51 | lines = Bzip2::Reader.readlines(@file, nil) 52 | lines.should == [@data.join] 53 | end 54 | 55 | it "track when 
the stream has been closed" do 56 | f = Bzip2::Reader.open(@file) 57 | f.should_not be_closed 58 | f.close 59 | f.should be_closed 60 | end 61 | 62 | shared_examples_for 'a line iterator' do |method| 63 | it "iterates over the lines when using #each" do 64 | Bzip2::Reader.open(@file) do |file| 65 | list = [] 66 | file.send(method){ |l| list << l } 67 | list.should == @data 68 | end 69 | 70 | Bzip2::Reader.open(@file) do |file| 71 | file.send(method, nil) do |contents| 72 | contents.should == @data.join 73 | end 74 | end 75 | 76 | count = 0 77 | Bzip2::Reader.open(@file) do |file| 78 | file.send(method, ' ') do |thing| 79 | count += 1 80 | end 81 | end 82 | 41.should == count 83 | end 84 | end 85 | 86 | it_should_behave_like 'a line iterator', :each 87 | it_should_behave_like 'a line iterator', :each_line 88 | 89 | it "iterates over the decompressed bytes via #each_byte" do 90 | bytes = @data.join.bytes.to_a 91 | 92 | Bzip2::Reader.open(@file) do |file| 93 | file.each_byte do |b| 94 | b.should == bytes.shift 95 | end 96 | end 97 | 98 | bytes.size.should == 0 99 | end 100 | 101 | it "keeps track of when eof has been reached" do 102 | Bzip2::Reader.open(@file) do |file| 103 | @data.size.times do 104 | file.should_not be_eof 105 | file.gets 106 | end 107 | 108 | file.should be_eof 109 | end 110 | end 111 | 112 | it "gets only one byte at a time via getc and doesn't raise an exception" do 113 | bytes = @data.join.bytes.to_a 114 | 115 | Bzip2::Reader.open(@file) do |file| 116 | while ch = file.getc 117 | ch.should == bytes.shift 118 | end 119 | 120 | file.getc.should be_nil 121 | end 122 | 123 | bytes.size.should == 0 124 | end 125 | 126 | it "reads an entire line via gets" do 127 | Bzip2::Reader.open(@file) do |file| 128 | lines = [] 129 | while line = file.gets 130 | lines << line 131 | end 132 | lines.should == @data 133 | 134 | file.gets.should be_nil 135 | end 136 | 137 | Bzip2::Reader.open(@file) do |file| 138 | lines = [] 139 | while line = file.gets("line\n") 140 
| lines << line 141 | end 142 | lines.should == @data 143 | 144 | file.gets.should be_nil 145 | end 146 | 147 | lines = '' 148 | Bzip2::Reader.open(@file) do |file| 149 | while contents = file.gets(nil) 150 | lines << contents 151 | end 152 | end 153 | lines.should == @data.join 154 | 155 | count = 0 156 | Bzip2::Reader.open(@file) do |file| 157 | count += 1 while file.gets(' ') 158 | end 159 | 41.should == count 160 | end 161 | 162 | it "reads the entire file or a specified length when using #read" do 163 | Bzip2::Reader.open(@file) do |file| 164 | file.read.should == @data.join 165 | end 166 | 167 | Bzip2::Reader.open(@file) do |file| 168 | file.read(12).should == "00: This is " 169 | file.read(12).should == "a line\n01: T" 170 | end 171 | end 172 | 173 | it "reads one character and returns the byte value of the character read" do 174 | count = 0 175 | data = @data.join 176 | Bzip2::Reader.open(@file) do |file| 177 | @data.join.bytes do |byte| 178 | file.readchar.should == byte 179 | end 180 | 181 | lambda { file.readchar }.should raise_error(Bzip2::EOZError) 182 | end 183 | end 184 | 185 | it "reads one line at a time and raises and exception when no more" do 186 | count = 0 187 | Bzip2::Reader.open(@file) do |file| 188 | lines = [] 189 | @data.size.times do |count| 190 | lines << file.readline 191 | end 192 | 193 | lines.should == @data 194 | lambda { file.readline }.should raise_error(Bzip2::EOZError) 195 | end 196 | 197 | Bzip2::Reader.open(@file) do |file| 198 | file.readline(nil).should == @data.join 199 | 200 | lambda { file.readline }.should raise_error(Bzip2::EOZError) 201 | end 202 | 203 | Bzip2::Reader.open(@file) do |file| 204 | 41.times { |count| file.readline(' ') } 205 | lambda { file.readline }.should raise_error(Bzip2::EOZError) 206 | end 207 | end 208 | 209 | it "returns an array of lines in the file" do 210 | Bzip2::Reader.open(@file) do |file| 211 | file.readlines.should == @data 212 | end 213 | 214 | Bzip2::Reader.open(@file) do |file| 215 | 
file.readlines(nil).should == [@data.join] 216 | end 217 | end 218 | 219 | it "rewinds the stream when #ungetc is called and returns that byte next" do 220 | Bzip2::Reader.open(@file) do |file| 221 | '0'.bytes.first.should == file.getc 222 | '0'.bytes.first.should == file.getc 223 | ':'.bytes.first.should == file.getc 224 | ' '.bytes.first.should == file.getc 225 | 226 | file.ungetc(':'.bytes.first).should be_nil 227 | ':'.bytes.first.should == file.getc 228 | 229 | file.read 230 | 231 | file.ungetc('A'.bytes.first).should be_nil 232 | 'A'.bytes.first.should == file.getc 233 | end 234 | end 235 | 236 | it "rewinds the stream when #ungets is called" do 237 | Bzip2::Reader.open(@file) do |file| 238 | @data[0].should == file.gets 239 | 1.should == file.lineno 240 | file.ungets(@data[0]).should be_nil 241 | @data[0].should == file.gets 242 | end 243 | end 244 | 245 | it "reads entire lines via readline and throws an exception when there is" do 246 | string = File.read(@file) 247 | file = Bzip2::Reader.new(string) 248 | lines = [] 249 | @data.size.times do |count| 250 | lines << file.readline 251 | end 252 | lines.should == @data 253 | lambda { file.readline }.should raise_error(Bzip2::EOZError) 254 | file.close 255 | 256 | file = Bzip2::Reader.new(string) 257 | file.readline(nil).should == @data.join 258 | lambda { file.readline }.should raise_error(Bzip2::EOZError) 259 | file.close 260 | end 261 | end 262 | -------------------------------------------------------------------------------- /spec/spec_helper.rb: -------------------------------------------------------------------------------- 1 | # encoding: UTF-8 2 | require 'bundler/setup' 3 | 4 | require 'rspec/core' 5 | require 'bzip2' 6 | 7 | 8 | RSpec.configure do |config| 9 | config.color_enabled = true 10 | end 11 | 12 | # back-port 1.9 method so the tests will pass in 1.8 as well 13 | if RUBY_VERSION.include?("1.8") 14 | class String 15 | def getbyte(idx) 16 | self[idx] 17 | end 18 | end 19 | end 
-------------------------------------------------------------------------------- /spec/writer_spec.rb: -------------------------------------------------------------------------------- 1 | # encoding: UTF-8 2 | require 'spec_helper' 3 | 4 | describe Bzip2::Writer do 5 | let(:file){ File.expand_path('../_10lines_', __FILE__) } 6 | 7 | class Dummy 8 | def to_s 9 | "dummy" 10 | end 11 | end 12 | 13 | after(:each) do 14 | File.delete(file) if File.exists?(file) 15 | end 16 | 17 | it "performs like IO#<< when using the #<< method" do 18 | Bzip2::Writer.open(file, "w") do |file| 19 | file << 1 << "\n" << Dummy.new << "\n" << "cat\n" 20 | end 21 | expected = [ "1\n", "dummy\n", "cat\n"] 22 | actual = [] 23 | Bzip2::Reader.foreach(file){ |line| actual.push line } 24 | actual.should == expected 25 | end 26 | 27 | it "doesn't immediately flush the data when written to" do 28 | io = File.new(file, "w") 29 | bz2 = Bzip2::Writer.new(io) 30 | bz2 << 1 << "\n" << Dummy.new << "\n" << "cat\n" 31 | bz = Bzip2::Reader.new(file) 32 | lambda { bz.gets }.should raise_error(Bzip2::Error) 33 | bz = Bzip2::Reader.open(file) 34 | lambda { bz.gets }.should raise_error(Bzip2::Error) 35 | io.close 36 | lambda { Bzip2::Reader.new(io) }.should raise_error(IOError) 37 | end 38 | 39 | it "behaves the same as IO#print when using #print" do 40 | Bzip2::Writer.open(file) do |file| 41 | file.print "foo\n" * 4096, "\n" * 4096, 42 | "bar" * 4096, "\n" * 4096, "zot\n" * 1024 43 | end 44 | 45 | Bzip2::Reader.open(file) do |file| 46 | file.gets('').should == "foo\n" * 4096 + "\n" 47 | file.gets('').should == "bar" * 4096 + "\n\n" 48 | file.gets('').should == "zot\n" * 1024 49 | end 50 | end 51 | 52 | it "respects specific global variables like IO#print does via #print" do 53 | Bzip2::Writer.open(file) do |file| 54 | file.print "hello" 55 | file.print 1,2 56 | $_ = "wombat\n" 57 | file.print 58 | $\ = ":" 59 | $, = "," 60 | file.print 3, 4 61 | file.print 5, 6 62 | $\ = nil 63 | file.print "\n" 64 | $, = 
nil 65 | end 66 | 67 | Bzip2::Reader.open(file) do |file| 68 | file.gets(nil).should == "hello12wombat\n3,4:5,6:\n" 69 | end 70 | end 71 | 72 | it "only writes one byte via the #putc method" do 73 | Bzip2::Writer.open(file, "wb") do |file| 74 | file.putc "A" 75 | 0.upto(255) { |ch| file.putc ch } 76 | end 77 | 78 | Bzip2::Reader.open(file, "rb") do |file| 79 | file.getc.should == 'A'.bytes.first 80 | 0.upto(255) { |ch| file.getc.should == ch } 81 | end 82 | end 83 | 84 | it "behaves the same as IO#puts when using #puts" do 85 | Bzip2::Writer.open(file, "w") do |file| 86 | file.puts "line 1", "line 2" 87 | file.puts [ Dummy.new, 4 ] 88 | end 89 | 90 | Bzip2::Reader.open(file) do |file| 91 | file.gets.should == "line 1\n" 92 | file.gets.should == "line 2\n" 93 | file.gets.should == "dummy\n" 94 | file.gets.should == "4\n" 95 | end 96 | end 97 | 98 | it "writes data successfully to a file and returns the length of the data" do 99 | Bzip2::Writer.open(file, "w") do |file| 100 | file.write('*' * 10).should == 10 101 | file.write('!' * 5).should == 5 102 | file.write('').should == 0 103 | file.write(1).should == 1 104 | file.write(2.30000).should == 3 105 | file.write("\n").should == 1 106 | end 107 | 108 | Bzip2::Reader.open(file) do |file| 109 | file.gets.should == "**********!!!!!12.3\n" 110 | end 111 | end 112 | 113 | it "returns the compressed data when no constructor argument is specified" do 114 | file = Bzip2::Writer.new 115 | file << ('*' * 10) << ('!' 
* 5) << '' << 1 << 2.3000 << "\n" 116 | Bzip2::bunzip2(file.flush).should == "**********!!!!!12.3\n" 117 | end 118 | 119 | it "compresses data via the #bzip2 shortcut" do 120 | data = ["**********!!!!!12.3\n"] 121 | data << "foo\n"*4096 + "\n"*4096 + "bar"*4096 + "\n"*4096 + "zot\n"*1024 122 | 123 | data.each do |test| 124 | Bzip2::bunzip2(Bzip2::bzip2(test)).should == test 125 | end 126 | end 127 | 128 | it "correctly reports when a writer is closed" do 129 | writer = Bzip2::Writer.open(file, 'w') 130 | writer.should_not be_closed 131 | writer.close 132 | writer.should be_closed 133 | end 134 | end 135 | --------------------------------------------------------------------------------