├── .gitignore
├── .rspec
├── .travis.yml
├── .yardopts
├── CHANGELOG.md
├── Gemfile
├── README.md
├── Rakefile
├── bzip2-ruby.gemspec
├── ext
    └── bzip2
    │   ├── bzip2.c
    │   ├── common.c
    │   ├── common.h
    │   ├── extconf.rb
    │   ├── reader.c
    │   ├── reader.h
    │   ├── writer.c
    │   └── writer.h
├── lib
    ├── bzip2.rb
    └── bzip2
    │   ├── internals.rb
    │   ├── reader.rb
    │   ├── version.rb
    │   └── writer.rb
└── spec
    ├── reader_spec.rb
    ├── spec_helper.rb
    └── writer_spec.rb


/.gitignore:
--------------------------------------------------------------------------------
 1 | _10lines_
 2 | doc
 3 | ext/Makefile
 4 | *.o
 5 | *.so
 6 | *.bundle
 7 | pkg
 8 | .bundle
 9 | tmp
10 | Gemfile.lock
11 | 


--------------------------------------------------------------------------------
/.rspec:
--------------------------------------------------------------------------------
1 | --colour
2 | --format=documentation
3 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: ruby
 2 | rvm:
 3 |   - 1.8.7
 4 |   - 1.9.2
 5 |   - 1.9.3
 6 |   - rbx
 7 |   - ree
 8 |   - ruby-head
 9 |   - rbx-18mode
10 |   - rbx-19mode
11 | matrix:
12 |   allow_failures:
13 |     - rvm: rbx-18mode
14 |     - rvm: rbx-19mode
15 | script:
16 |   - bundle exec rake
17 |   - bundle exec rspec
18 | 


--------------------------------------------------------------------------------
/.yardopts:
--------------------------------------------------------------------------------
1 | --no-private
2 | 


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
 1 | ## 0.2.7 2010-11-16
 2 | 
 3 | * Add documentation for an overview of the Bzip2 module
 4 | * Document the class methods on Bzip2 and get them to show up in yarddoc
 5 | * Remove the ConfigError class because searching for it showed no instances of its usage
 6 | * Add a Usage section to the README and a bit about adding it to a Gemfile
 7 | * Improve the reader_spec.rb by making it more resilient in lots of places and a bit more descriptive/terse in
 8 | * Add a lot more documentation for the Reader class and also touch up the Writer class a bit
 9 | * Make the Writer specs more descriptive by giving them some doc strings.
10 | * Wrap up documentation of the Bzip2::Writer class.
11 | * Add lib/bzip2-ruby.rb so it's not always necessary to specify to require 'bzip2' in Gemfiles and such
12 | * Start documenting the Bzip2::Writer class
13 | * Fix a few compiler warnings
14 | * Removed some dead code
15 | * Fix for ruby 1.9 compatibility.
16 | * Fix segfault when exiting in ruby 1.9
17 | * Follow the newer conventions of rspec
18 | * Migrate to using Bundler instead of Jeweler
19 | * use malloc/free instead of ruby_xmalloc/ruby_xfree
20 | 
21 | ## 0.2.6 2009-10-6
22 | 
23 | * Updated to support Ruby 1.8.5
24 | 
25 | ## 0.2.5 2009-06-07
26 | 
27 | * initial conversion of original tests over to rspec
28 | 
29 | ## 0.2.4 2009-05-02
30 | 
31 | * renamed BZ2 module/namespace to Bzip2
32 | 
33 | ## 0.2.3 2009-05-02
34 | 
35 | * renamed gem to bzip2-ruby from bz2
36 | * initial conversion to jeweler
37 | * bundling gemspec
38 | * README and file structure organization updates
39 | * updated Init_*() ruby initializer function to match new gem name
40 | 
41 | ## 0.2.2 2008-12-22
42 | 
43 | * 1 major enhancement:
44 |   * Gemify bz2 library from http://moulon.inra.fr/ruby/bz2.html
45 |   * All credit goes to Guy Decoux <ts@moulon.inra.fr>
46 | 
47 | ## 0.2.1
48 | 
49 | * replaced rb_proc_new() with bz_proc_new() for 1.6
50 | (Thanks "Akinori MUSHA" <knu@iDaemons.org>)
51 | 
52 | ## 0.1.9
53 | 
54 | * corrected BZ_FINISH_OK (Thanks Rudi Cilibrasi <Rudi.Cilibrasi@cwi.nl>)
55 | 
56 | ## 0.1.6
57 | 
58 | * adapted for 1.8.0 (ihi)
59 | * modified ::new
60 | * BZ2::Writer#finish (same as #flush)
61 | 
62 | ## 0.1.5
63 | 
64 | * corrected extconf.rb
65 | * added close!
66 | * replaced close(false) by #finish
67 | * corrected #flush
68 | 
69 | ## 0.1.4
70 | 
71 | * corrected bz_iv
72 | * #to_io
73 | * corrected ::Reader#close
74 | 
75 | ## 0.1.3
76 | 
77 | * corrected #lineno
78 | * corrected ::Writer::new(nil)
79 | * taint result
80 | 
81 | ## 0.1.2
82 | 
83 | * better (???) interface for #read
84 | * finalize for objects which respond to #closed?
85 | 
86 | ## 0.1.1
87 | 
88 | * better interface for T_FILE
89 | * corrected bug with gc (buf)
90 | * Reader#lineno, Reader#lineno=, Reader#ungets
91 | * corrected Reader#unused
92 | * taint check in #initialize
93 | * BZ2::bzip2, BZ2::bunzip2
94 | * too many exceptions


--------------------------------------------------------------------------------
/Gemfile:
--------------------------------------------------------------------------------
1 | source 'https://rubygems.org'
2 | 
3 | gemspec
4 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Ruby C bindings to libbzip2
 2 | 
 3 | ## Installation
 4 | 
 5 | First make sure you’ve got Gemcutter in your sources list:
 6 | 
 7 | `gem sources -a http://gemcutter.org`
 8 | 
 9 | Then go ahead and install it as usual:
10 | 
11 | `sudo gem install bzip2-ruby`
12 | 
13 | You may need to specify:
14 | 
15 | `--with-bz2-dir=<include file directory for libbzip2>`
16 | 
17 | Or in a Gemfile
18 | 
19 | `gem 'bzip2-ruby'`
20 | 
21 | ## Usage
22 | 
23 | The full documentation is hosted on [rdoc.info](http://rdoc.info/github/brianmario/bzip2-ruby/master/frames).
24 | 
25 | Here's a quick overview, however:
26 | 
27 | ``` ruby
28 | require 'bzip2'
29 | 
30 | # Quick shortcuts
31 | data = Bzip2.compress 'string'
32 | Bzip2.uncompress data
33 | 
34 | # Creating a bz2 compressed file
35 | writer = Bzip2::Writer.new File.open('file', 'w')
36 | writer << 'data1'
37 | writer.puts 'data2'
38 | writer.print 'data3'
39 | writer.printf '%s', 'data4'
40 | writer.close
41 | 
42 | Bzip2::Writer.open('file'){ |f| f << data }
43 | 
44 | # Reading a bz2 compressed file
45 | reader = Bzip2::Reader.new File.open('file')
46 | reader.gets # => "data1data2\n"
47 | reader.read # => 'data3data4'
48 | 
49 | reader.readline # => raises Bzip2::EOZError
50 | 
51 | Bzip2::Reader.open('file'){ |f| puts f.read }
52 | ```
53 | 
54 | ## Copying
55 | 
56 | ```
57 | This extension module is copyrighted free software by Guy Decoux
58 | You can redistribute it and/or modify it under the same term as Ruby.
59 | Guy Decoux <ts@moulon.inra.fr>
60 | ```
61 | 
62 | ## Modifications from original version
63 | 
64 | * Switch to Jeweler
65 | * Renamed BZ2 module/namespace to Bzip2
66 | * Renamed compiled binary from "bz2" to "bzip2"
67 | * Renamed gem from "bz2" to "bzip2-ruby"
68 | * Converted original tests to rspec
69 | * 1.9 compatibility
70 | 


--------------------------------------------------------------------------------
/Rakefile:
--------------------------------------------------------------------------------
 1 | require 'bundler'
 2 | Bundler::GemHelper.install_tasks
 3 | 
 4 | # rspec
 5 | begin
 6 |   require 'rspec'
 7 |   require 'rspec/core/rake_task'
 8 | 
 9 |   desc "Run all examples with RCov"
10 |   RSpec::Core::RakeTask.new('spec:rcov') do |t|
11 |     t.rcov = true
12 |   end
13 |   RSpec::Core::RakeTask.new('spec') do |t|
14 |     t.verbose = true
15 |   end
16 | 
17 |   task :default => :spec
18 | rescue LoadError
19 |   puts "rspec, or one of its dependencies, is not available. Install it with: sudo gem install rspec"
20 | end
21 | 
22 | # rake-compiler
23 | require 'rake' unless defined? Rake
24 | 
25 | gem 'rake-compiler', '>= 0.7.5'
26 | require "rake/extensiontask"
27 | 
28 | Rake::ExtensionTask.new('bzip2') do |ext|
29 |   ext.cross_compile = true
30 |   ext.cross_platform = ['x86-mingw32', 'x86-mswin32-60']
31 | 
32 |   ext.lib_dir = File.join 'lib', 'bzip2'
33 | end
34 | 
35 | Rake::Task[:spec].prerequisites << :compile
36 | 


--------------------------------------------------------------------------------
/bzip2-ruby.gemspec:
--------------------------------------------------------------------------------
 1 | require './lib/bzip2/version'
 2 | 
 3 | Gem::Specification.new do |s|
 4 |   s.name     = 'bzip2-ruby'
 5 |   s.version  = Bzip2::VERSION
 6 |   s.authors  = ['Guy Decoux', 'Brian Lopezs']
 7 |   s.date     = Time.now.utc.strftime("%Y-%m-%d")
 8 |   s.email    = ['seniorlopez@gmail.com']
 9 |   s.extensions = ['ext/bzip2/extconf.rb']
10 |   s.files = `git ls-files`.split("\n")
11 |   s.homepage = 'http://github.com/brianmario/bzip2-ruby'
12 |   s.rdoc_options = ["--charset=UTF-8"]
13 |   s.require_paths = ['lib']
14 |   s.rubygems_version = %q{1.4.2}
15 |   s.summary  = 'Ruby C bindings to libbzip2.'
16 |   s.test_files = `git ls-files spec`.split("\n")
17 | 
18 |   # tests
19 |   s.add_development_dependency 'rake-compiler', ">= 0.7.5"
20 |   s.add_development_dependency 'rspec', ">= 2.0.0"
21 | end
22 | 


--------------------------------------------------------------------------------
/ext/bzip2/bzip2.c:
--------------------------------------------------------------------------------
  1 | #include <ruby.h>
  2 | #include <bzlib.h>
  3 | 
  4 | #include "common.h"
  5 | #include "reader.h"
  6 | #include "writer.h"
  7 | 
  8 | VALUE bz_cWriter, bz_cReader, bz_cInternal; /* Bzip2::Writer, Bzip2::Reader, Bzip2::InternalStr; assigned in Init_bzip2 */
  9 | VALUE bz_eError, bz_eEOZError; /* Bzip2::Error and its subclass Bzip2::EOZError */
 10 | 
 11 | VALUE bz_internal_ary; /* array registered as a GC root in Init_bzip2; walked by bz_internal_finalize */
 12 | 
 13 | ID id_new, id_write, id_open, id_flush, id_read; /* interned method names, filled in by Init_bzip2 */
 14 | ID id_closed, id_close, id_str; /* "closed?", "close" and "to_str" */
 15 | 
 16 | void bz_internal_finalize(VALUE data) { /* process-exit hook installed with rb_set_end_proc in Init_bzip2; `data` is unused */
 17 |     VALUE elem;
 18 |     int closed, i;
 19 |     struct bz_iv *bziv;
 20 |     struct bz_file *bzf;
 21 | 
 22 |     for (i = 0; i < RARRAY_LEN(bz_internal_ary); i++) { /* each element wraps a struct bz_iv */
 23 |         elem = RARRAY_PTR(bz_internal_ary)[i];
 24 |         Data_Get_Struct(elem, struct bz_iv, bziv);
 25 |         if (bziv->bz2) { /* only entries that still reference a stream object */
 26 |             RDATA(bziv->bz2)->dfree = free; /* the stream's free function becomes plain free() */
 27 |             if (TYPE(bziv->io) == T_FILE) {
 28 |                 RFILE(bziv->io)->fptr->finalize = bziv->finalize; /* presumably restores the io's own finalizer - confirm where bz_iv is filled in */
 29 |             } else if (TYPE(bziv->io) == T_DATA) {
 30 |                 RDATA(bziv->io)->dfree = bziv->finalize;
 31 |             }
 32 |             Data_Get_Struct(bziv->bz2, struct bz_file, bzf);
 33 |             closed = bz_writer_internal_flush(bzf); /* not defined in this file; result is used below as an "io already closed" flag */
 34 |             if (bzf->flags & BZ2_RB_CLOSE) {
 35 |                 bzf->flags &= ~BZ2_RB_CLOSE; /* clear the bit before closing */
 36 |                 if (!closed && rb_respond_to(bzf->io, id_close)) {
 37 |                     rb_funcall2(bzf->io, id_close, 0, 0); /* io.close */
 38 |                 }
 39 |             }
 40 |         }
 41 |     }
 42 | }
 43 | 
 44 | /*
 45 |  * call-seq:
 46 |  *   compress(str)
 47 |  *
 48 |  * Shortcut for compressing just a string.
 49 |  *
 50 |  *    Bzip2.uncompress Bzip2.compress('data') # => 'data'
 51 |  *
 52 |  * @param [String] str the string to compress (converted with rb_str_to_str)
 53 |  * @return [String] +str+ compressed with bz2
 54 |  */
 55 | VALUE bz_compress(VALUE self, VALUE str) {
 56 |     VALUE bz2, argv[1] = {Qnil}; /* argv is the single nil argument for Writer.new */
 57 | 
 58 |     str = rb_str_to_str(str);
 59 |     bz2 = rb_funcall2(bz_cWriter, id_new, 1, argv); /* Bzip2::Writer.new(nil) */
 60 |     if (OBJ_TAINTED(str)) { /* carry the input's taint over to the writer's io */
 61 |         struct bz_file *bzf;
 62 |         Data_Get_Struct(bz2, struct bz_file, bzf);
 63 |         OBJ_TAINT(bzf->io);
 64 |     }
 65 |     bz_writer_write(bz2, str);
 66 |     return bz_writer_close(bz2); /* whatever closing the writer returns is the result */
 67 | }
 68 | 
 69 | /*
 70 |  * Returns the io stream underlying this stream. If the stream was constructed
 71 |  * with a file, that is returned. Otherwise, an empty string is returned.
 72 |  *
 73 |  * @return [File, String] similar to whatever the stream was constructed with
 74 |  * @raise [IOError] if the stream has been closed
 75 |  */
 76 | VALUE bz_to_io(VALUE obj) {
 77 |     struct bz_file *bzf;
 78 | 
 79 |     Get_BZ2(obj, bzf); /* raises IOError "closed IO" when bzf->io is nil or false */
 80 |     return bzf->io;
 81 | }
 82 | 
 83 | VALUE bz_str_read(int argc, VALUE *argv, VALUE obj) { /* InternalStr#read([len]): next slice of the wrapped string, or nil */
 84 |     struct bz_str *bzs;
 85 |     VALUE res, len;
 86 |     int count;
 87 | 
 88 |     Data_Get_Struct(obj, struct bz_str, bzs);
 89 |     rb_scan_args(argc, argv, "01", &len); /* one optional argument */
 90 |     if (NIL_P(len)) {
 91 |         count = (int) RSTRING_LEN(bzs->str); /* no length given: ask for the whole string */
 92 |     } else {
 93 |         count = NUM2INT(len);
 94 |         if (count < 0) {
 95 |             rb_raise(rb_eArgError, "negative length %d given", count);
 96 |         }
 97 |     }
 98 |     if (!count || bzs->pos == -1) { /* zero-length request, or string already exhausted (pos == -1) */
 99 |         return Qnil;
100 |     }
101 |     if (count >= RSTRING_LEN(bzs->str) - bzs->pos) { /* compare against what is left; pos + count could overflow int */
102 |         res = rb_str_new(RSTRING_PTR(bzs->str) + bzs->pos,
103 |             RSTRING_LEN(bzs->str) - bzs->pos);
104 |         bzs->pos = -1; /* mark the string as fully consumed */
105 |     } else {
106 |         res = rb_str_new(RSTRING_PTR(bzs->str) + bzs->pos, count);
107 |         bzs->pos += count;
108 |     }
109 |     return res;
110 | }
111 | 
112 | /*
113 |  * call-seq:
114 |  *    uncompress(data)
115 |  * Shortcut for decompressing a string holding bz2 compressed data.
116 |  *
117 |  *    Bzip2.uncompress Bzip2.compress('asdf') # => 'asdf'
118 |  *
119 |  * @param [String] data bz2 compressed data
120 |  * @return [String] the decompressed contents of +data+
121 |  * @raise [Bzip2::Error] if +data+ is not valid bz2 data
122 |  */
123 | VALUE bz_uncompress(VALUE self, VALUE data) {
124 |     VALUE length = Qnil;                 /* nil is handed to Reader#read as its length argument */
125 |     VALUE source = rb_str_to_str(data);
126 |     VALUE reader = rb_funcall2(bz_cReader, id_new, 1, &source);
127 | 
128 |     return bz_reader_read(1, &length, reader);
129 | }
130 | 
131 | /*
132 |  * Builds an instance: calls +allocate+ on the receiver, then runs +initialize+.
133 |  *
134 |  * @see Bzip2::Writer#initialize
135 |  * @see Bzip2::Reader#initialize
136 |  * @private
137 |  */
138 | VALUE bz_s_new(int argc, VALUE *argv, VALUE obj) {
139 |     VALUE instance = rb_funcall2(obj, rb_intern("allocate"), 0, 0);
140 |     rb_obj_call_init(instance, argc, argv);
141 |     return instance;
142 | }
143 | 
144 | void Init_bzip2() { /* extension entry point: defines the Bzip2 module, its errors and the Writer/Reader/InternalStr classes */
145 |     VALUE bz_mBzip2, bz_mBzip2Singleton;
146 | 
147 |     bz_internal_ary = rb_ary_new();
148 |     rb_global_variable(&bz_internal_ary); /* keep the array alive across GC */
149 |     rb_set_end_proc(bz_internal_finalize, Qnil); /* run bz_internal_finalize when the interpreter exits */
150 | 
151 |     id_new    = rb_intern("new");
152 |     id_write  = rb_intern("write");
153 |     id_open   = rb_intern("open");
154 |     id_flush  = rb_intern("flush");
155 |     id_read   = rb_intern("read");
156 |     id_close  = rb_intern("close");
157 |     id_closed = rb_intern("closed?");
158 |     id_str    = rb_intern("to_str");
159 | 
160 |     bz_mBzip2    = rb_define_module("Bzip2");
161 |     bz_eError    = rb_define_class_under(bz_mBzip2, "Error", rb_eIOError); /* Bzip2::Error < IOError */
162 |     bz_eEOZError = rb_define_class_under(bz_mBzip2, "EOZError", bz_eError); /* Bzip2::EOZError < Bzip2::Error */
163 | 
164 |     bz_mBzip2Singleton = rb_singleton_class(bz_mBzip2); /* aliases for module-level methods are defined on the singleton class */
165 |     rb_define_singleton_method(bz_mBzip2, "compress",   bz_compress,    1);
166 |     rb_define_singleton_method(bz_mBzip2, "uncompress", bz_uncompress,  1);
167 |     rb_define_alias(bz_mBzip2Singleton, "bzip2",      "compress");
168 |     rb_define_alias(bz_mBzip2Singleton, "decompress", "uncompress");
169 |     rb_define_alias(bz_mBzip2Singleton, "bunzip2",    "uncompress");
170 | 
171 |     /*
172 |       Writer
173 |     */
174 |     bz_cWriter = rb_define_class_under(bz_mBzip2, "Writer", rb_cData);
175 | #if HAVE_RB_DEFINE_ALLOC_FUNC
176 |     rb_define_alloc_func(bz_cWriter, bz_writer_s_alloc);
177 | #else
178 |     rb_define_singleton_method(bz_cWriter, "allocate", bz_writer_s_alloc, 0);
179 | #endif
180 |     rb_define_singleton_method(bz_cWriter, "new",   bz_s_new,            -1);
181 |     rb_define_singleton_method(bz_cWriter, "open",  bz_writer_s_open,    -1);
182 |     rb_define_method(bz_cWriter, "initialize",      bz_writer_init,      -1);
183 |     rb_define_method(bz_cWriter, "write",           bz_writer_write,      1);
184 |     rb_define_method(bz_cWriter, "putc",            bz_writer_putc,       1);
185 |     rb_define_method(bz_cWriter, "puts",            rb_io_puts,          -1); /* puts/print/printf/<< reuse Ruby's IO implementations */
186 |     rb_define_method(bz_cWriter, "print",           rb_io_print,         -1);
187 |     rb_define_method(bz_cWriter, "printf",          rb_io_printf,        -1);
188 |     rb_define_method(bz_cWriter, "<<",              rb_io_addstr,         1);
189 |     rb_define_method(bz_cWriter, "flush",           bz_writer_flush,      0);
190 |     rb_define_method(bz_cWriter, "close",           bz_writer_close,      0);
191 |     rb_define_method(bz_cWriter, "close!",          bz_writer_close_bang, 0);
192 |     rb_define_method(bz_cWriter, "closed?",         bz_writer_closed,     0);
193 |     rb_define_method(bz_cWriter, "to_io",           bz_to_io,             0);
194 |     rb_define_alias(bz_cWriter, "finish", "flush"); /* Writer#finish is the same method as #flush */
195 |     rb_define_alias(bz_cWriter, "closed", "closed?");
196 | 
197 |     /*
198 |       Reader
199 |     */
200 |     bz_cReader = rb_define_class_under(bz_mBzip2, "Reader", rb_cData);
201 |     rb_include_module(bz_cReader, rb_mEnumerable); /* Enumerable is built on #each, defined below */
202 | #if HAVE_RB_DEFINE_ALLOC_FUNC
203 |     rb_define_alloc_func(bz_cReader, bz_reader_s_alloc);
204 | #else
205 |     rb_define_singleton_method(bz_cReader, "allocate", bz_reader_s_alloc, 0);
206 | #endif
207 |     rb_define_singleton_method(bz_cReader, "new",       bz_s_new,         -1);
208 |     rb_define_singleton_method(bz_cReader, "open",      bz_reader_s_open, -1);
209 |     rb_define_singleton_method(bz_cReader, "foreach",   bz_reader_s_foreach,   -1);
210 |     rb_define_singleton_method(bz_cReader, "readlines", bz_reader_s_readlines, -1);
211 |     rb_define_method(bz_cReader, "initialize",  bz_reader_init,      -1);
212 |     rb_define_method(bz_cReader, "read",        bz_reader_read,      -1);
213 |     rb_define_method(bz_cReader, "unused",      bz_reader_unused,     0);
214 |     rb_define_method(bz_cReader, "unused=",     bz_reader_set_unused, 1);
215 |     rb_define_method(bz_cReader, "ungetc",      bz_reader_ungetc,     1);
216 |     rb_define_method(bz_cReader, "ungets",      bz_reader_ungets,     1);
217 |     rb_define_method(bz_cReader, "getc",        bz_reader_getc,       0);
218 |     rb_define_method(bz_cReader, "gets",        bz_reader_gets_m,    -1);
219 |     rb_define_method(bz_cReader, "readchar",    bz_reader_readchar,   0);
220 |     rb_define_method(bz_cReader, "readline",    bz_reader_readline,  -1);
221 |     rb_define_method(bz_cReader, "readlines",   bz_reader_readlines, -1);
222 |     rb_define_method(bz_cReader, "each",        bz_reader_each_line, -1);
223 |     rb_define_method(bz_cReader, "each_byte",   bz_reader_each_byte,  0);
224 |     rb_define_method(bz_cReader, "close",       bz_reader_close,      0);
225 |     rb_define_method(bz_cReader, "close!",      bz_reader_close_bang, 0);
226 |     rb_define_method(bz_cReader, "finish",      bz_reader_finish,     0);
227 |     rb_define_method(bz_cReader, "closed?",     bz_reader_closed,     0);
228 |     rb_define_method(bz_cReader, "eoz?",        bz_reader_eoz,        0);
229 |     rb_define_method(bz_cReader, "eof?",        bz_reader_eof,        0);
230 |     rb_define_method(bz_cReader, "lineno",      bz_reader_lineno,     0);
231 |     rb_define_method(bz_cReader, "lineno=",     bz_reader_set_lineno, 1);
232 |     rb_define_method(bz_cReader, "to_io",       bz_to_io,             0);
233 |     rb_define_alias(bz_cReader, "each_line", "each");
234 |     rb_define_alias(bz_cReader, "closed", "closed?");
235 |     rb_define_alias(bz_cReader, "eoz", "eoz?");
236 |     rb_define_alias(bz_cReader, "eof", "eof?");
237 | 
238 |     /*
239 |       Internal
240 |     */
241 |     bz_cInternal = rb_define_class_under(bz_mBzip2, "InternalStr", rb_cData);
242 | #if HAVE_RB_DEFINE_ALLOC_FUNC
243 |     rb_undef_alloc_func(bz_cInternal); /* InternalStr cannot be allocated or initialized from Ruby code */
244 | #else
245 |     rb_undef_method(CLASS_OF(bz_cInternal), "allocate");
246 | #endif
247 |     rb_undef_method(CLASS_OF(bz_cInternal), "new");
248 |     rb_undef_method(bz_cInternal, "initialize");
249 |     rb_define_method(bz_cInternal, "read", bz_str_read, -1);
250 | }
251 | 


--------------------------------------------------------------------------------
/ext/bzip2/common.c:
--------------------------------------------------------------------------------
 1 | #include <ruby.h>
 2 | #include <bzlib.h>
 3 | 
 4 | #include "common.h"
 5 | 
 6 | void bz_file_mark(struct bz_file * bzf) { /* GC mark callback: keeps both Ruby objects held by the struct alive */
 7 |     rb_gc_mark(bzf->in);
 8 |     rb_gc_mark(bzf->io);
 9 | }
10 | 
11 | void * bz_malloc(void *opaque, int m, int n) { /* allocator for bz_stream.bzalloc: m items of n bytes; opaque is unused */
12 |     return malloc((size_t) m * (size_t) n); /* multiply as size_t so the product cannot overflow int */
13 | }
14 | 
15 | void bz_free(void *opaque, void *p) { /* counterpart of bz_malloc for bz_stream.bzfree; opaque is unused */
16 |     free(p);
17 | }
18 | 
19 | VALUE bz_raise(int error) {
20 |     /* Raises for a libbzip2 status code: Bzip2::Error unless a case picks another class. Never returns. */
21 |     VALUE klass = bz_eError;
22 |     const char *text;
23 | 
24 |     switch (error) {
25 |         case BZ_SEQUENCE_ERROR:
26 |             text = "incorrect sequence";
27 |             break;
28 |         case BZ_PARAM_ERROR:
29 |             text = "parameter out of range";
30 |             break;
31 |         case BZ_MEM_ERROR:
32 |             text = "not enough memory is available";
33 |             break;
34 |         case BZ_DATA_ERROR:
35 |             text = "data integrity error is detected";
36 |             break;
37 |         case BZ_DATA_ERROR_MAGIC:
38 |             text = "compressed stream does not start with the correct magic bytes";
39 |             break;
40 |         case BZ_IO_ERROR:
41 |             text = "error reading or writing";
42 |             break;
43 |         case BZ_UNEXPECTED_EOF:
44 |             klass = bz_eEOZError;
45 |             text = "compressed file finishes before the logical end of stream is detected";
46 |             break;
47 |         case BZ_OUTBUFF_FULL:
48 |             text = "output buffer full";
49 |             break;
50 |         default:
51 |             /* klass still holds bz_eError here */
52 |             text = "unknown error";
53 |     }
54 |     rb_raise(klass, "%s", text);
55 | }
56 | 


--------------------------------------------------------------------------------
/ext/bzip2/common.h:
--------------------------------------------------------------------------------
 1 | #ifndef _RB_BZIP2_COMMON_H_
 2 | #define _RB_BZIP2_COMMON_H_
 3 | 
 4 | #include <ruby.h>
 5 | #include <bzlib.h>
 6 | 
 7 | #ifndef RUBY_19_COMPATIBILITY
 8 | #  include <rubyio.h>
 9 | #  include <version.h>
10 | #else
11 | #  include <ruby/io.h>
12 | #endif
13 | 
14 | #define BZ2_RB_CLOSE    1 /* bz_file.flags bit: the stream should close its underlying io */
15 | #define BZ2_RB_INTERNAL 2 /* bz_file.flags bit; its use is not visible in this header - see the .c files */
16 | 
17 | #define BZ_RB_BLOCKSIZE 4096 /* unit, in bytes, used to size and grow the reader's output buffer */
18 | #define DEFAULT_BLOCKS 9 /* initial value of bz_file.blocks */
19 | #define ASIZE (1 << CHAR_BIT) /* number of distinct values a char can hold */
20 | 
21 | /* Older versions of Ruby (< 1.8.6) need these */
22 | #ifndef RSTRING_PTR
23 | #  define RSTRING_PTR(s) (RSTRING(s)->ptr)
24 | #endif
25 | #ifndef RSTRING_LEN
26 | #  define RSTRING_LEN(s) (RSTRING(s)->len)
27 | #endif
28 | #ifndef RARRAY_PTR
29 | #  define RARRAY_PTR(s) (RARRAY(s)->ptr)
30 | #endif
31 | #ifndef RARRAY_LEN
32 | #  define RARRAY_LEN(s) (RARRAY(s)->len)
33 | #endif
34 | 
35 | struct bz_file { /* per-stream state wrapped by Bzip2::Reader and Bzip2::Writer objects */
36 |     bz_stream bzs; /* libbzip2 stream state */
37 |     VALUE in, io; /* in: input string most recently read from io (reader); io: underlying Ruby object, falsy once closed */
38 |     char *buf; /* output buffer; the reader allocates it on first use */
39 |     unsigned int buflen; /* usable size of buf, not counting the extra terminating byte */
40 |     int blocks, work, small; /* blocks starts at DEFAULT_BLOCKS; small is passed to BZ2_bzDecompressInit */
41 |     int flags, lineno, state; /* flags: BZ2_RB_* bits; state: last libbzip2 status code */
42 | };
43 | 
44 | struct bz_str { /* backing data of Bzip2::InternalStr */
45 |     VALUE str; /* string being read */
46 |     int pos; /* read offset into str; -1 once the string has been consumed */
47 | };
48 | 
49 | struct bz_iv { /* entry kept in bz_internal_ary for exit-time cleanup */
50 |     VALUE bz2, io; /* stream object and the io it is attached to */
51 |     void (*finalize)(); /* function that bz_internal_finalize installs on io */
52 | };
53 | 
54 | #define Get_BZ2(obj, bzf)                       \
55 |     rb_io_taint_check(obj);                     \
56 |     Data_Get_Struct(obj, struct bz_file, bzf);  \
57 |     if (!RTEST(bzf->io)) {                      \
58 |         rb_raise(rb_eIOError, "closed IO");     \
59 |     } /* Fetches bzf from obj and raises IOError once io is nil/false. NOTE(review): expands to several statements, so it is unsafe as the sole body of an unbraced if/else. */
60 | 
61 | #ifndef ASDFasdf
62 | extern VALUE bz_cWriter, bz_cReader, bz_cInternal;
63 | extern VALUE bz_eError, bz_eEOZError;
64 | 
65 | extern VALUE bz_internal_ary;
66 | 
67 | extern ID id_new, id_write, id_open, id_flush, id_read;
68 | extern ID id_closed, id_close, id_str;
69 | #endif
70 | 
71 | void bz_file_mark(struct bz_file * bzf);
72 | void* bz_malloc(void *opaque, int m, int n);
73 | void bz_free(void *opaque, void *p);
74 | VALUE bz_raise(int err);
75 | 
76 | #endif
77 | 


--------------------------------------------------------------------------------
/ext/bzip2/extconf.rb:
--------------------------------------------------------------------------------
 1 | # encoding: UTF-8
 2 | require 'mkmf'
 3 | dir_config('bz2')
 4 | have_header('bzlib.h')
 5 | 
 6 | $CFLAGS << ' -Wall -Wextra -Wno-unused -funroll-loops '
 7 | # $CFLAGS << ' -O0 -ggdb -Wextra'
 8 | 
 9 | if have_library("bz2", "BZ2_bzWriteOpen")
10 |   if enable_config("shared", true)
11 |      $static = nil
12 |   end
13 | 
14 |   if RUBY_VERSION.to_f >= 1.9
15 |     $CFLAGS << ' -DRUBY_19_COMPATIBILITY'
16 |   end
17 |   
18 |   create_makefile('bzip2/bzip2')
19 | else
20 |   puts "libbz2 not found, maybe try manually specifying --with-bz2-dir to find it?"
21 | end
22 | 


--------------------------------------------------------------------------------
/ext/bzip2/reader.c:
--------------------------------------------------------------------------------
   1 | #include <bzlib.h>
   2 | #include <ruby.h>
   3 | 
   4 | #include "reader.h"
   5 | #include "common.h"
   6 | 
   7 | void bz_str_mark(struct bz_str *bzs) { /* GC mark callback for struct bz_str: keeps the wrapped string alive */
   8 |     rb_gc_mark(bzs->str);
   9 | }
  10 | 
  11 | struct bz_file * bz_get_bzf(VALUE obj) { /* returns obj's state, initialising decompression on first use; NULL (0) once the stream has ended with nothing buffered */
  12 |     struct bz_file *bzf;
  13 | 
  14 |     Get_BZ2(obj, bzf); /* raises IOError when the io is closed */
  15 |     if (!bzf->buf) { /* no output buffer yet: decompression has not been set up */
  16 |         if (bzf->state != BZ_OK) { /* re-raise a status recorded earlier */
  17 |             bz_raise(bzf->state);
  18 |         }
  19 |         bzf->state = BZ2_bzDecompressInit(&(bzf->bzs), 0, bzf->small);
  20 |         if (bzf->state != BZ_OK) {
  21 |             BZ2_bzDecompressEnd(&(bzf->bzs));
  22 |             bz_raise(bzf->state);
  23 |         }
  24 |         bzf->buf = ALLOC_N(char, BZ_RB_BLOCKSIZE + 1); /* one extra byte for the terminating '\0' */
  25 |         bzf->buflen = BZ_RB_BLOCKSIZE;
  26 |         bzf->buf[0] = bzf->buf[bzf->buflen] = '\0';
  27 |         bzf->bzs.total_out_hi32 = bzf->bzs.total_out_lo32 = 0;
  28 |         bzf->bzs.next_out = bzf->buf;
  29 |         bzf->bzs.avail_out = 0; /* between calls avail_out counts buffered bytes not yet consumed (see bz_next_available) */
  30 |     }
  31 |     if (bzf->state == BZ_STREAM_END && !bzf->bzs.avail_out) { /* stream finished and nothing left to hand out */
  32 |         return 0;
  33 |     }
  34 |     return bzf;
  35 | }
  36 | 
  37 | int bz_next_available(struct bz_file *bzf, int in){ /* decompresses more data into buf + in, keeping the first `in` bytes; returns BZ_STREAM_END if the stream had already ended, else 0 */
  38 |     bzf->bzs.next_out = bzf->buf;
  39 |     bzf->bzs.avail_out = 0;
  40 |     if (bzf->state == BZ_STREAM_END) {
  41 |         return BZ_STREAM_END;
  42 |     }
  43 |     if (!bzf->bzs.avail_in) { /* compressed input used up: read more from the io */
  44 |         bzf->in = rb_funcall(bzf->io, id_read, 1, INT2FIX(1024)); /* io.read(1024) */
  45 |         if (TYPE(bzf->in) != T_STRING || RSTRING_LEN(bzf->in) == 0) { /* nil or empty read: input ended before the stream did */
  46 |             BZ2_bzDecompressEnd(&(bzf->bzs));
  47 |             bzf->bzs.avail_out = 0;
  48 |             bzf->state = BZ_UNEXPECTED_EOF;
  49 |             bz_raise(bzf->state); /* raises Bzip2::EOZError */
  50 |         }
  51 |         bzf->bzs.next_in  = RSTRING_PTR(bzf->in);
  52 |         bzf->bzs.avail_in = (int) RSTRING_LEN(bzf->in);
  53 |     }
  54 |     if ((bzf->buflen - in) < (BZ_RB_BLOCKSIZE / 2)) { /* less than half a block free after the kept bytes: grow by one block */
  55 |         bzf->buf = REALLOC_N(bzf->buf, char, bzf->buflen+BZ_RB_BLOCKSIZE+1);
  56 |         bzf->buflen += BZ_RB_BLOCKSIZE;
  57 |         bzf->buf[bzf->buflen] = '\0';
  58 |     }
  59 |     bzf->bzs.avail_out = bzf->buflen - in;
  60 |     bzf->bzs.next_out = bzf->buf + in;
  61 |     bzf->state = BZ2_bzDecompress(&(bzf->bzs));
  62 |     if (bzf->state != BZ_OK) {
  63 |         BZ2_bzDecompressEnd(&(bzf->bzs));
  64 |         if (bzf->state != BZ_STREAM_END) { /* BZ_STREAM_END is not an error; the data produced is still returned below */
  65 |             bzf->bzs.avail_out = 0;
  66 |             bz_raise(bzf->state);
  67 |         }
  68 |     }
  69 |     bzf->bzs.avail_out = bzf->buflen - bzf->bzs.avail_out; /* repurposed: now the number of valid bytes in buf, kept prefix included */
  70 |     bzf->bzs.next_out = bzf->buf; /* repurposed: read cursor for the callers */
  71 |     return 0;
  72 | }
  73 | 
  74 | VALUE bz_read_until(struct bz_file *bzf, const char *str, int len, int *td1) { /* reads up to and including the len-byte delimiter str; td1 is an optional skip table (may be NULL) */
  75 |     VALUE res;
  76 |     int total, i, nex = 0;
  77 |     char *p, *t, *tx, *end, *pend = ((char*) str) + len;
  78 | 
  79 |     res = rb_str_new(0, 0);
  80 |     while (1) {
  81 |         total = bzf->bzs.avail_out; /* bytes currently buffered */
  82 |         if (len == 1) { /* single-byte delimiter: plain memchr */
  83 |             tx = memchr(bzf->bzs.next_out, *str, bzf->bzs.avail_out);
  84 |             if (tx) {
  85 |                 i = (int)(tx - bzf->bzs.next_out + len); /* length of the piece, delimiter included */
  86 |                 res = rb_str_cat(res, bzf->bzs.next_out, i);
  87 |                 bzf->bzs.next_out += i;
  88 |                 bzf->bzs.avail_out -= i;
  89 |                 return res;
  90 |             }
  91 |         } else {
  92 |             tx = bzf->bzs.next_out;
  93 |             end = bzf->bzs.next_out + bzf->bzs.avail_out;
  94 |             while (tx + len <= end) { /* slide a len-byte window over the buffered data */
  95 |                 for (p = (char*) str, t = tx; p != pend; ++p, ++t) {
  96 |                     if (*p != *t) break;
  97 |                 }
  98 |                 if (p == pend) { /* whole delimiter matched at tx */
  99 |                     i = (int)(tx - bzf->bzs.next_out + len);
 100 |                     res = rb_str_cat(res, bzf->bzs.next_out, i);
 101 |                     bzf->bzs.next_out += i;
 102 |                     bzf->bzs.avail_out -= i;
 103 |                     return res;
 104 |                 }
 105 |                 if (td1) {
 106 |                     tx += td1[(int)*(tx + len)]; /* NOTE(review): where plain char is signed, bytes >= 0x80 give a negative index - check against how the table is built */
 107 |                 } else {
 108 |                     tx += 1;
 109 |                 }
 110 |             }
 111 |         }
 112 |         nex = 0;
 113 |         if (total) {
 114 |             nex = len - 1; /* keep the last len-1 bytes: a delimiter may straddle two chunks */
 115 |             res = rb_str_cat(res, bzf->bzs.next_out, total - nex); /* NOTE(review): total - nex is negative when fewer than len-1 bytes are buffered - confirm this cannot happen */
 116 |             if (nex) {
 117 |                 MEMMOVE(bzf->buf, bzf->bzs.next_out + total - nex, char, nex); /* move the kept tail to the front of buf */
 118 |             }
 119 |         }
 120 |         if (bz_next_available(bzf, nex) == BZ_STREAM_END) { /* no more data: return what was collected */
 121 |             if (nex) {
 122 |                 res = rb_str_cat(res, bzf->buf, nex);
 123 |             }
 124 |             if (RSTRING_LEN(res)) {
 125 |                 return res;
 126 |             }
 127 |             return Qnil; /* nothing was read at all */
 128 |         }
 129 |     }
 130 |     return Qnil;
 131 | }
 132 | 
 133 | /**
 134 |  * Consumes decompressed bytes for as long as they are equal to c,
 135 |  * fetching further data whenever the buffered bytes run out.
 136 |  *
 137 |  * Returns the first byte that differs from c (it is left unconsumed), or EOF
 138 |  * when the stream ends before any other byte shows up.
 139 |  */
 140 | int bz_read_while(struct bz_file *bzf, char c) {
 141 |     bz_stream *strm = &(bzf->bzs);
 142 | 
 143 |     do {
 144 |         /* walk over the bytes that are already buffered */
 145 |         while (strm->avail_out > 0) {
 146 |             if (*strm->next_out != c) {
 147 |                 return *strm->next_out;
 148 |             }
 149 |             strm->next_out++;
 150 |             strm->avail_out--;
 151 |         }
 152 |         /* buffer drained: ask for the next decompressed chunk */
 153 |     } while (bz_next_available(bzf, 0) != BZ_STREAM_END);
 154 | 
 155 |     /* the stream ended while every byte seen was c */
 156 |     return EOF;
 157 | }
 158 | 
 159 | /*
 160 |  * Allocator for Bzip2::Reader: wraps a zeroed struct bz_file with defaults set
 161 |  * @private
 162 |  */
 163 | VALUE bz_reader_s_alloc(VALUE obj) {
 164 |     struct bz_file *file;
 165 |     VALUE wrapper = Data_Make_Struct(obj, struct bz_file, bz_file_mark, free, file);
 166 | 
 167 |     file->bzs.bzalloc = bz_malloc;
 168 |     file->bzs.bzfree = bz_free;
 169 |     file->blocks = DEFAULT_BLOCKS;
 170 |     file->state = BZ_OK;
 171 |     return wrapper;
 172 | }
 173 | 
 174 | VALUE bz_reader_close __((VALUE));
 175 | 
 176 | /*
 177 |  * call-seq:
 178 |  *   open(filename, &block=nil) -> Bzip2::Reader
 179 |  *
 180 |  * @param [String] filename the name of the file to read from
 181 |  * @yieldparam [Bzip2::Reader] reader the Bzip2::Reader instance
 182 |  *
 183 |  * If a block is given, the created Bzip2::Reader instance is yielded to the
 184 |  * block and will be closed when the block completes. It is guaranteed via
 185 |  * +ensure+ that the reader is closed
 186 |  *
 187 |  * If a block is not given, a Bzip2::Reader instance will be returned
 188 |  *
 189 |  *    Bzip2::Reader.open('file') { |f| puts f.gets }
 190 |  *
 191 |  *    reader = Bzip2::Reader.open('file')
 192 |  *    puts reader.gets
 193 |  *    reader.close
 194 |  *
 195 |  * @return [Bzip2::Reader, Object, nil] the reader, or the block's value when a block is given
 196 |  */
 197 | VALUE bz_reader_s_open(int argc, VALUE *argv, VALUE obj) {
 198 |     VALUE res;
 199 |     struct bz_file *bzf;
 200 | 
 201 |     if (argc < 1) {
 202 |         rb_raise(rb_eArgError, "invalid number of arguments");
 203 |     }
 204 |     argv[0] = rb_funcall2(rb_mKernel, id_open, 1, argv); /* Kernel.open(filename); the result replaces argv[0] */
 205 |     if (NIL_P(argv[0])) {
 206 |         return Qnil;
 207 |     }
 208 |     res = rb_funcall2(obj, id_new, argc, argv); /* new(io, *remaining arguments) */
 209 |     Data_Get_Struct(res, struct bz_file, bzf);
 210 |     bzf->flags |= BZ2_RB_CLOSE; /* the io was opened here, so flag it for closing with the reader */
 211 |     if (rb_block_given_p()) {
 212 |         return rb_ensure(rb_yield, res, bz_reader_close, res); /* yield the reader, always close it afterwards */
 213 |     }
 214 |     return res;
 215 | }
 216 | 
 217 | /*
 218 |  * call-seq:
 219 |  *    initialize(io)
 220 |  *
 221 |  * Creates a new stream for reading a bzip file or string
 222 |  *
 223 |  * @param [File, String, #read] io the source of compressed data. Anything
 224 |  *    responding to #read is read from through #read; any other object is
 225 |  *    converted with #to_str and that string is taken as the literal data to
 226 |  *    decompress
 227 |  * @raise [ArgumentError] if +io+ answers neither #read nor #to_str, if its
 228 |  *    #to_str result is not a String, or if it reports itself as closed
 229 |  */
 230 | VALUE bz_reader_init(int argc, VALUE *argv, VALUE obj) {
 231 |     struct bz_file *bzf;
 232 |     VALUE source, small_opt;
 233 |     int use_small = 0;
 234 |     int extra_flags = 0;
 235 | 
 236 |     if (rb_scan_args(argc, argv, "11", &source, &small_opt) == 2) {
 237 |         use_small = RTEST(small_opt);
 238 |     }
 239 |     rb_io_taint_check(source);
 240 |     if (OBJ_TAINTED(source)) {
 241 |         OBJ_TAINT(obj);
 242 |     }
 243 |     if (!rb_respond_to(source, id_read)) {
 244 |         /* no #read: treat the argument as string data held by a wrapper */
 245 |         struct bz_str *holder;
 246 |         VALUE wrapper;
 247 | 
 248 |         if (!rb_respond_to(source, id_str)) {
 249 |             rb_raise(rb_eArgError, "first argument must respond to #read");
 250 |         }
 251 |         source = rb_funcall2(source, id_str, 0, 0);
 252 |         if (TYPE(source) != T_STRING) {
 253 |             rb_raise(rb_eArgError, "#to_str must return a String");
 254 |         }
 255 |         wrapper = Data_Make_Struct(bz_cInternal, struct bz_str,
 256 |             bz_str_mark, free, holder);
 257 |         holder->str = source;
 258 |         source = wrapper;
 259 |         extra_flags = BZ2_RB_INTERNAL;
 260 |     } else if (TYPE(source) == T_FILE) {
 261 | #ifndef RUBY_19_COMPATIBILITY
 262 |         OpenFile *fptr;
 263 | #else
 264 |         rb_io_t *fptr;
 265 | #endif
 266 | 
 267 |         GetOpenFile(source, fptr);
 268 |         rb_io_check_readable(fptr);
 269 |     } else if (rb_respond_to(source, id_closed)) {
 270 |         if (RTEST(rb_funcall2(source, id_closed, 0, 0))) {
 271 |             rb_raise(rb_eArgError, "closed object");
 272 |         }
 273 |     }
 274 |     Data_Get_Struct(obj, struct bz_file, bzf);
 275 |     bzf->io = source;
 276 |     bzf->small = use_small;
 277 |     bzf->flags |= extra_flags;
 278 |     return obj;
 279 | }
 280 | 
 281 | /*
 282 |  * call-seq:
 283 |  *    read(len = nil)
 284 |  *
 285 |  * Read decompressed data from the stream.
 286 |  *
 287 |  *    Bzip2::Reader.new(Bzip2.compress('ab')).read    # => "ab"
 288 |  *    Bzip2::Reader.new(Bzip2.compress('ab')).read(1) # => "a"
 289 |  *
 290 |  * @return [String, nil] the decompressed data read or +nil+ if eoz has been
 291 |  *    reached
 292 |  * @param [Integer] len the number of decompressed bytes which should be read.
 293 |  *    If nothing is specified, the entire stream is read
 294 |  * @raise [ArgumentError] if +len+ is negative
 295 |  */
 296 | VALUE bz_reader_read(int argc, VALUE *argv, VALUE obj) {
 297 |     struct bz_file *bzf;
 298 |     VALUE res, length;
 299 |     int total;
 300 |     int n;
 301 | 
 302 |     rb_scan_args(argc, argv, "01", &length);
 303 |     if (NIL_P(length)) {
 304 |         n = -1;
 305 |     } else {
 306 |         n = NUM2INT(length);
 307 |         if (n < 0) {
 308 |             rb_raise(rb_eArgError, "negative length %d given", n);
 309 |         }
 310 |     }
 311 |     bzf = bz_get_bzf(obj);
 312 |     if (!bzf) {
 313 |         return Qnil;
 314 |     }
 315 |     res = rb_str_new(0, 0);
 316 |     if (OBJ_TAINTED(obj)) {
 317 |         OBJ_TAINT(res);
 318 |     }
 319 |     if (n == 0) {
 320 |         return res;
 321 |     }
 322 |     /* bzf->buf must outlive this call: next_out points into it, and later */
 323 |     /* reads, ungetc/ungets and close still use (and finally release) it.  */
 324 |     while (1) {
 325 |         total = bzf->bzs.avail_out;
 326 |         if (n != -1 && (RSTRING_LEN(res) + total) >= n) {
 327 |             n -= (int) RSTRING_LEN(res);
 328 |             res = rb_str_cat(res, bzf->bzs.next_out, n);
 329 |             bzf->bzs.next_out += n;
 330 |             bzf->bzs.avail_out -= n;
 331 |             return res;
 332 |         }
 333 |         if (total) {
 334 |             res = rb_str_cat(res, bzf->bzs.next_out, total);
 335 |         }
 336 |         if (bz_next_available(bzf, 0) == BZ_STREAM_END) {
 337 |             return res;
 338 |         }
 339 |     }
 340 |     return Qnil;
 341 | }
 342 | 
 343 | /* Reads one byte through bz_reader_read(); returns it as 0..255, or EOF when
 344 |  * none is left. The unsigned cast keeps a 0xFF byte distinct from EOF. */
 345 | int bz_getc(VALUE obj) {
 346 |     VALUE one = INT2FIX(1);
 347 |     VALUE str = bz_reader_read(1, &one, obj);
 348 |     return (NIL_P(str) || RSTRING_LEN(str) == 0)
 349 |         ? EOF : (unsigned char) RSTRING_PTR(str)[0];
 350 | }
 351 | 
 352 | /*
 353 |  * call-seq:
 354 |  *    ungetc(byte)
 355 |  *
 356 |  * "Ungets" a byte: it is pushed back in front of the data that has not been
 357 |  * read yet, so the next read returns it first
 358 |  *
 359 |  *    reader = Bzip2::Reader.new Bzip2.compress('abc')
 360 |  *    reader.getc         # => 97
 361 |  *    reader.ungetc 97    # => nil
 362 |  *    reader.getc         # => 97
 363 |  *    reader.ungetc 42    # => nil
 364 |  *    reader.getc         # => 42
 365 |  *    reader.getc         # => 98
 366 |  *    reader.getc         # => 99
 367 |  *    reader.ungetc 100   # => nil
 368 |  *    reader.getc         # => 100
 369 |  *
 370 |  * @param [Integer] byte the byte to 'unget'
 371 |  * @return [nil] always
 372 |  */
 373 | VALUE bz_reader_ungetc(VALUE obj, VALUE a) {
 374 |     struct bz_file *bzf;
 375 |     int c = NUM2INT(a);
 376 |     Get_BZ2(obj, bzf);
 377 |     if (!bzf->buf) {
 378 |         bz_raise(BZ_SEQUENCE_ERROR);
 379 |     }
 380 |     if (bzf->bzs.next_out > bzf->buf) {
 381 |         bzf->bzs.next_out -= 1; /* reuse the slot of the last consumed byte */
 382 |     } else {
 383 |         /* nothing consumed: make room in front of the pending bytes */
 384 |         if (!(bzf->bzs.avail_out < bzf->buflen)) {
 385 |             bzf->buf = REALLOC_N(bzf->buf, char, bzf->buflen + 2);
 386 |             bzf->buf[++bzf->buflen] = '\0';
 387 |         }
 388 |         MEMMOVE(bzf->buf + 1, bzf->buf, char, bzf->bzs.avail_out);
 389 |         bzf->bzs.next_out = bzf->buf;
 390 |     }
 391 |     bzf->bzs.next_out[0] = c;
 392 |     bzf->bzs.avail_out += 1;
 393 |     return Qnil;
 394 | }
 395 | 
 396 | /*
 397 |  * call-seq:
 398 |  *    ungets(str)
 399 |  *
 400 |  * Pushes +str+ back in front of the unread data, whether or not it was just
 401 |  * read: the next read starts with +str+, then continues with the stream.
 402 |  *
 403 |  *    reader = Bzip2::Reader.new Bzip2.compress("a\nb")
 404 |  *    reader.gets           # => "a\n"
 405 |  *    reader.ungets "a\n"   # => nil
 406 |  *    reader.gets           # => "a\n"
 407 |  *    reader.ungets "foo"   # => nil
 408 |  *    reader.gets           # => "foob"
 409 |  *
 410 |  * @param [String] str the string to insert back into the stream
 411 |  * @return [nil] always
 412 |  */
 413 | VALUE bz_reader_ungets(VALUE obj, VALUE a) {
 414 |     struct bz_file *bzf;
 415 |     long len, used, total;
 416 |     Check_Type(a, T_STRING);
 417 |     Get_BZ2(obj, bzf);
 418 |     if (!bzf->buf) {
 419 |         bz_raise(BZ_SEQUENCE_ERROR);
 420 |     }
 421 |     len = RSTRING_LEN(a);
 422 |     used = bzf->bzs.next_out - bzf->buf;  /* bytes already consumed */
 423 |     total = len + bzf->bzs.avail_out;     /* unread bytes after the push */
 424 |     if (len <= used) {
 425 |         bzf->bzs.next_out -= len;         /* fits in the consumed prefix */
 426 |     } else {
 427 |         if (total > bzf->buflen) {
 428 |             bzf->buf = REALLOC_N(bzf->buf, char, total + 1);
 429 |             bzf->buflen = (int) total;
 430 |             bzf->buf[bzf->buflen] = '\0';
 431 |         }
 432 |         /* slide the unread bytes right so the string fits in front of them */
 433 |         MEMMOVE(bzf->buf + len, bzf->buf + used, char, bzf->bzs.avail_out);
 434 |         bzf->bzs.next_out = bzf->buf;
 435 |     }
 436 |     MEMCPY(bzf->bzs.next_out, RSTRING_PTR(a), char, len);
 437 |     bzf->bzs.avail_out += (int) len;
 438 |     return Qnil;
 439 | }
 440 | 
 441 | /* Reads one line with "\n" as the separator; nil when nothing is read. */
 442 | VALUE bz_reader_gets(VALUE obj) {
 443 |     struct bz_file *bzf = bz_get_bzf(obj);
 444 |     VALUE line;
 445 |     if (!bzf) {
 446 |         return Qnil;
 447 |     }
 448 |     line = bz_read_until(bzf, "\n", 1, 0);
 449 |     if (!NIL_P(line)) {
 450 |         bzf->lineno++;
 451 |         OBJ_TAINT(line);
 452 |     }
 453 |     return line;
 454 | }
 455 | 
 456 | /*
 457 |  * Shared line reader behind gets, readlines and each_line. +argv+ may hold
 458 |  * one separator (rb_rs when absent): nil reads all that is left, "\n" defers
 459 |  * to bz_reader_gets(), "" is paragraph mode (separator "\n\n", with runs of
 460 |  * "\n" skipped before and after the read).
 461 |  * +td+ is caller-provided scratch of ASIZE ints; it is filled from the
 462 |  * separator when +init+ is non-zero and passed to bz_read_until() for
 463 |  * separators that are not exactly one byte long.
 464 |  * NOTE(review): td is indexed with a plain char; check bytes >= 0x80.
 465 |  */
 466 | VALUE bz_reader_gets_internal(int argc, VALUE *argv, VALUE obj, int *td, int init) {
 467 |     struct bz_file *bzf;
 468 |     VALUE sep = rb_rs, line;
 469 |     const char *sep_ptr;
 470 |     int sep_len, paragraph, i;
 471 |     int *table = 0;
 472 | 
 473 |     if (argc) {
 474 |         rb_scan_args(argc, argv, "1", &sep);
 475 |         if (!NIL_P(sep)) {
 476 |             Check_Type(sep, T_STRING);
 477 |         }
 478 |     }
 479 |     if (NIL_P(sep)) {
 480 |         return bz_reader_read(1, &sep, obj);
 481 |     }
 482 |     sep_len = (int) RSTRING_LEN(sep);
 483 |     if (sep == rb_default_rs || (sep_len == 1 && RSTRING_PTR(sep)[0] == '\n')) {
 484 |         return bz_reader_gets(obj);
 485 |     }
 486 |     paragraph = (sep_len == 0);
 487 |     sep_ptr = paragraph ? "\n\n" : RSTRING_PTR(sep);
 488 |     if (paragraph) {
 489 |         sep_len = 2;
 490 |     }
 491 |     bzf = bz_get_bzf(obj);
 492 |     if (!bzf) {
 493 |         return Qnil;
 494 |     }
 495 |     if (paragraph) {
 496 |         bz_read_while(bzf, '\n');
 497 |     }
 498 |     if (sep_len != 1) {
 499 |         if (init) {
 500 |             for (i = 0; i < ASIZE; i++) {
 501 |                 td[i] = sep_len + 1;
 502 |             }
 503 |             for (i = 0; i < sep_len; i++) {
 504 |                 td[(int)*(sep_ptr + i)] = sep_len - i;
 505 |             }
 506 |         }
 507 |         table = td;
 508 |     }
 509 |     line = bz_read_until(bzf, sep_ptr, sep_len, table);
 510 |     if (paragraph) {
 511 |         bz_read_while(bzf, '\n');
 512 |     }
 513 |     if (!NIL_P(line)) {
 514 |         bzf->lineno++;
 515 |         OBJ_TAINT(line);
 516 |     }
 517 |     return line;
 518 | }
 519 | 
 520 | /*
 521 |  * Appends +a+ to the reader's input string (creating it when there is none
 522 |  * yet) and re-points the bzip2 stream's next_in/avail_in at the whole of
 523 |  * that string. Originally shipped without specs; treat with care.
 524 |  */
 525 | VALUE bz_reader_set_unused(VALUE obj, VALUE a) {
 526 |     struct bz_file *bzf;
 527 |     VALUE input;
 528 | 
 529 |     Check_Type(a, T_STRING);
 530 |     Get_BZ2(obj, bzf);
 531 |     input = bzf->in
 532 |         ? rb_str_cat(bzf->in, RSTRING_PTR(a), RSTRING_LEN(a))
 533 |         : rb_str_new(RSTRING_PTR(a), RSTRING_LEN(a));
 534 |     bzf->in = input;
 535 |     bzf->bzs.next_in = RSTRING_PTR(input);
 536 |     bzf->bzs.avail_in = (int) RSTRING_LEN(input);
 537 |     return Qnil;
 538 | }
 539 | 
 540 | /*
 541 |  * Reads a single byte from the decompressed stream and returns its value
 542 |  * (0..255) as an Integer.
 543 |  *
 544 |  *    reader = Bzip2::Reader.new Bzip2.compress('ab')
 545 |  *    reader.getc # => 97
 546 |  *    reader.getc # => 98
 547 |  *    reader.getc # => nil
 548 |  *
 549 |  * @return [Integer, nil] the byte value of the character read or +nil+ if eoz
 550 |  *    has been reached
 551 |  */
 552 | VALUE bz_reader_getc(VALUE obj) {
 553 |     VALUE one = INT2FIX(1);
 554 |     VALUE chunk = bz_reader_read(1, &one, obj);
 555 | 
 556 |     if (!NIL_P(chunk) && RSTRING_LEN(chunk) != 0) {
 557 |         return INT2FIX(RSTRING_PTR(chunk)[0] & 0xff);
 558 |     }
 559 |     return Qnil;
 560 | }
 561 | 
 562 | void bz_eoz_error() { /* raises bz_eEOZError with a fixed message */
 563 |     rb_raise(bz_eEOZError, "End of Zip component reached");
 564 | }
 565 | 
 566 | /*
 567 |  * Same as Bzip2::Reader#getc, except that reaching eoz raises
 568 |  * Bzip2::EOZError instead of returning +nil+
 569 |  *
 570 |  * @raise [Bzip2::EOZError] if eoz has been reached
 571 |  */
 572 | VALUE bz_reader_readchar(VALUE obj) {
 573 |     VALUE byte = bz_reader_getc(obj);
 574 | 
 575 |     if (NIL_P(byte)) {
 576 |         bz_eoz_error(); /* nothing left to read */
 577 |     }
 578 |     return byte;
 579 | }
 580 | 
 581 | /*
 582 |  * call-seq:
 583 |  *    gets(sep = "\n")
 584 |  *
 585 |  * Reads the next line of the stream, up to the given separator. Reaching
 586 |  * eoz/eof does not raise here: +nil+ is returned instead
 587 |  *
 588 |  *    reader = Bzip2::Reader.new Bzip2.compress("a\nb")
 589 |  *    reader.gets # => "a\n"
 590 |  *    reader.gets # => "b"
 591 |  *    reader.gets # => nil
 592 |  *
 593 |  * @return [String, nil] the read data or nil if eoz has been reached
 594 |  * @see Bzip2::Reader#readline
 595 |  */
 596 | VALUE bz_reader_gets_m(int argc, VALUE *argv, VALUE obj) {
 597 |     int table[ASIZE];
 598 |     VALUE line = bz_reader_gets_internal(argc, argv, obj, table, Qtrue);
 599 | 
 600 |     if (!NIL_P(line)) {
 601 |         rb_lastline_set(line); /* record it as the last line read */
 602 |     }
 603 |     return line;
 604 | }
 605 | 
 606 | /*
 607 |  * call-seq:
 608 |  *    readline(sep = "\n")
 609 |  *
 610 |  * Reads one line from the stream and returns it (including the separator).
 611 |  * Works like Bzip2::Reader#gets, but raises instead of returning +nil+
 612 |  *
 613 |  *    reader = Bzip2::Reader.new Bzip2.compress("a\nb")
 614 |  *    reader.readline # => "a\n"
 615 |  *    reader.readline # => "b"
 616 |  *    reader.readline # => raises Bzip2::EOZError
 617 |  *
 618 |  * @param [String] sep the newline separator character
 619 |  * @return [String] the read line
 620 |  * @see Bzip2::Reader.readlines
 621 |  * @raise [Bzip2::EOZError] if the stream has reached its end
 622 |  */
 623 | VALUE bz_reader_readline(int argc, VALUE *argv, VALUE obj) {
 624 |     VALUE line = bz_reader_gets_m(argc, argv, obj);
 625 | 
 626 |     if (NIL_P(line)) {
 627 |         bz_eoz_error(); /* no line could be read */
 628 |     }
 629 |     return line;
 630 | }
 631 | 
 632 | /*
 633 |  * call-seq:
 634 |  *    readlines(sep = "\n")
 635 |  *
 636 |  * Reads every remaining line of the stream and returns them as an array.
 637 |  *
 638 |  * If the stream has reached eoz, then an empty array is returned
 639 |  *
 640 |  * @param [String] sep the newline separator character
 641 |  * @return [Array] an array of lines read
 642 |  * @see Bzip2::Reader.readlines
 643 |  */
 644 | VALUE bz_reader_readlines(int argc, VALUE *argv, VALUE obj) {
 645 |     int table[ASIZE], first = Qtrue;
 646 |     VALUE line, lines = rb_ary_new();
 647 | 
 648 |     for (;; first = Qfalse) {
 649 |         line = bz_reader_gets_internal(argc, argv, obj, table, first);
 650 |         if (NIL_P(line)) {
 651 |             return lines;
 652 |         }
 653 |         rb_ary_push(lines, line);
 654 |     }
 655 | }
 656 | 
 657 | /*
 658 |  * call-seq:
 659 |  *    each(sep = "\n", &block)
 660 |  *
 661 |  * Iterates over the lines of the stream.
 662 |  *
 663 |  * @param [String] sep the byte which separates lines
 664 |  * @yieldparam [String] line the next line, separator included
 665 |  * @see Bzip2::Reader.foreach
 666 |  */
 667 | VALUE bz_reader_each_line(int argc, VALUE *argv, VALUE obj) {
 668 |     int table[ASIZE], first = Qtrue;
 669 |     VALUE line;
 670 | 
 671 |     for (;; first = Qfalse) {
 672 |         line = bz_reader_gets_internal(argc, argv, obj, table, first);
 673 |         if (NIL_P(line)) {
 674 |             return obj;
 675 |         }
 676 |         rb_yield(line);
 677 |     }
 678 | }
 679 | 
 680 | /*
 681 |  * call-seq:
 682 |  *    each_byte(&block)
 683 |  *
 684 |  * Yields every remaining decompressed byte of the stream, one at a time.
 685 |  *
 686 |  *    Bzip2::Writer.open('file'){ |f| f << 'asdf' }
 687 |  *    reader = Bzip2::Reader.new File.open('file')
 688 |  *    reader.each_byte{ |b| puts "#{b} #{b.chr}" }
 689 |  *
 690 |  *    # Output:
 691 |  *    # 97 a
 692 |  *    # 115 s
 693 |  *    # 100 d
 694 |  *    # 102 f
 695 |  *
 696 |  * @yieldparam [Integer] byte the decompressed bytes of the file
 697 |  */
 698 | VALUE bz_reader_each_byte(VALUE obj) {
 699 |     int byte;
 700 | 
 701 |     for (byte = bz_getc(obj); byte != EOF; byte = bz_getc(obj)) {
 702 |         rb_yield(INT2FIX(byte & 0xff));
 703 |     }
 704 |     return obj;
 705 | }
 706 | 
 707 | /*
 708 |  * Returns, as a tainted String, the input bytes left over after the bzip2
 709 |  * stream has ended, and marks them as taken (avail_in becomes 0). Returns
 710 |  * +nil+ when there is no input yet or the stream end has not been reached.
 711 |  * Originally shipped without specs and known to behave oddly; use with care.
 712 |  */
 713 | VALUE bz_reader_unused(VALUE obj) {
 714 |     struct bz_file *bzf;
 715 |     VALUE leftover;
 716 | 
 717 |     Get_BZ2(obj, bzf);
 718 |     if (bzf->in && bzf->state == BZ_STREAM_END) {
 719 |         if (!bzf->bzs.avail_in) {
 720 |             return rb_tainted_str_new(0, 0);
 721 |         }
 722 |         leftover = rb_tainted_str_new(bzf->bzs.next_in, bzf->bzs.avail_in);
 723 |         bzf->bzs.avail_in = 0;
 724 |         return leftover;
 725 |     }
 726 |     return Qnil;
 727 | }
 728 | 
 729 | /*
 730 |  * Test whether the end of the bzip stream has been reached
 731 |  *
 732 |  * @return [Boolean, nil] +true+ once the stream has ended and every
 733 |  *    decompressed byte has been consumed, +false+ otherwise; +nil+ while
 734 |  *    the reader has no input or no output buffer
 735 |  */
 736 | VALUE bz_reader_eoz(VALUE obj) {
 737 |     struct bz_file *bzf;
 738 |     int drained;
 739 | 
 740 |     Get_BZ2(obj, bzf);
 741 |     if (!(bzf->in && bzf->buf)) {
 742 |         return Qnil;
 743 |     }
 744 |     drained = (bzf->state == BZ_STREAM_END) && !bzf->bzs.avail_out;
 745 |     return drained ? Qtrue : Qfalse;
 746 | }
 747 | 
 748 | /*
 749 |  * Test whether the bzip stream has reached its end (see Bzip2::Reader#eoz?)
 750 |  * and then tests that the underlying IO has also reached an eof
 751 |  *
 752 |  * @return [Boolean, nil] +true+ if no further data can be read, +false+
 753 |  *    otherwise; whatever Bzip2::Reader#eoz? returned when that is not +true+
 754 |  */
 755 | VALUE bz_reader_eof(VALUE obj) {
 756 |     struct bz_file *bzf;
 757 |     VALUE at_eoz = bz_reader_eoz(obj);
 758 |     VALUE probe;
 759 | 
 760 |     if (!RTEST(at_eoz)) {
 761 |         return at_eoz;
 762 |     }
 763 |     Get_BZ2(obj, bzf);
 764 |     if (bzf->bzs.avail_in) {
 765 |         return Qfalse;
 766 |     }
 767 |     /* probe for more data: read one byte and push it back if there is one */
 768 |     probe = bz_reader_getc(obj);
 769 |     if (NIL_P(probe)) {
 770 |         return Qtrue;
 771 |     }
 772 |     bz_reader_ungetc(obj, probe);
 773 |     return Qfalse;
 774 | }
 775 | 
 776 | /*
 777 |  * Tests whether this reader has been closed.
 778 |  *
 779 |  * @return [Boolean] +true+ if it is or +false+ otherwise.
 780 |  */
 781 | VALUE bz_reader_closed(VALUE obj) {
 782 |     struct bz_file *reader;
 783 | 
 784 |     Data_Get_Struct(obj, struct bz_file, reader);
 785 |     return RTEST(reader->io) ? Qfalse : Qtrue;
 786 | }
 787 | 
 788 | /*
 789 |  * Closes this reader to disallow further reads.
 790 |  *
 791 |  * Releases the decompression buffer and, while the stream state is still
 792 |  * BZ_OK, ends the bzip2 decompression stream. An IO that the reader opened
 793 |  * itself (Bzip2::Reader.open, foreach, readlines) is closed as well, unless
 794 |  * it already reports being closed.
 795 |  *
 796 |  *    reader = Bzip2::Reader.new File.open('file')
 797 |  *    reader.close
 798 |  *
 799 |  *    reader.closed? # => true
 800 |  *
 801 |  * @return [File, nil] the io with which the reader was created, or +nil+ when
 802 |  *    the reader opened that io itself or was created from a string
 803 |  * @raise [IOError] if the stream has already been closed
 804 |  */
 805 | VALUE bz_reader_close(VALUE obj) {
 806 |     struct bz_file *bzf;
 807 |     VALUE result;
 808 | 
 809 |     Get_BZ2(obj, bzf);
 810 |     if (bzf->buf) {
 811 |         free(bzf->buf);
 812 |         bzf->buf = 0;
 813 |     }
 814 |     if (bzf->state == BZ_OK) {
 815 |         BZ2_bzDecompressEnd(&(bzf->bzs));
 816 |     }
 817 |     /* only an io this reader opened itself is closed here */
 818 |     if (bzf->flags & BZ2_RB_CLOSE) {
 819 |         int already = rb_respond_to(bzf->io, id_closed)
 820 |             && RTEST(rb_funcall2(bzf->io, id_closed, 0, 0));
 821 |         if (!already && rb_respond_to(bzf->io, id_close)) {
 822 |             rb_funcall2(bzf->io, id_close, 0, 0);
 823 |         }
 824 |     }
 825 |     result = (bzf->flags & (BZ2_RB_CLOSE|BZ2_RB_INTERNAL)) ? Qnil : bzf->io;
 826 |     bzf->io = 0;
 827 |     return result;
 828 | }
 829 | 
 830 | /*
 831 |  * Originally undocumented and without specs. Reads the rest of the stream
 832 |  * (if a buffer exists), frees the buffer and resets the state to BZ_OK.
 833 |  */
 834 | VALUE bz_reader_finish(VALUE obj) {
 835 |     struct bz_file *bzf;
 836 | 
 837 |     Get_BZ2(obj, bzf);
 838 |     if (bzf->buf) {
 839 |         rb_funcall2(obj, id_read, 0, 0);
 840 |         free(bzf->buf);
 841 |         bzf->buf = 0;
 842 |     }
 843 |     bzf->state = BZ_OK;
 844 |     return Qnil;
 845 | }
 846 | 
 847 | /*
 848 |  * Closes the reader like Bzip2::Reader#close and, when the reader did not
 849 |  * open its io itself, also closes that io unless it already reports closed.
 850 |  */
 851 | VALUE bz_reader_close_bang(VALUE obj) {
 852 |     struct bz_file *bzf;
 853 |     VALUE io;
 854 |     int closed;
 855 |     Get_BZ2(obj, bzf);
 856 |     closed = bzf->flags & (BZ2_RB_CLOSE|BZ2_RB_INTERNAL);
 857 |     io = bzf->io; /* bz_reader_close() resets bzf->io, so keep our own copy */
 858 |     bz_reader_close(obj);
 859 |     if (!closed && rb_respond_to(io, id_close)) {
 860 |         closed = rb_respond_to(io, id_closed)
 861 |             && RTEST(rb_funcall2(io, id_closed, 0, 0));
 862 |         if (!closed) {
 863 |             rb_funcall2(io, id_close, 0, 0);
 864 |         }
 865 |     }
 866 |     return Qnil;
 867 | }
 868 | 
 869 | struct foreach_arg { /* arguments handed to the line-reading callbacks */
 870 |     int argc;  /* number of separator arguments: callers pass argc - 1 */
 871 |     VALUE sep; /* optional separator as scanned from the caller's arguments */
 872 |     VALUE obj; /* the reader object the lines are read from */
 873 | };
 874 | 
 875 | /* Body run under rb_ensure by Reader.foreach: yields each line of arg->obj. */
 876 | VALUE bz_reader_foreach_line(struct foreach_arg *arg) {
 877 |     int table[ASIZE], first = Qtrue;
 878 |     VALUE line;
 879 | 
 880 |     while (!NIL_P(line = bz_reader_gets_internal(arg->argc, &arg->sep, arg->obj, table, first))) {
 881 |         first = Qfalse;
 882 |         rb_yield(line);
 883 |     }
 884 |     return Qnil;
 885 | }
 886 | 
 887 | /*
 888 |  * call-seq:
 889 |  *    foreach(filename, sep = "\n", &block)
 890 |  *
 891 |  * Reads a bz2 compressed file and yields each line to the block
 892 |  *
 893 |  *    Bzip2::Writer.open('file'){ |f| f << "a\n" << "b\n" << "c\n\nd" }
 894 |  *    Bzip2::Reader.foreach('file'){ |l| p l }
 895 |  *
 896 |  *    # Output:
 897 |  *    # "a\n"
 898 |  *    # "b\n"
 899 |  *    # "c\n"
 900 |  *    # "\n"
 901 |  *    # "d"
 902 |  *
 903 |  * @param [String] filename the path to the file to open
 904 |  * @yieldparam [String] each line of the file
 905 |  * @raise [ArgumentError] if no block is given
 906 |  */
 907 | VALUE bz_reader_s_foreach(int argc, VALUE *argv, VALUE obj) {
 908 |     struct foreach_arg arg;
 909 |     struct bz_file *bzf;
 910 |     VALUE path, io;
 911 | 
 912 |     if (!rb_block_given_p()) {
 913 |         rb_raise(rb_eArgError, "call out of a block");
 914 |     }
 915 |     rb_scan_args(argc, argv, "11", &path, &arg.sep);
 916 | #ifdef SafeStringValue
 917 |     SafeStringValue(path);
 918 | #else
 919 |     Check_SafeStr(path);
 920 | #endif
 921 |     arg.argc = argc - 1;
 922 |     io = rb_funcall2(rb_mKernel, id_open, 1, &path);
 923 |     if (NIL_P(io)) {
 924 |         return Qnil;
 925 |     }
 926 |     arg.obj = rb_funcall2(obj, id_new, 1, &io);
 927 |     Data_Get_Struct(arg.obj, struct bz_file, bzf);
 928 |     bzf->flags |= BZ2_RB_CLOSE;
 929 |     return rb_ensure(bz_reader_foreach_line, (VALUE)&arg, bz_reader_close, arg.obj);
 930 | }
 931 | 
 932 | /* Body run under rb_ensure by Reader.readlines: collects every line of
 933 |  * arg->obj into a new array and returns it. */
 934 | VALUE bz_reader_i_readlines(struct foreach_arg *arg) {
 935 |     int table[ASIZE], first = Qtrue;
 936 |     VALUE line, lines = rb_ary_new();
 937 | 
 938 |     while (!NIL_P(line = bz_reader_gets_internal(arg->argc, &arg->sep, arg->obj, table, first))) {
 939 |         first = Qfalse;
 940 |         rb_ary_push(lines, line);
 941 |     }
 942 |     return lines;
 943 | }
 944 | 
 945 | /*
 946 |  * call-seq:
 947 |  *    readlines(filename, separator="\n")
 948 |  *
 949 |  * Opens the given bz2 compressed file for reading, decompresses it and
 950 |  * returns its lines as an array. What counts as a line is decided by the
 951 |  * separator argument. The file is closed again before returning.
 952 |  *
 953 |  *    Bzip2::Writer.open('file'){ |f| f << "a\n" << "b\n" << "c\n\nd" }
 954 |  *
 955 |  *    Bzip2::Reader.readlines('file')      # => ["a\n", "b\n", "c\n", "\n", "d"]
 956 |  *    Bzip2::Reader.readlines('file', 'c') # => ["a\nb\nc", "\n\nd"]
 957 |  *
 958 |  * @param [String] filename the path to the file to read
 959 |  * @param [String] separator the character to denote a newline in the file
 960 |  * @see Bzip2::Reader#readlines
 961 |  * @return [Array, nil] an array of lines for the file, or +nil+ if
 962 |  *    Kernel#open returned +nil+
 963 |  * @raise [Bzip2::Error] if the file is not a valid bz2 compressed file
 964 |  */
 965 | VALUE bz_reader_s_readlines(int argc, VALUE *argv, VALUE obj) {
 966 |     struct foreach_arg arg;
 967 |     struct bz_file *bzf;
 968 |     VALUE path, io;
 969 | 
 970 |     rb_scan_args(argc, argv, "11", &path, &arg.sep);
 971 | #ifdef SafeStringValue
 972 |     SafeStringValue(path);
 973 | #else
 974 |     Check_SafeStr(path);
 975 | #endif
 976 |     arg.argc = argc - 1;
 977 |     io = rb_funcall2(rb_mKernel, id_open, 1, &path);
 978 |     if (NIL_P(io)) {
 979 |         return Qnil;
 980 |     }
 981 |     arg.obj = rb_funcall2(obj, id_new, 1, &io);
 982 |     Data_Get_Struct(arg.obj, struct bz_file, bzf);
 983 |     bzf->flags |= BZ2_RB_CLOSE;
 984 |     return rb_ensure(bz_reader_i_readlines, (VALUE)&arg, bz_reader_close, arg.obj);
 985 | }
 986 | 
 987 | /*
 988 |  * Returns the current line number that the stream is at: the count of lines
 989 |  * read so far, unless it was changed through Bzip2::Reader#lineno=
 990 |  *
 991 |  *    reader = Bzip2::Reader.new Bzip2.compress("a\nb")
 992 |  *    reader.lineno     # => 0
 993 |  *    reader.readline   # => "a\n"
 994 |  *    reader.lineno     # => 1
 995 |  *    reader.readline   # => "b"
 996 |  *    reader.lineno     # => 2
 997 |  *
 998 |  * @return [Integer] the current line number
 999 |  */
1000 | VALUE bz_reader_lineno(VALUE obj) {
1001 |     struct bz_file *reader;
1002 | 
1003 |     Get_BZ2(obj, reader);
1004 |     return INT2NUM(reader->lineno);
1005 | }
1006 | 
1007 | /*
1008 |  * call-seq:
1009 |  *    lineno=(num)
1010 |  *
1011 |  * Overwrites the line counter kept by this reader with the given number
1012 |  *
1013 |  *    reader = Bzip2::Reader.new Bzip2.compress("a\nb")
1014 |  *    reader.lineno     # => 0
1015 |  *    reader.readline   # => "a\n"
1016 |  *    reader.lineno     # => 1
1017 |  *    reader.lineno = 0
1018 |  *    reader.readline   # => "b"
1019 |  *    reader.lineno     # => 1
1020 |  *
1021 |  * @note This does not actually rewind or move the stream forward
1022 |  * @param [Integer] lineno the line number which the stream should consider
1023 |  *    being set at
1024 |  * @return [Integer] the line number provided
1025 |  */
1026 | VALUE bz_reader_set_lineno(VALUE obj, VALUE lineno) {
1027 |     struct bz_file *reader;
1028 | 
1029 |     Get_BZ2(obj, reader);
1030 |     reader->lineno = NUM2INT(lineno);
1031 |     return lineno;
1032 | }
1033 | 


--------------------------------------------------------------------------------
/ext/bzip2/reader.h:
--------------------------------------------------------------------------------
 1 | #ifndef _RB_BZIP2_READER_H_
 2 | #define _RB_BZIP2_READER_H_
 3 | 
 4 | #include <ruby.h>
 5 | 
 6 | /* Instance methods: +obj+ is the reader the call is made on */
 7 | VALUE bz_reader_init(int argc, VALUE *argv, VALUE obj);
 8 | VALUE bz_reader_read(int argc, VALUE *argv, VALUE obj);
 9 | VALUE bz_reader_ungetc(VALUE obj, VALUE a);
10 | VALUE bz_reader_ungets(VALUE obj, VALUE a);
11 | VALUE bz_reader_getc(VALUE obj);
12 | VALUE bz_reader_readchar(VALUE obj);
13 | VALUE bz_reader_gets_m(int argc, VALUE *argv, VALUE obj);
14 | VALUE bz_reader_readline(int argc, VALUE *argv, VALUE obj);
15 | VALUE bz_reader_readlines(int argc, VALUE *argv, VALUE obj);
16 | VALUE bz_reader_each_line(int argc, VALUE *argv, VALUE obj);
17 | VALUE bz_reader_each_byte(VALUE obj);
18 | VALUE bz_reader_unused(VALUE obj);
19 | VALUE bz_reader_set_unused(VALUE obj, VALUE a);
20 | VALUE bz_reader_eoz(VALUE obj);
21 | VALUE bz_reader_eof(VALUE obj);
22 | VALUE bz_reader_closed(VALUE obj);
23 | VALUE bz_reader_close(VALUE obj);
24 | VALUE bz_reader_close_bang(VALUE obj);
25 | VALUE bz_reader_finish(VALUE obj);
26 | VALUE bz_reader_lineno(VALUE obj);
27 | VALUE bz_reader_set_lineno(VALUE obj, VALUE lineno);
28 | 
29 | /* Class methods: +obj+ is the class the call is made on */
30 | VALUE bz_reader_s_alloc(VALUE obj);
31 | VALUE bz_reader_s_open(int argc, VALUE *argv, VALUE obj);
32 | VALUE bz_reader_s_foreach(int argc, VALUE *argv, VALUE obj);
33 | VALUE bz_reader_s_readlines(int argc, VALUE *argv, VALUE obj);
34 | 
35 | #endif /* _RB_BZIP2_READER_H_ */
36 | 


--------------------------------------------------------------------------------
/ext/bzip2/writer.c:
--------------------------------------------------------------------------------
  1 | #include <ruby.h>
  2 | #include <unistd.h>
  3 | #include "common.h"
  4 | #include "writer.h"
  5 | 
  6 | /* Finds the bz_internal_ary entry whose io is +obj+ (when ptr is 0) or whose
  7 |  * io wraps +ptr+; stores its index in *posp, or -1 if absent. Returns 0 then. */
  8 | struct bz_iv * bz_find_struct(VALUE obj, void *ptr, int *posp) {
  9 |     struct bz_iv *bziv;
 10 |     int i;
 11 | 
 12 |     for (i = 0; i < RARRAY_LEN(bz_internal_ary); i++) {
 13 |         int hit;
 14 |         Data_Get_Struct(RARRAY_PTR(bz_internal_ary)[i], struct bz_iv, bziv);
 15 |         if (!ptr) {
 16 |             hit = (bziv->io == obj);
 17 |         } else if (TYPE(bziv->io) == T_FILE) {
 18 | #ifndef RUBY_19_COMPATIBILITY
 19 |             hit = (RFILE(bziv->io)->fptr == (OpenFile *)ptr);
 20 | #else
 21 |             hit = (RFILE(bziv->io)->fptr == (rb_io_t *)ptr);
 22 | #endif
 23 |         } else {
 24 |             hit = (TYPE(bziv->io) == T_DATA && DATA_PTR(bziv->io) == ptr);
 25 |         }
 26 |         if (hit) {
 27 |             if (posp) *posp = i;
 28 |             return bziv;
 29 |         }
 30 |     }
 31 |     if (posp) *posp = -1;
 32 |     return 0;
 33 | }
 34 | 
 35 | VALUE bz_str_closed(VALUE obj) { /* unconditionally answers Qfalse */
 36 |     return Qfalse;
 37 | }
 38 | 
 39 | void bz_io_data_finalize(void *ptr) { /* cleanup for an io tracked in bz_internal_ary */
 40 |     struct bz_file *bzf;
 41 |     struct bz_iv *bziv;
 42 |     int pos;
 43 | 
 44 |     bziv = bz_find_struct(0, ptr, &pos); /* look the entry up by data pointer */
 45 |     if (bziv) {
 46 |         rb_ary_delete_at(bz_internal_ary, pos); /* stop tracking this io */
 47 |         Data_Get_Struct(bziv->bz2, struct bz_file, bzf);
 48 |         rb_protect((VALUE (*)(VALUE))bz_writer_internal_flush, (VALUE)bzf, 0); /* flush under rb_protect */
 49 |         RDATA(bziv->bz2)->dfree = free; /* the bz2 object is released with plain free() from now on */
 50 |         if (bziv->finalize) { /* hand over to the finalizer saved in the entry, if any */
 51 |             (*bziv->finalize)(ptr);
 52 |         } else if (TYPE(bzf->io) == T_FILE) { /* otherwise close a File's handles here */
 53 | #ifndef RUBY_19_COMPATIBILITY
 54 |             OpenFile *file = (OpenFile *)ptr;
 55 |             if (file->f) {
 56 |                 fclose(file->f);
 57 |                 file->f = 0;
 58 |             }
 59 |             if (file->f2) {
 60 |                 fclose(file->f2);
 61 |                 file->f2 = 0;
 62 |             }
 63 | #else
 64 |             rb_io_t *file = (rb_io_t *)ptr;
 65 |             if (file->fd) { /* NOTE(review): treats descriptor 0 as "not open" - confirm */
 66 |                 close(file->fd);
 67 | 
 68 |                 file->fd = 0;
 69 |             }
 70 |             if (file->stdio_file) {
 71 |                 fclose(file->stdio_file);
 72 |                 file->stdio_file = 0;
 73 |             }
 74 | #endif
 75 |         }
 76 |     }
 77 | 
 78 | }
 79 | 
 80 | int bz_writer_internal_flush(struct bz_file *bzf) { /* finishes the stream and writes it to the io */
 81 |     int closed = 1; /* an io lacking closed? counts as closed. NOTE(review): its stream is then never finished - confirm */
 82 | 
 83 |     if (rb_respond_to(bzf->io, id_closed)) {
 84 |         closed = RTEST(rb_funcall2(bzf->io, id_closed, 0, 0));
 85 |     }
 86 |     if (bzf->buf) { /* buf is allocated by bz_writer_write when it starts a stream */
 87 |         if (!closed && bzf->state == BZ_OK) {
 88 |             bzf->bzs.next_in = NULL;
 89 |             bzf->bzs.avail_in = 0;
 90 |             do { /* drain: keep finishing until libbz2 reports BZ_STREAM_END */
 91 |                 bzf->bzs.next_out = bzf->buf;
 92 |                 bzf->bzs.avail_out = bzf->buflen;
 93 |                 bzf->state = BZ2_bzCompress(&(bzf->bzs), BZ_FINISH);
 94 |                 if (bzf->state != BZ_FINISH_OK && bzf->state != BZ_STREAM_END) {
 95 |                     break; /* unexpected status: give up draining */
 96 |                 }
 97 |                 if (bzf->bzs.avail_out < bzf->buflen) { /* some output was produced */
 98 |                     rb_funcall(bzf->io, id_write, 1, rb_str_new(bzf->buf, bzf->buflen - bzf->bzs.avail_out));
 99 |                 }
100 |             } while (bzf->state != BZ_STREAM_END);
101 |         }
102 |         free(bzf->buf);
103 |         bzf->buf = 0; /* the next write starts a fresh stream */
104 |         BZ2_bzCompressEnd(&(bzf->bzs));
105 |         bzf->state = BZ_OK;
106 |         if (!closed && rb_respond_to(bzf->io, id_flush)) {
107 |             rb_funcall2(bzf->io, id_flush, 0, 0);
108 |         }
109 |     }
110 |     return closed; /* non-zero if the io is closed (or cannot say) */
111 | }
112 | 
113 | /* Finishes the stream and detaches the io: restores the io's own finalizer,
114 |  * closes the io when the writer owns it (BZ2_RB_CLOSE), else returns it. */
115 | VALUE bz_writer_internal_close(struct bz_file *bzf) {
116 |     struct bz_iv *entry;
117 |     int idx;
118 |     VALUE result = Qnil;
119 |     int io_closed = bz_writer_internal_flush(bzf);
120 | 
121 |     entry = bz_find_struct(bzf->io, 0, &idx);
122 |     if (entry) {
123 |         switch (TYPE(bzf->io)) { /* entry->io is bzf->io: the lookup was by object */
124 |             case T_FILE: RFILE(bzf->io)->fptr->finalize = entry->finalize; break;
125 |             case T_DATA: RDATA(entry->io)->dfree = entry->finalize; break;
126 |         }
127 |         RDATA(entry->bz2)->dfree = free;
128 |         entry->bz2 = 0;
129 |         rb_ary_delete_at(bz_internal_ary, idx);
130 |     }
131 |     if (!(bzf->flags & BZ2_RB_CLOSE)) {
132 |         result = bzf->io;
133 |     } else {
134 |         bzf->flags &= ~BZ2_RB_CLOSE;
135 |         if (!io_closed && rb_respond_to(bzf->io, id_close)) {
136 |             rb_funcall2(bzf->io, id_close, 0, 0);
137 |         }
138 |     }
139 |     bzf->io = Qnil;
140 |     return result;
141 | }
142 | 
143 | /*
144 |  * Closes this writer so it cannot be used any further. The remaining data is
145 |  * compressed and flushed.
146 |  *
147 |  * If the writer was constructed with an io object, that object is returned.
148 |  * Otherwise, the actual compressed data is returned
149 |  *
150 |  *    writer = Bzip2::Writer.new File.open('path', 'w')
151 |  *    writer << 'a'
152 |  *    writer.close # => #<File:path>
153 |  *
154 |  *    writer = Bzip2::Writer.new
155 |  *    writer << 'a'
156 |  *    writer.close # => "BZh91AY&SY...
157 |  */
158 | VALUE bz_writer_close(VALUE obj) {
159 |     struct bz_file *bzf;
160 |     VALUE res;
161 | 
162 |     Get_BZ2(obj, bzf);
163 |     res = bz_writer_internal_close(bzf); /* nil when the writer owned the io (BZ2_RB_CLOSE) */
164 | #ifndef RUBINIUS
165 |     if (!NIL_P(res) && (bzf->flags & BZ2_RB_INTERNAL)) {
166 |         RBASIC(res)->klass = rb_cString; /* in-memory sink: reset its class to plain String */
167 |     }
168 | #endif
169 |     return res;
170 | }
171 | 
172 | /*
173 |  * Closes this writer like Bzip2::Writer#close and then also closes the
174 |  * underlying io, unless it is already closed. Always returns nil.
175 |  */
176 | VALUE bz_writer_close_bang(VALUE obj) {
177 |     struct bz_file *bzf;
178 |     VALUE io;
179 |     int closed;
180 | 
181 |     Get_BZ2(obj, bzf);
182 |     closed = bzf->flags & (BZ2_RB_INTERNAL|BZ2_RB_CLOSE);
183 |     io = bz_writer_close(obj); /* close resets bzf->io to nil, so use the returned io */
184 |     if (!closed && rb_respond_to(io, id_close)) {
185 |         if (rb_respond_to(io, id_closed)) {
186 |             closed = RTEST(rb_funcall2(io, id_closed, 0, 0));
187 |         }
188 |         if (!closed) rb_funcall2(io, id_close, 0, 0);
189 |     }
190 |     return Qnil;
191 | }
192 | 
193 | /*
194 |  * Tests whether this writer is closed
195 |  *
196 |  * @return [Boolean] +true+ if the writer is closed or +false+ otherwise
197 |  */
198 | VALUE bz_writer_closed(VALUE obj) {
199 |     struct bz_file *writer;
200 |     Data_Get_Struct(obj, struct bz_file, writer);
201 |     /* closing resets the io reference to nil (see bz_writer_internal_close) */
202 |     return RTEST(writer->io) ? Qfalse : Qtrue;
203 | }
204 | 
205 | void bz_writer_free(struct bz_file *bzf) { /* free function registered by bz_writer_s_alloc */
206 |     bz_writer_internal_close(bzf); /* finish the stream and detach the io first */
207 |     free(bzf);
208 | }
209 | 
210 | /*
211 |  * Internally allocates information about a new writer
212 |  * @private
213 |  */
214 | VALUE bz_writer_s_alloc(VALUE obj) {
215 |     struct bz_file *bzf;
216 |     VALUE writer = Data_Make_Struct(obj, struct bz_file, bz_file_mark, bz_writer_free, bzf);
217 |     /* route libbz2 allocations through the extension's own hooks */
218 |     bzf->bzs.bzfree = bz_free;
219 |     bzf->bzs.bzalloc = bz_malloc;
220 |     bzf->state = BZ_OK;
221 |     bzf->blocks = DEFAULT_BLOCKS;
222 |     return writer;
223 | }
224 | 
225 | /*
226 |  * Flushes all of the data in this stream to the underlying IO.
227 |  *
228 |  * If this writer was constructed with no underlying io object, the writer is
229 |  * closed and the compressed data is returned as a string.
230 |  *
231 |  * @return [String, nil]
232 |  * @raise [IOError] if the stream has been closed
233 |  */
234 | VALUE bz_writer_flush(VALUE obj) {
235 |     struct bz_file *bzf;
236 | 
237 |     Get_BZ2(obj, bzf);
238 |     if (!(bzf->flags & BZ2_RB_INTERNAL)) {
239 |         bz_writer_internal_flush(bzf); /* io-backed writer: push pending data to the io */
240 |         return Qnil;
241 |     }
242 |     return bz_writer_close(obj); /* in-memory writer: close and hand back the String */
243 | }
244 | 
245 | /*
246 |  * call-seq:
247 |  *   open(filename, mode='wb', &block=nil) -> Bzip2::Writer
248 |  *
249 |  * @param [String] filename the name of the file to write to
250 |  * @param [String] mode a mode string passed to Kernel#open
251 |  * @yieldparam [Bzip2::Writer] writer the Bzip2::Writer instance
252 |  *
253 |  * If a block is given, the created Bzip2::Writer instance is yielded to the
254 |  * block and will be closed when the block completes. It is guaranteed via
255 |  * +ensure+ that the writer is closed
256 |  *
257 |  * If a block is not given, a Bzip2::Writer instance will be returned
258 |  *
259 |  *    Bzip2::Writer.open('file') { |f| f << data }
260 |  *
261 |  *    writer = Bzip2::Writer.open('file')
262 |  *    writer << data
263 |  *    writer.close
264 |  *
265 |  * @return [Bzip2::Writer, nil]
266 |  */
267 | VALUE bz_writer_s_open(int argc, VALUE *argv, VALUE obj) {
268 |     VALUE res;
269 |     struct bz_file *bzf;
270 | 
271 |     if (argc < 1) {
272 |         rb_raise(rb_eArgError, "invalid number of arguments");
273 |     }
274 |     if (argc == 1) { /* only a filename: open it with the default "wb" mode */
275 |         argv[0] = rb_funcall(rb_mKernel, id_open, 2, argv[0],
276 |             rb_str_new2("wb"));
277 |     } else { /* filename and mode given: any further arguments are passed on to new */
278 |         argv[1] = rb_funcall2(rb_mKernel, id_open, 2, argv); /* the opened io replaces the mode slot */
279 |         argv += 1; /* skip the filename so the io becomes the first argument */
280 |         argc -= 1;
281 |     }
282 |     res = rb_funcall2(obj, id_new, argc, argv);
283 |     Data_Get_Struct(res, struct bz_file, bzf);
284 |     bzf->flags |= BZ2_RB_CLOSE; /* the writer opened the io, so closing the writer closes it */
285 |     if (rb_block_given_p()) {
286 |         return rb_ensure(rb_yield, res, bz_writer_close, res); /* close even if the block raises */
287 |     }
288 |     return res;
289 | }
290 | 
291 | /* "write" singleton method of the in-memory String sink: appends str to obj. */
292 | VALUE bz_str_write(VALUE obj, VALUE str) {
293 |     long len;
294 | 
295 |     if (TYPE(str) != T_STRING) rb_raise(rb_eArgError, "expected a String");
296 |     len = RSTRING_LEN(str);
297 |     if (len != 0) rb_str_cat(obj, RSTRING_PTR(str), len);
298 |     return str;
299 | }
300 | 
301 | /*
302 |  * call-seq:
303 |  *    initialize(io = nil, blocks = DEFAULT_BLOCKS, work = 0)
304 |  *
305 |  * @param [#write] io the object which compressed data is written to
306 |  * @param [Integer] blocks block size passed to BZ2_bzCompressInit
307 |  * @param [Integer] work work factor passed to BZ2_bzCompressInit
308 |  * @raise [ArgumentError] if io does not respond to +write+ or is closed
309 |  *
310 |  * Creates a new Bzip2::Writer for compressing a stream of data. If no io is
311 |  * given, the data is collected in an internal String which can be retrieved
312 |  * with the Bzip2::Writer#flush method.
313 |  *
314 |  *    writer = Bzip2::Writer.new File.open('files.bz2', 'w')
315 |  *    writer << 'a'
316 |  *    writer << 'b'
317 |  *    writer.close
318 |  *
319 |  *    writer = Bzip2::Writer.new
320 |  *    writer << 'abcde'
321 |  *    writer.flush # => 'abcde' compressed
322 |  */
323 | VALUE bz_writer_init(int argc, VALUE *argv, VALUE obj) {
324 |     struct bz_file *bzf;
325 |     int blocks = DEFAULT_BLOCKS;
326 |     int work = 0;
327 |     VALUE a, b, c;
328 | 
329 |     switch(rb_scan_args(argc, argv, "03", &a, &b, &c)) { /* up to three optional args: io, blocks, work */
330 |         case 3:
331 |         work = NUM2INT(c);
332 |     /* fall through */
333 |         case 2:
334 |         blocks = NUM2INT(b);
335 |     }
336 |     Data_Get_Struct(obj, struct bz_file, bzf);
337 |     if (NIL_P(a)) { /* no io given: compress into a String that acts as the io */
338 |         a = rb_str_new(0, 0);
339 |         rb_define_method(rb_singleton_class(a), "write", bz_str_write, 1);
340 |         rb_define_method(rb_singleton_class(a), "closed?", bz_str_closed, 0);
341 |         bzf->flags |= BZ2_RB_INTERNAL; /* marks the in-memory String sink */
342 |     } else {
343 |         VALUE iv;
344 |         struct bz_iv *bziv;
345 | #ifndef RUBY_19_COMPATIBILITY
346 |         OpenFile *fptr;
347 | #else
348 |         rb_io_t *fptr;
349 | #endif
350 | 
351 |         rb_io_taint_check(a);
352 |         if (!rb_respond_to(a, id_write)) {
353 |             rb_raise(rb_eArgError, "first argument must respond to #write");
354 |         }
355 |         if (TYPE(a) == T_FILE) {
356 |             GetOpenFile(a, fptr);
357 |             rb_io_check_writable(fptr);
358 |         } else if (rb_respond_to(a, id_closed)) {
359 |             iv = rb_funcall2(a, id_closed, 0, 0);
360 |             if (RTEST(iv)) {
361 |                 rb_raise(rb_eArgError, "closed object");
362 |             }
363 |         }
364 |         bziv = bz_find_struct(a, 0, 0); /* is this io already registered? */
365 |         if (bziv) {
366 |             if (RTEST(bziv->bz2)) { /* already attached to another bz2 object */
367 |                 rb_raise(rb_eArgError, "invalid data type");
368 |             }
369 |             bziv->bz2 = obj;
370 |         } else {
371 |             iv = Data_Make_Struct(rb_cData, struct bz_iv, 0, free, bziv);
372 |             bziv->io = a;
373 |             bziv->bz2 = obj;
374 |             rb_ary_push(bz_internal_ary, iv); /* register the io <-> writer pairing */
375 |         }
376 |         switch (TYPE(a)) {
377 |             case T_FILE:
378 |                 bziv->finalize = RFILE(a)->fptr->finalize; /* remember the io's own finalizer */
379 |                 RFILE(a)->fptr->finalize = (void (*)(struct rb_io_t *, int))bz_io_data_finalize;
380 |                 break;
381 |             case T_DATA:
382 |                 bziv->finalize = RDATA(a)->dfree; /* remember the io's own free function */
383 |                 RDATA(a)->dfree = bz_io_data_finalize;
384 |                 break;
385 |         }
386 |     }
387 |     bzf->io = a;
388 |     bzf->blocks = blocks;
389 |     bzf->work = work;
390 |     return obj;
391 | }
392 | 
393 | /*
394 |  * call-seq:
395 |  *    write(data)
396 |  * Actually writes some data into this stream.
397 |  *
398 |  * @param [#to_s] data the data to write (converted to a String first)
399 |  * @return [Integer] the length of the data which was written (uncompressed)
400 |  * @raise [IOError] if the stream has been closed
401 |  */
402 | VALUE bz_writer_write(VALUE obj, VALUE a) {
403 |     struct bz_file *bzf;
404 |     long left, n;
405 | 
406 |     a = rb_obj_as_string(a);
407 |     Get_BZ2(obj, bzf);
408 |     if (!bzf->buf) { /* no stream in progress: start one and allocate the output buffer */
409 |         if (bzf->state != BZ_OK) bz_raise(bzf->state);
410 |         bzf->state = BZ2_bzCompressInit(&(bzf->bzs), bzf->blocks, 0, bzf->work);
411 |         if (bzf->state != BZ_OK) {
412 |             bz_writer_internal_flush(bzf);
413 |             bz_raise(bzf->state);
414 |         }
415 |         bzf->buf = ALLOC_N(char, BZ_RB_BLOCKSIZE + 1);
416 |         bzf->buflen = BZ_RB_BLOCKSIZE;
417 |         bzf->buf[0] = bzf->buf[bzf->buflen] = '\0';
418 |     }
419 |     bzf->bzs.next_in = RSTRING_PTR(a); /* BZ2_bzCompress advances next_in as it consumes input */
420 |     for (left = RSTRING_LEN(a); left > 0; left -= n) {
421 |         n = left > 0x40000000L ? 0x40000000L : left; /* avail_in is an unsigned int: feed at most 1 GiB at a time */
422 |         bzf->bzs.avail_in = (unsigned int)n;
423 |         while (bzf->bzs.avail_in) {
424 |             bzf->bzs.next_out = bzf->buf;
425 |             bzf->bzs.avail_out = bzf->buflen;
426 |             bzf->state = BZ2_bzCompress(&(bzf->bzs), BZ_RUN);
427 |             if (bzf->state == BZ_SEQUENCE_ERROR || bzf->state == BZ_PARAM_ERROR) {
428 |                 int err = bzf->state; /* the flush below resets state to BZ_OK */
429 |                 bz_writer_internal_flush(bzf);
430 |                 bz_raise(err);
431 |             }
432 |             bzf->state = BZ_OK;
433 |             if (bzf->bzs.avail_out < bzf->buflen) { /* pass the compressed output to the io */
434 |                 rb_funcall(bzf->io, id_write, 1, rb_str_new(bzf->buf, bzf->buflen - bzf->bzs.avail_out));
435 |             }
436 |         }
437 |     }
438 |     return LONG2NUM(RSTRING_LEN(a));
439 | }
440 | 
441 | /*
442 |  * call-seq:
443 |  *    putc(num)
444 |  *
445 |  * Write one byte into this stream.
446 |  * @param [Integer] num the number value of the character to write
447 |  * @return [Integer] always 1
448 |  * @raise [IOError] if the stream has been closed
449 |  */
450 | VALUE bz_writer_putc(VALUE obj, VALUE a) {
451 |     char c = NUM2CHR(a); /* reduce the argument to a single byte */
452 |     return bz_writer_write(obj, rb_str_new(&c, 1)); /* write it as a one-byte String */
453 | }
454 | 


--------------------------------------------------------------------------------
/ext/bzip2/writer.h:
--------------------------------------------------------------------------------
 1 | #ifndef _RB_BZIP2_WRITER_H_
 2 | #define _RB_BZIP2_WRITER_H_
 3 | 
 4 | #include <ruby.h>
 5 | #include "common.h"
 6 | 
 7 | int bz_writer_internal_flush(struct bz_file *bzf); /* finishes the stream; returns whether the io is closed */
 8 | 
 9 | /* Instance methods of the writer */
10 | VALUE bz_writer_close(VALUE obj);
11 | VALUE bz_writer_close_bang(VALUE obj);
12 | VALUE bz_writer_closed(VALUE obj);
13 | VALUE bz_writer_flush(VALUE obj);
14 | VALUE bz_writer_init(int argc, VALUE *argv, VALUE obj);
15 | VALUE bz_writer_write(VALUE obj, VALUE a);
16 | VALUE bz_writer_putc(VALUE obj, VALUE a);
17 | 
18 | /* Class methods of the writer */
19 | VALUE bz_writer_s_alloc(VALUE obj);
20 | VALUE bz_writer_s_open(int argc, VALUE *argv, VALUE obj);
21 | 
22 | #endif /* _RB_BZIP2_WRITER_H_ */
23 | 


--------------------------------------------------------------------------------
/lib/bzip2.rb:
--------------------------------------------------------------------------------
 1 | require 'bzip2/bzip2'
 2 | require 'bzip2/version'
 3 | 
 4 | # This is the base module for the +bzip2-ruby+ gem. Beneath it are the classes
 5 | # for writing bzip2 compressed streams and for reading them back uncompressed.
 6 | #
 7 | # For example usage, see Bzip2::Reader, Bzip2::Writer or the
 8 | # {README}[link:docs/file/README.rdoc]
 9 | #
10 | # @see Bzip2::Writer
11 | # @see Bzip2::Reader
12 | module Bzip2
13 | end
14 | 


--------------------------------------------------------------------------------
/lib/bzip2/internals.rb:
--------------------------------------------------------------------------------
 1 | # This file is mostly here for documentation purposes, do not require this
 2 | 
 3 | module Bzip2
 4 |   class << self
 5 |     alias :bzip2 :compress        # Bzip2.bzip2 is another name for Bzip2.compress
 6 |     alias :bunzip2 :uncompress    # Bzip2.bunzip2 is another name for Bzip2.uncompress
 7 |     alias :decompress :uncompress # Bzip2.decompress is another name for Bzip2.uncompress
 8 |   end
 9 | 
10 |   # @private
11 |   class InternalStr
12 |   end
13 | end
14 | 


--------------------------------------------------------------------------------
/lib/bzip2/reader.rb:
--------------------------------------------------------------------------------
 1 | # This file is mostly here for documentation purposes, do not require this
 2 | 
 3 | #
 4 | module Bzip2
 5 |   # Bzip2::Reader is meant to read streams of bz2 compressed bytes. It behaves
 6 |   # like an IO object with many similar methods. It also includes the Enumerable
 7 |   # module and each element is a 'line' in the stream.
 8 |   #
 9 |   # It can both decompress files:
10 |   #
11 |   #     reader = Bzip2::Reader.open('file')
12 |   #     puts reader.read
13 |   #
14 |   #     reader = Bzip2::Reader.new File.open('file')
15 |   #     puts reader.gets
16 |   #
17 |   # And it may just decompress raw strings
18 |   #
19 |   #     reader = Bzip2::Reader.new compressed_string
20 |   #     reader = Bzip2::Reader.new Bzip2.compress('compress-me')
21 |   class Reader
22 |     alias :each_line :each
23 |     alias :closed :closed?
24 |     alias :eoz :eoz?
25 |     alias :eof :eof?
26 |   end
27 | end
28 | 


--------------------------------------------------------------------------------
/lib/bzip2/version.rb:
--------------------------------------------------------------------------------
1 | module Bzip2
2 |   VERSION = "0.2.7" # version of the bzip2-ruby gem
3 | end


--------------------------------------------------------------------------------
/lib/bzip2/writer.rb:
--------------------------------------------------------------------------------
 1 | # This file is mostly here for documentation purposes, do not require this
 2 | 
 3 | #
 4 | module Bzip2
 5 |   # A Bzip2::Writer represents a stream which compresses data written to it.
 6 |   # It can be constructed with another IO object (a File) which data can be
 7 |   # written to. Otherwise, data is all stored internally as a string and can
 8 |   # be retrieved via the Bzip2::Writer#flush method
 9 |   #
10 |   # It can both write to files:
11 |   #
12 |   #     writer = Bzip2::Writer.open('file')
13 |   #     writer << data
14 |   #     writer.close
15 |   #
16 |   #     Bzip2::Writer.open('file'){ |f| f << data }
17 |   #
18 |   #     writer = Bzip2::Writer.new File.open('file')
19 |   #
20 |   # And output data as a string
21 |   #
22 |   #     writer = Bzip2::Writer.new
23 |   #     writer << data
24 |   #     writer.flush # => data compressed via bz2
25 |   #
26 |   # @see Bzip2::Writer#initialize The initialize method for examples
27 |   class Writer
28 | 
29 |     alias :finish :flush
30 |     alias :closed :closed?
31 | 
32 |     # Append some data to this buffer, returning the buffer so this method can
33 |     # be chained
34 |     #
35 |     #   writer = Bzip2::Writer.new
36 |     #   writer << 'asdf' << 1 << obj << 'a'
37 |     #   writer.flush
38 |     #
39 |     # @param [#to_s] data anything responding to #to_s
40 |     # @see IO#<<
41 |     def << data
42 |     end
43 | 
44 |     # Adds a number of strings to this buffer. A newline is also inserted into
45 |     # the buffer after each object
46 |     # @see IO#puts
47 |     def puts *objs
48 |     end
49 | 
50 |     # Similar to Bzip2::Writer#puts except a newline is not appended after each
51 |     # object appended to this buffer
52 |     #
53 |     # @see IO#print
54 |     def print *objs
55 |     end
56 | 
57 |     # Prints data to this buffer with the specified format.
58 |     #
59 |     # @see Kernel#sprintf
60 |     def printf format, *ojbs
61 |     end
62 | 
63 |   end
64 | end
65 | 


--------------------------------------------------------------------------------
/spec/reader_spec.rb:
--------------------------------------------------------------------------------
  1 | # encoding: UTF-8
  2 | require 'spec_helper'
  3 | 
  4 | describe Bzip2::Writer do
  5 |   before(:each) do
  6 |     @file = "_10lines_"
  7 |     @data = [
  8 |       "00: This is a line\n",
  9 |       "01: This is a line\n",
 10 |       "02: This is a line\n",
 11 |       "03: This is a line\n",
 12 |       "04: This is a line\n",
 13 |       "05: This is a line\n",
 14 |       "06: This is a line\n",
 15 |       "07: This is a line\n",
 16 |       "08: This is a line\n",
 17 |       "09: This is a line\n"
 18 |     ]
 19 | 
 20 |     open("|bzip2 > #{@file}", "w") do |f|
 21 |       @data.each { |l| f.puts l }
 22 |     end
 23 |   end
 24 | 
 25 |   after(:each) do
 26 |     File.delete(@file) if File.exists?(@file)
 27 |   end
 28 | 
 29 |   it "iterate over each line of the file via the foreach method" do
 30 |     lines = []
 31 |     Bzip2::Reader.foreach(@file){ |line| lines << line }
 32 |     lines.should == @data
 33 | 
 34 |     lines.clear
 35 |     Bzip2::Reader.foreach(@file, nil) do |file|
 36 |       file.split(/\n/).each{ |line| lines << line + "\n" }
 37 |     end
 38 |     lines.should == @data
 39 | 
 40 |     count = 0
 41 |     Bzip2::Reader.foreach(@file, ' ') do |thing|
 42 |       count += 1
 43 |     end
 44 |     count.should == 41
 45 |   end
 46 | 
 47 |   it "returns an array of the lines read via #readlines" do
 48 |     lines = Bzip2::Reader.readlines(@file)
 49 |     lines.should == @data
 50 | 
 51 |     lines = Bzip2::Reader.readlines(@file, nil)
 52 |     lines.should == [@data.join]
 53 |   end
 54 | 
 55 |   it "track when the stream has been closed" do
 56 |     f = Bzip2::Reader.open(@file)
 57 |     f.should_not be_closed
 58 |     f.close
 59 |     f.should be_closed
 60 |   end
 61 | 
 62 |   shared_examples_for 'a line iterator' do |method|
 63 |     it "iterates over the lines when using #each" do
 64 |       Bzip2::Reader.open(@file) do |file|
 65 |         list = []
 66 |         file.send(method){ |l| list << l }
 67 |         list.should == @data
 68 |       end
 69 | 
 70 |       Bzip2::Reader.open(@file) do |file|
 71 |         file.send(method, nil) do |contents|
 72 |           contents.should == @data.join
 73 |         end
 74 |       end
 75 | 
 76 |       count = 0
 77 |       Bzip2::Reader.open(@file) do |file|
 78 |         file.send(method, ' ') do |thing|
 79 |           count += 1
 80 |         end
 81 |       end
 82 |       41.should == count
 83 |     end
 84 |   end
 85 | 
 86 |   it_should_behave_like 'a line iterator', :each
 87 |   it_should_behave_like 'a line iterator', :each_line
 88 | 
 89 |   it "iterates over the decompressed bytes via #each_byte" do
 90 |     bytes = @data.join.bytes.to_a
 91 | 
 92 |     Bzip2::Reader.open(@file) do |file|
 93 |       file.each_byte do |b|
 94 |         b.should == bytes.shift
 95 |       end
 96 |     end
 97 | 
 98 |     bytes.size.should == 0
 99 |   end
100 | 
101 |   it "keeps track of when eof has been reached" do
102 |     Bzip2::Reader.open(@file) do |file|
103 |       @data.size.times do
104 |         file.should_not be_eof
105 |         file.gets
106 |       end
107 | 
108 |       file.should be_eof
109 |     end
110 |   end
111 | 
112 |   it "gets only one byte at a time via getc and doesn't raise an exception" do
113 |     bytes = @data.join.bytes.to_a
114 | 
115 |     Bzip2::Reader.open(@file) do |file|
116 |       while ch = file.getc
117 |         ch.should == bytes.shift
118 |       end
119 | 
120 |       file.getc.should be_nil
121 |     end
122 | 
123 |     bytes.size.should == 0
124 |   end
125 | 
126 |   it "reads an entire line via gets" do
127 |     Bzip2::Reader.open(@file) do |file|
128 |       lines = []
129 |       while line = file.gets
130 |         lines << line
131 |       end
132 |       lines.should == @data
133 | 
134 |       file.gets.should be_nil
135 |     end
136 | 
137 |     Bzip2::Reader.open(@file) do |file|
138 |       lines = []
139 |       while line = file.gets("line\n")
140 |         lines << line
141 |       end
142 |       lines.should == @data
143 | 
144 |       file.gets.should be_nil
145 |     end
146 | 
147 |     lines = ''
148 |     Bzip2::Reader.open(@file) do |file|
149 |       while contents = file.gets(nil)
150 |         lines << contents
151 |       end
152 |     end
153 |     lines.should == @data.join
154 | 
155 |     count = 0
156 |     Bzip2::Reader.open(@file) do |file|
157 |       count += 1 while file.gets(' ')
158 |     end
159 |     41.should == count
160 |   end
161 | 
162 |   it "reads the entire file or a specified length when using #read" do
163 |     Bzip2::Reader.open(@file) do |file|
164 |       file.read.should == @data.join
165 |     end
166 | 
167 |     Bzip2::Reader.open(@file) do |file|
168 |       file.read(12).should == "00: This is "
169 |       file.read(12).should == "a line\n01: T"
170 |     end
171 |   end
172 | 
173 |   it "reads one character and returns the byte value of the character read" do
174 |     count = 0
175 |     data = @data.join
176 |     Bzip2::Reader.open(@file) do |file|
177 |       @data.join.bytes do |byte|
178 |         file.readchar.should == byte
179 |       end
180 | 
181 |       lambda { file.readchar }.should raise_error(Bzip2::EOZError)
182 |     end
183 |   end
184 | 
185 |   it "reads one line at a time and raises and exception when no more" do
186 |     count = 0
187 |     Bzip2::Reader.open(@file) do |file|
188 |       lines = []
189 |       @data.size.times do |count|
190 |         lines << file.readline
191 |       end
192 | 
193 |       lines.should == @data
194 |       lambda { file.readline }.should raise_error(Bzip2::EOZError)
195 |     end
196 | 
197 |     Bzip2::Reader.open(@file) do |file|
198 |       file.readline(nil).should == @data.join
199 | 
200 |       lambda { file.readline }.should raise_error(Bzip2::EOZError)
201 |     end
202 | 
203 |     Bzip2::Reader.open(@file) do |file|
204 |       41.times { |count| file.readline(' ') }
205 |       lambda { file.readline }.should raise_error(Bzip2::EOZError)
206 |     end
207 |   end
208 | 
209 |   it "returns an array of lines in the file" do
210 |     Bzip2::Reader.open(@file) do |file|
211 |       file.readlines.should == @data
212 |     end
213 | 
214 |     Bzip2::Reader.open(@file) do |file|
215 |       file.readlines(nil).should == [@data.join]
216 |     end
217 |   end
218 | 
219 |   it "rewinds the stream when #ungetc is called and returns that byte next" do
220 |     Bzip2::Reader.open(@file) do |file|
221 |       '0'.bytes.first.should == file.getc
222 |       '0'.bytes.first.should == file.getc
223 |       ':'.bytes.first.should == file.getc
224 |       ' '.bytes.first.should == file.getc
225 | 
226 |       file.ungetc(':'.bytes.first).should be_nil
227 |       ':'.bytes.first.should == file.getc
228 | 
229 |       file.read
230 | 
231 |       file.ungetc('A'.bytes.first).should be_nil
232 |       'A'.bytes.first.should == file.getc
233 |     end
234 |   end
235 | 
236 |   it "rewinds the stream when #ungets is called" do
237 |     Bzip2::Reader.open(@file) do |file|
238 |       @data[0].should == file.gets
239 |       1.should == file.lineno
240 |       file.ungets(@data[0]).should be_nil
241 |       @data[0].should == file.gets
242 |     end
243 |   end
244 | 
245 |   it "reads entire lines via readline and throws an exception when there is" do
246 |     string = File.read(@file)
247 |     file = Bzip2::Reader.new(string)
248 |     lines = []
249 |     @data.size.times do |count|
250 |       lines << file.readline
251 |     end
252 |     lines.should == @data
253 |     lambda { file.readline }.should raise_error(Bzip2::EOZError)
254 |     file.close
255 | 
256 |     file = Bzip2::Reader.new(string)
257 |     file.readline(nil).should == @data.join
258 |     lambda { file.readline }.should raise_error(Bzip2::EOZError)
259 |     file.close
260 |   end
261 | end
262 | 


--------------------------------------------------------------------------------
/spec/spec_helper.rb:
--------------------------------------------------------------------------------
 1 | # encoding: UTF-8
 2 | require 'bundler/setup'
 3 | 
 4 | require 'rspec/core'
 5 | require 'bzip2'
 6 | 
 7 | 
 8 | RSpec.configure do |config|
 9 |   config.color_enabled = true
10 | end
11 | 
12 | # back-port 1.9 method so the tests will pass in 1.8 as well
13 | if RUBY_VERSION.include?("1.8")
14 |   class String
15 |     def getbyte(idx)
16 |       self[idx]
17 |     end
18 |   end
19 | end


--------------------------------------------------------------------------------
/spec/writer_spec.rb:
--------------------------------------------------------------------------------
  1 | # encoding: UTF-8
  2 | require 'spec_helper'
  3 | 
  4 | describe Bzip2::Writer do
  5 |   let(:file){ File.expand_path('../_10lines_', __FILE__) }
  6 | 
  7 |   class Dummy
  8 |     def to_s
  9 |       "dummy"
 10 |     end
 11 |   end
 12 | 
 13 |   after(:each) do
 14 |     File.delete(file) if File.exist?(file)
 15 |   end
 16 | 
 17 |   it "performs like IO#<< when using the #<< method" do
 18 |     Bzip2::Writer.open(file, "w") do |file|
 19 |       file << 1 << "\n" << Dummy.new << "\n" << "cat\n"
 20 |     end
 21 |     expected = [ "1\n", "dummy\n", "cat\n"]
 22 |     actual   = []
 23 |     Bzip2::Reader.foreach(file){ |line| actual.push line }
 24 |     actual.should == expected
 25 |   end
 26 | 
 27 |   it "doesn't immediately flush the data when written to" do
 28 |     io = File.new(file, "w")
 29 |     bz2 = Bzip2::Writer.new(io)
 30 |     bz2 << 1 << "\n" << Dummy.new << "\n" << "cat\n"
 31 |     bz = Bzip2::Reader.new(file)
 32 |     lambda { bz.gets }.should raise_error(Bzip2::Error)
 33 |     bz = Bzip2::Reader.open(file)
 34 |     lambda { bz.gets }.should raise_error(Bzip2::Error)
 35 |     io.close
 36 |     lambda { Bzip2::Reader.new(io) }.should raise_error(IOError)
 37 |   end
 38 | 
 39 |   it "behaves the same as IO#print when using #print" do
 40 |     Bzip2::Writer.open(file) do |file|
 41 |       file.print "foo\n" * 4096, "\n" * 4096,
 42 |                  "bar" * 4096, "\n" * 4096, "zot\n" * 1024
 43 |     end
 44 | 
 45 |     Bzip2::Reader.open(file) do |file|
 46 |       file.gets('').should == "foo\n" * 4096 + "\n"
 47 |       file.gets('').should == "bar" * 4096 + "\n\n"
 48 |       file.gets('').should == "zot\n" * 1024
 49 |     end
 50 |   end
 51 | 
 52 |   it "respects specific global variables like IO#print does via #print" do
 53 |     Bzip2::Writer.open(file) do |file|
 54 |       file.print "hello"
 55 |       file.print 1,2
 56 |       $_ = "wombat\n"
 57 |       file.print
 58 |       $\ = ":"
 59 |       $, = ","
 60 |       file.print 3, 4
 61 |       file.print 5, 6
 62 |       $\ = nil
 63 |       file.print "\n"
 64 |       $, = nil
 65 |     end
 66 | 
 67 |     Bzip2::Reader.open(file) do |file|
 68 |       file.gets(nil).should == "hello12wombat\n3,4:5,6:\n"
 69 |     end
 70 |   end
 71 | 
 72 |   it "only writes one byte via the #putc method" do
 73 |     Bzip2::Writer.open(file, "wb") do |file|
 74 |       file.putc "A"
 75 |       0.upto(255) { |ch| file.putc ch }
 76 |     end
 77 | 
 78 |     Bzip2::Reader.open(file, "rb") do |file|
 79 |       file.getc.should == 'A'.bytes.first
 80 |       0.upto(255) { |ch| file.getc.should == ch }
 81 |     end
 82 |   end
 83 | 
 84 |   it "behaves the same as IO#puts when using #puts" do
 85 |     Bzip2::Writer.open(file, "w") do |file|
 86 |       file.puts "line 1", "line 2"
 87 |       file.puts [ Dummy.new, 4 ]
 88 |     end
 89 | 
 90 |     Bzip2::Reader.open(file) do |file|
 91 |       file.gets.should == "line 1\n"
 92 |       file.gets.should == "line 2\n"
 93 |       file.gets.should == "dummy\n"
 94 |       file.gets.should == "4\n"
 95 |     end
 96 |   end
 97 | 
 98 |   it "writes data successfully to a file and returns the length of the data" do
 99 |     Bzip2::Writer.open(file, "w") do |file|
100 |       file.write('*' * 10).should == 10
101 |       file.write('!' * 5).should == 5
102 |       file.write('').should == 0
103 |       file.write(1).should == 1
104 |       file.write(2.30000).should == 3 # 2.3 is written via to_s as "2.3"
105 |       file.write("\n").should == 1
106 |     end
107 | 
108 |     Bzip2::Reader.open(file) do |file|
109 |       file.gets.should == "**********!!!!!12.3\n"
110 |     end
111 |   end
112 | 
113 |   it "returns the compressed data when no constructor argument is specified" do
114 |     file = Bzip2::Writer.new
115 |     file << ('*' * 10) << ('!' * 5) << '' << 1 << 2.3000 << "\n"
116 |     Bzip2::bunzip2(file.flush).should == "**********!!!!!12.3\n"
117 |   end
118 | 
119 |   it "compresses data via the #bzip2 shortcut" do
120 |     data = ["**********!!!!!12.3\n"]
121 |     data << "foo\n"*4096 + "\n"*4096 + "bar"*4096 + "\n"*4096 + "zot\n"*1024
122 | 
123 |     data.each do |test|
124 |       Bzip2::bunzip2(Bzip2::bzip2(test)).should == test
125 |     end
126 |   end
127 | 
128 |   it "correctly reports when a writer is closed" do
129 |     writer = Bzip2::Writer.open(file, 'w')
130 |     writer.should_not be_closed
131 |     writer.close
132 |     writer.should be_closed
133 |   end
134 | end
135 | 


--------------------------------------------------------------------------------