├── .gitignore ├── lib └── ole │ ├── types.rb │ ├── storage.rb │ ├── storage │ ├── version.rb │ ├── meta_data.rb │ ├── file_system.rb │ └── base.rb │ ├── base.rb │ ├── file_system.rb │ ├── types │ ├── property_set.rb │ └── base.rb │ ├── support.rb │ └── ranges_io.rb ├── test ├── test.doc ├── oleWithDirs.ole ├── test_word_6.doc ├── test_word_95.doc ├── test_word_97.doc ├── test_SummaryInformation ├── test_property_set.rb ├── test_mbat.rb ├── test_meta_data.rb ├── test_types.rb ├── test_ranges_io.rb ├── test_support.rb ├── test_storage.rb └── test_filesystem.rb ├── Gemfile ├── .github └── workflows │ └── ci.yml ├── COPYING ├── ruby-ole.gemspec ├── bin └── oletool ├── Rakefile ├── README.rdoc └── ChangeLog /.gitignore: -------------------------------------------------------------------------------- 1 | build 2 | doc 3 | misc 4 | coverage 5 | Gemfile.lock -------------------------------------------------------------------------------- /lib/ole/types.rb: -------------------------------------------------------------------------------- 1 | require 'ole/types/base' 2 | require 'ole/types/property_set' 3 | -------------------------------------------------------------------------------- /test/test.doc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aquasync/ruby-ole/HEAD/test/test.doc -------------------------------------------------------------------------------- /test/oleWithDirs.ole: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aquasync/ruby-ole/HEAD/test/oleWithDirs.ole -------------------------------------------------------------------------------- /test/test_word_6.doc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aquasync/ruby-ole/HEAD/test/test_word_6.doc -------------------------------------------------------------------------------- /test/test_word_95.doc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aquasync/ruby-ole/HEAD/test/test_word_95.doc -------------------------------------------------------------------------------- /test/test_word_97.doc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aquasync/ruby-ole/HEAD/test/test_word_97.doc -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source "https://rubygems.org" 2 | gemspec 3 | 4 | gem 'rake' 5 | gem 'test-unit' 6 | gem 'logger' -------------------------------------------------------------------------------- /test/test_SummaryInformation: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aquasync/ruby-ole/HEAD/test/test_SummaryInformation -------------------------------------------------------------------------------- /lib/ole/storage.rb: -------------------------------------------------------------------------------- 1 | require 'ole/storage/base' 2 | require 'ole/storage/file_system' 3 | require 'ole/storage/meta_data' 4 | -------------------------------------------------------------------------------- /lib/ole/storage/version.rb: -------------------------------------------------------------------------------- 1 | # encoding: ASCII-8BIT 2 | 3 | module Ole # :nodoc: 4 | class Storage 5 | VERSION = '1.2.13.1' 
6 | end 7 | end 8 | 9 | -------------------------------------------------------------------------------- /lib/ole/base.rb: -------------------------------------------------------------------------------- 1 | # encoding: ASCII-8BIT 2 | 3 | 4 | require 'ole/support' 5 | require 'ole/storage/version' 6 | 7 | module Ole # :nodoc: 8 | Log = Logger.new_with_callstack 9 | end 10 | 11 | -------------------------------------------------------------------------------- /lib/ole/file_system.rb: -------------------------------------------------------------------------------- 1 | warn <<-end 2 | Use of ole/file_system is deprecated. Use ole/storage (the file_system api 3 | is recommended and enabled by default). 4 | end 5 | 6 | require 'ole/storage' 7 | 8 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | pull_request: 8 | branches: 9 | - master 10 | 11 | jobs: 12 | test: 13 | runs-on: ubuntu-latest 14 | 15 | strategy: 16 | matrix: 17 | ruby-version: 18 | - 3.3 19 | - 3.2 20 | - 3.1 21 | - 3.0 22 | - 2.7 23 | 24 | steps: 25 | - uses: actions/checkout@v3 26 | 27 | - name: Set up Ruby 28 | uses: ruby/setup-ruby@v1 29 | with: 30 | ruby-version: ${{ matrix.ruby-version }} 31 | bundler-cache: true 32 | 33 | - name: Run tests 34 | run: bundle exec rake test 35 | -------------------------------------------------------------------------------- /COPYING: -------------------------------------------------------------------------------- 1 | Copyright (c) 2007-2010 Charles Lowe 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 
20 | 21 | -------------------------------------------------------------------------------- /ruby-ole.gemspec: -------------------------------------------------------------------------------- 1 | $:.unshift File.dirname(__FILE__) + '/lib' 2 | require 'ole/storage/version' 3 | 4 | PKG_NAME = 'ruby-ole' 5 | PKG_VERSION = Ole::Storage::VERSION 6 | 7 | Gem::Specification.new do |s| 8 | s.name = PKG_NAME 9 | s.version = PKG_VERSION 10 | s.summary = %q{Ruby OLE library.} 11 | s.description = %q{A library for easy read/write access to OLE compound documents for Ruby.} 12 | s.authors = ['Charles Lowe'] 13 | s.email = %q{aquasync@gmail.com} 14 | s.homepage = %q{https://github.com/aquasync/ruby-ole} 15 | s.metadata = {'homepage_uri' => s.homepage} 16 | s.license = 'MIT' 17 | 18 | s.executables = ['oletool'] 19 | s.files = ['README.rdoc', 'COPYING', 'Rakefile', 'ChangeLog', 'ruby-ole.gemspec'] 20 | s.files += Dir.glob('lib/**/*.rb') 21 | s.files += Dir.glob('test/{test_*.rb,*.doc,oleWithDirs.ole,test_SummaryInformation}') 22 | s.files += Dir.glob('bin/*') 23 | s.test_files = Dir.glob('test/test_*.rb') 24 | 25 | s.extra_rdoc_files = ['README.rdoc', 'ChangeLog'] 26 | s.rdoc_options += [ 27 | '--main', 'README.rdoc', 28 | '--title', "#{PKG_NAME} documentation", 29 | '--tab-width', '2' 30 | ] 31 | end 32 | 33 | -------------------------------------------------------------------------------- /test/test_property_set.rb: -------------------------------------------------------------------------------- 1 | #! /usr/bin/ruby 2 | 3 | $: << File.dirname(__FILE__) + '/../lib' 4 | 5 | require 'test/unit' 6 | require 'ole/types' 7 | 8 | class TestPropertySet < Test::Unit::TestCase 9 | include Ole::Types 10 | 11 | def setup 12 | @io = open File.dirname(__FILE__) + '/test_SummaryInformation', 'rb' 13 | end 14 | 15 | def teardown 16 | @io.close 17 | end 18 | 19 | def test_property_set 20 | propset = PropertySet.new @io 21 | assert_equal :mac, propset.os 22 | assert_equal 1, propset.sections.length 23 | section = propset.sections.first 24 | assert_equal 14, section.length 25 | assert_equal 'f29f85e0-4ff9-1068-ab91-08002b27b3d9', section.guid.format 26 | assert_equal PropertySet::FMTID_SummaryInformation, section.guid 27 | assert_equal 'Charles Lowe', section.to_a.assoc(4).last 28 | assert_equal 'Charles Lowe', propset.doc_author 29 | assert_equal 'Charles Lowe', propset.to_h[:doc_author] 30 | 31 | # knows the difference between existent and non-existent properties 32 | assert_raise(NoMethodError) { propset.non_existent_key } 33 | assert_raise(NotImplementedError) { propset.doc_author = 'New Author'} 34 | assert_raise(NoMethodError) { propset.non_existent_key = 'Value'} 35 | 36 | # a valid property that has no value in this property set 37 | assert_equal nil, propset.security 38 | end 39 | end 40 | 41 | -------------------------------------------------------------------------------- /test/test_mbat.rb: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/ruby 2 | 3 | $: << File.dirname(__FILE__) + '/../lib' 4 | 5 | require 'test/unit' 6 | require 'ole/storage' 7 | require 'tempfile' 8 | 9 | class TestWriteMbat < Test::Unit::TestCase 10 | def test_write_mbat 11 | Tempfile.open 'myolefile' do |temp| 12 | temp.binmode 13 | 14 | # this used to raise an error at flush time, due to failure to write the mbat 15 | Ole::Storage.open temp do |ole| 16 | # create a 10mb file 17 | ole.file.open 'myfile', 'w' do |f| 18 | s = 0.chr * 1_000_000 19 | 10.times { f.write s } 20 | end 21 | end 22 | 23 | assert((10_000_000..10_100_000) === temp.size, 'check file size') 24 | 25 | Ole::Storage.open temp do |ole| 26 | assert_equal 10_000_000, ole.file.size('myfile') 27 | compare = ole.bbat.truncate[(0...ole.bbat.length).find { |i| ole.bbat[i] > 50_000 }..-1] 28 | c = Ole::Storage::AllocationTable 29 | # 10_000_000 * 4 / 512 / 512 rounded up is 153. but then there is room needed to store the 30 | # bat in the bat, and the mbat too. hence 154. 31 | expect = [c::EOC] * 2 + [c::BAT] * 154 + [c::META_BAT] 32 | assert_equal expect, compare, 'allocation table structure' 33 | # the sbat should be empty. in fact the file shouldn't exist at all, so the root's first 34 | # block should be EOC 35 | assert ole.sbat.empty? 36 | assert_equal c::EOC, ole.root.first_block 37 | end 38 | end 39 | end 40 | end 41 | -------------------------------------------------------------------------------- /bin/oletool: -------------------------------------------------------------------------------- 1 | #! /usr/bin/ruby 2 | 3 | require 'optparse' 4 | require 'ole/storage' 5 | 6 | def oletool 7 | opts = {:verbose => false, :action => :tree} 8 | op = OptionParser.new do |op| 9 | op.banner = "Usage: oletool [options] [files]" 10 | op.separator '' 11 | op.on('-t', '--tree', 'Dump ole trees for files (default)') { opts[:action] = :tree } 12 | op.on('-r', '--repack', 'Repack the ole files in canonical form') { opts[:action] = :repack } 13 | op.on('-m', '--mimetype', 'Print the guessed mime types') { opts[:action] = :mimetype } 14 | op.on('-y', '--metadata', 'Dump the internal meta data as YAML') { opts[:action] = :metadata } 15 | op.separator '' 16 | op.on('-v', '--[no-]verbose', 'Run verbosely') { |v| opts[:verbose] = v } 17 | op.on_tail('-h', '--help', 'Show this message') { puts op; exit } 18 | end 19 | files = op.parse ARGV 20 | if files.empty? 21 | puts 'Must specify 1 or more msg files.' 22 | puts op 23 | exit 1 24 | end 25 | Ole::Log.level = opts[:verbose] ? Logger::WARN : Logger::FATAL 26 | files.each do |file| 27 | case opts[:action] 28 | when :tree 29 | Ole::Storage.open(file) { |ole| puts ole.root.to_tree } 30 | when :repack 31 | Ole::Storage.open file, 'rb+', &:repack 32 | when :metadata 33 | require 'yaml' 34 | Ole::Storage.open(file) { |ole| puts ole.meta_data.to_h.to_yaml } 35 | when :mimetype 36 | puts Ole::Storage.open(file) { |ole| ole.meta_data.mime_type } 37 | end 38 | end 39 | end 40 | 41 | oletool 42 | -------------------------------------------------------------------------------- /test/test_meta_data.rb: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/ruby 2 | 3 | $: << File.dirname(__FILE__) + '/../lib' 4 | 5 | require 'test/unit' 6 | require 'ole/storage' 7 | 8 | class TestMetaData < Test::Unit::TestCase 9 | def test_meta_data 10 | Ole::Storage.open File.dirname(__FILE__) + '/test.doc', 'rb' do |ole| 11 | assert_equal 'Charles Lowe', ole.meta_data[:doc_author] 12 | assert_equal 'Charles Lowe', ole.meta_data['doc_author'] 13 | assert_equal 'Charles Lowe', ole.meta_data.to_h[:doc_author] 14 | assert_equal 'Title', ole.meta_data.doc_title 15 | assert_equal 'MSWordDoc', ole.meta_data.file_format 16 | assert_equal 'application/msword', ole.meta_data.mime_type 17 | assert_raises NotImplementedError do 18 | ole.meta_data[:doc_author] = 'New Author' 19 | end 20 | end 21 | end 22 | 23 | # this tests the other ways of getting the mime_type, than using "\001CompObj", 24 | # ie, relying on root clsid, and on the heuristics 25 | def test_mime_type 26 | ole = Ole::Storage.new StringIO.new 27 | ole.root.clsid = Ole::Storage::MetaData::CLSID_EXCEL97.to_s 28 | assert_equal nil, ole.meta_data.file_format 29 | assert_equal 'application/vnd.ms-excel', ole.meta_data.mime_type 30 | 31 | ole.root.clsid = 0.chr * Ole::Types::Clsid::SIZE 32 | assert_equal nil, ole.meta_data.file_format 33 | assert_equal 'application/x-ole-storage', ole.meta_data.mime_type 34 | 35 | ole.file.open('Book', 'w') { |f| } 36 | assert_equal 'application/vnd.ms-excel', ole.meta_data.mime_type 37 | ole.file.open('WordDocument', 'w') { |f| } 38 | assert_equal 'application/msword', ole.meta_data.mime_type 39 | ole.file.open('__properties_version1.0', 'w') { |f| } 40 | assert_equal 'application/vnd.ms-outlook', ole.meta_data.mime_type 41 | end 42 | end 43 | 44 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | require 'rubygems' 2 | require 'rake/testtask' 3 | 4 | require 'rbconfig' 5 | require 'fileutils' 6 | 7 | spec = eval File.read('ruby-ole.gemspec') 8 | 9 | task :default => [:test] 10 | 11 | Rake::TestTask.new do |t| 12 | t.test_files = FileList["test/test_*.rb"] 13 | t.warning = true 14 | t.verbose = true 15 | end 16 | 17 | begin 18 | Rake::TestTask.new(:coverage) do |t| 19 | t.test_files = FileList["test/test_*.rb"] 20 | t.warning = true 21 | t.verbose = true 22 | t.ruby_opts = ['-rsimplecov -e "SimpleCov.start; load(ARGV.shift)"'] 23 | end 24 | rescue LoadError 25 | # SimpleCov not available 26 | end 27 | 28 | begin 29 | require 'rdoc/task' 30 | RDoc::Task.new do |t| 31 | t.rdoc_dir = 'doc' 32 | t.rdoc_files.include 'lib/**/*.rb' 33 | t.rdoc_files.include 'README.rdoc', 'ChangeLog' 34 | t.title = "#{PKG_NAME} documentation" 35 | t.options += %w[--line-numbers --inline-source --tab-width 2] 36 | t.main = 'README.rdoc' 37 | end 38 | rescue LoadError 39 | # RDoc not available or too old (<2.4.2) 40 | end 41 | 42 | begin 43 | require 'rubygems/package_task' 44 | Gem::PackageTask.new(spec) do |t| 45 | t.need_tar = true 46 | t.need_zip = false 47 | t.package_dir = 'build' 48 | end 49 | rescue LoadError 50 | # RubyGems too old (<1.3.2) 51 | end 52 | 53 | desc 'Run various benchmarks' 54 | task :benchmark do 55 | require 'benchmark' 56 | require 'tempfile' 57 | require 'ole/storage' 58 | 59 | # should probably add some read benchmarks too 60 | def write_benchmark opts={} 61 | files, size = opts[:files], opts[:size] 62 | block_size = opts[:block_size] || 100_000 63 | block = 0.chr * block_size 64 | blocks, remaining = size.divmod block_size 65 | 
remaining = 0.chr * remaining 66 | Tempfile.open 'ole_storage_benchmark' do |temp| 67 | Ole::Storage.open temp do |ole| 68 | files.times do |i| 69 | ole.file.open "file_#{i}", 'w' do |f| 70 | blocks.times { f.write block } 71 | f.write remaining 72 | end 73 | end 74 | end 75 | end 76 | end 77 | 78 | Benchmark.bm do |bm| 79 | bm.report 'write_1mb_1x5' do 80 | 5.times { write_benchmark :files => 1, :size => 1_000_000 } 81 | end 82 | 83 | bm.report 'write_1mb_2x5' do 84 | 5.times { write_benchmark :files => 1_000, :size => 1_000 } 85 | end 86 | end 87 | end 88 | 89 | -------------------------------------------------------------------------------- /test/test_types.rb: -------------------------------------------------------------------------------- 1 | #! /usr/bin/ruby 2 | # encoding: ASCII-8BIT 3 | 4 | $: << File.dirname(__FILE__) + '/../lib' 5 | 6 | require 'test/unit' 7 | require 'ole/types' 8 | 9 | class TestTypes < Test::Unit::TestCase 10 | include Ole::Types 11 | 12 | def test_lpwstr 13 | assert_equal "t\000e\000s\000t\000", Lpwstr.dump('test') 14 | str = Lpwstr.load "t\000e\000s\000t\000" 15 | assert_equal 'test', str 16 | assert_equal Lpwstr, str.class 17 | end 18 | 19 | def test_lpstr 20 | # no null byte? probably wrong 21 | assert_equal 'test', Lpstr.dump('test') 22 | assert_equal 'test', Lpstr.load("test\000") 23 | end 24 | 25 | # in actual fact the same code path would be used for systime i expect 26 | def test_filetime 27 | # for saving, we can use Date, Time, or DateTime. 28 | assert_equal "\000\000\260\3077-\307\001", FileTime.dump(Time.gm(2007, 1, 1)) 29 | time = FileTime.load "\000\000\260\3077-\307\001" 30 | assert_equal FileTime, time.class 31 | assert_equal '2007-01-01T00:00:00+00:00', time.to_s 32 | # note that if we'd used Time.local, instead of gm, we'd get a different value. eg 33 | assert_equal "\000\370\331\336\r-\307\001", FileTime.dump(DateTime.parse('2007-01-01 00:00 +0500')) 34 | # note that it still loads up as GMT, because there's no associated time zone. 35 | # essentially, i'm storing and loading times as GMT. maybe i should add in conversion to local time 36 | # zone when loading 37 | assert_equal '2006-12-31T19:00:00+00:00', FileTime.load("\000\370\331\336\r-\307\001").to_s 38 | # test loading a bogus time 39 | assert_equal nil, FileTime.load(0.chr * 8) 40 | # this used to be counted as an "unlikely time", and discarded. 
that has been removed 41 | assert_equal '1700-01-01T00:00:00+00:00', FileTime.load(FileTime.dump(Date.new(1700, 1, 1))).to_s 42 | assert_equal '#', FileTime.load("\000\370\331\336\r-\307\001").inspect 43 | end 44 | 45 | def test_guid 46 | assert_equal "\x29\x03\x02\x00\x80\x08\x07\x40\xc0\x01\x12\x34\x56\x78\x90\x46", 47 | Clsid.dump('{00020329-0880-4007-c001-123456789046}') 48 | assert_equal '#', 49 | Clsid.load("\x29\x03\x02\x00\x80\x08\x07\x40\xc0\x01\x12\x34\x56\x78\x90\x46").inspect 50 | end 51 | 52 | def test_variant 53 | assert_equal "\x29\x03\x02\x00\x80\x08\x07\x40\xc0\x01\x12\x34\x56\x78\x90\x46", 54 | Variant.dump(VT_CLSID, '{00020329-0880-4007-c001-123456789046}') 55 | assert_equal "2006-12-31T19:00:00+00:00", Variant.load(VT_FILETIME, "\000\370\331\336\r-\307\001").to_s 56 | data = Variant.load VT_DATE, 'blahblah' 57 | assert_equal Data, data.class 58 | assert_equal 'blahblah', Variant.dump(VT_DATE, 'blahblah') 59 | end 60 | 61 | # purely for the purposes of coverage, i'll test these old aliases: 62 | def test_deprecated_aliases 63 | assert_equal '#', 64 | Ole::Types.load_guid("\x29\x03\x02\x00\x80\x08\x07\x40\xc0\x01\x12\x34\x56\x78\x90\x46").inspect 65 | assert_equal '2006-12-31T19:00:00+00:00', Ole::Types.load_time("\000\370\331\336\r-\307\001").to_s 66 | end 67 | end 68 | 69 | -------------------------------------------------------------------------------- /README.rdoc: -------------------------------------------------------------------------------- 1 | = Introduction 2 | 3 | The ruby-ole library provides a variety of functions primarily for 4 | working with OLE2 structured storage files, such as those produced by 5 | Microsoft Office - eg *.doc, *.msg etc. 6 | 7 | = Example Usage 8 | 9 | Here are some examples of how to use the library functionality, 10 | categorised roughly by purpose. 11 | 12 | 1. Reading and writing files within an OLE container 13 | 14 | The recommended way to manipulate the contents is via the 15 | "file_system" API, whereby you use Ole::Storage instance methods 16 | similar to the regular File and Dir class methods. 17 | 18 | ole = Ole::Storage.open('oleWithDirs.ole', 'rb+') 19 | p ole.dir.entries('.') # => [".", "..", "dir1", "dir2", "file1"] 20 | p ole.file.read('file1')[0, 25] # => "this is the entry 'file1'" 21 | ole.dir.mkdir('newdir') 22 | 23 | 2. Accessing OLE meta data 24 | 25 | Some convenience functions are provided for (currently read only) 26 | access to OLE property sets and other sources of meta data. 27 | 28 | ole = Ole::Storage.open('test_word_95.doc') 29 | p ole.meta_data.file_format # => "MSWordDoc" 30 | p ole.meta_data.mime_type # => "application/msword" 31 | p ole.meta_data.doc_author.split.first # => "Charles" 32 | 33 | 3. Raw access to underlying OLE internals 34 | 35 | This is probably of little interest to most developers using the 36 | library, but for some use cases you may need to drop down to the 37 | lower level API on which the "file_system" API is constructed, 38 | which exposes more of the format details. 39 | 40 | Ole::Storage files can have multiple files with the same name, 41 | or with a slash in the name, and other things that are probably 42 | strictly invalid. This API is the only way to access those files. 
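   For example, entries that cannot be addressed by a path lookup (duplicate
   or oddly named streams) can still be reached by scanning the dirent array
   directly. A sketch, using the "\001CompObj" stream found in the bundled
   test documents:

     ole.dirents.select { |d| d.name == "\001CompObj" }.each do |dirent|
       p dirent.read[0, 10]
     end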
43 | 44 | You can access the header object directly: 45 | 46 | p ole.header.num_sbat # => 1 47 | p ole.header.magic.unpack('H*') # => ["d0cf11e0a1b11ae1"] 48 | 49 | You can directly access the array of all Dirent objects, 50 | including the root: 51 | 52 | p ole.dirents.length # => 5 53 | puts ole.root.to_tree 54 | # => 55 | - # 56 | |- # 57 | |- # 58 | |- # 59 | \- # 60 | 61 | You can access (through RangesIO methods, or by using the 62 | relevant Dirent and AllocationTable methods) information like where within 63 | the container a stream is located (these are offset/length pairs): 64 | 65 | p ole.root["\001CompObj"].open { |io| io.ranges } # => [[0, 64], [64, 34]] 66 | 67 | See the documentation for each class for more details. 68 | 69 | = Thanks 70 | 71 | * The code contained in this project was initially based on chicago's libole 72 | (source available at http://prdownloads.sf.net/chicago/ole.tgz). 73 | 74 | * It was later augmented with some corrections by inspecting pole, and (purely 75 | for header definitions) gsf. 76 | 77 | * The property set parsing code came from the apache java project POIFS. 78 | 79 | * The excellent idea for using a pseudo file system style interface by providing 80 | #file and #dir methods which mimic File and Dir, was borrowed (along with almost 81 | unchanged tests!) from Thomas Sondergaard's rubyzip. 82 | -------------------------------------------------------------------------------- /test/test_ranges_io.rb: -------------------------------------------------------------------------------- 1 | #! /usr/bin/ruby 2 | 3 | $: << File.dirname(__FILE__) + '/../lib' 4 | 5 | require 'test/unit' 6 | require 'ole/ranges_io' 7 | require 'stringio' 8 | 9 | class TestRangesIO < Test::Unit::TestCase 10 | TEST_DIR = File.dirname __FILE__ 11 | 12 | def setup 13 | # read from ourself, also using overlaps. 14 | ranges = [100..200, 0..10, 100..150] 15 | @io = RangesIO.new open("#{TEST_DIR}/test_ranges_io.rb"), :ranges => ranges, :close_parent => true 16 | end 17 | 18 | def teardown 19 | @io.close 20 | end 21 | 22 | def test_open 23 | # block form 24 | f = open("#{TEST_DIR}/test_ranges_io.rb") 25 | assert_equal false, f.closed? 26 | RangesIO.open f, :ranges => [] 27 | assert_equal false, f.closed? 28 | RangesIO.open(f, :ranges => [], :close_parent => true) {} 29 | assert_equal true, f.closed? 30 | end 31 | 32 | def test_combine 33 | ranges = [[0, 100], 100...200, [200, 100]] 34 | io = RangesIO.new STDOUT, 'r+', :ranges => ranges 35 | assert_equal [[0, 300]], io.ranges 36 | io = RangesIO.new STDOUT, 'r+', :ranges => ranges, :combine => false 37 | assert_equal [[0, 100], [100, 100], [200, 100]], io.ranges 38 | end 39 | 40 | def test_basics 41 | assert_equal 160, @io.size 42 | assert_match %r{size=160}, @io.inspect 43 | end 44 | 45 | def test_truncate 46 | assert_raises(NotImplementedError) { @io.size += 10 } 47 | end 48 | 49 | def test_seek 50 | @io.pos = 10 51 | @io.seek(-100, IO::SEEK_END) 52 | @io.seek(-10, IO::SEEK_CUR) 53 | @io.pos += 20 54 | assert_equal 70, @io.pos 55 | @io.rewind 56 | assert_equal 0, @io.pos 57 | # seeking past the end doesn't throw an exception for normal 58 | # files, even in read mode, but RangesIO does 59 | assert_raises(Errno::EINVAL) { @io.seek 500 } 60 | assert_raises(Errno::EINVAL) { @io.seek(-500, IO::SEEK_END) } 61 | assert_raises(Errno::EINVAL) { @io.seek 1, 10 } 62 | end 63 | 64 | def test_read 65 | # this will map to the start of the file: 66 | @io.pos = 100 67 | assert_equal '#! 
/usr/bi', @io.read(10) 68 | # test selection of initial range, offset within that range 69 | pos = 80 70 | @io.seek pos 71 | # test advancing of pos properly, by... 72 | chunked = (0...10).map { @io.read 10 }.join 73 | # given the file is 160 long: 74 | assert_equal 80, chunked.length 75 | @io.seek pos 76 | # comparing with a flat read 77 | assert_equal chunked, @io.read(80) 78 | end 79 | 80 | # should test gets, lineno, and other IO methods we want to have 81 | def test_gets 82 | assert_equal "io'\n", @io.gets 83 | end 84 | 85 | def test_write 86 | str = File.read "#{TEST_DIR}/test_ranges_io.rb" 87 | @io = RangesIO.new StringIO.new(str), :ranges => @io.ranges 88 | assert_equal "io'\nrequir", str[100, 10] 89 | @io.write 'testing testing' 90 | assert_equal 'testing te', str[100, 10] 91 | @io.seek 0 92 | assert_equal 'testing te', @io.read(10) 93 | # lets write over a range barrier 94 | assert_equal '#! /usr/bi', str[0, 10] 95 | assert_equal "LE__\n\n\tdef", str[195, 10] 96 | @io.write 'x' * 100 97 | assert_equal 'x' * 10, str[0, 10] 98 | assert_equal "xxxxx\n\tdef", str[195, 10] 99 | # write enough to overflow the file 100 | assert_raises(IOError) { @io.write 'x' * 60 } 101 | end 102 | 103 | def test_non_resizeable 104 | # will try to truncate, which will fail 105 | assert_raises NotImplementedError do 106 | @io = RangesIO.new(StringIO.new, 'w', :ranges => []) 107 | end 108 | # will be fine 109 | @io = RangesIONonResizeable.new(StringIO.new, 'w', :ranges => []) 110 | assert_equal '#', @io.instance_variable_get(:@mode).inspect 111 | end 112 | end 113 | 114 | -------------------------------------------------------------------------------- /test/test_support.rb: -------------------------------------------------------------------------------- 1 | #! /usr/bin/ruby 2 | 3 | $: << File.dirname(__FILE__) + '/../lib' 4 | 5 | require 'test/unit' 6 | require 'ole/support' 7 | 8 | class TestSupport < Test::Unit::TestCase 9 | TEST_DIR = File.dirname __FILE__ 10 | 11 | def test_file 12 | assert_equal 4096, open("#{TEST_DIR}/oleWithDirs.ole") { |f| f.size } 13 | # point is to have same interface as: 14 | assert_equal 4096, StringIO.open(open("#{TEST_DIR}/oleWithDirs.ole", 'rb', &:read)).size 15 | end 16 | 17 | def test_enumerable 18 | expect = {0 => [2, 4], 1 => [1, 3]} 19 | assert_equal expect, [1, 2, 3, 4].group_by { |i| i & 1 } 20 | assert_equal 10, [1, 2, 3, 4].sum 21 | assert_equal %w[1 2 3 4], [1, 2, 3, 4].map(&:to_s) 22 | end 23 | 24 | def test_logger 25 | io = StringIO.new 26 | log = Logger.new_with_callstack io 27 | log.warn 'test' 28 | expect = %r{^\[\d\d:\d\d:\d\d .*?test_support\.rb:\d+:test_logger\]\nWARN test$} 29 | assert_match expect, io.string.chomp 30 | end 31 | 32 | def test_io 33 | str = 'a' * 5000 + 'b' 34 | src, dst = StringIO.new(str), StringIO.new 35 | IO.copy src, dst 36 | assert_equal str, dst.string 37 | end 38 | 39 | def test_symbol 40 | array = (1..10).to_a 41 | assert_equal 55, array.inject(&:+) 42 | end 43 | end 44 | 45 | class TestIOMode < Test::Unit::TestCase 46 | def mode s 47 | Ole::IOMode.new s 48 | end 49 | 50 | def test_parse 51 | assert_equal true, mode('r+bbbbb').binary? 52 | assert_equal false, mode('r+').binary? 53 | 54 | assert_equal false, mode('r+').create? 55 | assert_equal false, mode('r').create? 56 | assert_equal true, mode('wb').create? 57 | 58 | assert_equal true, mode('w').truncate? 59 | assert_equal false, mode('r').truncate? 60 | assert_equal false, mode('r+').truncate? 61 | 62 | assert_equal true, mode('r+').readable? 
63 | assert_equal true, mode('r+').writeable? 64 | assert_equal false, mode('r').writeable? 65 | assert_equal false, mode('w').readable? 66 | 67 | assert_equal true, mode('a').append? 68 | assert_equal false, mode('w+').append? 69 | end 70 | 71 | def test_invalid 72 | assert_raises(ArgumentError) { mode 'rba' } 73 | assert_raises(ArgumentError) { mode '+r' } 74 | end 75 | 76 | def test_inspect 77 | assert_equal '#', mode('r').inspect 78 | assert_equal '#', mode('wb+').inspect 79 | assert_equal '#', mode('a').inspect 80 | end 81 | end 82 | 83 | class TestRecursivelyEnumerable < Test::Unit::TestCase 84 | class Container 85 | include RecursivelyEnumerable 86 | 87 | def initialize *children 88 | @children = children 89 | end 90 | 91 | def each_child(&block) 92 | @children.each(&block) 93 | end 94 | 95 | def inspect 96 | "#" 97 | end 98 | end 99 | 100 | def setup 101 | @root = Container.new( 102 | Container.new(1), 103 | Container.new(2, 104 | Container.new( 105 | Container.new(3) 106 | ) 107 | ), 108 | 4, 109 | Container.new() 110 | ) 111 | end 112 | 113 | def test_find 114 | i = 0 115 | found = @root.recursive.find do |obj| 116 | i += 1 117 | obj == 4 118 | end 119 | assert_equal found, 4 120 | assert_equal 9, i 121 | 122 | i = 0 123 | found = @root.recursive(:breadth_first).find do |obj| 124 | i += 1 125 | obj == 4 126 | end 127 | assert_equal found, 4 128 | assert_equal 4, i 129 | 130 | # this is to make sure we hit the breadth first child cache 131 | i = 0 132 | found = @root.recursive(:breadth_first).find do |obj| 133 | i += 1 134 | obj == 3 135 | end 136 | assert_equal found, 3 137 | assert_equal 10, i 138 | end 139 | 140 | def test_to_tree 141 | assert_equal <<-'end', @root.to_tree 142 | - # 143 | |- # 144 | | \- 1 145 | |- # 146 | | |- 2 147 | | \- # 148 | | \- # 149 | | \- 3 150 | |- 4 151 | \- # 152 | end 153 | end 154 | end 155 | 156 | -------------------------------------------------------------------------------- /lib/ole/storage/meta_data.rb: -------------------------------------------------------------------------------- 1 | # encoding: ASCII-8BIT 2 | 3 | require 'ole/types/property_set' 4 | 5 | module Ole 6 | class Storage 7 | # 8 | # The MetaData class is designed to be high level interface to all the 9 | # underlying meta data stored within different sections, themselves within 10 | # different property set streams. 11 | # 12 | # With this class, you can simply get properties using their names, without 13 | # needing to know about the underlying guids, property ids etc. 14 | # 15 | # Example: 16 | # 17 | # Ole::Storage.open('test.doc') { |ole| p ole.meta_data.doc_author } 18 | # 19 | # TODO: 20 | # 21 | # * add write support 22 | # * fix some of the missing type coercion (eg FileTime) 23 | # * maybe add back the ability to access individual property sets as a unit 24 | # directly. ie ole.summary_information. Is this useful? 25 | # * full key support, for unknown keys, like 26 | # ole.meta_data[myguid, myid]. probably needed for user-defined 27 | # properties too. 
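	# Hash-style access works as well as the method form shown above (this is
	# exercised in test_meta_data.rb); for the bundled test.doc each of these
	# returns the same author string:
	#
	#   ole.meta_data[:doc_author]
	#   ole.meta_data['doc_author']
	#   ole.meta_data.to_h[:doc_author]
	#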
28 | # 29 | class MetaData 30 | include Enumerable 31 | 32 | FILE_MAP = { 33 | Types::PropertySet::FMTID_SummaryInformation => "\005SummaryInformation", 34 | Types::PropertySet::FMTID_DocSummaryInfo => "\005DocumentSummaryInformation" 35 | } 36 | 37 | FORMAT_MAP = { 38 | 'MSWordDoc' => :doc 39 | } 40 | 41 | CLSID_EXCEL97 = Types::Clsid.parse "{00020820-0000-0000-c000-000000000046}" 42 | CLSID_EXCEL95 = Types::Clsid.parse "{00020810-0000-0000-c000-000000000046}" 43 | CLSID_WORD97 = Types::Clsid.parse "{00020906-0000-0000-c000-000000000046}" 44 | CLSID_WORD95 = Types::Clsid.parse "{00020900-0000-0000-c000-000000000046}" 45 | 46 | CLSID_MAP = { 47 | CLSID_EXCEL97 => :xls, 48 | CLSID_EXCEL95 => :xls, 49 | CLSID_WORD97 => :doc, 50 | CLSID_WORD95 => :doc 51 | } 52 | 53 | MIME_TYPES = { 54 | :xls => 'application/vnd.ms-excel', 55 | :doc => 'application/msword', 56 | :ppt => 'application/vnd.ms-powerpoint', 57 | # not registered at IANA, but seems most common usage 58 | :msg => 'application/vnd.ms-outlook', 59 | # this is my default fallback option. also not registered at IANA. 60 | # file(1)'s default is application/msword, which is useless... 61 | nil => 'application/x-ole-storage' 62 | } 63 | 64 | def initialize ole 65 | @ole = ole 66 | end 67 | 68 | # i'm thinking of making file_format and mime_type available through 69 | # #[], #each, and #to_h also, as calculated meta data (not assignable) 70 | 71 | def comp_obj 72 | return {} unless dirent = @ole.root["\001CompObj"] 73 | data = dirent.read 74 | # see - https://gnunet.org/svn/Extractor/doc/StarWrite_File_Format.html 75 | # compobj_version: 0x0001 76 | # byte_order: 0xffe 77 | # windows_version: 0x00000a03 (win31 apparently) 78 | # marker: 0xffffffff 79 | # compobj_version, byte_order, windows_version, marker, clsid = 80 | # data.unpack("vvVVa#{Types::Clsid::SIZE}") 81 | strings = [] 82 | i = 28 83 | while i < data.length 84 | len = data[i, 4].unpack('V').first 85 | i += 4 86 | strings << data[i, len - 1] 87 | i += len 88 | end 89 | # in the unknown chunk, you usually see something like 'Word.Document.6' 90 | {:username => strings[0], :file_format => strings[1], :unknown => strings[2..-1]} 91 | end 92 | private :comp_obj 93 | 94 | def file_format 95 | comp_obj[:file_format] 96 | end 97 | 98 | def mime_type 99 | # based on the CompObj stream contents 100 | type = FORMAT_MAP[file_format] 101 | return MIME_TYPES[type] if type 102 | 103 | # based on the root clsid 104 | type = CLSID_MAP[Types::Clsid.load(@ole.root.clsid)] 105 | return MIME_TYPES[type] if type 106 | 107 | # fallback to heuristics 108 | has_file = Hash[*@ole.root.children.map { |d| [d.name.downcase, true] }.flatten] 109 | return MIME_TYPES[:msg] if has_file['__nameid_version1.0'] or has_file['__properties_version1.0'] 110 | return MIME_TYPES[:doc] if has_file['worddocument'] or has_file['document'] 111 | return MIME_TYPES[:xls] if has_file['workbook'] or has_file['book'] 112 | 113 | MIME_TYPES[nil] 114 | end 115 | 116 | def [] key 117 | pair = Types::PropertySet::PROPERTY_MAP[key.to_s] or return nil 118 | file = FILE_MAP[pair.first] or return nil 119 | dirent = @ole.root[file] or return nil 120 | dirent.open { |io| return Types::PropertySet.new(io)[key] } 121 | end 122 | 123 | def []= key, value 124 | raise NotImplementedError, 'meta data writes not implemented' 125 | end 126 | 127 | def each(&block) 128 | FILE_MAP.values.each do |file| 129 | dirent = @ole.root[file] or next 130 | dirent.open { |io| Types::PropertySet.new(io).each(&block) } 131 | end 132 | end 133 | 134 | def to_h 
135 | inject({}) { |hash, (name, value)| hash.update name.to_sym => value } 136 | end 137 | 138 | def method_missing name, *args, &block 139 | return super unless args.empty? 140 | return super unless Types::PropertySet::PROPERTY_MAP[name.to_s] 141 | self[name] 142 | end 143 | end 144 | 145 | def meta_data 146 | @meta_data ||= MetaData.new(self) 147 | end 148 | end 149 | end 150 | 151 | -------------------------------------------------------------------------------- /ChangeLog: -------------------------------------------------------------------------------- 1 | == 1.2.13.1 / 2024-03-28 2 | 3 | - Try using gemspec metadata to see if it will update rubygems homepage link. 4 | 5 | == 1.2.13 / 2024-03-28 6 | 7 | - Drop defunct Travis sudo: false directive (github #27, olleolleolle). 8 | - Fix broken '-y' command line option (github #20). 9 | 10 | == 1.2.12.2 / 2019-03-12 11 | 12 | - Fix to work with frozen string literals (github #24, taichi-ishitani). 13 | 14 | == 1.2.12.1 / 2017-03-12 15 | 16 | - Integer unification for ruby 2.4.0+ (github #19, koic). 17 | 18 | == 1.2.12 / 2015-12-29 19 | 20 | - Change project homepage to github. 21 | - Add MIT licence to the gemspec (github #17, reiz). 22 | 23 | == 1.2.11.8 / 2014-12-30 24 | 25 | - Fix duplicate key warning (github #15 and googlecode #12). 26 | 27 | == 1.2.11.7 / 2013-06-24 28 | 29 | - Various encoding fixes to make tests pass on current rubies. 30 | - Fix RangesIO#write behaviour when passed an encoded string (github #14, 31 | romuloceccon). 32 | - Fix Dirent#each_child attempting iteration on file children (github #13). 33 | - Unused variable fixes to avoid warnings (github #12, kachick). 34 | 35 | == 1.2.11.6 / 2012-12-10 36 | 37 | - Fix breakage of writable IO stream detection on Windows (github #11). 38 | 39 | == 1.2.11.5 / 2012-11-06 40 | 41 | - Fix breakage of IO.parse_mode on Rubinius (issue #10). 42 | - Make tests pass on rubinius (issue #11). 43 | - Improve RangesIO test coverage. 44 | - Don't warn when mbat_start is AVAIL instead of EOC (github #9). 45 | 46 | == 1.2.11.4 / 2012-07-03 47 | 48 | - Embed PropertySet meta data GUIDs and field lists, to avoid hitting the 49 | filesystem and remove dependency on YAML. 50 | - Update Rakefile to avoid warnings about both deprecated tasks and space 51 | before parentheses. 52 | - Remove Dirent#children=. 53 | 54 | == 1.2.11.3 / 2012-02-25 55 | 56 | - Various fixes for ruby 1.9.3 - handle iconv deprecation and 57 | fix FileTime.from_time (github #7, brawnski). 58 | - Avoid constant redefinition warning in gemspec. 59 | 60 | == 1.2.11.2 / 2011-09-07 61 | 62 | - Remove empty dirents after constructing tree (fixes non-contiguous 63 | allocation table usage). 64 | - Fix fallback Symbol#to_proc to match activesupport definition in case 65 | we get loaded first (github #2, lazylester). 66 | - Use method_defined? for fallback guards to support newer versions of 67 | ruby (jocker). 68 | - Add guard on FileTime#initialize to skip for newer versions of ruby. 69 | Missing required methods, but optimization no longer relevant 70 | anyway (github #4, sagmor). 71 | 72 | == 1.2.11.1 / 2010-10-24 73 | 74 | - Add gemspec and docs to packages so tests can pass again. 75 | - Build tarballs again in package task. 76 | 77 | == 1.2.11 / 2010-10-17 78 | 79 | - Add COPYING for packaging (issue #7) 80 | - Make tests pass using home_run (github #1) 81 | - Make tests pass using mathn. 82 | - Updates to suppress warnings on 1.9.2. 83 | - Split out gemspec into separate file and use for Rakefile. 
84 | 85 | == 1.2.10.1 / 2010-03-19 86 | 87 | - Avoid use of DateTime#new! (issue #4) 88 | - Remove warning about unexpected root names (issue #5) 89 | 90 | == 1.2.10 / 2009-07-20 91 | 92 | - Mostly more performance enhancements, significantly faster for 93 | certain operations. 94 | - Using lots of files is faster due to new hash lookup for dirents by name. 95 | - Writes of many files are faster now too as Dirent & FileTime serialization 96 | has been improved. 97 | - Certain operations from the filesystem api have been profiled and sped up. 98 | - Don't use syswrite on jruby to avoid the buffered stream warnings. 99 | 100 | == 1.2.9 / 2009-07-14 101 | 102 | - Lots of performance enhancements for RangesIO. 103 | 104 | == 1.2.8.2 / 2009-01-01 105 | 106 | - Update code to support ruby 1.9.1 107 | 108 | == 1.2.8.1 / 2008-10-22 109 | 110 | - Fix a couple of breakages when using $KCODE = 'UTF8' 111 | 112 | == 1.2.8 / 2008-10-08 113 | 114 | - Check in the new fixes to the mbat support. 115 | - Update README to be a bit more useful. 116 | 117 | == 1.2.7 / 2008-08-12 118 | 119 | - Prepare Ole::Types::PropertySet for write support. 120 | - Introduce Ole::Storage#meta_data as an easy interface to meta data stored 121 | within various property sets. 122 | - Add new --metadata action to oletool to dump said metadata. 123 | - Add new --mimetype action to oletool, and corresponding Ole::Storage#mime_type 124 | function to try to guess mime type of a file based on some simple heuristics. 125 | - Restructure project files a bit, and pull in file_system & meta_data support 126 | by default. 127 | - More tests - now have 100% coverage. 128 | 129 | == 1.2.6 / 2008-07-21 130 | 131 | - Fix FileClass#expand_path to work properly on darwin (issue #2) 132 | - Guard against Enumerable#sum clash with active support (issue #3) 133 | 134 | == 1.2.5 / 2008-02-16 135 | 136 | - Make all tests pass on ruby 1.9. 137 | 138 | == 1.2.4 / 2008-01-09 139 | 140 | - Make all tests pass on windows (issue #1). 141 | - Make all tests pass on a power pc (running ubuntu). 142 | - Property set convenience access functions. 143 | 144 | == 1.2.3 / 2007-12-28 145 | 146 | - MBAT write support re-implmented. Can now write files over ~8mb again. 147 | - Minor fixes (truncation in #flush, file modification timestamps) 148 | - More test coverage 149 | - Initial (read-only) property set support. 150 | - Complete filesystem api, to pass most of the rubyzip tests. 151 | - Add a ChangeLog :). 152 | 153 | == 1.2.2 / 2007-11-05 154 | 155 | - Lots of test updates, 90% coverage. 156 | - Fix +to_tree+ method to be more efficient, and stream output. 157 | - Optimizations from benchmarks and profiling, mostly for writes. Fixed 158 | AllocationTable#resize_chain, RangesIOResizable#truncate and 159 | AllocationTable#free_block. 160 | - Add in filesystem test file from rubyzip, and start working on a 161 | filesystem api. 162 | 163 | == 1.2.1 / 2007-08-20 164 | 165 | - Separate out from ruby-msg as new project. 166 | 167 | -------------------------------------------------------------------------------- /lib/ole/types/property_set.rb: -------------------------------------------------------------------------------- 1 | # encoding: ASCII-8BIT 2 | 3 | module Ole 4 | module Types 5 | # 6 | # The PropertySet class currently supports readonly access to the properties 7 | # serialized in "property set" streams, such as the file "\005SummaryInformation", 8 | # in OLE files. 9 | # 10 | # Think it has its roots in MFC property set serialization. 
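	# A minimal usage sketch, mirroring test_property_set.rb - it assumes +io+
	# is a "\005SummaryInformation" stream opened in binary mode:
	#
	#   propset = Ole::Types::PropertySet.new io
	#   propset.os          # eg :mac
	#   propset.doc_author  # eg "Charles Lowe"
	#   propset.to_h        # named properties as a symbol-keyed hash
	#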
11 | # 12 | # See http://poi.apache.org/hpsf/internals.html for details 13 | # 14 | class PropertySet 15 | HEADER_SIZE = 28 16 | HEADER_PACK = "vvVa#{Clsid::SIZE}V" 17 | OS_MAP = { 18 | 0 => :win16, 19 | 1 => :mac, 20 | 2 => :win32, 21 | 0x20001 => :ooffice, # open office on linux... 22 | } 23 | 24 | # define a smattering of the property set guids. 25 | DATA = { 26 | Clsid.parse('{f29f85e0-4ff9-1068-ab91-08002b27b3d9}') => ['FMTID_SummaryInformation', { 27 | 2 => 'doc_title', 28 | 3 => 'doc_subject', 29 | 4 => 'doc_author', 30 | 5 => 'doc_keywords', 31 | 6 => 'doc_comments', 32 | 7 => 'doc_template', 33 | 8 => 'doc_last_author', 34 | 9 => 'doc_rev_number', 35 | 10 => 'doc_edit_time', 36 | 11 => 'doc_last_printed', 37 | 12 => 'doc_created_time', 38 | 13 => 'doc_last_saved_time', 39 | 14 => 'doc_page_count', 40 | 15 => 'doc_word_count', 41 | 16 => 'doc_char_count', 42 | 18 => 'doc_app_name', 43 | 19 => 'security' 44 | }], 45 | Clsid.parse('{d5cdd502-2e9c-101b-9397-08002b2cf9ae}') => ['FMTID_DocSummaryInfo', { 46 | 2 => 'doc_category', 47 | 3 => 'doc_presentation_target', 48 | 4 => 'doc_byte_count', 49 | 5 => 'doc_line_count', 50 | 6 => 'doc_para_count', 51 | 7 => 'doc_slide_count', 52 | 8 => 'doc_note_count', 53 | 9 => 'doc_hidden_count', 54 | 10 => 'mmclips', 55 | 11 => 'scale_crop', 56 | 12 => 'heading_pairs', 57 | 13 => 'doc_part_titles', 58 | 14 => 'doc_manager', 59 | 15 => 'doc_company', 60 | 16 => 'links_up_to_date' 61 | }], 62 | Clsid.parse('{d5cdd505-2e9c-101b-9397-08002b2cf9ae}') => ['FMTID_UserDefinedProperties', {}] 63 | } 64 | 65 | # create an inverted map of names to guid/key pairs 66 | PROPERTY_MAP = DATA.inject({}) do |h1, (guid, data)| 67 | data[1].inject(h1) { |h2, (id, name)| h2.update name => [guid, id] } 68 | end 69 | 70 | module Constants 71 | DATA.each { |guid, (name, _)| const_set name, guid } 72 | end 73 | 74 | include Constants 75 | include Enumerable 76 | 77 | class Section 78 | include Variant::Constants 79 | include Enumerable 80 | 81 | SIZE = Clsid::SIZE + 4 82 | PACK = "a#{Clsid::SIZE}v" 83 | 84 | attr_accessor :guid, :offset 85 | attr_reader :length 86 | 87 | def initialize str, property_set 88 | @property_set = property_set 89 | @guid, @offset = str.unpack PACK 90 | self.guid = Clsid.load guid 91 | load_header 92 | end 93 | 94 | def io 95 | @property_set.io 96 | end 97 | 98 | def load_header 99 | io.seek offset 100 | @byte_size, @length = io.read(8).unpack 'V2' 101 | end 102 | 103 | def [] key 104 | each_raw do |id, property_offset| 105 | return read_property(property_offset).last if key == id 106 | end 107 | nil 108 | end 109 | 110 | def []= key, value 111 | raise NotImplementedError, 'section writes not yet implemented' 112 | end 113 | 114 | def each 115 | each_raw do |id, property_offset| 116 | yield id, read_property(property_offset).last 117 | end 118 | end 119 | 120 | private 121 | 122 | def each_raw 123 | io.seek offset + 8 124 | io.read(length * 8).each_chunk(8) { |str| yield(*str.unpack('V2')) } 125 | end 126 | 127 | def read_property property_offset 128 | io.seek offset + property_offset 129 | type, value = io.read(8).unpack('V2') 130 | # is the method of serialization here custom? 131 | case type 132 | when VT_LPSTR, VT_LPWSTR 133 | value = Variant.load type, io.read(value) 134 | # .... 
135 | end 136 | [type, value] 137 | end 138 | end 139 | 140 | attr_reader :io, :signature, :unknown, :os, :guid, :sections 141 | 142 | def initialize io 143 | @io = io 144 | load_header io.read(HEADER_SIZE) 145 | load_section_list io.read(@num_sections * Section::SIZE) 146 | # expect no gap between last section and start of data. 147 | #Log.warn "gap between section list and property data" unless io.pos == @sections.map(&:offset).min 148 | end 149 | 150 | def load_header str 151 | @signature, @unknown, @os_id, @guid, @num_sections = str.unpack HEADER_PACK 152 | # should i check that unknown == 0? it usually is. so is the guid actually 153 | @guid = Clsid.load @guid 154 | @os = OS_MAP[@os_id] || Log.warn("unknown operating system id #{@os_id}") 155 | end 156 | 157 | def load_section_list str 158 | @sections = str.to_enum(:each_chunk, Section::SIZE).map { |s| Section.new s, self } 159 | end 160 | 161 | def [] key 162 | pair = PROPERTY_MAP[key.to_s] or return nil 163 | section = @sections.find { |s| s.guid == pair.first } or return nil 164 | section[pair.last] 165 | end 166 | 167 | def []= key, value 168 | pair = PROPERTY_MAP[key.to_s] or return nil 169 | section = @sections.find { |s| s.guid == pair.first } or return nil 170 | section[pair.last] = value 171 | end 172 | 173 | def method_missing name, *args, &block 174 | if name.to_s =~ /(.*)=$/ 175 | return super unless args.length == 1 176 | return super unless PROPERTY_MAP[$1] 177 | self[$1] = args.first 178 | else 179 | return super unless args.length == 0 180 | return super unless PROPERTY_MAP[name.to_s] 181 | self[name] 182 | end 183 | end 184 | 185 | def each 186 | @sections.each do |section| 187 | next unless pair = DATA[section.guid] 188 | map = pair.last 189 | section.each do |id, value| 190 | name = map[id] or next 191 | yield name, value 192 | end 193 | end 194 | end 195 | 196 | def to_h 197 | inject({}) { |hash, (name, value)| hash.update name.to_sym => value } 198 | end 199 | end 200 | end 201 | end 202 | 203 | -------------------------------------------------------------------------------- /lib/ole/support.rb: -------------------------------------------------------------------------------- 1 | # encoding: ASCII-8BIT 2 | 3 | # 4 | # A file with general support functions used by most files in the project. 5 | # 6 | # These are the only methods added to other classes. 7 | # 8 | 9 | require 'logger' 10 | require 'stringio' 11 | require 'enumerator' 12 | 13 | class String # :nodoc: 14 | def each_chunk size 15 | (length / size.to_f).ceil.times { |i| yield self[i * size, size] } 16 | end 17 | end 18 | 19 | class File # :nodoc: 20 | # for interface consistency with StringIO etc (rather than adding #stat 21 | # to them). used by RangesIO. 22 | unless File.method_defined?(:size) 23 | def size 24 | stat.size 25 | end 26 | end 27 | end 28 | 29 | class Symbol # :nodoc: 30 | unless Symbol.method_defined?(:to_proc) 31 | def to_proc 32 | Proc.new { |*args| args.shift.__send__(self, *args) } 33 | end 34 | end 35 | end 36 | 37 | module Enumerable # :nodoc: 38 | unless [].respond_to? :group_by 39 | # 1.9 backport 40 | def group_by 41 | hash = Hash.new { |h, key| h[key] = [] } 42 | each { |item| hash[yield(item)] << item } 43 | hash 44 | end 45 | end 46 | 47 | unless [].respond_to? :sum 48 | def sum initial=0 49 | inject(initial) { |a, b| a + b } 50 | end 51 | end 52 | end 53 | 54 | # move to support? 55 | class IO # :nodoc: 56 | # Copy data from IO-like object +src+, to +dst+ 57 | def self.copy src, dst 58 | until src.eof? 
59 | buf = src.read(4096) 60 | dst.write buf 61 | end 62 | end 63 | end 64 | 65 | class Logger # :nodoc: 66 | # A helper method for creating a +Logger+ which produce call stack 67 | # in their output 68 | def self.new_with_callstack logdev=STDERR 69 | log = Logger.new logdev 70 | log.level = WARN 71 | log.formatter = proc do |severity, time, progname, msg| 72 | # find where we were called from, in our code 73 | callstack = caller.dup 74 | callstack.shift while callstack.first =~ /\/logger\.rb:\d+:in/ 75 | from = callstack.first.sub(/:in `(.*?)'/, ":\\1") 76 | "[%s %s]\n%-7s%s\n" % [time.strftime('%H:%M:%S'), from, severity, msg.to_s] 77 | end 78 | log 79 | end 80 | end 81 | 82 | # Include this module into a class that defines #each_child. It should 83 | # maybe use #each instead, but its easier to be more specific, and use 84 | # an alias. 85 | # 86 | # I don't want to force the class to cache children (eg where children 87 | # are loaded on request in pst), because that forces the whole tree to 88 | # be loaded. So, the methods should only call #each_child once, and 89 | # breadth first iteration holds its own copy of the children around. 90 | # 91 | # Main methods are #recursive, and #to_tree 92 | module RecursivelyEnumerable # :nodoc: 93 | def each_recursive_depth_first(&block) 94 | each_child do |child| 95 | yield child 96 | if child.respond_to? :each_recursive_depth_first 97 | child.each_recursive_depth_first(&block) 98 | end 99 | end 100 | end 101 | 102 | # don't think this is actually a proper breadth first recursion. only first 103 | # level is breadth first. 104 | def each_recursive_breadth_first(&block) 105 | children = [] 106 | each_child do |child| 107 | children << child if child.respond_to? :each_recursive_breadth_first 108 | yield child 109 | end 110 | children.each { |child| child.each_recursive_breadth_first(&block) } 111 | end 112 | 113 | def each_recursive mode=:depth_first, &block 114 | # we always actually yield ourself (the tree root) before recursing 115 | yield self 116 | send "each_recursive_#{mode}", &block 117 | end 118 | 119 | # the idea of this function, is to allow use of regular Enumerable methods 120 | # in a recursive fashion. eg: 121 | # 122 | # # just looks at top level children 123 | # root.find { |child| child.some_condition? } 124 | # # recurse into all children getting non-folders, breadth first 125 | # root.recursive(:breadth_first).select { |child| !child.folder? } 126 | # # just get everything 127 | # items = root.recursive.to_a 128 | # 129 | def recursive mode=:depth_first 130 | to_enum(:each_recursive, mode) 131 | end 132 | 133 | # streams a "tree" form of the recursively enumerable structure to +io+, or 134 | # return a string form instead if +io+ is not specified. 135 | # 136 | # mostly a debugging aid. can specify a different block which will be called 137 | # to provide the string form for each node. 
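	#   # eg, render each node by its name instead of #inspect - a sketch that
	#   # assumes the nodes respond to #name:
	#   puts root.to_tree { |node| node.name }
	#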
138 | def to_tree io=''.dup, &inspect 139 | inspect ||= :inspect.to_proc 140 | io << "- #{inspect[self]}\n" 141 | recurse = proc do |node, prefix| 142 | child = nil 143 | node.each_child do |next_child| 144 | if child 145 | io << "#{prefix}|- #{inspect[child]}\n" 146 | recurse.call child, prefix + '| ' 147 | end 148 | child = next_child 149 | end if node.respond_to?(:each_child) 150 | if child 151 | io << "#{prefix}\\- #{inspect[child]}\n" 152 | recurse.call child, prefix + ' ' 153 | end 154 | end 155 | recurse.call self, ' ' 156 | io 157 | end 158 | end 159 | 160 | module Ole 161 | class IOMode 162 | # ruby 1.9 defines binary as 0, which isn't very helpful. 163 | # its 4 in rubinius. no longer using 164 | # 165 | # BINARY = 0x4 unless defined?(BINARY) 166 | # 167 | # for that reason, have my own constants module here 168 | module Constants 169 | include File::Constants 170 | BINARY = 0x4 171 | end 172 | 173 | include Constants 174 | NAMES = %w[rdonly wronly rdwr creat trunc append binary] 175 | 176 | # nabbed from rubinius, and modified 177 | def self.parse_mode mode 178 | ret = 0 179 | 180 | case mode[0, 1] 181 | when 'r'; ret |= RDONLY 182 | when 'w'; ret |= WRONLY | CREAT | TRUNC 183 | when 'a'; ret |= WRONLY | CREAT | APPEND 184 | else raise ArgumentError, "illegal access mode #{mode}" 185 | end 186 | 187 | (1...mode.length).each do |i| 188 | case mode[i, 1] 189 | when '+'; ret = (ret & ~(RDONLY | WRONLY)) | RDWR 190 | when 'b'; ret |= BINARY 191 | else raise ArgumentError, "illegal access mode #{mode}" 192 | end 193 | end 194 | 195 | ret 196 | end 197 | 198 | attr_reader :flags 199 | def initialize flags 200 | flags = self.class.parse_mode flags.to_str if flags.respond_to? :to_str 201 | raise ArgumentError, "invalid flags - #{flags.inspect}" unless Integer === flags 202 | @flags = flags 203 | end 204 | 205 | def writeable? 206 | #(@flags & RDONLY) == 0 207 | (@flags & 0x3) != RDONLY 208 | end 209 | 210 | def readable? 211 | (@flags & WRONLY) == 0 212 | end 213 | 214 | def truncate? 215 | (@flags & TRUNC) != 0 216 | end 217 | 218 | def append? 219 | (@flags & APPEND) != 0 220 | end 221 | 222 | def create? 223 | (@flags & CREAT) != 0 224 | end 225 | 226 | def binary? 227 | (@flags & BINARY) != 0 228 | end 229 | 230 | =begin 231 | # revisit this 232 | def apply io 233 | if truncate? 234 | io.truncate 0 235 | elsif append? 236 | io.seek IO::SEEK_END, 0 237 | end 238 | end 239 | =end 240 | 241 | def inspect 242 | names = NAMES.map { |name| name if (flags & IOMode.const_get(name.upcase)) != 0 } 243 | names.unshift 'rdonly' if (flags & 0x3) == 0 244 | "#<#{self.class} #{names.compact * '|'}>" 245 | end 246 | end 247 | end 248 | 249 | -------------------------------------------------------------------------------- /test/test_storage.rb: -------------------------------------------------------------------------------- 1 | #! /usr/bin/ruby 2 | # coding: utf-8 3 | 4 | $: << File.dirname(__FILE__) + '/../lib' 5 | #require 'rubygems' 6 | 7 | require 'test/unit' 8 | require 'ole/storage' 9 | require 'digest/sha1' 10 | require 'stringio' 11 | require 'tempfile' 12 | 13 | # 14 | # = TODO 15 | # 16 | # These tests could be a lot more complete. 17 | # 18 | 19 | # should test resizeable and migrateable IO. 
20 | 21 | class TestStorageRead < Test::Unit::TestCase 22 | TEST_DIR = File.dirname __FILE__ 23 | 24 | def setup 25 | @ole = Ole::Storage.open "#{TEST_DIR}/test_word_6.doc", 'rb' 26 | end 27 | 28 | def teardown 29 | @ole.close 30 | end 31 | 32 | def test_header 33 | # should have further header tests, testing the validation etc. 34 | assert_equal 17, @ole.header.to_a.length 35 | assert_equal 117, @ole.header.dirent_start 36 | assert_equal 1, @ole.header.num_bat 37 | assert_equal 1, @ole.header.num_sbat 38 | assert_equal 0, @ole.header.num_mbat 39 | end 40 | 41 | def test_new_without_explicit_mode 42 | open "#{TEST_DIR}/test_word_6.doc", 'rb' do |f| 43 | assert_equal false, Ole::Storage.new(f).writeable 44 | end 45 | end 46 | 47 | def capture_warnings 48 | @warn = [] 49 | outer_warn = @warn 50 | old_log = Ole::Log 51 | old_verbose = $VERBOSE 52 | begin 53 | $VERBOSE = nil 54 | Ole.const_set :Log, Object.new 55 | # restore for the yield 56 | $VERBOSE = old_verbose 57 | (class << Ole::Log; self; end).send :define_method, :warn do |message| 58 | outer_warn << message 59 | end 60 | yield 61 | ensure 62 | $VERBOSE = nil 63 | Ole.const_set :Log, old_log 64 | $VERBOSE = old_verbose 65 | end 66 | end 67 | 68 | def test_invalid 69 | assert_raises Ole::Storage::FormatError do 70 | Ole::Storage.open StringIO.new(0.chr * 1024) 71 | end 72 | assert_raises Ole::Storage::FormatError do 73 | Ole::Storage.open StringIO.new(Ole::Storage::Header::MAGIC + 0.chr * 1024) 74 | end 75 | capture_warnings do 76 | head = Ole::Storage::Header.new 77 | head.threshold = 1024 78 | assert_raises NoMethodError do 79 | Ole::Storage.open StringIO.new(head.to_s + 0.chr * 1024) 80 | end 81 | end 82 | assert_equal ['may not be a valid OLE2 structured storage file'], @warn 83 | end 84 | 85 | def test_inspect 86 | assert_match(/# root=#>/, @ole.inspect) 87 | end 88 | 89 | def test_fat 90 | # the fat block has all the numbers from 5..118 bar 117 91 | bbat_table = [112] + ((5..118).to_a - [112, 117]) 92 | assert_equal bbat_table, @ole.bbat.reject { |i| i >= (1 << 32) - 3 }, 'bbat' 93 | sbat_table = (1..43).to_a - [2, 3] 94 | assert_equal sbat_table, @ole.sbat.reject { |i| i >= (1 << 32) - 3 }, 'sbat' 95 | end 96 | 97 | def test_directories 98 | assert_equal 5, @ole.dirents.length, 'have all directories' 99 | # a more complicated one would be good for this 100 | assert_equal 4, @ole.root.children.length, 'properly nested directories' 101 | end 102 | 103 | def test_utf16_conversion 104 | assert_equal 'Root Entry', @ole.root.name 105 | assert_equal 'WordDocument', @ole.root.children[2].name 106 | end 107 | 108 | def test_read 109 | # the regular String#hash was different on the mac, so asserting 110 | # against full strings. 
switch this to sha1 instead of this fugly blob 111 | sha1sums = %w[ 112 | d3d1cde9eb43ed4b77d197af879f5ca8b8837577 113 | 65b75cbdd1f94ade632baeeb0848dec2a342c844 114 | cfc230ec7515892cfdb85e4a173e0ce364094970 115 | ffd859d94647a11b693f06f092d1a2bccc59d50d 116 | ] 117 | 118 | # test the ole storage type 119 | type = 'Microsoft Word 6.0-Dokument' 120 | assert_equal type, (@ole.root/"\001CompObj").read[32..-1][/([^\x00]+)/m, 1] 121 | # i was actually not loading data correctly before, so carefully check everything here 122 | assert_equal sha1sums, @ole.root.children.map { |child| Digest::SHA1.hexdigest child.read } 123 | end 124 | 125 | def test_dirent 126 | dirent = @ole.root.children.first 127 | assert_equal "\001Ole", dirent.name 128 | assert_equal 20, dirent.size 129 | assert_equal '#', @ole.root.inspect 130 | 131 | # exercise Dirent#[]. note that if you use a number, you get the Struct 132 | # fields. 133 | assert_equal dirent, @ole.root["\001Ole"] 134 | assert_equal dirent.name_utf16, dirent[0] 135 | assert_equal nil, @ole.root.time 136 | 137 | assert_equal @ole.root.children, @ole.root.to_enum(:each_child).to_a 138 | 139 | dirent.open('r') { |f| assert_equal 2, f.first_block } 140 | dirent.open('w') { |f| } 141 | dirent.open('a') { |f| } 142 | end 143 | 144 | def test_delete 145 | dirent = @ole.root.children.first 146 | assert_raises(ArgumentError) { @ole.root.delete nil } 147 | assert_equal [dirent], @ole.root.children & [dirent] 148 | assert_equal 20, dirent.size 149 | @ole.root.delete dirent 150 | assert_equal [], @ole.root.children & [dirent] 151 | assert_equal 0, dirent.size 152 | end 153 | end 154 | 155 | class TestStorageWrite < Test::Unit::TestCase 156 | TEST_DIR = File.dirname __FILE__ 157 | 158 | def sha1 str 159 | Digest::SHA1.hexdigest str 160 | end 161 | 162 | # try and test all the various things the #flush function does 163 | def test_flush 164 | end 165 | 166 | # FIXME 167 | # don't really want to lock down the actual internal api's yet. this will just 168 | # ensure for the time being that #flush continues to work properly. need a host 169 | # of checks involving writes that resize their file bigger/smaller, that resize 170 | # the bats to more blocks, that resizes the sb_blocks, that has migration etc. 171 | def test_write_hash 172 | io = StringIO.open open("#{TEST_DIR}/test_word_6.doc", 'rb', &:read) 173 | assert_equal '9974e354def8471225f548f82b8d81c701221af7', sha1(io.string) 174 | Ole::Storage.open(io, :update_timestamps => false) { } 175 | # hash changed. used to be efa8cfaf833b30b1d1d9381771ddaafdfc95305c 176 | # thats because i now truncate the io, and am probably removing some trailing 177 | # allocated available blocks. 
178 | assert_equal 'a39e3c4041b8a893c753d50793af8d21ca8f0a86', sha1(io.string) 179 | # add a repack test here 180 | Ole::Storage.open io, :update_timestamps => false, &:repack 181 | assert_equal 'c8bb9ccacf0aaad33677e1b2a661ee6e66a48b5a', sha1(io.string) 182 | end 183 | 184 | def test_plain_repack 185 | io = StringIO.open open("#{TEST_DIR}/test_word_6.doc", 'rb', &:read) 186 | assert_equal '9974e354def8471225f548f82b8d81c701221af7', sha1(io.string) 187 | Ole::Storage.open io, :update_timestamps => false, &:repack 188 | # note equivalence to the above flush, repack, flush 189 | assert_equal 'c8bb9ccacf0aaad33677e1b2a661ee6e66a48b5a', sha1(io.string) 190 | # lets do it again using memory backing 191 | Ole::Storage.open(io, :update_timestamps => false) { |ole| ole.repack :mem } 192 | # note equivalence to the above flush, repack, flush 193 | assert_equal 'c8bb9ccacf0aaad33677e1b2a661ee6e66a48b5a', sha1(io.string) 194 | assert_raises ArgumentError do 195 | Ole::Storage.open(io, :update_timestamps => false) { |ole| ole.repack :typo } 196 | end 197 | end 198 | 199 | def test_create_from_scratch_hash 200 | io = StringIO.new(''.dup) 201 | Ole::Storage.open(io) { } 202 | assert_equal '6bb9d6c1cdf1656375e30991948d70c5fff63d57', sha1(io.string) 203 | # more repack test, note invariance 204 | Ole::Storage.open io, :update_timestamps => false, &:repack 205 | assert_equal '6bb9d6c1cdf1656375e30991948d70c5fff63d57', sha1(io.string) 206 | end 207 | 208 | def test_create_dirent 209 | Ole::Storage.open StringIO.new do |ole| 210 | dirent = Ole::Storage::Dirent.new ole, :name => 'test name', :type => :dir 211 | assert_equal 'test name', dirent.name 212 | assert_equal :dir, dirent.type 213 | # for a dirent created from scratch, type_id is currently not set until serialization: 214 | assert_equal 0, dirent.type_id 215 | dirent.to_s 216 | assert_equal 1, dirent.type_id 217 | assert_raises(ArgumentError) { Ole::Storage::Dirent.new ole, :type => :bogus } 218 | end 219 | end 220 | end 221 | 222 | -------------------------------------------------------------------------------- /lib/ole/ranges_io.rb: -------------------------------------------------------------------------------- 1 | # encoding: ASCII-8BIT 2 | 3 | # need Ole::IOMode 4 | require 'ole/support' 5 | 6 | # 7 | # = Introduction 8 | # 9 | # +RangesIO+ is a basic class for wrapping another IO object allowing you to arbitrarily reorder 10 | # slices of the input file by providing a list of ranges. Intended as an initial measure to curb 11 | # inefficiencies in the Dirent#data method just reading all of a file's data in one hit, with 12 | # no method to stream it. 13 | # 14 | # This class will encapuslate the ranges (corresponding to big or small blocks) of any ole file 15 | # and thus allow reading/writing directly to the source bytes, in a streamed fashion (so just 16 | # getting 16 bytes doesn't read the whole thing). 17 | # 18 | # In the simplest case it can be used with a single range to provide a limited io to a section of 19 | # a file. 20 | # 21 | # = Limitations 22 | # 23 | # * No buffering. by design at the moment. Intended for large reads 24 | # 25 | # = TODO 26 | # 27 | # On further reflection, this class is something of a joining/optimization of 28 | # two separate IO classes. a SubfileIO, for providing access to a range within 29 | # a File as a separate IO object, and a ConcatIO, allowing the presentation of 30 | # a bunch of io objects as a single unified whole. 
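#
# (Added annotation, not from the original source: a hedged usage sketch of
# the class described above, wrapping a plain StringIO and exposing two
# slices of it as a single read-only stream.)
#
#   base = StringIO.new 'AAAABBBBCCCCDDDD'
#   io   = RangesIO.new base, 'r', :ranges => [[0, 4], [8, 4]]
#   io.size  # => 8
#   io.read  # => "AAAACCCC"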
31 | # 32 | # I will need such a ConcatIO if I'm to provide Mime#to_io, a method that will 33 | # convert a whole mime message into an IO stream, that can be read from. 34 | # It will just be the concatenation of a series of IO objects, corresponding to 35 | # headers and boundaries, as StringIO's, and SubfileIO objects, coming from the 36 | # original message proper, or RangesIO as provided by the Attachment#data, that 37 | # will then get wrapped by Mime in a Base64IO or similar, to get encoded on-the- 38 | # fly. Thus the attachment, in its plain or encoded form, and the message as a 39 | # whole never exists as a single string in memory, as it does now. This is a 40 | # fair bit of work to achieve, but generally useful I believe. 41 | # 42 | # This class isn't ole specific, maybe move it to my general ruby stream project. 43 | # 44 | class RangesIO 45 | attr_reader :io, :mode, :ranges, :size, :pos 46 | # +io+:: the parent io object that we are wrapping. 47 | # +mode+:: the mode to use 48 | # +params+:: hash of params. 49 | # * :ranges - byte offsets, either: 50 | # 1. an array of ranges [1..2, 4..5, 6..8] or 51 | # 2. an array of arrays, where the second is length [[1, 1], [4, 1], [6, 2]] for the above 52 | # (think the way String indexing works) 53 | # * :close_parent - boolean to close parent when this object is closed 54 | # 55 | # NOTE: the +ranges+ can overlap. 56 | def initialize io, mode='r', params={} 57 | mode, params = 'r', mode if Hash === mode 58 | ranges = params[:ranges] 59 | @params = {:close_parent => false}.merge params 60 | @mode = Ole::IOMode.new mode 61 | @io = io 62 | # initial position in the file 63 | @pos = 0 64 | self.ranges = ranges || [[0, io.size]] 65 | # handle some mode flags 66 | truncate 0 if @mode.truncate? 67 | seek size if @mode.append? 68 | end 69 | 70 | # add block form. TODO add test for this 71 | def self.open(*args, &block) 72 | ranges_io = new(*args) 73 | if block_given? 74 | begin; yield ranges_io 75 | ensure; ranges_io.close 76 | end 77 | else 78 | ranges_io 79 | end 80 | end 81 | 82 | def ranges= ranges 83 | # convert ranges to arrays. check for negative ranges? 84 | ranges = ranges.map { |r| Range === r ? [r.begin, r.end - r.begin] : r } 85 | # combine ranges 86 | if @params[:combine] == false 87 | # might be useful for debugging... 88 | @ranges = ranges 89 | else 90 | @ranges = [] 91 | next_pos = nil 92 | ranges.each do |pos, len| 93 | if next_pos == pos 94 | @ranges.last[1] += len 95 | next_pos += len 96 | else 97 | @ranges << [pos, len] 98 | next_pos = pos + len 99 | end 100 | end 101 | end 102 | # calculate cumulative offsets from range sizes 103 | @size = 0 104 | @offsets = [] 105 | @ranges.each do |pos, len| 106 | @offsets << @size 107 | @size += len 108 | end 109 | self.pos = @pos 110 | end 111 | 112 | def pos= pos, whence=IO::SEEK_SET 113 | case whence 114 | when IO::SEEK_SET 115 | when IO::SEEK_CUR 116 | pos += @pos 117 | when IO::SEEK_END 118 | pos = @size + pos 119 | else raise Errno::EINVAL 120 | end 121 | raise Errno::EINVAL unless (0..@size) === pos 122 | @pos = pos 123 | 124 | # do a binary search throuh @offsets to find the active range. 
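		# (added note) @offsets is sorted ascending; the goal is the last range whose
		# starting offset is <= pos. an exact hit on an offset returns immediately;
		# otherwise the loop narrows a down to the first offset strictly greater than
		# pos, so the containing range sits at index a - 1.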
125 | a, c, b = 0, 0, @offsets.length 126 | while a < b 127 | c = (a + b).div(2) 128 | pivot = @offsets[c] 129 | if pos == pivot 130 | @active = c 131 | return 132 | elsif pos < pivot 133 | b = c 134 | else 135 | a = c + 1 136 | end 137 | end 138 | 139 | @active = a - 1 140 | end 141 | 142 | alias seek :pos= 143 | alias tell :pos 144 | 145 | def rewind 146 | seek 0 147 | end 148 | 149 | def close 150 | @io.close if @params[:close_parent] 151 | end 152 | 153 | def eof? 154 | @pos == @size 155 | end 156 | 157 | # read bytes from file, to a maximum of +limit+, or all available if unspecified. 158 | def read limit=nil 159 | data = ''.dup 160 | return data if eof? 161 | limit ||= size 162 | pos, len = @ranges[@active] 163 | diff = @pos - @offsets[@active] 164 | pos += diff 165 | len -= diff 166 | loop do 167 | @io.seek pos 168 | if limit < len 169 | s = @io.read(limit).to_s 170 | @pos += s.length 171 | data << s 172 | break 173 | end 174 | s = @io.read(len).to_s 175 | @pos += s.length 176 | data << s 177 | break if s.length != len 178 | limit -= len 179 | break if @active == @ranges.length - 1 180 | @active += 1 181 | pos, len = @ranges[@active] 182 | end 183 | data 184 | end 185 | 186 | # you may override this call to update @ranges and @size, if applicable. 187 | def truncate size 188 | raise NotImplementedError, 'truncate not supported' 189 | end 190 | 191 | # using explicit forward instead of an alias now for overriding. 192 | # should override truncate. 193 | def size= size 194 | truncate size 195 | end 196 | 197 | def write data 198 | # duplicates object to avoid side effects for the caller, but do so only if 199 | # encoding isn't already ASCII-8BIT (slight optimization) 200 | if data.respond_to?(:encoding) and data.encoding != Encoding::ASCII_8BIT 201 | data = data.dup.force_encoding(Encoding::ASCII_8BIT) 202 | end 203 | return 0 if data.empty? 204 | data_pos = 0 205 | # if we don't have room, we can use the truncate hook to make more space. 206 | if data.length > @size - @pos 207 | begin 208 | truncate @pos + data.length 209 | rescue NotImplementedError 210 | raise IOError, "unable to grow #{inspect} to write #{data.length} bytes" 211 | end 212 | end 213 | pos, len = @ranges[@active] 214 | diff = @pos - @offsets[@active] 215 | pos += diff 216 | len -= diff 217 | loop do 218 | @io.seek pos 219 | if data_pos + len > data.length 220 | chunk = data[data_pos..-1] 221 | @io.write chunk 222 | @pos += chunk.length 223 | data_pos = data.length 224 | break 225 | end 226 | @io.write data[data_pos, len] 227 | @pos += len 228 | data_pos += len 229 | break if @active == @ranges.length - 1 230 | @active += 1 231 | pos, len = @ranges[@active] 232 | end 233 | data_pos 234 | end 235 | 236 | alias << write 237 | 238 | # i can wrap it in a buffered io stream that 239 | # provides gets, and appropriately handle pos, 240 | # truncate. mostly added just to past the tests. 241 | # FIXME 242 | def gets 243 | s = read 1024 244 | i = s.index "\n" 245 | self.pos -= s.length - (i+1) 246 | s[0..i] 247 | end 248 | alias readline :gets 249 | 250 | def inspect 251 | "#<#{self.class} io=#{io.inspect}, size=#{@size}, pos=#{@pos}>" 252 | end 253 | end 254 | 255 | # this subclass of ranges io explicitly ignores the truncate part of 'w' modes. 256 | # only really needed for the allocation table writes etc. maybe just use explicit modes 257 | # for those 258 | # better yet write a test that breaks before I fix it. added nodoc for the 259 | # time being. 
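# (Added annotation.) The subclass below just masks the truncate bit out of the
# parsed mode before handing it to RangesIO, so a hedged sketch like
#
#   RangesIONonResizeable.new io, 'w', :ranges => ranges
#
# behaves as plain 'w' minus the truncate-on-open step. passing the resulting
# Integer flags straight to super works because Ole::IOMode#initialize accepts
# Integer flags as well as mode strings.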
260 | class RangesIONonResizeable < RangesIO # :nodoc: 261 | def initialize io, mode='r', params={} 262 | mode, params = 'r', mode if Hash === mode 263 | flags = Ole::IOMode.new(mode).flags & ~IO::TRUNC 264 | super io, flags, params 265 | end 266 | end 267 | 268 | -------------------------------------------------------------------------------- /lib/ole/types/base.rb: -------------------------------------------------------------------------------- 1 | # encoding: ASCII-8BIT 2 | 3 | require 'date' 4 | 5 | require 'ole/base' 6 | 7 | module Ole # :nodoc: 8 | # 9 | # The Types module contains all the serialization and deserialization code for standard ole 10 | # types. 11 | # 12 | # It also defines all the variant type constants, and symbolic names. 13 | # 14 | module Types 15 | # for anything that we don't have serialization code for 16 | class Data < String 17 | def self.load str 18 | new str 19 | end 20 | 21 | def self.dump str 22 | str.to_s 23 | end 24 | end 25 | 26 | class Lpstr < String 27 | def self.load str 28 | # not sure if its always there, but there is often a trailing 29 | # null byte. 30 | new str.chomp(0.chr) 31 | end 32 | 33 | def self.dump str 34 | # do i need to append the null byte? 35 | str.to_s 36 | end 37 | end 38 | 39 | if ''.respond_to? :encode 40 | # NOTE: only here in the interim to preserve behaviour of 41 | # FROM/TO_UTF16 constants for ruby-msg. 42 | class Iconv # :nodoc: 43 | def initialize(to, from) 44 | @to, @from = to, from 45 | end 46 | 47 | def iconv(str) 48 | str.encode(@to, @from) 49 | end 50 | end 51 | 52 | # for VT_LPWSTR 53 | class Lpwstr < String 54 | FROM_UTF16 = Iconv.new 'utf-8', 'utf-16le' 55 | TO_UTF16 = Iconv.new 'utf-16le', 'utf-8' 56 | 57 | def self.load str 58 | new str.encode(Encoding::UTF_8, Encoding::UTF_16LE).chomp(0.chr) 59 | end 60 | 61 | def self.dump str 62 | # need to append nulls? 63 | data = str.encode(Encoding::UTF_16LE) 64 | # not sure if this is the recommended way to do it, but I want to treat 65 | # the resulting utf16 data as regular bytes, not characters. 66 | data.force_encoding Encoding::ASCII_8BIT 67 | data 68 | end 69 | end 70 | else 71 | require 'iconv' 72 | 73 | # for VT_LPWSTR 74 | class Lpwstr < String 75 | FROM_UTF16 = Iconv.new 'utf-8', 'utf-16le' 76 | TO_UTF16 = Iconv.new 'utf-16le', 'utf-8' 77 | 78 | def self.load str 79 | new FROM_UTF16.iconv(str).chomp(0.chr) 80 | end 81 | 82 | def self.dump str 83 | # need to append nulls? 84 | TO_UTF16.iconv str 85 | end 86 | end 87 | end 88 | 89 | # for VT_FILETIME 90 | class FileTime < DateTime 91 | SIZE = 8 92 | 93 | # DateTime.new is slow... faster version for FileTime 94 | def self.new year, month, day, hour=0, min=0, sec=0 95 | # DateTime will remove leap and leap-leap seconds 96 | sec = 59 if sec > 59 97 | if month <= 2 98 | month += 12 99 | year -= 1 100 | end 101 | y = year + 4800 102 | m = month - 3 103 | jd = day + (153 * m + 2).div(5) + 365 * y + y.div(4) - y.div(100) + y.div(400) - 32045 104 | fr = hour / 24.0 + min / 1440.0 + sec / 86400.0 105 | # new! was actually new0 in older versions of ruby (<=1.8.4?) 106 | # see issue #4. 107 | msg = respond_to?(:new!) ? :new! : :new0 108 | send msg, jd + fr - 0.5, 0, ITALY 109 | end if respond_to?(:new!) 
|| respond_to?(:new0) 110 | 111 | def self.from_time time 112 | new(*time.to_a[0, 6].reverse) 113 | end 114 | 115 | def self.now 116 | from_time Time.now 117 | end 118 | 119 | EPOCH = new 1601, 1, 1 120 | 121 | #def initialize year, month, day, hour, min, sec 122 | 123 | # Create a +DateTime+ object from a struct +FILETIME+ 124 | # (http://msdn2.microsoft.com/en-us/library/ms724284.aspx). 125 | # 126 | # Converts +str+ to two 32 bit time values, comprising the high and low 32 bits of 127 | # the 100's of nanoseconds since 1st january 1601 (Epoch). 128 | def self.load str 129 | low, high = str.to_s.unpack 'V2' 130 | # we ignore these, without even warning about it 131 | return nil if low == 0 and high == 0 132 | # the + 0.00001 here stinks a bit... 133 | seconds = (high * (1 << 32) + low) / 1e7 + 0.00001 134 | obj = EPOCH + seconds / 86400 rescue return 135 | # work around home_run not preserving derived class 136 | obj = new! obj.jd + obj.day_fraction - 0.5, 0, ITALY unless FileTime === obj 137 | obj 138 | end 139 | 140 | # +time+ should be able to be either a Time, Date, or DateTime. 141 | def self.dump time 142 | return 0.chr * SIZE unless time 143 | # convert whatever is given to be a datetime, to handle the large range 144 | case time 145 | when Date # this includes DateTime & FileTime 146 | when Time 147 | time = from_time time 148 | else 149 | raise ArgumentError, 'unknown time argument - %p' % [time] 150 | end 151 | # round to milliseconds (throwing away nanosecond precision) to 152 | # compensate for using Float-based DateTime 153 | nanoseconds = ((time - EPOCH).to_f * 864000000).round * 1000 154 | high, low = nanoseconds.divmod 1 << 32 155 | [low, high].pack 'V2' 156 | end 157 | 158 | def inspect 159 | "#<#{self.class} #{to_s}>" 160 | end 161 | end 162 | 163 | # for VT_CLSID 164 | # Unlike most of the other conversions, the Guid's are serialized/deserialized by actually 165 | # doing nothing! (eg, _load & _dump are null ops) 166 | # Rather, its just a string with a different inspect string, and it includes a 167 | # helper method for creating a Guid from that readable form (#format). 168 | class Clsid < String 169 | SIZE = 16 170 | PACK = 'V v v CC C6' 171 | 172 | def self.load str 173 | new str.to_s 174 | end 175 | 176 | def self.dump guid 177 | return 0.chr * SIZE unless guid 178 | # allow use of plain strings in place of guids. 179 | guid['-'] ? parse(guid) : guid 180 | end 181 | 182 | def self.parse str 183 | vals = str.scan(/[a-f\d]+/i).map(&:hex) 184 | if vals.length == 5 185 | # this is pretty ugly 186 | vals[3] = ('%04x' % vals[3]).scan(/../).map(&:hex) 187 | vals[4] = ('%012x' % vals[4]).scan(/../).map(&:hex) 188 | guid = new vals.flatten.pack(PACK) 189 | return guid if guid.format.delete('{}') == str.downcase.delete('{}') 190 | end 191 | raise ArgumentError, 'invalid guid - %p' % str 192 | end 193 | 194 | def format 195 | "%08x-%04x-%04x-%02x%02x-#{'%02x' * 6}" % unpack(PACK) 196 | end 197 | 198 | def inspect 199 | "#<#{self.class}:{#{format}}>" 200 | end 201 | end 202 | 203 | # 204 | # The OLE variant types, extracted from 205 | # http://www.marin.clara.net/COM/variant_type_definitions.htm. 206 | # 207 | # A subset is also in WIN32OLE::VARIANT, but its not cross platform (obviously). 208 | # 209 | # Use like: 210 | # 211 | # p Ole::Types::Variant::NAMES[0x001f] => 'VT_LPWSTR' 212 | # p Ole::Types::VT_DATE # => 7 213 | # 214 | # The serialization / deserialization functions should be fixed to make it easier 215 | # to work with. 
like 216 | # 217 | # Ole::Types.from_str(VT_DATE, data) # and 218 | # Ole::Types.to_str(VT_DATE, data) 219 | # 220 | # Or similar, rather than having to do VT_* <=> ad hoc class name etc as it is 221 | # currently. 222 | # 223 | module Variant 224 | NAMES = { 225 | 0x0000 => 'VT_EMPTY', 226 | 0x0001 => 'VT_NULL', 227 | 0x0002 => 'VT_I2', 228 | 0x0003 => 'VT_I4', 229 | 0x0004 => 'VT_R4', 230 | 0x0005 => 'VT_R8', 231 | 0x0006 => 'VT_CY', 232 | 0x0007 => 'VT_DATE', 233 | 0x0008 => 'VT_BSTR', 234 | 0x0009 => 'VT_DISPATCH', 235 | 0x000a => 'VT_ERROR', 236 | 0x000b => 'VT_BOOL', 237 | 0x000c => 'VT_VARIANT', 238 | 0x000d => 'VT_UNKNOWN', 239 | 0x000e => 'VT_DECIMAL', 240 | 0x0010 => 'VT_I1', 241 | 0x0011 => 'VT_UI1', 242 | 0x0012 => 'VT_UI2', 243 | 0x0013 => 'VT_UI4', 244 | 0x0014 => 'VT_I8', 245 | 0x0015 => 'VT_UI8', 246 | 0x0016 => 'VT_INT', 247 | 0x0017 => 'VT_UINT', 248 | 0x0018 => 'VT_VOID', 249 | 0x0019 => 'VT_HRESULT', 250 | 0x001a => 'VT_PTR', 251 | 0x001b => 'VT_SAFEARRAY', 252 | 0x001c => 'VT_CARRAY', 253 | 0x001d => 'VT_USERDEFINED', 254 | 0x001e => 'VT_LPSTR', 255 | 0x001f => 'VT_LPWSTR', 256 | 0x0040 => 'VT_FILETIME', 257 | 0x0041 => 'VT_BLOB', 258 | 0x0042 => 'VT_STREAM', 259 | 0x0043 => 'VT_STORAGE', 260 | 0x0044 => 'VT_STREAMED_OBJECT', 261 | 0x0045 => 'VT_STORED_OBJECT', 262 | 0x0046 => 'VT_BLOB_OBJECT', 263 | 0x0047 => 'VT_CF', 264 | 0x0048 => 'VT_CLSID', 265 | 0x0fff => 'VT_ILLEGALMASKED', 266 | 0x1000 => 'VT_VECTOR', 267 | 0x2000 => 'VT_ARRAY', 268 | 0x4000 => 'VT_BYREF', 269 | 0x8000 => 'VT_RESERVED', 270 | 0xffff => 'VT_ILLEGAL' 271 | } 272 | 273 | CLASS_MAP = { 274 | # haven't seen one of these. wonder if its same as FILETIME? 275 | #'VT_DATE' => ?, 276 | 'VT_LPSTR' => Lpstr, 277 | 'VT_LPWSTR' => Lpwstr, 278 | 'VT_FILETIME' => FileTime, 279 | 'VT_CLSID' => Clsid 280 | } 281 | 282 | module Constants 283 | NAMES.each { |num, name| const_set name, num } 284 | # VT_TYPEMASK has the same value as VT_ILLEGALMASKED. Keep the latter in the 285 | # NAMES hash so that it will be used when mapping a concrete type to display 286 | # string, but still define this constant here for other uses 287 | VT_TYPEMASK = 0x0fff 288 | end 289 | 290 | def self.load type, str 291 | type = NAMES[type] or raise ArgumentError, 'unknown ole type - 0x%04x' % type 292 | (CLASS_MAP[type] || Data).load str 293 | end 294 | 295 | def self.dump type, variant 296 | type = NAMES[type] or raise ArgumentError, 'unknown ole type - 0x%04x' % type 297 | (CLASS_MAP[type] || Data).dump variant 298 | end 299 | end 300 | 301 | include Variant::Constants 302 | 303 | # deprecated aliases, kept mostly for the benefit of ruby-msg, until 304 | # i release a new version. 305 | def self.load_guid str 306 | Variant.load VT_CLSID, str 307 | end 308 | 309 | def self.load_time str 310 | Variant.load VT_FILETIME, str 311 | end 312 | 313 | FROM_UTF16 = Lpwstr::FROM_UTF16 314 | TO_UTF16 = Lpwstr::TO_UTF16 315 | end 316 | end 317 | 318 | -------------------------------------------------------------------------------- /lib/ole/storage/file_system.rb: -------------------------------------------------------------------------------- 1 | # encoding: ASCII-8BIT 2 | 3 | # 4 | # = Introduction 5 | # 6 | # This file intends to provide file system-like api support, a la zip/zipfilesystem. 7 | # 8 | # = TODO 9 | # 10 | # - need to implement some more IO functions on RangesIO, like #puts, #print 11 | # etc, like AbstractOutputStream from zipfile. 
12 | # 13 | # - check Dir.mkdir, and File.open, and File.rename, to add in filename 14 | # length checks (max 32 / 31 or something). 15 | # do the automatic truncation, and add in any necessary warnings. 16 | # 17 | # - File.split('a/') == File.split('a') == ['.', 'a'] 18 | # the implication of this, is that things that try to force directory 19 | # don't work. like, File.rename('a', 'b'), should work if a is a file 20 | # or directory, but File.rename('a/', 'b') should only work if a is 21 | # a directory. tricky, need to clean things up a bit more. 22 | # i think a general path name => dirent method would work, with flags 23 | # about what should raise an error. 24 | # 25 | # - Need to look at streamlining things after getting all the tests passing, 26 | # as this file's getting pretty long - almost half the real implementation. 27 | # and is probably more inefficient than necessary. 28 | # too many exceptions in the expected path of certain functions. 29 | # 30 | # - should look at profiles before and after switching ruby-msg to use 31 | # the filesystem api. 32 | # 33 | 34 | module Ole # :nodoc: 35 | class Storage 36 | def file 37 | @file ||= FileClass.new self 38 | end 39 | 40 | def dir 41 | @dir ||= DirClass.new self 42 | end 43 | 44 | # tries to get a dirent for path. return nil if it doesn't exist 45 | # (change it) 46 | def dirent_from_path path 47 | dirent = @root 48 | path = file.expand_path(path).split('/') 49 | until path.empty? 50 | part = path.shift 51 | next if part.empty? 52 | return nil if dirent.file? 53 | return nil unless dirent = dirent/part 54 | end 55 | dirent 56 | end 57 | 58 | class FileClass 59 | class Stat 60 | attr_reader :ftype, :size, :blocks, :blksize 61 | attr_reader :nlink, :uid, :gid, :dev, :rdev, :ino 62 | def initialize dirent 63 | @dirent = dirent 64 | @size = dirent.size 65 | if file? 66 | @ftype = 'file' 67 | bat = dirent.ole.bat_for_size(dirent.size) 68 | @blocks = bat.chain(dirent.first_block).length 69 | @blksize = bat.block_size 70 | else 71 | @ftype = 'directory' 72 | @blocks = 0 73 | @blksize = 0 74 | end 75 | # a lot of these are bogus. ole file format has no analogs 76 | @nlink = 1 77 | @uid, @gid = 0, 0 78 | @dev, @rdev = 0, 0 79 | @ino = 0 80 | # need to add times - atime, mtime, ctime. 81 | end 82 | 83 | alias rdev_major :rdev 84 | alias rdev_minor :rdev 85 | 86 | def file? 87 | @dirent.file? 88 | end 89 | 90 | def directory? 91 | @dirent.dir? 92 | end 93 | 94 | def size? 95 | size if file? 96 | end 97 | 98 | def inspect 99 | pairs = (instance_variables - ['@dirent']).map do |n| 100 | "#{n[1..-1]}=#{instance_variable_get n}" 101 | end 102 | "#<#{self.class} #{pairs * ', '}>" 103 | end 104 | end 105 | 106 | def initialize ole 107 | @ole = ole 108 | end 109 | 110 | def expand_path path 111 | # its already absolute if it starts with a '/' 112 | unless path =~ /^\// 113 | # get the raw stored pwd value (its blank for root) 114 | pwd = @ole.dir.instance_variable_get :@pwd 115 | path = "#{pwd}/#{path}" 116 | end 117 | # at this point its already absolute. we use File.expand_path 118 | # just for the .. and . handling 119 | # No longer use RUBY_PLATFORM =~ /win/ as it matches darwin. better way? 
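			# (added note) on Windows, where ALT_SEPARATOR is "\\", File.expand_path
			# prefixes its result with a drive letter such as "C:", which is meaningless
			# inside an ole file; the [2..-1] slice below strips those two characters.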
120 | if File::ALT_SEPARATOR != "\\" 121 | File.expand_path(path) 122 | else 123 | File.expand_path(path)[2..-1] 124 | end 125 | end 126 | 127 | # +orig_path+ is just so that we can use the requested path 128 | # in the error messages even if it has been already modified 129 | def dirent_from_path path, orig_path=nil 130 | orig_path ||= path 131 | dirent = @ole.dirent_from_path path 132 | raise Errno::ENOENT, orig_path unless dirent 133 | raise Errno::EISDIR, orig_path if dirent.dir? 134 | dirent 135 | end 136 | private :dirent_from_path 137 | 138 | def exists? path 139 | !!@ole.dirent_from_path(path) 140 | end 141 | alias exist? :exists? 142 | 143 | def file? path 144 | dirent = @ole.dirent_from_path path 145 | dirent and dirent.file? 146 | end 147 | 148 | def directory? path 149 | dirent = @ole.dirent_from_path path 150 | dirent and dirent.dir? 151 | end 152 | 153 | def open path, mode='r', &block 154 | if IOMode.new(mode).create? 155 | begin 156 | dirent = dirent_from_path path 157 | rescue Errno::ENOENT 158 | # maybe instead of repeating this everywhere, i should have 159 | # a get_parent_dirent function. 160 | parent_path, basename = File.split expand_path(path) 161 | parent = @ole.dir.send :dirent_from_path, parent_path, path 162 | parent << dirent = Dirent.new(@ole, :type => :file, :name => basename) 163 | end 164 | else 165 | dirent = dirent_from_path path 166 | end 167 | dirent.open mode, &block 168 | end 169 | 170 | # explicit wrapper instead of alias to inhibit block 171 | def new path, mode='r' 172 | open path, mode 173 | end 174 | 175 | def size path 176 | dirent_from_path(path).size 177 | rescue Errno::EISDIR 178 | # kind of arbitrary. I'm getting 4096 from ::File, but 179 | # the zip tests want 0. 180 | 0 181 | end 182 | 183 | def size? path 184 | dirent_from_path(path).size 185 | # any other exceptions i need to rescue? 186 | rescue Errno::ENOENT, Errno::EISDIR 187 | nil 188 | end 189 | 190 | def stat path 191 | # we do this to allow dirs. 192 | dirent = @ole.dirent_from_path path 193 | raise Errno::ENOENT, path unless dirent 194 | Stat.new dirent 195 | end 196 | 197 | def read path 198 | open path, &:read 199 | end 200 | 201 | # most of the work this function does is moving the dirent between 202 | # 2 parents. the actual name changing is quite simple. 203 | # File.rename can move a file into another folder, which is why i've 204 | # done it too, though i think its not always possible... 205 | # 206 | # FIXME File.rename can be used for directories too.... 207 | def rename from_path, to_path 208 | # check what we want to rename from exists. do it this 209 | # way to allow directories. 210 | dirent = @ole.dirent_from_path from_path 211 | raise Errno::ENOENT, from_path unless dirent 212 | # delete what we want to rename to if necessary 213 | begin 214 | unlink to_path 215 | rescue Errno::ENOENT 216 | # we actually get here, but rcov doesn't think so. add 1 + 1 to 217 | # keep rcov happy for now... :) 218 | 1 + 1 219 | end 220 | # reparent the dirent 221 | to_parent_path, to_basename = File.split expand_path(to_path) 222 | from_parent = dirent.parent 223 | to_parent = @ole.dir.send :dirent_from_path, to_parent_path, to_path 224 | from_parent.delete dirent, false 225 | # and also change its name 226 | dirent.name = to_basename 227 | to_parent << dirent 228 | 0 229 | end 230 | 231 | def unlink(*paths) 232 | paths.each do |path| 233 | dirent = dirent_from_path path 234 | dirent.parent.delete dirent 235 | end 236 | paths.length # hmmm. as per ::File ? 
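			# (added note) ::File.delete and ::File.unlink return the number of
			# filenames they were passed, so returning paths.length matches that.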
237 | end 238 | alias delete :unlink 239 | end 240 | 241 | # 242 | # An *instance* of this class is supposed to provide similar methods 243 | # to the class methods of Dir itself. 244 | # 245 | # Fairly complete - like zip/zipfilesystem's implementation, i provide 246 | # everything except chroot and glob. glob could be done with a glob 247 | # to regex conversion, and then simply match in the entries array... 248 | # although recursive glob complicates that somewhat. 249 | # 250 | # Dir.chroot, Dir.glob, Dir.[], and Dir.tmpdir is the complete list of 251 | # methods still missing. 252 | # 253 | class DirClass 254 | def initialize ole 255 | @ole = ole 256 | @pwd = '' 257 | end 258 | 259 | # +orig_path+ is just so that we can use the requested path 260 | # in the error messages even if it has been already modified 261 | def dirent_from_path path, orig_path=nil 262 | orig_path ||= path 263 | dirent = @ole.dirent_from_path path 264 | raise Errno::ENOENT, orig_path unless dirent 265 | raise Errno::ENOTDIR, orig_path unless dirent.dir? 266 | dirent 267 | end 268 | private :dirent_from_path 269 | 270 | def open path 271 | dir = Dir.new path, entries(path) 272 | return dir unless block_given? 273 | yield dir 274 | end 275 | 276 | # as for file, explicit alias to inhibit block 277 | def new path 278 | open path 279 | end 280 | 281 | # pwd is always stored without the trailing slash. we handle 282 | # the root case here 283 | def pwd 284 | return '/' if @pwd.empty? 285 | @pwd 286 | end 287 | alias getwd :pwd 288 | 289 | def chdir orig_path 290 | # make path absolute, squeeze slashes, and remove trailing slash 291 | path = @ole.file.expand_path(orig_path).squeeze('/').sub(/\/$/, '') 292 | # this is just for the side effects of the exceptions if invalid 293 | dirent_from_path path, orig_path 294 | if block_given? 295 | old_pwd = @pwd 296 | begin 297 | @pwd = path 298 | yield 299 | ensure 300 | @pwd = old_pwd 301 | end 302 | else 303 | @pwd = path 304 | 0 305 | end 306 | end 307 | 308 | def entries path 309 | dirent = dirent_from_path path 310 | # Not sure about adding on the dots... 311 | entries = %w[. ..] + dirent.children.map(&:name) 312 | # do some checks about un-reachable files 313 | seen = {} 314 | entries.each do |n| 315 | Log.warn "inaccessible file (filename contains slash) - #{n.inspect}" if n['/'] 316 | Log.warn "inaccessible file (duplicate filename) - #{n.inspect}" if seen[n] 317 | seen[n] = true 318 | end 319 | entries 320 | end 321 | 322 | def foreach path, &block 323 | entries(path).each(&block) 324 | end 325 | 326 | def mkdir path 327 | parent_path, basename = File.split @ole.file.expand_path(path) 328 | # note that we will complain about the full path despite accessing 329 | # the parent path. this is consistent with ::Dir 330 | parent = dirent_from_path parent_path, path 331 | # now, we first should ensure that it doesn't already exist 332 | # either as a file or a directory. 333 | raise Errno::EEXIST, path if parent/basename 334 | parent << Dirent.new(@ole, :type => :dir, :name => basename) 335 | 0 336 | end 337 | 338 | def rmdir path 339 | dirent = dirent_from_path path 340 | raise Errno::ENOTEMPTY, path unless dirent.children.empty? 341 | dirent.parent.delete dirent 342 | 0 # hmmm. as per ::Dir ? 343 | end 344 | alias delete :rmdir 345 | alias unlink :rmdir 346 | 347 | # note that there is nothing remotely ole specific about 348 | # this class. it simply provides the dir like sequential access 349 | # methods on top of an array. 
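		# (added annotation) a hedged usage sketch, assuming an open Ole::Storage in
		# +ole+; DirClass#open above wraps the entry names in one of these:
		#
		#   ole.dir.open('/') do |dir|
		#     dir.each { |name| puts name }   # '.', '..', then the child names
		#   end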
350 | class Dir 351 | include Enumerable 352 | 353 | attr_reader :path 354 | def initialize path, entries 355 | @path, @entries, @pos = path, entries, 0 356 | @closed = false 357 | end 358 | 359 | def pos 360 | raise IOError if @closed 361 | @pos 362 | end 363 | 364 | def each(&block) 365 | raise IOError if @closed 366 | @entries.each(&block) 367 | end 368 | 369 | def close 370 | @closed = true 371 | end 372 | 373 | def read 374 | raise IOError if @closed 375 | @entries[pos] 376 | ensure 377 | @pos += 1 if pos < @entries.length 378 | end 379 | 380 | def pos= pos 381 | raise IOError if @closed 382 | @pos = [[0, pos].max, @entries.length].min 383 | end 384 | alias tell :pos 385 | alias seek :pos= 386 | 387 | def rewind 388 | seek 0 389 | end 390 | end 391 | end 392 | end 393 | end 394 | 395 | -------------------------------------------------------------------------------- /test/test_filesystem.rb: -------------------------------------------------------------------------------- 1 | #! /usr/bin/ruby 2 | # encoding: ASCII-8BIT 3 | 4 | # 5 | # = NOTE 6 | # 7 | # This file was originally called "zipfilesystemtest.rb", and was part of 8 | # the test case for the "rubyzip" project. 9 | # 10 | # As I borrowed the smart idea of using a filesystem style interface, it 11 | # only seemed right that I appropriate the test case in addition :). It is 12 | # a testament to the cleanliness of the original api & tests as to how 13 | # easy it was to repurpose it for this project. 14 | # 15 | # I have made some modifications to the file due to some differences in the 16 | # capabilities of zip vs ole, but the majority of the copyright and credit 17 | # still goes to Thomas. His original copyright message: 18 | # 19 | # Copyright (C) 2002, 2003 Thomas Sondergaard 20 | # rubyzip is free software; you can redistribute it and/or 21 | # modify it under the terms of the ruby license. 22 | # 23 | 24 | TEST_DIR = File.dirname __FILE__ 25 | $:.unshift "#{TEST_DIR}/../lib" 26 | 27 | require 'ole/storage' 28 | require 'test/unit' 29 | 30 | module ExtraAssertions 31 | 32 | def assert_forwarded(anObject, method, retVal, *expectedArgs) 33 | callArgs = nil 34 | setCallArgsProc = proc { |args| callArgs = args } 35 | anObject.instance_eval <<-"end_eval" 36 | alias #{method}_org #{method} 37 | def #{method}(*args) 38 | ObjectSpace._id2ref(#{setCallArgsProc.object_id}).call(args) 39 | ObjectSpace._id2ref(#{retVal.object_id}) 40 | end 41 | end_eval 42 | 43 | assert_equal(retVal, yield) # Invoke test 44 | assert_equal(expectedArgs, callArgs) 45 | ensure 46 | anObject.instance_eval "alias #{method} #{method}_org" 47 | end 48 | 49 | end 50 | 51 | class OleFsNonmutatingTest < Test::Unit::TestCase 52 | def setup 53 | @ole = Ole::Storage.open TEST_DIR + '/oleWithDirs.ole', 'rb' 54 | end 55 | 56 | def teardown 57 | @ole.close if @ole 58 | end 59 | 60 | =begin 61 | def test_umask 62 | assert_equal(File.umask, @ole.file.umask) 63 | @ole.file.umask(0006) 64 | end 65 | =end 66 | 67 | def test_exists? 68 | assert(! @ole.file.exists?("notAFile")) 69 | assert(@ole.file.exists?("file1")) 70 | assert(@ole.file.exists?("dir1")) 71 | assert(@ole.file.exists?("dir1/")) 72 | assert(@ole.file.exists?("dir1/file12")) 73 | assert(@ole.file.exist?("dir1/file12")) # notice, tests exist? alias of exists? ! 
74 | 75 | @ole.dir.chdir "dir1/" 76 | assert(!@ole.file.exists?("file1")) 77 | assert(@ole.file.exists?("file12")) 78 | end 79 | 80 | def test_open_read 81 | blockCalled = false 82 | @ole.file.open("file1", "r") { 83 | |f| 84 | blockCalled = true 85 | assert_equal("this is the entry 'file1' in my test archive!", 86 | f.readline.chomp) 87 | } 88 | assert(blockCalled) 89 | 90 | blockCalled = false 91 | @ole.dir.chdir "dir2" 92 | @ole.file.open("file21", "r") { 93 | |f| 94 | blockCalled = true 95 | assert_equal("this is the entry 'dir2/file21' in my test archive!", 96 | f.readline.chomp) 97 | } 98 | assert(blockCalled) 99 | @ole.dir.chdir "/" 100 | 101 | assert_raise(Errno::ENOENT) { 102 | @ole.file.open("noSuchEntry") 103 | } 104 | 105 | begin 106 | is = @ole.file.open("file1") 107 | assert_equal("this is the entry 'file1' in my test archive!", 108 | is.readline.chomp) 109 | ensure 110 | is.close if is 111 | end 112 | end 113 | 114 | def test_new 115 | begin 116 | is = @ole.file.new("file1") 117 | assert_equal("this is the entry 'file1' in my test archive!", 118 | is.readline.chomp) 119 | ensure 120 | is.close if is 121 | end 122 | begin 123 | is = @ole.file.new("file1") { 124 | fail "should not call block" 125 | } 126 | ensure 127 | is.close if is 128 | end 129 | end 130 | 131 | # currently commented out because I've taken the approach of 132 | # using implicit NameError rather than explicit NotImplementedError. 133 | =begin 134 | def test_symlink 135 | assert_raise(NotImplementedError) { 136 | @ole.file.symlink("file1", "aSymlink") 137 | } 138 | end 139 | =end 140 | 141 | def test_size 142 | assert_raise(Errno::ENOENT) { @ole.file.size("notAFile") } 143 | assert_equal(72, @ole.file.size("file1")) 144 | assert_equal(0, @ole.file.size("dir2/dir21")) 145 | 146 | assert_equal(72, @ole.file.stat("file1").size) 147 | assert_equal(0, @ole.file.stat("dir2/dir21").size) 148 | end 149 | 150 | def test_size? 151 | assert_equal(nil, @ole.file.size?("notAFile")) 152 | assert_equal(72, @ole.file.size?("file1")) 153 | assert_equal(nil, @ole.file.size?("dir2/dir21")) 154 | 155 | assert_equal(72, @ole.file.stat("file1").size?) 156 | assert_equal(nil, @ole.file.stat("dir2/dir21").size?) 157 | end 158 | 159 | def test_file? 160 | assert(@ole.file.file?("file1")) 161 | assert(@ole.file.file?("dir2/file21")) 162 | assert(! @ole.file.file?("dir1")) 163 | assert(! @ole.file.file?("dir1/dir11")) 164 | 165 | assert(@ole.file.stat("file1").file?) 166 | assert(@ole.file.stat("dir2/file21").file?) 167 | assert(! @ole.file.stat("dir1").file?) 168 | assert(! @ole.file.stat("dir1/dir11").file?) 
169 | end 170 | 171 | =begin 172 | include ExtraAssertions 173 | 174 | def test_dirname 175 | assert_forwarded(File, :dirname, "retVal", "a/b/c/d") { 176 | @ole.file.dirname("a/b/c/d") 177 | } 178 | end 179 | 180 | def test_basename 181 | assert_forwarded(File, :basename, "retVal", "a/b/c/d") { 182 | @ole.file.basename("a/b/c/d") 183 | } 184 | end 185 | 186 | def test_split 187 | assert_forwarded(File, :split, "retVal", "a/b/c/d") { 188 | @ole.file.split("a/b/c/d") 189 | } 190 | end 191 | 192 | def test_join 193 | assert_equal("a/b/c", @ole.file.join("a/b", "c")) 194 | assert_equal("a/b/c/d", @ole.file.join("a/b", "c/d")) 195 | assert_equal("/c/d", @ole.file.join("", "c/d")) 196 | assert_equal("a/b/c/d", @ole.file.join("a", "b", "c", "d")) 197 | end 198 | 199 | def test_utime 200 | t_now = Time.now 201 | t_bak = @ole.file.mtime("file1") 202 | @ole.file.utime(t_now, "file1") 203 | assert_equal(t_now, @ole.file.mtime("file1")) 204 | @ole.file.utime(t_bak, "file1") 205 | assert_equal(t_bak, @ole.file.mtime("file1")) 206 | end 207 | 208 | 209 | def assert_always_false(operation) 210 | assert(! @ole.file.send(operation, "noSuchFile")) 211 | assert(! @ole.file.send(operation, "file1")) 212 | assert(! @ole.file.send(operation, "dir1")) 213 | assert(! @ole.file.stat("file1").send(operation)) 214 | assert(! @ole.file.stat("dir1").send(operation)) 215 | end 216 | 217 | def assert_true_if_entry_exists(operation) 218 | assert(! @ole.file.send(operation, "noSuchFile")) 219 | assert(@ole.file.send(operation, "file1")) 220 | assert(@ole.file.send(operation, "dir1")) 221 | assert(@ole.file.stat("file1").send(operation)) 222 | assert(@ole.file.stat("dir1").send(operation)) 223 | end 224 | 225 | def test_pipe? 226 | assert_always_false(:pipe?) 227 | end 228 | 229 | def test_blockdev? 230 | assert_always_false(:blockdev?) 231 | end 232 | 233 | def test_symlink? 234 | assert_always_false(:symlink?) 235 | end 236 | 237 | def test_socket? 238 | assert_always_false(:socket?) 239 | end 240 | 241 | def test_chardev? 242 | assert_always_false(:chardev?) 243 | end 244 | 245 | def test_truncate 246 | assert_raise(StandardError, "truncate not supported") { 247 | @ole.file.truncate("file1", 100) 248 | } 249 | end 250 | 251 | def assert_e_n_o_e_n_t(operation, args = ["NoSuchFile"]) 252 | assert_raise(Errno::ENOENT) { 253 | @ole.file.send(operation, *args) 254 | } 255 | end 256 | 257 | def test_ftype 258 | assert_e_n_o_e_n_t(:ftype) 259 | assert_equal("file", @ole.file.ftype("file1")) 260 | assert_equal("directory", @ole.file.ftype("dir1/dir11")) 261 | assert_equal("directory", @ole.file.ftype("dir1/dir11/")) 262 | end 263 | =end 264 | 265 | def test_directory? 266 | assert(! @ole.file.directory?("notAFile")) 267 | assert(! @ole.file.directory?("file1")) 268 | assert(! @ole.file.directory?("dir1/file11")) 269 | assert(@ole.file.directory?("dir1")) 270 | assert(@ole.file.directory?("dir1/")) 271 | assert(@ole.file.directory?("dir2/dir21")) 272 | 273 | assert(! @ole.file.stat("file1").directory?) 274 | assert(! @ole.file.stat("dir1/file11").directory?) 275 | assert(@ole.file.stat("dir1").directory?) 276 | assert(@ole.file.stat("dir1/").directory?) 277 | assert(@ole.file.stat("dir2/dir21").directory?) 278 | end 279 | 280 | =begin 281 | def test_chown 282 | assert_equal(2, @ole.file.chown(1,2, "dir1", "file1")) 283 | assert_equal(1, @ole.file.stat("dir1").uid) 284 | assert_equal(2, @ole.file.stat("dir1").gid) 285 | assert_equal(2, @ole.file.chown(nil, nil, "dir1", "file1")) 286 | end 287 | 288 | def test_zero? 289 | assert(! 
@ole.file.zero?("notAFile")) 290 | assert(! @ole.file.zero?("file1")) 291 | assert(@ole.file.zero?("dir1")) 292 | blockCalled = false 293 | ZipFile.open("data/generated/5entry.zip") { 294 | |zf| 295 | blockCalled = true 296 | assert(zf.file.zero?("data/generated/empty.txt")) 297 | } 298 | assert(blockCalled) 299 | 300 | assert(! @ole.file.stat("file1").zero?) 301 | assert(@ole.file.stat("dir1").zero?) 302 | blockCalled = false 303 | ZipFile.open("data/generated/5entry.zip") { 304 | |zf| 305 | blockCalled = true 306 | assert(zf.file.stat("data/generated/empty.txt").zero?) 307 | } 308 | assert(blockCalled) 309 | end 310 | =end 311 | 312 | def test_expand_path 313 | assert_equal("/", @ole.file.expand_path(".")) 314 | @ole.dir.chdir "dir1" 315 | assert_equal("/dir1", @ole.file.expand_path(".")) 316 | assert_equal("/dir1/file12", @ole.file.expand_path("file12")) 317 | assert_equal("/", @ole.file.expand_path("..")) 318 | assert_equal("/dir2/dir21", @ole.file.expand_path("../dir2/dir21")) 319 | end 320 | 321 | =begin 322 | def test_mtime 323 | assert_equal(Time.at(1027694306), 324 | @ole.file.mtime("dir2/file21")) 325 | assert_equal(Time.at(1027690863), 326 | @ole.file.mtime("dir2/dir21")) 327 | assert_raise(Errno::ENOENT) { 328 | @ole.file.mtime("noSuchEntry") 329 | } 330 | 331 | assert_equal(Time.at(1027694306), 332 | @ole.file.stat("dir2/file21").mtime) 333 | assert_equal(Time.at(1027690863), 334 | @ole.file.stat("dir2/dir21").mtime) 335 | end 336 | 337 | def test_ctime 338 | assert_nil(@ole.file.ctime("file1")) 339 | assert_nil(@ole.file.stat("file1").ctime) 340 | end 341 | 342 | def test_atime 343 | assert_nil(@ole.file.atime("file1")) 344 | assert_nil(@ole.file.stat("file1").atime) 345 | end 346 | 347 | def test_readable? 348 | assert(! @ole.file.readable?("noSuchFile")) 349 | assert(@ole.file.readable?("file1")) 350 | assert(@ole.file.readable?("dir1")) 351 | assert(@ole.file.stat("file1").readable?) 352 | assert(@ole.file.stat("dir1").readable?) 353 | end 354 | 355 | def test_readable_real? 356 | assert(! @ole.file.readable_real?("noSuchFile")) 357 | assert(@ole.file.readable_real?("file1")) 358 | assert(@ole.file.readable_real?("dir1")) 359 | assert(@ole.file.stat("file1").readable_real?) 360 | assert(@ole.file.stat("dir1").readable_real?) 361 | end 362 | 363 | def test_writable? 364 | assert(! @ole.file.writable?("noSuchFile")) 365 | assert(@ole.file.writable?("file1")) 366 | assert(@ole.file.writable?("dir1")) 367 | assert(@ole.file.stat("file1").writable?) 368 | assert(@ole.file.stat("dir1").writable?) 369 | end 370 | 371 | def test_writable_real? 372 | assert(! @ole.file.writable_real?("noSuchFile")) 373 | assert(@ole.file.writable_real?("file1")) 374 | assert(@ole.file.writable_real?("dir1")) 375 | assert(@ole.file.stat("file1").writable_real?) 376 | assert(@ole.file.stat("dir1").writable_real?) 377 | end 378 | 379 | def test_executable? 380 | assert(! @ole.file.executable?("noSuchFile")) 381 | assert(! @ole.file.executable?("file1")) 382 | assert(@ole.file.executable?("dir1")) 383 | assert(! @ole.file.stat("file1").executable?) 384 | assert(@ole.file.stat("dir1").executable?) 385 | end 386 | 387 | def test_executable_real? 388 | assert(! @ole.file.executable_real?("noSuchFile")) 389 | assert(! @ole.file.executable_real?("file1")) 390 | assert(@ole.file.executable_real?("dir1")) 391 | assert(! @ole.file.stat("file1").executable_real?) 392 | assert(@ole.file.stat("dir1").executable_real?) 393 | end 394 | 395 | def test_owned? 396 | assert_true_if_entry_exists(:owned?) 
397 | end 398 | 399 | def test_grpowned? 400 | assert_true_if_entry_exists(:grpowned?) 401 | end 402 | 403 | def test_setgid? 404 | assert_always_false(:setgid?) 405 | end 406 | 407 | def test_setuid? 408 | assert_always_false(:setgid?) 409 | end 410 | 411 | def test_sticky? 412 | assert_always_false(:sticky?) 413 | end 414 | 415 | def test_stat 416 | s = @ole.file.stat("file1") 417 | assert(s.kind_of?(File::Stat)) # It pretends 418 | assert_raise(Errno::ENOENT, "No such file or directory - noSuchFile") { 419 | @ole.file.stat("noSuchFile") 420 | } 421 | end 422 | 423 | def test_lstat 424 | assert(@ole.file.lstat("file1").file?) 425 | end 426 | 427 | 428 | def test_chmod 429 | assert_raise(Errno::ENOENT, "No such file or directory - noSuchFile") { 430 | @ole.file.chmod(0644, "file1", "NoSuchFile") 431 | } 432 | assert_equal(2, @ole.file.chmod(0644, "file1", "dir1")) 433 | end 434 | 435 | def test_pipe 436 | assert_raise(NotImplementedError) { 437 | @ole.file.pipe 438 | } 439 | end 440 | 441 | def test_foreach 442 | ZipFile.open("data/generated/zipWithDir.zip") { 443 | |zf| 444 | ref = [] 445 | File.foreach("data/file1.txt") { |e| ref << e } 446 | 447 | index = 0 448 | zf.file.foreach("data/file1.txt") { 449 | |l| 450 | assert_equal(ref[index], l) 451 | index = index.next 452 | } 453 | assert_equal(ref.size, index) 454 | } 455 | 456 | ZipFile.open("data/generated/zipWithDir.zip") { 457 | |zf| 458 | ref = [] 459 | File.foreach("data/file1.txt", " ") { |e| ref << e } 460 | 461 | index = 0 462 | zf.file.foreach("data/file1.txt", " ") { 463 | |l| 464 | assert_equal(ref[index], l) 465 | index = index.next 466 | } 467 | assert_equal(ref.size, index) 468 | } 469 | end 470 | 471 | def test_popen 472 | cmd = /mswin/i =~ RUBY_PLATFORM ? 'dir' : 'ls' 473 | 474 | assert_equal(File.popen(cmd) { |f| f.read }, 475 | @ole.file.popen(cmd) { |f| f.read }) 476 | end 477 | 478 | # Can be added later 479 | # def test_select 480 | # fail "implement test" 481 | # end 482 | 483 | def test_readlines 484 | ZipFile.open("data/generated/zipWithDir.zip") { 485 | |zf| 486 | assert_equal(File.readlines("data/file1.txt"), 487 | zf.file.readlines("data/file1.txt")) 488 | } 489 | end 490 | 491 | def test_read 492 | ZipFile.open("data/generated/zipWithDir.zip") { 493 | |zf| 494 | assert_equal(File.read("data/file1.txt"), 495 | zf.file.read("data/file1.txt")) 496 | } 497 | end 498 | =end 499 | end 500 | 501 | class OleFsFileStatTest < Test::Unit::TestCase 502 | 503 | def setup 504 | @ole = Ole::Storage.open TEST_DIR + '/oleWithDirs.ole', 'rb' 505 | end 506 | 507 | def teardown 508 | @ole.close if @ole 509 | end 510 | 511 | def test_blocks 512 | assert_equal(2, @ole.file.stat("file1").blocks) 513 | end 514 | 515 | def test_ino 516 | assert_equal(0, @ole.file.stat("file1").ino) 517 | end 518 | 519 | def test_uid 520 | assert_equal(0, @ole.file.stat("file1").uid) 521 | end 522 | 523 | def test_gid 524 | assert_equal(0, @ole.file.stat("file1").gid) 525 | end 526 | 527 | def test_ftype 528 | assert_equal("file", @ole.file.stat("file1").ftype) 529 | assert_equal("directory", @ole.file.stat("dir1").ftype) 530 | end 531 | 532 | =begin 533 | def test_mode 534 | assert_equal(0600, @ole.file.stat("file1").mode & 0777) 535 | assert_equal(0600, @ole.file.stat("file1").mode & 0777) 536 | assert_equal(0755, @ole.file.stat("dir1").mode & 0777) 537 | assert_equal(0755, @ole.file.stat("dir1").mode & 0777) 538 | end 539 | =end 540 | 541 | def test_dev 542 | assert_equal(0, @ole.file.stat("file1").dev) 543 | end 544 | 545 | def test_rdev 546 | 
assert_equal(0, @ole.file.stat("file1").rdev) 547 | end 548 | 549 | def test_rdev_major 550 | assert_equal(0, @ole.file.stat("file1").rdev_major) 551 | end 552 | 553 | def test_rdev_minor 554 | assert_equal(0, @ole.file.stat("file1").rdev_minor) 555 | end 556 | 557 | def test_nlink 558 | assert_equal(1, @ole.file.stat("file1").nlink) 559 | end 560 | 561 | def test_blksize 562 | assert_equal(64, @ole.file.stat("file1").blksize) 563 | end 564 | 565 | # an additional test i added for coverage. i've tried to make the inspect 566 | # string on the ole stat match that of the regular one. 567 | def test_inspect 568 | # normalize, as instance_variables order is undefined 569 | normalize = proc { |s| s[/ (.*)>$/, 1].split(', ').sort.join(', ') } 570 | assert_match %r{blocks=2.*ftype=file.*size=72}, normalize[@ole.file.stat('file1').inspect] 571 | end 572 | end 573 | 574 | class OleFsFileMutatingTest < Test::Unit::TestCase 575 | def setup 576 | # we use an in memory copy of the file instead of the original 577 | # file based. 578 | @io = StringIO.new open(TEST_DIR + '/oleWithDirs.ole', 'rb', &:read) 579 | end 580 | 581 | def teardown 582 | @io.close if @io 583 | end 584 | 585 | def test_delete 586 | do_test_delete_or_unlink(:delete) 587 | end 588 | 589 | def test_unlink 590 | do_test_delete_or_unlink(:unlink) 591 | end 592 | 593 | def test_open_write 594 | Ole::Storage.open(@io) { 595 | |zf| 596 | 597 | blockCalled = nil 598 | zf.file.open("test_open_write_entry", "w") { 599 | |f| 600 | blockCalled = true 601 | f.write "This is what I'm writing" 602 | } 603 | assert(blockCalled) 604 | assert_equal("This is what I'm writing", 605 | zf.file.read("test_open_write_entry")) 606 | 607 | blockCalled = nil 608 | # Test with existing entry 609 | zf.file.open("file1", "w") { 610 | |f| 611 | blockCalled = true 612 | f.write "This is what I'm writing too" 613 | } 614 | assert(blockCalled) 615 | assert_equal("This is what I'm writing too", 616 | zf.file.read("file1")) 617 | } 618 | end 619 | 620 | def test_rename 621 | Ole::Storage.open(@io) { 622 | |zf| 623 | assert_raise(Errno::ENOENT, "") { 624 | zf.file.rename("NoSuchFile", "bimse") 625 | } 626 | zf.file.rename("file1", "newNameForFile1") 627 | # lets also try moving a file to a different directory, 628 | # and renaming a directory 629 | zf.file.rename('/dir1/file11', '/dir1/dir11/file111') 630 | zf.file.rename('dir1', 'dir9') 631 | } 632 | 633 | Ole::Storage.open(@io) { 634 | |zf| 635 | assert(! zf.file.exists?("file1")) 636 | assert(zf.file.exists?("newNameForFile1")) 637 | assert(zf.file.exists?("dir9/dir11/file111")) 638 | } 639 | end 640 | 641 | def do_test_delete_or_unlink(symbol) 642 | Ole::Storage.open(@io) { 643 | |zf| 644 | assert(zf.file.exists?("dir2/dir21/dir221/file2221")) 645 | zf.file.send(symbol, "dir2/dir21/dir221/file2221") 646 | assert(! zf.file.exists?("dir2/dir21/dir221/file2221")) 647 | 648 | assert(zf.file.exists?("dir1/file11")) 649 | assert(zf.file.exists?("dir1/file12")) 650 | zf.file.send(symbol, "dir1/file11", "dir1/file12") 651 | assert(! zf.file.exists?("dir1/file11")) 652 | assert(! zf.file.exists?("dir1/file12")) 653 | 654 | assert_raise(Errno::ENOENT) { zf.file.send(symbol, "noSuchFile") } 655 | assert_raise(Errno::EISDIR) { zf.file.send(symbol, "dir1/dir11") } 656 | assert_raise(Errno::EISDIR) { zf.file.send(symbol, "dir1/dir11/") } 657 | } 658 | 659 | Ole::Storage.open(@io) { 660 | |zf| 661 | assert(! zf.file.exists?("dir2/dir21/dir221/file2221")) 662 | assert(! zf.file.exists?("dir1/file11")) 663 | assert(! 
zf.file.exists?("dir1/file12")) 664 | 665 | assert(zf.file.exists?("dir1/dir11")) 666 | assert(zf.file.exists?("dir1/dir11/")) 667 | } 668 | end 669 | 670 | end 671 | 672 | class OleFsDirectoryTest < Test::Unit::TestCase 673 | def setup 674 | # we use an in memory copy of the file instead of the original 675 | # file based. 676 | @io = StringIO.new open(TEST_DIR + '/oleWithDirs.ole', 'rb', &:read) 677 | end 678 | 679 | def teardown 680 | @io.close if @io 681 | end 682 | 683 | def test_delete 684 | Ole::Storage.open(@io) { 685 | |zf| 686 | assert_raise(Errno::ENOENT, "No such file or directory - NoSuchFile.txt") { 687 | zf.dir.delete("NoSuchFile.txt") 688 | } 689 | # see explanation below, touch a && ruby -e 'Dir.delete "a"' gives ENOTDIR not EINVAL 690 | assert_raise(Errno::ENOTDIR, "Invalid argument - file1") { 691 | zf.dir.delete("file1") 692 | } 693 | assert(zf.file.exists?("dir1")) 694 | #zf.dir.delete("dir1") 695 | #assert(! zf.file.exists?("dir1")) 696 | # ^ this was allowed in zipfilesystem, but my code follows Dir.delete, and requires that 697 | # the directory be empty first. need to delete recursively if you want other behaviour. 698 | assert_raises(Errno::ENOTEMPTY) { zf.dir.delete('dir1') } 699 | } 700 | end 701 | 702 | def test_mkdir 703 | Ole::Storage.open(@io) { 704 | |zf| 705 | assert_raise(Errno::EEXIST, "File exists - dir1") { 706 | zf.dir.mkdir("file1") 707 | } 708 | assert_raise(Errno::EEXIST, "File exists - dir1") { 709 | zf.dir.mkdir("dir1") 710 | } 711 | assert(!zf.file.exists?("newDir")) 712 | zf.dir.mkdir("newDir") 713 | assert(zf.file.directory?("newDir")) 714 | assert(!zf.file.exists?("newDir2")) 715 | # FIXME - mode not supported yet 716 | #zf.dir.mkdir("newDir2", 3485) 717 | #assert(zf.file.directory?("newDir2")) 718 | zf.dir.rmdir 'newDir' 719 | assert(!zf.file.exists?("newDir")) 720 | } 721 | end 722 | 723 | def test_pwd_chdir_entries 724 | Ole::Storage.open(@io) { 725 | |zf| 726 | assert_equal("/", zf.dir.pwd) 727 | 728 | assert_raise(Errno::ENOENT, "No such file or directory - no such dir") { 729 | zf.dir.chdir "no such dir" 730 | } 731 | 732 | # changed this to ENOTDIR, which is what touch a; ruby -e "Dir.chdir('a')" gives you. 733 | assert_raise(Errno::ENOTDIR, "Invalid argument - file1") { 734 | zf.dir.chdir "file1" 735 | } 736 | 737 | assert_equal(['.', '..', "dir1", "dir2", "file1"].sort, zf.dir.entries(".").sort) 738 | zf.dir.chdir "dir1" 739 | assert_equal("/dir1", zf.dir.pwd) 740 | zf.dir.chdir('dir11') { assert_equal '/dir1/dir11', zf.dir.pwd } 741 | assert_equal '/dir1', zf.dir.pwd 742 | assert_equal(['.', '..', "dir11", "file11", "file12"], zf.dir.entries(".").sort) 743 | 744 | zf.dir.chdir "../dir2/dir21" 745 | assert_equal("/dir2/dir21", zf.dir.pwd) 746 | assert_equal(['.', '..', "dir221"].sort, zf.dir.entries(".").sort) 747 | } 748 | end 749 | 750 | # results here are a bit different from zip/zipfilesystem, as i've chosen to fake '.' 751 | # and '..' 752 | def test_foreach 753 | Ole::Storage.open(@io) { 754 | |zf| 755 | 756 | blockCalled = false 757 | assert_raise(Errno::ENOENT, "No such file or directory - noSuchDir") { 758 | zf.dir.foreach("noSuchDir") { |e| blockCalled = true } 759 | } 760 | assert(! blockCalled) 761 | 762 | assert_raise(Errno::ENOTDIR, "Not a directory - file1") { 763 | zf.dir.foreach("file1") { |e| blockCalled = true } 764 | } 765 | assert(! 
blockCalled) 766 | 767 | entries = [] 768 | zf.dir.foreach(".") { |e| entries << e } 769 | assert_equal(['.', '..', "dir1", "dir2", "file1"].sort, entries.sort) 770 | 771 | entries = [] 772 | zf.dir.foreach("dir1") { |e| entries << e } 773 | assert_equal(['.', '..', "dir11", "file11", "file12"], entries.sort) 774 | } 775 | end 776 | 777 | =begin 778 | # i've gone for NoMethodError instead. 779 | def test_chroot 780 | Ole::Storage.open(@io) { 781 | |zf| 782 | assert_raise(NotImplementedError) { 783 | zf.dir.chroot 784 | } 785 | } 786 | end 787 | =end 788 | 789 | # Globbing not supported yet 790 | #def test_glob 791 | # # test alias []-operator too 792 | # fail "implement test" 793 | #end 794 | 795 | def test_open_new 796 | Ole::Storage.open(@io) { 797 | |zf| 798 | 799 | assert_raise(Errno::ENOTDIR, "Not a directory - file1") { 800 | zf.dir.new("file1") 801 | } 802 | 803 | assert_raise(Errno::ENOENT, "No such file or directory - noSuchFile") { 804 | zf.dir.new("noSuchFile") 805 | } 806 | 807 | d = zf.dir.new(".") 808 | assert_equal(['.', '..', "file1", "dir1", "dir2"].sort, d.entries.sort) 809 | d.close 810 | 811 | zf.dir.open("dir1") { 812 | |d2| 813 | assert_equal(['.', '..', "dir11", "file11", "file12"].sort, d2.entries.sort) 814 | } 815 | } 816 | end 817 | 818 | end 819 | 820 | class OleFsDirIteratorTest < Test::Unit::TestCase 821 | 822 | FILENAME_ARRAY = [ "f1", "f2", "f3", "f4", "f5", "f6" ] 823 | 824 | def setup 825 | @dirIt = Ole::Storage::DirClass::Dir.new('/', FILENAME_ARRAY) 826 | end 827 | 828 | def test_close 829 | @dirIt.close 830 | assert_raise(IOError, "closed directory") { 831 | @dirIt.each { |e| p e } 832 | } 833 | assert_raise(IOError, "closed directory") { 834 | @dirIt.read 835 | } 836 | assert_raise(IOError, "closed directory") { 837 | @dirIt.rewind 838 | } 839 | assert_raise(IOError, "closed directory") { 840 | @dirIt.seek(0) 841 | } 842 | assert_raise(IOError, "closed directory") { 843 | @dirIt.tell 844 | } 845 | 846 | end 847 | 848 | def test_each 849 | # Tested through Enumerable.entries 850 | assert_equal(FILENAME_ARRAY, @dirIt.entries) 851 | end 852 | 853 | def test_read 854 | FILENAME_ARRAY.size.times { 855 | |i| 856 | assert_equal(FILENAME_ARRAY[i], @dirIt.read) 857 | } 858 | end 859 | 860 | def test_rewind 861 | @dirIt.read 862 | @dirIt.read 863 | assert_equal(FILENAME_ARRAY[2], @dirIt.read) 864 | @dirIt.rewind 865 | assert_equal(FILENAME_ARRAY[0], @dirIt.read) 866 | end 867 | 868 | def test_tell_seek 869 | @dirIt.read 870 | @dirIt.read 871 | pos = @dirIt.tell 872 | valAtPos = @dirIt.read 873 | @dirIt.read 874 | @dirIt.seek(pos) 875 | assert_equal(valAtPos, @dirIt.read) 876 | end 877 | 878 | end 879 | 880 | class OleUnicodeTest < Test::Unit::TestCase 881 | def setup 882 | @io = StringIO.new ''.dup 883 | end 884 | 885 | def test_unicode 886 | # in ruby-1.8, encoding is assumed to be UTF-8 (and converted with iconv). 887 | # in ruby-1.9, UTF-8 should work also, but probably shouldn't be using fixed 888 | # TO_UTF16 iconv for other encodings. 889 | resume = "R\xc3\xa9sum\xc3\xa9".dup 890 | resume.force_encoding Encoding::UTF_8 if resume.respond_to? 
:encoding 891 | Ole::Storage.open @io do |ole| 892 | ole.file.open(resume, 'w') { |f| f.write 'Skills: writing bad unit tests' } 893 | end 894 | Ole::Storage.open @io do |ole| 895 | assert_equal ['.', '..', resume], ole.dir.entries('.') 896 | # use internal api to verify utf16 encoding 897 | assert_equal "R\x00\xE9\x00s\x00u\x00m\x00\xE9\x00", ole.root.children[0].name_utf16[0, 6 * 2] 898 | # FIXME: there is a bug in ruby-1.9 (at least in p376), which makes encoded 899 | # strings useless as hash keys. identical bytes, identical encodings, identical 900 | # according to #==, but different hash. 901 | temp = File.expand_path("/#{resume}").split('/').last 902 | if resume == temp and resume.hash != temp.hash 903 | warn 'skipping assertion due to broken String#hash' 904 | else 905 | assert_equal 'Skills', ole.file.read(resume).split(': ', 2).first 906 | end 907 | end 908 | end 909 | 910 | def test_write_utf8_string 911 | programmer = "programa\xC3\xA7\xC3\xA3o ".dup 912 | programmer.force_encoding Encoding::UTF_8 if programmer.respond_to? :encoding 913 | Ole::Storage.open @io do |ole| 914 | ole.file.open '1', 'w' do |writer| 915 | writer.write(programmer) 916 | writer.write('ruby') 917 | end 918 | end 919 | Ole::Storage.open @io do |ole| 920 | ole.file.open '1', 'r' do |reader| 921 | s = reader.read 922 | s = s.force_encoding('UTF-8') if s.respond_to?(:encoding) 923 | assert_equal(programmer + 'ruby', s) 924 | end 925 | end 926 | end 927 | end 928 | 929 | # Copyright (C) 2002, 2003 Thomas Sondergaard 930 | # rubyzip is free software; you can redistribute it and/or 931 | # modify it under the terms of the ruby license. 932 | 933 | -------------------------------------------------------------------------------- /lib/ole/storage/base.rb: -------------------------------------------------------------------------------- 1 | # encoding: ASCII-8BIT 2 | 3 | require 'tempfile' 4 | 5 | require 'ole/base' 6 | require 'ole/types' 7 | require 'ole/ranges_io' 8 | 9 | module Ole # :nodoc: 10 | # 11 | # This class is the primary way the user interacts with an OLE storage file. 12 | # 13 | # = TODO 14 | # 15 | # * the custom header cruft for Header and Dirent needs some love. 16 | # * i have a number of classes doing load/save combos: Header, AllocationTable, Dirent, 17 | # and, in a manner of speaking, but arguably different, Storage itself. 18 | # they have differing api's which would be nice to rethink. 19 | # AllocationTable::Big must be created aot now, as it is used for all subsequent reads. 20 | # 21 | class Storage 22 | # thrown for any bogus OLE file errors. 23 | class FormatError < StandardError # :nodoc: 24 | end 25 | 26 | # options used at creation time 27 | attr_reader :params 28 | # The top of the ole tree structure 29 | attr_reader :root 30 | # The tree structure in its original flattened form. only valid after #load, or #flush. 31 | attr_reader :dirents 32 | # The underlying io object to/from which the ole object is serialized, whether we 33 | # should close it, and whether it is writeable 34 | attr_reader :io, :close_parent, :writeable 35 | # Low level internals, you probably shouldn't need to mess with these 36 | attr_reader :header, :bbat, :sbat, :sb_file 37 | 38 | # +arg+ should be either a filename, or an +IO+ object, and needs to be seekable. 39 | # +mode+ is optional, and should be a regular mode string. 
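	# For instance (a rough sketch only -- 'test.doc' here is just an example path,
	# and StringIO is only one possible io backing):
	#
	#   Ole::Storage.new 'test.doc', 'rb+'        # open an existing document read/write
	#   Ole::Storage.new StringIO.new(''.dup)     # or wrap an in-memory io object
	#
	# See Storage.open below for the block form, which closes the storage for you.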
40 | def initialize arg, mode=nil, params={} 41 | params, mode = mode, nil if Hash === mode 42 | params = {:update_timestamps => true}.merge(params) 43 | @params = params 44 | 45 | # get the io object 46 | @close_parent, @io = if String === arg 47 | mode ||= 'rb' 48 | [true, open(arg, mode)] 49 | else 50 | raise ArgumentError, 'unable to specify mode string with io object' if mode 51 | [false, arg] 52 | end 53 | # force encoding, to avoid picking up source encoding with StringIO or files in text mode 54 | @io.set_encoding Encoding::ASCII_8BIT if @io.respond_to?(:set_encoding) 55 | # do we have this file opened for writing? use mode when provided, 56 | # otherwise try no-op methods which will raise if read-only 57 | @writeable = begin 58 | if mode 59 | IOMode.new(mode).writeable? 60 | else 61 | # works on mri 1.8 & jruby 62 | @io.flush 63 | begin 64 | # works on mri 1.9 & rubinius, throws EBADF on windows 65 | @io.write_nonblock('') if @io.respond_to?(:write_nonblock) 66 | rescue Errno::EBADF 67 | # for windows 68 | @io.syswrite(''); 69 | end 70 | true 71 | end 72 | rescue IOError 73 | false 74 | end 75 | # silence undefined warning in clear 76 | @sb_file = nil 77 | # if the io object has data, we should load it, otherwise start afresh 78 | # this should be based on the mode string rather. 79 | @io.size > 0 ? load : clear 80 | end 81 | 82 | # somewhat similar to File.open, the open class method allows a block form where 83 | # the Ole::Storage object is automatically closed on completion of the block. 84 | def self.open arg, mode=nil, params={} 85 | ole = new arg, mode, params 86 | if block_given? 87 | begin yield ole 88 | ensure; ole.close 89 | end 90 | else ole 91 | end 92 | end 93 | 94 | # load document from file. 95 | # 96 | # TODO: implement various allocationtable checks, maybe as a AllocationTable#fsck function :) 97 | # 98 | # 1. reterminate any chain not ending in EOC. 99 | # compare file size with actually allocated blocks per file. 100 | # 2. pass through all chain heads looking for collisions, and making sure nothing points to them 101 | # (ie they are really heads). in both sbat and mbat 102 | # 3. we know the locations of the bbat data, and mbat data. ensure that there are placeholder blocks 103 | # in the bat for them. 104 | # 4. maybe a check of excess data. if there is data outside the bbat.truncate.length + 1 * block_size, 105 | # (eg what is used for truncate in #flush), then maybe add some sort of message about that. it 106 | # will be automatically thrown away at close time. 107 | def load 108 | # we always read 512 for the header block. if the block size ends up being different, 109 | # what happens to the 109 fat entries. are there more/less entries? 110 | @io.rewind 111 | header_block = @io.read 512 112 | @header = Header.new header_block 113 | 114 | # create an empty bbat. 115 | @bbat = AllocationTable::Big.new self 116 | bbat_chain = header_block[Header::SIZE..-1].unpack 'V*' 117 | mbat_block = @header.mbat_start 118 | @header.num_mbat.times do 119 | blocks = @bbat.read([mbat_block]).unpack 'V*' 120 | mbat_block = blocks.pop 121 | bbat_chain += blocks 122 | end 123 | # am i using num_bat in the right way? 124 | @bbat.load @bbat.read(bbat_chain[0, @header.num_bat]) 125 | 126 | # get block chain for directories, read it, then split it into chunks and load the 127 | # directory entries. semantics changed - used to cut at first dir where dir.type == 0 128 | @dirents = @bbat.read(@header.dirent_start).to_enum(:each_chunk, Dirent::SIZE). 
129 | map { |str| Dirent.new self, str } 130 | 131 | # now reorder from flat into a tree 132 | # links are stored in some kind of balanced binary tree 133 | # check that everything is visited at least, and at most once 134 | # similarly with the blocks of the file. 135 | # was thinking of moving this to Dirent.to_tree instead. 136 | class << @dirents 137 | def to_tree idx=0 138 | return [] if idx == Dirent::EOT 139 | d = self[idx] 140 | to_tree(d.child).each { |child| d << child } 141 | raise FormatError, "directory #{d.inspect} used twice" if d.idx 142 | d.idx = idx 143 | to_tree(d.prev) + [d] + to_tree(d.next) 144 | end 145 | end 146 | 147 | @root = @dirents.to_tree.first 148 | @dirents.reject! { |d| d.type_id == 0 } 149 | # silence this warning by default, its not really important (issue #5). 150 | # fairly common one appears to be "R" (from office OS X?) which smells 151 | # like some kind of UTF16 snafu, but scottwillson also has had some kanji... 152 | #Log.warn "root name was #{@root.name.inspect}" unless @root.name == 'Root Entry' 153 | unused = @dirents.reject(&:idx).length 154 | Log.warn "#{unused} unused directories" if unused > 0 155 | 156 | # FIXME i don't currently use @header.num_sbat which i should 157 | # hmm. nor do i write it. it means what exactly again? 158 | # which mode to use here? 159 | @sb_file = RangesIOResizeable.new @bbat, :first_block => @root.first_block, :size => @root.size 160 | @sbat = AllocationTable::Small.new self 161 | @sbat.load @bbat.read(@header.sbat_start) 162 | end 163 | 164 | def close 165 | @sb_file.close 166 | flush if @writeable 167 | @io.close if @close_parent 168 | end 169 | 170 | # the flush method is the main "save" method. all file contents are always 171 | # written directly to the file by the RangesIO objects, all this method does 172 | # is write out all the file meta data - dirents, allocation tables, file header 173 | # etc. 174 | # 175 | # maybe add an option to zero the padding, and any remaining avail blocks in the 176 | # allocation table. 177 | # 178 | # TODO: long and overly complex. simplify and test better. eg, perhaps move serialization 179 | # of bbat to AllocationTable::Big. 180 | def flush 181 | # update root dirent, and flatten dirent tree 182 | @root.name = 'Root Entry' 183 | @root.first_block = @sb_file.first_block 184 | @root.size = @sb_file.size 185 | @dirents = @root.flatten 186 | 187 | # serialize the dirents using the bbat 188 | RangesIOResizeable.open @bbat, 'w', :first_block => @header.dirent_start do |io| 189 | io.write @dirents.map { |dirent| dirent.to_s }.join 190 | padding = (io.size / @bbat.block_size.to_f).ceil * @bbat.block_size - io.size 191 | io.write 0.chr * padding 192 | @header.dirent_start = io.first_block 193 | end 194 | 195 | # serialize the sbat 196 | # perhaps the blocks used by the sbat should be marked with BAT? 197 | RangesIOResizeable.open @bbat, 'w', :first_block => @header.sbat_start do |io| 198 | io.write @sbat.to_s 199 | @header.sbat_start = io.first_block 200 | @header.num_sbat = @bbat.chain(@header.sbat_start).length 201 | end 202 | 203 | # create RangesIOResizeable hooked up to the bbat. use that to claim bbat blocks using 204 | # truncate. then when its time to write, convert that chain and some chunk of blocks at 205 | # the end, into META_BAT blocks. write out the chain, and those meta bat blocks, and its 206 | # done. 207 | # this is perhaps not good, as we reclaim all bat blocks here, which 208 | # may include the sbat we just wrote. FIXME 209 | @bbat.map! 
do |b| 210 | b == AllocationTable::BAT || b == AllocationTable::META_BAT ? AllocationTable::AVAIL : b 211 | end 212 | 213 | # currently we use a loop. this could be better, but basically, 214 | # the act of writing out the bat, itself requires blocks which get 215 | # recorded in the bat. 216 | # 217 | # i'm sure that there'd be some simpler closed form solution to this. solve 218 | # recursive func: 219 | # 220 | # num_mbat_blocks = ceil(max((mbat_len - 109) * 4 / block_size, 0)) 221 | # bbat_len = initial_bbat_len + num_mbat_blocks 222 | # mbat_len = ceil(bbat_len * 4 / block_size) 223 | # 224 | # the actual bbat allocation table is itself stored throughout the file, and that chain 225 | # is stored in the initial blocks, and the mbat blocks. 226 | num_mbat_blocks = 0 227 | io = RangesIOResizeable.new @bbat, 'w', :first_block => AllocationTable::EOC 228 | # truncate now, so that we can simplify size calcs - the mbat blocks will be appended in a 229 | # contiguous chunk at the end. 230 | # hmmm, i think this truncate should be matched with a truncate of the underlying io. if you 231 | # delete a lot of stuff, and free up trailing blocks, the file size never shrinks. this can 232 | # be fixed easily, add an io truncate 233 | @bbat.truncate! 234 | @io.truncate @bbat.block_size * (@bbat.length + 1) 235 | while true 236 | # get total bbat size. equivalent to @bbat.to_s.length, but for the factoring in of 237 | # the mbat blocks. we can't just add the mbat blocks directly to the bbat, as as this iteration 238 | # progresses, more blocks may be needed for the bat itself (if there are no more gaps), and the 239 | # mbat must remain contiguous. 240 | bbat_data_len = ((@bbat.length + num_mbat_blocks) * 4 / @bbat.block_size.to_f).ceil * @bbat.block_size 241 | # now storing the excess mbat blocks also increases the size of the bbat: 242 | new_num_mbat_blocks = ([bbat_data_len / @bbat.block_size - 109, 0].max * 4 / (@bbat.block_size.to_f - 4)).ceil 243 | if new_num_mbat_blocks != num_mbat_blocks 244 | # need more space for the mbat. 245 | num_mbat_blocks = new_num_mbat_blocks 246 | elsif io.size != bbat_data_len 247 | # need more space for the bat 248 | # this may grow the bbat, depending on existing available blocks 249 | io.truncate bbat_data_len 250 | else 251 | break 252 | end 253 | end 254 | 255 | # now extract the info we want: 256 | ranges = io.ranges 257 | bbat_chain = @bbat.chain io.first_block 258 | io.close 259 | bbat_chain.each { |b| @bbat[b] = AllocationTable::BAT } 260 | # tack on the mbat stuff 261 | @header.num_bat = bbat_chain.length 262 | mbat_blocks = (0...num_mbat_blocks).map do 263 | block = @bbat.free_block 264 | @bbat[block] = AllocationTable::META_BAT 265 | block 266 | end 267 | @header.mbat_start = mbat_blocks.first || AllocationTable::EOC 268 | 269 | # now finally write the bbat, using a not resizable io. 270 | # the mode here will be 'r', which allows write atm. 271 | RangesIO.open(@io, :ranges => ranges) { |f| f.write @bbat.to_s } 272 | 273 | # this is the mbat. pad it out. 274 | bbat_chain += [AllocationTable::AVAIL] * [109 - bbat_chain.length, 0].max 275 | @header.num_mbat = num_mbat_blocks 276 | if num_mbat_blocks != 0 277 | # write out the mbat blocks now. first of all, where are they going to be? 278 | mbat_data = bbat_chain[109..-1] 279 | # expand the mbat_data to include the linked list forward pointers. 280 | mbat_data = mbat_data.to_enum(:each_slice, @bbat.block_size / 4 - 1).to_a. 281 | zip(mbat_blocks[1..-1] + [nil]).map { |a, b| b ? 
a + [b] : a } 282 | # pad out the last one. 283 | mbat_data.last.push(*([AllocationTable::AVAIL] * (@bbat.block_size / 4 - mbat_data.last.length))) 284 | RangesIO.open @io, :ranges => @bbat.ranges(mbat_blocks) do |f| 285 | f.write mbat_data.flatten.pack('V*') 286 | end 287 | end 288 | 289 | # now seek back and write the header out 290 | @io.seek 0 291 | @io.write @header.to_s + bbat_chain[0, 109].pack('V*') 292 | @io.flush 293 | end 294 | 295 | def clear 296 | # initialize to equivalent of loading an empty ole document. 297 | Log.warn 'creating new ole storage object on non-writable io' unless @writeable 298 | @header = Header.new 299 | @bbat = AllocationTable::Big.new self 300 | @root = Dirent.new self, :type => :root, :name => 'Root Entry' 301 | @dirents = [@root] 302 | @root.idx = 0 303 | @sb_file.close if @sb_file 304 | @sb_file = RangesIOResizeable.new @bbat, :first_block => AllocationTable::EOC 305 | @sbat = AllocationTable::Small.new self 306 | # throw everything else the hell away 307 | @io.truncate 0 308 | end 309 | 310 | # could be useful with mis-behaving ole documents. or to just clean them up. 311 | def repack temp=:file 312 | case temp 313 | when :file 314 | Tempfile.open 'ole-repack' do |io| 315 | io.binmode 316 | repack_using_io io 317 | end 318 | when :mem; StringIO.open(''.dup, &method(:repack_using_io)) 319 | else raise ArgumentError, "unknown temp backing #{temp.inspect}" 320 | end 321 | end 322 | 323 | def repack_using_io temp_io 324 | @io.rewind 325 | IO.copy @io, temp_io 326 | clear 327 | Storage.open temp_io, nil, @params do |temp_ole| 328 | #temp_ole.root.type = :dir 329 | Dirent.copy temp_ole.root, root 330 | end 331 | end 332 | 333 | def bat_for_size size 334 | # note >=, not > previously. 335 | size >= @header.threshold ? @bbat : @sbat 336 | end 337 | 338 | def inspect 339 | "#<#{self.class} io=#{@io.inspect} root=#{@root.inspect}>" 340 | end 341 | 342 | # 343 | # A class which wraps the ole header 344 | # 345 | # Header.new can be both used to load from a string, or to create from 346 | # defaults. Serialization is accomplished with the #to_s method. 347 | # 348 | class Header < Struct.new( 349 | :magic, :clsid, :minor_ver, :major_ver, :byte_order, :b_shift, :s_shift, 350 | :reserved, :csectdir, :num_bat, :dirent_start, :transacting_signature, :threshold, 351 | :sbat_start, :num_sbat, :mbat_start, :num_mbat 352 | ) 353 | PACK = 'a8 a16 v2 a2 v2 a6 V3 a4 V5' 354 | SIZE = 0x4c 355 | # i have seen it pointed out that the first 4 bytes of hex, 356 | # 0xd0cf11e0, is supposed to spell out docfile. hmmm :) 357 | MAGIC = "\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1" # expected value of Header#magic 358 | # what you get if creating new header from scratch. 359 | # AllocationTable::EOC isn't available yet. meh. 360 | EOC = 0xfffffffe 361 | DEFAULT = [ 362 | MAGIC, 0.chr * 16, 59, 3, "\xfe\xff", 9, 6, 363 | 0.chr * 6, 0, 1, EOC, 0.chr * 4, 364 | 4096, EOC, 0, EOC, 0 365 | ] 366 | 367 | def initialize values=DEFAULT 368 | values = values.unpack(PACK) if String === values 369 | super(*values) 370 | validate! 371 | end 372 | 373 | def to_s 374 | to_a.pack PACK 375 | end 376 | 377 | def validate! 378 | raise FormatError, "OLE2 signature is invalid" unless magic == MAGIC 379 | if num_bat == 0 or # is that valid for a completely empty file? 380 | # not sure about this one. 
basically to do max possible bat given size of mbat 381 | num_bat > 109 && num_bat > 109 + num_mbat * (1 << b_shift - 2) or 382 | # shouldn't need to use the mbat as there is enough space in the header block 383 | num_bat < 109 && num_mbat != 0 or 384 | # given the size of the header is 76, if b_shift <= 6, blocks address the header. 385 | s_shift > b_shift or b_shift <= 6 or b_shift >= 31 or 386 | # we only handle little endian 387 | byte_order != "\xfe\xff" 388 | raise FormatError, "not valid OLE2 structured storage file" 389 | end 390 | # relaxed this, due to test-msg/qwerty_[1-3]*.msg they all had 391 | # 3 for this value. 392 | # transacting_signature != "\x00" * 4 or 393 | if threshold != 4096 or 394 | num_mbat == 0 && ![AllocationTable::EOC, AllocationTable::AVAIL].include?(mbat_start) or 395 | reserved != "\x00" * 6 396 | Log.warn "may not be a valid OLE2 structured storage file" 397 | end 398 | true 399 | end 400 | end 401 | 402 | # 403 | # +AllocationTable+'s hold the chains corresponding to files. Given 404 | # an initial index, AllocationTable#chain follows the chain, returning 405 | # the blocks that make up that file. 406 | # 407 | # There are 2 allocation tables, the bbat, and sbat, for big and small 408 | # blocks respectively. The block chain should be loaded using either 409 | # Storage#read_big_blocks or Storage#read_small_blocks 410 | # as appropriate. 411 | # 412 | # Whether or not big or small blocks are used for a file depends on 413 | # whether its size is over the Header#threshold level. 414 | # 415 | # An Ole::Storage document is serialized as a series of directory objects, 416 | # which are stored in blocks throughout the file. The blocks are either 417 | # big or small, and are accessed using the AllocationTable. 418 | # 419 | # The bbat allocation table's data is stored in the spare room in the header 420 | # block, and in extra blocks throughout the file as referenced by the meta 421 | # bat. That chain is linear, as there is no higher level table. 422 | # 423 | # AllocationTable.new is used to create an empty table. It can parse a string 424 | # with the #load method. Serialization is accomplished with the #to_s method. 425 | # 426 | class AllocationTable < Array 427 | # a free block (I don't currently leave any blocks free), although I do pad out 428 | # the allocation table with AVAIL to the block size. 429 | AVAIL = 0xffffffff 430 | EOC = 0xfffffffe # end of a chain 431 | # these blocks are used for storing the allocation table chains 432 | BAT = 0xfffffffd 433 | META_BAT = 0xfffffffc 434 | 435 | attr_reader :ole, :io, :block_size 436 | def initialize ole 437 | @ole = ole 438 | @sparse = true 439 | super() 440 | end 441 | 442 | def load data 443 | replace data.unpack('V*') 444 | end 445 | 446 | def truncate 447 | # this strips trailing AVAILs. come to think of it, this has the potential to break 448 | # bogus ole. if you terminate using AVAIL instead of EOC, like I did before. but that is 449 | # very broken. however, if a chain ends with AVAIL, it should probably be fixed to EOC 450 | # at load time. 451 | temp = reverse 452 | not_avail = temp.find { |b| b != AVAIL } and temp = temp[temp.index(not_avail)..-1] 453 | temp.reverse 454 | end 455 | 456 | def truncate! 457 | replace truncate 458 | end 459 | 460 | def to_s 461 | table = truncate 462 | # pad it out some 463 | num = @ole.bbat.block_size / 4 464 | # do you really use AVAIL? they probably extend past end of file, and may shortly 465 | # be used for the bat. not really good. 
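			# e.g. with the default 512 byte big blocks, num is 128 entries per block, so a
			# 130 entry table picks up 126 trailing AVAILs and is padded out to 256 entries.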
466 | table += [AVAIL] * (num - (table.length % num)) if (table.length % num) != 0 467 | table.pack 'V*' 468 | end 469 | 470 | # rewrote this to be non-recursive as it broke on a large attachment 471 | # chain with a stack error 472 | def chain idx 473 | a = [] 474 | until idx >= META_BAT 475 | raise FormatError, "broken allocationtable chain" if idx < 0 || idx > length 476 | a << idx 477 | idx = self[idx] 478 | end 479 | Log.warn "invalid chain terminator #{idx}" unless idx == EOC 480 | a 481 | end 482 | 483 | # Turn a chain (an array given by +chain+) of blocks (optionally 484 | # truncated to +size+) into an array of arrays describing the stretches of 485 | # bytes in the file that it belongs to. 486 | # 487 | # The blocks are Big or Small blocks depending on the table type. 488 | def blocks_to_ranges chain, size=nil 489 | # truncate the chain if required 490 | chain = chain[0, (size.to_f / block_size).ceil] if size 491 | # convert chain to ranges of the block size 492 | ranges = chain.map { |i| [block_size * i, block_size] } 493 | # truncate final range if required 494 | ranges.last[1] -= (ranges.length * block_size - size) if ranges.last and size 495 | ranges 496 | end 497 | 498 | def ranges chain, size=nil 499 | chain = self.chain(chain) unless Array === chain 500 | blocks_to_ranges chain, size 501 | end 502 | 503 | # quick shortcut. chain can be either a head (in which case the table is used to 504 | # turn it into a chain), or a chain. it is converted to ranges, then to rangesio. 505 | def open chain, size=nil, &block 506 | RangesIO.open @io, :ranges => ranges(chain, size), &block 507 | end 508 | 509 | def read chain, size=nil 510 | open chain, size, &:read 511 | end 512 | 513 | # catch any method that may add an AVAIL somewhere in the middle, thus invalidating 514 | # the @sparse speedup for free_block. annoying using eval, but define_method won't 515 | # work for this. 516 | # FIXME 517 | [:map!, :collect!].each do |name| 518 | eval <<-END 519 | def #{name}(*args, &block) 520 | @sparse = true 521 | super 522 | end 523 | END 524 | end 525 | 526 | def []= idx, val 527 | @sparse = true if val == AVAIL 528 | super 529 | end 530 | 531 | def free_block 532 | if @sparse 533 | i = index(AVAIL) and return i 534 | @sparse = false 535 | end 536 | push AVAIL 537 | length - 1 538 | end 539 | 540 | # must return first_block. modifies +blocks+ in place 541 | def resize_chain blocks, size 542 | new_num_blocks = (size / block_size.to_f).ceil 543 | old_num_blocks = blocks.length 544 | if new_num_blocks < old_num_blocks 545 | # de-allocate some of our old blocks. TODO maybe zero them out in the file??? 546 | (new_num_blocks...old_num_blocks).each { |i| self[blocks[i]] = AVAIL } 547 | self[blocks[new_num_blocks-1]] = EOC if new_num_blocks > 0 548 | blocks.slice! new_num_blocks..-1 549 | elsif new_num_blocks > old_num_blocks 550 | # need some more blocks. 551 | last_block = blocks.last 552 | (new_num_blocks - old_num_blocks).times do 553 | block = free_block 554 | # connect the chain. handle corner case of blocks being [] initially 555 | self[last_block] = block if last_block 556 | blocks << block 557 | last_block = block 558 | self[last_block] = EOC 559 | end 560 | end 561 | # update ranges, and return that also now 562 | blocks 563 | end 564 | 565 | class Big < AllocationTable 566 | def initialize(*args) 567 | super 568 | @block_size = 1 << @ole.header.b_shift 569 | @io = @ole.io 570 | end 571 | 572 | # Big blocks are kind of -1 based, in order to not clash with the header. 
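			# (block i is therefore read from file offset (i + 1) * block_size, so with the
			# default 512 byte blocks, block 0 starts immediately after the 512 byte header.)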
573 | def blocks_to_ranges chain, size=nil 574 | #super chain.map { |b| b + 1 }, size 575 | # duplicated from AllocationTable#blocks_to_ranges to avoid chain.map 576 | # which was decent part of benchmark profile 577 | chain = chain[0, (size.to_f / block_size).ceil] if size 578 | ranges = chain.map { |i| [block_size * (i + 1), block_size] } 579 | ranges.last[1] -= (ranges.length * block_size - size) if ranges.last and size 580 | ranges 581 | end 582 | end 583 | 584 | class Small < AllocationTable 585 | def initialize(*args) 586 | super 587 | @block_size = 1 << @ole.header.s_shift 588 | @io = @ole.sb_file 589 | end 590 | end 591 | end 592 | 593 | # like normal RangesIO, but Ole::Storage specific. the ranges are backed by an 594 | # AllocationTable, and can be resized. used for read/write to 2 streams: 595 | # 1. serialized dirent data 596 | # 2. sbat table data 597 | # 3. all dirents but through RangesIOMigrateable below 598 | # 599 | # Note that all internal access to first_block is through accessors, as it is sometimes 600 | # useful to redirect it. 601 | class RangesIOResizeable < RangesIO 602 | attr_reader :bat 603 | attr_accessor :first_block 604 | def initialize bat, mode='r', params={} 605 | mode, params = 'r', mode if Hash === mode 606 | first_block, size = params.values_at :first_block, :size 607 | raise ArgumentError, 'must specify first_block' unless first_block 608 | @bat = bat 609 | self.first_block = first_block 610 | # we now cache the blocks chain, for faster resizing. 611 | @blocks = @bat.chain first_block 612 | super @bat.io, mode, :ranges => @bat.ranges(@blocks, size) 613 | end 614 | 615 | def truncate size 616 | # note that old_blocks is != @ranges.length necessarily. i'm planning to write a 617 | # merge_ranges function that merges sequential ranges into one as an optimization. 618 | @bat.resize_chain @blocks, size 619 | @pos = size if @pos > size 620 | self.ranges = @bat.ranges(@blocks, size) 621 | self.first_block = @blocks.empty? ? AllocationTable::EOC : @blocks.first 622 | 623 | # don't know if this is required, but we explicitly request our @io to grow if necessary 624 | # we never shrink it though. maybe this belongs in allocationtable, where smarter decisions 625 | # can be made. 626 | # maybe its ok to just seek out there later?? 627 | max = @ranges.map { |pos, len| pos + len }.max || 0 628 | @io.truncate max if max > @io.size 629 | end 630 | end 631 | 632 | # like RangesIOResizeable, but Ole::Storage::Dirent specific. provides for migration 633 | # between bats based on size, and updating the dirent. 634 | class RangesIOMigrateable < RangesIOResizeable 635 | attr_reader :dirent 636 | def initialize dirent, mode='r' 637 | @dirent = dirent 638 | super @dirent.ole.bat_for_size(@dirent.size), mode, 639 | :first_block => @dirent.first_block, :size => @dirent.size 640 | end 641 | 642 | def truncate size 643 | bat = @dirent.ole.bat_for_size size 644 | if bat.class != @bat.class 645 | # bat migration needed! we need to backup some data. the amount of data 646 | # should be <= @ole.header.threshold, so we can just hold it all in one buffer. 647 | # backup this 648 | pos = [@pos, size].min 649 | self.pos = 0 650 | keep = read [@size, size].min 651 | # this does a normal truncate to 0, removing our presence from the old bat, and 652 | # rewrite the dirent's first_block 653 | super 0 654 | @bat = bat 655 | # just change the underlying io from right under everyone :) 656 | @io = bat.io 657 | # important to do this now, before the write. 
as the below write will always 658 | # migrate us back to sbat! this will now allocate us +size+ in the new bat. 659 | super 660 | self.pos = 0 661 | write keep 662 | self.pos = pos 663 | else 664 | super 665 | end 666 | # now just update the file 667 | @dirent.size = size 668 | end 669 | 670 | # forward this to the dirent 671 | def first_block 672 | @dirent.first_block 673 | end 674 | 675 | def first_block= val 676 | @dirent.first_block = val 677 | end 678 | end 679 | 680 | # 681 | # A class which wraps an ole directory entry. Can be either a directory 682 | # (Dirent#dir?) or a file (Dirent#file?) 683 | # 684 | # Most interaction with Ole::Storage is through this class. 685 | # The 2 most important functions are Dirent#children, and 686 | # Dirent#data. 687 | # 688 | # was considering separate classes for dirs and files. some methods/attrs only 689 | # applicable to one or the other. 690 | # 691 | # As with the other classes, #to_s performs the serialization. 692 | # 693 | class Dirent < Struct.new( 694 | :name_utf16, :name_len, :type_id, :colour, :prev, :next, :child, 695 | :clsid, :flags, # dirs only 696 | :create_time_str, :modify_time_str, # files only 697 | :first_block, :size, :reserved 698 | ) 699 | include RecursivelyEnumerable 700 | 701 | PACK = 'a64 v C C V3 a16 V a8 a8 V2 a4' 702 | SIZE = 128 703 | TYPE_MAP = { 704 | # this is temporary 705 | 0 => :empty, 706 | 1 => :dir, 707 | 2 => :file, 708 | 5 => :root 709 | } 710 | # something to do with the fact that the tree is supposed to be red-black 711 | COLOUR_MAP = { 712 | 0 => :red, 713 | 1 => :black 714 | } 715 | # used in the next / prev / child stuff to show that the tree ends here. 716 | # also used for first_block for directory. 717 | EOT = 0xffffffff 718 | DEFAULT = [ 719 | 0.chr * 2, 2, 0, # will get overwritten 720 | 1, EOT, EOT, EOT, 721 | 0.chr * 16, 0, nil, nil, 722 | AllocationTable::EOC, 0, 0.chr * 4 723 | ] 724 | 725 | # This returns all the children of this +Dirent+. It is filled in 726 | # when the tree structure is recreated. 727 | attr_reader :children 728 | attr_reader :name 729 | attr_reader :ole, :type, :create_time, :modify_time 730 | attr_reader :parent 731 | 732 | # i think its just used by the tree building 733 | attr_accessor :idx 734 | 735 | # these are for internal use and are used for faster lookup. 736 | attr_reader :name_lookup 737 | attr_writer :parent 738 | protected :name_lookup, :parent= 739 | 740 | def initialize ole, values=DEFAULT, params={} 741 | @ole = ole 742 | values, params = DEFAULT, values if Hash === values 743 | values = values.unpack(PACK) if String === values 744 | super(*values) 745 | 746 | # extra parsing from the actual struct values 747 | @name = params[:name] || Types::Variant.load(Types::VT_LPWSTR, name_utf16[0...name_len]) 748 | @type = if params[:type] 749 | unless TYPE_MAP.values.include?(params[:type]) 750 | raise ArgumentError, "unknown type #{params[:type].inspect}" 751 | end 752 | params[:type] 753 | else 754 | TYPE_MAP[type_id] or raise FormatError, "unknown type_id #{type_id.inspect}" 755 | end 756 | 757 | # further extra type specific stuff 758 | if file? 759 | default_time = @ole.params[:update_timestamps] ? 
Types::FileTime.now : nil 760 | @create_time ||= default_time 761 | @modify_time ||= default_time 762 | @create_time = Types::Variant.load(Types::VT_FILETIME, create_time_str) if create_time_str 763 | @modify_time = Types::Variant.load(Types::VT_FILETIME, modify_time_str) if modify_time_str 764 | @children = nil 765 | @name_lookup = nil 766 | else 767 | @create_time = nil 768 | @modify_time = nil 769 | self.size = 0 unless @type == :root 770 | @children = [] 771 | @name_lookup = {} 772 | end 773 | 774 | @parent = nil 775 | 776 | # to silence warnings. used for tree building at load time 777 | # only. 778 | @idx = nil 779 | end 780 | 781 | def name= name 782 | if @parent 783 | map = @parent.instance_variable_get :@name_lookup 784 | map.delete @name 785 | map[name] = self 786 | end 787 | @name = name 788 | end 789 | 790 | def open mode='r' 791 | raise Errno::EISDIR unless file? 792 | io = RangesIOMigrateable.new self, mode 793 | @modify_time = Types::FileTime.now if io.mode.writeable? 794 | if block_given? 795 | begin yield io 796 | ensure; io.close 797 | end 798 | else io 799 | end 800 | end 801 | 802 | def read limit=nil 803 | open { |io| io.read limit } 804 | end 805 | 806 | def file? 807 | type == :file 808 | end 809 | 810 | def dir? 811 | # to count root as a dir. 812 | !file? 813 | end 814 | 815 | # maybe need some options regarding case sensitivity. 816 | def / name 817 | @name_lookup[name] 818 | end 819 | 820 | def [] idx 821 | if String === idx 822 | #warn 'String form of Dirent#[] is deprecated' 823 | self / idx 824 | else 825 | super 826 | end 827 | end 828 | 829 | # move to ruby-msg. and remove from here 830 | def time 831 | #warn 'Dirent#time is deprecated' 832 | create_time || modify_time 833 | end 834 | 835 | def each_child(&block) 836 | @children.each(&block) if dir? 837 | end 838 | 839 | # flattens the tree starting from here into +dirents+. note it modifies its argument. 840 | def flatten dirents=[] 841 | @idx = dirents.length 842 | dirents << self 843 | if file? 844 | self.prev = self.next = self.child = EOT 845 | else 846 | children.each { |child| child.flatten dirents } 847 | self.child = Dirent.flatten_helper children 848 | end 849 | dirents 850 | end 851 | 852 | # i think making the tree structure optimized is actually more complex than this, and 853 | # requires some intelligent ordering of the children based on names, but as long as 854 | # it is valid its ok. 855 | # actually, i think its ok. gsf for example only outputs a singly-linked-list, where 856 | # prev is always EOT. 857 | def self.flatten_helper children 858 | return EOT if children.empty? 859 | i = children.length / 2 860 | this = children[i] 861 | this.prev, this.next = [(0...i), (i+1..-1)].map { |r| flatten_helper children[r] } 862 | this.idx 863 | end 864 | 865 | def to_s 866 | tmp = Types::Variant.dump(Types::VT_LPWSTR, name) 867 | tmp = tmp[0, 62] if tmp.length > 62 868 | tmp += 0.chr * 2 869 | self.name_len = tmp.length 870 | self.name_utf16 = tmp + 0.chr * (64 - tmp.length) 871 | # type_id can perhaps be set in the initializer, as its read only now. 872 | self.type_id = TYPE_MAP.to_a.find { |id, name| @type == name }.first 873 | # for the case of files, it is assumed that that was handled already 874 | # note not dir?, so as not to override root's first_block 875 | self.first_block = Dirent::EOT if type == :dir 876 | if file? 877 | # this is messed up. it changes the time stamps regardless of whether the file 878 | # was actually touched.
instead, any open call with a writeable mode, should update 879 | # the modify time. create time would be set in new. 880 | if @ole.params[:update_timestamps] 881 | self.create_time_str = Types::Variant.dump Types::VT_FILETIME, @create_time 882 | self.modify_time_str = Types::Variant.dump Types::VT_FILETIME, @modify_time 883 | end 884 | else 885 | self.create_time_str = 0.chr * 8 886 | self.modify_time_str = 0.chr * 8 887 | end 888 | to_a.pack PACK 889 | end 890 | 891 | def inspect 892 | str = "#<Dirent:#{name.inspect}" 893 | # perhaps i should remove the data snippet. its not that useful anymore. 894 | # there is also some dir specific stuff, like clsid and flags, that could be 895 | # included here too. 896 | if file? 897 | tmp = read 9 898 | data = tmp.length == 9 ? tmp[0, 5] + '...' : tmp 899 | str << " size=#{size}" + 900 | "#{modify_time ? ' modify_time=' + modify_time.to_s.inspect : nil}" + 901 | " data=#{data.inspect}" 902 | end 903 | str + '>' 904 | end 905 | 906 | def << child 907 | child.parent = self 908 | @name_lookup[child.name] = child 909 | @children << child 910 | end 911 | 912 | # remove the Dirent +child+ from the children array, truncating the data 913 | # by default. 914 | def delete child, truncate=true 915 | # remove from our child array, so that on reflatten and re-creation of @dirents, it will be gone 916 | unless @children.delete(child) 917 | raise ArgumentError, "#{child.inspect} not a child of #{self.inspect}" 918 | end 919 | @name_lookup.delete(child.name) 920 | child.parent = nil 921 | # free our blocks 922 | child.open { |io| io.truncate 0 } if child.file? 923 | end 924 | 925 | def self.copy src, dst 926 | # copies the contents of src to dst. must be the same type. this will throw an 927 | # error on copying to root. maybe this will recurse too much for big documents?? 928 | raise ArgumentError, 'differing types' if src.file? and !dst.file? 929 | dst.name = src.name 930 | if src.dir? 931 | src.children.each do |src_child| 932 | dst_child = Dirent.new dst.ole, :type => src_child.type 933 | dst << dst_child 934 | Dirent.copy src_child, dst_child 935 | end 936 | else 937 | src.open do |src_io| 938 | dst.open { |dst_io| IO.copy src_io, dst_io } 939 | end 940 | end 941 | end 942 | end 943 | end 944 | end 945 | 946 | --------------------------------------------------------------------------------
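The tests above and the flush machinery in base.rb together describe the usual round trip: open (or create) a storage, manipulate it through the ole.file / ole.dir filesystem API, and let the block form of Storage.open close it, which writes the dirents, allocation tables and header back out. A rough sketch of that flow, using only calls demonstrated above ('readme.txt' and 'docs' are placeholder names, not files from this repository):

  require 'stringio'
  require 'ole/storage'

  io = StringIO.new ''.dup                       # in-memory backing, as the tests use
  Ole::Storage.open io do |ole|
    ole.file.open('readme.txt', 'w') { |f| f.write 'hello world' }
    ole.dir.mkdir 'docs'
    p ole.dir.entries('.').sort                  # => [".", "..", "docs", "readme.txt"]
  end
  # leaving the block closes the storage, flushing header, bats and dirents back to io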