├── .gitignore ├── .infinity_test ├── Gemfile ├── Gemfile.lock ├── LICENSE ├── README ├── Rakefile ├── bin └── rtf_parse ├── lib ├── ruby-rtf.rb └── ruby-rtf │ ├── colour.rb │ ├── document.rb │ ├── font.rb │ ├── invalid_document.rb │ ├── parser.rb │ ├── ruby-rtf.rb │ ├── table.rb │ └── version.rb ├── ruby-rtf.gemspec └── spec ├── colour_spec.rb ├── document_spec.rb ├── font_spec.rb ├── parser_spec.rb └── spec_helper.rb /.gitignore: -------------------------------------------------------------------------------- 1 | .rvmrc 2 | .bundle/ 3 | .yardoc/ 4 | 5 | doc/ 6 | tmp/ 7 | data/ 8 | pkg/ 9 | 10 | *.html 11 | -------------------------------------------------------------------------------- /.infinity_test: -------------------------------------------------------------------------------- 1 | infinity_test do 2 | notifications :growl do 3 | show_images :mode => :hands 4 | end 5 | 6 | use :test_framework => :rspec 7 | 8 | before_run { clear :terminal } 9 | 10 | heuristics do 11 | add('^spec/(.*)_spec.rb') do |file| 12 | run :test_for => file 13 | end 14 | add('^spec/spec_helper.rb') do |file| 15 | run :all => :tests 16 | end 17 | add('^lib/ruby-rtf.rb') do |file| 18 | run :all => :tests 19 | end 20 | add('^lib/(.*)\.rb') do |file| 21 | run :test_for => file[1].split('/').last 22 | end 23 | end 24 | end 25 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source 'http://rubygems.org' 2 | 3 | gemspec -------------------------------------------------------------------------------- /Gemfile.lock: -------------------------------------------------------------------------------- 1 | PATH 2 | remote: . 
3 | specs: 4 | ruby-rtf (0.0.5) 5 | 6 | GEM 7 | remote: http://rubygems.org/ 8 | specs: 9 | diff-lcs (1.3) 10 | rspec (3.6.0) 11 | rspec-core (~> 3.6.0) 12 | rspec-expectations (~> 3.6.0) 13 | rspec-mocks (~> 3.6.0) 14 | rspec-core (3.6.0) 15 | rspec-support (~> 3.6.0) 16 | rspec-expectations (3.6.0) 17 | diff-lcs (>= 1.2.0, < 2.0) 18 | rspec-support (~> 3.6.0) 19 | rspec-mocks (3.6.0) 20 | diff-lcs (>= 1.2.0, < 2.0) 21 | rspec-support (~> 3.6.0) 22 | rspec-support (3.6.0) 23 | yard (0.9.36) 24 | 25 | PLATFORMS 26 | ruby 27 | 28 | DEPENDENCIES 29 | rspec (> 2.0) 30 | ruby-rtf! 31 | yard (~> 0.9.11) 32 | 33 | BUNDLED WITH 34 | 1.16.1 35 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2011 dan sinclair 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to 5 | deal in the Software without restriction, including without limitation the 6 | rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 7 | sell copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 16 | THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 17 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 18 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
19 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | RubyRTF 2 | ======= 3 | 4 | The Ruby RTF library is an attempt to parse RTF files. There is a lot still missing 5 | but the basics are there including the beginnings of table support. 6 | 7 | You can see an example of using the library in the bin/rtf_parse script which attempts 8 | to convert an RTF file to an HTML file. 9 | 10 | Issues 11 | ====== 12 | Please report any issues to the GitHub Issue tracker (https://github.com/dj2/ruby-rtf). 13 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | require 'bundler' 2 | Bundler::GemHelper.install_tasks 3 | 4 | require 'yard' 5 | require 'rspec/core/rake_task' 6 | 7 | task :default => [:spec] 8 | 9 | desc "run spec tests" 10 | RSpec::Core::RakeTask.new('spec') do |t| 11 | t.pattern = 'spec/**/*_spec.rb' 12 | end 13 | 14 | task :test => :spec 15 | 16 | desc 'Generate Documentation' 17 | YARD::Rake::YardocTask.new do |t| 18 | t.files = ['lib/**/*.rb', '-', 'LICENSE'] 19 | t.options = ['--main', 'README', '--no-private', '--hide-void-return'] 20 | end 21 | -------------------------------------------------------------------------------- /bin/rtf_parse: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | # encoding: utf-8 3 | 4 | require 'ruby-rtf' 5 | require 'pp' 6 | require 'base64' 7 | 8 | @prefix = '' 9 | @suffix = '' 10 | 11 | def add(open, close = open) 12 | @prefix << "<#{open}>" 13 | @suffix = "#{@suffix}" 14 | end 15 | 16 | def format(str, section) 17 | @prefix = '' 18 | @suffix = '' 19 | 20 | mods = section[:modifiers] 21 | 22 | if mods[:paragraph] 23 | if section[:text].empty? 24 | str << "

\n" 25 | else 26 | add('p') 27 | end 28 | 29 | elsif mods[:tab] 30 | str << "    " 31 | return 32 | elsif mods[:newline] 33 | str << "
\n" 34 | return 35 | elsif mods[:rquote] 36 | str << "’" 37 | return 38 | elsif mods[:lquote] 39 | str << "‘" 40 | return 41 | elsif mods[:ldblquote] 42 | str << "“" 43 | return 44 | elsif mods[:rdblquote] 45 | str << "”" 46 | return 47 | elsif mods[:emdash] 48 | str << "—" 49 | return 50 | elsif mods[:endash] 51 | str << "–" 52 | return 53 | elsif mods[:nbsp] 54 | str << " " 55 | return 56 | end 57 | return if section[:text].empty? 58 | 59 | add('b') if mods[:bold] 60 | add('i') if mods[:italic] 61 | add('u') if mods[:underline] 62 | add('sup') if mods[:superscript] 63 | add('sub') if mods[:subscript] 64 | add('del') if mods[:strikethrough] 65 | 66 | style = '' 67 | style << "font-variant: small-caps;" if mods[:smallcaps] 68 | style << "font-size: #{mods[:font_size]}pt;" if mods[:font_size] 69 | style << "font-family: \"#{mods[:font].name}\";" if mods[:font] 70 | if mods[:foreground_colour] && !mods[:foreground_colour].use_default? 71 | colour = mods[:foreground_colour] 72 | style << "color: rgb(#{colour.red},#{colour.green},#{colour.blue});" 73 | end 74 | if mods[:background_colour] && !mods[:background_colour].use_default? 75 | colour = mods[:background_colour] 76 | style << "background-color: rgb(#{colour.red},#{colour.green},#{colour.blue});" 77 | end 78 | 79 | add("span style='#{style}'", 'span') unless style.empty? 
80 | 81 | str << @prefix + section[:text].force_encoding('UTF-8') + @suffix 82 | end 83 | 84 | def process_image(section) 85 | mods = section[:modifiers] 86 | mime = '' 87 | case mods[:picture_format] 88 | when 'jpeg' 89 | mime = 'image/jpg' 90 | when 'png' 91 | mime = 'image/png' 92 | when 'bmp' 93 | mime = 'image/bmp' 94 | when 'wmf' 95 | mime = 'image/x-wmf' 96 | end 97 | hex = section[:text].scan(/../).map(&:hex).pack('c*') 98 | base64 = Base64.strict_encode64(hex) 99 | width = 'auto' 100 | width = mods[:picture_width] * (mods[:picture_scale_x] || 100) / 100 if mods[:picture_width] 101 | height = 'auto' 102 | height = mods[:picture_height] * (mods[:picture_scale_y] || 100) / 100 if mods[:picture_height] 103 | "\n" 104 | end 105 | 106 | doc = RubyRTF::Parser.new.parse(File.open(ARGV[0]).read) 107 | 108 | STDERR.puts doc 109 | 110 | str = '' 111 | doc.sections.each do |section| 112 | mods = section[:modifiers] 113 | 114 | if mods[:table] 115 | str << "\n" 116 | mods[:table].rows.each do |row| 117 | str << "\n" 118 | row.cells.each do |cell| 119 | str << "\n" 124 | end 125 | str << "\n" 126 | end 127 | str << "
\n" 120 | cell.sections.each do |sect| 121 | format(str, sect) 122 | end 123 | str << "
\n" 128 | next 129 | elsif mods[:picture] 130 | str << process_image(section) 131 | next 132 | end 133 | 134 | format(str, section) 135 | end 136 | 137 | str << "" 138 | puts str 139 | -------------------------------------------------------------------------------- /lib/ruby-rtf.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | require 'ruby-rtf/version' 4 | require 'ruby-rtf/invalid_document' 5 | 6 | require 'ruby-rtf/ruby-rtf' 7 | require 'ruby-rtf/font' 8 | require 'ruby-rtf/colour' 9 | require 'ruby-rtf/table' 10 | require 'ruby-rtf/document' 11 | require 'ruby-rtf/parser' 12 | -------------------------------------------------------------------------------- /lib/ruby-rtf/colour.rb: -------------------------------------------------------------------------------- 1 | module RubyRTF 2 | # Holds information about a colour 3 | class Colour 4 | # @return [Integer] The red value 5 | attr_accessor :red 6 | 7 | # @return [Integer] The green value 8 | attr_accessor :green 9 | 10 | # @return [Integer] The blue value 11 | attr_accessor :blue 12 | 13 | # @return [Integer] The shade value 14 | attr_accessor :shade 15 | 16 | # @return [Integer] The tint value 17 | attr_accessor :tint 18 | 19 | # @return [Symbol] The theme information 20 | attr_accessor :theme 21 | 22 | # @return [Boolean] True if reader should use it's default colour 23 | attr_accessor :use_default 24 | alias :use_default? 
:use_default 25 | 26 | # Create a new colour 27 | # 28 | # @param red [Integer] Red value between 0 and 255 (default: 0) 29 | # @param green [Integer] Green value between 0 and 255 (default: 0) 30 | # @param blue [Integer] Blue value between 0 and 255 (default: 0) 31 | # @return [RubyRTF::Colour] New colour object 32 | def initialize(red = 0, green = 0, blue = 0) 33 | @red = red 34 | @green = green 35 | @blue = blue 36 | @use_default = false 37 | end 38 | 39 | # Convert the colour to a string 40 | # 41 | # @return [String] The string representation of the colour 42 | def to_s 43 | return "default" if use_default? 44 | "[#{red}, #{green}, #{blue}]" 45 | end 46 | end 47 | 48 | # Alias the Colour class as Color 49 | Color = Colour 50 | end -------------------------------------------------------------------------------- /lib/ruby-rtf/document.rb: -------------------------------------------------------------------------------- 1 | module RubyRTF 2 | # Represents the entire RTF document 3 | class Document 4 | # @return [Array] The font table 5 | attr_reader :font_table 6 | 7 | # @return [Array] The colour table 8 | attr_reader :colour_table 9 | alias :color_table :colour_table 10 | 11 | # @return [Integer] The default font number for the document 12 | attr_accessor :default_font 13 | 14 | # @return [String] The character set for the document (:ansi, :pc, :pca, :mac) 15 | attr_accessor :character_set 16 | 17 | # @return [Array] The different formatted sections of the document 18 | attr_reader :sections 19 | 20 | # Creates a new document 21 | # 22 | # @return [RubyRTF::Document] The new document 23 | def initialize 24 | @font_table = [] 25 | @colour_table = [] 26 | @character_set = :ansi 27 | @default_font = 0 28 | 29 | @sections = [] 30 | end 31 | 32 | def <<(obj) 33 | @sections << obj 34 | end 35 | end 36 | end -------------------------------------------------------------------------------- /lib/ruby-rtf/font.rb: 
-------------------------------------------------------------------------------- 1 | module RubyRTF 2 | # Holds the information for a given font 3 | class Font 4 | # @return [Integer] The font number 5 | attr_accessor :number 6 | 7 | # @return [String] The font name 8 | attr_accessor :name 9 | 10 | # @return [String] The alternate name for this font 11 | attr_accessor :alternate_name 12 | 13 | # @return [String] The panose number for the font 14 | attr_accessor :panose 15 | 16 | # @return [Symbol] The theme for this font 17 | attr_accessor :theme 18 | 19 | # @return [Symbol] The pitch information for this font 20 | attr_accessor :pitch 21 | 22 | # @return [Integer] The character set number for the font 23 | attr_accessor :character_set 24 | 25 | # @return [String] The non-tagged name for the font 26 | attr_accessor :non_tagged_name 27 | 28 | # @return [Symbol] The font family command 29 | attr_accessor :family_command 30 | 31 | # The font families 32 | FAMILIES = [:nil, :roman, :swiss, :modern, :script, :decor, :tech, :bldl] 33 | 34 | # The font pitch values 35 | PITCHES = [:default, :fixed, :variable] 36 | 37 | # Creates a new font 38 | # 39 | # @param name [String] The font name to set (default: '') 40 | # @return [RubyRTF::Font] The new font 41 | def initialize(name = '') 42 | @family_command = :nil 43 | @name = name 44 | @alternate_name = '' 45 | @non_tagged_name = '' 46 | @panose = '' 47 | end 48 | 49 | # Set the pitch value for the font 50 | # 51 | # @param val [Integer] The pitch value to set (0, 1, or 2) 52 | # @return [Nil] 53 | def pitch=(val) 54 | @pitch = PITCHES[val] 55 | end 56 | 57 | # Cleans up the various font names 58 | # 59 | # @return [Nil] 60 | def cleanup_names 61 | @name = cleanup_name(@name) 62 | @alternate_name = cleanup_name(@alternate_name) 63 | @non_tagged_name = cleanup_name(@non_tagged_name) 64 | end 65 | 66 | # Convert to string format 67 | # 68 | # @return [String] The string representation 69 | def to_s 70 | "#{number}: #{name}" 71 
| end 72 | 73 | private 74 | 75 | # Cleans up a given font name 76 | # 77 | # @param str [String] The font name to clean up 78 | # @return [String] The cleaned font name 79 | def cleanup_name(str) 80 | str.gsub(/;$/, '') 81 | end 82 | end 83 | end 84 | -------------------------------------------------------------------------------- /lib/ruby-rtf/invalid_document.rb: -------------------------------------------------------------------------------- 1 | module RubyRTF 2 | # Raised if the parsed document is not valid RTF 3 | class InvalidDocument < RuntimeError ; end 4 | end -------------------------------------------------------------------------------- /lib/ruby-rtf/parser.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | module RubyRTF 4 | # Handles the parsing of RTF content into a RubyRTF::Document 5 | class Parser 6 | attr_accessor :current_section, :encoding 7 | 8 | # @return [Array] The current formatting block to use as the basis for new sections 9 | attr_reader :formatting_stack 10 | 11 | attr_reader :doc 12 | 13 | # @param unknown_control_warning_enabled [Boolean] Whether to write unknown control directive warnings to STDERR 14 | def initialize(unknown_control_warning_enabled: true) 15 | # default_mods needs to be the same hash in the formatting stack and in 16 | # the current_section modifiers or the first stack ends up getting lost. 
17 | default_mods = {} 18 | @formatting_stack = [default_mods] 19 | @current_section = {:text => '', :modifiers => default_mods} 20 | @unknown_control_warning_enabled = unknown_control_warning_enabled 21 | 22 | @seen = {} 23 | 24 | @doc = RubyRTF::Document.new 25 | @context_stack = [] 26 | end 27 | 28 | # Parses a given string into an RubyRTF::Document 29 | # 30 | # @param src [String] The document to parse 31 | # @return [RubyRTF::Document] The RTF document representing the provided @doc 32 | # @raise [RubyRTF::InvalidDocument] Raised if the document is not valid RTF 33 | def parse(src) 34 | raise RubyRTF::InvalidDocument.new("Opening \\rtf1 missing") unless src =~ /\{\\rtf1/ 35 | 36 | current_pos = 0 37 | len = src.length 38 | 39 | group_level = 0 40 | while (current_pos < len) 41 | char = src[current_pos] 42 | current_pos += 1 43 | 44 | case(char) 45 | when '\\' then 46 | name, val, current_pos = parse_control(src, current_pos) 47 | current_pos = handle_control(name, val, src, current_pos) 48 | 49 | when '{' then 50 | add_section! 51 | group_level += 1 52 | 53 | when '}' then 54 | pop_formatting! 55 | add_section! 56 | group_level -= 1 57 | 58 | when *["\r", "\n"] then ; 59 | else current_section[:text] << char 60 | end 61 | end 62 | 63 | unless current_section[:text].empty? 
64 | current_context << current_section 65 | end 66 | 67 | raise RubyRTF::InvalidDocument.new("Unbalanced {}s") unless group_level == 0 68 | @doc 69 | end 70 | 71 | STOP_CHARS = [' ', '\\', '{', '}', "\r", "\n", ';'] 72 | 73 | # Parses a control switch 74 | # 75 | # @param src [String] The fragment to parse 76 | # @param current_pos [Integer] The position in string the control starts at (after the \) 77 | # @return [String, String|Integer, Integer] The name, optional control value and the new current position 78 | # 79 | # @api private 80 | def parse_control(src, current_pos = 0) 81 | ctrl = '' 82 | val = nil 83 | 84 | max_len = src.length 85 | start = current_pos 86 | 87 | # handle hex special 88 | if src[current_pos] == "'" 89 | val = src[(current_pos + 1), 2].hex.chr 90 | if encoding 91 | val = val.force_encoding(encoding).encode('UTF-8') 92 | end 93 | current_pos += 3 94 | return [:hex, val, current_pos] 95 | end 96 | 97 | while (true) 98 | break if current_pos >= max_len 99 | break if STOP_CHARS.include?(src[current_pos]) 100 | 101 | current_pos += 1 102 | end 103 | return [src[current_pos].to_sym, nil, current_pos + 1] if start == current_pos 104 | 105 | contents = src[start, current_pos - start] 106 | m = contents.match(/([\*a-z]+)(\-?\d+)?\*?/) 107 | ctrl = m[1].to_sym unless m.nil? || m[1].nil? 108 | val = m[2].to_i unless m.nil? || m[2].nil? 
109 | 110 | # we advance past the optional space if present 111 | current_pos += 1 if src[current_pos] == ' ' 112 | 113 | [ctrl, val, current_pos] 114 | end 115 | 116 | # Handle a given control 117 | # 118 | # @param name [Symbol] The control name 119 | # @param val [Integer|nil] The controls value, or nil if non associated 120 | # @param src [String] The source document 121 | # @param current_pos [Integer] The current document position 122 | # @return [Integer] The new current position 123 | # 124 | # @api private 125 | def handle_control(name, val, src, current_pos) 126 | case(name) 127 | when :rtf then ; 128 | when :deff then @doc.default_font = val 129 | when :ansicpg then 130 | begin 131 | # Set the encoding if it's valid 132 | Encoding.find("windows-#{val}") 133 | self.encoding = "windows-#{val}" 134 | rescue => e 135 | # ignore 136 | end 137 | 138 | when *[:ansi, :mac, :pc, :pca] then @doc.character_set = name 139 | when :fonttbl then current_pos = parse_font_table(src, current_pos) 140 | when :colortbl then current_pos = parse_colour_table(src, current_pos) 141 | when :stylesheet then current_pos = parse_stylesheet(src, current_pos) 142 | when :info then current_pos = parse_info(src, current_pos) 143 | when :* then current_pos = parse_skip(src, current_pos) 144 | 145 | when :f then add_section!(:font => @doc.font_table[val]) 146 | 147 | # RTF font sizes are in half-points. divide by 2 to get points 148 | when :fs then add_section!(:font_size => (val.to_f / 2.0)) 149 | when :b then 150 | if val 151 | @formatting_stack.pop 152 | add_section! 153 | else 154 | add_section!(:bold => true) 155 | end 156 | 157 | when :i then 158 | if val 159 | @formatting_stack.pop 160 | add_section! 161 | else 162 | add_section!(:italic => true) 163 | end 164 | 165 | when :ul then 166 | if val 167 | @formatting_stack.pop 168 | add_section! 
169 | else 170 | add_section!(:underline => true) 171 | end 172 | when :ulnone then 173 | current_section[:modifiers][:underline] = false 174 | @formatting_stack.pop 175 | 176 | when :super then add_section!(:superscript => true) 177 | when :sub then add_section!(:subscript => true) 178 | when :strike then add_section!(:strikethrough => true) 179 | when :scaps then add_section!(:smallcaps => true) 180 | when :ql then add_section!(:justification => :left) 181 | when :qr then add_section!(:justification => :right) 182 | when :qj then add_section!(:justification => :full) 183 | when :qc then add_section!(:justification => :center) 184 | when :fi then add_section!(:first_line_indent => RubyRTF.twips_to_points(val)) 185 | when :li then add_section!(:left_indent => RubyRTF.twips_to_points(val)) 186 | when :ri then add_section!(:right_indent => RubyRTF.twips_to_points(val)) 187 | when :margl then add_section!(:left_margin => RubyRTF.twips_to_points(val)) 188 | when :margr then add_section!(:right_margin => RubyRTF.twips_to_points(val)) 189 | when :margt then add_section!(:top_margin => RubyRTF.twips_to_points(val)) 190 | when :margb then add_section!(:bottom_margin => RubyRTF.twips_to_points(val)) 191 | when :sb then add_section!(:space_before => RubyRTF.twips_to_points(val)) 192 | when :sa then add_section!(:space_after => RubyRTF.twips_to_points(val)) 193 | when :cf then add_section!(:foreground_colour => @doc.colour_table[val]) 194 | when :cb then add_section!(:background_colour => @doc.colour_table[val]) 195 | when :hex then current_section[:text] << val 196 | when :uc then @skip_byte = val.to_i 197 | when :u then 198 | if @skip_byte && @skip_byte == 0 199 | val = val % 100 200 | @skip_byte = nil 201 | end 202 | if val == 32 || val == 8232 203 | add_modifier_section({:newline => true}, "\n") 204 | else 205 | val += 65_536 if val < 0 206 | char = if val < 10_000 207 | [val.to_s.hex].pack('U*') 208 | else 209 | [val].pack('U*') 210 | end 211 | current_section[:text] << 
char 212 | end 213 | 214 | when *[:rquote, :lquote] then add_modifier_section({name => true}, "'") 215 | when *[:rdblquote, :ldblquote] then add_modifier_section({name => true}, '"') 216 | 217 | when :'{' then current_section[:text] << "{" 218 | when :'}' then current_section[:text] << "}" 219 | when :'\\' then current_section[:text] << '\\' 220 | 221 | when :~ then add_modifier_section({:nbsp => true}, " ") 222 | 223 | when :tab then add_modifier_section({:tab => true}, "\t") 224 | when :emdash then add_modifier_section({:emdash => true}, "--") 225 | when :endash then add_modifier_section({:endash => true}, "-") 226 | 227 | when *[:line, :"\n"] then add_modifier_section({:newline => true}, "\n") 228 | when :"\r" then ; 229 | 230 | when :par then add_modifier_section({:paragraph => true}) 231 | when *[:pard, :plain] then reset_current_section! 232 | 233 | when :trowd then 234 | table = nil 235 | table = doc.sections.last[:modifiers][:table] if doc.sections.last && doc.sections.last[:modifiers][:table] 236 | if table 237 | table.add_row 238 | else 239 | table = RubyRTF::Table.new 240 | 241 | if !current_section[:text].empty? 242 | force_section!({:table => table}) 243 | else 244 | current_section[:modifiers][:table] = table 245 | pop_formatting! 246 | end 247 | 248 | force_section! 249 | pop_formatting! 250 | end 251 | 252 | @context_stack.push(table.current_row.current_cell) 253 | 254 | when :trgaph then 255 | raise "trgaph outside of a table?" if !current_context.respond_to?(:table) 256 | current_context.table.half_gap = RubyRTF.twips_to_points(val) 257 | 258 | when :trleft then 259 | raise "trleft outside of a table?" if !current_context.respond_to?(:table) 260 | current_context.table.left_margin = RubyRTF.twips_to_points(val) 261 | 262 | when :cellx then 263 | raise "cellx outside of a table?" 
if !current_context.respond_to?(:row) 264 | current_context.row.end_positions.push(RubyRTF.twips_to_points(val)) 265 | 266 | when :intbl then ; 267 | 268 | when :cell then 269 | pop_formatting! 270 | 271 | table = current_context.table if current_context.respond_to?(:table) 272 | 273 | force_section! #unless current_section[:text].empty? 274 | reset_current_section! 275 | 276 | @context_stack.pop 277 | 278 | # only add a cell if the row isn't full already 279 | if table && table.current_row && (table.current_row.cells.length < table.current_row.end_positions.length) 280 | cell = table.current_row.add_cell 281 | @context_stack.push(cell) 282 | end 283 | 284 | when :row then 285 | if current_context.sections.empty? 286 | # empty row 287 | table = current_context.table 288 | table.rows.pop 289 | 290 | @context_stack.pop 291 | end 292 | when :pict then add_section!(picture: true) 293 | when :jpegblip then add_section!(picture_format:'jpeg') 294 | when :pngblip then add_section!(picture_format:'png') 295 | when *[:dibitmap, :wbitmap] then add_section!(picture_format:'bmp') 296 | when *[:wmetafile, :pmmetafile] then add_section!(picture_format:'wmf') 297 | when :pich then add_section!(picture_height: RubyRTF.twips_to_points(val)) 298 | when :picw then add_section!(picture_width: RubyRTF.twips_to_points(val)) 299 | when :picscalex then add_section!(picture_scale_x: val.to_i) 300 | when :picscaley then add_section!(picture_scale_y: val.to_i) 301 | 302 | else 303 | unless @seen[name] 304 | @seen[name] = true 305 | if @unknown_control_warning_enabled 306 | warn "Unknown control #{name.inspect} with #{val} at #{current_pos}" 307 | end 308 | end 309 | end 310 | current_pos 311 | end 312 | 313 | # Parses the font table group 314 | # 315 | # @param src [String] The source document 316 | # @param current_pos [Integer] The starting position 317 | # @return [Integer] The new current position 318 | # 319 | # @api private 320 | def parse_font_table(src, current_pos) 321 | group = 1 
322 | 323 | font = nil 324 | in_extra = nil 325 | 326 | while (true) 327 | case(src[current_pos]) 328 | when '{' then 329 | font = RubyRTF::Font.new if group == 1 330 | in_extra = nil 331 | 332 | group += 1 333 | 334 | when '}' then 335 | group -= 1 336 | 337 | if group <= 1 338 | break if font.nil? 339 | font.cleanup_names 340 | @doc.font_table[font.number] = font 341 | end 342 | 343 | in_extra = nil 344 | 345 | break if group == 0 346 | 347 | when '\\' then 348 | ctrl, val, current_pos = parse_control(src, current_pos + 1) 349 | 350 | font = RubyRTF::Font.new if font.nil? 351 | 352 | case(ctrl) 353 | when :f then font.number = @doc.font_table.count 354 | when :fprq then font.pitch = val 355 | when :fcharset then font.character_set = val 356 | when *[:flomajor, :fhimajor, :fdbmajor, :fbimajor, 357 | :flominor, :fhiminor, :fdbminor, :fbiminor] then 358 | font.theme = ctrl.to_s[1..-1].to_sym 359 | 360 | when *[:falt, :fname, :panose] then in_extra = ctrl 361 | else 362 | cmd = ctrl.to_s[1..-1].to_sym 363 | if RubyRTF::Font::FAMILIES.include?(cmd) 364 | font.family_command = cmd 365 | end 366 | end 367 | 368 | # need to next as parse_control will leave current_pos at the 369 | # next character already so current_pos += 1 below would move us too far 370 | next 371 | when *["\r", "\n"] then ; 372 | else 373 | case(in_extra) 374 | when :falt then font.alternate_name << src[current_pos] 375 | when :panose then font.panose << src[current_pos] 376 | when :fname then font.non_tagged_name << src[current_pos] 377 | when nil then font.name << src[current_pos] 378 | end 379 | end 380 | current_pos += 1 381 | end 382 | 383 | current_pos 384 | end 385 | 386 | # Parses the colour table group 387 | # 388 | # @param src [String] The source document 389 | # @param current_pos [Integer] The starting position 390 | # @return [Integer] The new current position 391 | # 392 | # @api private 393 | def parse_colour_table(src, current_pos) 394 | if src[current_pos] == ';' 395 | colour = 
RubyRTF::Colour.new 396 | colour.use_default = true 397 | 398 | @doc.colour_table << colour 399 | 400 | current_pos += 1 401 | end 402 | 403 | colour = RubyRTF::Colour.new 404 | 405 | while (true) 406 | case(src[current_pos]) 407 | when '\\' then 408 | ctrl, val, current_pos = parse_control(src, current_pos + 1) 409 | 410 | case(ctrl) 411 | when :red then colour.red = val 412 | when :green then colour.green = val 413 | when :blue then colour.blue = val 414 | when :ctint then colour.tint = val 415 | when :cshade then colour.shade = val 416 | when *[:cmaindarkone, :cmainlightone, :cmaindarktwo, :cmainlighttwo, :caccentone, 417 | :caccenttwo, :caccentthree, :caccentfour, :caccentfive, :caccentsix, 418 | :chyperlink, :cfollowedhyperlink, :cbackgroundone, :ctextone, 419 | :cbackgroundtwo, :ctexttwo] then 420 | colour.theme = ctrl.to_s[1..-1].to_sym 421 | end 422 | 423 | when *["\r", "\n", " "] then current_pos += 1 424 | when ';' then 425 | @doc.colour_table << colour 426 | 427 | colour = RubyRTF::Colour.new 428 | current_pos += 1 429 | 430 | when '}' then break 431 | end 432 | end 433 | 434 | current_pos 435 | end 436 | 437 | # Parses the stylesheet group 438 | # 439 | # @param src [String] The source document 440 | # @param current_pos [Integer] The starting position 441 | # @return [Integer] The new current position 442 | # 443 | # @api private 444 | def parse_stylesheet(src, current_pos) 445 | group = 1 446 | while (true) 447 | case(src[current_pos]) 448 | when '{' then group += 1 449 | when '}' then 450 | group -= 1 451 | break if group == 0 452 | end 453 | current_pos += 1 454 | end 455 | 456 | current_pos 457 | end 458 | 459 | # Parses the info group 460 | # 461 | # @param src [String] The source document 462 | # @param current_pos [Integer] The starting position 463 | # @return [Integer] The new current position 464 | # 465 | # @api private 466 | def parse_info(src, current_pos) 467 | group = 1 468 | while (true) 469 | case(src[current_pos]) 470 | when '{' then 
group += 1 471 | when '}' then 472 | group -= 1 473 | break if group == 0 474 | end 475 | current_pos += 1 476 | end 477 | 478 | current_pos 479 | end 480 | 481 | # Parses a comment group 482 | # 483 | # @param src [String] The source document 484 | # @param current_pos [Integer] The starting position 485 | # @return [Integer] The new current position 486 | # 487 | # @api private 488 | def parse_skip(src, current_pos) 489 | group = 1 490 | while (true) 491 | case(src[current_pos]) 492 | when '{' then group += 1 493 | when '}' then 494 | group -= 1 495 | break if group == 0 496 | end 497 | current_pos += 1 498 | end 499 | 500 | current_pos 501 | end 502 | 503 | def add_modifier_section(mods = {}, text = nil) 504 | force_section!(mods, text) 505 | pop_formatting! 506 | 507 | force_section! 508 | pop_formatting! 509 | end 510 | 511 | def add_section!(mods = {}) 512 | if current_section[:text].empty? 513 | current_section[:modifiers].merge!(mods) 514 | else 515 | force_section!(mods) 516 | end 517 | end 518 | 519 | # Keys that aren't inherited 520 | BLACKLISTED = [:paragraph, :newline, :tab, :lquote, :rquote, :ldblquote, :rdblquote] 521 | def force_section!(mods = {}, text = nil) 522 | current_context << @current_section 523 | 524 | # The modifiers for the new section 525 | modifiers = {} 526 | 527 | fs = formatting_stack.last || {} 528 | fs.each_pair do |k, v| 529 | next if BLACKLISTED.include?(k) 530 | modifiers[k] = v 531 | end 532 | 533 | modifiers.merge!(mods) 534 | 535 | formatting_stack.push(modifiers) 536 | 537 | @current_section = {:text => (text || ''), :modifiers => modifiers} 538 | end 539 | 540 | # Resets the current section to default formating 541 | # 542 | # @return [Nil] 543 | def reset_current_section! 
544 | paragraph = current_section[:modifiers].has_key?(:paragraph) 545 | current_section[:modifiers].clear 546 | current_section[:modifiers][:paragraph] = true if paragraph 547 | end 548 | 549 | def current_context 550 | @context_stack.last || doc 551 | end 552 | 553 | # Pop the current top element off the formatting stack. 554 | # @note This will not allow you to remove the default formatting parameters 555 | # 556 | # @return [Nil] 557 | def pop_formatting! 558 | formatting_stack.pop if formatting_stack.length > 1 559 | end 560 | end 561 | end 562 | -------------------------------------------------------------------------------- /lib/ruby-rtf/ruby-rtf.rb: -------------------------------------------------------------------------------- 1 | module RubyRTF 2 | module_function 3 | 4 | def twips_to_points(twips) 5 | twips / 20.0 6 | end 7 | end -------------------------------------------------------------------------------- /lib/ruby-rtf/table.rb: -------------------------------------------------------------------------------- 1 | module RubyRTF 2 | class Table 3 | attr_accessor :rows, :half_gap, :left_margin 4 | 5 | def initialize 6 | @left_margin = 0 7 | @half_gap = 0 8 | 9 | @rows = [] 10 | add_row 11 | end 12 | 13 | def current_row 14 | @rows.last 15 | end 16 | 17 | def add_row 18 | @rows << RubyRTF::Table::Row.new(self) 19 | @rows.last 20 | end 21 | 22 | class Row 23 | attr_accessor :table, :end_positions, :cells 24 | 25 | def initialize(table) 26 | @table = table 27 | @end_positions = [] 28 | 29 | @cells = [RubyRTF::Table::Row::Cell.new(self, 0)] 30 | end 31 | 32 | def current_cell 33 | @cells.last 34 | end 35 | 36 | def add_cell 37 | return @cells.last if @cells.last.sections.empty? 
38 | 39 | @cells << RubyRTF::Table::Row::Cell.new(self, @cells.length) 40 | @cells.last 41 | end 42 | 43 | class Cell 44 | attr_accessor :sections, :row, :idx 45 | 46 | def initialize(row, idx) 47 | @row = row 48 | @idx = idx 49 | @sections = [] 50 | end 51 | 52 | def <<(obj) 53 | @sections << obj 54 | end 55 | 56 | def table 57 | row.table 58 | end 59 | 60 | def width 61 | gap = row.table.half_gap 62 | left_margin = row.table.left_margin 63 | 64 | end_pos = row.end_positions[idx] 65 | prev_pos = idx == 0 ? 0 : row.end_positions[idx - 1] 66 | 67 | ((end_pos - prev_pos - (2 * gap) - left_margin) / row.end_positions[-1]) * 100 68 | end 69 | end 70 | end 71 | end 72 | end 73 | -------------------------------------------------------------------------------- /lib/ruby-rtf/version.rb: -------------------------------------------------------------------------------- 1 | # Main namespace of the RTF parser 2 | module RubyRTF 3 | # Current library version 4 | VERSION = '0.0.5' 5 | end 6 | -------------------------------------------------------------------------------- /ruby-rtf.gemspec: -------------------------------------------------------------------------------- 1 | $: << "./lib" 2 | require 'ruby-rtf/version' 3 | 4 | Gem::Specification.new do |s| 5 | s.name = 'ruby-rtf' 6 | 7 | s.version = RubyRTF::VERSION 8 | 9 | s.authors = 'dan sinclair' 10 | s.email = 'dj2@everburning.com' 11 | 12 | s.homepage = 'http://github.com/dj2/ruby-rtf' 13 | s.summary = 'Library for working with RTF files' 14 | s.description = s.summary 15 | 16 | s.add_development_dependency 'rspec', '>2.0' 17 | s.add_development_dependency 'yard', '~>0.9.11' 18 | 19 | s.bindir = 'bin' 20 | s.executables << 'rtf_parse' 21 | 22 | s.files = `git ls-files`.split("\n") 23 | s.test_files = `git ls-files -- spec/*`.split("\n") 24 | 25 | s.require_paths = ['lib'] 26 | end 27 | -------------------------------------------------------------------------------- /spec/colour_spec.rb: 
require 'spec_helper'

# Behaviour of a freshly constructed RubyRTF::Document: the tables it exposes
# and the default values it starts out with.
describe RubyRTF::Document do
  it 'provides a font table' do
    document = RubyRTF::Document.new
    fonts = nil
    # Reading the table must neither raise nor hand back nil
    -> { fonts = document.font_table }.should_not raise_error
    fonts.should_not be_nil
  end

  context 'colour table' do
    it 'provides a colour table' do
      document = RubyRTF::Document.new
      colours = nil
      -> { colours = document.colour_table }.should_not raise_error
      colours.should_not be_nil
    end

    it 'provdies access as color table' do
      document = RubyRTF::Document.new
      colours = nil
      # The American spelling has to return the same table
      -> { colours = document.color_table }.should_not raise_error
      colours.should == document.colour_table
    end
  end

  # Pending: no example body yet
  it 'provides a stylesheet'

  context 'defaults to' do
    it 'character set ansi' do
      charset = RubyRTF::Document.new.character_set
      charset.should == :ansi
    end

    it 'font 0' do
      font_number = RubyRTF::Document.new.default_font
      font_number.should == 0
    end
  end
end
== :swiss 14 | end 15 | end -------------------------------------------------------------------------------- /spec/parser_spec.rb: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | 3 | require 'spec_helper' 4 | 5 | describe RubyRTF::Parser do 6 | let(:parser) { RubyRTF::Parser.new } 7 | let(:doc) { parser.doc } 8 | 9 | context 'with input containing invalid control directives' do 10 | let(:parser) { RubyRTF::Parser.new(unknown_control_warning_enabled: unknown_control_warning_enabled) } 11 | let(:doc) { '{\rtf1\ansi\xxxxx0}' } 12 | 13 | context 'with unknown_control_warning_enabled = false' do 14 | let(:unknown_control_warning_enabled) { false } 15 | 16 | it 'does not write anything to stderr' do 17 | expect { parser.parse(doc) }.not_to output.to_stderr 18 | end 19 | end 20 | context 'with unknown_control_warning_enabled = true' do 21 | let(:unknown_control_warning_enabled) { true } 22 | 23 | it 'writes message to stderr' do 24 | expect { parser.parse(doc) }.to output("Unknown control :xxxxx with 0 at 18\n").to_stderr 25 | end 26 | end 27 | end 28 | 29 | it 'parses hello world' do 30 | src = '{\rtf1\ansi\deff0 {\fonttbl {\f0 Times New Roman;}}\f0 \fs60 Hello, World!}' 31 | lambda { parser.parse(src) }.should_not raise_error 32 | end 33 | 34 | it 'returns a RTF::Document' do 35 | src = '{\rtf1\ansi\deff0 {\fonttbl {\f0 Times New Roman;}}\f0 \fs60 Hello, World!}' 36 | d = parser.parse(src) 37 | d.is_a?(RubyRTF::Document).should == true 38 | end 39 | 40 | it 'parses a default font (\deffN)' do 41 | src = '{\rtf1\ansi\deff10 {\fonttbl {\f10 Times New Roman;}}\f0 \fs60 Hello, World!}' 42 | d = parser.parse(src) 43 | d.default_font.should == 10 44 | end 45 | 46 | context 'invalid document' do 47 | it 'raises exception if \rtf is missing' do 48 | src = '{\ansi\deff0 {\fonttbl {\f0 Times New Roman;}}\f0 \fs60 Hello, World!}' 49 | lambda { parser.parse(src) }.should raise_error(RubyRTF::InvalidDocument) 50 | end 51 | 
52 | it 'raises exception if the document does not start with \rtf' do 53 | src = '{\ansi\deff0\rtf1 {\fonttbl {\f0 Times New Roman;}}\f0 \fs60 Hello, World!}' 54 | lambda { parser.parse(src) }.should raise_error(RubyRTF::InvalidDocument) 55 | end 56 | 57 | it 'raises exception if the {}s are unbalanced' do 58 | src = '{\rtf1\ansi\deff0 {\fonttbl {\f0 Times New Roman;}\f0 \fs60 Hello, World!}' 59 | lambda { parser.parse(src) }.should raise_error(RubyRTF::InvalidDocument) 60 | end 61 | end 62 | 63 | context '#parse' do 64 | it 'parses text into the current section' do 65 | src = '{\rtf1\ansi\deff10 {\fonttbl {\f10 Times New Roman;}}\f0 \fs60 Hello, World!}' 66 | d = parser.parse(src) 67 | d.sections.first[:text].should == 'Hello, World!' 68 | end 69 | 70 | it 'adds a new section on {' do 71 | src = '{\rtf1 \fs60 Hello {\fs30 World}}' 72 | d = parser.parse(src) 73 | d.sections.first[:modifiers][:font_size].should == 30 74 | d.sections.first[:text].should == 'Hello ' 75 | 76 | d.sections.last[:modifiers][:font_size].should == 15 77 | d.sections.last[:text].should == 'World' 78 | end 79 | 80 | it 'adds a new section on }' do 81 | src = '{\rtf1 \fs60 Hello {\fs30 World}\fs12 Goodbye, cruel world.}' 82 | 83 | section = parser.parse(src).sections 84 | section[0][:modifiers][:font_size].should == 30 85 | section[0][:text].should == 'Hello ' 86 | 87 | section[1][:modifiers][:font_size].should == 15 88 | section[1][:text].should == 'World' 89 | 90 | section[2][:modifiers][:font_size].should == 6 91 | section[2][:text].should == 'Goodbye, cruel world.' 
92 | end 93 | 94 | it 'inherits properly over {} groups' do 95 | src = '{\rtf1 \b\fs60 Hello {\i\fs30 World}\ul Goodbye, cruel world.}' 96 | 97 | section = parser.parse(src).sections 98 | section[0][:modifiers][:font_size].should == 30 99 | section[0][:modifiers][:bold].should == true 100 | section[0][:modifiers].has_key?(:italic).should == false 101 | section[0][:modifiers].has_key?(:underline).should == false 102 | section[0][:text].should == 'Hello ' 103 | 104 | section[1][:modifiers][:font_size].should == 15 105 | section[1][:modifiers][:italic].should == true 106 | section[1][:modifiers][:bold].should == true 107 | section[1][:modifiers].has_key?(:underline).should == false 108 | section[1][:text].should == 'World' 109 | 110 | section[2][:modifiers][:font_size].should == 30 111 | section[2][:modifiers][:bold].should == true 112 | section[2][:modifiers][:underline].should == true 113 | section[2][:modifiers].has_key?(:italic).should == false 114 | section[2][:text].should == 'Goodbye, cruel world.' 
115 | end 116 | 117 | context 'parses pictures' do 118 | let(:src_bitmap) do 119 | src = '{\rtf1 {\pict\wbitmap\picw7064\pich5292\picwgoal4005\pichgoal3000\picscalex111\picscaley109 120 | ffd8ffe000104a4649460001010100b400b40000ffe1158a687474703a2f2f6e732e61646f62652e636f6d2f7861702f3}}' 121 | end 122 | let(:src_jpeg) do 123 | src = '{\rtf1 {\pict\jpegblip\picw7064\pich5292\picwgoal4005\pichgoal3000\picscalex111\picscaley109 124 | ffd8ffe000104a4649460001010100b400b40000ffe1158a687474703a2f2f6e732e61646f62652e636f6d2f7861702f3}}' 125 | end 126 | 127 | it 'should parse jpeg' do 128 | section = parser.parse(src_jpeg).sections 129 | section[0][:modifiers][:picture].should == true 130 | section[0][:modifiers][:picture_format].should == 'jpeg' 131 | end 132 | 133 | it 'should parse bmp' do 134 | section = parser.parse(src_bitmap).sections 135 | section[0][:modifiers][:picture].should == true 136 | section[0][:modifiers][:picture_format].should == 'bmp' 137 | section = parser.parse(src_bitmap).sections 138 | section[0][:modifiers][:picture].should == true 139 | section[0][:modifiers][:picture_format].should == 'bmp' 140 | end 141 | 142 | it 'should parse width' do 143 | section = parser.parse(src_bitmap).sections 144 | section[0][:modifiers][:picture_width].should == 7064 / 20.0 145 | end 146 | 147 | it 'should parse height' do 148 | section = parser.parse(src_bitmap).sections 149 | section[0][:modifiers][:picture_height].should == 5292 / 20.0 150 | end 151 | 152 | it 'should parse scale' do 153 | section = parser.parse(src_bitmap).sections 154 | section[0][:modifiers][:picture_scale_x].should == 111 155 | section[0][:modifiers][:picture_scale_y].should == 109 156 | end 157 | 158 | it 'should parse picture data' do 159 | section = parser.parse(src_bitmap).sections 160 | section[0][:text].should == 'ffd8ffe000104a4649460001010100b400b40000ffe1158a687474703a2f2f6e732e61646f62652e636f6d2f7861702f3' 161 | end 162 | end 163 | 164 | it 'clears ul with ul0' do 165 | src = 
'{\rtf1 \ul\b Hello\b0\ul0 World}' 166 | section = parser.parse(src).sections 167 | section[0][:modifiers][:bold].should == true 168 | section[0][:modifiers][:underline].should == true 169 | section[0][:text].should == 'Hello' 170 | 171 | section[1][:modifiers].has_key?(:bold).should == false 172 | section[1][:modifiers].has_key?(:underline).should == false 173 | section[1][:text].should == 'World' 174 | end 175 | 176 | it 'parses text when control matching fails' do 177 | src = '{\rtf1 Hello\~{World}}' 178 | section = parser.parse(src).sections 179 | section[0][:text].should == 'Hello' 180 | section[1][:text].should == 'World' 181 | end 182 | end 183 | 184 | context '#parse_control' do 185 | it 'parses a normal control' do 186 | parser.parse_control("rtf")[0, 2].should == [:rtf, nil] 187 | end 188 | 189 | it 'parses a control with a value' do 190 | parser.parse_control("f2")[0, 2].should == [:f, 2] 191 | end 192 | 193 | context 'unicode' do 194 | %w(u21487* u21487).each do |code| 195 | it "parses #{code}" do 196 | parser.parse_control(code)[0, 2].should == [:u, 21487] 197 | end 198 | end 199 | 200 | %w(u-21487* u-21487).each do |code| 201 | it "parses #{code}" do 202 | parser.parse_control(code)[0, 2].should == [:u, -21487] 203 | end 204 | end 205 | end 206 | 207 | it 'parses a hex control' do 208 | parser.parse_control("'7e")[0, 2].should == [:hex, '~'] 209 | end 210 | 211 | it 'parses a hex control with a string after it' do 212 | ctrl, val, current_pos = parser.parse_control("'7e25") 213 | ctrl.should == :hex 214 | val.should == '~' 215 | current_pos.should == 3 216 | end 217 | 218 | context "encoding is windows-1252" do 219 | it 'parses a hex control' do 220 | parser.encoding = 'windows-1252' 221 | parser.parse_control("'93")[0, 2].should == [:hex, '“'] 222 | end 223 | end 224 | 225 | [' ', '{', '}', '\\', "\r", "\n"].each do |stop| 226 | it "stops at a #{stop}" do 227 | parser.parse_control("rtf#{stop}test")[0, 2].should == [:rtf, nil] 228 | end 229 | end 230 
| 231 | it 'handles a non-zero current position' do 232 | parser.parse_control('Test ansi test', 5)[0, 2].should == [:ansi, nil] 233 | end 234 | 235 | it 'advances the current positon' do 236 | parser.parse_control('Test ansi{test', 5).last.should == 9 237 | end 238 | 239 | it 'advances the current positon past the optional space' do 240 | parser.parse_control('Test ansi test', 5).last.should == 10 241 | end 242 | 243 | it 'does not fail when control matching fails' do 244 | parser.parse_control('~}')[0, 2].should == ['', nil] 245 | end 246 | end 247 | 248 | context 'character set' do 249 | %w(ansi mac pc pca).each do |type| 250 | it "accepts #{type}" do 251 | src = "{\\rtf1\\#{type}\\deff0 {\\fonttbl {\\f0 Times New Roman;}}\\f0 \\fs60 Hello, World!}" 252 | doc = parser.parse(src) 253 | doc.character_set.should == type.to_sym 254 | end 255 | end 256 | end 257 | 258 | context 'font table' do 259 | it 'sets the font table into the document' do 260 | src = '{\rtf1{\fonttbl{\f0\froman Times;}{\f1\fnil Arial;}}}' 261 | doc = parser.parse(src) 262 | 263 | font = doc.font_table[0] 264 | font.family_command.should == :roman 265 | font.name.should == 'Times' 266 | end 267 | 268 | it 'parses an empty font table' do 269 | src = "{\\rtf1\\ansi\\ansicpg1252\\cocoartf1187\n{\\fonttbl}\n{\\colortbl;\\red255\\green255\\blue255;}\n}" 270 | doc = parser.parse(src) 271 | 272 | doc.font_table.should == [] 273 | end 274 | 275 | context '#parse_font_table' do 276 | it 'parses a font table' do 277 | src = '{\f0\froman Times New Roman;}{\f1\fnil Arial;}}}' 278 | parser.parse_font_table(src, 0) 279 | tbl = doc.font_table 280 | 281 | tbl.length.should == 2 282 | tbl[0].family_command.should == :roman 283 | tbl[0].name.should == 'Times New Roman' 284 | 285 | tbl[1].family_command.should == :nil 286 | tbl[1].name.should == 'Arial' 287 | end 288 | 289 | it 'parses a font table without braces' do 290 | src = '\f0\froman\fcharset0 TimesNewRomanPSMT;}}' 291 | parser.parse_font_table(src, 0) 292 
| tbl = doc.font_table 293 | tbl[0].name.should == 'TimesNewRomanPSMT' 294 | end 295 | 296 | it 'handles \r and \n in the font table' do 297 | src = "{\\f0\\froman Times New Roman;}\r{\\f1\\fnil Arial;}\n}}" 298 | parser.parse_font_table(src, 0) 299 | tbl = doc.font_table 300 | 301 | tbl.length.should == 2 302 | tbl[0].family_command.should == :roman 303 | tbl[0].name.should == 'Times New Roman' 304 | 305 | tbl[1].family_command.should == :nil 306 | tbl[1].name.should == 'Arial' 307 | end 308 | 309 | it 'the family command is optional' do 310 | src = '{\f0 Times New Roman;}}}' 311 | parser.parse_font_table(src, 0) 312 | tbl = doc.font_table 313 | tbl[0].family_command.should == :nil 314 | tbl[0].name.should == 'Times New Roman' 315 | end 316 | 317 | it 'does not require the numbering to be incremental' do 318 | src = '{\f77\froman Times New Roman;}{\f3\fnil Arial;}}}' 319 | parser.parse_font_table(src, 0) 320 | tbl = doc.font_table 321 | 322 | tbl[77].family_command.should == :roman 323 | tbl[77].name.should == 'Times New Roman' 324 | 325 | tbl[3].family_command.should == :nil 326 | tbl[3].name.should == 'Arial' 327 | end 328 | 329 | it 'accepts the \falt command' do 330 | src = '{\f0\froman Times New Roman{\*\falt Courier New};}}' 331 | parser.parse_font_table(src, 0) 332 | tbl = doc.font_table 333 | tbl[0].name.should == 'Times New Roman' 334 | tbl[0].alternate_name.should == 'Courier New' 335 | end 336 | 337 | it 'sets current pos to the closing }' do 338 | src = '{\f0\froman Times New Roman{\*\falt Courier New};}}' 339 | parser.parse_font_table(src, 0).should == (src.length - 1) 340 | end 341 | 342 | it 'accepts the panose command' do 343 | src = '{\f0\froman\fcharset0\fprq2{\*\panose 02020603050405020304}Times New Roman{\*\falt Courier New};}}' 344 | parser.parse_font_table(src, 0) 345 | tbl = doc.font_table 346 | tbl[0].panose.should == '02020603050405020304' 347 | tbl[0].name.should == 'Times New Roman' 348 | tbl[0].alternate_name.should == 'Courier New' 349 
| end 350 | 351 | %w(flomajor fhimajor fdbmajor fbimajor flominor fhiminor fdbminor fbiminor).each do |type| 352 | it "handles theme font type: #{type}" do 353 | src = "{\\f0\\#{type} Times New Roman;}}" 354 | parser.parse_font_table(src, 0) 355 | tbl = doc.font_table 356 | tbl[0].name.should == 'Times New Roman' 357 | tbl[0].theme.should == type[1..-1].to_sym 358 | end 359 | end 360 | 361 | [[0, :default], [1, :fixed], [2, :variable]].each do |pitch| 362 | it 'parses pitch information' do 363 | src = "{\\f0\\fprq#{pitch.first} Times New Roman;}}" 364 | parser.parse_font_table(src, 0) 365 | tbl = doc.font_table 366 | tbl[0].name.should == 'Times New Roman' 367 | tbl[0].pitch.should == pitch.last 368 | end 369 | end 370 | 371 | it 'parses the non-tagged font name' do 372 | src = '{\f0{\*\fname Arial;}Times New Roman;}}' 373 | parser.parse_font_table(src, 0) 374 | tbl = doc.font_table 375 | tbl[0].name.should == 'Times New Roman' 376 | tbl[0].non_tagged_name.should == 'Arial' 377 | end 378 | 379 | it 'parses the charset' do 380 | src = '{\f0\fcharset87 Times New Roman;}}' 381 | parser.parse_font_table(src, 0) 382 | tbl = doc.font_table 383 | tbl[0].name.should == 'Times New Roman' 384 | tbl[0].character_set.should == 87 385 | end 386 | end 387 | end 388 | 389 | context 'colour table' do 390 | it 'sets the colour table into the document' do 391 | src = '{\rtf1{\colortbl\red0\green0\blue0;\red127\green2\blue255;}}' 392 | doc = parser.parse(src) 393 | 394 | clr = doc.colour_table[0] 395 | clr.red.should == 0 396 | clr.green.should == 0 397 | clr.blue.should == 0 398 | 399 | clr = doc.colour_table[1] 400 | clr.red.should == 127 401 | clr.green.should == 2 402 | clr.blue.should == 255 403 | end 404 | 405 | it 'ignores single space between colour sections' do 406 | src = '{\rtf1{\colortbl\red0\green0\blue0; \red127\green2\blue255;}}' 407 | doc = parser.parse(src) 408 | 409 | clr = doc.colour_table[0] 410 | clr.red.should == 0 411 | clr.green.should == 0 412 | 
clr.blue.should == 0 413 | 414 | clr = doc.colour_table[1] 415 | clr.red.should == 127 416 | clr.green.should == 2 417 | clr.blue.should == 255 418 | end 419 | 420 | it 'ignores double space between colour sections' do 421 | src = '{\rtf1{\colortbl\red0\green0\blue0; \red127\green2\blue255;}}' 422 | doc = parser.parse(src) 423 | 424 | clr = doc.colour_table[0] 425 | clr.red.should == 0 426 | clr.green.should == 0 427 | clr.blue.should == 0 428 | 429 | clr = doc.colour_table[1] 430 | clr.red.should == 127 431 | clr.green.should == 2 432 | clr.blue.should == 255 433 | end 434 | 435 | it 'sets the first colour if missing' do 436 | src = '{\rtf1{\colortbl;\red255\green0\blue0;\red0\green0\blue255;}}' 437 | doc = parser.parse(src) 438 | 439 | clr = doc.colour_table[0] 440 | clr.use_default?.should == true 441 | 442 | clr = doc.colour_table[1] 443 | clr.red.should == 255 444 | clr.green.should == 0 445 | clr.blue.should == 0 446 | end 447 | 448 | context '#parse_colour_table' do 449 | it 'parses \red \green \blue' do 450 | src = '\red2\green55\blue23;}' 451 | parser.parse_colour_table(src, 0) 452 | tbl = doc.colour_table 453 | tbl[0].red.should == 2 454 | tbl[0].green.should == 55 455 | tbl[0].blue.should == 23 456 | end 457 | 458 | it 'handles ctintN' do 459 | src = '\ctint22\red2\green55\blue23;}' 460 | parser.parse_colour_table(src, 0) 461 | tbl = doc.colour_table 462 | tbl[0].tint.should == 22 463 | end 464 | 465 | it 'handles cshadeN' do 466 | src = '\cshade11\red2\green55\blue23;}' 467 | parser.parse_colour_table(src, 0) 468 | tbl = doc.colour_table 469 | tbl[0].shade.should == 11 470 | end 471 | 472 | %w(cmaindarkone cmainlightone cmaindarktwo cmainlighttwo caccentone 473 | caccenttwo caccentthree caccentfour caccentfive caccentsix 474 | chyperlink cfollowedhyperlink cbackgroundone ctextone 475 | cbackgroundtwo ctexttwo).each do |theme| 476 | it "it allows theme item #{theme}" do 477 | src = "\\#{theme}\\red11\\green22\\blue33;}" 478 | 
parser.parse_colour_table(src, 0) 479 | tbl = doc.colour_table 480 | tbl[0].theme.should == theme[1..-1].to_sym 481 | end 482 | end 483 | 484 | it 'handles \r and \n' do 485 | src = "\\cshade11\\red2\\green55\r\n\\blue23;}" 486 | parser.parse_colour_table(src, 0) 487 | tbl = doc.colour_table 488 | tbl[0].shade.should == 11 489 | tbl[0].red.should == 2 490 | tbl[0].green.should == 55 491 | tbl[0].blue.should == 23 492 | end 493 | end 494 | end 495 | 496 | context 'stylesheet' do 497 | it 'parses a stylesheet' 498 | end 499 | 500 | context 'document info' do 501 | it 'parse the doocument info' 502 | end 503 | 504 | context '#handle_control' do 505 | it 'sets the font' do 506 | font = RubyRTF::Font.new('Times New Roman') 507 | doc.font_table[0] = font 508 | 509 | parser.handle_control(:f, 0, nil, 0) 510 | parser.current_section[:modifiers][:font].should == font 511 | end 512 | 513 | it 'sets the font size' do 514 | parser.handle_control(:fs, 61, nil, 0) 515 | parser.current_section[:modifiers][:font_size].should == 30.5 516 | end 517 | 518 | it 'sets bold' do 519 | parser.handle_control(:b, nil, nil, 0) 520 | parser.current_section[:modifiers][:bold].should == true 521 | end 522 | 523 | it 'sets underline' do 524 | parser.handle_control(:ul, nil, nil, 0) 525 | parser.current_section[:modifiers][:underline].should == true 526 | end 527 | 528 | it 'sets italic' do 529 | parser.handle_control(:i, nil, nil, 0) 530 | parser.current_section[:modifiers][:italic].should == true 531 | end 532 | 533 | %w(rquote lquote).each do |quote| 534 | it "sets a #{quote}" do 535 | parser.current_section[:text] = 'My code' 536 | parser.handle_control(quote.to_sym, nil, nil, 0) 537 | doc.sections.last[:text].should == "'" 538 | doc.sections.last[:modifiers][quote.to_sym].should == true 539 | end 540 | end 541 | 542 | %w(rdblquote ldblquote).each do |quote| 543 | it "sets a #{quote}" do 544 | parser.current_section[:text] = 'My code' 545 | parser.handle_control(quote.to_sym, nil, nil, 0) 546 
| doc.sections.last[:text].should == '"' 547 | doc.sections.last[:modifiers][quote.to_sym].should == true 548 | end 549 | end 550 | 551 | it 'sets a hex character' do 552 | parser.current_section[:text] = 'My code' 553 | parser.handle_control(:hex, '~', nil, 0) 554 | parser.current_section[:text].should == 'My code~' 555 | end 556 | 557 | it 'sets a unicode character < 1000 (char 643)' do 558 | parser.current_section[:text] = 'My code' 559 | parser.handle_control(:u, 643, nil, 0) 560 | parser.current_section[:text].should == 'My codeك' 561 | end 562 | 563 | it 'sets a unicode character < 32768 (char 2603)' do 564 | parser.current_section[:text] = 'My code' 565 | parser.handle_control(:u, 2603, nil, 0) 566 | parser.current_section[:text].should == 'My code☃' 567 | end 568 | 569 | it 'sets a unicode character < 32768 (char 21340)' do 570 | parser.current_section[:text] = 'My code' 571 | parser.handle_control(:u, 21340, nil, 0) 572 | parser.current_section[:text].should == 'My code卜' 573 | end 574 | 575 | 576 | it 'sets a unicode character > 32767 (char 36,947)' do 577 | parser.current_section[:text] = 'My code' 578 | parser.handle_control(:u, -28589, nil, 0) 579 | parser.current_section[:text].should == 'My code道' 580 | end 581 | 582 | context "uc0 skips a byte in the next unicode char" do 583 | it "u8278" do 584 | parser.current_section[:text] = 'My code ' 585 | parser.handle_control(:uc, 0, nil, 0) 586 | parser.handle_control(:u, 8278, nil, 0) 587 | parser.current_section[:text].should == 'My code x' 588 | end 589 | 590 | it "u8232 - does newline" do 591 | parser.current_section[:text] = "end." 592 | parser.handle_control(:uc, 0, nil, 0) 593 | parser.handle_control(:u, 8232, nil, 0) 594 | doc.sections.last[:modifiers][:newline].should == true 595 | doc.sections.last[:text].should == "\n" 596 | end 597 | end 598 | 599 | context 'new line' do 600 | ['line', "\n"].each do |type| 601 | it "sets from #{type}" do 602 | parser.current_section[:text] = "end." 
603 | parser.handle_control(type.to_sym, nil, nil, 0) 604 | doc.sections.last[:modifiers][:newline].should == true 605 | doc.sections.last[:text].should == "\n" 606 | end 607 | end 608 | 609 | it 'ignores \r' do 610 | parser.current_section[:text] = "end." 611 | parser.handle_control(:"\r", nil, nil, 0) 612 | parser.current_section[:text].should == "end." 613 | end 614 | end 615 | 616 | it 'inserts a \tab' do 617 | parser.current_section[:text] = "end." 618 | parser.handle_control(:tab, nil, nil, 0) 619 | doc.sections.last[:modifiers][:tab].should == true 620 | doc.sections.last[:text].should == "\t" 621 | end 622 | 623 | it 'inserts a \super' do 624 | parser.current_section[:text] = "end." 625 | parser.handle_control(:super, nil, nil, 0) 626 | 627 | parser.current_section[:modifiers][:superscript].should == true 628 | parser.current_section[:text].should == "" 629 | end 630 | 631 | it 'inserts a \sub' do 632 | parser.current_section[:text] = "end." 633 | parser.handle_control(:sub, nil, nil, 0) 634 | 635 | parser.current_section[:modifiers][:subscript].should == true 636 | parser.current_section[:text].should == "" 637 | end 638 | 639 | it 'inserts a \strike' do 640 | parser.current_section[:text] = "end." 641 | parser.handle_control(:strike, nil, nil, 0) 642 | 643 | parser.current_section[:modifiers][:strikethrough].should == true 644 | parser.current_section[:text].should == "" 645 | end 646 | 647 | it 'inserts a \scaps' do 648 | parser.current_section[:text] = "end." 649 | parser.handle_control(:scaps, nil, nil, 0) 650 | 651 | parser.current_section[:modifiers][:smallcaps].should == true 652 | parser.current_section[:text].should == "" 653 | end 654 | 655 | it 'inserts an \emdash' do 656 | parser.current_section[:text] = "end." 
657 | parser.handle_control(:emdash, nil, nil, 0) 658 | doc.sections.last[:modifiers][:emdash].should == true 659 | doc.sections.last[:text].should == "--" 660 | end 661 | 662 | it 'inserts an \endash' do 663 | parser.current_section[:text] = "end." 664 | parser.handle_control(:endash, nil, nil, 0) 665 | doc.sections.last[:modifiers][:endash].should == true 666 | doc.sections.last[:text].should == "-" 667 | end 668 | 669 | context 'escapes' do 670 | ['{', '}', '\\'].each do |escape| 671 | it "inserts an escaped #{escape}" do 672 | parser.current_section[:text] = "end." 673 | parser.handle_control(escape.to_sym, nil, nil, 0) 674 | parser.current_section[:text].should == "end.#{escape}" 675 | end 676 | end 677 | end 678 | 679 | it 'adds a new section for a par command' do 680 | parser.current_section[:text] = 'end.' 681 | parser.handle_control(:par, nil, nil, 0) 682 | parser.current_section[:text].should == "" 683 | end 684 | 685 | %w(pard plain).each do |type| 686 | it "resets the current sections information to default for #{type}" do 687 | parser.current_section[:modifiers][:bold] = true 688 | parser.current_section[:modifiers][:italic] = true 689 | parser.handle_control(type.to_sym, nil, nil, 0) 690 | 691 | parser.current_section[:modifiers].has_key?(:bold).should == false 692 | parser.current_section[:modifiers].has_key?(:italic).should == false 693 | end 694 | end 695 | 696 | context 'colour' do 697 | it 'sets the foreground colour' do 698 | doc.colour_table << RubyRTF::Colour.new(255, 0, 255) 699 | parser.handle_control(:cf, 0, nil, 0) 700 | parser.current_section[:modifiers][:foreground_colour].to_s.should == "[255, 0, 255]" 701 | end 702 | 703 | it 'sets the background colour' do 704 | doc.colour_table << RubyRTF::Colour.new(255, 0, 255) 705 | parser.handle_control(:cb, 0, nil, 0) 706 | parser.current_section[:modifiers][:background_colour].to_s.should == "[255, 0, 255]" 707 | end 708 | end 709 | 710 | context 'justification' do 711 | it 'handles left 
justify' do 712 | parser.handle_control(:ql, nil, nil, 0) 713 | parser.current_section[:modifiers][:justification].should == :left 714 | end 715 | 716 | it 'handles right justify' do 717 | parser.handle_control(:qr, nil, nil, 0) 718 | parser.current_section[:modifiers][:justification].should == :right 719 | end 720 | 721 | it 'handles full justify' do 722 | parser.handle_control(:qj, nil, nil, 0) 723 | parser.current_section[:modifiers][:justification].should == :full 724 | end 725 | 726 | it 'handles centered' do 727 | parser.handle_control(:qc, nil, nil, 0) 728 | parser.current_section[:modifiers][:justification].should == :center 729 | end 730 | end 731 | 732 | context 'indenting' do 733 | it 'handles first line indent' do 734 | parser.handle_control(:fi, 1000, nil, 0) 735 | parser.current_section[:modifiers][:first_line_indent].should == 50 736 | end 737 | 738 | it 'handles left indent' do 739 | parser.handle_control(:li, 1000, nil, 0) 740 | parser.current_section[:modifiers][:left_indent].should == 50 741 | end 742 | 743 | it 'handles right indent' do 744 | parser.handle_control(:ri, 1000, nil, 0) 745 | parser.current_section[:modifiers][:right_indent].should == 50 746 | end 747 | end 748 | 749 | context 'margins' do 750 | it 'handles left margin' do 751 | parser.handle_control(:margl, 1000, nil, 0) 752 | parser.current_section[:modifiers][:left_margin].should == 50 753 | end 754 | 755 | it 'handles right margin' do 756 | parser.handle_control(:margr, 1000, nil, 0) 757 | parser.current_section[:modifiers][:right_margin].should == 50 758 | end 759 | 760 | it 'handles top margin' do 761 | parser.handle_control(:margt, 1000, nil, 0) 762 | parser.current_section[:modifiers][:top_margin].should == 50 763 | end 764 | 765 | it 'handles bottom margin' do 766 | parser.handle_control(:margb, 1000, nil, 0) 767 | parser.current_section[:modifiers][:bottom_margin].should == 50 768 | end 769 | end 770 | 771 | context 'paragraph spacing' do 772 | it 'handles space before' do 
773 | parser.handle_control(:sb, 1000, nil, 0) 774 | parser.current_section[:modifiers][:space_before].should == 50 775 | end 776 | 777 | it 'handles space after' do 778 | parser.handle_control(:sa, 1000, nil, 0) 779 | parser.current_section[:modifiers][:space_after].should == 50 780 | end 781 | end 782 | 783 | context 'non breaking space' do 784 | it 'handles :~' do 785 | parser.current_section[:text] = "end." 786 | parser.handle_control(:~, nil, nil, 0) 787 | doc.sections.last[:modifiers][:nbsp].should == true 788 | doc.sections.last[:text].should == " " 789 | end 790 | end 791 | end 792 | 793 | context 'sections' do 794 | it 'has sections' do 795 | doc.sections.should_not be_nil 796 | end 797 | 798 | it 'sets an initial section' do 799 | parser.current_section.should_not be_nil 800 | end 801 | 802 | context '#add_section!' do 803 | it 'does not add a section if the current :text is empty' do 804 | d = parser 805 | d.add_section! 806 | doc.sections.length.should == 0 807 | end 808 | 809 | it 'adds a section of the current section has text' do 810 | d = parser 811 | d.current_section[:text] = "Test" 812 | d.add_section! 813 | doc.sections.length.should == 1 814 | end 815 | 816 | it 'inherits the modifiers from the parent section' do 817 | d = parser 818 | d.current_section[:modifiers][:bold] = true 819 | d.current_section[:modifiers][:italics] = true 820 | d.current_section[:text] = "New text" 821 | 822 | d.add_section! 823 | 824 | d.current_section[:modifiers][:underline] = true 825 | 826 | sections = doc.sections 827 | sections.first[:modifiers].should == {:bold => true, :italics => true} 828 | d.current_section[:modifiers].should == {:bold => true, :italics => true, :underline => true} 829 | end 830 | end 831 | 832 | context '#reset_current_section!' do 833 | it 'resets the current sections modifiers' do 834 | d = parser 835 | d.current_section[:modifiers] = {:bold => true, :italics => true} 836 | d.current_section[:text] = "New text" 837 | 838 | d.add_section! 
839 | d.reset_current_section! 840 | d.current_section[:modifiers][:underline] = true 841 | 842 | sections = doc.sections 843 | sections.first[:modifiers].should == {:bold => true, :italics => true} 844 | d.current_section[:modifiers].should == {:underline => true} 845 | end 846 | end 847 | 848 | context '#remove_last_section!' do 849 | it 'removes the last section' do 850 | d = parser 851 | d.current_section[:modifiers] = {:bold => true, :italics => true} 852 | d.current_section[:text] = "New text" 853 | 854 | d.add_section! 855 | 856 | d.current_section[:modifiers][:underline] = true 857 | 858 | doc.sections.length.should == 1 859 | doc.sections.first[:text].should == 'New text' 860 | end 861 | end 862 | 863 | context 'tables' do 864 | def compare_table_results(table, data) 865 | table.rows.length.should == data.length 866 | 867 | data.each_with_index do |row, idx| 868 | end_positions = table.rows[idx].end_positions 869 | row[:end_positions].each_with_index do |size, cidx| 870 | end_positions[cidx].should == size 871 | end 872 | 873 | cells = table.rows[idx].cells 874 | cells.length.should == row[:values].length 875 | 876 | row[:values].each_with_index do |items, vidx| 877 | sects = cells[vidx].sections 878 | items.each_with_index do |val, iidx| 879 | sects[iidx][:text].should == val 880 | end 881 | end 882 | end 883 | end 884 | 885 | it 'parses a single row/column table' do 886 | src = '{\rtf1 Before Table' + 887 | '\trowd\trgaph180\cellx1440' + 888 | '\pard\intbl fee.\cell\row ' + 889 | 'After table}' 890 | d = parser.parse(src) 891 | 892 | sect = d.sections 893 | sect.length.should == 3 894 | sect[0][:text].should == 'Before Table' 895 | sect[2][:text].should == 'After table' 896 | 897 | sect[1][:modifiers][:table].should_not be_nil 898 | table = sect[1][:modifiers][:table] 899 | 900 | compare_table_results(table, [{:end_positions => [72], :values => [['fee.']]}]) 901 | end 902 | 903 | it 'parses a \trgaph180' do 904 | src = '{\rtf1 Before Table' + 905 | 
'\trowd\trgaph180\cellx1440' + 906 | '\pard\intbl fee.\cell\row ' + 907 | 'After table}' 908 | d = parser.parse(src) 909 | 910 | table = d.sections[1][:modifiers][:table] 911 | table.half_gap.should == 9 912 | end 913 | 914 | it 'parses a \trleft240' do 915 | src = '{\rtf1 Before Table' + 916 | '\trowd\trgaph180\trleft240\cellx1440' + 917 | '\pard\intbl fee.\cell\row ' + 918 | 'After table}' 919 | d = parser.parse(src) 920 | 921 | table = d.sections[1][:modifiers][:table] 922 | table.left_margin.should == 12 923 | end 924 | 925 | it 'parses a single row with multiple columns' do 926 | src = '{\rtf1 Before Table' + 927 | '\trowd\trgaph180\cellx1440\cellx2880\cellx1000' + 928 | '\pard\intbl fee.\cell' + 929 | '\pard\intbl fie.\cell' + 930 | '\pard\intbl foe.\cell\row ' + 931 | 'After table}' 932 | d = parser.parse(src) 933 | 934 | sect = d.sections 935 | 936 | sect.length.should == 3 937 | sect[0][:text].should == 'Before Table' 938 | sect[2][:text].should == 'After table' 939 | 940 | sect[1][:modifiers][:table].should_not be_nil 941 | table = sect[1][:modifiers][:table] 942 | 943 | compare_table_results(table, [{:end_positions => [72, 144, 50], :values => [['fee.'], ['fie.'], ['foe.']]}]) 944 | end 945 | 946 | it 'parses multiple rows and multiple columns' do 947 | src = '{\rtf1 \strike Before Table' + 948 | '\trowd\trgaph180\cellx1440\cellx2880\cellx1000' + 949 | '\pard\intbl\ul fee.\cell' + 950 | '\pard\intbl\i fie.\cell' + 951 | '\pard\intbl\b foe.\cell\row ' + 952 | '\trowd\trgaph180\cellx1000\cellx1440\cellx2880' + 953 | '\pard\intbl\i foo.\cell' + 954 | '\pard\intbl\b bar.\cell' + 955 | '\pard\intbl\ul baz.\cell\row ' + 956 | 'After table}' 957 | d = parser.parse(src) 958 | 959 | sect = d.sections 960 | sect.length.should == 3 961 | sect[0][:text].should == 'Before Table' 962 | sect[2][:text].should == 'After table' 963 | 964 | sect[1][:modifiers][:table].should_not be_nil 965 | table = sect[1][:modifiers][:table] 966 | 967 | compare_table_results(table, 
[{:end_positions => [72, 144, 50], :values => [['fee.'], ['fie.'], ['foe.']]}, 968 | {:end_positions => [50, 72, 144], :values => [['foo.'], ['bar.'], ['baz.']]}]) 969 | end 970 | 971 | it 'parses a grouped table' do 972 | src = '{\rtf1 \strike Before Table' + 973 | '{\trowd\trgaph180\cellx1440\cellx2880\cellx1000' + 974 | '\pard\intbl\ul fee.\cell' + 975 | '\pard\intbl\i fie.\cell' + 976 | '\pard\intbl\b foe.\cell\row}' + 977 | '{\trowd\trgaph180\cellx1000\cellx1440\cellx2880' + 978 | '\pard\intbl\i foo.\cell' + 979 | '\pard\intbl\b bar.\cell' + 980 | '\pard\intbl\ul baz.\cell\row}' + 981 | 'After table}' 982 | d = parser.parse(src) 983 | 984 | sect = d.sections 985 | sect.length.should == 3 986 | sect[0][:text].should == 'Before Table' 987 | sect[2][:text].should == 'After table' 988 | 989 | sect[1][:modifiers][:table].should_not be_nil 990 | table = sect[1][:modifiers][:table] 991 | 992 | compare_table_results(table, [{:end_positions => [72, 144, 50], :values => [['fee.'], ['fie.'], ['foe.']]}, 993 | {:end_positions => [50, 72, 144], :values => [['foo.'], ['bar.'], ['baz.']]}]) 994 | end 995 | 996 | it 'parses a new line inside a table cell' do 997 | src = '{\rtf1 Before Table' + 998 | '\trowd\trgaph180\cellx1440' + 999 | '\pard\intbl fee.\line fie.\cell\row ' + 1000 | 'After table}' 1001 | d = parser.parse(src) 1002 | 1003 | sect = d.sections 1004 | sect.length.should == 3 1005 | sect[0][:text].should == 'Before Table' 1006 | sect[2][:text].should == 'After table' 1007 | table = sect[1][:modifiers][:table] 1008 | 1009 | compare_table_results(table, [{:end_positions => [72], :values => [["fee.", "\n", "fie."]]}]) 1010 | end 1011 | 1012 | it 'parses a new line inside a table cell' do 1013 | src = '{\rtf1 Before Table' + 1014 | '\trowd\trgaph180\cellx1440\cellx2880\cellx1000' + 1015 | '\pard\intbl fee.\cell' + 1016 | '\pard\intbl\cell' + 1017 | '\pard\intbl fie.\cell\row ' + 1018 | 'After table}' 1019 | d = parser.parse(src) 1020 | 1021 | sect = d.sections 1022 | 
sect.length.should == 3 1023 | sect[0][:text].should == 'Before Table' 1024 | sect[2][:text].should == 'After table' 1025 | table = sect[1][:modifiers][:table] 1026 | 1027 | compare_table_results(table, [{:end_positions => [72, 144, 50], :values => [["fee."], [""], ["fie."]]}]) 1028 | end 1029 | 1030 | it 'parses a grouped cell' do 1031 | src = '{\rtf1 Before Table\trowd\cellx1440\cellx2880\cellx1000 \pard ' + 1032 | '{\fs20 Familiar }{\cell }' + 1033 | '{\fs20 Alignment }{\cell }' + 1034 | '\pard \intbl {\fs20 Arcane Spellcaster Level}{\cell }' + 1035 | '\pard {\b\fs18 \trowd \trgaph108\trleft-108\cellx1000\row }After table}' 1036 | d = parser.parse(src) 1037 | 1038 | sect = d.sections 1039 | 1040 | sect.length.should == 3 1041 | sect[0][:text].should == 'Before Table' 1042 | sect[2][:text].should == 'After table' 1043 | table = sect[1][:modifiers][:table] 1044 | 1045 | compare_table_results(table, [{:end_positions => [72, 144, 50], 1046 | :values => [["Familiar "], ["Alignment "], ['Arcane Spellcaster Level']]}]) 1047 | end 1048 | 1049 | it 'parses cells' do 1050 | src = '{\rtf1\trowd\trgaph108\trleft-108\cellx1440\cellx2880' + 1051 | '\intbl{\fs20 Familiar }{\cell }' + 1052 | '{\fs20 Alignment }{\cell }}' 1053 | 1054 | d = parser.parse(src) 1055 | table = d.sections[0][:modifiers][:table] 1056 | 1057 | compare_table_results(table, [{:end_positions => [72, 144], :values => [['Familiar '], ['Alignment ']]}]) 1058 | end 1059 | 1060 | it 'parses blank rows' do 1061 | src = '{\rtf1\trowd \trgaph108\trleft-108\cellx1440' + 1062 | '\intbl{\fs20 Familiar }{\cell }' + 1063 | '\pard\plain \intbl {\trowd \trgaph108\trleft-108\cellx1440\row } ' + 1064 | 'Improved animal}' 1065 | d = parser.parse(src) 1066 | 1067 | sect = d.sections 1068 | sect.length.should == 2 1069 | sect[1][:text].should == ' Improved animal' 1070 | sect[1][:modifiers].should == {} 1071 | 1072 | table = sect[0][:modifiers][:table] 1073 | compare_table_results(table, [{:end_positions => [72], :values => 
[['Familiar ']]}]) 1074 | end 1075 | end 1076 | end 1077 | end 1078 | -------------------------------------------------------------------------------- /spec/spec_helper.rb: -------------------------------------------------------------------------------- 1 | require 'ruby-rtf' --------------------------------------------------------------------------------