├── .github ├── FUNDING.yml ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── release.yml └── workflows │ ├── release.yml │ └── ruby.yml ├── .gitignore ├── .rspec ├── CHANGELOG.md ├── Gemfile ├── LICENSE.md ├── README.md ├── Rakefile ├── docx.gemspec ├── lib ├── docx.rb └── docx │ ├── containers.rb │ ├── containers │ ├── container.rb │ ├── paragraph.rb │ ├── styles_configuration.rb │ ├── table.rb │ ├── table_cell.rb │ ├── table_column.rb │ ├── table_row.rb │ └── text_run.rb │ ├── document.rb │ ├── elements.rb │ ├── elements │ ├── bookmark.rb │ ├── element.rb │ ├── style.rb │ ├── style │ │ ├── converters.rb │ │ └── validators.rb │ └── text.rb │ ├── errors.rb │ ├── helpers.rb │ └── version.rb └── spec ├── docx ├── document_spec.rb └── elements │ └── style_spec.rb ├── fixtures ├── .DS_Store ├── basic.docx ├── editing.docx ├── formatting.docx ├── internal-links.docx ├── no_styles.docx ├── office365.docx ├── partial_styles │ ├── basic.xml │ └── full.xml ├── replacement.docx ├── replacement.png ├── saving.docx ├── saving_wps.docx ├── styles.docx ├── substitution.docx ├── tables.docx ├── test_with_style.docx └── weird_docx.docx ├── spec_helper.rb └── support └── shared_examples.rb /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: [satoryu] # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] 4 | patreon: # Replace with a single Patreon username 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: # Replace with a single Ko-fi username 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | otechie: # Replace with a single Otechie username 
12 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] 13 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: 'bug' 6 | assignees: '' 7 | 8 | --- 9 | 10 | ## Describe the bug 11 | 12 | A clear and concise description of what the bug is. 13 | 14 | ## To Reproduce 15 | 16 | Steps to reproduce the behavior or put a short code to reproduce the bug. 17 | 18 | ### example 19 | 20 | ```rb 21 | require 'docx' 22 | 23 | doc = Docx::Document.new('/path/to/your/docx/file.docx') 24 | 25 | ## Something to reproduce the bug here 26 | ``` 27 | 28 | ## Sample docx file 29 | 30 | Put a sample docx file to reproduce the bug reported here to this issue. 31 | 32 | ## Expected behavior 33 | 34 | A clear and concise description of what you expected to happen. 35 | 36 | ## Environment 37 | 38 | - Ruby version: [e.g 2.7.1] 39 | - `docx` gem version: [e.g 0.5.0] 40 | - OS: [e.g. iOS] 41 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: 'enhancement' 6 | assignees: '' 7 | 8 | --- 9 | 10 | ## Problem 11 | 12 | Please share a clear and concise description of what the problem is. 13 | 14 | > Ex. I'm always frustrated when [...] 15 | 16 | ## Solution 17 | 18 | Please describe the solution you'd like. 19 | 20 | ## Alternative solutions 21 | 22 | A clear and concise description of any alternative solutions or features you've considered. 
23 | -------------------------------------------------------------------------------- /.github/release.yml: -------------------------------------------------------------------------------- 1 | changelog: 2 | categories: 3 | - title: Enhancements 4 | labels: 5 | - 'enhancement' 6 | 7 | - title: Bug Fixes 8 | labels: 9 | - 'bug' 10 | 11 | - title: Chores 12 | labels: 13 | - '*' 14 | 15 | exclude: 16 | labels: 17 | - dependencies 18 | authors: 19 | - dependabot 20 | 21 | - title: Dependencies 22 | labels: 23 | - dependencies -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: 4 | - master 5 | tags: 6 | - 'v*.*.*' 7 | 8 | jobs: 9 | build: 10 | name: Build a package 11 | 12 | runs-on: ubuntu-latest 13 | 14 | steps: 15 | - uses: actions/checkout@v3 16 | 17 | - uses: softprops/action-gh-release@v1 18 | if: startsWith(github.ref, 'refs/tags/') # Run only when tagged like v1.0.1 19 | with: 20 | files: packages/${{steps.package_name.outputs.package_name}}.zip 21 | generate_release_notes: true 22 | -------------------------------------------------------------------------------- /.github/workflows/ruby.yml: -------------------------------------------------------------------------------- 1 | # This workflow will download a prebuilt Ruby version, install dependencies and 2 | # run tests with Rake 3 | # For more information see: https://github.com/marketplace/actions/setup-ruby-jruby-and-truffleruby 4 | 5 | name: Ruby 6 | 7 | on: 8 | push: 9 | branches: [ master ] 10 | pull_request: 11 | branches: [ master ] 12 | 13 | jobs: 14 | test: 15 | 16 | runs-on: ubuntu-latest 17 | 18 | strategy: 19 | matrix: 20 | ruby: [2.7, "3.0", 3.1, 3.2] 21 | 22 | steps: 23 | - uses: actions/checkout@v2 24 | 25 | - name: Set up Ruby 26 | uses: ruby/setup-ruby@v1 27 | with: 28 | ruby-version: ${{ matrix.ruby }} 29 | bundler-cache: 
true 30 | - name: Run tests 31 | env: 32 | COVERALLS_REPO_TOKEN: ${{ secrets.COVERALLS_REPO_TOKEN }} 33 | run: bundle exec rake spec 34 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.gem 2 | .bundle 3 | Gemfile.lock 4 | pkg/* 5 | doc/ 6 | vendor/ 7 | coverage/ 8 | -------------------------------------------------------------------------------- /.rspec: -------------------------------------------------------------------------------- 1 | --color 2 | --format progress 3 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## v0.7.0 4 | 5 | ### Enhancements 6 | 7 | - Adds to_xml to Document [#116](https://github.com/ruby-docx/docx/pull/116) 8 | - fix runs text not changed after update [#120](https://github.com/ruby-docx/docx/pull/120) 9 | 10 | ### Bug fixes 11 | 12 | - Passing a Nokogiri::XML::Node as the second parameter to Node.new is deprecated [#121](https://github.com/ruby-docx/docx/pull/121) 13 | 14 | ### Chores 15 | 16 | - Add Ruby 3.1 to the CI matrix by petergoldstein [#122](https://github.com/ruby-docx/docx/pull/122) 17 | 18 | ## v0.6.2 19 | 20 | ### Bug fixes 21 | 22 | - Fix `Docx::Document#to_s` fails when given file has `document22.xml.rels` [#112](https://github.com/ruby-docx/docx/pull/112), [#106](https://github.com/ruby-docx/docx/pull/106) 23 | 24 | ## v0.6.1 25 | 26 | ### Bug fixes 27 | 28 | - Use `Zip::File#glob` to match any `document.xml` [#104](https://github.com/ruby-docx/docx/pull/104) 29 | 30 | ### Chores 31 | 32 | - Enable Coverall's coverage report [#102](https://github.com/ruby-docx/docx/pull/102) 33 | - Add table write example to README.md [#99](https://github.com/ruby-docx/docx/pull/99) 34 | - Replace Travis CI build with GitHub Action 
[#98](https://github.com/ruby-docx/docx/pull/98) 35 | - Add ruby 3.0 to versions for testing on Travis CI [#97](https://github.com/ruby-docx/docx/pull/97) 36 | 37 | ## v0.6.0 38 | 39 | ### Enhancements 40 | 41 | - Added support for hyperlinks (implemented [#70](https://github.com/ruby-docx/docx/pull/70) again) by ollieh-m and gopeter [#92](https://github.com/ruby-docx/docx/pull/92) 42 | 43 | ### Chores 44 | 45 | - Drop ruby 2.4 from supported versions by satoryu [#93](https://github.com/ruby-docx/docx/pull/93) 46 | - Refactoring `spec_helper` by satoryu [#90](https://github.com/ruby-docx/docx/pull/90) 47 | - Starts measuring code coverage with coveralls by satoryu [#88](https://github.com/ruby-docx/docx/pull/88) 48 | 49 | ## v0.5.0 50 | 51 | ### Enhancements 52 | 53 | - Added opening streams and outputting to a stream [#66](https://github.com/ruby-docx/docx/pull/66) 54 | - Added supports for Office 365 files [#85](https://github.com/ruby-docx/docx/pull/85) 55 | 56 | ### Bug fixes 57 | 58 | - `Docx::Document` handles a docx file without styles.xml [#81](https://github.com/ruby-docx/docx/pull/81) 59 | - Fixes insert text before after were switched [#84](https://github.com/ruby-docx/docx/pull/84) 60 | 61 | ## v0.4.0 62 | 63 | ### Enhancements 64 | 65 | - Implement substitute method on TextRun class. [#75](https://github.com/ruby-docx/docx/pull/75) 66 | 67 | ### Improvements 68 | 69 | - Updates dependencies. [#72](https://github.com/ruby-docx/docx/pull/72), [#77](https://github.com/ruby-docx/docx/pull/77) 70 | - Fix: #paragraphs grabs paragraphs in tables. [#76](https://github.com/ruby-docx/docx/pull/76) 71 | - Updates supported ruby versions. 
[#78](https://github.com/ruby-docx/docx/pull/78) 72 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source 'https://rubygems.org' 2 | gemspec 3 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | The MIT License 2 | 3 | Copyright (c) Marcus Ortiz, http://marcusortiz.com 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # docx 2 | 3 | [![Gem Version](https://badge.fury.io/rb/docx.svg)](https://badge.fury.io/rb/docx) 4 | [![Ruby](https://github.com/ruby-docx/docx/workflows/Ruby/badge.svg)](https://github.com/ruby-docx/docx/actions?query=workflow%3ARuby) 5 | [![Coverage Status](https://coveralls.io/repos/github/ruby-docx/docx/badge.svg?branch=master)](https://coveralls.io/github/ruby-docx/docx?branch=master) 6 | [![Gitter](https://badges.gitter.im/ruby-docx/community.svg)](https://gitter.im/ruby-docx/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge) 7 | 8 | A ruby library/gem for interacting with `.docx` files. Current capabilities include reading paragraphs/bookmarks, inserting text at bookmarks, reading tables/rows/columns/cells and saving the document. 9 | 10 | ## Usage 11 | 12 | ### Prerequisites 13 | 14 | - Ruby 2.7 or later 15 | 16 | ### Install 17 | 18 | Add the following line to your application's Gemfile: 19 | 20 | ```ruby 21 | gem 'docx' 22 | ``` 23 | 24 | And then execute: 25 | 26 | ```shell 27 | bundle install 28 | ``` 29 | 30 | Or install it yourself as: 31 | 32 | ```shell 33 | gem install docx 34 | ``` 35 | 36 | ### Reading 37 | 38 | ``` ruby 39 | require 'docx' 40 | 41 | # Create a Docx::Document object for our existing docx file 42 | doc = Docx::Document.open('example.docx') 43 | 44 | # Retrieve and display paragraphs 45 | doc.paragraphs.each do |p| 46 | puts p 47 | end 48 | 49 | # Retrieve and display bookmarks, returned as hash with bookmark names as keys and objects as values 50 | doc.bookmarks.each_pair do |bookmark_name, bookmark_object| 51 | puts bookmark_name 52 | end 53 | ``` 54 | 55 | Don't have a local file but a buffer? 
Docx handles those too: 56 | 57 | ```ruby 58 | require 'docx' 59 | 60 | # Create a Docx::Document object from a remote file 61 | doc = Docx::Document.open(buffer) 62 | 63 | # Everything about reading is the same as shown above 64 | ``` 65 | 66 | ### Rendering html 67 | ``` ruby 68 | require 'docx' 69 | 70 | # Retrieve and display paragraphs as html 71 | doc = Docx::Document.open('example.docx') 72 | doc.paragraphs.each do |p| 73 | puts p.to_html 74 | end 75 | ``` 76 | 77 | ### Reading tables 78 | 79 | ``` ruby 80 | require 'docx' 81 | 82 | # Create a Docx::Document object for our existing docx file 83 | doc = Docx::Document.open('tables.docx') 84 | 85 | first_table = doc.tables[0] 86 | puts first_table.row_count 87 | puts first_table.column_count 88 | puts first_table.rows[0].cells[0].text 89 | puts first_table.columns[0].cells[0].text 90 | 91 | # Iterate through tables 92 | doc.tables.each do |table| 93 | table.rows.each do |row| # Row-based iteration 94 | row.cells.each do |cell| 95 | puts cell.text 96 | end 97 | end 98 | 99 | table.columns.each do |column| # Column-based iteration 100 | column.cells.each do |cell| 101 | puts cell.text 102 | end 103 | end 104 | end 105 | ``` 106 | 107 | ### Writing 108 | 109 | ``` ruby 110 | require 'docx' 111 | 112 | # Create a Docx::Document object for our existing docx file 113 | doc = Docx::Document.open('example.docx') 114 | 115 | # Insert a single line of text after one of our bookmarks 116 | doc.bookmarks['example_bookmark'].insert_text_after("Hello world.") 117 | 118 | # Insert multiple lines of text at our bookmark 119 | doc.bookmarks['example_bookmark_2'].insert_multiple_lines_after(['Hello', 'World', 'foo']) 120 | 121 | # Remove paragraphs 122 | doc.paragraphs.each do |p| 123 | p.remove! 
if p.to_s =~ /TODO/ 124 | end 125 | 126 | # Substitute text, preserving formatting 127 | doc.paragraphs.each do |p| 128 | p.each_text_run do |tr| 129 | tr.substitute('_placeholder_', 'replacement value') 130 | end 131 | end 132 | 133 | # Save document to specified path 134 | doc.save('example-edited.docx') 135 | ``` 136 | 137 | ### Writing to tables 138 | 139 | ``` ruby 140 | require 'docx' 141 | 142 | # Create a Docx::Document object for our existing docx file 143 | doc = Docx::Document.open('tables.docx') 144 | 145 | # Iterate over each table 146 | doc.tables.each do |table| 147 | last_row = table.rows.last 148 | 149 | # Copy last row and insert a new one before last row 150 | new_row = last_row.copy 151 | new_row.insert_before(last_row) 152 | 153 | # Substitute text in each cell of this new row 154 | new_row.cells.each do |cell| 155 | cell.paragraphs.each do |paragraph| 156 | paragraph.each_text_run do |text| 157 | text.substitute('_placeholder_', 'replacement value') 158 | end 159 | end 160 | end 161 | end 162 | 163 | doc.save('tables-edited.docx') 164 | ``` 165 | 166 | ### Advanced 167 | 168 | ``` ruby 169 | require 'docx' 170 | 171 | d = Docx::Document.open('example.docx') 172 | 173 | # The Nokogiri::XML::Node on which an element is based can be accessed using #node 174 | d.paragraphs.each do |p| 175 | puts p.node.inspect 176 | end 177 | 178 | # The #xpath and #at_xpath methods are delegated to the node from the element, saving a step 179 | p_element = d.paragraphs.first 180 | p_children = p_element.xpath("//child::*") # selects all children 181 | p_child = p_element.at_xpath("//child::*") # selects first child 182 | ``` 183 | 184 | ### Writing and Manipulating Styles 185 | ``` ruby 186 | require 'docx' 187 | 188 | d = Docx::Document.open('example.docx') 189 | existing_style = d.styles_configuration.style_of("Heading 1") 190 | existing_style.font_color = "000000" 191 | 192 | # see attributes below 193 | new_style = d.styles_configuration.add_style("Red", 
name: "Red", font_color: "FF0000", font_size: 20) 194 | new_style.bold = true 195 | 196 | d.paragraphs.each do |p| 197 | p.style = "Red" 198 | end 199 | 200 | d.paragraphs.each do |p| 201 | p.style = "Heading 1" 202 | end 203 | 204 | d.styles_configuration.remove_style("Red") 205 | ``` 206 | 207 | #### Style Attributes 208 | 209 | The following is a list of attributes and what they control within the style. 210 | 211 | - **id**: The unique identifier of the style. (required) 212 | - **name**: The human-readable name of the style. (required) 213 | - **type**: Indicates the type of the style (e.g., paragraph, character). 214 | - **keep_next**: Boolean value controlling whether to keep a paragraph and the next one on the same page. Valid values: `true`/`false`. 215 | - **keep_lines**: Boolean value specifying whether to keep all lines of a paragraph together on one page. Valid values: `true`/`false`. 216 | - **page_break_before**: Boolean value indicating whether to insert a page break before the paragraph. Valid values: `true`/`false`. 217 | - **widow_control**: Boolean value controlling widow and orphan lines in a paragraph. Valid values: `true`/`false`. 218 | - **shading_style**: Defines the shading pattern style. 219 | - **shading_color**: Specifies the color of the shading pattern. Valid values: Hex color codes. 220 | - **shading_fill**: Indicates the background fill color of shading. 221 | - **suppress_auto_hyphens**: Boolean value controlling automatic hyphenation. Valid values: `true`/`false`. 222 | - **bidirectional_text**: Boolean value indicating if the paragraph contains bidirectional text. Valid values: `true`/`false`. 223 | - **spacing_before**: Defines the spacing before a paragraph. 224 | - **spacing_after**: Specifies the spacing after a paragraph. 225 | - **line_spacing**: Indicates the line spacing of a paragraph. 226 | - **line_rule**: Defines how line spacing is calculated. 227 | - **indent_left**: Sets the left indentation of a paragraph. 
228 | - **indent_right**: Specifies the right indentation of a paragraph. 229 | - **indent_first_line**: Indicates the first line indentation of a paragraph. 230 | - **align**: Controls the text alignment within a paragraph. 231 | - **font**: Sets the font for different scripts (ASCII, complex script, East Asian, etc.). 232 | - **font_ascii**: Specifies the font for ASCII characters. 233 | - **font_cs**: Indicates the font for complex script characters. 234 | - **font_hAnsi**: Sets the font for high ANSI characters. 235 | - **font_eastAsia**: Specifies the font for East Asian characters. 236 | - **bold**: Boolean value controlling bold formatting. Valid values: `true`/`false`. 237 | - **italic**: Boolean value indicating italic formatting. Valid values: `true`/`false`. 238 | - **caps**: Boolean value controlling capitalization. Valid values: `true`/`false`. 239 | - **small_caps**: Boolean value specifying small capital letters. Valid values: `true`/`false`. 240 | - **strike**: Boolean value indicating strikethrough formatting. Valid values: `true`/`false`. 241 | - **double_strike**: Boolean value defining double strikethrough formatting. Valid values: `true`/`false`. 242 | - **outline**: Boolean value specifying outline effects. Valid values: `true`/`false`. 243 | - **outline_level**: Indicates the outline level in a document's hierarchy. 244 | - **font_color**: Sets the text color. Valid values: Hex color codes. 245 | - **font_size**: Controls the font size. 246 | - **font_size_cs**: Specifies the font size for complex script characters. 247 | - **underline_style**: Indicates the style of underlining. 248 | - **underline_color**: Specifies the color of the underline. Valid values: Hex color codes. 249 | - **spacing**: Controls character spacing. 250 | - **kerning**: Sets the space between characters. 251 | - **position**: Controls the position of characters (superscript/subscript). 252 | - **text_fill_color**: Sets the fill color of text. 
Valid values: Hex color codes. 253 | - **vertical_alignment**: Controls the vertical alignment of text within a line. 254 | - **lang**: Specifies the language tag for the text. 255 | 256 | ## Development 257 | 258 | ### todo 259 | 260 | * Calculate element formatting based on values present in element properties as well as properties inherited from parents 261 | * Default formatting of inserted elements to inherited values 262 | * Implement formattable elements. 263 | * Easier multi-line text insertion at a single bookmark (inserting paragraph nodes after the one containing the bookmark) 264 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | require 'rake' 2 | require 'rspec/core/rake_task' 3 | require 'bundler/gem_tasks' 4 | 5 | RSpec::Core::RakeTask.new('spec') 6 | 7 | desc 'Run tests.' 8 | task default: :spec 9 | 10 | desc "Open an irb session preloaded with this library." 
11 | task :console do 12 | sh "irb -I lib/ -r docx" 13 | end 14 | -------------------------------------------------------------------------------- /docx.gemspec: -------------------------------------------------------------------------------- 1 | $:.unshift File.expand_path("../lib", __FILE__) 2 | require 'docx/version' 3 | 4 | Gem::Specification.new do |s| 5 | s.name = 'docx' 6 | s.version = Docx::VERSION 7 | s.licenses = ['MIT'] 8 | s.summary = 'a ruby library/gem for interacting with .docx files' 9 | s.description = 'thin wrapper around rubyzip and nokogiri as a way to get started with docx files' 10 | s.authors = ['Christopher Hunt', 'Marcus Ortiz', 'Higgins Dragon', 'Toms Mikoss', 'Sebastian Wittenkamp'] 11 | s.email = ['chrahunt@gmail.com'] 12 | s.homepage = 'https://github.com/chrahunt/docx' 13 | s.files = Dir['README.md', 'LICENSE.md', 'lib/**/*.rb'] 14 | s.required_ruby_version = '>= 2.7.0' 15 | 16 | s.add_dependency 'nokogiri', '~> 1.13', '>= 1.13.0' 17 | s.add_dependency 'rubyzip', '~> 2.0' 18 | 19 | s.add_development_dependency 'coveralls_reborn', '~> 0.21' 20 | s.add_development_dependency 'rake', '~> 13.0' 21 | s.add_development_dependency 'rspec', '~> 3.7' 22 | end 23 | -------------------------------------------------------------------------------- /lib/docx.rb: -------------------------------------------------------------------------------- 1 | require 'docx/version' 2 | 3 | module Docx #:nodoc: 4 | autoload :Document, 'docx/document' 5 | end 6 | 7 | -------------------------------------------------------------------------------- /lib/docx/containers.rb: -------------------------------------------------------------------------------- 1 | require 'docx/containers/container' 2 | require 'docx/containers/text_run' 3 | require 'docx/containers/paragraph' 4 | require 'docx/containers/table' 5 | require 'docx/containers/styles_configuration' 6 | -------------------------------------------------------------------------------- 
/lib/docx/containers/container.rb: -------------------------------------------------------------------------------- 1 | require 'docx/elements' 2 | 3 | module Docx 4 | module Elements 5 | module Containers 6 | module Container 7 | # Relation methods 8 | # TODO: Create a properties object, include Element 9 | def properties 10 | @node.at_xpath("./w:#{@properties_tag}") 11 | end 12 | 13 | # Erase text within an element 14 | def blank! 15 | @node.xpath('.//w:t').each { |t| t.content = '' } 16 | end 17 | 18 | def remove! 19 | @node.remove 20 | end 21 | end 22 | end 23 | end 24 | end 25 | -------------------------------------------------------------------------------- /lib/docx/containers/paragraph.rb: -------------------------------------------------------------------------------- 1 | require 'docx/containers/text_run' 2 | require 'docx/containers/container' 3 | 4 | module Docx 5 | module Elements 6 | module Containers 7 | class Paragraph 8 | include Container 9 | include Elements::Element 10 | 11 | def self.tag 12 | 'p' 13 | end 14 | 15 | 16 | # Child elements: pPr, r, fldSimple, hlink, subDoc 17 | # http://msdn.microsoft.com/en-us/library/office/ee364458(v=office.11).aspx 18 | def initialize(node, document_properties = {}, doc = nil) 19 | @node = node 20 | @properties_tag = 'pPr' 21 | @document_properties = document_properties 22 | @font_size = @document_properties[:font_size] 23 | @document = doc 24 | end 25 | 26 | # Set text of paragraph 27 | def text=(content) 28 | if text_runs.size == 1 29 | text_runs.first.text = content 30 | elsif text_runs.size == 0 31 | new_r = TextRun.create_within(self) 32 | new_r.text = content 33 | else 34 | text_runs.each {|r| r.node.remove } 35 | new_r = TextRun.create_within(self) 36 | new_r.text = content 37 | end 38 | end 39 | 40 | # Return text of paragraph 41 | def to_s 42 | text_runs.map(&:text).join('') 43 | end 44 | 45 | # Return paragraph as a <p></p> HTML fragment with formatting based on properties. 46 | def to_html 47 | html = '' 48 | text_runs.each do |text_run| 49 | html << text_run.to_html 50 | end 51 | styles = { 'font-size' => "#{font_size}pt" } 52 | styles['color'] = "##{font_color}" if font_color 53 | styles['text-align'] = alignment if alignment 54 | html_tag(:p, content: html, styles: styles) 55 | end 56 | 57 | 58 | # Array of text runs contained within paragraph 59 | def text_runs 60 | @node.xpath('w:r|w:hyperlink').map { |r_node| Containers::TextRun.new(r_node, @document_properties) } 61 | end 62 | 63 | # Iterate over each text run within a paragraph 64 | def each_text_run 65 | text_runs.each { |tr| yield(tr) } 66 | end 67 | 68 | def aligned_left? 69 | ['left', nil].include?(alignment) 70 | end 71 | 72 | def aligned_right? 73 | alignment == 'right' 74 | end 75 | 76 | def aligned_center? 77 | alignment == 'center' 78 | end 79 | 80 | def font_size 81 | size_attribute = @node.at_xpath('w:pPr//w:sz//@w:val') 82 | 83 | return @font_size unless size_attribute 84 | 85 | size_attribute.value.to_i / 2 86 | end 87 | 88 | def font_color 89 | color_tag = @node.xpath('w:r//w:rPr//w:color').first 90 | color_tag ? 
color_tag.attributes['val'].value : nil 91 | end 92 | 93 | def style 94 | return nil unless @document 95 | 96 | @document.style_name_of(style_id) || 97 | @document.default_paragraph_style 98 | end 99 | 100 | def style_id 101 | style_property.get_attribute('w:val') 102 | end 103 | 104 | def style=(identifier) 105 | id = @document.styles_configuration.style_of(identifier).id 106 | 107 | style_property.set_attribute('w:val', id) 108 | end 109 | 110 | alias_method :style_id=, :style= 111 | alias_method :text, :to_s 112 | 113 | private 114 | 115 | def style_property 116 | properties&.at_xpath('w:pStyle') || properties&.add_child('').first 117 | end 118 | 119 | # Returns the alignment if any, or nil if left 120 | def alignment 121 | @node.at_xpath('.//w:jc/@w:val')&.value 122 | end 123 | end 124 | end 125 | end 126 | end 127 | -------------------------------------------------------------------------------- /lib/docx/containers/styles_configuration.rb: -------------------------------------------------------------------------------- 1 | require 'docx/containers/container' 2 | require 'docx/elements/style' 3 | 4 | module Docx 5 | module Elements 6 | module Containers 7 | StyleNotFound = Class.new(StandardError) 8 | 9 | class StylesConfiguration 10 | def initialize(raw_styles) 11 | @raw_styles = raw_styles 12 | @styles_parent_node = raw_styles.root 13 | end 14 | 15 | attr_reader :styles, :styles_parent_node 16 | 17 | def styles 18 | styles_parent_node 19 | .children 20 | .filter_map do |style| 21 | next unless style.get_attribute("w:styleId") 22 | 23 | Elements::Style.new(self, style) 24 | end 25 | end 26 | 27 | def style_of(id_or_name) 28 | styles.find { |style| style.id == id_or_name || style.name == id_or_name } || raise(Errors::StyleNotFound, "Style name or id '#{id_or_name}' not found") 29 | end 30 | 31 | def size 32 | styles.size 33 | end 34 | 35 | def add_style(id, attributes = {}) 36 | Elements::Style.create(self, {id: id, name: id}.merge(attributes)) 37 | end 38 | 
39 | def remove_style(id) 40 | style = styles.find { |style| style.id == id } 41 | 42 | style.node.remove 43 | styles.delete(style) 44 | end 45 | 46 | def serialize(**options) 47 | @raw_styles.serialize(**options) 48 | end 49 | end 50 | end 51 | end 52 | end -------------------------------------------------------------------------------- /lib/docx/containers/table.rb: -------------------------------------------------------------------------------- 1 | require 'docx/containers/table_row' 2 | require 'docx/containers/table_column' 3 | require 'docx/containers/container' 4 | 5 | module Docx 6 | module Elements 7 | module Containers 8 | class Table 9 | include Container 10 | include Elements::Element 11 | 12 | def self.tag 13 | 'tbl' 14 | end 15 | 16 | def initialize(node) 17 | @node = node 18 | @properties_tag = 'tblGrid' 19 | end 20 | 21 | # Array of row 22 | def rows 23 | @node.xpath('w:tr').map {|r_node| Containers::TableRow.new(r_node) } 24 | end 25 | 26 | def row_count 27 | @node.xpath('w:tr').count 28 | end 29 | 30 | # Array of column 31 | def columns 32 | columns_containers = [] 33 | (0..(column_count-1)).each do |i| 34 | columns_containers[i] = Containers::TableColumn.new @node.xpath("w:tr//w:tc[#{i+1}]") 35 | end 36 | columns_containers 37 | end 38 | 39 | def column_count 40 | @node.xpath('w:tblGrid/w:gridCol').count 41 | end 42 | 43 | # Iterate over each row within a table 44 | def each_rows 45 | rows.each { |r| yield(r) } 46 | end 47 | 48 | end 49 | end 50 | end 51 | end 52 | -------------------------------------------------------------------------------- /lib/docx/containers/table_cell.rb: -------------------------------------------------------------------------------- 1 | require 'docx/containers/text_run' 2 | require 'docx/containers/container' 3 | 4 | module Docx 5 | module Elements 6 | module Containers 7 | class TableCell 8 | include Container 9 | include Elements::Element 10 | 11 | def self.tag 12 | 'tc' 13 | end 14 | 15 | def initialize(node) 16 | 
module Docx
  module Elements
    module Containers
      # A column view over a table: the cells found at the same position in
      # every row. It is built from a set of cell nodes rather than from a
      # single XML node, so +@node+ and +@properties_tag+ are empty strings.
      class TableColumn
        include Container
        include Elements::Element

        # @return [Array<TableCell>] cells contained within the column
        attr_reader :cells

        # Local element name, without the namespace prefix. The element
        # factory (+create_with+) prepends "w:" itself, so returning
        # 'w:gridCol' here produced the invalid name "w:w:gridCol"; every
        # other container returns a bare name.
        def self.tag
          'gridCol'
        end

        # @param cell_nodes enumerable of +w:tc+ XML nodes, one per row
        def initialize(cell_nodes)
          @node = ''
          @properties_tag = ''
          @cells = cell_nodes.map { |c_node| Containers::TableCell.new(c_node) }
        end
      end
    end
  end
end
# Set text of text run.
#
# * No text node yet: one is created inside the run and remembered in
#   +@text_nodes+, so the cached text refreshed by +reset_text+ reflects the
#   new content (previously the new node was not tracked and +text+ kept
#   returning '').
# * One or more text nodes: the first receives +content+ and any remaining
#   ones are emptied, so the run reads exactly +content+ (previously runs
#   with several text nodes were silently left unchanged).
#
# @param content [String] the new text of the run
def text=(content)
  if @text_nodes.empty?
    new_t = Elements::Text.create_within(self)
    new_t.content = content
    @text_nodes << new_t
  else
    first_node, *other_nodes = @text_nodes
    first_node.content = content
    other_nodes.each { |text_node| text_node.content = '' }
  end
  reset_text
end
# Return text as a HTML fragment with formatting based on properties.
#
# The run's text comes from the document and is escaped (&, < and >) before
# being wrapped, so document content cannot inject markup into the output.
# Wrapping order, innermost first: em, strong, s, span (underline and/or a
# font size that differs from the document default), a (external links).
#
# @return [String] HTML fragment
def to_html
  html = @text.encode(xml: :text)
  html = html_tag(:em, content: html) if italicized?
  html = html_tag(:strong, content: html) if bolded?
  html = html_tag(:s, content: html) if striked?

  styles = {}
  styles['text-decoration'] = 'underline' if underlined?
  # No need to be granular with font size down to the span level if it doesn't vary.
  run_font_size = font_size
  styles['font-size'] = "#{run_font_size}pt" if run_font_size != @font_size
  html = html_tag(:span, content: html, styles: styles) unless styles.empty?

  html = html_tag(:a, content: html, attributes: {href: href, target: "_blank"}) if hyperlink?
  html
end
# Target of this run's hyperlink.
#
# +@document_properties+ defaults to an empty hash in the constructor, so the
# :hyperlinks table may be absent; in that case nil is returned instead of
# raising NoMethodError on nil.
#
# @return [String, nil] the link target, or nil when it is unknown
def href
  @document_properties.dig(:hyperlinks, hyperlink_id)
end
# Collects the document's bookmarks keyed by bookmark name.
# The '_GoBack' bookmark (auto-generated by office 2010) is left out; when
# two bookmarks share a name the later one wins.
#
# @return [Hash] bookmark name => bookmark element
def bookmarks
  parsed = @doc.xpath('//w:bookmarkStart').map { |b_node| parse_bookmark_from b_node }

  parsed.each_with_object({}) do |bookmark, by_name|
    next if bookmark.name == '_GoBack'

    by_name[bookmark.name] = bookmark
  end
end
# Relationship nodes whose Type mentions 'hyperlink'.
#
# +@rels+ is only assigned by +load_rels+; when loading stopped early (the
# ENOENT rescued in +load_styles+) it is still nil, so an empty list is
# returned instead of raising NoMethodError.
#
# @return the matching nodes, or an empty Array when no relationships were loaded
def hyperlink_relationships
  return [] unless @rels

  @rels.xpath("//xmlns:Relationship[contains(@Type,'hyperlink')]")
end
# Loads word/styles.xml and then the document relationships.
#
# The two parts are independent: a package without styles.xml still gets its
# relationships loaded. Previously the ENOENT raised for the missing styles
# part skipped +load_rels+ as well, leaving +@rels+ nil. A missing part is
# reported with +warn+ and otherwise ignored, as before.
#
# @return the result of +load_rels+, or nil when the relationships part is missing
def load_styles
  begin
    @styles_xml = @zip.read('word/styles.xml')
    @styles = Nokogiri::XML(@styles_xml)
  rescue Errno::ENOENT => e
    warn e.message
  end

  load_rels
rescue Errno::ENOENT => e
  warn e.message
  nil
end
#--
# TODO: Flesh this out to be compatible with other files
# TODO: Method to set flag on files that have been edited, probably by inserting something at the
# end of methods that make edits?
#++
# Queues the serialized document (and styles) as replacement entries.
#
# The main part is looked up with the same 'word/document*.xml' glob the
# constructor uses, so the edited XML replaces the entry that was actually
# read rather than a hard-coded 'word/document.xml' that may not exist in the
# package. Styles are only queued when a styles part was loaded; building
# the styles configuration from a nil +@styles+ would fail.
def update
  document_entry = zip.glob('word/document*.xml').first
  document_path = document_entry ? document_entry.name : 'word/document.xml'

  replace_entry document_path, doc.serialize(save_with: 0)
  replace_entry 'word/styles.xml', styles_configuration.serialize(save_with: 0) if @styles
end
# Insert multiple lines starting with the paragraph containing the bookmark.
#
# The bookmark's paragraph is blanked and used for the first line; for every
# further line the previously used paragraph is copied and the copy is
# inserted right after it. Each paragraph then receives the string at the
# same index of +text_array+.
#
# @param text_array [Array<String>] one entry per paragraph to produce
# @return [Array] the paragraphs that were written to, in document order
def insert_multiple_lines(text_array)
  first_paragraph = parent_paragraph
  # Remove text from paragraph
  first_paragraph.blank!
  paragraphs = [first_paragraph]

  # One extra paragraph per additional line, each cloned from its predecessor.
  (text_array.size - 1).times do
    previous = paragraphs.last
    new_p = previous.copy
    new_p.insert_after(previous)
    paragraphs << new_p
  end

  paragraphs.each_with_index do |paragraph, index|
    paragraph.text = text_array[index]
  end
end
# Insert node as first child (after properties).
#
# The properties node is requested from +element+ itself, not from its raw
# XML node: elsewhere in this codebase +properties+ is called on the
# container object (e.g. the paragraph's style lookup), and this method
# previously called it on +element.node+.
# NOTE(review): assumes raw XML nodes do not respond to +properties+ — confirm.
# When the element exposes no properties node, this node simply becomes the
# first child.
#
# @param element the element wrapper to insert into
# @return [self]
def prepend_to(element)
  properties_node = element.properties if element.respond_to?(:properties)

  @node =
    if properties_node
      properties_node.add_next_sibling(@node)
    else
      element.node.prepend_child(@node)
    end
  self
end
# A method to wrap content in an HTML tag.
# Currently used in paragraph and text_run for the to_html methods
#
# name:: Tag name (String or Symbol).
# content:: The base text content for the tag.
# styles:: Hash of the inline CSS styles to be applied. e.g.
#          { 'font-size' => '12pt', 'text-decoration' => 'underline' }
# attributes:: Hash of further HTML attributes, e.g. { href: '...' }.
#
# Returns the complete element as a String. The closing tag is emitted
# explicitly; the previous version appended an empty string and left the
# element unclosed.
def html_tag(name, options = {})
  content = options[:content]
  styles = options[:styles]
  attributes = options[:attributes]

  html = +"<#{name}"

  unless styles.nil? || styles.empty?
    declarations = styles.map { |property, value| "#{property}:#{value};" }
    html << " style=\"#{declarations.join}\""
  end

  unless attributes.nil? || attributes.empty?
    attributes.each do |attr_name, attr_value|
      html << " #{attr_name}=\"#{attr_value}\""
    end
  end

  html << '>'
  html << content if content
  html << "</#{name}>"
end
# Reads this attribute from a style.
# Selectors are tried in order; the value of the first one that matches is
# decoded with the converter and returned. Later selectors are not
# evaluated once a value has been found.
#
# @param style the style whose node is queried
# @return the decoded value, or nil when no selector matches
def retrieve_from(style)
  selectors.each do |node_xpath|
    raw_value = style.node.at_xpath(node_xpath)&.value
    return converter.decode(raw_value) if raw_value
  end

  nil
end
class << self
  # Registry of the Attribute definitions declared on the class.
  attr_accessor :attributes

  # Declared attributes that are flagged as required.
  def required_attributes
    attributes.select { |declared| declared.required? }
  end

  # Declares a style attribute: registers its definition and generates a
  # reader and a writer named after it, both delegating to the definition.
  def attribute(name, *selectors, required: false, converter: nil, validator: nil)
    definition = Attribute.new(name, selectors, required: required, converter: converter, validator: validator)
    attributes << definition

    define_method(name) { definition.retrieve_from(self) }
    define_method("#{name}=") { |value| definition.assign_to(self, value) }
  end

  # Appends a new w:style node to the configuration's styles parent node and
  # wraps it in a Style initialised with the given attributes.
  def create(configuration, attributes = {})
    parent_node = configuration.styles_parent_node
    style_node = Nokogiri::XML::Node.new("w:style", parent_node)
    parent_node.add_child(style_node)

    Elements::Style.new(configuration, style_node, **attributes)
  end
end
120 | attribute :keep_lines, "./w:pPr/w:keepLines/@w:val", converter: Converters::BooleanConverter 121 | attribute :page_break_before, "./w:pPr/w:pageBreakBefore/@w:val", converter: Converters::BooleanConverter 122 | attribute :widow_control, "./w:pPr/w:widowControl/@w:val", converter: Converters::BooleanConverter 123 | attribute :shading_style, "./w:pPr/w:shd/@w:val", "./w:rPr/w:shd/@w:val" 124 | attribute :shading_color, "./w:pPr/w:shd/@w:color", "./w:rPr/w:shd/@w:color", validator: Validators::ColorValidator 125 | attribute :shading_fill, "./w:pPr/w:shd/@w:fill", "./w:rPr/w:shd/@w:fill" 126 | attribute :suppress_auto_hyphens, "./w:pPr/w:suppressAutoHyphens/@w:val", converter: Converters::BooleanConverter 127 | attribute :bidirectional_text, "./w:pPr/w:bidi/@w:val", converter: Converters::BooleanConverter 128 | attribute :spacing_before, "./w:pPr/w:spacing/@w:before" 129 | attribute :spacing_after, "./w:pPr/w:spacing/@w:after" 130 | attribute :line_spacing, "./w:pPr/w:spacing/@w:line" 131 | attribute :line_rule, "./w:pPr/w:spacing/@w:lineRule" 132 | attribute :indent_left, "./w:pPr/w:ind/@w:left" 133 | attribute :indent_right, "./w:pPr/w:ind/@w:right" 134 | attribute :indent_first_line, "./w:pPr/w:ind/@w:firstLine" 135 | attribute :align, "./w:pPr/w:jc/@w:val" 136 | attribute :font, "./w:rPr/w:rFonts/@w:ascii", "./w:rPr/w:rFonts/@w:cs", "./w:rPr/w:rFonts/@w:hAnsi", "./w:rPr/w:rFonts/@w:eastAsia" # setting :font, will set all other fonts 137 | attribute :font_ascii, "./w:rPr/w:rFonts/@w:ascii" 138 | attribute :font_cs, "./w:rPr/w:rFonts/@w:cs" 139 | attribute :font_hAnsi, "./w:rPr/w:rFonts/@w:hAnsi" 140 | attribute :font_eastAsia, "./w:rPr/w:rFonts/@w:eastAsia" 141 | attribute :bold, "./w:rPr/w:b/@w:val", "./w:rPr/w:bCs/@w:val", converter: Converters::BooleanConverter 142 | attribute :italic, "./w:rPr/w:i/@w:val", "./w:rPr/w:iCs/@w:val", converter: Converters::BooleanConverter 143 | attribute :caps, "./w:rPr/w:caps/@w:val", converter: Converters::BooleanConverter 
144 | attribute :small_caps, "./w:rPr/w:smallCaps/@w:val", converter: Converters::BooleanConverter 145 | attribute :strike, "./w:rPr/w:strike/@w:val", converter: Converters::BooleanConverter 146 | attribute :double_strike, "./w:rPr/w:dstrike/@w:val", converter: Converters::BooleanConverter 147 | attribute :outline, "./w:rPr/w:outline/@w:val", converter: Converters::BooleanConverter 148 | attribute :outline_level, "./w:pPr/w:outlineLvl/@w:val" 149 | attribute :font_color, "./w:rPr/w:color/@w:val", validator: Validators::ColorValidator 150 | attribute :font_size, "./w:rPr/w:sz/@w:val", "./w:rPr/w:szCs/@w:val", converter: Converters::FontSizeConverter 151 | attribute :font_size_cs, "./w:rPr/w:szCs/@w:val", converter: Converters::FontSizeConverter 152 | attribute :underline_style, "./w:rPr/w:u/@w:val" 153 | attribute :underline_color, "./w:rPr/w:u/@w:color", validator: Validators::ColorValidator 154 | attribute :spacing, "./w:rPr/w:spacing/@w:val" 155 | attribute :kerning, "./w:rPr/w:kern/@w:val" 156 | attribute :position, "./w:rPr/w:position/@w:val" 157 | attribute :text_fill_color, "./w:rPr/w14:textFill/w14:solidFill/w14:srgbClr/@w14:val", validator: Validators::ColorValidator 158 | attribute :vertical_alignment, "./w:rPr/w:vertAlign/@w:val" 159 | attribute :lang, "./w:rPr/w:lang/@w:val" 160 | 161 | def valid? 162 | self.class.required_attributes.all? 
module Docx
  module Elements
    class Style
      # Converters translate between Ruby values and the strings stored in
      # the style XML: +encode+ is used when writing, +decode+ when reading.
      module Converters
        # Passes values through unchanged.
        class DefaultValueConverter
          def self.encode(value)
            value
          end

          def self.decode(value)
            value
          end
        end

        # Font sizes are stored in half-points and exposed in points.
        class FontSizeConverter
          # @param value [Numeric, String, nil] size in points
          # @return [Integer, nil] size in half-points; nil stays nil so that
          #   assigning nil removes the attribute instead of writing 0, and
          #   half-point sizes such as 10.5 are no longer truncated
          def self.encode(value)
            return nil if value.nil?

            (value.to_f * 2).round
          end

          # @param value [String] size in half-points
          # @return [Integer] size in whole points
          def self.decode(value)
            value.to_i / 2
          end
        end

        # On/off flags.
        class BooleanConverter
          # Spellings of "on" accepted when reading; besides "1" the OOXML
          # on/off type also allows "true" and "on".
          TRUE_VALUES = %w[1 true on].freeze

          def self.encode(value)
            value ? "1" : "0"
          end

          # @param value [String] raw attribute value
          # @return [Boolean]
          def self.decode(value)
            TRUE_VALUES.include?(value)
          end
        end
      end
    end
  end
end
module Docx
  module SimpleInspect
    # Returns a string representation of the object that is far more readable and understandable
    # than the default inspect method: every instance variable is listed, with its inspected
    # value cut to 100 characters followed by "..." when it is longer. You can still get the
    # default inspect output by passing true as the first argument.
    #
    # @param full [Boolean] when true, delegate to the default +inspect+
    # @return [String]
    def inspect(full = false)
      # Explicit empty argument list: a bare `super` would forward `full` to
      # the default inspect, which takes no arguments, and raise ArgumentError.
      return super() if full

      variable_values =
        instance_variables.map do |var|
          value = instance_variable_get(var).inspect
          value = "#{value[0, 100]}..." if value.length > 100

          "#{var}=#{value}"
        end

      "#<#{self.class}:0x#{(object_id << 1).to_s(16)} #{variable_values.join(' ')}>"
    end
  end
end
before do 55 | stream = File.binread(@fixtures_path + '/basic.docx') 56 | @doc = Docx::Document.open(stream) 57 | end 58 | 59 | it_behaves_like 'reading' 60 | end 61 | end 62 | 63 | describe 'read tables' do 64 | before do 65 | @doc = Docx::Document.open(@fixtures_path + '/tables.docx') 66 | end 67 | 68 | it 'should have tables with rows and cells' do 69 | expect(@doc.tables.count).to eq 2 70 | @doc.tables.each do |table| 71 | expect(table).to be_an_instance_of(Docx::Elements::Containers::Table) 72 | table.rows.each do |row| 73 | expect(row).to be_an_instance_of(Docx::Elements::Containers::TableRow) 74 | row.cells.each do |cell| 75 | expect(cell).to be_an_instance_of(Docx::Elements::Containers::TableCell) 76 | end 77 | end 78 | end 79 | end 80 | 81 | it 'should have tables with columns and cells' do 82 | @doc.tables.each do |table| 83 | table.columns.each do |column| 84 | expect(column).to be_an_instance_of(Docx::Elements::Containers::TableColumn) 85 | column.cells.each do |cell| 86 | expect(cell).to be_an_instance_of(Docx::Elements::Containers::TableCell) 87 | end 88 | end 89 | end 90 | end 91 | 92 | it 'should have proper count' do 93 | expect(@doc.tables[0].row_count).to eq 171 94 | expect(@doc.tables[1].row_count).to eq 2 95 | expect(@doc.tables[0].column_count).to eq 2 96 | expect(@doc.tables[1].column_count).to eq 2 97 | end 98 | 99 | it 'should have tables with proper text' do 100 | expect(@doc.tables[0].rows[0].cells[0].text).to eq 'ENGLISH' 101 | expect(@doc.tables[0].rows[0].cells[1].text).to eq 'FRANÇAIS' 102 | expect(@doc.tables[1].rows[0].cells[0].text).to eq 'Second table' 103 | expect(@doc.tables[1].rows[0].cells[1].text).to eq 'Second tableau' 104 | expect(@doc.tables[0].columns[0].cells[5].text).to eq 'aphids' 105 | expect(@doc.tables[0].columns[1].cells[5].text).to eq 'puceron' 106 | end 107 | 108 | it 'should read embedded links' do 109 | expect(@doc.tables[0].columns[1].cells[1].text).to match(/^Directive/) 110 | end 111 | 112 | describe 
'#paragraphs' do 113 | it 'should not grabs paragraphs in the tables' do 114 | expect(@doc.paragraphs.map(&:text)).to_not include("Second table") 115 | end 116 | end 117 | end 118 | 119 | describe 'editing' do 120 | before do 121 | @doc = Docx::Document.open(@fixtures_path + '/editing.docx') 122 | end 123 | 124 | it 'should copy paragraphs' do 125 | old_p = @doc.paragraphs.first 126 | new_p = old_p.copy 127 | expect(new_p).to be_an_instance_of(Docx::Elements::Containers::Paragraph) 128 | expect(new_p).not_to eq(nil) 129 | expect(new_p).not_to eq(old_p) 130 | end 131 | 132 | it 'allows insertion of text' do 133 | expect(@doc.paragraphs.size).to eq(3) 134 | first_p = @doc.paragraphs.first 135 | new_p = first_p.copy 136 | new_p.insert_after first_p 137 | expect(@doc.paragraphs.size).to eq(4) 138 | end 139 | 140 | it 'should change text' do 141 | expect(@doc.paragraphs.first.text).to eq('test text') 142 | @doc.paragraphs.first.text = 'the real test' 143 | expect(@doc.paragraphs.first.text).to eq('the real test') 144 | end 145 | 146 | it 'should allow insertion of text before a bookmark' do 147 | expect(@doc.paragraphs.first.text).to eq('test text') 148 | @doc.bookmarks['beginning_bookmark'].insert_text_before('foo') 149 | expect(@doc.paragraphs.first.text).to eq('footest text') 150 | end 151 | 152 | it 'should allow insertion of text after a bookmark' do 153 | expect(@doc.paragraphs.first.text).to eq('test text') 154 | @doc.bookmarks['end_bookmark'].insert_text_after('bar') 155 | expect(@doc.paragraphs.first.text).to eq('test textbar') 156 | end 157 | 158 | it 'should allow multiple lines of text to be inserted at a bookmark' do 159 | expect(@doc.paragraphs.last.text).to eq('') 160 | new_lines = ['replacement test', 'second paragraph test', 'and a third paragraph test'] 161 | @doc.bookmarks['isolated_bookmark'].insert_multiple_lines(new_lines) 162 | new_lines.each_index do |line| 163 | expect(@doc.paragraphs[line + 2].text).to eq(new_lines[line]) 164 | end 165 | end 
166 | 167 | it 'should allow multi-line insertion with replacement' do 168 | expect(@doc.paragraphs[1].text).to eq('placeholder text') 169 | new_lines = ['replacement test', 'second paragraph test', 'and a third paragraph test'] 170 | @doc.bookmarks['word_splitting_bookmark'].insert_multiple_lines(new_lines) 171 | new_lines.each_index do |line| 172 | expect(@doc.paragraphs[line + 1].text).to eq(new_lines[line]) 173 | end 174 | end 175 | 176 | it 'should allow content deletion' do 177 | expect(@doc.paragraphs.first.text).to eq('test text') 178 | @doc.paragraphs.first.blank! 179 | expect(@doc.paragraphs.first.text).to eq('') 180 | end 181 | 182 | it 'should allow content deletion' do 183 | expect { @doc.paragraphs.first.remove! }.to change { @doc.paragraphs.size }.by(-1) 184 | end 185 | end 186 | 187 | describe 'format-preserving substitution' do 188 | before do 189 | @doc = Docx::Document.open(@fixtures_path + '/substitution.docx') 190 | end 191 | 192 | it 'should replace placeholder in any line of a paragraph' do 193 | expect(@doc.paragraphs[0].text).to eq('Page title') 194 | expect(@doc.paragraphs[1].text).to eq('Multi-line paragraph line 1_placeholder2_ line 2_placeholder3_ line3 ') 195 | 196 | @doc.paragraphs[1].each_text_run do |text_run| 197 | text_run.substitute('_placeholder2_', 'same paragraph') 198 | text_run.substitute('_placeholder3_', 'yet the same paragraph') 199 | end 200 | 201 | expect(@doc.paragraphs[1].text).to eq('Multi-line paragraph line 1same paragraph line 2yet the same paragraph line3 ') 202 | end 203 | end 204 | 205 | describe 'read formatting' do 206 | before do 207 | @doc = Docx::Document.open(@fixtures_path + '/formatting.docx') 208 | @formatting = @doc.paragraphs.map { |p| p.text_runs.map(&:formatting) } 209 | @default_formatting = Docx::Elements::Containers::TextRun::DEFAULT_FORMATTING 210 | @only_italic = @default_formatting.merge italic: true 211 | @only_bold = @default_formatting.merge bold: true 212 | @only_underline = 
@default_formatting.merge underline: true 213 | @all_formatted = @default_formatting.merge italic: true, bold: true, underline: true 214 | end 215 | 216 | it 'should have the correct text' do 217 | expect(@doc.paragraphs.size).to eq(@formatting_line_count) 218 | expect(@doc.paragraphs[0].text).to eq('Normal') 219 | expect(@doc.paragraphs[1].text).to eq('Italic') 220 | expect(@doc.paragraphs[2].text).to eq('Bold') 221 | expect(@doc.paragraphs[3].text).to eq('Underline') 222 | expect(@doc.paragraphs[4].text).to eq('Normal') 223 | expect(@doc.paragraphs[5].text).to eq('This is a sentence with all formatting options in the middle of the sentence.') 224 | expect(@doc.paragraphs[6].text).to eq('This is a centered paragraph.') 225 | expect(@doc.paragraphs[7].text).to eq('This paragraph is aligned left.') 226 | expect(@doc.paragraphs[8].text).to eq('This paragraph is aligned right.') 227 | expect(@doc.paragraphs[9].text).to eq('This paragraph is 14 points.') 228 | expect(@doc.paragraphs[10].text).to eq('This paragraph has a word at 16 points.') 229 | expect(@doc.paragraphs[11].text).to eq('This sentence has different formatting in different places.') 230 | expect(@doc.paragraphs[12].text).to eq('This sentence has a hyperlink.') 231 | end 232 | 233 | it 'should contain a paragraph with multiple text runs' do 234 | end 235 | 236 | it 'should detect normal formatting' do 237 | [0, 4].each do |i| 238 | expect(@formatting[i][0]).to eq(@default_formatting) 239 | expect(@doc.paragraphs[i].text_runs[0].italicized?).to eq(false) 240 | expect(@doc.paragraphs[i].text_runs[0].bolded?).to eq(false) 241 | expect(@doc.paragraphs[i].text_runs[0].underlined?).to eq(false) 242 | end 243 | end 244 | 245 | it 'should detect italic formatting' do 246 | expect(@formatting[1][0]).to eq(@only_italic) 247 | expect(@doc.paragraphs[1].text_runs[0].italicized?).to eq(true) 248 | expect(@doc.paragraphs[1].text_runs[0].bolded?).to eq(false) 249 | expect(@doc.paragraphs[1].text_runs[0].underlined?).to 
eq(false) 250 | end 251 | 252 | it 'should detect bold formatting' do 253 | expect(@formatting[2][0]).to eq(@only_bold) 254 | expect(@doc.paragraphs[2].text_runs[0].italicized?).to eq(false) 255 | expect(@doc.paragraphs[2].text_runs[0].bolded?).to eq(true) 256 | expect(@doc.paragraphs[2].text_runs[0].underlined?).to eq(false) 257 | end 258 | 259 | it 'should detect underline formatting' do 260 | expect(@formatting[3][0]).to eq(@only_underline) 261 | expect(@doc.paragraphs[3].text_runs[0].italicized?).to eq(false) 262 | expect(@doc.paragraphs[3].text_runs[0].bolded?).to eq(false) 263 | expect(@doc.paragraphs[3].text_runs[0].underlined?).to eq(true) 264 | end 265 | 266 | it 'should detect mixed formatting' do 267 | expect(@formatting[5][0]).to eq(@default_formatting) 268 | expect(@doc.paragraphs[5].text_runs[0].italicized?).to eq(false) 269 | expect(@doc.paragraphs[5].text_runs[0].bolded?).to eq(false) 270 | expect(@doc.paragraphs[5].text_runs[0].underlined?).to eq(false) 271 | 272 | expect(@formatting[5][1]).to eq(@all_formatted) 273 | expect(@doc.paragraphs[5].text_runs[1].italicized?).to eq(true) 274 | expect(@doc.paragraphs[5].text_runs[1].bolded?).to eq(true) 275 | expect(@doc.paragraphs[5].text_runs[1].underlined?).to eq(true) 276 | 277 | expect(@formatting[5][2]).to eq(@default_formatting) 278 | expect(@doc.paragraphs[5].text_runs[2].italicized?).to eq(false) 279 | expect(@doc.paragraphs[5].text_runs[2].bolded?).to eq(false) 280 | expect(@doc.paragraphs[5].text_runs[2].underlined?).to eq(false) 281 | end 282 | 283 | it 'should detect centered paragraphs' do 284 | expect(@doc.paragraphs[5].aligned_center?).to eq(false) 285 | expect(@doc.paragraphs[6].aligned_center?).to eq(true) 286 | expect(@doc.paragraphs[7].aligned_center?).to eq(false) 287 | end 288 | 289 | it 'should detect left justified paragraphs' do 290 | expect(@doc.paragraphs[6].aligned_left?).to eq(false) 291 | expect(@doc.paragraphs[7].aligned_left?).to eq(true) 292 | 
expect(@doc.paragraphs[8].aligned_left?).to eq(false) 293 | end 294 | 295 | it 'should detect right justified paragraphs' do 296 | expect(@doc.paragraphs[7].aligned_right?).to eq(false) 297 | expect(@doc.paragraphs[8].aligned_right?).to eq(true) 298 | expect(@doc.paragraphs[9].aligned_right?).to eq(false) 299 | end 300 | 301 | # ECMA-376 Office Open XML spec (4th edition), 17.3.2.38, size is 302 | # defined in half-points, meaning 14pt text returns a value of 28. 303 | # http://www.ecma-international.org/publications/standards/Ecma-376.htm 304 | it 'should return proper font size for paragraphs' do 305 | expect(@doc.font_size).to eq 11 306 | expect(@doc.paragraphs[5].font_size).to eq 11 307 | paragraph = @doc.paragraphs[9] 308 | expect(paragraph.font_size).to eq 14 309 | expect(paragraph.text_runs[0].font_size).to eq 14 310 | end 311 | 312 | it 'should return proper font size for runs' do 313 | expect(@doc.font_size).to eq 11 314 | paragraph = @doc.paragraphs[10] 315 | expect(paragraph.font_size).to eq 11 316 | text_runs = paragraph.text_runs 317 | expect(text_runs[0].font_size).to eq 11 318 | expect(text_runs[1].font_size).to eq 16 319 | expect(text_runs[2].font_size).to eq 11 320 | expect(text_runs[3].font_size).to eq 11 321 | expect(text_runs[4].font_size).to eq 11 322 | end 323 | 324 | it 'should return changed value for runs' do 325 | paragraph = @doc.paragraphs[10] 326 | text_runs = paragraph.text_runs 327 | 328 | tr = text_runs[0] 329 | expect(tr.text).to eq 'This paragraph has a ' 330 | 331 | tr.text = 'This paragraph hasn\'t a' 332 | expect(tr.text).to eq 'This paragraph hasn\'t a' 333 | end 334 | end 335 | 336 | describe 'saving' do 337 | context 'from a normal file' do 338 | before do 339 | @doc = Docx::Document.open(@fixtures_path + '/saving.docx') 340 | end 341 | 342 | it_behaves_like 'saving to file' 343 | end 344 | 345 | context 'from a stream' do 346 | before do 347 | stream = File.binread(@fixtures_path + '/saving.docx') 348 | @doc = 
Docx::Document.open(stream) 349 | end 350 | 351 | it_behaves_like 'saving to file' 352 | end 353 | 354 | context 'wps modified docx file' do 355 | before { @doc = Docx::Document.open(@fixtures_path + '/saving_wps.docx') } 356 | 357 | it 'should save to a normal file path' do 358 | @new_doc_path = @fixtures_path + '/new_save.docx' 359 | @doc.save(@new_doc_path) 360 | @new_doc = Docx::Document.open(@new_doc_path) 361 | expect(@new_doc.paragraphs.size).to eq(@doc.paragraphs.size) 362 | end 363 | 364 | after { File.delete(@new_doc_path) if File.exist?(@new_doc_path) } 365 | end 366 | end 367 | 368 | describe 'streaming' do 369 | it 'should return a StringIO to send over HTTP' do 370 | doc = Docx::Document.open(@fixtures_path + '/basic.docx') 371 | expect(doc.stream).to be_a(StringIO) 372 | end 373 | 374 | context 'should return a valid docx stream' do 375 | before do 376 | doc = Docx::Document.open(@fixtures_path + '/basic.docx') 377 | result = doc.stream 378 | 379 | @doc = Docx::Document.open(result) 380 | end 381 | 382 | it_behaves_like 'reading' 383 | end 384 | end 385 | 386 | describe 'outputting html' do 387 | before do 388 | @doc = Docx::Document.open(@fixtures_path + '/formatting.docx') 389 | @formatted_line = @doc.paragraphs[5] 390 | @p_regex = /(^\)\w+)(\<\/p>$)/ 391 | @span_regex = /(\)\w+)(<\/span>)/ 392 | @em_regex = /(\)\w+)(\<\/em\>)/ 393 | @strong_regex = /(\)\w+)(\<\/strong\>)/ 394 | @strike_regex = /(\)\w+)(\<\/s\>)/ 395 | @anchor_tag_regex = /\(.+)\<\/a>/ 396 | end 397 | 398 | it 'should wrap pragraphs in a p tag' do 399 | scan = @doc.paragraphs[0].to_html.scan(@p_regex).flatten 400 | expect(scan.first).to eq('') 402 | expect(scan[1]).to eq('Normal') 403 | end 404 | 405 | it 'should emphasize italicized text' do 406 | scan = @doc.paragraphs[1].to_html.scan(@em_regex).flatten 407 | expect(scan.first).to eq('') 409 | expect(scan[1]).to eq('Italic') 410 | end 411 | 412 | it 'should strong bolded text' do 413 | scan = 
@doc.paragraphs[2].to_html.scan(@strong_regex).flatten 414 | expect(scan.first).to eq '' 416 | expect(scan[1]).to eq 'Bold' 417 | end 418 | 419 | it 'should underline underlined text' do 420 | scan = @doc.paragraphs[3].to_html.scan(/\]+)/).flatten 421 | expect(scan.first).to eq 'style="text-decoration:underline;"' 422 | end 423 | 424 | it 'should strike striked text' do 425 | scan = @doc.paragraphs[13].to_html.scan(@strike_regex).flatten 426 | expect(scan.first).to eq '' 428 | expect(scan[1]).to eq 'Strike' 429 | end 430 | 431 | it 'should color the text' do 432 | scan = @doc.paragraphs[14].to_html.scan(/\]+)/).flatten 433 | expect(scan.first).to eq 'style="font-size:11pt;color:#FF0000;"' 434 | end 435 | 436 | it 'should justify paragraphs' do 437 | regex = /^]+style\=\"([^\"]+).+(<\/p>)/ 445 | scan = @doc.paragraphs[9].to_html.scan(regex).flatten 446 | expect(scan.first).to eq '' 448 | expect(scan[1].split(';').include?('font-size:14pt')).to eq(true) 449 | end 450 | 451 | it 'should set font size on styled text runs' do 452 | regex = /(\]+style\=\"([^\"]+)[^\<]+(<\/span>)/ 453 | scan = @doc.paragraphs[10].to_html.scan(regex).flatten 454 | expect(scan.first).to eq '' 456 | expect(scan[1].split(';').include?('font-size:16pt')).to eq(true) 457 | end 458 | 459 | it 'should properly highlight different text in different places in a sentence' do 460 | paragraph = @doc.paragraphs[11] 461 | scan = paragraph.to_html.scan(@em_regex).flatten 462 | expect(scan.first).to eq '' 464 | expect(scan[1]).to eq 'sentence' 465 | scan = paragraph.to_html.scan(@strong_regex).flatten 466 | expect(scan.first).to eq '' 468 | expect(scan[1]).to eq 'formatting' 469 | scan = paragraph.to_html.scan(@span_regex).flatten 470 | expect(scan.first).to eq '' 472 | expect(scan[1]).to eq 'different' 473 | scan = paragraph.to_html.scan(/\]+)/).flatten 474 | expect(scan.first).to eq 'style="text-decoration:underline;"' 475 | end 476 | 477 | it 'should output an entire document as html fragment' do 478 | 
expect(@doc.to_html.scan(/(\all').size).to eq 1 483 | end 484 | 485 | it 'should join paragraphs with newlines' do 486 | expect(@doc.to_html.scan(%(

Normal

\n

Italic

\n

Bold

)).size).to eq 1 487 | end 488 | 489 | it 'should convert hyperlinks to anchor tags' do 490 | scan = @doc.to_html.scan(@anchor_tag_regex).flatten 491 | expect(scan[0]).to eq "http://www.google.com/" 492 | expect(scan[1]).to eq "hyperlink" 493 | end 494 | end 495 | 496 | describe 'replacing contents' do 497 | let(:replacement_file_path) { @fixtures_path + '/replacement.png' } 498 | let(:temp_file_path) { Tempfile.new(['docx_gem', '.docx']).path } 499 | let(:entry_path) { 'word/media/image1.png' } 500 | let(:doc) { Docx::Document.open(@fixtures_path + '/replacement.docx') } 501 | 502 | it 'should replace existing file within the document' do 503 | File.open replacement_file_path, 'rb' do |io| 504 | doc.replace_entry entry_path, io.read 505 | end 506 | 507 | doc.save(temp_file_path) 508 | 509 | File.open replacement_file_path, 'rb' do |io| 510 | expect(Zip::File.open(temp_file_path).read(entry_path)).to eq io.read 511 | end 512 | end 513 | 514 | after do 515 | File.delete(temp_file_path) if File.exist?(temp_file_path) 516 | end 517 | end 518 | 519 | describe '#to_s' do 520 | let(:doc) { Docx::Document.open(@fixtures_path + '/weird_docx.docx') } 521 | 522 | it 'does not raise error' do 523 | expect { doc.to_s }.to_not raise_error 524 | end 525 | it 'returns a String' do 526 | expect(doc.to_s).to be_a(String) 527 | end 528 | end 529 | 530 | describe 'reading and manipulating paragraph style' do 531 | before do 532 | @doc = Docx::Document.open(@fixtures_path + '/styles.docx') 533 | end 534 | 535 | it 'read default style when not' do 536 | nb = @doc.paragraphs.size 537 | 538 | expect(@doc.paragraphs.map(&:style)).to eq([ 539 | "Title", 540 | "Subtitle", 541 | "Author", 542 | "Date", 543 | "Compact", 544 | "Heading 1", 545 | "Heading 2", 546 | "Heading 3", 547 | "Heading 4", 548 | "Heading 5", 549 | "Heading 6", 550 | "Heading 7", 551 | "Heading 8", 552 | "Heading 9", 553 | "First Paragraph", 554 | "Body Text", 555 | "Block Text", 556 | "Table Caption", 557 | "Image 
Caption", 558 | "Definition Term", 559 | "Definition", 560 | "Definition Term", 561 | "Definition", 562 | ]) 563 | 564 | expect(@doc.paragraphs.map(&:style_id)).to eq([ 565 | "Title", 566 | "Subtitle", 567 | "Author", 568 | "Date", 569 | "Compact", 570 | "Heading1", 571 | "Heading2", 572 | "Heading3", 573 | "Heading4", 574 | "Heading5", 575 | "Heading6", 576 | "Heading7", 577 | "Heading8", 578 | "Heading9", 579 | "FirstParagraph", 580 | "BodyText", 581 | "BlockText", 582 | "TableCaption", 583 | "ImageCaption", 584 | "DefinitionTerm", 585 | "Definition", 586 | "DefinitionTerm", 587 | "Definition", 588 | ]) 589 | end 590 | 591 | it 'set paragraph style' do 592 | nb = @doc.paragraphs.size 593 | expect(nb).to eq 23 594 | 595 | @doc.paragraphs.each do |p| 596 | p.style = 'Heading 1' 597 | expect(p.style).to eq 'Heading 1' 598 | end 599 | 600 | @doc.paragraphs.each do |p| 601 | p.style_id = 'Heading2' 602 | expect(p.style).to eq 'Heading 2' 603 | end 604 | end 605 | 606 | it 'raises if invalid paragraph style' do 607 | expect { @doc.paragraphs.first.style = 'invalid' }.to raise_error(Docx::Errors::StyleNotFound) 608 | end 609 | end 610 | 611 | describe 'reading and manipulating document styles' do 612 | before do 613 | @doc = Docx::Document.open(@fixtures_path + '/styles.docx') 614 | end 615 | 616 | it '#default_paragraphy_style' do 617 | expect(@doc.default_paragraph_style).to eq 'Normal' 618 | end 619 | 620 | it 'manipulates existing document styles' do 621 | styles_config = @doc.styles_configuration 622 | 623 | expect(styles_config.size).to eq 37 624 | 625 | heading_style = styles_config.style_of('Normal') 626 | expect(heading_style).to be_a(Docx::Elements::Style) 627 | 628 | expect(heading_style.id).to eq "Normal" 629 | expect(heading_style.font_color).to eq(nil) 630 | 631 | heading_style.font_color = "000000" 632 | expect(heading_style.font_color).to eq("000000") 633 | 634 | expect(heading_style.node.at_xpath("w:rPr/w:color/@w:val").value).to eq("000000") 635 | end 
636 | 637 | it 'creates document styles' do 638 | styles_config = @doc.styles_configuration 639 | 640 | expect(styles_config.size).to eq 37 641 | expect { styles_config.style_of('Red') } .to raise_error(Docx::Errors::StyleNotFound) 642 | 643 | red_style = styles_config.add_style("Red") 644 | expect(styles_config.size).to eq 38 645 | 646 | expect(red_style).to be_a(Docx::Elements::Style) 647 | expect(red_style.id).to eq "Red" 648 | expect(red_style.name).to eq "Red" 649 | 650 | expect { red_style.font_color = "#FFFFFF" }.to raise_error(Docx::Errors::StyleInvalidPropertyValue) 651 | expect { red_style.font_color = "blue" }.to raise_error(Docx::Errors::StyleInvalidPropertyValue) 652 | expect { red_style.font_color = "FF0000" }.not_to raise_error 653 | 654 | styles_config.remove_style("Red") 655 | expect(styles_config.size).to eq 37 656 | expect { styles_config.style_of('Red') }.to raise_error(Docx::Errors::StyleNotFound) 657 | end 658 | 659 | it 'persists document styles' do 660 | styles_config = @doc.styles_configuration 661 | styles_config.add_style("Red", name: "Red", font_color: "FF0000", font_size: 20) 662 | @doc.paragraphs[5].style = "Red" 663 | 664 | first_modified_styles_path = @fixtures_path + '/styles_modified.docx' 665 | second_modified_styles_path = @fixtures_path + '/styles_modified2.docx' 666 | @doc.save(first_modified_styles_path) 667 | 668 | modified_styles_doc = Docx::Document.open(first_modified_styles_path) 669 | modified_styles_config = modified_styles_doc.styles_configuration 670 | 671 | expect(modified_styles_config.style_of('Red')).to be_a(Docx::Elements::Style) 672 | modified_styles_config.remove_style("Red") 673 | modified_styles_doc.save(second_modified_styles_path) 674 | 675 | modified_styles_doc = Docx::Document.open(second_modified_styles_path) 676 | modified_styles_config = modified_styles_doc.styles_configuration 677 | expect { modified_styles_config.style_of('Red') }.to raise_error(Docx::Errors::StyleNotFound) 678 | 679 | 
File.delete(first_modified_styles_path) 680 | File.delete(second_modified_styles_path) 681 | end 682 | 683 | after { File.delete(@new_doc_path) if @new_doc_path && File.exist?(@new_doc_path) } 684 | end 685 | 686 | describe '#to_html' do 687 | before do 688 | @doc = Docx::Document.open(@fixtures_path + '/internal-links.docx') 689 | end 690 | 691 | it 'should not raise error' do 692 | expect { @doc.to_html }.to_not raise_error 693 | end 694 | end 695 | end 696 | -------------------------------------------------------------------------------- /spec/docx/elements/style_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require 'spec_helper' 4 | require 'docx' 5 | 6 | describe Docx::Elements::Style do 7 | let(:fixture_path) { Dir.pwd + "/spec/fixtures/partial_styles/full.xml" } 8 | let(:fixture_xml) { File.read(fixture_path) } 9 | let(:node) { Nokogiri::XML(fixture_xml).root.children[1] } 10 | let(:style) { described_class.new(double(:configuration), node) } 11 | 12 | it "should extract attributes" do 13 | expect(style.id).to eq("Red") 14 | end 15 | 16 | describe "attribute getters" do 17 | it { expect(style.id).to eq("Red") } 18 | it { expect(style.name).to eq("Red") } 19 | it { expect(style.type).to eq("paragraph") } 20 | it { expect(style.keep_next).to eq(false) } 21 | it { expect(style.keep_lines).to eq(false) } 22 | it { expect(style.page_break_before).to eq(false) } 23 | it { expect(style.widow_control).to eq(true) } 24 | it { expect(style.suppress_auto_hyphens).to eq(false) } 25 | it { expect(style.bidirectional_text).to eq(false) } 26 | it { expect(style.spacing_before).to eq("0") } 27 | it { expect(style.spacing_after).to eq("200") } 28 | it { expect(style.line_spacing).to eq("240") } 29 | it { expect(style.line_rule).to eq("auto") } 30 | it { expect(style.indent_left).to eq("0") } 31 | it { expect(style.indent_right).to eq("0") } 32 | it { expect(style.indent_first_line).to 
eq("0") } 33 | it { expect(style.align).to eq("left") } 34 | it { expect(style.outline_level).to eq("9") } 35 | it { expect(style.font).to eq("Cambria") } 36 | it { expect(style.font_ascii).to eq("Cambria") } 37 | it { expect(style.font_cs).to eq("Arial Unicode MS") } 38 | it { expect(style.font_hAnsi).to eq("Cambria") } 39 | it { expect(style.font_eastAsia).to eq("Arial Unicode MS") } 40 | it { expect(style.bold).to eq(true) } 41 | it { expect(style.italic).to eq(false) } 42 | it { expect(style.caps).to eq(false) } 43 | it { expect(style.small_caps).to eq(false) } 44 | it { expect(style.strike).to eq(false) } 45 | it { expect(style.double_strike).to eq(false) } 46 | it { expect(style.outline).to eq(false) } 47 | it { expect(style.shading_style).to eq("clear") } 48 | it { expect(style.shading_color).to eq("auto") } 49 | it { expect(style.shading_fill).to eq("auto") } # TODO 50 | it { expect(style.font_color).to eq("99403d") } 51 | it { expect(style.font_size).to eq(12) } 52 | it { expect(style.font_size_cs).to eq(12) } 53 | it { expect(style.underline_style).to eq("none") } 54 | it { expect(style.underline_color).to eq("000000") } 55 | it { expect(style.spacing).to eq("0") } 56 | it { expect(style.kerning).to eq("0") } 57 | it { expect(style.position).to eq("0") } 58 | it { expect(style.text_fill_color).to eq("9A403E") } 59 | it { expect(style.vertical_alignment).to eq("baseline") } 60 | it { expect(style.lang).to eq("en-US") } 61 | end 62 | 63 | it "should allow setting simple attributes" do 64 | style.id = "Blue" 65 | 66 | # Get persisted to the style method 67 | expect(style.id).to eq("Blue") 68 | 69 | # Gets persisted to the ./node 70 | expect(node.at_xpath("./@w:styleId").value).to eq("Blue") 71 | end 72 | 73 | it "should allow setting complex attributes" do 74 | style.shading_style = "complex" 75 | 76 | # Get persisted to the style method 77 | expect(style.shading_style).to eq("complex") 78 | 79 | # Gets persisted to the node 80 | 
expect(node.at_xpath("./w:pPr/w:shd/@w:val").value).to eq("complex") 81 | expect(node.at_xpath("./w:rPr/w:shd/@w:val").value).to eq("complex") 82 | end 83 | 84 | it "should allow setting attributes to nil" do 85 | style.shading_style = nil 86 | 87 | expect(style.shading_style).to eq(nil) 88 | expect(node.at_xpath("./w:pPr/w:shd/@w:val")).to eq(nil) 89 | expect { node.at_xpath("./w:pPr/w:shd/@w:val").value }.to raise_error(NoMethodError) # i.e. it's gone! 90 | end 91 | 92 | describe "#to_xml" do 93 | it "should return the node as XML" do 94 | expect(style.to_xml).to eq(node.to_xml) 95 | end 96 | 97 | it "should change underlying XML when attributes are changed" do 98 | style.id = "blue" 99 | style.name = "Blue" 100 | style.font_size = 20 101 | style.font_color = "0000FF" 102 | 103 | expect(style.to_xml).to eq(node.to_xml) 104 | expect(style.to_xml).to include('') 105 | expect(style.to_xml).to include('') 106 | expect(style.to_xml).to include('') 107 | expect(style.to_xml).to include('') 108 | expect(style.to_xml).to include('') 109 | expect(style.to_xml).to include('') 110 | end 111 | end 112 | 113 | describe "validation" do 114 | let(:fixture_path) { Dir.pwd + "/spec/fixtures/partial_styles/basic.xml" } 115 | 116 | it "validation: id" do 117 | expect { style.id = nil }.to raise_error(Docx::Errors::StyleRequiredPropertyValue) 118 | end 119 | 120 | it "validation: name" do 121 | expect { style.name = nil }.to raise_error(Docx::Errors::StyleRequiredPropertyValue) 122 | end 123 | 124 | it "validation: type" do 125 | expect { style.type = nil }.to raise_error(Docx::Errors::StyleRequiredPropertyValue) 126 | 127 | expect { style.type = "invalid" }.to raise_error(Docx::Errors::StyleInvalidPropertyValue) 128 | end 129 | 130 | it "true" do 131 | expect(style).to be_valid 132 | end 133 | 134 | describe "unhappy" do 135 | let(:fixture_xml) do 136 | <<~XML 137 | 138 | 139 | 140 | 141 | 142 | 143 | XML 144 | end 145 | 146 | it "false" do 147 | expect(style).to_not be_valid 148 | 
end 149 | end 150 | 151 | end 152 | 153 | describe "basic" do 154 | let(:fixture_path) { Dir.pwd + "/spec/fixtures/partial_styles/basic.xml" } 155 | 156 | it "should allow setting simple attributes" do 157 | expect(style.id).to eq("MyCustomStyle") 158 | style.id = "Blue" 159 | 160 | # Get persisted to the style method 161 | expect(style.id).to eq("Blue") 162 | 163 | # Gets persisted to the node 164 | expect(node.at_xpath("./@w:styleId").value).to eq("Blue") 165 | end 166 | 167 | it "should allow setting complex attributes" do 168 | expect(style.shading_style).to eq(nil) 169 | expect(style.to_xml).to_not include('') 170 | style.shading_style = "complex" 171 | 172 | # Get persisted to the style method 173 | expect(style.shading_style).to eq("complex") 174 | 175 | # Gets persisted to the node 176 | expect(node.at_xpath("./w:pPr/w:shd/@w:val").value).to eq("complex") 177 | expect(node.at_xpath("./w:rPr/w:shd/@w:val").value).to eq("complex") 178 | expect(style.to_xml).to include('') 179 | end 180 | end 181 | end -------------------------------------------------------------------------------- /spec/fixtures/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruby-docx/docx/0a536d4ac54b2da5615aebd6ec52e601368d339e/spec/fixtures/.DS_Store -------------------------------------------------------------------------------- /spec/fixtures/basic.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruby-docx/docx/0a536d4ac54b2da5615aebd6ec52e601368d339e/spec/fixtures/basic.docx -------------------------------------------------------------------------------- /spec/fixtures/editing.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruby-docx/docx/0a536d4ac54b2da5615aebd6ec52e601368d339e/spec/fixtures/editing.docx 
-------------------------------------------------------------------------------- /spec/fixtures/formatting.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruby-docx/docx/0a536d4ac54b2da5615aebd6ec52e601368d339e/spec/fixtures/formatting.docx -------------------------------------------------------------------------------- /spec/fixtures/internal-links.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruby-docx/docx/0a536d4ac54b2da5615aebd6ec52e601368d339e/spec/fixtures/internal-links.docx -------------------------------------------------------------------------------- /spec/fixtures/no_styles.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruby-docx/docx/0a536d4ac54b2da5615aebd6ec52e601368d339e/spec/fixtures/no_styles.docx -------------------------------------------------------------------------------- /spec/fixtures/office365.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruby-docx/docx/0a536d4ac54b2da5615aebd6ec52e601368d339e/spec/fixtures/office365.docx -------------------------------------------------------------------------------- /spec/fixtures/partial_styles/basic.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /spec/fixtures/partial_styles/full.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 
-------------------------------------------------------------------------------- /spec/fixtures/replacement.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruby-docx/docx/0a536d4ac54b2da5615aebd6ec52e601368d339e/spec/fixtures/replacement.docx -------------------------------------------------------------------------------- /spec/fixtures/replacement.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruby-docx/docx/0a536d4ac54b2da5615aebd6ec52e601368d339e/spec/fixtures/replacement.png -------------------------------------------------------------------------------- /spec/fixtures/saving.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruby-docx/docx/0a536d4ac54b2da5615aebd6ec52e601368d339e/spec/fixtures/saving.docx -------------------------------------------------------------------------------- /spec/fixtures/saving_wps.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruby-docx/docx/0a536d4ac54b2da5615aebd6ec52e601368d339e/spec/fixtures/saving_wps.docx -------------------------------------------------------------------------------- /spec/fixtures/styles.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruby-docx/docx/0a536d4ac54b2da5615aebd6ec52e601368d339e/spec/fixtures/styles.docx -------------------------------------------------------------------------------- /spec/fixtures/substitution.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruby-docx/docx/0a536d4ac54b2da5615aebd6ec52e601368d339e/spec/fixtures/substitution.docx -------------------------------------------------------------------------------- /spec/fixtures/tables.docx: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruby-docx/docx/0a536d4ac54b2da5615aebd6ec52e601368d339e/spec/fixtures/tables.docx -------------------------------------------------------------------------------- /spec/fixtures/test_with_style.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruby-docx/docx/0a536d4ac54b2da5615aebd6ec52e601368d339e/spec/fixtures/test_with_style.docx -------------------------------------------------------------------------------- /spec/fixtures/weird_docx.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ruby-docx/docx/0a536d4ac54b2da5615aebd6ec52e601368d339e/spec/fixtures/weird_docx.docx -------------------------------------------------------------------------------- /spec/spec_helper.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | if ENV['COVERALLS_REPO_TOKEN'] 4 | require 'coveralls' 5 | 6 | Coveralls.wear! 7 | end 8 | 9 | Dir['./spec/support/**/*.rb'].sort.each { |f| require f } 10 | 11 | # This file was generated by the `rspec --init` command. Conventionally, all 12 | # specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`. 13 | # Require this file using `require "spec_helper"` to ensure that it is only 14 | # loaded once. 15 | # 16 | # See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration 17 | RSpec.configure do |config| 18 | config.run_all_when_everything_filtered = true 19 | config.filter_run :focus 20 | 21 | # Run specs in random order to surface order dependencies. If you find an 22 | # order dependency and want to debug it, you can fix the order by providing 23 | # the seed, which is printed after each run. 
24 | # --seed 1234 25 | config.order = 'random' 26 | end 27 | -------------------------------------------------------------------------------- /spec/support/shared_examples.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | RSpec.shared_examples_for 'reading' do 4 | it 'should read the document' do 5 | expect(@doc.paragraphs.size).to eq(2) 6 | expect(@doc.paragraphs.first.text).to eq('hello') 7 | expect(@doc.paragraphs.last.text).to eq('world') 8 | expect(@doc.text).to eq("hello\nworld") 9 | end 10 | 11 | it 'should read bookmarks' do 12 | expect(@doc.bookmarks.size).to eq(1) 13 | expect(@doc.bookmarks['test_bookmark']).to_not eq(nil) 14 | end 15 | 16 | it 'should have paragraphs' do 17 | @doc.each_paragraph do |p| 18 | expect(p).to be_an_instance_of(Docx::Elements::Containers::Paragraph) 19 | end 20 | end 21 | 22 | it 'should have properly formatted text runs' do 23 | @doc.each_paragraph do |p| 24 | p.each_text_run do |tr| 25 | expect(tr).to be_an_instance_of(Docx::Elements::Containers::TextRun) 26 | expect(tr.formatting).to eq(Docx::Elements::Containers::TextRun::DEFAULT_FORMATTING) 27 | end 28 | end 29 | end 30 | 31 | describe '#font_size' do 32 | context 'When a docx files has no styles.xml' do 33 | before do 34 | @doc = Docx::Document.new(@fixtures_path + '/no_styles.docx') 35 | end 36 | 37 | it 'should raise an error' do 38 | expect(@doc.font_size).to be_nil 39 | end 40 | end 41 | end 42 | end 43 | 44 | RSpec.shared_examples_for 'saving to file' do 45 | it 'should save to a normal file path' do 46 | @new_doc_path = @fixtures_path + '/new_save.docx' 47 | @doc.save(@new_doc_path) 48 | @new_doc = Docx::Document.open(@new_doc_path) 49 | expect(@new_doc.paragraphs.size).to eq(@doc.paragraphs.size) 50 | end 51 | 52 | it 'should save to a tempfile' do 53 | temp_file = Tempfile.new(['docx_gem', '.docx']) 54 | @new_doc_path = temp_file.path 55 | @doc.save(@new_doc_path) 56 | @new_doc = 
Docx::Document.open(@new_doc_path) 57 | expect(@new_doc.paragraphs.size).to eq(@doc.paragraphs.size) 58 | 59 | temp_file.close 60 | temp_file.unlink 61 | # ensure temp file has been removed 62 | expect(File.exist?(@new_doc_path)).to eq(false) 63 | end 64 | 65 | after do 66 | File.delete(@new_doc_path) if File.exist?(@new_doc_path) 67 | end 68 | end 69 | --------------------------------------------------------------------------------