├── .ruby-version
├── lib
│   ├── truncato
│   │   ├── version.rb
│   │   ├── truncato.rb
│   │   └── truncated_sax_document.rb
│   └── truncato.rb
├── .travis.yml
├── .gitignore
├── Rakefile
├── benchmark
│   ├── truncato
│   │   ├── vendor
│   │   │   ├── peppercorn_adapter.rb
│   │   │   └── vendor_html_truncator_adapter.rb
│   │   └── benchmark_runner.rb
│   └── truncato_benchmark.rb
├── spec
│   ├── support
│   │   └── spec_helpers
│   │       └── truncato_macros.rb
│   ├── spec_helper.rb
│   └── truncato
│       └── truncato_spec.rb
├── Gemfile
├── .github
│   └── workflows
│       └── ruby.yml
├── LICENSE.txt
├── truncato.gemspec
├── .rvmrc
└── README.md
/.ruby-version:
--------------------------------------------------------------------------------
1 | ruby-3.4.1
2 |
--------------------------------------------------------------------------------
/lib/truncato/version.rb:
--------------------------------------------------------------------------------
# Namespace of the truncato gem.
module Truncato
  # Gem version; truncato.gemspec reads this constant.
  # Frozen so the constant cannot be mutated in place.
  VERSION = '0.7.13'.freeze
end
4 |
--------------------------------------------------------------------------------
/lib/truncato.rb:
--------------------------------------------------------------------------------
# Require every Ruby file below lib/truncato/.
# The list is sorted explicitly: Dir[] only returns sorted results by default
# from Ruby 3.0 on, and the load order should not depend on the filesystem.
Dir[File.join(__dir__, 'truncato', '**', '*.rb')].sort.each do |file|
  require file
end
4 |
5 |
6 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: ruby
2 | env:
3 | global:
4 | - NOKOGIRI_USE_SYSTEM_LIBRARIES=true
5 | rvm:
6 | - 2.3.1
7 | - 2.4.4
8 | - 2.5.3
9 | - 2.6.0
10 |
11 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.gem
2 | *.rbc
3 | .bundle
4 | .config
5 | coverage
6 | InstalledFiles
7 | lib/bundler/man
8 | pkg
9 | rdoc
10 | spec/reports
11 | test/tmp
12 | test/version_tmp
13 | tmp
14 |
15 | # YARD artifacts
16 | .yardoc
17 | _yardoc
18 | doc/
19 | .idea/
20 |
21 | Gemfile.lock
22 |
--------------------------------------------------------------------------------
/Rakefile:
--------------------------------------------------------------------------------
# Rake tasks for the gem: `spec` (the default) and the benchmark tasks the
# README refers to (`truncato:benchmark`, `truncato:vendor_compare`).
begin
  require 'bundler/setup'
rescue LoadError
  puts 'You must `gem install bundler` and `bundle install` to run rake tasks'
end

require "rake"
require "rspec/core/rake_task"

RSpec::Core::RakeTask.new(:spec)

namespace :truncato do
  # The benchmark script requires the gems truncato is compared against, so it
  # is loaded only when a benchmark task actually runs; `rake spec` stays
  # independent of them.
  task :load_benchmark do
    load File.expand_path('benchmark/truncato_benchmark.rb', __dir__)
  end

  desc 'Benchmark truncato on a synthetic document'
  task benchmark: :load_benchmark do
    Truncato::BenchmarkRunner.new.run
  end

  desc 'Benchmark truncato against other HTML truncation gems'
  task vendor_compare: :load_benchmark do
    Truncato::BenchmarkRunner.new.run_comparison
  end
end

task default: :spec
13 |
--------------------------------------------------------------------------------
/benchmark/truncato/vendor/peppercorn_adapter.rb:
--------------------------------------------------------------------------------
# Benchmark adapter for the peppercorn gem. It exposes the same
# `truncate(string, options)` class method the benchmark runner calls on
# every implementation it compares.
module Truncato
  class PeppercornAdapter
    # Truncates +string+ by delegating to its `truncate_html` method
    # (presumably mixed into String by peppercorn; confirm against the gem).
    #
    # @param string [#truncate_html] markup to truncate
    # @param options [Hash] only :max_length and :tail are read
    # @return whatever `truncate_html` returns
    def self.truncate(string, options)
      string.truncate_html(options[:max_length], tail: options[:tail])
    end
  end
end
9 |
10 |
--------------------------------------------------------------------------------
/spec/support/spec_helpers/truncato_macros.rb:
--------------------------------------------------------------------------------
# Example-group macros for the truncato specs (extended into RSpec by
# spec_helper.rb).
module TruncatoMacros
  # Defines one example asserting that truncating options[:source] yields
  # options[:expected].
  #
  # @param example_description [String] appended to "should truncate " as the example name
  # @param options [Hash]
  #   :source   - string passed to Truncato.truncate
  #   :expected - string the result must equal
  #   :with     - truncation options merged over Truncato::DEFAULT_OPTIONS
  #               (optional; treated as empty when missing)
  def it_should_truncate(example_description, options)
    it "should truncate #{example_description}" do
      expected_options = Truncato::DEFAULT_OPTIONS.merge(options[:with] || {})
      # expect/eq is the equivalent of the deprecated `should ==` form.
      expect(Truncato.truncate(options[:source], expected_options)).to eq(options[:expected])
    end
  end
end
--------------------------------------------------------------------------------
/benchmark/truncato_benchmark.rb:
--------------------------------------------------------------------------------
# Benchmark bootstrap: puts lib/ on the load path, requires truncato and the
# gems it is compared against, then loads every file under benchmark/truncato/.
benchmark_dir = File.dirname(__FILE__)
$LOAD_PATH.unshift File.join(benchmark_dir, '..', 'lib')

%w[rubygems bundler nokogiri truncato html_truncator peppercorn benchmark].each do |library|
  require library
end

Bundler.setup
Bundler.require

# `load` (not `require`) so the runner and adapters are re-evaluated each time
# this script is loaded.
Dir[benchmark_dir + '/truncato/**/*.rb'].each { |file| load file }
17 |
18 |
19 |
--------------------------------------------------------------------------------
/benchmark/truncato/vendor/vendor_html_truncator_adapter.rb:
--------------------------------------------------------------------------------
# Benchmark adapter for comparing against https://github.com/nono/HTML-Truncator
module Truncato
  class VendorHtmlTruncatorAdapter
    # Truncates +string+ with HTML_Truncator using a fixed "..." ellipsis.
    #
    # The benchmark's :max_length is a character budget, while HTML_Truncator
    # counts words unless `length_in_chars` is set, so the flag is passed to
    # make the comparison measure the same thing.
    #
    # @param string [String] markup to truncate
    # @param options [Hash] only :max_length is read
    # @return whatever HTML_Truncator.truncate returns
    def self.truncate(string, options)
      HTML_Truncator.truncate(string, options[:max_length], ellipsis: "...", length_in_chars: true)
    end
  end
end

# Result recorded before `length_in_chars` was passed (the 400000 limit was
# then presumably applied as a word count; re-measure to confirm):
#[{Truncato::VendorHtmlTruncatorAdapter=>{:truncated_length=>3584682, :time=>223.36}}]
11 |
12 |
--------------------------------------------------------------------------------
/Gemfile:
--------------------------------------------------------------------------------
source "https://rubygems.org"

# Runtime and development dependencies are declared in truncato.gemspec.
gemspec

# Add dependencies required to use your gem here.
# Example:
# gem "activesupport", ">= 2.3.5"

# Add dependencies to develop your gem here.
# Include everything needed to run rake, tests, features, etc.

group :development do
  gem "bundler"
end

# Gems truncato is benchmarked against (required by benchmark/truncato_benchmark.rb).
group :benchmark do
  gem 'html_truncator'
  gem 'peppercorn'
end
20 |
--------------------------------------------------------------------------------
/spec/spec_helper.rb:
--------------------------------------------------------------------------------
# Spec bootstrap: makes spec/ and lib/ requirable, activates the bundle and
# wires the shared macros into RSpec.
spec_dir = File.dirname(__FILE__)
$LOAD_PATH.unshift(spec_dir, File.join(spec_dir, '..', 'lib'))

%w[rubygems bundler nokogiri].each { |library| require library }

Bundler.setup
Bundler.require

# Pull in every helper below spec/support (custom matchers, macros, ...).
Dir["#{spec_dir}/support/**/*.rb"].each do |support_file|
  require support_file
end

RSpec.configure { |config| config.extend TruncatoMacros }
18 |
--------------------------------------------------------------------------------
/.github/workflows/ruby.yml:
--------------------------------------------------------------------------------
1 | name: Ruby
2 |
3 | on:
4 | push:
5 | branches: [ "master" ]
6 | pull_request:
7 | branches: [ "**" ]
8 |
9 | permissions:
10 | contents: read
11 |
12 | jobs:
13 | test:
14 |
15 | runs-on: ubuntu-22.04
16 | strategy:
17 | matrix:
18 | ruby-version: ["2.6", "2.7", "3.0", "3.1", "3.2", "3.3", "3.4"]
19 |
20 | steps:
21 | - uses: actions/checkout@v4
22 | - name: Set up Ruby
23 | uses: ruby/setup-ruby@v1
24 | with:
25 | ruby-version: ${{ matrix.ruby-version }}
26 | bundler-cache: true
27 |
28 | - name: Run tests
29 | run: bundle exec rake
30 |
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | Copyright (c) 2011 Jorge Manrubia
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining
4 | a copy of this software and associated documentation files (the
5 | "Software"), to deal in the Software without restriction, including
6 | without limitation the rights to use, copy, modify, merge, publish,
7 | distribute, sublicense, and/or sell copies of the Software, and to
8 | permit persons to whom the Software is furnished to do so, subject to
9 | the following conditions:
10 |
11 | The above copyright notice and this permission notice shall be
12 | included in all copies or substantial portions of the Software.
13 |
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 |
--------------------------------------------------------------------------------
/truncato.gemspec:
--------------------------------------------------------------------------------
# -*- encoding: utf-8 -*-
#
# Gem specification for truncato.
# The Rakefile defines no Jeweler tasks, so this file is edited directly.

$:.push File.expand_path("../lib", __FILE__)

# The version is maintained in lib/truncato/version.rb.
require "truncato/version"

Gem::Specification.new do |spec|
  spec.name = "truncato"
  spec.version = Truncato::VERSION

  spec.authors = ["Jorge Manrubia"]
  spec.date = "2013-09-10"
  spec.description = "Ruby tool for truncating HTML strings keeping a valid HTML markup"
  spec.email = "jorge.manrubia@gmail.com"
  spec.extra_rdoc_files = [
    "LICENSE.txt",
    "README.md"
  ]
  # The README is README.md (see extra_rdoc_files above), not README.rdoc.
  spec.files = Dir["{app,config,db,lib}/**/*", "LICENSE.txt", "Rakefile", "README.md"]
  spec.homepage = "https://github.com/jorgemanrubia/truncato"
  spec.licenses = ["MIT"]
  spec.require_paths = ["lib"]
  spec.rubygems_version = "2.0.2"
  spec.summary = "A tool for truncating HTML strings efficiently"

  # Restrict `gem push` to rubygems.org so the gem cannot be pushed to another
  # host by accident. This relies on gemspec metadata, hence the guard.
  if spec.respond_to?(:metadata)
    spec.metadata['allowed_push_host'] = "https://rubygems.org"
  else
    raise 'RubyGems 2.0 or newer is required to protect against ' \
      'public gem pushes.'
  end

  spec.add_dependency "nokogiri", ">= 1.7.0", "<= 2.0"
  spec.add_dependency "htmlentities", "~> 4.3.1"

  spec.add_development_dependency "rspec"
  spec.add_development_dependency "rake"
end
45 |
--------------------------------------------------------------------------------
/benchmark/truncato/benchmark_runner.rb:
--------------------------------------------------------------------------------
module Truncato
  # Builds a large synthetic XML document once and measures how long each
  # truncation implementation needs to cut it down to TRUNCATION_LENGTH.
  class BenchmarkRunner
    SYNTHETIC_XML_LENGTH = 4_000_000
    TRUNCATION_LENGTH = 400_000

    # Element wrapping the generated content so the document has a single root.
    SYNTHETIC_ROOT_NAME = 'synthetic-root'

    attr_reader :synthetic_xml

    # Generates the synthetic document (at least SYNTHETIC_XML_LENGTH characters
    # of random elements) and reports its size on stdout.
    def initialize
      @synthetic_xml = create_synthetic_xml(SYNTHETIC_XML_LENGTH)
      puts "Generated synthethic load with #{@synthetic_xml.length/1000.0}K characters"
    end

    # Benchmarks Truncato alone and prints the result.
    def run
      run_suite [Truncato]
    end

    # Benchmarks Truncato and the vendor adapters and prints the results.
    def run_comparison
      run_suite [Truncato, VendorHtmlTruncatorAdapter, PeppercornAdapter]
    end

    private

    # Runs every class in +truncation_classes+ and prints an array of
    # `{klass => {truncated_length:, time:}}` hashes.
    def run_suite(truncation_classes)
      results = truncation_classes.map { |klass| { klass => run_with(klass) } }
      show_results results
    end

    # @param length [Integer] minimum number of characters of random content
    # @return [String] random elements wrapped in a single root element
    def create_synthetic_xml(length)
      xml_content = "<#{SYNTHETIC_ROOT_NAME}>".dup
      append_random_xml_content xml_content, length
      xml_content << "</#{SYNTHETIC_ROOT_NAME}>"
      xml_content
    end

    # Appends `<TAG>TEXT</TAG>` elements with random upper-case names (1-10
    # letters) and text (1-300 letters) to +xml_content+, in place, until it is
    # at least +length+ characters long. At least one element is always added.
    def append_random_xml_content(xml_content, length)
      loop do
        random_tag = random_string(rand(10) + 1)
        xml_content << "\n<#{random_tag}>#{random_string(rand(300) + 1)}</#{random_tag}>\n"
        break if xml_content.length >= length
      end
    end

    # @return [String] +length+ random characters in the range A-Z
    def random_string(length)
      Array.new(length) { (65 + rand(26)).chr }.join
    end

    # Times one `truncate` call of +truncation_klass+ on the synthetic document.
    #
    # @return [Hash] :truncated_length (size of the result) and :time
    #   (`total` of the Benchmark measurement)
    def run_with(truncation_klass)
      puts "Running benchmark for #{truncation_klass}..."
      truncated_string = ""
      result = Benchmark.measure do
        truncated_string = truncation_klass.truncate synthetic_xml, max_length: TRUNCATION_LENGTH, count_tags: true
      end
      { truncated_length: truncated_string.length, time: result.total }
    end

    def show_results(results)
      puts results.inspect
    end
  end
end
--------------------------------------------------------------------------------
/.rvmrc:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | # This is an RVM Project .rvmrc file, used to automatically load the ruby
4 | # development environment upon cd'ing into the directory
5 |
6 | # First we specify our desired [@], the @gemset string is optional,
7 | # Only full ruby string is supported here, for short names use:
8 | # echo "rvm use 1.9.2" > .rvmrc
9 | environment_id="ruby-1.9.2-p320"
10 |
11 | # Uncomment the following lines if you want to verify rvm version per project
12 | # rvmrc_rvm_version="1.15.8 (stable)" # 1.10.1 seems as a safe start
13 | # eval "$(echo ${rvm_version}.${rvmrc_rvm_version} | awk -F. '{print "[[ "$1*65536+$2*256+$3" -ge "$4*65536+$5*256+$6" ]]"}' )" || {
14 | # echo "This .rvmrc file requires at least RVM ${rvmrc_rvm_version}, aborting loading."
15 | # return 1
16 | # }
17 |
18 | # First we attempt to load the desired environment directly from the environment
19 | # file. This is very fast and efficient compared to running through the entire
20 | # CLI and selector. If you want feedback on which environment was used then
21 | # insert the word 'use' after --create as this triggers verbose mode.
22 | if [[ -d "${rvm_path:-$HOME/.rvm}/environments"
23 | && -s "${rvm_path:-$HOME/.rvm}/environments/$environment_id" ]]
24 | then
25 | \. "${rvm_path:-$HOME/.rvm}/environments/$environment_id"
26 | [[ -s "${rvm_path:-$HOME/.rvm}/hooks/after_use" ]] &&
27 | \. "${rvm_path:-$HOME/.rvm}/hooks/after_use" || true
28 | else
29 | # If the environment file has not yet been created, use the RVM CLI to select.
30 | rvm --create "$environment_id" || {
31 | echo "Failed to create RVM environment '${environment_id}'."
32 | return 1
33 | }
34 | fi
35 |
36 | # If you use bundler, this might be useful to you:
37 | # if [[ -s Gemfile ]] && {
38 | # ! builtin command -v bundle >/dev/null ||
39 | # builtin command -v bundle | GREP_OPTIONS= \grep $rvm_path/bin/bundle >/dev/null
40 | # }
41 | # then
42 | # printf "%b" "The rubygem 'bundler' is not installed. Installing it now.\n"
43 | # gem install bundler
44 | # fi
45 | # if [[ -s Gemfile ]] && builtin command -v bundle >/dev/null
46 | # then
47 | # bundle install | GREP_OPTIONS= \grep -vE '^Using|Your bundle is complete'
48 | # fi
49 |
50 | git branch
51 | ruby -v
52 | git stash list
53 |
--------------------------------------------------------------------------------
/lib/truncato/truncato.rb:
--------------------------------------------------------------------------------
module Truncato
  DEFAULT_OPTIONS = {
    max_length: 30,
    count_tags: true,
    tail: "...",
    filtered_attributes: []
  }

  # Name of the element temporarily wrapped around the source so that it has a
  # single root; TruncatedSaxDocument leaves it out of the output.
  ARTIFICIAL_ROOT_NAME = 'truncato-artificial-root'.freeze

  # Truncates the source XML string and returns the truncated XML. It will keep a valid XML structure
  # and insert a _tail_ text indicating the position where content was removed (...).
  #
  # @param [String] source the XML source to truncate
  # @param [Hash] user_options truncation options, merged over DEFAULT_OPTIONS
  # @option user_options [Integer] :max_length Maximum length
  # @option user_options [String] :tail text to append when the truncation happens
  # @option user_options [Boolean] :count_tags `true` for counting tags for truncation, `false` for not counting them
  # @option user_options [Array] :filtered_attributes Array of names of attributes that should be excluded in the resulting truncated string. This allows you to make the truncated string shorter by excluding the content of attributes you can discard in some given context, e.g HTML `style` attribute.
  # @option user_options [Array] :filtered_tags Array of tag names to drop from the result, together with everything nested inside them
  # @option user_options [Boolean] :tail_before_final_tag `true` to place the tail before the final closing tag (`false` when not given)
  # @option user_options [Boolean] :comments `true` to keep comments in the result (`false` when not given)
  # @option user_options [Boolean] :count_tail `true` to count the tail towards :max_length (`false` when not given)
  # @return [String] the truncated string
  def self.truncate(source, user_options = {})
    options = DEFAULT_OPTIONS.merge(user_options)
    truncate_html(source, options) || truncate_no_html(source, options)
  end

  # NOTE: the helpers below are public class methods. A bare `private` does not
  # apply to `def self.` methods, so they have always been callable from
  # outside; their visibility is left as it was.

  # Normalizes +source+ and truncates it as HTML.
  #
  # @return [String, nil] the truncated markup, or nil when parsing the
  #   unwrapped source produces no output (the caller then falls back to
  #   plain-text truncation)
  def self.truncate_html(source, options)
    source = unicode_normalize(source)
    return nil unless do_truncate_html(source, options)

    do_truncate_html(with_artificial_root(source), options)
  end

  # @return [String] +string+ after String#unicode_normalize, or +string+
  #   unchanged when its encoding does not support normalization
  def self.unicode_normalize(string)
    string.unicode_normalize
  rescue Encoding::CompatibilityError
    # By relying on rescue we don't have to maintain a list of compatible encodings.
    string
  end

  # Feeds +source+ through Nokogiri's HTML SAX parser into a
  # TruncatedSaxDocument configured with +options+.
  #
  # @return [String, nil] the truncated string, or nil when it is empty
  def self.do_truncate_html(source, options)
    truncated_sax_document = TruncatedSaxDocument.new(options)

    # Only nokogiri >= 1.17 accept Encoding object, older needs a String as encoding
    parser = Nokogiri::HTML::SAX::Parser.new(truncated_sax_document, source.encoding.to_s)

    parser.parse(source) { |context| context.replace_entities = false }
    truncated_string = truncated_sax_document.truncated_string
    truncated_string.empty? ? nil : truncated_string
  end

  # @return [String] +source+ enclosed in an ARTIFICIAL_ROOT_NAME element
  def self.with_artificial_root(source)
    "<#{ARTIFICIAL_ROOT_NAME}>#{source}</#{ARTIFICIAL_ROOT_NAME}>"
  end

  # Plain-text fallback: keeps the first :max_length characters of +source+ and
  # appends :tail only when something was cut off.
  #
  # A length-based slice is used (not `0..max_length-1`) because with a
  # :max_length of 0 that range becomes `0..-1`, i.e. the whole string.
  def self.truncate_no_html(source, options)
    max_length = options[:max_length]
    tail = source.length > max_length ? options[:tail] : ''
    "#{source[0, [max_length, 0].max]}#{tail}"
  end
end
61 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # truncato
2 |
3 | *truncato* is a Ruby library for truncating HTML strings keeping the markup valid.
4 |
5 | ## Installing
6 |
7 | In your `Gemfile`
8 |
9 | ```ruby
10 | gem 'truncato'
11 | ```
12 |
13 | ## Usage
14 |
15 | ```ruby
16 | Truncato.truncate "<p>some text</p>", max_length: 4 #=> "<p>s...</p>"
17 | Truncato.truncate "<p>some text</p>", max_length: 4, count_tags: false #=> "<p>some...</p>"
18 | ```
19 |
20 | The configuration options are:
21 |
22 | * `max_length`: The size, in characters, to truncate (`30` by default)
23 | * `filtered_attributes`: Array of attribute names that will be removed in the truncated string. This allows you to make the truncated string shorter by excluding the content of attributes you can discard in some given context, e.g HTML `style` attribute.
24 | * `filtered_tags`: Array of tags that will be removed in the truncated string. If a tag is excluded, all the nested tags under it will be excluded too.
25 | * `count_tags`: Boolean value indicating whether tags size should be considered when truncating (`true` by default)
26 | * `tail_before_final_tag`: Boolean value indicating whether to apply a tail before the final closing tag (`false` by default)
27 | * `comments`: Boolean value indicating whether to include comments in parsed results (`false` by default)
28 | * `tail`: The string to append when the truncation occurs ('...' by default)
29 | * `count_tail`: Boolean value indicating whether to include the tail within the bounds of the provided max length (`false` by default)
30 |
31 | ## Performance
32 |
33 | Truncato was designed with performance in mind. Its main motivation was that existing libs couldn't truncate a multiple-MB document into a few-KB one in a reasonable time. It uses the [Nokogiri](http://nokogiri.org/) SAX parser.
34 |
35 | There is a benchmark included that generates a synthetic XML of 4MB and truncates it to 400 KB. You can run the benchmark using
36 |
37 | ```ruby
38 | rake truncato:benchmark
39 | ```
40 |
41 | There is also a comparison benchmark that tests the previous data with other alternatives
42 |
43 | ```ruby
44 | rake truncato:vendor_compare
45 | ```
46 |
47 | The results comparing truncato with other libs:
48 |
49 | <table>
50 | <tr>
51 | <th></th>
52 | <th>Truncato</th>
53 | <th>truncate_html</th>
54 | <th>HTML Truncator</th>
55 | <th>peppercorn</th>
56 | </tr>
57 | <tr>
58 | <th>Time for truncating a 4MB XML document to 4KB</th>
59 | <td>1.5 s</td>
60 | <td>20 s</td>
61 | <td>220 s</td>
62 | <td>232 s</td>
63 | </tr>
64 | </table>
65 |
66 | ## Running the tests
67 |
68 | ```ruby
69 | rake spec
70 | ```
71 |
--------------------------------------------------------------------------------
/lib/truncato/truncated_sax_document.rb:
--------------------------------------------------------------------------------
1 | require 'nokogiri'
2 | require 'htmlentities'
3 |
# SAX handler that rebuilds the parsed markup into a buffer until an estimated
# length budget (:max_length) is used up, then appends the tail and closes the
# elements that are still open so the result stays well-formed.
class TruncatedSaxDocument < Nokogiri::XML::SAX::Document
  # Elements that are never written to the output.
  IGNORABLE_TAGS = %w(html head body)

  # Elements written in self-closing form and never tracked for closing.
  SINGLE_TAGS = %w{br img}

  attr_reader :max_length, :max_length_reached, :tail,
              :count_tags, :filtered_attributes, :filtered_tags, :ignored_levels

  # @param options [Hash] reads :max_length, :count_tags, :count_tail, :tail,
  #   :filtered_attributes, :filtered_tags, :tail_before_final_tag, :comments
  def initialize(options)
    @html_coder = HTMLEntities.new
    capture_options options
    init_parsing_state
  end

  # SAX callback: writes the opening tag and remembers the element for closing,
  # unless the budget is exhausted, the tag is ignorable, or we are inside a
  # filtered tag.
  def start_element(name, attributes)
    enter_ignored_level if filtered_tags.include?(name)
    return if @max_length_reached || ignorable_tag?(name) || ignore_mode?

    @closing_tags.push name unless single_tag_element? name
    append_to_truncated_string opening_tag(name, attributes), overriden_tag_length
  end

  # SAX callback: appends text, cut to the remaining budget when it does not
  # fit. The text is entity-encoded for output but counted by its decoded
  # length.
  def characters(decoded_string)
    return if @max_length_reached || ignore_mode?

    remaining_length = max_length - @estimated_length - 1
    string_to_append = decoded_string.length > remaining_length ? truncate_string(decoded_string, remaining_length) : decoded_string
    append_to_truncated_string @html_coder.encode(string_to_append), string_to_append.length
  end

  # SAX callback: keeps comments only when the :comments option is set.
  def comment(string)
    return unless @comments
    return if @max_length_reached

    process_comment string
  end

  # SAX callback: writes the closing tag for a tracked element. Once the budget
  # is exhausted nothing is popped here, so close_truncated_document can still
  # close everything that remains open.
  def end_element(name)
    if filtered_tags.include?(name) && ignore_mode?
      exit_ignored_level
      return
    end

    return if @max_length_reached || ignorable_tag?(name) || ignore_mode?
    return if single_tag_element? name

    @closing_tags.pop
    append_to_truncated_string closing_tag(name), overriden_tag_length
  end

  # SAX callback: finishes the output if truncation happened.
  def end_document
    close_truncated_document if max_length_reached
  end

  # @return [String] everything written so far
  def truncated_string
    @truncated_buffer.join
  end

  private

  def capture_options(options)
    @max_length = options[:max_length]
    @count_tags = options[:count_tags]
    @count_tail = options.fetch(:count_tail, false)
    @tail = options[:tail]
    @filtered_attributes = options[:filtered_attributes] || []
    @filtered_tags = options[:filtered_tags] || []
    @tail_before_final_tag = options.fetch(:tail_before_final_tag, false)
    @comments = options.fetch(:comments, false)
  end

  # Appends the comment markup, truncated when it exceeds the remaining budget.
  def process_comment(string)
    remaining_length = max_length - @estimated_length - 1
    markup = comment_tag(string)
    string_to_append = markup.length > remaining_length ? truncate_comment(markup, remaining_length) : markup
    append_to_truncated_string string_to_append
  end

  # @return [String] +comment+ wrapped in comment delimiters
  def comment_tag(comment)
    "<!--#{comment}-->"
  end

  def init_parsing_state
    @truncated_buffer = []
    @closing_tags = []
    # With :count_tail the tail is charged against the budget up front.
    @estimated_length = @count_tail ? tail_length : 0
    @max_length_reached = false
    @tail_appended = false
    @ignored_levels = 0
  end

  # A tail that is a single named entity (e.g. "&hellip;") counts as one
  # character; any other tail counts by its string length.
  def tail_length
    tail.match?(/^&\w+;$/) ? 1 : tail.length
  end

  def single_tag_element?(name)
    SINGLE_TAGS.include? name
  end

  # Writes +string+ and charges +overriden_length+ (or the string's own length
  # when nil) against the budget.
  def append_to_truncated_string(string, overriden_length = nil)
    @truncated_buffer << string
    increase_estimated_length(overriden_length || string.length)
  end

  def opening_tag(name, attributes)
    attributes_string = attributes_to_string attributes
    if single_tag_element? name
      "<#{name}#{attributes_string}/>"
    else
      "<#{name}#{attributes_string}>"
    end
  end

  # @return [String] the attributes as ` key='value'` pairs (each with a
  #   leading space), leaving out filtered attributes; "" when none remain
  def attributes_to_string(attributes)
    return "" if attributes.empty?

    concatenate_attributes_declaration attributes
  end

  # Built with map/join so no string literal is mutated in place.
  def concatenate_attributes_declaration(attributes)
    kept = attributes.reject { |key, _value| @filtered_attributes.include? key }
    kept.map { |key, value| " #{key}='#{@html_coder.encode value}'" }.join
  end

  def closing_tag(name)
    "</#{name}>"
  end

  def increase_estimated_length(amount)
    @estimated_length += amount
    check_max_length_reached
  end

  def check_max_length_reached
    @max_length_reached = true if @estimated_length >= max_length
  end

  # First `remaining_length + 1` characters of +string+, and none at all when
  # the budget is already overdrawn. A plain `string[0..remaining_length]`
  # would, for a negative bound, count from the end of the string and return
  # almost all of it. That happens when :count_tail charges a tail that is
  # already as long as :max_length.
  def leading_slice(string, remaining_length)
    string[0, [remaining_length + 1, 0].max]
  end

  # Cuts +string+ to the budget; appends the tail right away unless it is to be
  # placed before the final closing tag later on.
  def truncate_string(string, remaining_length)
    kept = leading_slice(string, remaining_length)
    return kept if @tail_before_final_tag

    @tail_appended = true
    "#{kept}#{tail}"
  end

  # Same as truncate_string for comment markup, re-closing the comment after
  # the tail.
  def truncate_comment(string, remaining_length)
    kept = leading_slice(string, remaining_length)
    return kept if @tail_before_final_tag

    @tail_appended = true
    "#{kept}#{tail}-->"
  end

  def close_truncated_document
    append_tail_between_closing_tags if @tail_before_final_tag
    append_to_truncated_string tail unless @tail_appended
    append_closing_tags
  end

  # Closes every element still open, innermost first.
  def append_closing_tags
    @closing_tags.reverse_each { |name| append_to_truncated_string closing_tag(name) }
  end

  # nil lets a tag count with its real length; 0 makes it free.
  def overriden_tag_length
    @count_tags ? nil : 0
  end

  def ignorable_tag?(name)
    artificial_root_name?(name) || IGNORABLE_TAGS.include?(name.downcase)
  end

  def artificial_root_name?(name)
    name == Truncato::ARTIFICIAL_ROOT_NAME
  end

  # With more than one element open, closes the innermost one first so that
  # the tail (appended next) lands after it and before the remaining closing tags.
  def append_tail_between_closing_tags
    append_to_truncated_string closing_tag(@closing_tags.pop) if @closing_tags.length > 1
  end

  def enter_ignored_level
    @ignored_levels += 1
  end

  def exit_ignored_level
    @ignored_levels -= 1
  end

  # True while inside at least one filtered tag.
  def ignore_mode?
    @ignored_levels > 0
  end
end
198 |
--------------------------------------------------------------------------------
/spec/truncato/truncato_spec.rb:
--------------------------------------------------------------------------------
1 | require "spec_helper"
2 |
3 | describe "Truncato" do
4 | NBSP = Nokogiri::HTML(" ").text
5 |
6 | describe "normal strings" do
7 | it_should_truncate "no html text with longer length", with: {max_length: 13, tail: '...'}, source: "some text", expected: "some text"
8 | it_should_truncate "no html text with shorter length", with: {max_length: 3}, source: "some text", expected: "som..."
9 | it_should_truncate "no html text with longer length", with: {max_length: 5}, source: "some", expected: "some"
10 | end
11 |
12 | describe "unicode string" do
13 | it_should_truncate "text with non-ASCII characters",
14 | with: { max_length: 8 },
15 | source: "Großer Übungs- und Beispieltext",
16 | expected: "Großer Ü..."
17 | it_should_truncate "with decomposed codes",
18 | with: { max_length: 8 },
19 | source: "Großer Übungs- und Beispieltext".unicode_normalize(:nfd),
20 | expected: "Großer Ü..."
21 | it_should_truncate "with multi-byte characters",
22 | with: { max_length: 3, count_tags: false },
23 | source: "轉街過巷 就如滑過浪潮 聽天說地 仍然剩我心跳",
24 | expected: "轉街過..."
25 | end
26 |
27 | # Support for non-UTF-8 only on ruby 3+.
28 | describe "non-unicode string", if: Gem::Version.new(RUBY_VERSION).segments.first > 2 do
29 | it_should_truncate "text with non-unicode encodings",
30 | with: { max_length: 8 },
31 | source: "Großer Übungs- und Beispieltext".encode!(Encoding::ISO_8859_1),
32 | expected: "Großer Ü..."
33 | end
34 |
35 | describe "html tags structure" do
36 | it_should_truncate "html text with a tag (counting tags)", with: {max_length: 4}, source: "some text
", expected: "s...
"
37 |
38 | it_should_truncate "html text with a tag (not counting tags)", with: {max_length: 4, count_tags: false}, source: "some text
", expected: "some...
"
39 |
40 | it_should_truncate "html text with nested tags (first node)", with: {max_length: 9},
41 | source: "",
42 | expected: ""
43 |
44 | it_should_truncate "html text with nested tags (second node)", with: {max_length: 33},
45 | source: "",
46 | expected: ""
47 |
48 | it_should_truncate "html text with nested tags (empty contents)", with: {max_length: 3},
49 | source: "",
50 | expected: "...
"
51 |
52 | it_should_truncate "html text with special html entioes", with: {max_length: 5},
53 | source: ">some text
",
54 | expected: ">s...
"
55 |
56 | it_should_truncate "html text with siblings tags", with: {max_length: 51},
57 | source: "some text 0
",
58 | expected: "some text 0
"
59 |
60 | it_should_truncate "html with unclosed tags", with: {max_length: 151},
61 | source: "",
62 | expected: ""
63 |
64 | it_should_truncate "sdasd", with: {},
65 | source: "Foo Bar", expected: "Foo#{NBSP}Bar"
66 | end
67 |
68 | describe "include tail as part of max_length" do
69 | it_should_truncate "html text with a tag (counting tail)", with: {max_length: 4, count_tail: true, count_tags: false},
70 | source: "some text
",
71 | expected: "s...
"
72 |
73 | it_should_truncate "html text with a tag (counting tail)", with: {max_length: 6, count_tail: true, count_tags: false}, source: "some text
", expected: "som...
"
74 |
75 | it_should_truncate "html text with a tag (counting tail)", with: {max_length: 16, count_tail: true, count_tags: false},
76 | source: "some text
some other text
",
77 | expected: "some text
some...
"
78 |
79 | it_should_truncate "html text with a tag (counting tail and including tail before final tag)", with: {max_length: 16, count_tail: true, count_tags: false, tail_before_final_tag: true},
80 | source: "some text
some other text
",
81 | expected: "some text
some...
"
82 |
83 | it_should_truncate "html text, counting special html characters as one character",
84 | with: {max_length: 16, count_tail: true, count_tags: false, tail_before_final_tag: true, tail: '…'},
85 | source: "some text
some other text
",
86 | expected: "some text
some o…
"
87 | end
88 |
89 | describe "insert tail between two or more final tags" do
90 | it_should_truncate "html text as normal when tail_before_final_tag option is not set",
91 | with: {max_length: 4, count_tags: false},
92 | source: "some textsome more text
",
93 | expected: "some...
"
94 |
95 | it_should_truncate "html text when tail_before_final_tag: true by inserting tail before the final tag, and after any other closing tags",
96 | with: {max_length: 4, count_tags: false, tail_before_final_tag: true},
97 | source: "some textsome more text
",
98 | expected: "some...
"
99 | end
100 |
101 | describe "single html tag elements" do
102 | it_should_truncate "html text with
element without adding a closing tag", with: {max_length: 9},
103 | source: "",
104 | expected: ""
105 |
106 | it_should_truncate "html text with
element without adding a closing tag", with: {max_length: 9},
107 | source: "
some text 1
some text 2
",
108 | expected: "
...
"
109 | end
110 |
111 | describe "comment html element" do
112 | it_should_truncate "html text and ignore element by default", with: {max_length: 20},
113 | source: "some text 1
",
114 | expected: "some text 1
"
115 |
116 | it_should_truncate "html text with element", with: {max_length: 30, comments: true},
117 | source: "some text 1
",
118 | expected: "some text...
"
119 |
120 | it_should_truncate "html text with element that exceeds the max_length", with: {max_length: 5, comments: true},
121 | source: "some text 1
",
122 | expected: ""
123 |
124 | it_should_truncate "html text with element with other elements that exceeds max_length", with: {max_length: 20, comments: true},
125 | source: "some text 1
",
126 | expected: "...
"
127 | end
128 |
129 | describe "html attributes" do
130 | it_should_truncate "html text with 1 attributes", with: {max_length: 3, count_tags: false},
131 | source: "some text
",
132 | expected: "som...
"
133 |
134 | it_should_truncate "html text with 1 attributes counting its size", with: {max_length: 16, count_tags: true},
135 | source: "some text
",
136 | expected: "som...
"
137 |
138 | it_should_truncate "html text with 2 attributes", with: {max_length: 3, count_tags: false},
139 | source: "some text
",
140 | expected: "som...
"
141 |
142 | it_should_truncate "html text with attributes in nested tags", with: {max_length: 4, count_tags: false},
143 | source: "",
144 | expected: ""
145 |
146 | it_should_truncate "html text with attribute containing entities respecting them", with: {max_length: 3, count_tags: false, filtered_attributes: ['attr2']},
147 | source: "text
",
148 | expected: "tex...
"
149 |
150 | it_should_truncate "html text with 2 attributes filtering one of them", with: {max_length: 90, count_tags: false, filtered_attributes: ['attr2']},
151 | source: "some text
filtered text
",
152 | expected: "some text
filtered text
"
153 |
154 | it_should_truncate "html text with 2 attributes filtering all of them", with: {max_length: 3, count_tags: false, filtered_attributes: ['attr1', 'attr2']},
155 | source: "some text
",
156 | expected: "som...
"
157 | end
158 |
159 | describe "excluded tags" do
160 | it_should_truncate "html text with a filtered tag", with: {max_length: 90, filtered_tags: %w(img)},
161 | source: "
some text
",
162 | expected: "some text
"
163 |
164 | it_should_truncate "html text with a filtered tag with nested tags", with: {max_length: 90, filtered_tags: %w(table img)},
165 | source: "some text
![]()
",
166 | expected: "some text
"
167 |
168 | it_should_truncate "html text with a filtered tag with nested tags where nested tags are filtered", with: {max_length: 90, filtered_tags: %w(table tr img)},
169 | source: "some text
![]()
",
170 | expected: "some text
"
171 | end
172 |
173 | end
174 |
--------------------------------------------------------------------------------