├── .ruby-version ├── lib ├── truncato │ ├── version.rb │ ├── truncato.rb │ └── truncated_sax_document.rb └── truncato.rb ├── .travis.yml ├── .gitignore ├── Rakefile ├── benchmark ├── truncato │ ├── vendor │ │ ├── peppercorn_adapter.rb │ │ └── vendor_html_truncator_adapter.rb │ └── benchmark_runner.rb └── truncato_benchmark.rb ├── spec ├── support │ └── spec_helpers │ │ └── truncato_macros.rb ├── spec_helper.rb └── truncato │ └── truncato_spec.rb ├── Gemfile ├── .github └── workflows │ └── ruby.yml ├── LICENSE.txt ├── truncato.gemspec ├── .rvmrc └── README.md /.ruby-version: -------------------------------------------------------------------------------- 1 | ruby-3.4.1 2 | -------------------------------------------------------------------------------- /lib/truncato/version.rb: -------------------------------------------------------------------------------- 1 | module Truncato 2 | VERSION='0.7.13' 3 | end 4 | -------------------------------------------------------------------------------- /lib/truncato.rb: -------------------------------------------------------------------------------- 1 | Dir[File.dirname(__FILE__) + '/truncato/**/*.rb'].each do |file| 2 | require file 3 | end 4 | 5 | 6 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: ruby 2 | env: 3 | global: 4 | - NOKOGIRI_USE_SYSTEM_LIBRARIES=true 5 | rvm: 6 | - 2.3.1 7 | - 2.4.4 8 | - 2.5.3 9 | - 2.6.0 10 | 11 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.gem 2 | *.rbc 3 | .bundle 4 | .config 5 | coverage 6 | InstalledFiles 7 | lib/bundler/man 8 | pkg 9 | rdoc 10 | spec/reports 11 | test/tmp 12 | test/version_tmp 13 | tmp 14 | 15 | # YARD artifacts 16 | .yardoc 17 | _yardoc 18 | doc/ 19 | .idea/ 20 | 21 | Gemfile.lock 22 | 
-------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | begin 2 | require 'bundler/setup' 3 | rescue LoadError 4 | puts 'You must `gem install bundler` and `bundle install` to run rake tasks' 5 | end 6 | 7 | require "rake" 8 | require "rspec/core/rake_task" 9 | 10 | RSpec::Core::RakeTask.new(:spec) 11 | 12 | task default: :spec 13 | -------------------------------------------------------------------------------- /benchmark/truncato/vendor/peppercorn_adapter.rb: -------------------------------------------------------------------------------- 1 | # Adapter for comparing https://github.com/nono/HTML-Truncator 2 | module Truncato 3 | class PeppercornAdapter 4 | def self.truncate string, options 5 | string.truncate_html options[:max_length], :tail=>options[:tail] 6 | end 7 | end 8 | end 9 | 10 | -------------------------------------------------------------------------------- /spec/support/spec_helpers/truncato_macros.rb: -------------------------------------------------------------------------------- 1 | module TruncatoMacros 2 | def it_should_truncate(example_description, options) 3 | it "should truncate #{example_description}" do 4 | expected_options = Truncato::DEFAULT_OPTIONS.merge(options[:with]) 5 | Truncato.truncate(options[:source], expected_options).should == options[:expected] 6 | end 7 | end 8 | end -------------------------------------------------------------------------------- /benchmark/truncato_benchmark.rb: -------------------------------------------------------------------------------- 1 | $:.unshift File.join(File.dirname(__FILE__), '..', 'lib') 2 | 3 | require 'rubygems' 4 | require 'bundler' 5 | require 'nokogiri' 6 | require 'truncato' 7 | require 'html_truncator' 8 | require 'peppercorn' 9 | require 'benchmark' 10 | 11 | Bundler.setup 12 | Bundler.require 13 | 14 | Dir[File.dirname(__FILE__) + '/truncato/**/*.rb'].each do |file| 
15 | load file 16 | end 17 | 18 | 19 | -------------------------------------------------------------------------------- /benchmark/truncato/vendor/vendor_html_truncator_adapter.rb: -------------------------------------------------------------------------------- 1 | # Adapter for comparing https://github.com/nono/HTML-Truncator 2 | module Truncato 3 | class VendorHtmlTruncatorAdapter 4 | def self.truncate string, options 5 | HTML_Truncator.truncate string, options[:max_length], ellipsis: "..." 6 | end 7 | end 8 | end 9 | 10 | #[{Truncato::VendorHtmlTruncatorAdapter=>{:truncated_length=>3584682, :time=>223.36}}] 11 | 12 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source "https://rubygems.org" 2 | gemspec 3 | 4 | 5 | # Add dependencies required to use your gem here. 6 | # Example: 7 | # gem "activesupport", ">= 2.3.5" 8 | 9 | # Add dependencies to develop your gem here. 10 | # Include everything needed to run rake, tests, features, etc. 11 | 12 | group :development do 13 | gem "bundler" 14 | end 15 | 16 | group :benchrmark do 17 | gem 'html_truncator' 18 | gem 'peppercorn' 19 | end 20 | -------------------------------------------------------------------------------- /spec/spec_helper.rb: -------------------------------------------------------------------------------- 1 | $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib')) 2 | $LOAD_PATH.unshift(File.dirname(__FILE__)) 3 | 4 | require 'rubygems' 5 | require 'bundler' 6 | require 'nokogiri' 7 | 8 | Bundler.setup 9 | Bundler.require 10 | 11 | # Requires supporting files with custom matchers and macros, etc, 12 | # in ./support/ and its subdirectories. 
13 | Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each { |f| require f } 14 | 15 | RSpec.configure do |config| 16 | config.extend TruncatoMacros 17 | end 18 | -------------------------------------------------------------------------------- /.github/workflows/ruby.yml: -------------------------------------------------------------------------------- 1 | name: Ruby 2 | 3 | on: 4 | push: 5 | branches: [ "master" ] 6 | pull_request: 7 | branches: [ "**" ] 8 | 9 | permissions: 10 | contents: read 11 | 12 | jobs: 13 | test: 14 | 15 | runs-on: ubuntu-22.04 16 | strategy: 17 | matrix: 18 | ruby-version: ["2.6", "2.7", "3.0", "3.1", "3.2", "3.3", "3.4"] 19 | 20 | steps: 21 | - uses: actions/checkout@v4 22 | - name: Set up Ruby 23 | uses: ruby/setup-ruby@v1 24 | with: 25 | ruby-version: ${{ matrix.ruby-version }} 26 | bundler-cache: true 27 | 28 | - name: Run tests 29 | run: bundle exec rake 30 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2011 Jorge Manrubia 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining 4 | a copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be 12 | included in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /truncato.gemspec: -------------------------------------------------------------------------------- 1 | # Generated by jeweler 2 | # DO NOT EDIT THIS FILE DIRECTLY 3 | # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec' 4 | # -*- encoding: utf-8 -*- 5 | 6 | $:.push File.expand_path("../lib", __FILE__) 7 | 8 | # Maintain your gem'spec version: 9 | require "truncato/version" 10 | 11 | Gem::Specification.new do |spec| 12 | spec.name = "truncato" 13 | spec.version = Truncato::VERSION 14 | 15 | spec.authors = ["Jorge Manrubia"] 16 | spec.date = "2013-09-10" 17 | spec.description = "Ruby tool for truncating HTML strings keeping a valid HTML markup" 18 | spec.email = "jorge.manrubia@gmail.com" 19 | spec.extra_rdoc_files = [ 20 | "LICENSE.txt", 21 | "README.md" 22 | ] 23 | spec.files = Dir["{app,config,db,lib}/**/*", "LICENSE.txt", "Rakefile", "README.rdoc"] 24 | spec.homepage = "https://github.com/jorgemanrubia/truncato" 25 | spec.licenses = ["MIT"] 26 | spec.require_paths = ["lib"] 27 | spec.rubygems_version = "2.0.2" 28 | spec.summary = "A tool for truncating HTML strings efficiently" 29 | 30 | # Prevent pushing this gem to RubyGems.org. To allow pushes either set the 'allowed_push_host' 31 | # to allow pushing to a single host or delete this section to allow pushing to any host. 32 | if spec.respond_to?(:metadata) 33 | spec.metadata['allowed_push_host'] = "https://rubygems.org" 34 | else 35 | raise 'RubyGems 2.0 or newer is required to protect against ' \ 36 | 'public gem pushes.' 
37 | end 38 | 39 | spec.add_dependency "nokogiri", ">= 1.7.0", "<= 2.0" 40 | spec.add_dependency "htmlentities", "~> 4.3.1" 41 | 42 | spec.add_development_dependency "rspec" 43 | spec.add_development_dependency "rake" 44 | end 45 | -------------------------------------------------------------------------------- /benchmark/truncato/benchmark_runner.rb: -------------------------------------------------------------------------------- 1 | module Truncato 2 | class BenchmarkRunner 3 | SYNTHETIC_XML_LENGTH = 4000000 4 | TRUNCATION_LENGTH = 400000 5 | 6 | attr_reader :synthetic_xml 7 | 8 | def initialize 9 | @synthetic_xml = create_synthetic_xml(SYNTHETIC_XML_LENGTH) 10 | puts "Generated synthethic load with #{@synthetic_xml.length/1000.0}K characters" 11 | end 12 | 13 | def run 14 | run_suite [Truncato] 15 | end 16 | 17 | def run_comparison 18 | run_suite [Truncato, VendorHtmlTruncatorAdapter, PeppercornAdapter] 19 | end 20 | 21 | 22 | private 23 | 24 | def run_suite(truncation_classes) 25 | results = truncation_classes.collect { |klass| {klass => run_with(klass)} } 26 | show_results results 27 | end 28 | 29 | def create_synthetic_xml(length) 30 | xml_content = "" 31 | append_random_xml_content xml_content, length 32 | xml_content << "" 33 | xml_content 34 | end 35 | 36 | def append_random_xml_content(xml_content, length) 37 | begin 38 | random_tag = random_string(rand(10)+1) 39 | xml_content << %{ 40 | <#{random_tag}>#{random_string(rand(300)+1)} 41 | } 42 | end while (xml_content.length < length) 43 | end 44 | 45 | def random_string(length) 46 | (0...length).map { 65.+(rand(26)).chr }.join 47 | end 48 | 49 | def run_with(truncation_klass) 50 | puts "Running benchmark for #{truncation_klass}..." 
51 | truncated_string = "" 52 | result = Benchmark.measure { truncated_string = truncation_klass.truncate synthetic_xml, max_length: TRUNCATION_LENGTH, count_tags: true } 53 | {truncated_length: truncated_string.length, time: result.total} 54 | end 55 | 56 | def show_results(results) 57 | puts results.inspect 58 | end 59 | 60 | end 61 | end -------------------------------------------------------------------------------- /.rvmrc: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # This is an RVM Project .rvmrc file, used to automatically load the ruby 4 | # development environment upon cd'ing into the directory 5 | 6 | # First we specify our desired [@], the @gemset string is optional, 7 | # Only full ruby string is supported here, for short names use: 8 | # echo "rvm use 1.9.2" > .rvmrc 9 | environment_id="ruby-1.9.2-p320" 10 | 11 | # Uncomment the following lines if you want to verify rvm version per project 12 | # rvmrc_rvm_version="1.15.8 (stable)" # 1.10.1 seams as a safe start 13 | # eval "$(echo ${rvm_version}.${rvmrc_rvm_version} | awk -F. '{print "[[ "$1*65536+$2*256+$3" -ge "$4*65536+$5*256+$6" ]]"}' )" || { 14 | # echo "This .rvmrc file requires at least RVM ${rvmrc_rvm_version}, aborting loading." 15 | # return 1 16 | # } 17 | 18 | # First we attempt to load the desired environment directly from the environment 19 | # file. This is very fast and efficient compared to running through the entire 20 | # CLI and selector. If you want feedback on which environment was used then 21 | # insert the word 'use' after --create as this triggers verbose mode. 22 | if [[ -d "${rvm_path:-$HOME/.rvm}/environments" 23 | && -s "${rvm_path:-$HOME/.rvm}/environments/$environment_id" ]] 24 | then 25 | \. "${rvm_path:-$HOME/.rvm}/environments/$environment_id" 26 | [[ -s "${rvm_path:-$HOME/.rvm}/hooks/after_use" ]] && 27 | \. 
"${rvm_path:-$HOME/.rvm}/hooks/after_use" || true 28 | else 29 | # If the environment file has not yet been created, use the RVM CLI to select. 30 | rvm --create "$environment_id" || { 31 | echo "Failed to create RVM environment '${environment_id}'." 32 | return 1 33 | } 34 | fi 35 | 36 | # If you use bundler, this might be useful to you: 37 | # if [[ -s Gemfile ]] && { 38 | # ! builtin command -v bundle >/dev/null || 39 | # builtin command -v bundle | GREP_OPTIONS= \grep $rvm_path/bin/bundle >/dev/null 40 | # } 41 | # then 42 | # printf "%b" "The rubygem 'bundler' is not installed. Installing it now.\n" 43 | # gem install bundler 44 | # fi 45 | # if [[ -s Gemfile ]] && builtin command -v bundle >/dev/null 46 | # then 47 | # bundle install | GREP_OPTIONS= \grep -vE '^Using|Your bundle is complete' 48 | # fi 49 | 50 | git branch 51 | ruby -v 52 | git stash list 53 | -------------------------------------------------------------------------------- /lib/truncato/truncato.rb: -------------------------------------------------------------------------------- 1 | module Truncato 2 | DEFAULT_OPTIONS = { 3 | max_length: 30, 4 | count_tags: true, 5 | tail: "...", 6 | filtered_attributes: [] 7 | } 8 | 9 | ARTIFICIAL_ROOT_NAME = 'truncato-artificial-root' 10 | 11 | # Truncates the source XML string and returns the truncated XML. It will keep a valid XML structure 12 | # and insert a _tail_ text indicating the position where content were removed (...). 13 | # 14 | # @param [String] source the XML source to truncate 15 | # @param [Hash] user_options truncation options 16 | # @option user_options [Integer] :max_length Maximum length 17 | # @option user_options [String] :tail text to append when the truncation happens 18 | # @option user_options [Boolean] :count_tags `true` for counting tags for truncation, `false` for not counting them 19 | # @option user_options [Array] :filtered_attributes Array of names of attributes that should be excluded in the resulting truncated string. 
This allows you to make the truncated string shorter by excluding the content of attributes you can discard in some given context, e.g HTML `style` attribute. 20 | # @return [String] the truncated string 21 | def self.truncate source, user_options={} 22 | options = DEFAULT_OPTIONS.merge(user_options) 23 | self.truncate_html(source, options) || self.truncate_no_html(source, options) 24 | end 25 | 26 | private 27 | 28 | def self.truncate_html source, options 29 | source = unicode_normalize(source) 30 | self.do_truncate_html(source, options) ? self.do_truncate_html(with_artificial_root(source), options) : nil 31 | end 32 | 33 | def self.unicode_normalize(string) 34 | string.unicode_normalize 35 | rescue Encoding::CompatibilityError 36 | # By relying on rescue we don't have to maintain a list of compatible encodings. 37 | string 38 | end 39 | 40 | def self.do_truncate_html source, options 41 | truncated_sax_document = TruncatedSaxDocument.new(options) 42 | 43 | # Only nokogiri >= 1.17 accept Encoding object, older needs a String as encoding 44 | parser = Nokogiri::HTML::SAX::Parser.new(truncated_sax_document, source.encoding.to_s) 45 | 46 | parser.parse(source) { |context| context.replace_entities = false } 47 | truncated_string = truncated_sax_document.truncated_string 48 | truncated_string.empty? ? nil : truncated_string 49 | end 50 | 51 | def self.with_artificial_root(source) 52 | "<#{ARTIFICIAL_ROOT_NAME}>#{source}" 53 | end 54 | 55 | def self.truncate_no_html source, options 56 | max_length = options[:max_length] 57 | tail = source.length > max_length ? options[:tail] : '' 58 | "#{source[0..max_length-1]}#{tail}" 59 | end 60 | end 61 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # truncato 2 | 3 | *truncato* is a Ruby library for truncating HTML strings keeping the markup valid. 
4 | 5 | ## Installing 6 | 7 | In your `Gemfile` 8 | 9 | ```ruby 10 | gem 'truncato' 11 | ``` 12 | 13 | ## Usage 14 | 15 | ```ruby 16 | Truncato.truncate "

some text

", max_length: 4 #=> "

s...

" 17 | Truncato.truncate "

some text

", max_length: 4, count_tags: false #=> "

some...

" 18 | ``` 19 | 20 | The configuration options are: 21 | 22 | * `max_length`: The size, in characters, to truncate (`30` by default) 23 | * `filtered_attributes`: Array of attribute names that will be removed in the truncated string. This allows you to make the truncated string shorter by excluding the content of attributes you can discard in some given context, e.g. the HTML `style` attribute. 24 | * `filtered_tags`: Array of tags that will be removed in the truncated string. If a tag is excluded, all the nested tags under it will be excluded too. 25 | * `count_tags`: Boolean value indicating whether tags size should be considered when truncating (`true` by default) 26 | * `tail_before_final_tag`: Boolean value indicating whether to apply a tail before the final closing tag (`false` by default) 27 | * `comments`: Boolean value indicating whether to include comments in parsed results (`false` by default) 28 | * `tail`: The string to append when the truncation occurs ('...' by default) 29 | * `count_tail`: Boolean value indicating whether to include the tail within the bounds of the provided max length (`false` by default) 30 | 31 | ## Performance 32 | 33 | Truncato was designed with performance in mind. Its main motivation was that existing libs couldn't truncate a multiple-MB document into a few-KB one in a reasonable time. It uses the [Nokogiri](http://nokogiri.org/) SAX parser. 34 | 35 | There is a benchmark included that generates a synthetic XML of 4MB and truncates it to 400 KB. You can run the benchmark using 36 | 37 | ```ruby 38 | rake truncato:benchmark 39 | ``` 40 | 41 | There is also a comparison benchmark that tests the previous data with other alternatives 42 | 43 | ```ruby 44 | rake truncato:vendor_compare 45 | ``` 46 | 47 | The results comparing truncato with other libs: 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 |
Truncatotruncate_htmlHTML Truncatorpeppercorn
Time for truncating a 4MB XML document to 4KB1.5 s20 s220 s232 s
65 | 66 | ## Running the tests 67 | 68 | ```ruby 69 | rake spec 70 | ``` 71 | -------------------------------------------------------------------------------- /lib/truncato/truncated_sax_document.rb: -------------------------------------------------------------------------------- 1 | require 'nokogiri' 2 | require 'htmlentities' 3 | 4 | class TruncatedSaxDocument < Nokogiri::XML::SAX::Document 5 | IGNORABLE_TAGS = %w(html head body) 6 | 7 | SINGLE_TAGS = %w{br img} 8 | 9 | attr_reader :max_length, :max_length_reached, :tail, 10 | :count_tags, :filtered_attributes, :filtered_tags, :ignored_levels 11 | 12 | def initialize(options) 13 | @html_coder = HTMLEntities.new 14 | capture_options options 15 | init_parsing_state 16 | end 17 | 18 | def start_element name, attributes 19 | enter_ignored_level if filtered_tags.include?(name) 20 | return if @max_length_reached || ignorable_tag?(name) || ignore_mode? 21 | @closing_tags.push name unless single_tag_element? name 22 | append_to_truncated_string opening_tag(name, attributes), overriden_tag_length 23 | end 24 | 25 | def characters decoded_string 26 | return if @max_length_reached || ignore_mode? 27 | remaining_length = max_length - @estimated_length - 1 28 | string_to_append = decoded_string.length > remaining_length ? truncate_string(decoded_string, remaining_length) : decoded_string 29 | append_to_truncated_string @html_coder.encode(string_to_append), string_to_append.length 30 | end 31 | 32 | def comment string 33 | if @comments 34 | return if @max_length_reached 35 | process_comment string 36 | end 37 | end 38 | 39 | def end_element name 40 | if filtered_tags.include?(name) && ignore_mode? 41 | exit_ignored_level 42 | return 43 | end 44 | 45 | return if @max_length_reached || ignorable_tag?(name) || ignore_mode? 46 | 47 | unless single_tag_element? 
name 48 | @closing_tags.pop 49 | append_to_truncated_string closing_tag(name), overriden_tag_length 50 | end 51 | end 52 | 53 | def end_document 54 | close_truncated_document if max_length_reached 55 | end 56 | 57 | def truncated_string 58 | @truncated_buffer.join 59 | end 60 | 61 | private 62 | 63 | def capture_options(options) 64 | @max_length = options[:max_length] 65 | @count_tags = options [:count_tags] 66 | @count_tail = options.fetch(:count_tail, false) 67 | @tail = options[:tail] 68 | @filtered_attributes = options[:filtered_attributes] || [] 69 | @filtered_tags = options[:filtered_tags] || [] 70 | @tail_before_final_tag = options.fetch(:tail_before_final_tag, false) 71 | @comments = options.fetch(:comments, false) 72 | end 73 | 74 | def process_comment(string) 75 | remaining_length = max_length - @estimated_length - 1 76 | string_to_append = comment_tag(string).length > remaining_length ? truncate_comment(comment_tag(string), remaining_length) : comment_tag(string) 77 | append_to_truncated_string string_to_append 78 | end 79 | 80 | def comment_tag comment 81 | "" 82 | end 83 | 84 | def init_parsing_state 85 | @truncated_buffer = [] 86 | @closing_tags = [] 87 | @estimated_length = @count_tail ? tail_length : 0 88 | @max_length_reached = false 89 | @ignored_levels = 0 90 | end 91 | 92 | def tail_length 93 | tail.match(/^&\w+;$/).nil? ? tail.length : 1 94 | end 95 | 96 | def single_tag_element? name 97 | SINGLE_TAGS.include? name 98 | end 99 | 100 | def append_to_truncated_string string, overriden_length=nil 101 | @truncated_buffer << string 102 | increase_estimated_length(overriden_length || string.length) 103 | end 104 | 105 | def opening_tag name, attributes 106 | attributes_string = attributes_to_string attributes 107 | if single_tag_element? name 108 | "<#{name}#{attributes_string}/>" 109 | else 110 | "<#{name}#{attributes_string}>" 111 | end 112 | end 113 | 114 | def attributes_to_string attributes 115 | return "" if attributes.empty? 
116 | attributes_string = concatenate_attributes_declaration attributes 117 | attributes_string.rstrip 118 | end 119 | 120 | def concatenate_attributes_declaration attributes 121 | attributes.inject(' ') do |string, attribute| 122 | key, value = attribute 123 | next string if @filtered_attributes.include? key 124 | string << "#{key}='#{@html_coder.encode value}' " 125 | end 126 | end 127 | 128 | def closing_tag name 129 | "" 130 | end 131 | 132 | def increase_estimated_length amount 133 | @estimated_length += amount 134 | check_max_length_reached 135 | end 136 | 137 | def check_max_length_reached 138 | @max_length_reached = true if @estimated_length >= max_length 139 | end 140 | 141 | def truncate_string string, remaining_length 142 | if @tail_before_final_tag 143 | string[0..remaining_length] 144 | else 145 | @tail_appended = true 146 | "#{string[0..remaining_length]}#{tail}" 147 | end 148 | end 149 | 150 | def truncate_comment string, remaining_length 151 | if @tail_before_final_tag 152 | string[0..remaining_length] 153 | else 154 | @tail_appended = true 155 | "#{string[0..remaining_length]}#{tail}-->" 156 | end 157 | end 158 | 159 | def close_truncated_document 160 | append_tail_between_closing_tags if @tail_before_final_tag 161 | append_to_truncated_string tail unless @tail_appended 162 | append_closing_tags 163 | end 164 | 165 | def append_closing_tags 166 | @closing_tags.reverse.each { |name| append_to_truncated_string closing_tag name } 167 | end 168 | 169 | def overriden_tag_length 170 | @count_tags ? nil : 0 171 | end 172 | 173 | 174 | def ignorable_tag?(name) 175 | artificial_root_name?(name) || IGNORABLE_TAGS.include?(name.downcase) 176 | end 177 | 178 | def artificial_root_name? 
name 179 | name == Truncato::ARTIFICIAL_ROOT_NAME 180 | end 181 | 182 | def append_tail_between_closing_tags 183 | append_to_truncated_string closing_tag(@closing_tags.delete_at (@closing_tags.length - 1)) if @closing_tags.length > 1 184 | end 185 | 186 | def enter_ignored_level 187 | @ignored_levels += 1 188 | end 189 | 190 | def exit_ignored_level 191 | @ignored_levels -= 1 192 | end 193 | 194 | def ignore_mode? 195 | @ignored_levels > 0 196 | end 197 | end 198 | -------------------------------------------------------------------------------- /spec/truncato/truncato_spec.rb: -------------------------------------------------------------------------------- 1 | require "spec_helper" 2 | 3 | describe "Truncato" do 4 | NBSP = Nokogiri::HTML(" ").text 5 | 6 | describe "normal strings" do 7 | it_should_truncate "no html text with longer length", with: {max_length: 13, tail: '...'}, source: "some text", expected: "some text" 8 | it_should_truncate "no html text with shorter length", with: {max_length: 3}, source: "some text", expected: "som..." 9 | it_should_truncate "no html text with longer length", with: {max_length: 5}, source: "some", expected: "some" 10 | end 11 | 12 | describe "unicode string" do 13 | it_should_truncate "text with non-ASCII characters", 14 | with: { max_length: 8 }, 15 | source: "Großer Übungs- und Beispieltext", 16 | expected: "Großer Ü..." 17 | it_should_truncate "with decomposed codes", 18 | with: { max_length: 8 }, 19 | source: "Großer Übungs- und Beispieltext".unicode_normalize(:nfd), 20 | expected: "Großer Ü..." 21 | it_should_truncate "with multi-byte characters", 22 | with: { max_length: 3, count_tags: false }, 23 | source: "轉街過巷 就如滑過浪潮 聽天說地 仍然剩我心跳", 24 | expected: "轉街過..." 25 | end 26 | 27 | # Support for non-UTF-8 only on ruby 3+. 
28 | describe "non-unicode string", if: Gem::Version.new(RUBY_VERSION).segments.first > 2 do 29 | it_should_truncate "text with non-unicode encodings", 30 | with: { max_length: 8 }, 31 | source: "Großer Übungs- und Beispieltext".encode!(Encoding::ISO_8859_1), 32 | expected: "Großer Ü..." 33 | end 34 | 35 | describe "html tags structure" do 36 | it_should_truncate "html text with a tag (counting tags)", with: {max_length: 4}, source: "

some text

", expected: "

s...

" 37 | 38 | it_should_truncate "html text with a tag (not counting tags)", with: {max_length: 4, count_tags: false}, source: "

some text

", expected: "

some...

" 39 | 40 | it_should_truncate "html text with nested tags (first node)", with: {max_length: 9}, 41 | source: "

some text 1

some text 2

", 42 | expected: "

s...

" 43 | 44 | it_should_truncate "html text with nested tags (second node)", with: {max_length: 33}, 45 | source: "

some text 1

some text 2

", 46 | expected: "

some text 1

some te...

" 47 | 48 | it_should_truncate "html text with nested tags (empty contents)", with: {max_length: 3}, 49 | source: "

some text 1

some text 2

", 50 | expected: "
...
" 51 | 52 | it_should_truncate "html text with special html entioes", with: {max_length: 5}, 53 | source: "

>some text

", 54 | expected: "

>s...

" 55 | 56 | it_should_truncate "html text with siblings tags", with: {max_length: 51}, 57 | source: "
some text 0

some text 1

some text 2

", 58 | expected: "
some text 0

some text 1

som...

" 59 | 60 | it_should_truncate "html with unclosed tags", with: {max_length: 151}, 61 | source: "
Hi
there
", 62 | expected: "
Hi
there
" 63 | 64 | it_should_truncate "sdasd", with: {}, 65 | source: "Foo Bar", expected: "Foo#{NBSP}Bar" 66 | end 67 | 68 | describe "include tail as part of max_length" do 69 | it_should_truncate "html text with a tag (counting tail)", with: {max_length: 4, count_tail: true, count_tags: false}, 70 | source: "

some text

", 71 | expected: "

s...

" 72 | 73 | it_should_truncate "html text with a tag (counting tail)", with: {max_length: 6, count_tail: true, count_tags: false}, source: "

some text

", expected: "

som...

" 74 | 75 | it_should_truncate "html text with a tag (counting tail)", with: {max_length: 16, count_tail: true, count_tags: false}, 76 | source: "

some text

some other text
", 77 | expected: "

some text

some...
" 78 | 79 | it_should_truncate "html text with a tag (counting tail and including tail before final tag)", with: {max_length: 16, count_tail: true, count_tags: false, tail_before_final_tag: true}, 80 | source: "

some text

some other text
", 81 | expected: "

some text

some...
" 82 | 83 | it_should_truncate "html text, counting special html characters as one character", 84 | with: {max_length: 16, count_tail: true, count_tags: false, tail_before_final_tag: true, tail: '…'}, 85 | source: "

some text

some other text
", 86 | expected: "

some text

some o
" 87 | end 88 | 89 | describe "insert tail between two or more final tags" do 90 | it_should_truncate "html text as normal when tail_before_final_tag option is not set", 91 | with: {max_length: 4, count_tags: false}, 92 | source: "

some textsome more text

", 93 | expected: "

some...

" 94 | 95 | it_should_truncate "html text when tail_before_final_tag: true by inserting tail before the final tag, and after any other closing tags", 96 | with: {max_length: 4, count_tags: false, tail_before_final_tag: true}, 97 | source: "

some textsome more text

", 98 | expected: "

some...

" 99 | end 100 | 101 | describe "single html tag elements" do 102 | it_should_truncate "html text with
element without adding a closing tag", with: {max_length: 9}, 103 | source: "


some text 1

some text 2

", 104 | expected: "


...

" 105 | 106 | it_should_truncate "html text with element without adding a closing tag", with: {max_length: 9}, 107 | source: "

some text 1

some text 2

", 108 | expected: "

...

" 109 | end 110 | 111 | describe "comment html element" do 112 | it_should_truncate "html text and ignore element by default", with: {max_length: 20}, 113 | source: "

some text 1

", 114 | expected: "

some text 1

" 115 | 116 | it_should_truncate "html text with element", with: {max_length: 30, comments: true}, 117 | source: "

some text 1

", 118 | expected: "

some text...

" 119 | 120 | it_should_truncate "html text with element that exceeds the max_length", with: {max_length: 5, comments: true}, 121 | source: "

some text 1

", 122 | expected: "" 123 | 124 | it_should_truncate "html text with element with other elements that exceeds max_length", with: {max_length: 20, comments: true}, 125 | source: "

some text 1

", 126 | expected: "

...

" 127 | end 128 | 129 | describe "html attributes" do 130 | it_should_truncate "html text with 1 attributes", with: {max_length: 3, count_tags: false}, 131 | source: "

some text

", 132 | expected: "

som...

" 133 | 134 | it_should_truncate "html text with 1 attributes counting its size", with: {max_length: 16, count_tags: true}, 135 | source: "

some text

", 136 | expected: "

som...

" 137 | 138 | it_should_truncate "html text with 2 attributes", with: {max_length: 3, count_tags: false}, 139 | source: "

some text

", 140 | expected: "

som...

" 141 | 142 | it_should_truncate "html text with attributes in nested tags", with: {max_length: 4, count_tags: false}, 143 | source: "

some text

", 144 | expected: "

some...

" 145 | 146 | it_should_truncate "html text with attribute containing entities respecting them", with: {max_length: 3, count_tags: false, filtered_attributes: ['attr2']}, 147 | source: "

text

", 148 | expected: "

tex...

" 149 | 150 | it_should_truncate "html text with 2 attributes filtering one of them", with: {max_length: 90, count_tags: false, filtered_attributes: ['attr2']}, 151 | source: "

some text

filtered text

", 152 | expected: "

some text

filtered text

" 153 | 154 | it_should_truncate "html text with 2 attributes filtering all of them", with: {max_length: 3, count_tags: false, filtered_attributes: ['attr1', 'attr2']}, 155 | source: "

some text

", 156 | expected: "

som...

" 157 | end 158 | 159 | describe "excluded tags" do 160 | it_should_truncate "html text with a filtered tag", with: {max_length: 90, filtered_tags: %w(img)}, 161 | source: "

some text

", 162 | expected: "

some text

" 163 | 164 | it_should_truncate "html text with a filtered tag with nested tags", with: {max_length: 90, filtered_tags: %w(table img)}, 165 | source: "
Hi there
some text
", 166 | expected: "
some text
" 167 | 168 | it_should_truncate "html text with a filtered tag with nested tags where nested tags are filtered", with: {max_length: 90, filtered_tags: %w(table tr img)}, 169 | source: "
Hi there
some text
", 170 | expected: "
some text
" 171 | end 172 | 173 | end 174 | --------------------------------------------------------------------------------