├── .rspec ├── Gemfile ├── .gitignore ├── data └── display_width.marshal.gz ├── .editorconfig ├── lib └── unicode │ ├── display_width │ ├── string_ext.rb │ ├── constants.rb │ ├── no_string_ext.rb │ ├── reline_ext.rb │ ├── index.rb │ └── emoji_support.rb │ └── display_width.rb ├── misc └── terminal-emoji-width.rb ├── MIT-LICENSE.txt ├── .github └── workflows │ └── test.yml ├── unicode-display_width.gemspec ├── Rakefile ├── CODE_OF_CONDUCT.md ├── CHANGELOG.md ├── README.md └── spec └── display_width_spec.rb /.rspec: -------------------------------------------------------------------------------- 1 | --color 2 | --format documentation 3 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source "https://rubygems.org" 2 | 3 | gemspec 4 | 5 | gem "irb" 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | Gemfile.lock 2 | *.swp 3 | *~ 4 | pkg 5 | /data/EastAsianWidth.txt 6 | /.bundle 7 | -------------------------------------------------------------------------------- /data/display_width.marshal.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/janlelis/unicode-display_width/HEAD/data/display_width.marshal.gz -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | indent_style = space 5 | indent_size = 2 6 | end_of_line = lf 7 | insert_final_newline = true 8 | trim_trailing_whitespace = true 9 | 10 | [*.{md,rdoc,txt}] 11 | indent_size = 4 12 | 13 | -------------------------------------------------------------------------------- /lib/unicode/display_width/string_ext.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require_relative "../display_width" 4 | 5 | class String 6 | def display_width(ambiguous = nil, overwrite = nil, old_options = {}, **options) 7 | Unicode::DisplayWidth.of(self, ambiguous, overwrite, old_options = {}, **options) 8 | end 9 | end 10 | -------------------------------------------------------------------------------- /lib/unicode/display_width/constants.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module Unicode 4 | class DisplayWidth 5 | VERSION = "3.2.0" 6 | UNICODE_VERSION = "17.0.0" 7 | DATA_DIRECTORY = File.expand_path(File.dirname(__FILE__) + "/../../../data/") 8 | INDEX_FILENAME = DATA_DIRECTORY + "/display_width.marshal.gz" 9 | end 10 | end 11 | -------------------------------------------------------------------------------- /lib/unicode/display_width/no_string_ext.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | warn "You are loading 'unicode-display_width/no_string_ext'\n" \ 4 | "Beginning with version 2.0, this is not necessary anymore\n"\ 5 | "You can just require 'unicode-display_width' now and no\n"\ 6 | "string extension will be loaded" 7 | 8 | require_relative "../display_width" 9 | -------------------------------------------------------------------------------- /lib/unicode/display_width/reline_ext.rb: -------------------------------------------------------------------------------- 1 | # Experimental 2 | # Patches Reline's get_mbchar_width to use Unicode::DisplayWidth 3 | 4 | require "reline" 5 | require "reline/unicode" 6 | 7 | require_relative "../display_width" 8 | 9 | class Reline::Unicode 10 | def self.get_mbchar_width(mbchar) 11 | Unicode::DisplayWidth.of(mbchar, Reline.ambiguous_width) 12 | end 13 | end 14 | 15 | -------------------------------------------------------------------------------- /misc/terminal-emoji-width.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | 3 | RULER = "123456789\n" 4 | ABC = "abcdefg\n\n" 5 | 6 | puts "1) TEXT-DEFAULT EMOJI" 7 | puts 8 | puts RULER + "⛹" + ABC 9 | 10 | puts "1B) TEXT-DEFAULT EMOJI + VS16" 11 | puts 12 | puts RULER + "⛹️" + ABC 13 | 14 | puts "1C) BASE EMOJI CHARACTER + MODIFIER" 15 | puts 16 | puts RULER + "🏃🏽" + ABC 17 | 18 | puts "1D) MODIFIER IN ISOLATION" 19 | puts 20 | puts RULER + "Z🏽" + ABC 21 | 22 | puts "2) RGI EMOJI SEQ" 23 | puts 24 | puts RULER + "🏃🏼‍♀‍➡" + ABC 25 | 26 | puts "2B) RGI EMOJI SEQ (TEXT-DEFAULT FIRST)" 27 | puts 28 | puts RULER + "⛹️‍♂️" + ABC 29 | 30 | puts "2C) RGI EMOJI SEQ (TEXT-DEFAULT FIRST + UQE)" 31 | puts 32 | puts RULER + "⛹‍♂️" + ABC 33 | 34 | puts "3) NON-RGI VALID EMOJI" 35 | puts 36 | puts RULER + "🤠‍🤢" + ABC 37 | 38 | puts "4) NOT WELL-FORMED EMOJI SEQ" 39 | puts 40 | puts RULER + "🚄🏾‍🔆" + ABC 41 | -------------------------------------------------------------------------------- /lib/unicode/display_width/index.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require "zlib" 4 | require_relative "constants" 5 | 6 | module Unicode 7 | class DisplayWidth 8 | File.open(INDEX_FILENAME, "rb") do |file| 9 | serialized_data = Zlib::GzipReader.new(file).read 10 | serialized_data.force_encoding Encoding::BINARY 11 | INDEX = Marshal.load(serialized_data) 12 | end 13 | 14 | def self.decompress_index(index, level) 15 | index.flat_map{ |value| 16 | if level > 0 17 | if value.instance_of?(Array) 18 | value[15] ||= nil 19 | decompress_index(value, level - 1) 20 | else 21 | decompress_index([value] * 16, level - 1) 22 | end 23 | else 24 | if value.instance_of?(Array) 25 | value[15] ||= nil 26 | value 27 | else 28 | [value] * 16 29 | end 30 | end 31 | } 32 | end 33 | end 34 | end 35 | -------------------------------------------------------------------------------- /MIT-LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT LICENSE 2 | 3 | Copyright (c) 2011, 2015-2024 Jan Lelis 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining 6 | a copy of this software and associated documentation files (the 7 | "Software"), to deal in the Software without restriction, including 8 | without limitation the rights to use, copy, modify, merge, publish, 9 | distribute, sublicense, and/or sell copies of the Software, and to 10 | permit persons to whom the Software is furnished to do so, subject to 11 | the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be 14 | included in all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 19 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 20 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 21 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 22 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 23 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | test: 7 | name: Ruby ${{ matrix.ruby }} (${{ matrix.os }}) 8 | if: "!contains(github.event.head_commit.message, '[skip ci]')" 9 | strategy: 10 | matrix: 11 | ruby: 12 | - '3.4' 13 | - '3.3' 14 | - '3.2' 15 | - '3.1' 16 | - '3.0' 17 | - '2.7' 18 | - jruby 19 | - truffleruby 20 | os: 21 | - ubuntu-latest 22 | - macos-latest 23 | runs-on: ${{matrix.os}} 24 | steps: 25 | - uses: actions/checkout@v4 26 | - name: Set up Ruby 27 | uses: ruby/setup-ruby@v1 28 | with: 29 | ruby-version: ${{matrix.ruby}} 30 | bundler-cache: true 31 | - name: Run tests 32 | run: bundle exec rake 33 | 34 | test-windows: 35 | name: Ruby ${{ matrix.ruby }} (windows-latest) 36 | if: "!contains(github.event.head_commit.message, '[skip ci]')" 37 | strategy: 38 | matrix: 39 | ruby: 40 | - '3.4' 41 | - '3.3' 42 | - '3.2' 43 | - '3.1' 44 | - '3.0' 45 | - '2.7' 46 | - jruby 47 | runs-on: windows-latest 48 | steps: 49 | - uses: actions/checkout@v4 50 | - name: Set up Ruby 51 | uses: ruby/setup-ruby@v1 52 | with: 53 | ruby-version: ${{matrix.ruby}} 54 | bundler-cache: true 55 | - name: Run tests 56 | run: bundle exec rake 57 | -------------------------------------------------------------------------------- /unicode-display_width.gemspec: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | require File.dirname(__FILE__) + "/lib/unicode/display_width/constants" 3 | 4 | Gem::Specification.new do |s| 5 | s.name = "unicode-display_width" 6 | s.version = Unicode::DisplayWidth::VERSION 7 | s.authors = ["Jan Lelis"] 8 | s.email = ["hi@ruby.consulting"] 9 | s.homepage = "https://github.com/janlelis/unicode-display_width" 10 | s.summary = "Determines the monospace display width of a string in Ruby." 11 | s.description = "[Unicode #{Unicode::DisplayWidth::UNICODE_VERSION}] Determines the monospace display width of a string using EastAsianWidth.txt, Unicode general category, Emoji specification, and other data." 12 | s.files = Dir.glob(%w[{lib,data}/**/*]) 13 | s.extra_rdoc_files = ["README.md", "MIT-LICENSE.txt", "CHANGELOG.md"] 14 | s.license = 'MIT' 15 | s.required_ruby_version = '>= 2.5.0' 16 | s.add_dependency 'unicode-emoji', '~> 4.1' 17 | s.add_development_dependency 'rspec', '~> 3.4' 18 | s.add_development_dependency 'rake', '~> 13.0' 19 | 20 | if s.respond_to?(:metadata) 21 | s.metadata['changelog_uri'] = "https://github.com/janlelis/unicode-display_width/blob/main/CHANGELOG.md" 22 | s.metadata['source_code_uri'] = "https://github.com/janlelis/unicode-display_width" 23 | s.metadata['bug_tracker_uri'] = "https://github.com/janlelis/unicode-display_width/issues" 24 | s.metadata['rubygems_mfa_required'] = "true" 25 | end 26 | end 27 | -------------------------------------------------------------------------------- /lib/unicode/display_width/emoji_support.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module Unicode 4 | class DisplayWidth 5 | module EmojiSupport 6 | # Tries to find out which terminal emulator is used to 7 | # set emoji: config to best suiting value 8 | # 9 | # Please also see section in README.md and 10 | # misc/terminal-emoji-width.rb 11 | # 12 | # Please note: Many terminals do not set any ENV vars, 13 | # maybe CSI queries can help? 14 | def self.recommended 15 | @recommended ||= _recommended 16 | end 17 | 18 | def self._recommended 19 | if ENV["CI"] 20 | return :rqi 21 | end 22 | 23 | case ENV["TERM_PROGRAM"] 24 | when "iTerm.app" 25 | return :all 26 | when "Apple_Terminal" 27 | return :rgi_at 28 | when "WezTerm" 29 | return :all_no_vs16 30 | end 31 | 32 | case ENV["TERM"] 33 | when "contour","foot" 34 | # konsole: all, how to detect? 35 | return :all 36 | when /kitty/ 37 | return :vs16 38 | end 39 | 40 | if ENV["WT_SESSION"] # Windows Terminal 41 | return :vs16 42 | end 43 | 44 | # As of last time checked: gnome-terminal, vscode, alacritty 45 | :none 46 | end 47 | 48 | # Maybe: Implement something like https://github.com/jquast/ucs-detect 49 | # which uses the terminal cursor to check for best support level 50 | # at runtime 51 | # def self.detect! 52 | # end 53 | end 54 | end 55 | end 56 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | # # # 2 | # Get gemspec info 3 | 4 | gemspec_file = Dir['*.gemspec'].first 5 | gemspec = eval File.read(gemspec_file), binding, gemspec_file 6 | info = "#{gemspec.name} | #{gemspec.version} | " \ 7 | "#{gemspec.runtime_dependencies.size} dependencies | " \ 8 | "#{gemspec.files.size} files" 9 | 10 | 11 | # # # 12 | # Gem build and install task 13 | 14 | desc info 15 | task :gem do 16 | puts info + "\n\n" 17 | print " "; sh "gem build #{gemspec_file}" 18 | FileUtils.mkdir_p 'pkg' 19 | FileUtils.mv "#{gemspec.name}-#{gemspec.version}.gem", 'pkg' 20 | puts; sh %{gem install --no-document pkg/#{gemspec.name}-#{gemspec.version}.gem} 21 | end 22 | 23 | 24 | # # # 25 | # Start an IRB session with the gem loaded 26 | 27 | desc "#{gemspec.name} | IRB" 28 | task :irb do 29 | sh "irb -I ./lib -r #{gemspec.name.gsub '-','/'}" 30 | end 31 | 32 | # # # 33 | # Run all specs 34 | 35 | 36 | desc "#{gemspec.name} | Test" 37 | task :test do 38 | sh "rspec spec" 39 | end 40 | task :spec => :test 41 | task :default => :test 42 | 43 | # # # 44 | # Update index table 45 | 46 | namespace :update do 47 | desc "#{gemspec.name} | Update index" 48 | task :index do 49 | require File.dirname(__FILE__) + '/lib/unicode/display_width/index_builder' 50 | Unicode::DisplayWidth::IndexBuilder.build! 51 | end 52 | end 53 | 54 | # # # 55 | # Update data file 56 | 57 | namespace :update do 58 | desc "#{gemspec.name} | Update unicode data" 59 | task :data do 60 | require File.dirname(__FILE__) + '/lib/unicode/display_width/index_builder' 61 | Unicode::DisplayWidth::IndexBuilder.fetch! 62 | end 63 | end 64 | 65 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, gender identity and expression, level of experience, 9 | nationality, personal appearance, race, religion, or sexual identity and 10 | orientation. 11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at opensource@janlelis.com. All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at [https://contributor-covenant.org/version/1/4][version] 72 | 73 | [homepage]: https://contributor-covenant.org 74 | [version]: https://contributor-covenant.org/version/1/4/ 75 | -------------------------------------------------------------------------------- /lib/unicode/display_width.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require "unicode/emoji" 4 | 5 | require_relative "display_width/constants" 6 | require_relative "display_width/index" 7 | require_relative "display_width/emoji_support" 8 | 9 | module Unicode 10 | class DisplayWidth 11 | DEFAULT_AMBIGUOUS = 1 12 | INITIAL_DEPTH = 0x10000 13 | ASCII_NON_ZERO_REGEX = /[\0\x05\a\b\n-\x0F]/ 14 | ASCII_NON_ZERO_STRING = "\0\x05\a\b\n-\x0F" 15 | ASCII_BACKSPACE = "\b" 16 | AMBIGUOUS_MAP = { 17 | 1 => :WIDTH_ONE, 18 | 2 => :WIDTH_TWO, 19 | } 20 | FIRST_AMBIGUOUS = { 21 | WIDTH_ONE: 768, 22 | WIDTH_TWO: 161, 23 | } 24 | NOT_COMMON_NARROW_REGEX = { 25 | WIDTH_ONE: /[^\u{10}-\u{2FF}]/m, 26 | WIDTH_TWO: /[^\u{10}-\u{A1}]/m, 27 | } 28 | FIRST_4096 = { 29 | WIDTH_ONE: decompress_index(INDEX[:WIDTH_ONE][0][0], 1), 30 | WIDTH_TWO: decompress_index(INDEX[:WIDTH_TWO][0][0], 1), 31 | } 32 | EMOJI_SEQUENCES_REGEX_MAPPING = { 33 | rgi: :REGEX_INCLUDE_MQE_UQE, 34 | rgi_at: :REGEX_INCLUDE_MQE_UQE, 35 | possible: :REGEX_WELL_FORMED, 36 | } 37 | REGEX_EMOJI_VS16 = Regexp.union( 38 | Regexp.compile( 39 | Unicode::Emoji::REGEX_TEXT_PRESENTATION.source + 40 | "(? 15 && codepoint < first_ambiguous 98 | width += 1 99 | elsif codepoint < 0x1001 100 | width += index_low[codepoint] || 1 101 | else 102 | d = INITIAL_DEPTH 103 | w = index_full[codepoint / d] 104 | while w.instance_of? Array 105 | w = w[(codepoint %= d) / (d /= 16)] 106 | end 107 | 108 | width += w || 1 109 | end 110 | } 111 | 112 | # Return result + prevent negative lengths 113 | width < 0 ? 0 : width 114 | end 115 | 116 | # Returns width of custom overwrites and remaining string 117 | def self.width_custom(string, overwrite) 118 | width = 0 119 | 120 | string = string.each_codepoint.select{ |codepoint| 121 | if overwrite[codepoint] 122 | width += overwrite[codepoint] 123 | nil 124 | else 125 | codepoint 126 | end 127 | }.pack("U*") 128 | 129 | [width, string] 130 | end 131 | 132 | # Returns width for ASCII-only strings. Will consider zero-width control symbols. 133 | def self.width_ascii(string) 134 | if string.match?(ASCII_NON_ZERO_REGEX) 135 | res = string.delete(ASCII_NON_ZERO_STRING).bytesize - string.count(ASCII_BACKSPACE) 136 | return res < 0 ? 0 : res 137 | end 138 | 139 | string.bytesize 140 | end 141 | 142 | # Returns width of all considered Emoji and remaining string 143 | def self.emoji_width(string, mode = :all, ambiguous = DEFAULT_AMBIGUOUS) 144 | res = 0 145 | 146 | if emoji_set_regex = EMOJI_SEQUENCES_REGEX_MAPPING[mode] 147 | emoji_width_via_possible( 148 | string, 149 | Unicode::Emoji.const_get(emoji_set_regex), 150 | mode == :rgi_at, 151 | ambiguous, 152 | ) 153 | 154 | elsif mode == :all_no_vs16 155 | no_emoji_string = string.gsub(REGEX_EMOJI_ALL_SEQUENCES){ res += 2; "" } 156 | [res, no_emoji_string] 157 | 158 | elsif mode == :vs16 159 | no_emoji_string = string.gsub(REGEX_EMOJI_VS16){ res += 2; "" } 160 | [res, no_emoji_string] 161 | 162 | elsif mode == :all 163 | no_emoji_string = string.gsub(REGEX_EMOJI_ALL_SEQUENCES_AND_VS16){ res += 2; "" } 164 | [res, no_emoji_string] 165 | 166 | else 167 | [0, string] 168 | 169 | end 170 | end 171 | 172 | # Match possible Emoji first, then refine 173 | def self.emoji_width_via_possible(string, emoji_set_regex, strict_eaw = false, ambiguous = DEFAULT_AMBIGUOUS) 174 | res = 0 175 | 176 | # For each string possibly an emoji 177 | no_emoji_string = string.gsub(REGEX_EMOJI_ALL_SEQUENCES_AND_VS16){ |emoji_candidate| 178 | # Check if we have a combined Emoji with width 2 (or EAW an Apple Terminal) 179 | if emoji_candidate == emoji_candidate[emoji_set_regex] 180 | if strict_eaw 181 | res += self.of(emoji_candidate[0], ambiguous, emoji: false) 182 | else 183 | res += 2 184 | end 185 | "" 186 | 187 | # We are dealing with a default text presentation emoji or a well-formed sequence not matching the above Emoji set 188 | else 189 | if !strict_eaw 190 | # Ensure all explicit VS16 sequences have width 2 191 | emoji_candidate.gsub!(REGEX_EMOJI_VS16){ res += 2; "" } 192 | end 193 | 194 | emoji_candidate 195 | end 196 | } 197 | 198 | [res, no_emoji_string] 199 | end 200 | 201 | def self.normalize_options(string, ambiguous = nil, overwrite = nil, old_options = {}, **options) 202 | unless old_options.empty? 203 | warn "Unicode::DisplayWidth: Please migrate to keyword arguments - #{old_options.inspect}" 204 | options.merge! old_options 205 | end 206 | 207 | options[:ambiguous] = ambiguous if ambiguous 208 | options[:ambiguous] ||= DEFAULT_AMBIGUOUS 209 | 210 | if options[:ambiguous] != 1 && options[:ambiguous] != 2 211 | raise ArgumentError, "Unicode::DisplayWidth: Ambiguous width must be 1 or 2" 212 | end 213 | 214 | if overwrite && !overwrite.empty? 215 | warn "Unicode::DisplayWidth: Please migrate to keyword arguments - overwrite: #{overwrite.inspect}" 216 | options[:overwrite] = overwrite 217 | end 218 | options[:overwrite] ||= {} 219 | 220 | if [nil, true, :auto].include?(options[:emoji]) 221 | options[:emoji] = EmojiSupport.recommended 222 | elsif options[:emoji] == false 223 | options[:emoji] = :none 224 | end 225 | 226 | options 227 | end 228 | 229 | def initialize(ambiguous: DEFAULT_AMBIGUOUS, overwrite: {}, emoji: true) 230 | @ambiguous = ambiguous 231 | @overwrite = overwrite 232 | @emoji = emoji 233 | end 234 | 235 | def get_config(**kwargs) 236 | { 237 | ambiguous: kwargs[:ambiguous] || @ambiguous, 238 | overwrite: kwargs[:overwrite] || @overwrite, 239 | emoji: kwargs[:emoji] || @emoji, 240 | } 241 | end 242 | 243 | def of(string, **kwargs) 244 | self.class.of(string, **get_config(**kwargs)) 245 | end 246 | end 247 | end 248 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # CHANGELOG 2 | 3 | ## 3.2.0 4 | 5 | - Unicode 17.0 6 | 7 | ## 3.1.5 8 | 9 | - Cache Emoji support level for performance reasons #30, patch by @Earlopain: 10 | 11 | ## 3.1.4 12 | 13 | - Fix that skin tone modifiers were ignored when used in a non-ZWJ sequence 14 | context (= single emoji char + modifier) #29 15 | - Add more docs and specs about modifier handling 16 | 17 | ## 3.1.3 18 | 19 | Better handling of non-UTF-8 strings, patch by @Earlopain: 20 | 21 | - Data with *BINARY* encoding is interpreted as UTF-8, if possible 22 | - Use `invalid: :replace` and `undef: :replace` options when converting to UTF-8 23 | 24 | ## 3.1.2 25 | 26 | - Performance improvements 27 | 28 | ## 3.1.1 29 | 30 | - Performance improvements 31 | 32 | ## 3.1.0 33 | 34 | **Improve Emoji support:** 35 | 36 | - Emoji modes: Differentiate between well-formed Emoji (`:possible`) and any 37 | ZWJ/modifier sequence (`:all`). The latter is more common and more efficient 38 | to implement. 39 | - Unify `:rgi_{fqe,mqe,uqe}` options to just `:rgi` to keep things simpler (corresponds to 40 | the former `:rgi_uqe` option). Most terminals that want to support the RGI set 41 | will probably want to catch Emoji sequences with missing VS16s. 42 | - Add new `:all_no_vs16` and `:rgi_at` modes to be able to support some terminals 43 | that needs these quirks 44 | - Add alias `emoji: :auto` for `emoji: true` and `emoji: :none` for `emoji: false` 45 | - `:auto` mode: Only consider terminal cells when recommending Emoji support level 46 | (Emoji themselves might display differently) 47 | - `:auto` mode: Set default Emoji mode for unknown/unsupported terminals to `:none` 48 | - Rename `:basic` mode to `:vs16` 49 | 50 | ## 3.0.1 51 | 52 | 53 | - Add WezTerm and foot as good Emoji terminals 54 | 55 | ## 3.0.0 56 | 57 | **Rework Emoji support:** 58 | 59 | - Emoji widths are now enabled by default 60 | - Only reduce Emoji width to 2 when RGI Emoji detected (configurable) 61 | - VS16 turns Emoji characters of width 1 into full-width 62 | - Please note that Emoji parsing has a notable impact on performance. 63 | You can use the `emoji: false` option to disable Emoji adjustments 64 | - Tries to detect terminal's Emoji support level automatically (from ENV vars) 65 | 66 | **Index fixes and updates:** 67 | 68 | - Private-use characters are considered ambiguous (were given width 1 before) 69 | - Fix that a few zero-width ignorable codepoints from recent Unicode were missing 70 | - Consider the following separators to be zero-width: 71 | - U+2028 - LINE SEPARATOR - Zl 72 | - U+2029 - PARAGRAPH SEPARATOR - Zp 73 | 74 | **Other:** 75 | 76 | - Add keyword arguments to `Unicode::DisplayWidth.of`. If you are using a hash 77 | with overwrite values as third parameter, be sure to put it in curly braces. 78 | - Using third parameter or explicit hash as fourth parameter is deprecated, 79 | please migrate to the keyword arguments API 80 | - Gem raises `ArgumentError` for ambiguous values other than 1 or 2 81 | - Performance optimizations 82 | - Require Ruby 2.5 83 | 84 | ## 2.6.0 85 | 86 | - Unicode 16 87 | 88 | ## 2.5.0 89 | 90 | - Unicode 15.1 91 | 92 | ## 2.4.2 93 | 94 | More performance improvements: 95 | 96 | - Optimize lookup of first 4096 codepoints 97 | - Avoid overwrite lookup if no overwrites are set 98 | 99 | ## 2.4.1 100 | 101 | - Improve general performance! 102 | - Further improve performance for ASCII strings 103 | 104 | *You should really upgrade - it's much faster now!* 105 | 106 | ## 2.4.0 107 | - Improve performance for ASCII-only strings, by @fatkodima 108 | - Require Ruby 2.4 109 | 110 | ## 2.3.0 111 | 112 | - Unicode 15.0 113 | 114 | ## 2.2.0 115 | 116 | - Add *Hangul Jamo Extended-B* block to zero-width chars, thanks @ninjalj #22 117 | 118 | ## 2.1.0 119 | 120 | - Unicode 14.0 121 | 122 | ## 2.0.0 123 | 124 | Add Support for Ruby 3.0 125 | 126 | ### Breaking Changes 127 | 128 | Some features of this library were marked deprecated for a long time and have been removed with Version 2.0: 129 | 130 | - Aliases of display\_width (…\_size, …\_length) have been removed 131 | - Auto-loading of string core extension has been removed: 132 | 133 | If you are relying on the `String#display_width` string extension to be automatically loaded (old behavior), please load it explicitly now: 134 | 135 | ```ruby 136 | require "unicode/display_width/string_ext" 137 | ``` 138 | 139 | You could also change your `Gemfile` line to achieve this: 140 | 141 | ```ruby 142 | gem "unicode-display_width", require: "unicode/display_width/string_ext" 143 | ``` 144 | 145 | ## 2.0.0.pre2 146 | 147 | - Update 2.0 branch to Unicode 13 148 | 149 | ## 2.0.0.pre1 150 | 151 | Will be published as non-pre version on rubygems.org when Ruby 3.0 is released (December 2020) 152 | 153 | - Introduce new class-based API, which remembers your string-width configuration. See README for details. 154 | - Remove auto-loading of string extension 155 | - You can: `require "unicode/display_width/string_ext"` to continue to use the string extension 156 | - The manual opt-out `require "unicode/display_width/no_string_ext"` is not needed anymore and will 157 | issue a warning in the future 158 | - Remove (already deprecated) String#display_size and String#display_width aliases 159 | 160 | Refactorings / Internal Changes: 161 | 162 | - Freeze string literals 163 | - The Unicode::DisplayWidth now is class, instead of a module, this enables the new config-object API 164 | 165 | ## 1.8.0 166 | 167 | - Unicode 14.0 (last release of 1.x) 168 | 169 | ## 1.7.0 170 | 171 | - Unicode 13 172 | 173 | ## 1.6.1 174 | 175 | - Fix that ambiguous and overwrite options where ignored for emoji-measuring 176 | 177 | ## 1.6.0 178 | 179 | - Unicode 12.1 180 | 181 | ## 1.5.0 182 | 183 | - Unicode 12 184 | 185 | ## 1.4.1 186 | 187 | - Only bundle required lib/* and data/* files in actual rubygem, patch by @tas50 188 | 189 | ## 1.4.0 190 | 191 | - Unicode 11 192 | 193 | ## 1.3.3 194 | 195 | - Replace Gem::Util.gunzip with direct zlib implementation 196 | This removes the dependency on rubygems, fixes #17 197 | 198 | ## 1.3.2 199 | 200 | - Explicitly load rubygems/util, fixes regression in 1.3.1 (autoload issue) 201 | 202 | ## 1.3.1 203 | 204 | - Use `Gem::Util` for `gunzip`, removes deprecation warning, patch by @Schwad 205 | 206 | ## 1.3.0 207 | 208 | - Unicode 10 209 | 210 | ## 1.2.1 211 | 212 | - Fix bug that `emoji: true` would fail for emoji without modifier 213 | 214 | ## 1.2.0 215 | 216 | - Add zero-width codepoint ranges: U+2060..U+206F, U+FFF0..U+FFF8, U+E0000..U+E0FFF 217 | - Add full-witdh codepoint ranges: U+3400..U+4DBF, U+4E00..U+9FFF, U+F900..U+FAFF, U+20000..U+2FFFD, U+30000..U+3FFFD 218 | - Experimental emoji support using the [unicode-emoji](https://github.com/janlelis/unicode-emoji) gem 219 | - Fix minor bug in index compression scheme 220 | 221 | ## 1.1.3 222 | 223 | - Fix that non-UTF-8 encodings do not throw errors, patch by @windwiny 224 | 225 | ## 1.1.2 226 | 227 | - Reduce memory consumption and increase performance, patch by @rrosenblum 228 | 229 | ## 1.1.1 230 | 231 | - Always load index into memory, fixes #9 232 | 233 | ## 1.1.0 234 | 235 | - Support Unicode 9.0 236 | 237 | ## 1.0.5 238 | 239 | - Actually include new index from 1.0.4 240 | 241 | ## 1.0.4 242 | 243 | - New index format (much smaller) and internal API changes 244 | - Move index generation to a builder plugin for the unicoder gem 245 | - No public API changes 246 | 247 | ## 1.0.3 248 | 249 | - Avoid circular dependency warning 250 | 251 | ## 1.0.2 252 | 253 | - Fix error that gemspec might be invalid under some circumstances (see gh#6) 254 | 255 | ## 1.0.1 256 | 257 | - Inofficially allow Ruby 1.9 258 | 259 | ## 1.0.0 260 | 261 | - Faster than 0.3.1 262 | - Advanced determination of character width 263 | - This includes: Treat width of most chars of general categories (Mn, Me, Cf) as 0 264 | - This includes: Introduce list of characters with special widths 265 | - Allow custom overrides for specific codepoints 266 | - Set required Ruby version to 2.0 267 | - Add NO_STRING_EXT mode to disable monkey patching 268 | - Internal API & index format changed drastically 269 | - Remove require 'unicode/display_size' (use 'unicode/display_width' instead) 270 | 271 | ## 0.3.1 272 | 273 | - Faster than 0.3.0 274 | - Deprecate usage of aliases: String#display_size and String#display_length 275 | - Eliminate Ruby warnings (@amatsuda) 276 | 277 | ## 0.3.0 278 | 279 | - Update EastAsianWidth from 7.0 to 8.0 280 | - Add rake task to update EastAsianWidth.txt 281 | - Move code to generate index from library to Rakefile 282 | - Update project's meta files 283 | - Deprecate requiring 'unicode-display_size' 284 | 285 | ## 0.2.0 286 | 287 | - Update EastAsianWidth from 6.0 to 7.0 288 | - Don't build index table automatically when not available 289 | - Don't include EastAsianWidth.txt in gem (only index) 290 | 291 | 292 | ## 0.1.0 293 | 294 | - Fix github issue #1 295 | 296 | 297 | ## 0.1.0 298 | 299 | - Initial release 300 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Unicode::DisplayWidth [![[version]](https://badge.fury.io/rb/unicode-display_width.svg)](https://badge.fury.io/rb/unicode-display_width) [](https://github.com/janlelis/unicode-display_width/actions?query=workflow%3ATest) 2 | 3 | Determines the monospace display width of a string in Ruby, which is useful for all kinds of terminal-based applications. The implementation is based on [EastAsianWidth.txt](https://www.unicode.org/Public/UNIDATA/EastAsianWidth.txt), the [Emoji specfication](https://www.unicode.org/reports/tr51/) and other data, 100% in Ruby. It does not rely on the OS vendor ([wcwidth](https://github.com/janlelis/wcswidth-ruby)) to provide an up-to-date method for measuring string width in terminals. 4 | 5 | Unicode version: **17.0.0** (September 2025) 6 | 7 | ## Gem Version 3 — Improved Emoji Support 8 | 9 | **Emoji support is now enabled by default.** See below for description and configuration possibilities. 10 | 11 | **Unicode::DisplayWidth.of now takes keyword arguments:** { ambiguous:, emoji:, overwrite: } 12 | 13 | See [CHANGELOG](/CHANGELOG.md) for details. 14 | 15 | ## Gem Version 2.4.2 — Performance Updates 16 | 17 | **If you use this gem, you should really upgrade to 2.4.2 or newer. It's often 100x faster, sometimes even 1000x and more!** 18 | 19 | This is possible because the gem now detects if you use very basic (and common) characters, like ASCII characters. Furthermore, the character width lookup code has been optimized, so even when the string involves full-width or ambiguous characters, the gem is much faster now. 20 | 21 | ## Introduction to Character Widths 22 | 23 | Guessing the correct space a character will consume on terminals is not easy. There is no single standard. Most implementations combine data from [East Asian Width](https://www.unicode.org/reports/tr11/), some [General Categories](https://en.wikipedia.org/wiki/Unicode_character_property#General_Category), and hand-picked adjustments. 24 | 25 | ### How this Library Handles Widths 26 | 27 | Further at the top means higher precedence. Please expect changes to this algorithm with every MINOR version update (the X in 1.X.0)! 28 | 29 | Width | Characters | Comment 30 | -------|------------------------------|-------------------------------------------------- 31 | ? | (user defined) | Overwrites any other values 32 | ? | Emoji | See "How this Library Handles Emoji Width" below 33 | -1 | `"\b"` | Backspace (total width never below 0) 34 | 0 | `"\0"`, `"\x05"`, `"\a"`, `"\n"`, `"\v"`, `"\f"`, `"\r"`, `"\x0E"`, `"\x0F"` | [C0 control codes](https://en.wikipedia.org/wiki/C0_and_C1_control_codes#C0_.28ASCII_and_derivatives.29) which do not change horizontal width 35 | 1 | `"\u{00AD}"` | SOFT HYPHEN 36 | 2 | `"\u{2E3A}"` | TWO-EM DASH 37 | 3 | `"\u{2E3B}"` | THREE-EM DASH 38 | 0 | General Categories: Mn, Me, Zl, Zp, Cf (non-arabic)| Excludes ARABIC format characters 39 | 0 | Derived Property: Default_Ignorable_Code_Point | Ignorable ranges 40 | 0 | `"\u{1160}".."\u{11FF}"`, `"\u{D7B0}".."\u{D7FF}"` | HANGUL JUNGSEONG 41 | 2 | East Asian Width: F, W | Full-width characters 42 | 2 | `"\u{3400}".."\u{4DBF}"`, `"\u{4E00}".."\u{9FFF}"`, `"\u{F900}".."\u{FAFF}"`, `"\u{20000}".."\u{2FFFD}"`, `"\u{30000}".."\u{3FFFD}"` | Full-width ranges 43 | 1 or 2 | East Asian Width: A | Ambiguous characters, user defined, default: 1 44 | 1 | All other codepoints | - 45 | 46 | ## Install 47 | 48 | Install the gem with: 49 | 50 | $ gem install unicode-display_width 51 | 52 | Or add to your Gemfile: 53 | 54 | gem 'unicode-display_width' 55 | 56 | ## Usage 57 | 58 | ```ruby 59 | require 'unicode/display_width' 60 | 61 | Unicode::DisplayWidth.of("⚀") # => 1 62 | Unicode::DisplayWidth.of("一") # => 2 63 | ``` 64 | 65 | ### Ambiguous Characters 66 | 67 | The second parameter defines the value returned by characters defined as ambiguous: 68 | 69 | ```ruby 70 | Unicode::DisplayWidth.of("·", 1) # => 1 71 | Unicode::DisplayWidth.of("·", 2) # => 2 72 | ``` 73 | 74 | ### Encoding Notes 75 | 76 | - Data with *BINARY* encoding is interpreted as UTF-8, if possible 77 | - Non-UTF-8 strings are converted to UTF-8 before measuring, using the [`{invalid: :replace, undef: :replace}`) options](https://ruby-doc.org/3.3.5/encodings_rdoc.html#label-Encoding+Options) 78 | 79 | ### Custom Overwrites 80 | 81 | You can overwrite how to handle specific code points by passing a hash (or even a proc) as `overwrite:` parameter: 82 | 83 | ```ruby 84 | Unicode::DisplayWidth.of("a\tb", 1, overwrite: { "\t".ord => 10 })) # => TAB counted as 10, result is 12 85 | ``` 86 | 87 | Please note that using overwrites disables some perfomance optimizations of this gem. 88 | 89 | ### Emoji 90 | 91 | If your terminal supports it, the gem detects Emoji and Emoji sequences and adjusts the width of the measured string. This can be disabled by passing `emoji: false` as an argument: 92 | 93 | ```ruby 94 | Unicode::DisplayWidth.of "🤾🏽‍♀️", emoji: :all # => 2 95 | Unicode::DisplayWidth.of "🤾🏽‍♀️", emoji: false # => 5 96 | ``` 97 | 98 | #### How this Library Handles Emoji Width 99 | 100 | There are many Emoji which get constructed by combining other Emoji in a sequence. This makes measuring the width complicated, since terminals might either display the combined Emoji or the separate parts of the Emoji individually. 101 | 102 | Another aspect where terminals disagree is whether Emoji characters which have a text presentation by default (width 1) should be turned into full-width (width 2) when combined with Variation Selector 16 (*U+FEOF*). 103 | 104 | Finally, it varies if Skin Tone Modifiers can be applied to all characters or just to those with the "Emoji Base" property. 105 | 106 | Emoji Type | Width / Comment 107 | ------------|---------------- 108 | Basic/Single Emoji character without Variation Selector | No special handling 109 | Basic/Single Emoji character with VS15 (Text) | No special handling 110 | Basic/Single Emoji character with VS16 (Emoji) | 2 or East Asian Width (see table below) 111 | Single Emoji character with Skin Tone Modifier | 2 unless Emoji mode is `:none` or `vs16` 112 | Skin Tone Modifier used in isolation or with invalid base | 2 if Emoji mode is `:rgi` / `:rgi_at` 113 | Emoji Sequence | 2 if Emoji belongs to configured Emoji set (see table below) 114 | 115 | #### Emoji Modes 116 | 117 | The `emoji:` option can be used to configure which type of Emoji should be considered to have a width of 2 and if VS16-Emoji should be widened. Other sequences are treated as non-combined Emoji, so the widths of all partial Emoji add up (e.g. width of one basic Emoji + one skin tone modifier + another basic Emoji). The following Emoji settings can be used: 118 | 119 | `emoji:` Option | VS16-Emoji Width | Emoji Sequences Width / Comment | Example Terminals 120 | ----------------|------------------|---------------------------------|------------------ 121 | `true` or `:auto` | - | Automatically use recommended Emoji setting for your terminal | - 122 | `:all` | 2 | 2 for all ZWJ/modifier/keycap sequences, even if they are not well-formed Emoji sequences | iTerm, foot 123 | `:all_no_vs16` | EAW (1 or 2) | 2 for all ZWJ/modifier/keycap sequences, even if they are not well-formed Emoji sequences | WezTerm 124 | `:possible`| 2 | 2 for all possible/well-formed Emoji sequences | ? 125 | `:rgi` | 2 | 2 for all [RGI Emoji](https://www.unicode.org/reports/tr51/#def_rgi_set) sequences | ? 126 | `:rgi_at` | EAW (1 or 2) | 1 or 2: Like `:rgi`, but Emoji sequences starting with a default-text Emoji have EAW | Apple Terminal 127 | `:vs16` | 2 | 2 * number of partial Emoji (sequences never considered to represent a combined Emoji) | kitty? 128 | `false` or `:none` | EAW (1 or 2) | No Emoji adjustments | gnome-terminal, many older terminals 129 | 130 | - *EAW:* East Asian Width 131 | - *RGI Emoji:* Emoji Recommended for General Interchange 132 | - *ZWJ:* Zero-width Joiner: Codepoint `U+200D`,used in many Emoji sequences 133 | 134 | #### Emoji Support in Terminals 135 | 136 | Unfortunately, the level of Emoji support varies a lot between terminals. While some of them are able to display (almost) all Emoji sequences correctly, others fall back to displaying sequences of basic Emoji. When `emoji: true` or `emoji: :auto` is used, the gem will attempt to set the best fitting Emoji setting for you (e.g. `:rgi_at` on "Apple_Terminal" or `false` on Gnome's terminal widget). 137 | 138 | Please note that Emoji display and number of terminal columns used might differs a lot. For example, it might be the case that a terminal does not understand which Emoji to display, but still manages to calculate the proper amount of terminal cells. The automatic Emoji support level per terminal only considers the latter (cursor position), not the actual Emoji image(s) displayed. Please [open an issue](https://github.com/janlelis/unicode-display_width/issues/new) if you notice your terminal application could use a better default value. Also see the [ucs-detect project](https://ucs-detect.readthedocs.io/results.html), which is a great resource that compares various terminal's Unicode/Emoji capabilities. You can visually check how your terminals renders different kind of Emoji types with the [terminal-emoji-width.rb script](https://github.com/janlelis/unicode-display_width/blob/main/misc/terminal-emoji-width.rb). 139 | 140 | **To terminal implementors reading this:** Although the practice of giving all Emoji/ZWJ sequences a width of 2 (`:all` mode described above) has some advantages, it does not lead to a particularly good developer experience. Since there is always the possibility of well-formed Emoji that are currently not supported (non-RGI / future Unicode) appearing, those sequences will take more cells. Instead of overflowing, cutting off sequences or displaying placeholder-Emoji, could it be worthwile to implement the `:rgi` option (only known Emoji get width 2) and give those unknown Emoji the space they need? This would support the idea that the meaning of an unknown Emoji sequence can still be conveyed (without messing up the terminal at the same time). Just a thought… 141 | 142 | ### Usage with String Extension 143 | 144 | ```ruby 145 | require 'unicode/display_width/string_ext' 146 | 147 | "⚀".display_width # => 1 148 | '一'.display_width # => 2 149 | ``` 150 | 151 | ### Usage with Config Object 152 | 153 | You can use a config object that allows you to save your configuration for later-reuse. This requires an extra line of code, but has the advantage that you'll need to define your string-width options only once: 154 | 155 | ```ruby 156 | require 'unicode/display_width' 157 | 158 | display_width = Unicode::DisplayWidth.new( 159 | # ambiguous: 1, 160 | overwrite: { "A".ord => 100 }, 161 | emoji: :all, 162 | ) 163 | 164 | display_width.of "⚀" # => 1 165 | display_width.of "🤠‍🤢" # => 2 166 | display_width.of "A" # => 100 167 | ``` 168 | 169 | ### Usage from the Command-Line 170 | 171 | Use this one-liner to print out display widths for strings from the command-line: 172 | 173 | ``` 174 | $ gem install unicode-display_width 175 | $ ruby -r unicode/display_width -e 'puts Unicode::DisplayWidth.of $*[0]' -- "一" 176 | ``` 177 | Replace "一" with the actual string to measure 178 | 179 | ## Other Implementations & Discussion 180 | 181 | - Python: https://github.com/jquast/wcwidth 182 | - JavaScript: https://github.com/mycoboco/wcwidth.js 183 | - C: https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c 184 | - C for Julia: https://github.com/JuliaLang/utf8proc/issues/2 185 | - Golang: https://github.com/rivo/uniseg 186 | 187 | See [unicode-x](https://github.com/janlelis/unicode-x) for more Unicode related micro libraries. 188 | 189 | ## Copyright & Info 190 | 191 | - Copyright (c) 2011, 2015-2025 Jan Lelis, https://janlelis.com, released under the MIT 192 | license 193 | - Early versions based on runpaint's unicode-data interface: Copyright (c) 2009 Run Paint Run Run 194 | - Unicode data: https://www.unicode.org/copyright.html#Exhibit1 195 | -------------------------------------------------------------------------------- /spec/display_width_spec.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require_relative '../lib/unicode/display_width/string_ext' 4 | 5 | describe 'Unicode::DisplayWidth.of' do 6 | describe '[east asian width]' do 7 | it 'returns 2 for F' do 8 | expect( '!'.display_width ).to eq 2 9 | end 10 | 11 | it 'returns 2 for W' do 12 | expect( '一'.display_width ).to eq 2 13 | end 14 | 15 | it 'returns 2 for W (which are currently unassigned)' do 16 | expect( "\u{3FFFD}".display_width ).to eq 2 17 | end 18 | 19 | it 'returns 1 for N' do 20 | expect( 'À'.display_width ).to eq 1 21 | end 22 | 23 | it 'returns 1 for Na' do 24 | expect( 'A'.display_width ).to eq 1 25 | end 26 | 27 | it 'returns 1 for H' do 28 | expect( '。'.display_width ).to eq 1 29 | end 30 | 31 | it 'returns first argument of display_width for A' do 32 | expect( '·'.display_width(1) ).to eq 1 33 | end 34 | 35 | it 'returns first argument of display_width for A' do 36 | expect( '·'.display_width(2) ).to eq 2 37 | end 38 | 39 | it 'returns 1 for A if no argument given' do 40 | expect( '·'.display_width ).to eq 1 41 | end 42 | end 43 | 44 | describe '[zero width]' do 45 | it 'returns 0 for Mn chars' do 46 | expect( 'ֿ'.display_width ).to eq 0 47 | end 48 | 49 | it 'returns 0 for Me chars' do 50 | expect( '҈'.display_width ).to eq 0 51 | end 52 | 53 | it 'returns 0 for Cf chars' do 54 | expect( '​'.display_width ).to eq 0 55 | end 56 | 57 | it 'returns 0 for HANGUL JUNGSEONG chars' do 58 | expect( 'ᅠ'.display_width ).to eq 0 59 | expect( 'ힰ'.display_width ).to eq 0 60 | end 61 | 62 | it 'returns 0 for U+2060..U+206F' do 63 | expect( "\u{2060}".display_width ).to eq 0 64 | end 65 | 66 | it 'returns 0 for U+FFF0..U+FFF8' do 67 | expect( "\u{FFF0}".display_width ).to eq 0 68 | end 69 | 70 | it 'returns 0 for U+E0000..U+E0FFF' do 71 | expect( "\u{E0000}".display_width ).to eq 0 72 | end 73 | end 74 | 75 | describe '[special characters]' do 76 | it 'returns 0 for ␀' do 77 | expect( "\0".display_width ).to eq 0 78 | end 79 | 80 | it 'returns 0 for ␅' do 81 | expect( "\x05".display_width ).to eq 0 82 | end 83 | 84 | it 'returns 0 for ␇' do 85 | expect( "\a".display_width ).to eq 0 86 | end 87 | 88 | it 'returns -1 for ␈' do 89 | expect( "aaaa\b".display_width ).to eq 3 90 | end 91 | 92 | it 'returns -1 for ␈, but at least 0' do 93 | expect( "\b".display_width ).to eq 0 94 | end 95 | 96 | it 'returns 0 for ␊' do 97 | expect( "\n".display_width ).to eq 0 98 | end 99 | 100 | it 'returns 0 for ␋' do 101 | expect( "\v".display_width ).to eq 0 102 | end 103 | 104 | it 'returns 0 for ␌' do 105 | expect( "\f".display_width ).to eq 0 106 | end 107 | 108 | it 'returns 0 for ␍' do 109 | expect( "\r".display_width ).to eq 0 110 | end 111 | 112 | it 'returns 0 for ␎' do 113 | expect( "\x0E".display_width ).to eq 0 114 | end 115 | 116 | it 'returns 0 for ␏' do 117 | expect( "\x0F".display_width ).to eq 0 118 | end 119 | 120 | it 'returns 1 for other C0 characters' do 121 | expect( "\x01".display_width ).to eq 1 122 | expect( "\x02".display_width ).to eq 1 123 | expect( "\x03".display_width ).to eq 1 124 | expect( "\x04".display_width ).to eq 1 125 | expect( "\x06".display_width ).to eq 1 126 | expect( "\x10".display_width ).to eq 1 127 | expect( "\x11".display_width ).to eq 1 128 | expect( "\x12".display_width ).to eq 1 129 | expect( "\x13".display_width ).to eq 1 130 | expect( "\x14".display_width ).to eq 1 131 | expect( "\x15".display_width ).to eq 1 132 | expect( "\x16".display_width ).to eq 1 133 | expect( "\x17".display_width ).to eq 1 134 | expect( "\x18".display_width ).to eq 1 135 | expect( "\x19".display_width ).to eq 1 136 | expect( "\x1a".display_width ).to eq 1 137 | expect( "\x1b".display_width ).to eq 1 138 | expect( "\x1c".display_width ).to eq 1 139 | expect( "\x1d".display_width ).to eq 1 140 | expect( "\x1e".display_width ).to eq 1 141 | expect( "\x1f".display_width ).to eq 1 142 | expect( "\x7f".display_width ).to eq 1 143 | end 144 | 145 | it 'returns 0 for LINE SEPARATOR' do 146 | expect( "\u{2028}".display_width ).to eq 0 147 | end 148 | 149 | it 'returns 0 for PARAGRAPH SEPARATOR' do 150 | expect( "\u{2029}".display_width ).to eq 0 151 | end 152 | 153 | it 'returns 1 for SOFT HYPHEN' do 154 | expect( "­".display_width ).to eq 1 155 | end 156 | 157 | it 'returns 2 for THREE-EM DASH' do 158 | expect( "⸺".display_width ).to eq 2 159 | end 160 | 161 | it 'returns 3 for THREE-EM DASH' do 162 | expect( "⸻".display_width ).to eq 3 163 | end 164 | 165 | it 'returns ambiguous for private-use' do 166 | expect( "󰀀".display_width(1) ).to eq 1 167 | expect( "󰀀".display_width(2) ).to eq 2 168 | end 169 | end 170 | 171 | 172 | describe '[overwrite]' do 173 | it 'can be passed a 3rd parameter with overwrites (old format)' do 174 | expect( "\t".display_width(1, { 0x09 => 12 }) ).to eq 12 175 | end 176 | 177 | it 'can be passed as :overwrite option' do 178 | expect( "\t".display_width(overwrite: { 0x09 => 12 }) ).to eq 12 179 | end 180 | end 181 | 182 | describe '[encoding]' do 183 | it 'works with non-utf8 Unicode encodings' do 184 | expect( 'À'.encode("UTF-16LE").display_width ).to eq 1 185 | end 186 | 187 | it 'works with a string that is invalid in its encoding' do 188 | s = "\x81\x39".dup.force_encoding(Encoding::SHIFT_JIS) 189 | 190 | # Would print as �9 on the terminal 191 | expect( s.display_width ).to eq 2 192 | end 193 | 194 | it 'works with a binary encoded string that is valid in UTF-8' do 195 | expect( '€'.b.display_width ).to eq 1 196 | end 197 | end 198 | 199 | describe '[emoji]' do 200 | describe '(basic emoji / text emoji)' do 201 | it 'counts default-text presentation Emoji according to EAW (example: 1)' do 202 | expect( "❣".display_width(emoji: :all) ).to eq 1 203 | end 204 | 205 | it 'counts default-text presentation Emoji according to EAW (example: ambiguous)' do 206 | expect( "♀".display_width(1, emoji: :all) ).to eq 1 207 | expect( "♀".display_width(2, emoji: :all) ).to eq 2 208 | end 209 | 210 | it 'counts default-text presentation Emoji with Emoji Presentation (VS16) as 2' do 211 | expect( "❣️".display_width(emoji: :all) ).to eq 2 212 | end 213 | 214 | it 'counts default-text presentation Emoji with Emoji Presentation (VS16) as 2 (in a sequence)' do 215 | expect( "❣️‍❣️".display_width(emoji: :rgi) ).to eq 4 216 | end 217 | 218 | it 'counts default-emoji presentation Emoji according to EAW (always 2)' do 219 | expect( "💚".display_width(emoji: :all) ).to eq 2 220 | end 221 | end 222 | 223 | describe '(special emoji / emoji sequences)' do 224 | it 'works with flags: width 2' do 225 | expect( "🇵🇹".display_width(emoji: :all) ).to eq 2 226 | end 227 | 228 | it 'works with subdivision flags: width 2' do 229 | expect( "🏴󠁧󠁢󠁥󠁮󠁧󠁿".display_width(emoji: :all) ).to eq 2 230 | end 231 | 232 | it 'works with keycaps: width 2' do 233 | expect( "1️⃣".display_width(emoji: :all) ).to eq 2 234 | end 235 | end 236 | 237 | describe '(modifiers and zwj sequences)' do 238 | it 'applies simple skin tone modifiers' do 239 | expect( "👏🏽".display_width(emoji: :rgi) ).to eq 2 240 | end 241 | 242 | it 'counts RGI Emoji ZWJ sequence as width 2' do 243 | expect( "🤾🏽‍♀️".display_width(emoji: :rgi) ).to eq 2 244 | end 245 | 246 | it 'works for emoji involving characters which are east asian ambiguous' do 247 | expect( "🤾🏽‍♀️".display_width(2, emoji: :rgi) ).to eq 2 248 | end 249 | end 250 | 251 | describe '(modes)' do 252 | describe 'false / :none' do 253 | it 'does no Emoji adjustments when emoji suport is disabled' do 254 | expect( "🤾🏽‍♀️".display_width(emoji: false) ).to eq 5 255 | expect( "❣️".display_width(emoji: :none) ).to eq 1 256 | expect( "👏🏽".display_width(emoji: :none) ).to eq 4 257 | end 258 | end 259 | 260 | describe ':vs16' do 261 | it 'will ignore shorter width of all Emoji sequences' do 262 | # Please note that this is different from emoji: false / emoji: :none 263 | # -> Basic Emoji with VS16 still get normalized 264 | expect( "🤾🏽‍♀️".display_width(emoji: :vs16) ).to eq 6 265 | end 266 | 267 | it 'counts default-text presentation Emoji with Emoji Presentation (VS16) as 2' do 268 | expect( "❣️".display_width(emoji: :vs16) ).to eq 2 269 | end 270 | 271 | it 'works with keycaps: width 2' do 272 | expect( "1️⃣".display_width(emoji: :vs16) ).to eq 2 273 | end 274 | end 275 | 276 | describe ':rgi' do 277 | it 'will ignore shorter width of non-RGI sequences' do 278 | expect( "🤾🏽‍♀️".display_width(emoji: :rgi) ).to eq 2 # FQE 279 | expect( "🤾🏽‍♀".display_width(emoji: :rgi) ).to eq 2 # MQE 280 | expect( "❤‍🩹".display_width(emoji: :rgi) ).to eq 2 # UQE 281 | expect( "👏🏽".display_width(emoji: :rgi) ).to eq 2 # Modifier 282 | expect( "J🏽".display_width(emoji: :rgi) ).to eq 3 # Modifier with invalid base 283 | expect( "🤠‍🤢".display_width(emoji: :rgi) ).to eq 4 # Non-RGI/well-formed 284 | expect( "🚄🏾‍▶️".display_width(emoji: :rgi) ).to eq 6 # Invalid/non-Emoji sequence 285 | end 286 | 287 | it 'counts default-text presentation Emoji with Emoji Presentation (VS16) as 2' do 288 | expect( "❣️".display_width(emoji: :rgi) ).to eq 2 289 | end 290 | end 291 | 292 | describe ':rgi_at' do 293 | it 'will assign width based on EAW of first partial Emoji to whole sequence' do 294 | expect( "🤾🏽‍♀️".display_width(emoji: :rgi_at) ).to eq 2 295 | expect( "⛹️‍♀️".display_width(emoji: :rgi_at) ).to eq 1 296 | expect( "❤‍🩹".display_width(emoji: :rgi_at) ).to eq 1 297 | end 298 | 299 | it 'will count partial emoji for non-RGI sequences' do 300 | expect( "🤠‍🤢".display_width(emoji: :rgi_at) ).to eq 4 # Non-RGI/well-formed 301 | expect( "🚄🏾‍▶️".display_width(emoji: :rgi_at) ).to eq 5 # Invalid/non-Emoji sequence 302 | end 303 | 304 | it 'uses EAW for default-text presentation Emoji with Emoji Presentation (VS16)' do 305 | expect( "❣️".display_width(emoji: :rgi_at) ).to eq 1 306 | end 307 | end 308 | 309 | describe ':possible' do 310 | it 'will treat possible/well-formed Emoji sequence as width 2' do 311 | expect( "🤾🏽‍♀️".display_width(emoji: :possible) ).to eq 2 # FQE 312 | expect( "🤾🏽‍♀".display_width(emoji: :possible) ).to eq 2 # MQE 313 | expect( "❤‍🩹".display_width(emoji: :possible) ).to eq 2 # UQE 314 | expect( "👏🏽".display_width(emoji: :possible) ).to eq 2 # Modifier 315 | expect( "J🏽".display_width(emoji: :possible) ).to eq 3 # Modifier with invalid base 316 | expect( "🤠‍🤢".display_width(emoji: :possible) ).to eq 2 # Non-RGI/well-formed 317 | expect( "🚄🏾‍▶️".display_width(emoji: :possible) ).to eq 6 # Invalid/non-Emoji sequence 318 | end 319 | 320 | it 'counts default-text presentation Emoji with Emoji Presentation (VS16) as 2' do 321 | expect( "❣️".display_width(emoji: :possible) ).to eq 2 322 | end 323 | end 324 | 325 | describe ':all' do 326 | it 'will treat any ZWJ/modifier/keycap sequences sequence as width 2' do 327 | expect( "🤾🏽‍♀️".display_width(emoji: :all) ).to eq 2 # FQE 328 | expect( "🤾🏽‍♀".display_width(emoji: :all) ).to eq 2 # MQE 329 | expect( "❤‍🩹".display_width(emoji: :all) ).to eq 2 # UQE 330 | expect( "👏🏽".display_width(emoji: :all) ).to eq 2 # Modifier 331 | expect( "👏🏽".display_width(emoji: :all) ).to eq 2 # Modifier 332 | expect( "J🏽".display_width(emoji: :all) ).to eq 2 # Modifier with invalid base 333 | expect( "🤠‍🤢".display_width(emoji: :all) ).to eq 2 # Non-RGI/well-formed 334 | expect( "🚄🏾‍▶️".display_width(emoji: :all) ).to eq 2 # Invalid/non-Emoji sequence 335 | end 336 | 337 | it 'counts default-text presentation Emoji with Emoji Presentation (VS16) as 2' do 338 | expect( "❣️".display_width(emoji: :all) ).to eq 2 339 | end 340 | end 341 | 342 | describe ':all_no_vs16' do 343 | it 'will treat any ZWJ/modifier/keycap sequences sequence as width 2' do 344 | expect( "🤾🏽‍♀️".display_width(emoji: :all_no_vs16) ).to eq 2 # FQE 345 | expect( "🤾🏽‍♀".display_width(emoji: :all_no_vs16) ).to eq 2 # MQE 346 | expect( "❤‍🩹".display_width(emoji: :all_no_vs16) ).to eq 2 # UQE 347 | expect( "👏🏽".display_width(emoji: :all_no_vs16) ).to eq 2 # Modifier 348 | expect( "J🏽".display_width(emoji: :all_no_vs16) ).to eq 2 # Modifier with wrong base 349 | expect( "🤠‍🤢".display_width(emoji: :all_no_vs16) ).to eq 2 # Non-RGI/well-formed 350 | expect( "🚄🏾‍▶️".display_width(emoji: :all_no_vs16) ).to eq 2 # Invalid/non-Emoji sequence 351 | end 352 | 353 | it 'uses EAW for default-text presentation Emoji with Emoji Presentation (VS16)' do 354 | expect( "❣️".display_width(emoji: :all_no_vs16) ).to eq 1 355 | end 356 | end 357 | end 358 | end 359 | end 360 | 361 | describe "Config object based API" do 362 | let :display_width do 363 | Unicode::DisplayWidth.new( 364 | # ambiguous: 1, 365 | overwrite: { "A".ord => 100 }, 366 | emoji: :all, 367 | ) 368 | end 369 | 370 | it "will respect given overwrite option" do 371 | expect( display_width.of "A" ).to eq 100 372 | end 373 | 374 | it "will respect given emoji option" do 375 | expect( display_width.of "🤠‍🤢" ).to eq 2 376 | end 377 | end 378 | --------------------------------------------------------------------------------