├── .github └── workflows │ └── test.yml ├── .gitignore ├── CHANGELOG.md ├── CODE_OF_CONDUCT.md ├── Gemfile ├── MIT-LICENSE.txt ├── README.md ├── Rakefile ├── characteristics.gemspec ├── lib ├── characteristics.rb └── characteristics │ ├── ascii.rb │ ├── binary.rb │ ├── byte.rb │ ├── unicode.rb │ └── version.rb └── spec └── characteristics_spec.rb /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | test: 7 | name: Ruby ${{ matrix.ruby }} (${{ matrix.os }}) 8 | if: "!contains(github.event.head_commit.message, '[skip ci]')" 9 | strategy: 10 | matrix: 11 | ruby: 12 | - '3.3' 13 | - '3.2' 14 | - '3.1' 15 | - '3.0' 16 | - jruby 17 | - truffleruby 18 | os: 19 | - ubuntu-latest 20 | - macos-latest 21 | runs-on: ${{matrix.os}} 22 | steps: 23 | - uses: actions/checkout@v2 24 | - name: Set up Ruby 25 | uses: ruby/setup-ruby@v1 26 | with: 27 | ruby-version: ${{matrix.ruby}} 28 | bundler-cache: true 29 | - name: Run tests 30 | run: bundle exec rake 31 | 32 | test-windows: 33 | name: Ruby ${{ matrix.ruby }} (windows-latest) 34 | if: "!contains(github.event.head_commit.message, '[skip ci]')" 35 | strategy: 36 | matrix: 37 | ruby: 38 | - '3.3' 39 | - '3.2' 40 | - '3.1' 41 | - '3.0' 42 | - jruby 43 | runs-on: windows-latest 44 | steps: 45 | - uses: actions/checkout@v2 46 | - name: Set up Ruby 47 | uses: ruby/setup-ruby@v1 48 | with: 49 | ruby-version: ${{matrix.ruby}} 50 | bundler-cache: true 51 | - name: Run tests 52 | run: bundle exec rake 53 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | Gemfile.lock 2 | /pkg 3 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## CHANGELOG 2 | 3 | ### 1.7.1 4 | 5 | * Fixes in README and GEMSPEC 6 | * Remove "U+110B1 Kaithi Vowel Sign I" from blanks 7 | 8 | ### 1.7.0 9 | 10 | * Unicode 16.0 (general categories) 11 | 12 | ### 1.6.0 13 | 14 | * Unicode 15.1 (general categories) 15 | 16 | ### 1.5.0 17 | 18 | * Unicode 15 (general categories) 19 | 20 | ### 1.4.1 21 | 22 | * Add NO-BREAK SPACE to blanks 😅 23 | 24 | ### 1.4.0 25 | 26 | * Unicode 14 (general categories) 27 | 28 | ### 1.3.0 29 | 30 | * Unicode 13 (general categories) 31 | * Relax Ruby version requirement to allow Ruby 3.0 32 | 33 | ### 1.2.0 34 | 35 | * Unicode 12 (general categories) 36 | 37 | ### 1.1.0 38 | 39 | * Unicode 11 (general categories) 40 | 41 | ### 1.0.0 42 | 43 | * Mark as production-ready, no API changes 44 | 45 | ### 0.8.0 46 | 47 | * Unicode 10 (general categories) 48 | 49 | ### 0.7.0 50 | 51 | * Add more Unicode properties 52 | * variation_selector? 53 | * tag? 54 | * ignorable? 55 | * noncharacter? 56 | 57 | ### 0.6.0 58 | 59 | * Add separator? property 60 | * Ensure all characteristics have a c0? / c1? method 61 | * Add GB1988 encoding (which is a 7bit ascii-like) 62 | 63 | ### 0.5.2 64 | 65 | * Add another Hangul blank (U+FFA0) 66 | 67 | ### 0.5.1 68 | 69 | * (Proper version of gem on rubygems.org) 70 | 71 | ### 0.5.0 72 | 73 | * Add bidi_control? property 74 | * Treat NEL (in C1 area) as separator 75 | * Treat RLM and LRM as blanks 76 | * Treat CGJ as blank 77 | 78 | ### 0.4.0 79 | 80 | * Support Japanese Emojis (KDDI / SoftBank / DoCoMo) for Unicode 81 | 82 | ### 0.3.1 83 | 84 | * Add two Khmer blanks (U+17B4, U+17B5) 85 | * Add one Kaithi blank (U+110B1) 86 | * Add one Syriac blank (U+070F) 87 | * Add one Arabic blank (U+061C) 88 | * Make general category readable for UnicodeCharacteristics 89 | 90 | ### 0.3.0 91 | 92 | * Add soft-hyphen to single byte encodings 93 | * Add format? property (e.g. RLM) 94 | * Support more encodings: IBMX, CP85X, macX, TIS-620, Windows-874, KOI8-X 95 | 96 | ### 0.2.0 97 | 98 | * Fix detection of supported Windows encodings and some unassigned codepoints 99 | * Include unassigned codepoints of ISO-8859-X 100 | 101 | ### 0.1.0 102 | 103 | * Initial release 104 | 105 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, gender identity and expression, level of experience, 9 | nationality, personal appearance, race, religion, or sexual identity and 10 | orientation. 11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at opensource@janlelis.com. All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at [http://contributor-covenant.org/version/1/4][version] 72 | 73 | [homepage]: http://contributor-covenant.org 74 | [version]: http://contributor-covenant.org/version/1/4/ 75 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source 'https://rubygems.org' 2 | 3 | gemspec 4 | 5 | gem 'minitest' 6 | gem 'rake' 7 | -------------------------------------------------------------------------------- /MIT-LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017-2024 Jan Lelis, https://janlelis.com 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining 4 | a copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be 12 | included in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Characteristics [![[version]](https://badge.fury.io/rb/characteristics.svg)](https://badge.fury.io/rb/characteristics) [![[ci]](https://github.com/janlelis/characteristics/workflows/Test/badge.svg)](https://github.com/janlelis/characteristics/actions?query=workflow%3ATest) 2 | 3 | A Ruby library that provides additional info about characters:¹ 4 | 5 | - Could a character be invisible (blank)? 6 | - Is a character assigned? 7 | - Is a character a special control character? 8 | 9 | Extra data is available for Unicode characters (see below). 10 | 11 | The [unibits](https://github.com/janlelis/unibits) and [uniscribe](https://github.com/janlelis/uniscribe) gems makes use of this data to visualize it accordingly. 12 | 13 | ¹ in the sense of [codepoints](https://en.wikipedia.org/wiki/Codepoint) 14 | 15 | ## Setup 16 | 17 | Add to your `Gemfile`: 18 | 19 | ```ruby 20 | gem 'characteristics' 21 | ``` 22 | 23 | ## Usage 24 | 25 | ```ruby 26 | # All supported encodings 27 | char_info = Characteristics.create(character) 28 | char_info.valid? # => true / false 29 | char_info.unicode? # => true / false 30 | char_info.assigned? # => true / false 31 | char_info.control? # => true / false 32 | char_info.blank? # => true / false 33 | char_info.separator? # => true / false 34 | char_info.format? # => true / false 35 | 36 | # Unicode characters 37 | char_info = Characteristics.create(character) 38 | char_info.variation_selector? # => true / false 39 | char_info.tag? # => true / false 40 | char_info.ignorable? # => true / false 41 | char_info.noncharacter? # => true / false 42 | ``` 43 | 44 | ## Types of Encodings 45 | 46 | This library knows of four different kinds of encodings: 47 | 48 | - **:unicode** Unicode familiy of multi-byte encodings 49 | - *UTF-X* 50 | - **:byte** Known single-byte encoding 51 | - *ISO-8859-X*, *Windows-125X*, *IBMX*, *CP85X*, *macX*, *TIS-620*, *Windows-874*, *KOI-X* 52 | - **:ascii** 7-Bit ASCII 53 | - *US-ASCII*, *GB1988* 54 | - **:binary** Arbitrary string 55 | - *ASCII-8BIT* 56 | 57 | Other encodings are currently not supported. 58 | 59 | ## Properties 60 | 61 | ### General 62 | 63 | #### `valid?` 64 | 65 | Validness is determined by Ruby's `String#valid_encoding?` 66 | 67 | #### `unicode?` 68 | 69 | **true** for Unicode encodings (`UTF-X`) 70 | 71 | #### `control?` 72 | 73 | Control characters are codepoints in the is [C0, delete or C1 control character range](https://en.wikipedia.org/wiki/C0_and_C1_control_codes). Characters in this range of [IBM codepage 437](https://en.wikipedia.org/wiki/Code_page_437) based encodings are always treated as control characters. 74 | 75 | #### `assigned?` 76 | 77 | - All valid ASCII and BINARY characters are considered assigned 78 | - For other byte based encodings, a character is considered assigned if it is not on the exception list included in this library. C0 control characters (and `\x7F`) are always considered assigned. C1 control characters are treated as assigned, if the encoding generally does not assign characters in the C1 region. 79 | - For Unicode, the general category is considered 80 | 81 | #### `blank?` 82 | 83 | The library includes a list of characters that might not be rendered visually. This list does not include unassigned codepoints, control characters (except for `\t`, `\n`, `\v`, `\f`, `\r`, and `\u{85}` in Unicode), or special formatting characters (right-to-left markers, variation selectors, etc). 84 | 85 | #### `separator?` 86 | 87 | Returns true if character is considered a separator. All separators also return true for the `blank?` check. In Unicode, the following characters are separators: `\n`, `\v`, `\f`, `\r`, `\u{85}` (next line), `\u{2028}` (line separator), and `\u{2029}` (paragraph separator) 88 | 89 | #### `format?` 90 | 91 | This flag is *true* only for special formatting characters, which are not control characters, like right-to-left marks. In Unicode, this means codepoints with the General Category of **Cf**. 92 | 93 | ### Additional Unicode Properties 94 | 95 | #### `variation_selector?` 96 | 97 | **true** for [variation selectors](https://en.wikipedia.org/wiki/Variation_Selector). 98 | 99 | #### `tag?` 100 | 101 | **true** for [tags](https://en.wikipedia.org/wiki/Tags_(Unicode_block)). 102 | 103 | #### `ignorable?` 104 | 105 | **true** for characters which might not be implemented, and thus, might render no visible glyph. 106 | 107 | #### `noncharacter?` 108 | 109 | **true** if codepoint will never be assigned in a future standard of Unicode. 110 | 111 | ## Also See 112 | 113 | - [Symbolify](https://github.com/janlelis/symbolify) 114 | 115 | ## MIT License 116 | 117 | Copyright (C) 2017-2024 Jan Lelis . Released under the MIT license. 118 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | # # # 2 | # Get gemspec info 3 | 4 | gemspec_file = Dir['*.gemspec'].first 5 | gemspec = eval File.read(gemspec_file), binding, gemspec_file 6 | info = "#{gemspec.name} | #{gemspec.version} | " \ 7 | "#{gemspec.runtime_dependencies.size} dependencies | " \ 8 | "#{gemspec.files.size} files" 9 | 10 | # # # 11 | # Gem build and install task 12 | 13 | desc info 14 | task :gem do 15 | puts info + "\n\n" 16 | print " "; sh "gem build #{gemspec_file}" 17 | FileUtils.mkdir_p 'pkg' 18 | FileUtils.mv "#{gemspec.name}-#{gemspec.version}.gem", 'pkg' 19 | puts; sh %{gem install --no-document pkg/#{gemspec.name}-#{gemspec.version}.gem} 20 | end 21 | 22 | # # # 23 | # Start an IRB session with the gem loaded 24 | 25 | desc "#{gemspec.name} | IRB" 26 | task :irb do 27 | sh "irb -I ./lib -r #{gemspec.name.gsub '-','/'}" 28 | end 29 | 30 | # # # 31 | # Run specs 32 | 33 | desc "#{gemspec.name} | Spec" 34 | task :spec do 35 | if RbConfig::CONFIG['host_os'] =~ /mswin|mingw/ 36 | sh "for %f in (spec/\*.rb) do ruby spec/%f" 37 | else 38 | sh "for file in spec/*.rb; do ruby $file; done" 39 | end 40 | end 41 | task default: :spec 42 | 43 | -------------------------------------------------------------------------------- /characteristics.gemspec: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | require File.dirname(__FILE__) + "/lib/characteristics/version" 4 | 5 | Gem::Specification.new do |gem| 6 | gem.name = "characteristics" 7 | gem.version = Characteristics::VERSION 8 | gem.summary = "Basic character properties" 9 | gem.description = "Provides basic information about how characters behave in different encodings" 10 | gem.authors = ["Jan Lelis"] 11 | gem.email = ["hi@ruby.consulting"] 12 | gem.homepage = "https://github.com/janlelis/characteristics" 13 | gem.license = "MIT" 14 | 15 | gem.files = Dir["{**/}{.*,*}"].select{ |path| File.file?(path) && path !~ /^pkg/ } 16 | gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) } 17 | gem.test_files = gem.files.grep(%r{^(test|spec|features)/}) 18 | gem.require_paths = ["lib"] 19 | gem.metadata = { "rubygems_mfa_required" => "true" } 20 | 21 | gem.required_ruby_version = ">= 2.0" 22 | gem.add_dependency 'unicode-categories', '~> 1.10' 23 | end 24 | -------------------------------------------------------------------------------- /lib/characteristics.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require_relative "characteristics/version" 4 | 5 | require_relative "characteristics/ascii" 6 | require_relative "characteristics/binary" 7 | require_relative "characteristics/byte" 8 | require_relative "characteristics/unicode" 9 | 10 | class Characteristics 11 | def self.type_from_encoding_name(encoding_name) 12 | case encoding_name 13 | when "US-ASCII", "GB1988" 14 | :ascii 15 | when "ASCII-8BIT" 16 | :binary 17 | when /^UTF-?/ 18 | :unicode 19 | when /^ISO-8859-/, /^Windows-125/, /^(IBM|CP85)/, /^mac/, 'TIS-620', 'Windows-874', /^KOI8-/ 20 | :byte 21 | else 22 | raise ArgumentError, "encoding <#{encoding_name}> not supported" 23 | end 24 | end 25 | 26 | def self.create_for_type(char, type) 27 | case type 28 | when :unicode 29 | UnicodeCharacteristics.new(char) 30 | when :byte 31 | ByteCharacteristics.new(char) 32 | when :ascii 33 | AsciiCharacteristics.new(char) 34 | else 35 | BinaryCharacteristics.new(char) 36 | end 37 | end 38 | 39 | def self.create(char) 40 | create_for_type(char, type_from_encoding_name(char.encoding.name)) 41 | end 42 | 43 | attr_reader :encoding 44 | 45 | def initialize(char) 46 | raise ArgumentError, "Do not use abstract Characteristics.new(char) directly, please use Characteristics.create(char)" if self.class == Characteristics 47 | 48 | @is_valid = char.valid_encoding? 49 | @encoding = char.encoding 50 | @encoding_name = @encoding.name 51 | end 52 | 53 | def valid? 54 | @is_valid 55 | end 56 | 57 | def unicode? 58 | end 59 | 60 | def assigned? 61 | end 62 | 63 | def control? 64 | end 65 | 66 | def c0? 67 | end 68 | 69 | def delete? 70 | end 71 | 72 | def c1? 73 | end 74 | 75 | def blank? 76 | end 77 | 78 | def format? 79 | end 80 | 81 | def bidi_control? 82 | end 83 | 84 | # private use emojis 85 | def kddi? 86 | end 87 | 88 | # private use emojis 89 | def softbank? 90 | end 91 | 92 | # private use emojis 93 | def docomo? 94 | end 95 | 96 | private 97 | 98 | def encoding_has_kddi? 99 | @encoding_name.end_with? "KDDI" 100 | end 101 | 102 | def encoding_has_softbank? 103 | @encoding_name.end_with? "SoftBank" 104 | end 105 | 106 | def encoding_has_docomo? 107 | @encoding_name.end_with? "DoCoMo" 108 | end 109 | 110 | def encoding_has_c0? 111 | true 112 | end 113 | 114 | def encoding_has_delete? 115 | true 116 | end 117 | 118 | def encoding_has_c1? 119 | false 120 | end 121 | end 122 | -------------------------------------------------------------------------------- /lib/characteristics/ascii.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | class AsciiCharacteristics < Characteristics 4 | BLANKS = [ 5 | 0x9, 6 | 0x20, 7 | ].freeze 8 | 9 | SEPARATORS = [ 10 | 0xA, 11 | 0xB, 12 | 0xC, 13 | 0xD, 14 | ].freeze 15 | 16 | def initialize(char) 17 | super 18 | @ord = char.ord if @is_valid 19 | end 20 | 21 | def valid? 22 | @is_valid && !(@encoding_name != "US-ASCII" && @ord >= 0x80) 23 | end 24 | 25 | def unicode? 26 | false 27 | end 28 | 29 | def assigned? 30 | true 31 | end 32 | 33 | def control? 34 | c0? || delete? 35 | end 36 | 37 | def c0? 38 | @is_valid && @ord < 0x20 39 | end 40 | 41 | def delete? 42 | @is_valid && @ord == 0x7F 43 | end 44 | 45 | def c1? 46 | false 47 | end 48 | 49 | def blank? 50 | @is_valid && ( BLANKS.include?(@ord) || SEPARATORS.include?(@ord) ) 51 | end 52 | 53 | def separator? 54 | SEPARATORS.include?(@ord) 55 | end 56 | 57 | def format? 58 | false 59 | end 60 | 61 | def bidi_control? 62 | false 63 | end 64 | end -------------------------------------------------------------------------------- /lib/characteristics/binary.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | class BinaryCharacteristics < Characteristics 4 | BLANKS = [ 5 | 0x9, 6 | 0x20, 7 | ].freeze 8 | 9 | SEPARATORS = [ 10 | 0xA, 11 | 0xB, 12 | 0xC, 13 | 0xD, 14 | ].freeze 15 | 16 | def initialize(char) 17 | @ord = char.ord 18 | @encoding = char.encoding 19 | @encoding_name = @encoding.name 20 | end 21 | 22 | def valid? 23 | true 24 | end 25 | 26 | def unicode? 27 | false 28 | end 29 | 30 | def assigned? 31 | true 32 | end 33 | 34 | def control? 35 | c0? || delete? 36 | end 37 | 38 | def c0? 39 | @ord < 0x20 40 | end 41 | 42 | def delete? 43 | @ord == 0x7F 44 | end 45 | 46 | def c1? 47 | false 48 | end 49 | 50 | def blank? 51 | BLANKS.include?(@ord) || SEPARATORS.include?(@ord) 52 | end 53 | 54 | def separator? 55 | SEPARATORS.include?(@ord) 56 | end 57 | 58 | def format? 59 | false 60 | end 61 | 62 | def bidi_control? 63 | false 64 | end 65 | end -------------------------------------------------------------------------------- /lib/characteristics/byte.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | class ByteCharacteristics < Characteristics 4 | HAS_C1 = /^(ISO-8859-|TIS-620)/ 5 | 6 | UNASSIGNED = { 7 | 0x80 => /^(IBM869)/, 8 | 0x81 => /^(Windows-(1250|1252|1253|1254|1255|1257|1258)|IBM869|Windows-874)/, 9 | 0x82 => /^(IBM869|Windows-874)/, 10 | 0x83 => /^(Windows-(1250|1257)|IBM869|Windows-874)/, 11 | 0x84 => /^(IBM869|Windows-874)/, 12 | 0x85 => /^(IBM869)/, 13 | 0x86 => /^(Windows-874)/, 14 | 0x87 => /^(IBM869|Windows-874)/, 15 | 0x88 => /^(Windows-(1250|1253|1257)|Windows-874)/, 16 | 0x89 => /^(Windows-874)/, 17 | 0x8A => /^(Windows-(1253|1255|1257|1258)|Windows-874)/, 18 | 0x8B => /^(Windows-874)/, 19 | 0x8C => /^(Windows-(1253|1255|1257)|Windows-874)/, 20 | 0x8D => /^(Windows-(1252|1253|1254|1255|1258)|Windows-874)/, 21 | 0x8E => /^(Windows-(1253|1254|1255|1258)|Windows-874)/, 22 | 0x8F => /^(Windows-(1252|1253|1254|1255|1258)|Windows-874)/, 23 | 24 | 0x90 => /^(Windows-(1250|1252|1253|1254|1255|1257|1258)|macThai|Windows-874)/, 25 | 0x93 => /^(IBM869)/, 26 | 0x94 => /^(IBM869)/, 27 | 0x98 => /^(Windows-(1250|1251|1253|1257)|Windows-874)/, 28 | 0x99 => /^(Windows-874)/, 29 | 0x9A => /^(Windows-(1253|1255|1257|1258)|Windows-874)/, 30 | 0x9B => /^(IBM864|Windows-874)/, 31 | 0x9C => /^(Windows-(1253|1255|1257)|IBM864|Windows-874)/, 32 | 0x9D => /^(Windows-(1252|1253|1254|1255|1258)|Windows-874)/, 33 | 0x9E => /^(Windows-(1253|1254|1255|1258)|Windows-874)/, 34 | 0x9F => /^(Windows-(1253|1255|1257)|IBM864|macThai|Windows-874)/, 35 | 36 | 0xA0 => /^(TIS-620)/, 37 | 0xA1 => /^(ISO-8859-(6|8)|Windows-(1257))/, 38 | 0xA2 => /^(ISO-8859-(6))/, 39 | 0xA3 => /^(ISO-8859-(6))/, 40 | 0xA5 => /^(ISO-8859-(3|6)|Windows-(1257))/, 41 | 0xA6 => /^(ISO-8859-(6)|IBM864)/, 42 | 0xA7 => /^(ISO-8859-(6))/, 43 | 0xA8 => /^(ISO-8859-(6))/, 44 | 0xA9 => /^(ISO-8859-(6))/, 45 | 0xAA => /^(ISO-8859-(6)|Windows-(1253))/, 46 | 0xAB => /^(ISO-8859-(6))/, 47 | 0xAE => /^(ISO-8859-(3|6|7))/, 48 | 0xAF => /^(ISO-8859-(6))/, 49 | 50 | 0xB0 => /^(ISO-8859-(6))/, 51 | 0xB1 => /^(ISO-8859-(6))/, 52 | 0xB2 => /^(ISO-8859-(6))/, 53 | 0xB3 => /^(ISO-8859-(6))/, 54 | 0xB4 => /^(ISO-8859-(6))/, 55 | 0xB5 => /^(ISO-8859-(6))/, 56 | 0xB6 => /^(ISO-8859-(6))/, 57 | 0xB7 => /^(ISO-8859-(6))/, 58 | 0xB8 => /^(ISO-8859-(6))/, 59 | 0xB9 => /^(ISO-8859-(6))/, 60 | 0xBA => /^(ISO-8859-(6))/, 61 | 0xBC => /^(ISO-8859-(6))/, 62 | 0xBD => /^(ISO-8859-(6))/, 63 | 0xBE => /^(ISO-8859-(3|6))/, 64 | 0xBF => /^(ISO-8859-(8))/, 65 | 66 | 0xC0 => /^(ISO-8859-(6|8))/, 67 | 0xC1 => /^(ISO-8859-(8))/, 68 | 0xC2 => /^(ISO-8859-(8))/, 69 | 0xC3 => /^(ISO-8859-(3|8))/, 70 | 0xC4 => /^(ISO-8859-(8))/, 71 | 0xC5 => /^(ISO-8859-(8))/, 72 | 0xC6 => /^(ISO-8859-(8))/, 73 | 0xC7 => /^(ISO-8859-(8))/, 74 | 0xC8 => /^(ISO-8859-(8))/, 75 | 0xC9 => /^(ISO-8859-(8))/, 76 | 0xCA => /^(ISO-8859-(8))/, 77 | 0xCB => /^(ISO-8859-(8))/, 78 | 0xCC => /^(ISO-8859-(8))/, 79 | 0xCD => /^(ISO-8859-(8))/, 80 | 0xCE => /^(ISO-8859-(8))/, 81 | 0xCF => /^(ISO-8859-(8))/, 82 | 83 | 0xD0 => /^(ISO-8859-(3|8))/, 84 | 0xD1 => /^(ISO-8859-(8))/, 85 | 0xD2 => /^(ISO-8859-(7|8)|Windows-(1253))/, 86 | 0xD3 => /^(ISO-8859-(8))/, 87 | 0xD4 => /^(ISO-8859-(8))/, 88 | 0xD5 => /^(ISO-8859-(8)|IBM857)/, # IBM857: Ruby does not support euro sign 89 | 0xD6 => /^(ISO-8859-(8))/, 90 | 0xD7 => /^(ISO-8859-(8))/, 91 | 0xD8 => /^(ISO-8859-(8))/, 92 | 0xD9 => /^(ISO-8859-(8)|Windows-(1255))/, 93 | 0xDA => /^(ISO-8859-(8)|Windows-(1255))/, 94 | 0xDB => /^(ISO-8859-(6|8|11)|Windows-(1255)|TIS-620|Windows-874)/, 95 | 0xDC => /^(ISO-8859-(6|8|11)|Windows-(1255)|TIS-620|Windows-874)/, 96 | 0xDD => /^(ISO-8859-(6|8|11)|Windows-(1255)|TIS-620|Windows-874)/, 97 | 0xDE => /^(ISO-8859-(6|8|11)|Windows-(1255)|TIS-620|Windows-874)/, 98 | 0xDF => /^(ISO-8859-(6)|Windows-(1255))/, 99 | 100 | 0xE3 => /^(ISO-8859-(3))/, 101 | 0xE7 => /^(IBM857)/, 102 | 103 | 0xF0 => /^(ISO-8859-(3))/, # mac: Treating F0 as always assigned 104 | 0xF2 => /^(IBM857)/, 105 | 0xF3 => /^(ISO-8859-(6))/, 106 | 0xF4 => /^(ISO-8859-(6))/, 107 | 0xF5 => /^(ISO-8859-(6)|macTurkish)/, 108 | 0xF6 => /^(ISO-8859-(6))/, 109 | 0xF7 => /^(ISO-8859-(6))/, 110 | 0xF8 => /^(ISO-8859-(6))/, 111 | 0xF9 => /^(ISO-8859-(6))/, 112 | 0xFA => /^(ISO-8859-(6))/, 113 | 0xFB => /^(ISO-8859-(6|8)|Windows-(1255))/, 114 | 0xFC => /^(ISO-8859-(6|8|11)|Windows-(1255)|macThai|TIS-620|Windows-874)/, 115 | 0xFD => /^(ISO-8859-(6|11)|macThai|TIS-620|Windows-874)/, 116 | 0xFE => /^(ISO-8859-(6|11)|macThai|TIS-620|Windows-874)/, 117 | 0xFF => /^(ISO-8859-(6|7|8|11)|Windows-(1253|1255)|IBM864|macGreek|macThai|TIS-620|Windows-874)/, # macGreek: Ruby does not know of soft hyphen at FF 118 | }.freeze 119 | 120 | BLANKS = [ 121 | 0x9, 122 | 0x20, 123 | ].freeze 124 | 125 | SEPARATORS = [ 126 | 0xA, 127 | 0xB, 128 | 0xC, 129 | 0xD, 130 | ].freeze 131 | 132 | EXTRA_BLANKS = { 133 | 0xA0 => /^(ISO-8859-|Windows-125|macThai|Windows-874)/, 134 | 0xA1 => /^IBM864/, 135 | 0xAD => /^(ISO-8859-(?!11)|Windows-125)/, 136 | 0x9A => /^KOI8-/, 137 | 0x9D => /^Windows-(1256)/, 138 | 0x9E => /^Windows-(1256)/, 139 | 0xCA => /^mac(?!Thai)/, 140 | 0xDB => /^macThai/, 141 | 0xDC => /^macThai/, 142 | 0xF0 => /^(IBM(?!437|737|86)|IBM869|CP)/, 143 | 0xFF => /^(IBM(?!864)|CP)/, # |macGreek, but is unnasigned in Ruby 144 | }.freeze 145 | 146 | FORMATS = { 147 | 0xFD => /^(ISO-8859-8|Windows-(1255|1256))/, 148 | 0xFE => /^(ISO-8859-8|Windows-(1255|1256))/, 149 | }.freeze 150 | 151 | def initialize(char) 152 | super 153 | @ord = char.ord 154 | end 155 | 156 | def unicode? 157 | false 158 | end 159 | 160 | def assigned? 161 | control? || UNASSIGNED[@ord] !~ @encoding_name 162 | end 163 | 164 | def control? 165 | c0? || c1? || delete? 166 | end 167 | 168 | def c0? 169 | @ord < 0x20 && encoding_has_c0? 170 | end 171 | 172 | def c1? 173 | @ord >= 0x80 && @ord < 0xA0 && encoding_has_c1? 174 | end 175 | 176 | def delete? 177 | @ord == 0x7F && encoding_has_delete? 178 | end 179 | 180 | def blank? 181 | BLANKS.include?(@ord) || 182 | SEPARATORS.include?(@ord) || 183 | EXTRA_BLANKS[@ord] =~ @encoding_name 184 | end 185 | 186 | def separator? 187 | SEPARATORS.include?(@ord) 188 | end 189 | 190 | def format? 191 | FORMATS[@ord] =~ @encoding_name 192 | end 193 | 194 | def bidi_control? 195 | format? 196 | end 197 | 198 | private 199 | 200 | def encoding_has_c1? 201 | !!(HAS_C1 =~ @encoding_name) 202 | end 203 | end -------------------------------------------------------------------------------- /lib/characteristics/unicode.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require "unicode/categories" 4 | 5 | class UnicodeCharacteristics < Characteristics 6 | # Note that this list is maintained by hand and might not cover the latest Unicode updates 7 | # Please open an issue or pull request is you find another character that is rendered invisible: 8 | # - https://github.com/janlelis/characteristics/issues/new 9 | BLANKS = [ 10 | 0x0009, 11 | 0x0020, 12 | 0x00A0, 13 | 0x00AD, 14 | 0x034F, 15 | 0x061C, 16 | 0x070F, 17 | 0x115F, 18 | 0x1160, 19 | 0x1680, 20 | 0x17B4, 21 | 0x17B5, 22 | 0x180E, 23 | 0x2000, 24 | 0x2001, 25 | 0x2002, 26 | 0x2003, 27 | 0x2004, 28 | 0x2005, 29 | 0x2006, 30 | 0x2007, 31 | 0x2008, 32 | 0x2009, 33 | 0x200A, 34 | 0x200B, 35 | 0x200C, 36 | 0x200D, 37 | 0x200E, 38 | 0x200F, 39 | 0x202F, 40 | 0x205F, 41 | 0x2060, 42 | 0x2061, 43 | 0x2062, 44 | 0x2063, 45 | 0x2064, 46 | 0x206A, 47 | 0x206B, 48 | 0x206C, 49 | 0x206D, 50 | 0x206E, 51 | 0x206F, 52 | 0x3000, 53 | 0x2800, 54 | 0x3164, 55 | 0xFEFF, 56 | 0xFFA0, 57 | 0x1BCA0, 58 | 0x1BCA1, 59 | 0x1BCA2, 60 | 0x1BCA3, 61 | 0x1D159, 62 | 0x1D173, 63 | 0x1D174, 64 | 0x1D175, 65 | 0x1D176, 66 | 0x1D177, 67 | 0x1D178, 68 | 0x1D179, 69 | 0x1D17A, 70 | ].freeze 71 | 72 | SEPARATORS = [ 73 | 0x000A, 74 | 0x000B, 75 | 0x000C, 76 | 0x000D, 77 | 0x0085, 78 | 0x2028, 79 | 0x2029, 80 | ].freeze 81 | 82 | BIDI_CONTROL = [ 83 | 0x061C, 84 | 0x200E, 85 | 0x200F, 86 | 0x202A, 87 | 0x202B, 88 | 0x202C, 89 | 0x202D, 90 | 0x202E, 91 | 0x2066, 92 | 0x2067, 93 | 0x2068, 94 | 0x2069, 95 | ].freeze 96 | 97 | VARIATION_SELECTORS = [ 98 | *0x180B..0x180D, 99 | *0xFE00..0xFE0F, 100 | *0xE0100..0xE01EF, 101 | ].freeze 102 | 103 | TAGS = [ 104 | 0xE0001, 105 | *0xE0020..0xE007F, 106 | ].freeze 107 | 108 | NONCHARACTERS = [ 109 | *0xFDD0..0xFDEF, 110 | 0xFFFE, 0xFFFF, 111 | 0x1FFFE, 0x1FFFF, 112 | 0x2FFFE, 0x2FFFF, 113 | 0x3FFFE, 0x3FFFF, 114 | 0x4FFFE, 0x4FFFF, 115 | 0x5FFFE, 0x5FFFF, 116 | 0x6FFFE, 0x6FFFF, 117 | 0x7FFFE, 0x7FFFF, 118 | 0x8FFFE, 0x8FFFF, 119 | 0x9FFFE, 0x9FFFF, 120 | 0xAFFFE, 0xAFFFF, 121 | 0xBFFFE, 0xBFFFF, 122 | 0xCFFFE, 0xCFFFF, 123 | 0xDFFFE, 0xDFFFF, 124 | 0xEFFFE, 0xEFFFF, 125 | 0xFFFFE, 0xFFFFF, 126 | 0x10FFFE, 0x10FFFF, 127 | ].freeze 128 | 129 | IGNORABLE = [ 130 | 0x00AD, 131 | 0x034F, 132 | 0x061C, 133 | *0x115F..0x1160, 134 | *0x17B4..0x17B5, 135 | *0x180B..0x180E, 136 | *0x200B..0x200F, 137 | *0x202A..0x202E, 138 | *0x2060..0x206F, 139 | 0x3164, 140 | *0xFE00..0xFE0F, 141 | 0xFEFF, 142 | 0xFFA0, 143 | *0xFFF0..0xFFF8, 144 | *0x1BCA0..0x1BCA3, 145 | *0x1D173..0x1D17A, 146 | *0xE0000..0xE0FFF, 147 | ].freeze 148 | 149 | KDDI = [ 150 | *0xE468..0xE5DF, 151 | *0xEA80..0xEB8E, 152 | ].freeze 153 | 154 | SOFTBANK = [ 155 | *0xE001..0xE05A, 156 | *0xE101..0xE15A, 157 | *0xE201..0xE25A, 158 | *0xE301..0xE34D, 159 | *0xE401..0xE44C, 160 | *0xE501..0xE53E, 161 | ].freeze 162 | 163 | DOCOMO = [ 164 | *0xE63E..0xE757, 165 | ].freeze 166 | 167 | attr_reader :category 168 | 169 | def initialize(char) 170 | super 171 | 172 | if @is_valid 173 | @category = Unicode::Categories.category(char) 174 | @ord = char.ord 175 | end 176 | end 177 | 178 | def unicode? 179 | true 180 | end 181 | 182 | def assigned? 183 | @is_valid && @category != "Cn" 184 | end 185 | 186 | def control? 187 | @is_valid && @category == "Cc" 188 | end 189 | 190 | def c0? 191 | @is_valid && @ord < 0x20 192 | end 193 | 194 | def delete? 195 | @is_valid && @ord == 0x7F 196 | end 197 | 198 | def c1? 199 | @is_valid && @ord >= 0x80 && @ord < 0xA0 200 | end 201 | 202 | def blank? 203 | @is_valid && ( BLANKS.include?(@ord) || SEPARATORS.include?(@ord) ) 204 | end 205 | 206 | def separator? 207 | @is_valid && SEPARATORS.include?(@ord) 208 | end 209 | 210 | def format? 211 | @is_valid && @category == "Cf" 212 | end 213 | 214 | def bidi_control? 215 | @is_valid && BIDI_CONTROL.include?(@ord) 216 | end 217 | 218 | # unicode specific 219 | 220 | def variation_selector? 221 | @is_valid && VARIATION_SELECTORS.include?(@ord) 222 | end 223 | 224 | def tag? 225 | @is_valid && TAGS.include?(@ord) 226 | end 227 | 228 | def noncharacter? 229 | @is_valid && NONCHARACTERS.include?(@ord) 230 | end 231 | 232 | def ignorable? 233 | @is_valid && IGNORABLE.include?(@ord) 234 | end 235 | 236 | # emoji 237 | 238 | def kddi? 239 | @is_valid && 240 | encoding_has_kddi? && 241 | KDDI.include?(@ord) 242 | end 243 | 244 | def softbank? 245 | @is_valid && 246 | encoding_has_softbank? && 247 | SOFTBANK.include?(@ord) 248 | end 249 | 250 | def docomo? 251 | @is_valid && 252 | encoding_has_docomo? && 253 | DOCOMO.include?(@ord) 254 | end 255 | 256 | private 257 | 258 | def encoding_has_c1? 259 | true 260 | end 261 | end 262 | -------------------------------------------------------------------------------- /lib/characteristics/version.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | class Characteristics 4 | VERSION = "1.7.1" 5 | UNICODE_VERSION = "16.0.0" 6 | end 7 | -------------------------------------------------------------------------------- /spec/characteristics_spec.rb: -------------------------------------------------------------------------------- 1 | require_relative "../lib/characteristics" 2 | require "minitest/autorun" 3 | 4 | describe Characteristics do 5 | def valid?(char) 6 | Characteristics.create(char.force_encoding(encoding)).valid? 7 | end 8 | 9 | def assigned?(char) 10 | Characteristics.create(char.force_encoding(encoding)).assigned? 11 | end 12 | 13 | def control?(char) 14 | Characteristics.create(char.force_encoding(encoding)).control? 15 | end 16 | 17 | def blank?(char) 18 | Characteristics.create(char.force_encoding(encoding)).blank? 19 | end 20 | 21 | def separator?(char) 22 | Characteristics.create(char.force_encoding(encoding)).separator? 23 | end 24 | 25 | def format?(char) 26 | Characteristics.create(char.force_encoding(encoding)).format? 27 | end 28 | 29 | def bidi_control?(char) 30 | Characteristics.create(char.force_encoding(encoding)).bidi_control? 31 | end 32 | 33 | describe UnicodeCharacteristics do 34 | describe "UTF*" do 35 | let(:encoding) { "UTF-8" } 36 | 37 | it "is valid or not" do 38 | assert valid? "\x21" 39 | refute valid? "\x80" 40 | end 41 | 42 | it "is assigned or not" do 43 | assert assigned? "\x21" 44 | refute assigned? "\u{FFEF}" 45 | end 46 | 47 | it "is control or not" do 48 | assert control? "\x1E" 49 | assert control? "\x7F" 50 | assert control? "\u{0080}" 51 | refute control? "\x67" 52 | end 53 | 54 | it "is blank or not" do 55 | assert blank? "\x20" 56 | refute blank? "\x21" 57 | end 58 | 59 | it "is separator or not" do 60 | assert separator? "\n" 61 | refute separator? "\x20" 62 | end 63 | 64 | it "is format or not" do 65 | assert format? "\u{FFF9}" 66 | refute format? "\x21" 67 | end 68 | 69 | it "is bidi_control or not" do 70 | assert bidi_control? "\u{202D}" 71 | refute bidi_control? "\x21" 72 | end 73 | end 74 | 75 | describe "Unicode Properties" do 76 | it "is variation_selector or not" do 77 | assert Characteristics.create("\u{FE00}").variation_selector? 78 | refute Characteristics.create("a").variation_selector? 79 | end 80 | 81 | it "is tag or not" do 82 | assert Characteristics.create("\u{E0020}").tag? 83 | refute Characteristics.create("a").tag? 84 | end 85 | 86 | it "is noncharacter or not" do 87 | assert Characteristics.create("\u{10FFFF}").noncharacter? 88 | refute Characteristics.create("a").noncharacter? 89 | end 90 | 91 | it "is ignorable or not" do 92 | assert Characteristics.create("\u{AD}").ignorable? 93 | assert Characteristics.create("\u{E0000}").ignorable? 94 | refute Characteristics.create(" ").ignorable? 95 | end 96 | end 97 | 98 | describe "Japanese Emojis" do 99 | it "can be a KDDI emoji" do 100 | encoding = "UTF8-KDDI" 101 | assert Characteristics.create("\u{E468}".force_encoding(encoding)).kddi? 102 | refute Characteristics.create("A".force_encoding(encoding)).kddi? 103 | end 104 | 105 | it "can be a SoftBank emoji" do 106 | encoding = "UTF8-SoftBank" 107 | assert Characteristics.create("\u{E001}".force_encoding(encoding)).softbank? 108 | refute Characteristics.create("A".force_encoding(encoding)).softbank? 109 | end 110 | 111 | it "can be a DoCoMo emoji" do 112 | encoding = "UTF8-DoCoMo" 113 | assert Characteristics.create("\u{E63E}".force_encoding(encoding)).docomo? 114 | refute Characteristics.create("A".force_encoding(encoding)).docomo? 115 | end 116 | end 117 | end 118 | 119 | describe BinaryCharacteristics do 120 | describe "ASCII-8BIT" do 121 | let(:encoding) { "ASCII-8BIT" } 122 | 123 | it "is always valid" do 124 | assert valid? "\x80" 125 | end 126 | 127 | it "is always assigned" do 128 | assert assigned? "\x7F" 129 | end 130 | 131 | it "is control or not" do 132 | assert control? "\x1E" 133 | assert control? "\x7F" 134 | refute control? "\x67" 135 | end 136 | 137 | it "is blank or not" do 138 | assert blank? "\x20" 139 | refute blank? "\x21" 140 | end 141 | 142 | it "is separator or not" do 143 | assert separator? "\n" 144 | refute separator? "\x20" 145 | end 146 | 147 | it "is never format" do 148 | refute format? "\x21" 149 | end 150 | 151 | it "is never bidi_control" do 152 | refute bidi_control? "\x21" 153 | end 154 | end 155 | end 156 | 157 | describe AsciiCharacteristics do 158 | describe "US-ASCII" do 159 | let(:encoding) { "US-ASCII" } 160 | 161 | it "is valid or not" do 162 | assert valid? "\x21" 163 | refute valid? "\x80" 164 | end 165 | 166 | it "is always assigned" do 167 | assert assigned? "\x21" 168 | end 169 | 170 | it "is control or not" do 171 | assert control? "\x1E" 172 | assert control? "\x7F" 173 | refute control? "\x67" 174 | end 175 | 176 | it "is blank or not" do 177 | assert blank? "\x20" 178 | refute blank? "\x21" 179 | end 180 | 181 | it "is separator or not" do 182 | assert separator? "\n" 183 | refute separator? "\x20" 184 | end 185 | 186 | it "is never format" do 187 | refute format? "\x21" 188 | end 189 | 190 | it "is never bidi_control" do 191 | refute bidi_control? "\x21" 192 | end 193 | end 194 | 195 | describe "GB1988" do 196 | let(:encoding) { "GB1988" } 197 | 198 | it "is valid or not" do 199 | assert valid? "\x21" 200 | refute valid? "\x80" 201 | end 202 | 203 | it "is always assigned" do 204 | assert assigned? "\x21" 205 | end 206 | 207 | it "is control or not" do 208 | assert control? "\x1E" 209 | assert control? "\x7F" 210 | refute control? "\x67" 211 | end 212 | 213 | it "is blank or not" do 214 | assert blank? "\x20" 215 | refute blank? "\x21" 216 | end 217 | 218 | it "is separator or not" do 219 | assert separator? "\n" 220 | refute separator? "\x20" 221 | end 222 | 223 | it "is never format" do 224 | refute format? "\x21" 225 | end 226 | 227 | it "is never bidi_control" do 228 | refute bidi_control? "\x21" 229 | end 230 | end 231 | end 232 | 233 | describe ByteCharacteristics do 234 | describe "ISO-8859-*" do 235 | describe "ISO-8859-1" do 236 | let(:encoding) { "ISO-8859-1" } 237 | 238 | it "is always valid" do 239 | assert valid? "\x80" 240 | end 241 | 242 | it "is always assigned" do 243 | assert assigned? "\x21" 244 | assert assigned? "\x80" 245 | end 246 | 247 | it "is control or not" do 248 | assert control? "\x1E" 249 | assert control? "\x7F" 250 | assert control? "\x80" 251 | refute control? "\x67" 252 | end 253 | 254 | it "is blank or not" do 255 | assert blank? "\x20" 256 | refute blank? "\x21" 257 | end 258 | 259 | it "is separator or not" do 260 | assert separator? "\n" 261 | refute separator? "\x20" 262 | end 263 | 264 | it "is never format" do 265 | refute format? "\x21" 266 | end 267 | 268 | it "is never bidi_control" do 269 | refute bidi_control? "\x21" 270 | end 271 | end 272 | 273 | describe "ISO-8859-2" do 274 | let(:encoding) { "ISO-8859-2" } 275 | 276 | it "is always valid" do 277 | assert valid? "\x80" 278 | end 279 | 280 | it "is always assigned" do 281 | assert assigned? "\x21" 282 | assert assigned? "\x80" 283 | end 284 | 285 | it "is control or not" do 286 | assert control? "\x1E" 287 | assert control? "\x7F" 288 | assert control? "\x80" 289 | refute control? "\x67" 290 | end 291 | 292 | it "is blank or not" do 293 | assert blank? "\x20" 294 | refute blank? "\x21" 295 | end 296 | 297 | it "is separator or not" do 298 | assert separator? "\n" 299 | refute separator? "\x20" 300 | end 301 | 302 | it "is never format" do 303 | refute format? "\x21" 304 | end 305 | 306 | it "is never bidi_control" do 307 | refute bidi_control? "\x21" 308 | end 309 | end 310 | 311 | describe "ISO-8859-3" do 312 | let(:encoding) { "ISO-8859-3" } 313 | 314 | it "is always valid" do 315 | assert valid? "\x80" 316 | end 317 | 318 | it "is assigned or not" do 319 | assert assigned? "\x21" 320 | refute assigned? "\xA5" 321 | end 322 | 323 | it "is control or not" do 324 | assert control? "\x1E" 325 | assert control? "\x7F" 326 | assert control? "\x80" 327 | refute control? "\x67" 328 | end 329 | 330 | it "is blank or not" do 331 | assert blank? "\x20" 332 | refute blank? "\x21" 333 | end 334 | 335 | it "is separator or not" do 336 | assert separator? "\n" 337 | refute separator? "\x20" 338 | end 339 | 340 | it "is never format" do 341 | refute format? "\x21" 342 | end 343 | 344 | it "is never bidi_control" do 345 | refute bidi_control? "\x21" 346 | end 347 | end 348 | 349 | describe "ISO-8859-4" do 350 | let(:encoding) { "ISO-8859-4" } 351 | 352 | it "is always valid" do 353 | assert valid? "\x80" 354 | end 355 | 356 | it "is always assigned" do 357 | assert assigned? "\x21" 358 | assert assigned? "\x80" 359 | end 360 | 361 | it "is control or not" do 362 | assert control? "\x1E" 363 | assert control? "\x7F" 364 | assert control? "\x80" 365 | refute control? "\x67" 366 | end 367 | 368 | it "is blank or not" do 369 | assert blank? "\x20" 370 | refute blank? "\x21" 371 | end 372 | 373 | it "is separator or not" do 374 | assert separator? "\n" 375 | refute separator? "\x20" 376 | end 377 | 378 | it "is never format" do 379 | refute format? "\x21" 380 | end 381 | 382 | it "is never bidi_control" do 383 | refute bidi_control? "\x21" 384 | end 385 | end 386 | 387 | describe "ISO-8859-5" do 388 | let(:encoding) { "ISO-8859-5" } 389 | 390 | it "is always valid" do 391 | assert valid? "\x80" 392 | end 393 | 394 | it "is always assigned" do 395 | assert assigned? "\x21" 396 | assert assigned? "\x80" 397 | end 398 | 399 | it "is control or not" do 400 | assert control? "\x1E" 401 | assert control? "\x7F" 402 | assert control? "\x80" 403 | refute control? "\x67" 404 | end 405 | 406 | it "is blank or not" do 407 | assert blank? "\x20" 408 | refute blank? "\x21" 409 | end 410 | 411 | it "is separator or not" do 412 | assert separator? "\n" 413 | refute separator? "\x20" 414 | end 415 | 416 | it "is never format" do 417 | refute format? "\x21" 418 | end 419 | 420 | it "is never bidi_control" do 421 | refute bidi_control? "\x21" 422 | end 423 | end 424 | 425 | describe "ISO-8859-6" do 426 | let(:encoding) { "ISO-8859-6" } 427 | 428 | it "is always valid" do 429 | assert valid? "\x80" 430 | end 431 | 432 | it "is assigned or not" do 433 | assert assigned? "\x21" 434 | refute assigned? "\xA1" 435 | end 436 | 437 | it "is control or not" do 438 | assert control? "\x1E" 439 | assert control? "\x7F" 440 | assert control? "\x80" 441 | refute control? "\x67" 442 | end 443 | 444 | it "is blank or not" do 445 | assert blank? "\x20" 446 | refute blank? "\x21" 447 | end 448 | 449 | it "is separator or not" do 450 | assert separator? "\n" 451 | refute separator? "\x20" 452 | end 453 | 454 | it "is never format" do 455 | refute format? "\x21" 456 | end 457 | 458 | it "is never bidi_control" do 459 | refute bidi_control? "\x21" 460 | end 461 | end 462 | 463 | describe "ISO-8859-7" do 464 | let(:encoding) { "ISO-8859-7" } 465 | 466 | it "is always valid" do 467 | assert valid? "\x80" 468 | end 469 | 470 | it "is assigned or not" do 471 | assert assigned? "\x21" 472 | refute assigned? "\xFF" 473 | end 474 | 475 | it "is control or not" do 476 | assert control? "\x1E" 477 | assert control? "\x7F" 478 | assert control? "\x80" 479 | refute control? "\x67" 480 | end 481 | 482 | it "is blank or not" do 483 | assert blank? "\x20" 484 | refute blank? "\x21" 485 | end 486 | 487 | it "is separator or not" do 488 | assert separator? "\n" 489 | refute separator? "\x20" 490 | end 491 | 492 | it "is never format" do 493 | refute format? "\x21" 494 | end 495 | 496 | it "is never bidi_control" do 497 | refute bidi_control? "\x21" 498 | end 499 | end 500 | 501 | describe "ISO-8859-8" do 502 | let(:encoding) { "ISO-8859-8" } 503 | 504 | it "is always valid" do 505 | assert valid? "\x80" 506 | end 507 | 508 | it "is assigned or not" do 509 | assert assigned? "\x21" 510 | refute assigned? "\xA1" 511 | end 512 | 513 | it "is control or not" do 514 | assert control? "\x1E" 515 | assert control? "\x7F" 516 | assert control? "\x80" 517 | refute control? "\x67" 518 | end 519 | 520 | it "is blank or not" do 521 | assert blank? "\x20" 522 | refute blank? "\x21" 523 | end 524 | 525 | it "is separator or not" do 526 | assert separator? "\n" 527 | refute separator? "\x20" 528 | end 529 | 530 | it "is format or not" do 531 | assert format? "\xFE" 532 | refute format? "\x21" 533 | end 534 | 535 | it "is bidi_control or not" do 536 | assert bidi_control? "\xFE" 537 | refute bidi_control? "\x21" 538 | end 539 | end 540 | 541 | describe "ISO-8859-9" do 542 | let(:encoding) { "ISO-8859-9" } 543 | 544 | it "is always valid" do 545 | assert valid? "\x80" 546 | end 547 | 548 | it "is always assigned" do 549 | assert assigned? "\x21" 550 | assert assigned? "\x80" 551 | end 552 | 553 | it "is control or not" do 554 | assert control? "\x1E" 555 | assert control? "\x7F" 556 | assert control? "\x80" 557 | refute control? "\x67" 558 | end 559 | 560 | it "is blank or not" do 561 | assert blank? "\x20" 562 | refute blank? "\x21" 563 | end 564 | 565 | it "is separator or not" do 566 | assert separator? "\n" 567 | refute separator? "\x20" 568 | end 569 | 570 | it "is never format" do 571 | refute format? "\x21" 572 | end 573 | 574 | it "is never bidi_control" do 575 | refute bidi_control? "\x21" 576 | end 577 | end 578 | 579 | describe "ISO-8859-10" do 580 | let(:encoding) { "ISO-8859-10" } 581 | 582 | it "is always valid" do 583 | assert valid? "\x80" 584 | end 585 | 586 | it "is always assigned" do 587 | assert assigned? "\x21" 588 | assert assigned? "\x80" 589 | end 590 | 591 | it "is control or not" do 592 | assert control? "\x1E" 593 | assert control? "\x7F" 594 | assert control? "\x80" 595 | refute control? "\x67" 596 | end 597 | 598 | it "is blank or not" do 599 | assert blank? "\x20" 600 | refute blank? "\x21" 601 | end 602 | 603 | it "is separator or not" do 604 | assert separator? "\n" 605 | refute separator? "\x20" 606 | end 607 | 608 | it "is never format" do 609 | refute format? "\x21" 610 | end 611 | 612 | it "is never bidi_control" do 613 | refute bidi_control? "\x21" 614 | end 615 | end 616 | 617 | describe "ISO-8859-11" do 618 | let(:encoding) { "ISO-8859-11" } 619 | 620 | it "is always valid" do 621 | assert valid? "\x80" 622 | end 623 | 624 | it "is assigned or not" do 625 | assert assigned? "\x21" 626 | refute assigned? "\xDB" 627 | end 628 | 629 | it "is control or not" do 630 | assert control? "\x1E" 631 | assert control? "\x7F" 632 | assert control? "\x80" 633 | refute control? "\x67" 634 | end 635 | 636 | it "is blank or not" do 637 | assert blank? "\x20" 638 | refute blank? "\x21" 639 | end 640 | 641 | it "is separator or not" do 642 | assert separator? "\n" 643 | refute separator? "\x20" 644 | end 645 | 646 | it "is never format" do 647 | refute format? "\x21" 648 | end 649 | 650 | it "is never bidi_control" do 651 | refute bidi_control? "\x21" 652 | end 653 | end 654 | 655 | describe "ISO-8859-13" do 656 | let(:encoding) { "ISO-8859-13" } 657 | 658 | it "is always valid" do 659 | assert valid? "\x80" 660 | end 661 | 662 | it "is always assigned" do 663 | assert assigned? "\x21" 664 | assert assigned? "\x80" 665 | end 666 | 667 | it "is control or not" do 668 | assert control? "\x1E" 669 | assert control? "\x7F" 670 | assert control? "\x80" 671 | refute control? "\x67" 672 | end 673 | 674 | it "is blank or not" do 675 | assert blank? "\x20" 676 | refute blank? "\x21" 677 | end 678 | 679 | it "is separator or not" do 680 | assert separator? "\n" 681 | refute separator? "\x20" 682 | end 683 | 684 | it "is never format" do 685 | refute format? "\x21" 686 | end 687 | 688 | it "is never bidi_control" do 689 | refute bidi_control? "\x21" 690 | end 691 | end 692 | 693 | describe "ISO-8859-14" do 694 | let(:encoding) { "ISO-8859-14" } 695 | 696 | it "is always valid" do 697 | assert valid? "\x80" 698 | end 699 | 700 | it "is always assigned" do 701 | assert assigned? "\x21" 702 | assert assigned? "\x80" 703 | end 704 | 705 | it "is control or not" do 706 | assert control? "\x1E" 707 | assert control? "\x7F" 708 | assert control? "\x80" 709 | refute control? "\x67" 710 | end 711 | 712 | it "is blank or not" do 713 | assert blank? "\x20" 714 | refute blank? "\x21" 715 | end 716 | 717 | it "is separator or not" do 718 | assert separator? "\n" 719 | refute separator? "\x20" 720 | end 721 | 722 | it "is never format" do 723 | refute format? "\x21" 724 | end 725 | 726 | it "is never bidi_control" do 727 | refute bidi_control? "\x21" 728 | end 729 | end 730 | 731 | describe "ISO-8859-15" do 732 | let(:encoding) { "ISO-8859-15" } 733 | 734 | it "is always valid" do 735 | assert valid? "\x80" 736 | end 737 | 738 | it "is always assigned" do 739 | assert assigned? "\x21" 740 | assert assigned? "\x80" 741 | end 742 | 743 | it "is control or not" do 744 | assert control? "\x1E" 745 | assert control? "\x7F" 746 | assert control? "\x80" 747 | refute control? "\x67" 748 | end 749 | 750 | it "is blank or not" do 751 | assert blank? "\x20" 752 | refute blank? "\x21" 753 | end 754 | 755 | it "is separator or not" do 756 | assert separator? "\n" 757 | refute separator? "\x20" 758 | end 759 | 760 | it "is never format" do 761 | refute format? "\x21" 762 | end 763 | 764 | it "is never bidi_control" do 765 | refute bidi_control? "\x21" 766 | end 767 | end 768 | 769 | describe "ISO-8859-16" do 770 | let(:encoding) { "ISO-8859-16" } 771 | 772 | it "is always valid" do 773 | assert valid? "\x80" 774 | end 775 | 776 | it "is always assigned" do 777 | assert assigned? "\x21" 778 | assert assigned? "\x80" 779 | end 780 | 781 | it "is control or not" do 782 | assert control? "\x1E" 783 | assert control? "\x7F" 784 | assert control? "\x80" 785 | refute control? "\x67" 786 | end 787 | 788 | it "is blank or not" do 789 | assert blank? "\x20" 790 | refute blank? "\x21" 791 | end 792 | 793 | it "is separator or not" do 794 | assert separator? "\n" 795 | refute separator? "\x20" 796 | end 797 | 798 | it "is never format" do 799 | refute format? "\x21" 800 | end 801 | 802 | it "is never bidi_control" do 803 | refute bidi_control? "\x21" 804 | end 805 | end 806 | end 807 | 808 | describe "Windows-125*" do 809 | describe "Windows-1250" do 810 | let(:encoding) { "Windows-1250" } 811 | 812 | it "is always valid" do 813 | assert valid? "\x80" 814 | end 815 | 816 | it "is assigned or not" do 817 | assert assigned? "\x21" 818 | refute assigned? "\x81" 819 | end 820 | 821 | it "is control or not" do 822 | assert control? "\x1E" 823 | refute control? "\x67" 824 | end 825 | 826 | it "is blank or not" do 827 | assert blank? "\x20" 828 | refute blank? "\x21" 829 | end 830 | 831 | it "is separator or not" do 832 | assert separator? "\n" 833 | refute separator? "\x20" 834 | end 835 | 836 | it "is never format" do 837 | refute format? "\x21" 838 | end 839 | 840 | it "is never bidi_control" do 841 | refute bidi_control? "\x21" 842 | end 843 | end 844 | 845 | describe "Windows-1251" do 846 | let(:encoding) { "Windows-1251" } 847 | 848 | it "is always valid" do 849 | assert valid? "\x80" 850 | end 851 | 852 | it "is assigned or not" do 853 | assert assigned? "\x21" 854 | refute assigned? "\x98" 855 | end 856 | 857 | it "is control or not" do 858 | assert control? "\x1E" 859 | refute control? "\x67" 860 | end 861 | 862 | it "is blank or not" do 863 | assert blank? "\x20" 864 | refute blank? "\x21" 865 | end 866 | 867 | it "is separator or not" do 868 | assert separator? "\n" 869 | refute separator? "\x20" 870 | end 871 | 872 | it "is never format" do 873 | refute format? "\x21" 874 | end 875 | 876 | it "is never bidi_control" do 877 | refute bidi_control? "\x21" 878 | end 879 | end 880 | 881 | describe "Windows-1252" do 882 | let(:encoding) { "Windows-1252" } 883 | 884 | it "is always valid" do 885 | assert valid? "\x80" 886 | end 887 | 888 | it "is assigned or not" do 889 | assert assigned? "\x21" 890 | refute assigned? "\x81" 891 | end 892 | 893 | it "is control or not" do 894 | assert control? "\x1E" 895 | refute control? "\x67" 896 | end 897 | 898 | it "is blank or not" do 899 | assert blank? "\x20" 900 | refute blank? "\x21" 901 | end 902 | 903 | it "is separator or not" do 904 | assert separator? "\n" 905 | refute separator? "\x20" 906 | end 907 | 908 | it "is never format" do 909 | refute format? "\x21" 910 | end 911 | 912 | it "is never bidi_control" do 913 | refute bidi_control? "\x21" 914 | end 915 | end 916 | 917 | describe "Windows-1253" do 918 | let(:encoding) { "Windows-1253" } 919 | 920 | it "is always valid" do 921 | assert valid? "\x80" 922 | end 923 | 924 | it "is assigned or not" do 925 | assert assigned? "\x21" 926 | refute assigned? "\x81" 927 | end 928 | 929 | it "is control or not" do 930 | assert control? "\x1E" 931 | refute control? "\x67" 932 | end 933 | 934 | it "is blank or not" do 935 | assert blank? "\x20" 936 | refute blank? "\x21" 937 | end 938 | 939 | it "is separator or not" do 940 | assert separator? "\n" 941 | refute separator? "\x20" 942 | end 943 | 944 | it "is never format" do 945 | refute format? "\x21" 946 | end 947 | 948 | it "is never bidi_control" do 949 | refute bidi_control? "\x21" 950 | end 951 | end 952 | 953 | describe "Windows-1254" do 954 | let(:encoding) { "Windows-1254" } 955 | 956 | it "is always valid" do 957 | assert valid? "\x80" 958 | end 959 | 960 | it "is assigned or not" do 961 | assert assigned? "\x21" 962 | refute assigned? "\x81" 963 | end 964 | 965 | it "is control or not" do 966 | assert control? "\x1E" 967 | refute control? "\x67" 968 | end 969 | 970 | it "is blank or not" do 971 | assert blank? "\x20" 972 | refute blank? "\x21" 973 | end 974 | 975 | it "is separator or not" do 976 | assert separator? "\n" 977 | refute separator? "\x20" 978 | end 979 | 980 | it "is never format" do 981 | refute format? "\x21" 982 | end 983 | 984 | it "is never bidi_control" do 985 | refute bidi_control? "\x21" 986 | end 987 | end 988 | 989 | describe "Windows-1255" do 990 | let(:encoding) { "Windows-1255" } 991 | 992 | it "is always valid" do 993 | assert valid? "\x80" 994 | end 995 | 996 | it "is assigned or not" do 997 | assert assigned? "\x21" 998 | refute assigned? "\x81" 999 | end 1000 | 1001 | it "is control or not" do 1002 | assert control? "\x1E" 1003 | refute control? "\x67" 1004 | end 1005 | 1006 | it "is blank or not" do 1007 | assert blank? "\x20" 1008 | refute blank? "\x21" 1009 | end 1010 | 1011 | it "is separator or not" do 1012 | assert separator? "\n" 1013 | refute separator? "\x20" 1014 | end 1015 | 1016 | it "is format or not" do 1017 | assert format? "\xFE" 1018 | refute format? "\x21" 1019 | end 1020 | 1021 | it "is never bidi_control" do 1022 | assert bidi_control? "\xFE" 1023 | refute bidi_control? "\x21" 1024 | end 1025 | end 1026 | 1027 | describe "Windows-1256" do 1028 | let(:encoding) { "Windows-1256" } 1029 | 1030 | it "is always valid" do 1031 | assert valid? "\x80" 1032 | end 1033 | 1034 | it "is always assigned" do 1035 | assert assigned? "\x21" 1036 | end 1037 | 1038 | it "is control or not" do 1039 | assert control? "\x1E" 1040 | refute control? "\x67" 1041 | end 1042 | 1043 | it "is blank or not" do 1044 | assert blank? "\x20" 1045 | refute blank? "\x21" 1046 | end 1047 | 1048 | it "is separator or not" do 1049 | assert separator? "\n" 1050 | refute separator? "\x20" 1051 | end 1052 | 1053 | it "is format or not" do 1054 | assert format? "\xFE" 1055 | refute format? "\x21" 1056 | end 1057 | 1058 | it "is never bidi_control" do 1059 | assert bidi_control? "\xFE" 1060 | refute bidi_control? "\x21" 1061 | end 1062 | end 1063 | 1064 | describe "Windows-1257" do 1065 | let(:encoding) { "Windows-1257" } 1066 | 1067 | it "is always valid" do 1068 | assert valid? "\x80" 1069 | end 1070 | 1071 | it "is assigned or not" do 1072 | assert assigned? "\x21" 1073 | refute assigned? "\x81" 1074 | end 1075 | 1076 | it "is control or not" do 1077 | assert control? "\x1E" 1078 | refute control? "\x67" 1079 | end 1080 | 1081 | it "is blank or not" do 1082 | assert blank? "\x20" 1083 | refute blank? "\x21" 1084 | end 1085 | 1086 | it "is separator or not" do 1087 | assert separator? "\n" 1088 | refute separator? "\x20" 1089 | end 1090 | 1091 | it "is never format" do 1092 | refute format? "\x21" 1093 | end 1094 | 1095 | it "is never bidi_control" do 1096 | refute bidi_control? "\x21" 1097 | end 1098 | end 1099 | 1100 | describe "Windows-1258" do 1101 | let(:encoding) { "Windows-1258" } 1102 | 1103 | it "is always valid" do 1104 | assert valid? "\x80" 1105 | end 1106 | 1107 | it "is assigned or not" do 1108 | assert assigned? "\x21" 1109 | refute assigned? "\x81" 1110 | end 1111 | 1112 | it "is control or not" do 1113 | assert control? "\x1E" 1114 | refute control? "\x67" 1115 | end 1116 | 1117 | it "is blank or not" do 1118 | assert blank? "\x20" 1119 | refute blank? "\x21" 1120 | end 1121 | 1122 | it "is separator or not" do 1123 | assert separator? "\n" 1124 | refute separator? "\x20" 1125 | end 1126 | 1127 | it "is never format" do 1128 | refute format? "\x21" 1129 | end 1130 | 1131 | it "is never bidi_control" do 1132 | refute bidi_control? "\x21" 1133 | end 1134 | end 1135 | end 1136 | 1137 | describe "IBM*, CP85*" do 1138 | describe "IBM437" do 1139 | let(:encoding) { "IBM437" } 1140 | 1141 | it "is always valid" do 1142 | assert valid? "\x80" 1143 | end 1144 | 1145 | it "is always assigned" do 1146 | assert assigned? "\x21" 1147 | end 1148 | 1149 | it "is control or not" do 1150 | assert control? "\x1E" 1151 | refute control? "\x67" 1152 | end 1153 | 1154 | it "is blank or not" do 1155 | assert blank? "\x20" 1156 | refute blank? "\x21" 1157 | end 1158 | 1159 | it "is separator or not" do 1160 | assert separator? "\n" 1161 | refute separator? "\x20" 1162 | end 1163 | 1164 | it "is never format" do 1165 | refute format? "\x21" 1166 | end 1167 | 1168 | it "is never bidi_control" do 1169 | refute bidi_control? "\x21" 1170 | end 1171 | end 1172 | 1173 | describe "IBM737" do 1174 | let(:encoding) { "IBM737" } 1175 | 1176 | it "is always valid" do 1177 | assert valid? "\x80" 1178 | end 1179 | 1180 | it "is always assigned" do 1181 | assert assigned? "\x21" 1182 | end 1183 | 1184 | it "is control or not" do 1185 | assert control? "\x1E" 1186 | refute control? "\x67" 1187 | end 1188 | 1189 | it "is blank or not" do 1190 | assert blank? "\x20" 1191 | refute blank? "\x21" 1192 | end 1193 | 1194 | it "is separator or not" do 1195 | assert separator? "\n" 1196 | refute separator? "\x20" 1197 | end 1198 | 1199 | it "is never format" do 1200 | refute format? "\x21" 1201 | end 1202 | 1203 | it "is never bidi_control" do 1204 | refute bidi_control? "\x21" 1205 | end 1206 | end 1207 | 1208 | describe "IBM775" do 1209 | let(:encoding) { "IBM775" } 1210 | 1211 | it "is always valid" do 1212 | assert valid? "\x80" 1213 | end 1214 | 1215 | it "is always assigned" do 1216 | assert assigned? "\x21" 1217 | end 1218 | 1219 | it "is control or not" do 1220 | assert control? "\x1E" 1221 | refute control? "\x67" 1222 | end 1223 | 1224 | it "is blank or not" do 1225 | assert blank? "\x20" 1226 | refute blank? "\x21" 1227 | end 1228 | 1229 | it "is separator or not" do 1230 | assert separator? "\n" 1231 | refute separator? "\x20" 1232 | end 1233 | 1234 | it "is never format" do 1235 | refute format? "\x21" 1236 | end 1237 | 1238 | it "is never bidi_control" do 1239 | refute bidi_control? "\x21" 1240 | end 1241 | end 1242 | 1243 | describe "CP850" do 1244 | let(:encoding) { "CP850" } 1245 | 1246 | it "is always valid" do 1247 | assert valid? "\x80" 1248 | end 1249 | 1250 | it "is always assigned" do 1251 | assert assigned? "\x21" 1252 | end 1253 | 1254 | it "is control or not" do 1255 | assert control? "\x1E" 1256 | refute control? "\x67" 1257 | end 1258 | 1259 | it "is blank or not" do 1260 | assert blank? "\x20" 1261 | refute blank? "\x21" 1262 | end 1263 | 1264 | it "is separator or not" do 1265 | assert separator? "\n" 1266 | refute separator? "\x20" 1267 | end 1268 | 1269 | it "is never format" do 1270 | refute format? "\x21" 1271 | end 1272 | 1273 | it "is never bidi_control" do 1274 | refute bidi_control? "\x21" 1275 | end 1276 | end 1277 | 1278 | describe "IBM852" do 1279 | let(:encoding) { "IBM852" } 1280 | 1281 | it "is always valid" do 1282 | assert valid? "\x80" 1283 | end 1284 | 1285 | it "is always assigned" do 1286 | assert assigned? "\x21" 1287 | end 1288 | 1289 | it "is control or not" do 1290 | assert control? "\x1E" 1291 | refute control? "\x67" 1292 | end 1293 | 1294 | it "is blank or not" do 1295 | assert blank? "\x20" 1296 | refute blank? "\x21" 1297 | end 1298 | 1299 | it "is separator or not" do 1300 | assert separator? "\n" 1301 | refute separator? "\x20" 1302 | end 1303 | 1304 | it "is never format" do 1305 | refute format? "\x21" 1306 | end 1307 | 1308 | it "is never bidi_control" do 1309 | refute bidi_control? "\x21" 1310 | end 1311 | end 1312 | 1313 | describe "CP852" do 1314 | let(:encoding) { "CP852" } 1315 | 1316 | it "is always valid" do 1317 | assert valid? "\x80" 1318 | end 1319 | 1320 | it "is always assigned" do 1321 | assert assigned? "\x21" 1322 | end 1323 | 1324 | it "is control or not" do 1325 | assert control? "\x1E" 1326 | refute control? "\x67" 1327 | end 1328 | 1329 | it "is blank or not" do 1330 | assert blank? "\x20" 1331 | refute blank? "\x21" 1332 | end 1333 | 1334 | it "is separator or not" do 1335 | assert separator? "\n" 1336 | refute separator? "\x20" 1337 | end 1338 | 1339 | it "is never format" do 1340 | refute format? "\x21" 1341 | end 1342 | 1343 | it "is never bidi_control" do 1344 | refute bidi_control? "\x21" 1345 | end 1346 | end 1347 | 1348 | describe "IBM855" do 1349 | let(:encoding) { "IBM855" } 1350 | 1351 | it "is always valid" do 1352 | assert valid? "\x80" 1353 | end 1354 | 1355 | it "is always assigned" do 1356 | assert assigned? "\x21" 1357 | end 1358 | 1359 | it "is control or not" do 1360 | assert control? "\x1E" 1361 | refute control? "\x67" 1362 | end 1363 | 1364 | it "is blank or not" do 1365 | assert blank? "\x20" 1366 | refute blank? "\x21" 1367 | end 1368 | 1369 | it "is separator or not" do 1370 | assert separator? "\n" 1371 | refute separator? "\x20" 1372 | end 1373 | 1374 | it "is never format" do 1375 | refute format? "\x21" 1376 | end 1377 | 1378 | it "is never bidi_control" do 1379 | refute bidi_control? "\x21" 1380 | end 1381 | end 1382 | 1383 | describe "CP855" do 1384 | let(:encoding) { "CP855" } 1385 | 1386 | it "is always valid" do 1387 | assert valid? "\x80" 1388 | end 1389 | 1390 | it "is always assigned" do 1391 | assert assigned? "\x21" 1392 | end 1393 | 1394 | it "is control or not" do 1395 | assert control? "\x1E" 1396 | refute control? "\x67" 1397 | end 1398 | 1399 | it "is blank or not" do 1400 | assert blank? "\x20" 1401 | refute blank? "\x21" 1402 | end 1403 | 1404 | it "is separator or not" do 1405 | assert separator? "\n" 1406 | refute separator? "\x20" 1407 | end 1408 | 1409 | it "is never format" do 1410 | refute format? "\x21" 1411 | end 1412 | 1413 | it "is never bidi_control" do 1414 | refute bidi_control? "\x21" 1415 | end 1416 | end 1417 | 1418 | describe "IBM857" do 1419 | let(:encoding) { "IBM857" } 1420 | 1421 | it "is always valid" do 1422 | assert valid? "\x80" 1423 | end 1424 | 1425 | it "is assigned or not" do 1426 | assert assigned? "\x21" 1427 | refute assigned? "\xE7" 1428 | end 1429 | 1430 | it "is control or not" do 1431 | assert control? "\x1E" 1432 | refute control? "\x67" 1433 | end 1434 | 1435 | it "is blank or not" do 1436 | assert blank? "\x20" 1437 | refute blank? "\x21" 1438 | end 1439 | 1440 | it "is separator or not" do 1441 | assert separator? "\n" 1442 | refute separator? "\x20" 1443 | end 1444 | 1445 | it "is never format" do 1446 | refute format? "\x21" 1447 | end 1448 | 1449 | it "is never bidi_control" do 1450 | refute bidi_control? "\x21" 1451 | end 1452 | end 1453 | 1454 | describe "IBM860" do 1455 | let(:encoding) { "IBM860" } 1456 | 1457 | it "is always valid" do 1458 | assert valid? "\x80" 1459 | end 1460 | 1461 | it "is always assigned" do 1462 | assert assigned? "\x21" 1463 | end 1464 | 1465 | it "is control or not" do 1466 | assert control? "\x1E" 1467 | refute control? "\x67" 1468 | end 1469 | 1470 | it "is blank or not" do 1471 | assert blank? "\x20" 1472 | refute blank? "\x21" 1473 | end 1474 | 1475 | it "is separator or not" do 1476 | assert separator? "\n" 1477 | refute separator? "\x20" 1478 | end 1479 | 1480 | it "is never format" do 1481 | refute format? "\x21" 1482 | end 1483 | 1484 | it "is never bidi_control" do 1485 | refute bidi_control? "\x21" 1486 | end 1487 | end 1488 | 1489 | describe "IBM861" do 1490 | let(:encoding) { "IBM861" } 1491 | 1492 | it "is always valid" do 1493 | assert valid? "\x80" 1494 | end 1495 | 1496 | it "is always assigned" do 1497 | assert assigned? "\x21" 1498 | end 1499 | 1500 | it "is control or not" do 1501 | assert control? "\x1E" 1502 | refute control? "\x67" 1503 | end 1504 | 1505 | it "is blank or not" do 1506 | assert blank? "\x20" 1507 | refute blank? "\x21" 1508 | end 1509 | 1510 | it "is separator or not" do 1511 | assert separator? "\n" 1512 | refute separator? "\x20" 1513 | end 1514 | 1515 | it "is never format" do 1516 | refute format? "\x21" 1517 | end 1518 | 1519 | it "is never bidi_control" do 1520 | refute bidi_control? "\x21" 1521 | end 1522 | end 1523 | 1524 | describe "IBM862" do 1525 | let(:encoding) { "IBM862" } 1526 | 1527 | it "is always valid" do 1528 | assert valid? "\x80" 1529 | end 1530 | 1531 | it "is always assigned" do 1532 | assert assigned? "\x21" 1533 | end 1534 | 1535 | it "is control or not" do 1536 | assert control? "\x1E" 1537 | refute control? "\x67" 1538 | end 1539 | 1540 | it "is blank or not" do 1541 | assert blank? "\x20" 1542 | refute blank? "\x21" 1543 | end 1544 | 1545 | it "is separator or not" do 1546 | assert separator? "\n" 1547 | refute separator? "\x20" 1548 | end 1549 | 1550 | it "is never format" do 1551 | refute format? "\x21" 1552 | end 1553 | 1554 | it "is never bidi_control" do 1555 | refute bidi_control? "\x21" 1556 | end 1557 | end 1558 | 1559 | describe "IBM863" do 1560 | let(:encoding) { "IBM863" } 1561 | 1562 | it "is always valid" do 1563 | assert valid? "\x80" 1564 | end 1565 | 1566 | it "is always assigned" do 1567 | assert assigned? "\x21" 1568 | end 1569 | 1570 | it "is control or not" do 1571 | assert control? "\x1E" 1572 | refute control? "\x67" 1573 | end 1574 | 1575 | it "is blank or not" do 1576 | assert blank? "\x20" 1577 | refute blank? "\x21" 1578 | end 1579 | 1580 | it "is separator or not" do 1581 | assert separator? "\n" 1582 | refute separator? "\x20" 1583 | end 1584 | 1585 | it "is never format" do 1586 | refute format? "\x21" 1587 | end 1588 | 1589 | it "is never bidi_control" do 1590 | refute bidi_control? "\x21" 1591 | end 1592 | end 1593 | 1594 | describe "IBM864" do 1595 | let(:encoding) { "IBM864" } 1596 | 1597 | it "is always valid" do 1598 | assert valid? "\x80" 1599 | end 1600 | 1601 | it "is assigned or not" do 1602 | assert assigned? "\x21" 1603 | refute assigned? "\xA6" 1604 | end 1605 | 1606 | it "is control or not" do 1607 | assert control? "\x1E" 1608 | refute control? "\x67" 1609 | end 1610 | 1611 | it "is blank or not" do 1612 | assert blank? "\x20" 1613 | refute blank? "\x21" 1614 | end 1615 | 1616 | it "is separator or not" do 1617 | assert separator? "\n" 1618 | refute separator? "\x20" 1619 | end 1620 | 1621 | it "is never format" do 1622 | refute format? "\x21" 1623 | end 1624 | 1625 | it "is never bidi_control" do 1626 | refute bidi_control? "\x21" 1627 | end 1628 | end 1629 | 1630 | describe "IBM865" do 1631 | let(:encoding) { "IBM865" } 1632 | 1633 | it "is always valid" do 1634 | assert valid? "\x80" 1635 | end 1636 | 1637 | it "is always assigned" do 1638 | assert assigned? "\x21" 1639 | end 1640 | 1641 | it "is control or not" do 1642 | assert control? "\x1E" 1643 | refute control? "\x67" 1644 | end 1645 | 1646 | it "is blank or not" do 1647 | assert blank? "\x20" 1648 | refute blank? "\x21" 1649 | end 1650 | 1651 | it "is separator or not" do 1652 | assert separator? "\n" 1653 | refute separator? "\x20" 1654 | end 1655 | 1656 | it "is never format" do 1657 | refute format? "\x21" 1658 | end 1659 | 1660 | it "is never bidi_control" do 1661 | refute bidi_control? "\x21" 1662 | end 1663 | end 1664 | 1665 | describe "IBM866" do 1666 | let(:encoding) { "IBM866" } 1667 | 1668 | it "is always valid" do 1669 | assert valid? "\x80" 1670 | end 1671 | 1672 | it "is always assigned" do 1673 | assert assigned? "\x21" 1674 | end 1675 | 1676 | it "is control or not" do 1677 | assert control? "\x1E" 1678 | refute control? "\x67" 1679 | end 1680 | 1681 | it "is blank or not" do 1682 | assert blank? "\x20" 1683 | refute blank? "\x21" 1684 | end 1685 | 1686 | it "is separator or not" do 1687 | assert separator? "\n" 1688 | refute separator? "\x20" 1689 | end 1690 | 1691 | it "is never format" do 1692 | refute format? "\x21" 1693 | end 1694 | 1695 | it "is never bidi_control" do 1696 | refute bidi_control? "\x21" 1697 | end 1698 | end 1699 | 1700 | describe "IBM869" do 1701 | let(:encoding) { "IBM869" } 1702 | 1703 | it "is always valid" do 1704 | assert valid? "\x80" 1705 | end 1706 | 1707 | it "is assigned or not" do 1708 | assert assigned? "\x21" 1709 | refute assigned? "\x80" 1710 | end 1711 | 1712 | it "is control or not" do 1713 | assert control? "\x1E" 1714 | refute control? "\x67" 1715 | end 1716 | 1717 | it "is blank or not" do 1718 | assert blank? "\x20" 1719 | refute blank? "\x21" 1720 | end 1721 | 1722 | it "is separator or not" do 1723 | assert separator? "\n" 1724 | refute separator? "\x20" 1725 | end 1726 | 1727 | it "is never format" do 1728 | refute format? "\x21" 1729 | end 1730 | 1731 | it "is never bidi_control" do 1732 | refute bidi_control? "\x21" 1733 | end 1734 | end 1735 | end 1736 | 1737 | describe "mac*" do 1738 | describe "macCentEuro" do 1739 | let(:encoding) { "macCentEuro" } 1740 | 1741 | it "is always valid" do 1742 | assert valid? "\x80" 1743 | end 1744 | 1745 | it "is always assigned" do 1746 | assert assigned? "\x21" 1747 | end 1748 | 1749 | it "is control or not" do 1750 | assert control? "\x1E" 1751 | refute control? "\x67" 1752 | end 1753 | 1754 | it "is blank or not" do 1755 | assert blank? "\x20" 1756 | refute blank? "\x21" 1757 | end 1758 | 1759 | it "is separator or not" do 1760 | assert separator? "\n" 1761 | refute separator? "\x20" 1762 | end 1763 | 1764 | it "is never format" do 1765 | refute format? "\x21" 1766 | end 1767 | 1768 | it "is never bidi_control" do 1769 | refute bidi_control? "\x21" 1770 | end 1771 | end 1772 | 1773 | describe "macCroatian" do 1774 | let(:encoding) { "macCroatian" } 1775 | 1776 | it "is always valid" do 1777 | assert valid? "\x80" 1778 | end 1779 | 1780 | it "is always assigned" do 1781 | assert assigned? "\x21" 1782 | end 1783 | 1784 | it "is control or not" do 1785 | assert control? "\x1E" 1786 | refute control? "\x67" 1787 | end 1788 | 1789 | it "is blank or not" do 1790 | assert blank? "\x20" 1791 | refute blank? "\x21" 1792 | end 1793 | 1794 | it "is separator or not" do 1795 | assert separator? "\n" 1796 | refute separator? "\x20" 1797 | end 1798 | 1799 | it "is never format" do 1800 | refute format? "\x21" 1801 | end 1802 | 1803 | it "is never bidi_control" do 1804 | refute bidi_control? "\x21" 1805 | end 1806 | end 1807 | 1808 | describe "macCyrillic" do 1809 | let(:encoding) { "macCyrillic" } 1810 | 1811 | it "is always valid" do 1812 | assert valid? "\x80" 1813 | end 1814 | 1815 | it "is always assigned" do 1816 | assert assigned? "\x21" 1817 | end 1818 | 1819 | it "is control or not" do 1820 | assert control? "\x1E" 1821 | refute control? "\x67" 1822 | end 1823 | 1824 | it "is blank or not" do 1825 | assert blank? "\x20" 1826 | refute blank? "\x21" 1827 | end 1828 | 1829 | it "is separator or not" do 1830 | assert separator? "\n" 1831 | refute separator? "\x20" 1832 | end 1833 | 1834 | it "is never format" do 1835 | refute format? "\x21" 1836 | end 1837 | 1838 | it "is never bidi_control" do 1839 | refute bidi_control? "\x21" 1840 | end 1841 | end 1842 | 1843 | describe "macGreek" do 1844 | let(:encoding) { "macGreek" } 1845 | 1846 | it "is always valid" do 1847 | assert valid? "\x80" 1848 | end 1849 | 1850 | it "is always assigned" do 1851 | assert assigned? "\x21" 1852 | end 1853 | 1854 | it "is control or not" do 1855 | assert control? "\x1E" 1856 | refute control? "\x67" 1857 | end 1858 | 1859 | it "is blank or not" do 1860 | assert blank? "\x20" 1861 | refute blank? "\x21" 1862 | end 1863 | 1864 | it "is separator or not" do 1865 | assert separator? "\n" 1866 | refute separator? "\x20" 1867 | end 1868 | 1869 | it "is never format" do 1870 | refute format? "\x21" 1871 | end 1872 | 1873 | it "is never bidi_control" do 1874 | refute bidi_control? "\x21" 1875 | end 1876 | end 1877 | 1878 | describe "macIceland" do 1879 | let(:encoding) { "macIceland" } 1880 | 1881 | it "is always valid" do 1882 | assert valid? "\x80" 1883 | end 1884 | 1885 | it "is always assigned" do 1886 | assert assigned? "\x21" 1887 | end 1888 | 1889 | it "is control or not" do 1890 | assert control? "\x1E" 1891 | refute control? "\x67" 1892 | end 1893 | 1894 | it "is blank or not" do 1895 | assert blank? "\x20" 1896 | refute blank? "\x21" 1897 | end 1898 | 1899 | it "is separator or not" do 1900 | assert separator? "\n" 1901 | refute separator? "\x20" 1902 | end 1903 | 1904 | it "is never format" do 1905 | refute format? "\x21" 1906 | end 1907 | 1908 | it "is never bidi_control" do 1909 | refute bidi_control? "\x21" 1910 | end 1911 | end 1912 | 1913 | describe "macRoman" do 1914 | let(:encoding) { "macRoman" } 1915 | 1916 | it "is always valid" do 1917 | assert valid? "\x80" 1918 | end 1919 | 1920 | it "is always assigned" do 1921 | assert assigned? "\x21" 1922 | end 1923 | 1924 | it "is control or not" do 1925 | assert control? "\x1E" 1926 | refute control? "\x67" 1927 | end 1928 | 1929 | it "is blank or not" do 1930 | assert blank? "\x20" 1931 | refute blank? "\x21" 1932 | end 1933 | 1934 | it "is separator or not" do 1935 | assert separator? "\n" 1936 | refute separator? "\x20" 1937 | end 1938 | 1939 | it "is never format" do 1940 | refute format? "\x21" 1941 | end 1942 | 1943 | it "is never bidi_control" do 1944 | refute bidi_control? "\x21" 1945 | end 1946 | end 1947 | 1948 | describe "macRomania" do 1949 | let(:encoding) { "macRomania" } 1950 | 1951 | it "is always valid" do 1952 | assert valid? "\x80" 1953 | end 1954 | 1955 | it "is always assigned" do 1956 | assert assigned? "\x21" 1957 | end 1958 | 1959 | it "is control or not" do 1960 | assert control? "\x1E" 1961 | refute control? "\x67" 1962 | end 1963 | 1964 | it "is blank or not" do 1965 | assert blank? "\x20" 1966 | refute blank? "\x21" 1967 | end 1968 | 1969 | it "is separator or not" do 1970 | assert separator? "\n" 1971 | refute separator? "\x20" 1972 | end 1973 | 1974 | it "is never format" do 1975 | refute format? "\x21" 1976 | end 1977 | 1978 | it "is never bidi_control" do 1979 | refute bidi_control? "\x21" 1980 | end 1981 | end 1982 | 1983 | describe "macThai" do 1984 | let(:encoding) { "macThai" } 1985 | 1986 | it "is always valid" do 1987 | assert valid? "\x80" 1988 | end 1989 | 1990 | it "is assigned or not" do 1991 | assert assigned? "\x21" 1992 | refute assigned? "\xFC" 1993 | end 1994 | 1995 | it "is control or not" do 1996 | assert control? "\x1E" 1997 | refute control? "\x67" 1998 | end 1999 | 2000 | it "is blank or not" do 2001 | assert blank? "\x20" 2002 | refute blank? "\x21" 2003 | end 2004 | 2005 | it "is separator or not" do 2006 | assert separator? "\n" 2007 | refute separator? "\x20" 2008 | end 2009 | 2010 | it "is never format" do 2011 | refute format? "\x21" 2012 | end 2013 | 2014 | it "is never bidi_control" do 2015 | refute bidi_control? "\x21" 2016 | end 2017 | end 2018 | 2019 | describe "macTurkish" do 2020 | let(:encoding) { "macTurkish" } 2021 | 2022 | it "is always valid" do 2023 | assert valid? "\x80" 2024 | end 2025 | 2026 | it "is assigned or not" do 2027 | assert assigned? "\x21" 2028 | refute assigned? "\xF5" 2029 | end 2030 | 2031 | it "is control or not" do 2032 | assert control? "\x1E" 2033 | refute control? "\x67" 2034 | end 2035 | 2036 | it "is blank or not" do 2037 | assert blank? "\x20" 2038 | refute blank? "\x21" 2039 | end 2040 | 2041 | it "is separator or not" do 2042 | assert separator? "\n" 2043 | refute separator? "\x20" 2044 | end 2045 | 2046 | it "is never format" do 2047 | refute format? "\x21" 2048 | end 2049 | 2050 | it "is never bidi_control" do 2051 | refute bidi_control? "\x21" 2052 | end 2053 | end 2054 | 2055 | describe "macUkraine" do 2056 | let(:encoding) { "macUkraine" } 2057 | 2058 | it "is always valid" do 2059 | assert valid? "\x80" 2060 | end 2061 | 2062 | it "is always assigned" do 2063 | assert assigned? "\x21" 2064 | end 2065 | 2066 | it "is control or not" do 2067 | assert control? "\x1E" 2068 | refute control? "\x67" 2069 | end 2070 | 2071 | it "is blank or not" do 2072 | assert blank? "\x20" 2073 | refute blank? "\x21" 2074 | end 2075 | 2076 | it "is separator or not" do 2077 | assert separator? "\n" 2078 | refute separator? "\x20" 2079 | end 2080 | 2081 | it "is never format" do 2082 | refute format? "\x21" 2083 | end 2084 | 2085 | it "is never bidi_control" do 2086 | refute bidi_control? "\x21" 2087 | end 2088 | end 2089 | 2090 | end 2091 | 2092 | describe "TIS-620/Windows-874" do 2093 | describe "TIS-620" do 2094 | let(:encoding) { "TIS-620" } 2095 | 2096 | it "is always valid" do 2097 | assert valid? "\x80" 2098 | end 2099 | 2100 | it "is assigned or not" do 2101 | assert assigned? "\x21" 2102 | refute assigned? "\xA0" 2103 | end 2104 | 2105 | it "is control or not" do 2106 | assert control? "\x1E" 2107 | refute control? "\x67" 2108 | end 2109 | 2110 | it "is blank or not" do 2111 | assert blank? "\x20" 2112 | refute blank? "\x21" 2113 | end 2114 | 2115 | it "is separator or not" do 2116 | assert separator? "\n" 2117 | refute separator? "\x20" 2118 | end 2119 | 2120 | it "is never format" do 2121 | refute format? "\x21" 2122 | end 2123 | 2124 | it "is never bidi_control" do 2125 | refute bidi_control? "\x21" 2126 | end 2127 | end 2128 | 2129 | describe "Windows-874" do 2130 | let(:encoding) { "Windows-874" } 2131 | 2132 | it "is always valid" do 2133 | assert valid? "\x80" 2134 | end 2135 | 2136 | it "is assigned or not" do 2137 | assert assigned? "\xA0" 2138 | refute assigned? "\x99" 2139 | end 2140 | 2141 | it "is control or not" do 2142 | assert control? "\x1E" 2143 | refute control? "\x67" 2144 | end 2145 | 2146 | it "is blank or not" do 2147 | assert blank? "\x20" 2148 | refute blank? "\x21" 2149 | end 2150 | 2151 | it "is separator or not" do 2152 | assert separator? "\n" 2153 | refute separator? "\x20" 2154 | end 2155 | 2156 | it "is never format" do 2157 | refute format? "\x21" 2158 | end 2159 | 2160 | it "is never bidi_control" do 2161 | refute bidi_control? "\x21" 2162 | end 2163 | end 2164 | end 2165 | 2166 | describe "KOI8-*" do 2167 | describe "KOI8-R" do 2168 | let(:encoding) { "KOI8-R" } 2169 | 2170 | it "is always valid" do 2171 | assert valid? "\x80" 2172 | end 2173 | 2174 | it "is always assigned" do 2175 | assert assigned? "\x21" 2176 | end 2177 | 2178 | it "is control or not" do 2179 | assert control? "\x1E" 2180 | refute control? "\x67" 2181 | end 2182 | 2183 | it "is blank or not" do 2184 | assert blank? "\x20" 2185 | refute blank? "\x21" 2186 | end 2187 | 2188 | it "is separator or not" do 2189 | assert separator? "\n" 2190 | refute separator? "\x20" 2191 | end 2192 | 2193 | it "is never format" do 2194 | refute format? "\x21" 2195 | end 2196 | 2197 | it "is never bidi_control" do 2198 | refute bidi_control? "\x21" 2199 | end 2200 | end 2201 | 2202 | describe "KOI8-U" do 2203 | let(:encoding) { "KOI8-U" } 2204 | 2205 | it "is always valid" do 2206 | assert valid? "\x80" 2207 | end 2208 | 2209 | it "is always assigned" do 2210 | assert assigned? "\x21" 2211 | end 2212 | 2213 | it "is control or not" do 2214 | assert control? "\x1E" 2215 | refute control? "\x67" 2216 | end 2217 | 2218 | it "is blank or not" do 2219 | assert blank? "\x20" 2220 | refute blank? "\x21" 2221 | end 2222 | 2223 | it "is separator or not" do 2224 | assert separator? "\n" 2225 | refute separator? "\x20" 2226 | end 2227 | 2228 | it "is never format" do 2229 | refute format? "\x21" 2230 | end 2231 | 2232 | it "is never bidi_control" do 2233 | refute bidi_control? "\x21" 2234 | end 2235 | end 2236 | end 2237 | end 2238 | end 2239 | --------------------------------------------------------------------------------