├── lib ├── unf │ ├── version.rb │ ├── normalizer_cruby.rb │ ├── normalizer_jruby.rb │ └── normalizer.rb └── unf.rb ├── Gemfile ├── .gitignore ├── test ├── helper.rb └── test_unf.rb ├── Rakefile ├── ext └── mkrf_conf.rb ├── README.md ├── CHANGELOG.md ├── LICENSE ├── unf.gemspec └── .github └── workflows └── test.yml /lib/unf/version.rb: -------------------------------------------------------------------------------- 1 | module UNF 2 | VERSION = '0.2.0' 3 | end 4 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source 'https://rubygems.org' 2 | 3 | # Specify your gem's dependencies in unf.gemspec 4 | gemspec 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.gem 2 | *.rbc 3 | .bundle 4 | .config 5 | .yardoc 6 | Gemfile.lock 7 | InstalledFiles 8 | _yardoc 9 | coverage 10 | doc/ 11 | lib/bundler/man 12 | pkg 13 | rdoc 14 | spec/reports 15 | test/tmp 16 | test/version_tmp 17 | tmp 18 | -------------------------------------------------------------------------------- /test/helper.rb: -------------------------------------------------------------------------------- 1 | require 'rubygems' 2 | require 'bundler' 3 | begin 4 | Bundler.setup(:default, :development) 5 | rescue Bundler::BundlerError => e 6 | $stderr.puts e.message 7 | $stderr.puts "Run `bundle install` to install missing gems" 8 | exit e.status_code 9 | end 10 | require 'test/unit' 11 | 12 | $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib')) 13 | $LOAD_PATH.unshift(File.dirname(__FILE__)) 14 | require 'unf' 15 | 16 | class Test::Unit::TestCase 17 | end 18 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env rake 2 | require "bundler/gem_tasks" 3 | 4 | gemspec = Bundler::GemHelper.gemspec 5 | 6 | require 'rake/testtask' 7 | Rake::TestTask.new(:test) do |test| 8 | test.libs << 'test' 9 | test.test_files = FileList['test/**/test_*.rb'] 10 | test.verbose = true 11 | end 12 | 13 | require 'rdoc/task' 14 | Rake::RDocTask.new do |rdoc| 15 | rdoc.rdoc_dir = 'rdoc' 16 | rdoc.title = "#{gemspec.name} #{gemspec.version}" 17 | rdoc.rdoc_files.include(gemspec.extra_rdoc_files) 18 | rdoc.rdoc_files.include('lib/**/*.rb') 19 | end 20 | 21 | task :default => :test 22 | -------------------------------------------------------------------------------- /lib/unf/normalizer_cruby.rb: -------------------------------------------------------------------------------- 1 | case 2 | when defined?(UNF::Normalizer) 3 | # Probably unf_ext is preloaded. 4 | when String.method_defined?(:unicode_normalize) 5 | class String 6 | [:nfc, :nfd, :nfkc, :nfkd].each { |form| 7 | eval %{ 8 | remove_method :to_#{form} if method_defined?(:to_#{form}) 9 | 10 | def to_#{form} 11 | unicode_normalize(#{form.inspect}) 12 | end 13 | } 14 | } 15 | end 16 | 17 | module UNF # :nodoc: all 18 | class Normalizer 19 | def normalize(string, normalization_form) 20 | String.try_convert(string).unicode_normalize(normalization_form) 21 | end 22 | end 23 | end 24 | else 25 | require 'unf_ext' 26 | end 27 | -------------------------------------------------------------------------------- /lib/unf/normalizer_jruby.rb: -------------------------------------------------------------------------------- 1 | require 'java' 2 | 3 | module UNF # :nodoc: all 4 | class Normalizer 5 | def initialize() 6 | @normalizer = java.text.Normalizer 7 | end 8 | 9 | def normalize(string, normalization_form) 10 | @normalizer.normalize(string, form(normalization_form)) 11 | end 12 | 13 | private 14 | 15 | def form(symbol) 16 | case symbol 17 | when :nfc 18 | @normalizer::Form::NFC 19 | when :nfd 20 | @normalizer::Form::NFD 21 | when :nfkc 22 | @normalizer::Form::NFKC 23 | when :nfkd 24 | @normalizer::Form::NFKD 25 | else 26 | raise ArgumentError, "unknown normalization form: #{symbol.inspect}" 27 | end 28 | end 29 | end 30 | end 31 | -------------------------------------------------------------------------------- /ext/mkrf_conf.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | 3 | extdir = File.dirname(__FILE__) 4 | 5 | unless defined?(JRUBY_VERSION) || String.method_defined?(:unicode_normalize) 6 | require 'fileutils' 7 | require 'rubygems' 8 | require 'rubygems/command.rb' 9 | require 'rubygems/dependency_installer.rb' 10 | 11 | Gem::Command.build_args = ARGV 12 | 13 | gemsdir = File.expand_path('gems', extdir) 14 | installer = Gem::DependencyInstaller.new(install_dir: gemsdir) 15 | specs = installer.install('unf_ext', '>= 0') 16 | 17 | unf_ext = specs.find { |spec| spec.name == 'unf_ext' } 18 | FileUtils.cp_r(File.join(unf_ext.gem_dir, 'lib'), File.expand_path('..', extdir)) 19 | FileUtils.rm_rf(gemsdir) 20 | end 21 | 22 | File.write(File.expand_path('Rakefile', extdir), < string in NFC 20 | } 21 | 22 | # Class method 23 | UNF::Normalizer.normalize(string, :nfc) 24 | 25 | # Instance methods of String 26 | string.to_nfc 27 | 28 | Installation 29 | ------------ 30 | 31 | gem install unf 32 | 33 | License 34 | ------- 35 | 36 | Copyright (c) 2011, 2012, 2013 Akinori MUSHA 37 | 38 | Licensed under the 2-clause BSD license. 39 | See `LICENSE` for details. 40 | -------------------------------------------------------------------------------- /lib/unf/normalizer.rb: -------------------------------------------------------------------------------- 1 | require 'singleton' 2 | if defined?(RUBY_ENGINE) && RUBY_ENGINE == 'jruby' 3 | require 'unf/normalizer_jruby' 4 | else 5 | require 'unf/normalizer_cruby' 6 | end 7 | 8 | # UTF-8 string normalizer class. Implementations may vary depending 9 | # on the platform. 10 | class UNF::Normalizer 11 | include Singleton 12 | 13 | class << self 14 | # :singleton-method: instance 15 | # 16 | # Returns a singleton normalizer instance. 17 | 18 | # :singleton-method: new 19 | # 20 | # Returns a new normalizer instance. Use +singleton+ instead. 21 | public :new 22 | 23 | # A shortcut for instance.normalize(string, form). 24 | def normalize(string, form) 25 | instance.normalize(string, form) 26 | end 27 | end 28 | 29 | # :method: normalize 30 | # :call-seq: 31 | # normalize(string, form) 32 | # 33 | # Normalizes a UTF-8 string into a given form (:nfc, :nfd, :nfkc or 34 | # :nfkd). 35 | end 36 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## 0.2.0 (2024-08-18) 2 | 3 | Features: 4 | 5 | - Use String#unicode_normalize on Ruby >=2.2 unless unf_ext is already loaded. 6 | 7 | - Reduce the gem size by removing unnecessary files. (GH #22) 8 | 9 | ## 0.1.4 (2014-04-04) 10 | 11 | Bugfixes: 12 | 13 | - Fix the gem platform name for JRuby. 14 | 15 | ## 0.1.3 (2013-10-25) 16 | 17 | Features: 18 | 19 | - Make UNF::Normalizer.instance thread-safe, and deprecate .new. 20 | (GH #6) 21 | 22 | ## 0.1.2 (2013-08-12) 23 | 24 | Features: 25 | 26 | - Add license to gemspec. 27 | 28 | - Adjust dependencies for Ruby 1.8 to satisfy bundler. 29 | 30 | ## 0.1.1 (2013-03-23) 31 | 32 | Features: 33 | 34 | - Add rdoc. 35 | 36 | ## 0.1.0 (2013-03-18) 37 | 38 | Features: 39 | 40 | - Start CI with Travis-CI. 41 | 42 | ## 0.0.5 (2012-03-04) 43 | 44 | Features: 45 | 46 | - Migrate from Jeweler to Bundle gem. 47 | 48 | Bugfixes: 49 | 50 | - Fix gem support for JRuby. 51 | 52 | ## 0.0.4 (2011-12-09) 53 | 54 | Features: 55 | 56 | - Introduce autoloading. 57 | 58 | ## 0.0.3 (2011-10-25) 59 | 60 | - Initial release. 61 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2011, 2012 Akinori MUSHA 2 | 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions 7 | are met: 8 | 1. Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 2. Redistributions in binary form must reproduce the above copyright 11 | notice, this list of conditions and the following disclaimer in the 12 | documentation and/or other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 | ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 | OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 | OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 | SUCH DAMAGE. 25 | -------------------------------------------------------------------------------- /unf.gemspec: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | require File.expand_path('../lib/unf/version', __FILE__) 3 | 4 | Gem::Specification.new do |gem| 5 | gem.name = "unf" 6 | gem.version = UNF::VERSION 7 | gem.authors = ["Akinori MUSHA"] 8 | gem.email = ["knu@idaemons.org"] 9 | gem.description = <<-'EOS' 10 | This is a wrapper library to bring Unicode Normalization Form support 11 | to Ruby/JRuby. 12 | EOS 13 | gem.summary = %q{A wrapper library to bring Unicode Normalization Form support to Ruby/JRuby} 14 | gem.homepage = "https://github.com/knu/ruby-unf" 15 | gem.platform = defined?(JRUBY_VERSION) ? 'java' : Gem::Platform::RUBY 16 | gem.license = "BSD-2-Clause" 17 | 18 | gem.files = `git ls-files -z`.split("\x0").reject { |f| f.start_with?(*%w[test/ Rakefile .gitignore .travis.yml]) } 19 | gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) } 20 | gem.require_paths = ["lib"] 21 | gem.extra_rdoc_files = ['README.md', 'LICENSE'] 22 | 23 | gem.required_ruby_version = '>= 1.9.3' 24 | gem.extensions = 'ext/mkrf_conf.rb' 25 | 26 | gem.add_development_dependency 'bundler', '>= 1.2.0' 27 | gem.add_development_dependency 'rake', '>= 0.9.2.2' 28 | gem.add_development_dependency 'rdoc', '> 2.4.2' 29 | gem.add_development_dependency 'test-unit' 30 | gem.add_development_dependency 'unf_ext', '>= 0' unless defined?(JRUBY_VERSION) 31 | end 32 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: test 2 | 3 | on: [push, pull_request, workflow_dispatch] 4 | 5 | jobs: 6 | ruby-versions: 7 | uses: ruby/actions/.github/workflows/ruby_versions.yml@master 8 | with: 9 | engine: cruby 10 | min_version: 3.2 11 | 12 | build: 13 | needs: ruby-versions 14 | name: build (${{ matrix.ruby_engine }} / ${{ matrix.ruby_version || 'latest' }} / ${{ matrix.os }}) 15 | strategy: 16 | matrix: 17 | os: 18 | - ubuntu-latest 19 | ruby_engine: 20 | - ruby 21 | ruby_version: 22 | - '2.0' 23 | - '2.7' 24 | - '${{ fromJson(needs.ruby-versions.outputs.versions) }}' 25 | include: 26 | - { os: ubuntu-latest, ruby_engine: jruby, ruby_version: '' } 27 | - { os: ubuntu-latest, ruby_engine: jruby, ruby_version: head } 28 | - { os: macos-latest, ruby_engine: ruby, ruby_version: '' } 29 | - { os: windows-latest, ruby_engine: ruby, ruby_version: ucrt } 30 | - { os: windows-latest, ruby_engine: ruby, ruby_version: mswin } 31 | runs-on: ${{ matrix.os }} 32 | steps: 33 | - uses: actions/checkout@v4 34 | 35 | - name: Set up Ruby 36 | uses: ruby/setup-ruby@v1 37 | with: 38 | ruby-version: ${{ matrix.ruby_engine }}-${{ matrix.ruby_version }} 39 | bundler-cache: true 40 | 41 | - name: Run test 42 | run: bundle exec rake test 43 | -------------------------------------------------------------------------------- /test/test_unf.rb: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | require 'helper' 3 | require 'pathname' 4 | 5 | class TestUNF < Test::Unit::TestCase 6 | test "raise ArgumentError if an unknown normalization form is given" do 7 | normalizer = UNF::Normalizer.instance 8 | assert_raises(ArgumentError) { normalizer.normalize("が", :nfck) } 9 | end 10 | 11 | test "pass all tests bundled with the original unf" do 12 | normalizer = UNF::Normalizer.instance 13 | open(Pathname(__FILE__).dirname + 'normalization-test.txt', 'r:utf-8').each_slice(6) { |lines| 14 | flunk "broken test file" if lines.size != 6 || lines.pop !~ /^$/ 15 | str, nfd, nfc, nfkd, nfkc = lines 16 | assert nfd, normalizer.normalize(str, :nfd) 17 | assert nfd, normalizer.normalize(nfd, :nfd) 18 | assert nfd, normalizer.normalize(nfc, :nfd) 19 | assert nfkd, normalizer.normalize(nfkc, :nfd) 20 | assert nfkd, normalizer.normalize(nfkc, :nfd) 21 | 22 | assert nfc, normalizer.normalize(str, :nfd) 23 | assert nfc, normalizer.normalize(nfd, :nfc) 24 | assert nfc, normalizer.normalize(nfc, :nfc) 25 | assert nfkc, normalizer.normalize(nfkc, :nfc) 26 | assert nfkc, normalizer.normalize(nfkd, :nfc) 27 | 28 | assert nfkd, normalizer.normalize(str, :nfkd) 29 | assert nfkd, normalizer.normalize(nfd, :nfkd) 30 | assert nfkd, normalizer.normalize(nfc, :nfkd) 31 | assert nfkd, normalizer.normalize(nfkc, :nfkd) 32 | assert nfkd, normalizer.normalize(nfkd, :nfkd) 33 | 34 | assert nfkc, normalizer.normalize(str, :nfkc) 35 | assert nfkc, normalizer.normalize(nfd, :nfkc) 36 | assert nfkc, normalizer.normalize(nfc, :nfkc) 37 | assert nfkc, normalizer.normalize(nfkc, :nfkc) 38 | assert nfkc, normalizer.normalize(nfkd, :nfkc) 39 | } 40 | end 41 | end 42 | --------------------------------------------------------------------------------