├── .rspec ├── .coveralls.yml ├── spec ├── MeCab.rb ├── spec_helper.rb ├── parser_spec.rb └── node_spec.rb ├── lib └── mecab │ ├── ext │ ├── version.rb │ ├── parser.rb │ └── node.rb │ └── ext.rb ├── Gemfile ├── version.rb ├── .travis.yml ├── Rakefile ├── .gitignore ├── Changelog.md ├── extconf.rb ├── patch └── prefix.patch ├── mecab-ext-heavy.gemspec ├── parallel_make.rb └── README.md /.rspec: -------------------------------------------------------------------------------- 1 | --color 2 | -------------------------------------------------------------------------------- /.coveralls.yml: -------------------------------------------------------------------------------- 1 | service_name: travis-ci 2 | -------------------------------------------------------------------------------- /spec/MeCab.rb: -------------------------------------------------------------------------------- 1 | # File to fake loading original mecab-ruby gem 2 | module MeCab; end 3 | -------------------------------------------------------------------------------- /lib/mecab/ext/version.rb: -------------------------------------------------------------------------------- 1 | module Mecab 2 | module Ext 3 | VERSION = "1.0.2" 4 | end 5 | end 6 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source 'https://rubygems.org' 2 | 3 | # Specify your gem's dependencies in mecab-ext.gemspec 4 | gemspec 5 | -------------------------------------------------------------------------------- /version.rb: -------------------------------------------------------------------------------- 1 | module Mecab::Ext::Heavy 2 | LOCAL_VERSION = '.2' 3 | VERSION = Mecab::Ext::VERSION + LOCAL_VERSION 4 | end 5 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: ruby 2 | 
rvm: 3 | - 2.0.0 4 | - 1.9.3 5 | branches: 6 | only: 7 | - master 8 | script: 9 | - bundle exec rake spec 10 | notifications: 11 | email: false 12 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | require "bundler/gem_tasks" 2 | 3 | require "rspec/core/rake_task" 4 | RSpec::Core::RakeTask.new(:spec) do |task| 5 | task.rspec_opts = ["-c", "-fs"] 6 | end 7 | 8 | task :default => :spec 9 | task :test => :spec 10 | -------------------------------------------------------------------------------- /lib/mecab/ext.rb: -------------------------------------------------------------------------------- 1 | require "active_support/inflector" 2 | require "active_support/core_ext/module/delegation" 3 | require "MeCab" 4 | 5 | require "mecab/ext/parser" 6 | require "mecab/ext/node" 7 | 8 | module Mecab 9 | module Ext 10 | end 11 | end 12 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.gem 2 | *.rbc 3 | .bundle 4 | .config 5 | .yardoc 6 | Gemfile.lock 7 | InstalledFiles 8 | _yardoc 9 | coverage 10 | doc/ 11 | lib/bundler/man 12 | pkg 13 | rdoc 14 | spec/reports 15 | test/tmp 16 | test/version_tmp 17 | tmp 18 | /vendor/ 19 | Makefile 20 | /ports/ -------------------------------------------------------------------------------- /lib/mecab/ext/parser.rb: -------------------------------------------------------------------------------- 1 | module Mecab 2 | module Ext 3 | module Parser 4 | class << self 5 | @@tagger = nil 6 | def parse(str) 7 | generator = lambda { (@@tagger ||= MeCab::Tagger.new).parseToNode(str) } 8 | Node.new(generator) 9 | end 10 | 11 | alias :parseToNode :parse 12 | alias :parse_to_node :parse 13 | end 14 | end 15 | end 16 | end 17 | 
-------------------------------------------------------------------------------- /spec/spec_helper.rb: -------------------------------------------------------------------------------- 1 | # Do not require original mecab-ruby gem for testing 2 | require File.expand_path('../MeCab', __FILE__) 3 | 4 | module MeCab 5 | class Tagger 6 | end 7 | end 8 | 9 | $LOAD_PATH.unshift File.expand_path("../", __FILE__) 10 | $LOAD_PATH.unshift File.expand_path("../../lib", __FILE__) 11 | 12 | require 'coveralls' 13 | require 'simplecov' 14 | 15 | Coveralls.wear! 16 | SimpleCov.formatter = Coveralls::SimpleCov::Formatter 17 | SimpleCov.start do 18 | add_filter 'spec' 19 | end 20 | 21 | require 'mecab/ext' 22 | -------------------------------------------------------------------------------- /Changelog.md: -------------------------------------------------------------------------------- 1 | ### 1.0.2 / 2013-06-15 2 | [full changelog](http://github.com/taiki45/mecab-ext/compare/v1.0.2...master) 3 | 4 | Enhancements 5 | 6 | * Support Rails 4.0 (Naoyoshi Aikawa / @awakia) 7 | 8 | ### 1.0.1 / 2013-05-19 9 | [full changelog](http://github.com/taiki45/mecab-ext/compare/v1.0.1...master) 10 | 11 | Bug fixes 12 | 13 | * While a MeCab::Node instance is alive and in use, the Tagger that generated it must not be deleted, because doing so causes internal inconsistency. When the tagger is deleted, its corresponding internal data seems to be deleted as well, so trying to access the node's data then causes a SEGV. 
(osak a.k.a @osa_k) 14 | -------------------------------------------------------------------------------- /extconf.rb: -------------------------------------------------------------------------------- 1 | require 'mkmf' 2 | require 'mini_portile' 3 | 4 | require_relative 'parallel_make.rb' 5 | 6 | def cook_internal(name, version, url, patches = []) 7 | recipe = MiniPortile.new(name, version) 8 | recipe.files = [url] 9 | recipe.configure_options += %w[--with-charset=utf8] 10 | recipe.patch_files += patches.map(&File.method(:expand_path)) unless patches.empty? 11 | recipe.cook 12 | recipe.activate 13 | end 14 | 15 | def cook_mecab 16 | cook_internal('mecab', '0.996', 'https://mecab.googlecode.com/files/mecab-0.996.tar.gz') 17 | end 18 | 19 | def cook_naist_jdic 20 | cook_internal('mecab-naist-jdic', '0.6.3b-20111013', 21 | 'http://jaist.dl.sourceforge.jp/naist-jdic/53500/mecab-naist-jdic-0.6.3b-20111013.tar.gz', 22 | %w[patch/prefix.patch]) 23 | end 24 | 25 | cook_mecab 26 | cook_naist_jdic 27 | 28 | create_makefile 'mecab-ext-heavy' 29 | -------------------------------------------------------------------------------- /patch/prefix.patch: -------------------------------------------------------------------------------- 1 | --- Makefile.in.orig 2014-06-26 07:17:04.385962877 +0900 2 | +++ Makefile.in 2014-06-26 07:17:39.313474839 +0900 3 | @@ -512,11 +512,11 @@ export-package: 4 | ./upload.pl -p mecab -n @PACKAGE@ -r @VERSION@ -f @PACKAGE@-@VERSION@.tar.gz 5 | 6 | install-exec-hook: 7 | - if ! [ -d $(DESTDIR)/etc/mecab/dic/naist-jdic ]; \ 8 | - then mkdir -p $(DESTDIR)/etc/mecab/dic/naist-jdic; \ 9 | + if ! [ -d ${prefix}/etc/mecab/dic/naist-jdic ]; \ 10 | + then mkdir -p ${prefix}/etc/mecab/dic/naist-jdic; \ 11 | fi 12 | - if ! [ -f $(DESTDIR)/etc/mecab/dic/naist-jdic/dicrc ]; \ 13 | - then $(LN_S) @MECAB_DICDIR@/dicrc $(DESTDIR)/etc/mecab/dic/naist-jdic/dicrc; \ 14 | + if ! 
[ -f ${prefix}/etc/mecab/dic/naist-jdic/dicrc ]; \ 15 | + then $(LN_S) @MECAB_DICDIR@/dicrc ${prefix}/etc/mecab/dic/naist-jdic/dicrc; \ 16 | fi 17 | 18 | # Tell versions [3.59,3.63) of GNU make to not export all variables. 19 | -------------------------------------------------------------------------------- /spec/parser_spec.rb: -------------------------------------------------------------------------------- 1 | require 'spec_helper' 2 | 3 | describe Mecab::Ext::Parser do 4 | 5 | describe ".parse" do 6 | subject { described_class.parse("test string") } 7 | 8 | it "should return Node instance" do 9 | should be_a Mecab::Ext::Node 10 | end 11 | 12 | it "should call Node.new" do 13 | Mecab::Ext::Node.stub(:new).and_return(:called) 14 | should be :called 15 | end 16 | 17 | it "should pass proc to Node#initialize" do 18 | Mecab::Ext::Node.should_receive(:new) do |arg| 19 | expect(arg).to be_a Proc 20 | end 21 | subject 22 | end 23 | 24 | it "should pass callable obj to Node.new" do 25 | Mecab::Ext::Node.should_receive(:new) do |arg| 26 | expect(arg).to be_respond_to :call 27 | end 28 | subject 29 | end 30 | end 31 | 32 | describe ".parseToNode" do 33 | subject { described_class.method(:parseToNode) } 34 | it { should eq described_class.method(:parse) } 35 | end 36 | 37 | describe ".parse_to_node" do 38 | subject { described_class.method(:parse_to_node) } 39 | it { should eq described_class.method(:parse) } 40 | end 41 | 42 | end 43 | -------------------------------------------------------------------------------- /lib/mecab/ext/node.rb: -------------------------------------------------------------------------------- 1 | module Mecab 2 | module Ext 3 | class Node 4 | 5 | delegate *Enumerable.instance_methods, :each, to: :__enum__ 6 | attr_reader :__enum__ 7 | 8 | def initialize(generator) 9 | @generator = generator 10 | @__enum__ = to_enum 11 | end 12 | 13 | def to_enum 14 | gen_enumrator 15 | end 16 | 17 | def each_surface 18 | each {|node| yield node.surface } 19 | end 
20 | 21 | def each_feature 22 | each {|node| yield node.feature } 23 | end 24 | 25 | %w(surfaces features lengths ids char_types isbests wcosts costs).each do |plural_name| 26 | define_method(plural_name) do 27 | gen_enumrator(plural_name.singularize) 28 | end 29 | end 30 | 31 | private 32 | 33 | def gen_enumrator(name = nil) 34 | Enumerator.new do |y| 35 | node = @generator.call 36 | while node 37 | node = node.next 38 | unless node.nil? || node.surface.empty? 39 | y << (name ? node.__send__(name) : node) 40 | end 41 | end 42 | self 43 | end 44 | end 45 | 46 | end 47 | end 48 | end 49 | -------------------------------------------------------------------------------- /mecab-ext-heavy.gemspec: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | lib = File.expand_path('../lib', __FILE__) 3 | $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) 4 | require 'mecab/ext/version' 5 | require_relative 'version' 6 | 7 | Gem::Specification.new do |spec| 8 | spec.add_dependency "activesupport", "~> 3.2.13", ">= 3.2.13" 9 | spec.add_dependency "mini_portile", "~> 0" 10 | 11 | spec.name = "mecab-ext-heavy" 12 | spec.version = Mecab::Ext::Heavy::VERSION 13 | spec.authors = ["Tadashi Saito"] 14 | spec.email = ["tad.a.diggergmail.com"] 15 | spec.description = %q{mecab-ext with mecab and mecab-naist-jdic, so you can just try mecab soon with Heroku} 16 | spec.summary = %q{mecab-ext with mecab and mecab-naist-jdic} 17 | spec.homepage = "https://github.com/tadd/mecab-ext-heavy" 18 | spec.license = "MIT" 19 | 20 | spec.files = `git ls-files`.split($/) + 21 | spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) } 22 | spec.test_files = spec.files.grep(%r{^(test|spec|features)/}) 23 | spec.require_paths = ["lib"] 24 | spec.extensions = "extconf.rb" 25 | 26 | spec.add_development_dependency "bundler", "~> 1.3" 27 | spec.add_development_dependency "rake", "~> 0" 28 | spec.add_development_dependency "rspec", "~> 
2.13.0", ">= 2.13.0" 29 | spec.add_development_dependency "simplecov", "~> 0.7.1", ">= 0.7.1" 30 | spec.add_development_dependency "coveralls", "~> 0.6.7", ">= 0.6.7" 31 | end 32 | -------------------------------------------------------------------------------- /parallel_make.rb: -------------------------------------------------------------------------------- 1 | # from: https://github.com/grosser/parallel/blob/master/lib/parallel.rb 2 | def processor_count 3 | os_name = RbConfig::CONFIG["target_os"] 4 | if os_name =~ /mingw|mswin/ 5 | require 'win32ole' 6 | result = WIN32OLE.connect("winmgmts://"). 7 | ExecQuery("select NumberOfLogicalProcessors from Win32_Processor") 8 | result.to_enum.collect(&:NumberOfLogicalProcessors).reduce(:+) 9 | elsif File.readable?("/proc/cpuinfo") 10 | IO.read("/proc/cpuinfo").scan(/^processor/).size 11 | elsif File.executable?("/usr/bin/hwprefs") 12 | IO.popen("/usr/bin/hwprefs thread_count").read.to_i 13 | elsif File.executable?("/usr/sbin/psrinfo") 14 | IO.popen("/usr/sbin/psrinfo").read.scan(/^.*on-*line/).size 15 | elsif File.executable?("/usr/sbin/ioscan") 16 | IO.popen("/usr/sbin/ioscan -kC processor") do |out| 17 | out.read.scan(/^.*processor/).size 18 | end 19 | elsif File.executable?("/usr/sbin/pmcycles") 20 | IO.popen("/usr/sbin/pmcycles -m").read.count("\n") 21 | elsif File.executable?("/usr/sbin/lsdev") 22 | IO.popen("/usr/sbin/lsdev -Cc processor -S 1").read.count("\n") 23 | elsif File.executable?("/usr/sbin/sysconf") and os_name =~ /irix/i 24 | IO.popen("/usr/sbin/sysconf NPROC_ONLN").read.to_i 25 | elsif File.executable?("/usr/sbin/sysctl") 26 | IO.popen("/usr/sbin/sysctl -n hw.ncpu").read.to_i 27 | elsif File.executable?("/sbin/sysctl") 28 | IO.popen("/sbin/sysctl -n hw.ncpu").read.to_i 29 | else 30 | 1 31 | end 32 | end 33 | 34 | class MiniPortile 35 | def make_cmd 36 | "make -j#{processor_count+1}" 37 | end 38 | end 39 | 40 | -------------------------------------------------------------------------------- /README.md: 
-------------------------------------------------------------------------------- 1 | # mecab-ext-heavy 2 | Handy extensions for mecab-ruby, with mecab binary and NAIST dictionary. 3 | 4 | ## Installation 5 | First, install `Mecab` and `mecab-ruby`. 6 | [See more detail](https://code.google.com/p/mecab/). 7 | 8 | Then add this line to your application's Gemfile: 9 | 10 | gem 'mecab-ext-heavy' 11 | 12 | And then execute: 13 | 14 | $ bundle 15 | 16 | Or install it yourself as: 17 | 18 | $ gem install mecab-ext-heavy 19 | 20 | ## Usage 21 | 22 | ```ruby 23 | # require with original ruby-mecab 24 | require "mecab/ext" 25 | 26 | # "Mecab" for this gem. Original takes "MeCab" 27 | Mecab #=> for this gem 28 | MeCab #=> for original ruby-mecab 29 | 30 | # Parse Japanese text and get an extended node instance 31 | nodes = Mecab::Ext::Parser.parse("テスト文章") 32 | nodes.class #=> Mecab::Ext::Node 33 | 34 | 35 | # Call Mecab::Ext::Node#each to get each MeCab::Node object 36 | nodes.each {|node| p node } 37 | 38 | # Extended node class has Enumerable methods 39 | nodes.map {|node| node.surface } 40 | nodes.select {|node| node.surface == "テスト" } 41 | nodes.map {|n| n.surface }.join 42 | 43 | # If you need only surfaces, call Mecab::Ext::Node#each_surface 44 | nodes.each_surface {|surface| p surface } 45 | 46 | # Iterate over the values returned by the original node's methods 47 | nodes.surfaces.each {|surface| p surface } 48 | nodes.surfaces.select {|surface| surface == "テスト" } 49 | 50 | %w(surfaces features lengths ids char_types isbests wcosts costs).each do |name| 51 | nodes.respond_to? name #=> true 52 | end 53 | 54 | 55 | # This gem solves the original mecab's Node overridden problem 56 | n1 = Mecab::Ext::Parser.parse "今日の天気" 57 | n2 = Mecab::Ext::Parser.parse "赤い花" 58 | n1.each_surface(&:display) 59 | n2.each_surface(&:display) 60 | 61 | # mecab-ext drops the beginning-of-line and end-of-line nodes for convenience 62 | nodes.count #=> 2 63 | ``` 64 | 65 | ## Contributing 66 | 67 | 1. Fork it 68 | 2. 
Create your feature branch (`git checkout -b my-new-feature`) 69 | 3. Commit your changes (`git commit -am 'Add some feature'`) 70 | 4. Push to the branch (`git push origin my-new-feature`) 71 | 5. Create new Pull Request 72 | 73 | ## License 74 | Copyright (c) 2014 Tadashi Saito 75 | 76 | MIT License 77 | 78 | Permission is hereby granted, free of charge, to any person obtaining 79 | a copy of this software and associated documentation files (the 80 | "Software"), to deal in the Software without restriction, including 81 | without limitation the rights to use, copy, modify, merge, publish, 82 | distribute, sublicense, and/or sell copies of the Software, and to 83 | permit persons to whom the Software is furnished to do so, subject to 84 | the following conditions: 85 | 86 | The above copyright notice and this permission notice shall be 87 | included in all copies or substantial portions of the Software. 88 | 89 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 90 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 91 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 92 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 93 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 94 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 95 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 96 | 97 | ## Thanks to 98 | 99 | Taiki ONO, the original author. 
100 | https://github.com/taiki45/mecab-ext 101 | -------------------------------------------------------------------------------- /spec/node_spec.rb: -------------------------------------------------------------------------------- 1 | require "spec_helper" 2 | 3 | describe Mecab::Ext::Node do 4 | 5 | shared_context %{with MeCab::Node like mock which given "test string"}, mecab: :nodes do 6 | let(:first_node) do 7 | n = mock("node").tap {|o| o.stub(:surface).and_return("") } 8 | n.stub(:feature).and_return("") 9 | n.tap {|o| o.stub(:next).and_return(second_node) } 10 | end 11 | let(:second_node) do 12 | n = mock("node").tap {|o| o.stub(:surface).and_return("test") } 13 | n.stub(:feature).and_return("test feature") 14 | n.tap {|o| o.stub(:next).and_return(third_node) } 15 | end 16 | let(:third_node) do 17 | n = mock("node").tap {|o| o.stub(:surface).and_return("string") } 18 | n.stub(:feature).and_return("string feature") 19 | n.tap {|o| o.stub(:next).and_return(fourth_node) } 20 | end 21 | let(:fourth_node) do 22 | n = mock("node").tap {|o| o.stub(:surface).and_return("") } 23 | n.stub(:feature).and_return("") 24 | n.tap {|o| o.stub(:next).and_return(nil) } 25 | end 26 | let(:generator) { double("generator", call: first_node) } 27 | let(:tests) { Array.new } 28 | 29 | subject { described_class.new(generator) } 30 | end 31 | 32 | 33 | describe "#each" do 34 | context "with generator mock" do 35 | let(:generator) do 36 | mock("generator").tap {|o| o.should_receive(:call).at_least(:once).and_return(nil) } 37 | end 38 | subject { described_class.new(generator) } 39 | 40 | it "calls given generator's :call" do 41 | subject.each {} 42 | end 43 | 44 | it "returns self" do 45 | expect(subject.each {}).to equal subject 46 | end 47 | 48 | it "returns enumerable" do 49 | Enumerable.instance_methods.each do |method_name| 50 | expect(subject.each {}).to be_respond_to method_name 51 | end 52 | end 53 | end 54 | 55 | context "with node mocks" do 56 | let(:node) do 57 | n = 
mock("node").tap {|o| o.stub(:next).and_return(nil) } 58 | n.tap {|o| o.should_receive(:surface).and_return("test") } 59 | end 60 | let(:parent_node) { mock("node").tap {|o| o.should_receive(:next).and_return(node) } } 61 | let(:generator) { double("generator").tap {|o| o.stub(:call).and_return(parent_node) } } 62 | subject { described_class.new(generator) } 63 | 64 | it "calls node#next" do 65 | subject.each {} 66 | end 67 | 68 | it "yields sub node" do 69 | subject.each {|test| expect(test).to equal node } 70 | end 71 | end 72 | 73 | context %(with mecab nodes which given "test string"), mecab: :nodes do 74 | it "yields nodes" do 75 | subject.each {|node| expect(node).to be_a RSpec::Mocks::Mock } 76 | end 77 | 78 | it "yields 2 nodes" do 79 | subject.each {|node| tests.push node } 80 | expect(tests).to have(2).nodes 81 | end 82 | 83 | it "yields nodes which respond to :surface" do 84 | subject.each {|node| expect(node).to be_respond_to :surface } 85 | end 86 | end 87 | end 88 | 89 | 90 | describe "#each_surface" do 91 | context %(with mecab nodes which given "test string"), mecab: :nodes do 92 | it "yields 2 surfaces" do 93 | subject.each_surface {|surface| tests.push surface } 94 | expect(tests).to have(2).surfaces 95 | end 96 | 97 | it "yields each surface" do 98 | subject.each_surface {|surface| tests.push surface } 99 | expect(tests).to be_include "test" 100 | expect(tests).to be_include "string" 101 | end 102 | end 103 | end 104 | 105 | describe "#each_feature" do 106 | context %(with mecab nodes which given "test string"), mecab: :nodes do 107 | it "yields 2 features" do 108 | subject.each_feature {|feature| tests.push feature } 109 | expect(tests).to have(2).features 110 | end 111 | 112 | it "yields each features" do 113 | subject.each_feature {|feature| tests.push feature } 114 | expect(tests).to be_include "test feature" 115 | expect(tests).to be_include "string feature" 116 | end 117 | end 118 | end 119 | 120 | describe "its plural methods" do 121 | 
context %(with mecab nodes which given "test string"), mecab: :nodes do 122 | 123 | describe "#surfaces" do 124 | it "returns enumerator" do 125 | expect(subject.surfaces).to be_a Enumerator 126 | end 127 | 128 | it "iterates nodes surfaces" do 129 | subject.surfaces.each {|surface| tests.push surface } 130 | expect(tests).to have(2).surfaces 131 | expect(tests).to be_include "test" 132 | expect(tests).to be_include "string" 133 | end 134 | 135 | it "can fold" do 136 | expect(subject.surfaces.reduce("", &:+)).to eq "teststring" 137 | end 138 | end 139 | 140 | %w(features lengths ids char_types isbests wcosts costs).each do |name| 141 | describe "##{name}" do 142 | it "iterates #{name}" do 143 | expect(subject.send(name)).to be_a Enumerator 144 | end 145 | 146 | it "iterates #{name.singularize} value" do 147 | second_node.stub(name.singularize).and_return(:test) 148 | subject.send(name) {|test| expect(test).to equal :test } 149 | end 150 | end 151 | end 152 | 153 | end 154 | end 155 | 156 | end 157 | --------------------------------------------------------------------------------