├── .rspec ├── Rakefile ├── lib ├── ikku.rb └── ikku │ ├── version.rb │ ├── parser.rb │ ├── reviewer.rb │ ├── song.rb │ ├── bracket_state.rb │ ├── scanner.rb │ └── node.rb ├── Gemfile ├── .gitignore ├── spec ├── spec_helper.rb └── ikku │ └── reviewer_spec.rb ├── CHANGELOG.md ├── ikku.gemspec ├── LICENSE.txt └── README.md /.rspec: -------------------------------------------------------------------------------- 1 | --color 2 | --require spec_helper 3 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | require "bundler/gem_tasks" 2 | 3 | -------------------------------------------------------------------------------- /lib/ikku.rb: -------------------------------------------------------------------------------- 1 | require "ikku/reviewer" 2 | require "ikku/version" 3 | -------------------------------------------------------------------------------- /lib/ikku/version.rb: -------------------------------------------------------------------------------- 1 | module Ikku 2 | VERSION = "0.1.4" 3 | end 4 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source 'https://rubygems.org' 2 | 3 | # Specify your gem's dependencies in ikku.gemspec 4 | gemspec 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /.bundle/ 2 | /.yardoc 3 | /Gemfile.lock 4 | /_yardoc/ 5 | /coverage/ 6 | /doc/ 7 | /pkg/ 8 | /spec/reports/ 9 | /tmp/ 10 | *.bundle 11 | *.so 12 | *.o 13 | *.a 14 | mkmf.log 15 | -------------------------------------------------------------------------------- /lib/ikku/parser.rb: -------------------------------------------------------------------------------- 1 | require "natto" 2 | require "ikku/node" 3 | 4 | module Ikku 5 | class Parser 6 | def parse(text) 7 | mecab.enum_parse(text).map do |mecab_node| 8 | Node.new(mecab_node) 9 | end.select(&:analyzable?) 10 | end 11 | 12 | private 13 | 14 | def mecab 15 | @mecab ||= Natto::MeCab.new 16 | end 17 | end 18 | end 19 | -------------------------------------------------------------------------------- /spec/spec_helper.rb: -------------------------------------------------------------------------------- 1 | $LOAD_PATH.unshift File.expand_path("../../lib", __FILE__) 2 | require "ikku" 3 | 4 | RSpec.configure do |config| 5 | config.expect_with :rspec do |expectations| 6 | expectations.include_chain_clauses_in_custom_matcher_descriptions = true 7 | end 8 | 9 | config.mock_with :rspec do |mocks| 10 | mocks.verify_partial_doubles = true 11 | end 12 | 13 | config.filter_run :focus 14 | config.run_all_when_everything_filtered = true 15 | config.disable_monkey_patching! 16 | config.warnings = true 17 | config.default_formatter = "doc" if config.files_to_run.one? 18 | end 19 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## 0.1.4 2 | - Don't allow song ending with ん as 非自立名詞 3 | 4 | ## 0.1.3 5 | - Don't allow song ending with 未然形 6 | 7 | ## 0.1.2 8 | - Don't allow song ending with サ変・スル in 連用形 (-し) 9 | - Don't allow song ending with 動詞 in 仮定形 10 | 11 | ## 0.1.1 12 | - Fix bracket bug 13 | 14 | ## 0.1.0 15 | - Change some reviewer methods to return `Ikku::Song` 16 | - Don't allow song with odd parentheses 17 | 18 | ## 0.0.9 19 | - Don't allow ikku ending with 連用タ接続 20 | 21 | ## 0.0.8 22 | - Don't allow ikku starting with no pronunciation length node 23 | 24 | ## 0.0.7 25 | - Don't allow phrase ending with 接頭辞 26 | 27 | ## 0.0.6 28 | - Don't allow English words in ikku 29 | 30 | ## 0.0.5 31 | - Improve pattern about 自立・非自立・助動詞 32 | 33 | ## 0.0.4 34 | - Fix bug of Ikku::Scanner#consume 35 | 36 | ## 0.0.3 37 | - Change rule option interface 38 | 39 | ## 0.0.2 40 | - Add rule option 41 | 42 | ## 0.0.1 43 | - 1st Release 44 | -------------------------------------------------------------------------------- /ikku.gemspec: -------------------------------------------------------------------------------- 1 | lib = File.expand_path("../lib", __FILE__) 2 | $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) 3 | require "ikku/version" 4 | 5 | Gem::Specification.new do |spec| 6 | spec.name = "ikku" 7 | spec.version = Ikku::VERSION 8 | spec.authors = ["Ryo Nakamura"] 9 | spec.email = ["r7kamura@gmail.com"] 10 | spec.summary = "Discover haiku from text." 11 | spec.homepage = "https://github.com/r7kamura/ikku" 12 | spec.license = "MIT" 13 | 14 | spec.files = `git ls-files -z`.split("\x0") 15 | spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) } 16 | spec.test_files = spec.files.grep(%r{^(test|spec|features)/}) 17 | spec.require_paths = ["lib"] 18 | 19 | spec.add_runtime_dependency "natto" 20 | spec.add_development_dependency "bundler", "~> 1.7" 21 | spec.add_development_dependency "pry" 22 | spec.add_development_dependency "rake", "~> 10.0" 23 | spec.add_development_dependency "rspec", "3.2.0" 24 | end 25 | -------------------------------------------------------------------------------- /lib/ikku/reviewer.rb: -------------------------------------------------------------------------------- 1 | require "ikku/parser" 2 | require "ikku/song" 3 | 4 | module Ikku 5 | class Reviewer 6 | def initialize(rule: nil) 7 | @rule = rule 8 | end 9 | 10 | # Find one valid song from given text. 11 | # @return [Ikku::Song] 12 | def find(text) 13 | nodes = parser.parse(text) 14 | nodes.length.times.find do |index| 15 | if (song = Song.new(nodes[index..-1], rule: @rule)).valid? 16 | break song 17 | end 18 | end 19 | end 20 | 21 | # Judge if given text is valid song or not. 22 | # @return [true, false] 23 | def judge(text) 24 | Song.new(parser.parse(text), exactly: true, rule: @rule).valid? 25 | end 26 | 27 | # Search all valid songs from given text. 28 | # @return [Array] 29 | def search(text) 30 | nodes = parser.parse(text) 31 | nodes.length.times.map do |index| 32 | Song.new(nodes[index..-1], rule: @rule) 33 | end.select(&:valid?) 34 | end 35 | 36 | private 37 | 38 | def parser 39 | @parser ||= Parser.new 40 | end 41 | end 42 | end 43 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015 Ryo Nakamura 2 | 3 | MIT License 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining 6 | a copy of this software and associated documentation files (the 7 | "Software"), to deal in the Software without restriction, including 8 | without limitation the rights to use, copy, modify, merge, publish, 9 | distribute, sublicense, and/or sell copies of the Software, and to 10 | permit persons to whom the Software is furnished to do so, subject to 11 | the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be 14 | included in all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 19 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 20 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 21 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 22 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 23 | -------------------------------------------------------------------------------- /lib/ikku/song.rb: -------------------------------------------------------------------------------- 1 | require "ikku/bracket_state" 2 | require "ikku/scanner" 3 | 4 | module Ikku 5 | class Song 6 | DEFAULT_RULE = [5, 7, 5] 7 | 8 | def initialize(nodes, exactly: false, rule: nil) 9 | @exactly = exactly 10 | @nodes = nodes 11 | @rule = rule 12 | end 13 | 14 | def phrases 15 | if instance_variable_defined?(:@phrases) 16 | @phrases 17 | else 18 | @phrases = scan 19 | end 20 | end 21 | 22 | def valid? 23 | case 24 | when phrases.nil? 25 | false 26 | when has_odd_parentheses? 27 | false 28 | else 29 | true 30 | end 31 | end 32 | 33 | private 34 | 35 | def has_odd_parentheses? 36 | bracket_state.odd? 37 | end 38 | 39 | def nodes 40 | phrases.flatten 41 | end 42 | 43 | def rule 44 | @rule || DEFAULT_RULE 45 | end 46 | 47 | def scan 48 | Scanner.new(exactly: @exactly, nodes: @nodes, rule: rule).scan 49 | end 50 | 51 | def bracket_state 52 | @bracket_state ||= BracketState.new.tap do |state| 53 | state.consume_all(surfaces) 54 | end 55 | end 56 | 57 | def surfaces 58 | nodes.map(&:surface) 59 | end 60 | end 61 | end 62 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Ikku 2 | Discover haiku from text. 3 | 4 | ## Requirements 5 | - Ruby 2.0.0+ 6 | - MeCab with IPADIC (e.g. `brew install mecab mecab-ipadic`) 7 | 8 | ## Example 9 | ### Ikku::Reviewer 10 | Ikku::Reviewer class is the main interface for this library. 11 | 12 | ```rb 13 | require "ikku" 14 | reviewer = Ikku::Reviewer.new 15 | ``` 16 | 17 | ### Ikku::Reviewer#judge 18 | Judge if given text is valid song or not. 19 | 20 | ```rb 21 | reviewer.judge("古池や蛙飛び込む水の音") #=> true 22 | reviewer.judge("ああ古池や蛙飛び込む水の音ああ") #=> false 23 | ``` 24 | 25 | ### Ikku::Reviewer#find 26 | Find one valid song from given text. 27 | 28 | ```rb 29 | reviewer.find("ああ古池や蛙飛び込む水の音ああ") 30 | #=> # 31 | ``` 32 | 33 | ### Ikku::Reviewer#search 34 | Search all valid songs from given text. 35 | 36 | ```rb 37 | reviewer.search("ああ古池や蛙飛び込む水の音ああ天秤や京江戸かけて千代の春ああ") 38 | #=> [ 39 | # #, 40 | # #, 41 | # ] 42 | ``` 43 | 44 | ### Ikku::Song#phrases 45 | Return an Array of phrases of `Ikku::Node`. 46 | 47 | ```rb 48 | song.phrases #=> [["古池", "や"], ["蛙", "飛び込む"], ["水", "の", "音"]] 49 | ``` 50 | 51 | ### Rule option 52 | Pass `:rule` option to change the measure rule (default: `[5, 7, 5]`). 53 | 54 | ```rb 55 | reviewer = Ikku::Reviewer.new(rule: [4, 3, 5]) 56 | reviewer.judge("古池や蛙飛び込む水の音") #=> false 57 | reviewer.judge("すもももももももものうち") #=> true 58 | ``` 59 | -------------------------------------------------------------------------------- /lib/ikku/bracket_state.rb: -------------------------------------------------------------------------------- 1 | module Ikku 2 | class BracketState 3 | BRACKETS_TABLE = { 4 | "‘" => "’", 5 | "“" => "”", 6 | "(" => ")", 7 | "(" => ")", 8 | "[" => "]", 9 | "[" => "]", 10 | "{" => "}", 11 | "{" => "}", 12 | "〈" => "〉", 13 | "《" => "》", 14 | "「" => "」", 15 | "『" => "』", 16 | "【" => "】", 17 | "〔" => "〕", 18 | "<" => ">", 19 | "<" => ">", 20 | } 21 | 22 | class << self 23 | def brackets_index 24 | @brackets_index ||= BRACKETS_TABLE.to_a.flatten.inject({}) do |hash, bracket| 25 | hash.merge(bracket => true) 26 | end 27 | end 28 | 29 | def inverted_brackets_table 30 | @inverted_brackets_table ||= BRACKETS_TABLE.invert 31 | end 32 | end 33 | 34 | def consume_all(surfaces) 35 | surfaces.each do |surface| 36 | consume(surface) 37 | end 38 | self 39 | end 40 | 41 | def odd? 42 | !even? 43 | end 44 | 45 | private 46 | 47 | def consume(surface) 48 | case 49 | when !stack.last.nil? && self.class.inverted_brackets_table[surface] == stack.last 50 | stack.pop 51 | when self.class.brackets_index.include?(surface) 52 | stack.push(surface) 53 | end 54 | end 55 | 56 | def even? 57 | stack.empty? 58 | end 59 | 60 | def stack 61 | @stack ||= [] 62 | end 63 | end 64 | end 65 | -------------------------------------------------------------------------------- /lib/ikku/scanner.rb: -------------------------------------------------------------------------------- 1 | module Ikku 2 | # Find one set of valid phrases that starts from the 1st node of given nodes. 3 | class Scanner 4 | attr_writer :count 5 | 6 | def initialize(exactly: false, nodes: nil, rule: nil) 7 | @exactly = exactly 8 | @nodes = nodes 9 | @rule = rule 10 | end 11 | 12 | # @note Pronunciation count 13 | def count 14 | @count ||= 0 15 | end 16 | 17 | def scan 18 | if has_valid_first_node? 19 | @nodes.each_with_index do |node, index| 20 | if consume(node) 21 | if satisfied? 22 | return phrases unless @exactly 23 | end 24 | else 25 | return 26 | end 27 | end 28 | phrases if satisfied? 29 | end 30 | end 31 | 32 | private 33 | 34 | def consume(node) 35 | case 36 | when node.pronunciation_length > max_consumable_length 37 | false 38 | when !node.element_of_ikku? 39 | false 40 | when first_of_phrase? && !node.first_of_phrase? 41 | false 42 | when node.pronunciation_length == max_consumable_length && !node.last_of_phrase? 43 | false 44 | else 45 | phrases[phrase_index] ||= [] 46 | phrases[phrase_index] << node 47 | self.count += node.pronunciation_length 48 | true 49 | end 50 | end 51 | 52 | def first_of_phrase? 53 | @rule.inject([]) do |array, length| 54 | array << array.last.to_i + length 55 | end.include?(count) 56 | end 57 | 58 | def has_full_count? 59 | count == @rule.inject(0, :+) 60 | end 61 | 62 | def has_valid_first_node? 63 | @nodes.first.first_of_ikku? 64 | end 65 | 66 | def has_valid_last_node? 67 | phrases.last.last.last_of_ikku? 68 | end 69 | 70 | def max_consumable_length 71 | @rule[0..phrase_index].inject(0, :+) - count 72 | end 73 | 74 | def phrase_index 75 | @rule.length.times.find do |index| 76 | count < @rule[0..index].inject(0, :+) 77 | end || @rule.length - 1 78 | end 79 | 80 | def phrases 81 | @phrases ||= [] 82 | end 83 | 84 | def satisfied? 85 | has_full_count? && has_valid_last_node? 86 | end 87 | end 88 | end 89 | -------------------------------------------------------------------------------- /lib/ikku/node.rb: -------------------------------------------------------------------------------- 1 | require "csv" 2 | 3 | module Ikku 4 | class Node 5 | STAT_ID_FOR_NORMAL = 0 6 | STAT_ID_FOR_UNKNOWN = 1 7 | STAT_ID_FOR_BOS = 2 8 | STAT_ID_FOR_EOS = 3 9 | 10 | # @param node [Natto::MeCabNode] 11 | def initialize(node) 12 | @node = node 13 | end 14 | 15 | def analyzable? 16 | !bos? && !eos? 17 | end 18 | 19 | def bos? 20 | stat == STAT_ID_FOR_BOS 21 | end 22 | 23 | def conjugation1 24 | feature[4] 25 | end 26 | 27 | def conjugation2 28 | feature[5] 29 | end 30 | 31 | def eos? 32 | stat == STAT_ID_FOR_EOS 33 | end 34 | 35 | def element_of_ikku? 36 | normal? 37 | end 38 | 39 | def feature 40 | @feature ||= CSV.parse(@node.feature)[0] 41 | end 42 | 43 | def first_of_ikku? 44 | case 45 | when !first_of_phrase? 46 | false 47 | when type == "記号" && !["括弧開", "括弧閉"].include?(subtype1) 48 | false 49 | else 50 | true 51 | end 52 | end 53 | 54 | def first_of_phrase? 55 | case 56 | when ["助詞", "助動詞"].include?(type) 57 | false 58 | when ["非自立", "接尾"].include?(subtype1) 59 | false 60 | when subtype1 == "自立" && ["する", "できる"].include?(root_form) 61 | false 62 | else 63 | true 64 | end 65 | end 66 | 67 | def inspect 68 | to_s.inspect 69 | end 70 | 71 | def last_of_ikku? 72 | case 73 | when ["名詞接続", "格助詞", "係助詞", "連体化", "接続助詞", "並立助詞", "副詞化", "数接続", "連体詞"].include?(type) 74 | false 75 | when conjugation2 == "連用タ接続" 76 | false 77 | when conjugation1 == "サ変・スル" && conjugation2 == "連用形" 78 | false 79 | when type == "動詞" && ["仮定形", "未然形"].include?(conjugation2) 80 | false 81 | when type == "名詞" && subtype1 == "非自立" && pronunciation == "ン" 82 | false 83 | else 84 | true 85 | end 86 | end 87 | 88 | def last_of_phrase? 89 | type != "接頭詞" 90 | end 91 | 92 | def normal? 93 | stat == STAT_ID_FOR_NORMAL 94 | end 95 | 96 | def pronunciation 97 | feature[8] 98 | end 99 | 100 | def pronunciation_length 101 | @pronunciation_length ||= begin 102 | if pronunciation 103 | pronunciation_mora.length 104 | else 105 | 0 106 | end 107 | end 108 | end 109 | 110 | def pronunciation_mora 111 | if pronunciation 112 | pronunciation.tr("ぁ-ゔ","ァ-ヴ").gsub(/[^アイウエオカ-モヤユヨラ-ロワヲンヴー]/, "") 113 | end 114 | end 115 | 116 | def root_form 117 | feature[6] 118 | end 119 | 120 | def stat 121 | @node.stat 122 | end 123 | 124 | def subtype1 125 | feature[1] 126 | end 127 | 128 | def subtype2 129 | feature[2] 130 | end 131 | 132 | def subtype3 133 | feature[3] 134 | end 135 | 136 | def surface 137 | @node.surface 138 | end 139 | 140 | def to_s 141 | surface 142 | end 143 | 144 | def type 145 | feature[0] 146 | end 147 | end 148 | end 149 | -------------------------------------------------------------------------------- /spec/ikku/reviewer_spec.rb: -------------------------------------------------------------------------------- 1 | require "spec_helper" 2 | 3 | RSpec.describe Ikku::Reviewer do 4 | let(:instance) do 5 | described_class.new(rule: rule) 6 | end 7 | 8 | let(:rule) do 9 | nil 10 | end 11 | 12 | let(:text) do 13 | "古池や蛙飛び込む水の音" 14 | end 15 | 16 | describe "#find" do 17 | subject do 18 | instance.find(text) 19 | end 20 | 21 | context "with invalid song" do 22 | let(:text) do 23 | "test" 24 | end 25 | 26 | it { is_expected.to be_nil } 27 | end 28 | 29 | context "with valid song" do 30 | it { is_expected.to be_a Ikku::Song } 31 | end 32 | 33 | context "with text including song" do 34 | let(:text) do 35 | "ああ#{super()}ああ" 36 | end 37 | 38 | it { is_expected.to be_a Ikku::Song } 39 | end 40 | 41 | context "with text including song ending with 連用タ接続" do 42 | let(:text) do 43 | "リビングでコーヒー飲んでだめになってる" 44 | end 45 | 46 | it { is_expected.to be_nil } 47 | end 48 | 49 | context "with song ending with 仮定形" do 50 | let(:text) do 51 | "その人に金をあげたい人がいれば" 52 | end 53 | 54 | it { is_expected.to be_nil } 55 | end 56 | 57 | context "with song ending with 未然形 (い)" do 58 | let(:text) do 59 | "学会に多分ネイティブほとんどいない" 60 | end 61 | 62 | it { is_expected.to be_nil } 63 | end 64 | 65 | context "with song ending with ん as 非自立名詞" do 66 | let(:text) do 67 | "古池や蛙飛び込むかかったんだ" 68 | end 69 | 70 | it { is_expected.to be_nil } 71 | end 72 | end 73 | 74 | describe "#judge" do 75 | subject do 76 | instance.judge(text) 77 | end 78 | 79 | context "with valid song" do 80 | it { is_expected.to be true } 81 | end 82 | 83 | context "with invalid song" do 84 | let(:text) do 85 | "#{super()}ああ" 86 | end 87 | 88 | it { is_expected.to be false } 89 | end 90 | 91 | context "with rule option and valid song" do 92 | let(:rule) do 93 | [4, 3, 5] 94 | end 95 | 96 | let(:text) do 97 | "すもももももももものうち" 98 | end 99 | 100 | it { is_expected.to be true } 101 | end 102 | 103 | context "with rule option and invalid song" do 104 | let(:rule) do 105 | [4, 3, 5] 106 | end 107 | 108 | it { is_expected.to be false } 109 | end 110 | 111 | context "with phrase starting with independent verb (歩く)" do 112 | let(:text) do 113 | "なぜ鳩は頭を振って歩くのか" 114 | end 115 | 116 | it { is_expected.to be true } 117 | end 118 | 119 | context "with phrase including English" do 120 | let(:text) do 121 | "Apple#{super()}" 122 | end 123 | 124 | it { is_expected.to be false } 125 | end 126 | 127 | context "with phrase ending with 接頭詞" do 128 | let(:text) do 129 | "レバーのお汁飲んだので元気出た" 130 | end 131 | 132 | it { is_expected.to be false } 133 | end 134 | 135 | context "with song starting with symbol" do 136 | let(:text) do 137 | "、#{super()}" 138 | end 139 | 140 | it { is_expected.to be false } 141 | end 142 | 143 | context "with song ending with 連用タ接続 (撮っ)" do 144 | let(:text) do 145 | "新宿の桜と庭の写真撮っ" 146 | end 147 | 148 | it { is_expected.to be false } 149 | end 150 | 151 | context "with song including even parentheses" do 152 | let(:text) do 153 | "古池や「蛙<飛び込む>」水の音" 154 | end 155 | 156 | it { is_expected.to be true } 157 | end 158 | 159 | context "with song including odd parentheses" do 160 | let(:text) do 161 | "古池や「蛙<飛び込む」>水の音" 162 | end 163 | 164 | it { is_expected.to be false } 165 | end 166 | 167 | context "with song starting with parenthesis" do 168 | let(:text) do 169 | "(#{super()})" 170 | end 171 | 172 | it { is_expected.to be true } 173 | end 174 | 175 | context "with song ending with サ変・スル in 連用形 (-し)" do 176 | let(:text) do 177 | "炊きつけて画面眺めて満足し" 178 | end 179 | 180 | it { is_expected.to be false } 181 | end 182 | end 183 | 184 | describe "#search" do 185 | subject do 186 | instance.search(text) 187 | end 188 | 189 | context "without song" do 190 | let(:text) do 191 | "test" 192 | end 193 | 194 | it { is_expected.to be_a Array } 195 | end 196 | 197 | context "with valid song" do 198 | it { is_expected.to be_a Array } 199 | end 200 | 201 | context "with text including song" do 202 | let(:text) do 203 | "ああ#{super()}ああ" 204 | end 205 | 206 | it { is_expected.to be_a Array } 207 | end 208 | end 209 | end 210 | --------------------------------------------------------------------------------