├── .codeclimate.yml
├── .gitignore
├── .rspec
├── .rubocop.yml
├── .travis.yml
├── CHANGELOG.md
├── CONTRIBUTING.md
├── Gemfile
├── Guardfile
├── LICENSE.md
├── README.md
├── Rakefile
├── features
├── default.feature
└── support
│ └── env.rb
├── fixtures
└── test-app
│ └── source
│ ├── 2014-05-08-article0.html.md
│ ├── 2014-05-09-article1.html.md
│ ├── 2014-05-10-article2.html.md
│ ├── 2014-05-11-article3.html.md
│ ├── 2014-05-12-article4.html.md
│ ├── 2014-05-13-article5.html.md
│ ├── 2014-05-14-article6.html.md
│ ├── index.html.slim
│ ├── layout.slim
│ ├── layouts
│ └── article.slim
│ └── page.html.slim
├── lib
├── middleman-blog-similar.rb
├── middleman-blog-similar
│ ├── blog_article_extensions.rb
│ ├── database.rb
│ ├── extension.rb
│ ├── helpers.rb
│ ├── models
│ │ ├── article.rb
│ │ ├── migration.rb
│ │ ├── tag.rb
│ │ └── tagging.rb
│ ├── resource_list_manipulator.rb
│ ├── tagger
│ │ ├── entagger.rb
│ │ ├── mecab.rb
│ │ └── tags.rb
│ └── version.rb
└── middleman_extension.rb
├── middleman-blog-similar.gemspec
└── spec
├── middleman-blog-similar
├── extension_spec.rb
└── tagger_spec.rb
└── spec_helper.rb
/.codeclimate.yml:
--------------------------------------------------------------------------------
1 | engines:
2 | rubocop:
3 | enabled: false
4 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | coverage
3 | rdoc
4 | pkg
5 | .sass-cache
6 | .sassc
7 | .tmp
8 | Gemfile.lock
9 | docs
10 | .rbenv-*
11 | .*.swp
12 | build
13 | doc
14 | .yardoc
15 | tmp
16 | Makefile
17 | .mm-pid-*
18 | *.gem
19 | similar.db
20 |
--------------------------------------------------------------------------------
/.rspec:
--------------------------------------------------------------------------------
1 | --color
2 | --format documentation
3 |
--------------------------------------------------------------------------------
/.rubocop.yml:
--------------------------------------------------------------------------------
1 | AllCops:
2 | Exclude:
3 | - .gems/**/*
4 | - tmp/**/*
5 | Metrics/LineLength:
6 | Enabled: false
7 | Style/Documentation:
8 | Enabled: false
9 | Rails/HasAndBelongsToMany:
10 | Enabled: false
11 | Metrics/AbcSize:
12 | Enabled: false
13 | MethodLength:
14 | Max: 50
15 | CyclomaticComplexity:
16 | Max: 25
17 | PerceivedComplexity:
18 | Max: 20
19 | Style/RescueModifier:
20 | Enabled: false
21 | Style/PredicateName:
22 | Enabled: false
23 | Metrics/ClassLength:
24 | Enabled: false
25 | GuardClause:
26 | MinBodyLength: 3
27 | CaseIndentation:
28 | IndentOneStep: true
29 | NumericLiterals:
30 | MinDigits: 7
31 | FileName:
32 | Exclude:
33 | - lib/middleman-blog-similar.rb
34 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | rvm:
2 | - 2.2.6
3 | - 2.3.2
4 | - 2.4.1
5 | env:
6 | global:
7 | - TEST=true
8 | before_install:
9 | - curl -L "https://dl.bintray.com/ngs/travis/mecab-with-ipa-dic.tar.gz" -o mecab.tgz && sudo tar xvfz mecab.tgz -C /usr && rm -f mecab.tgz
10 | after_success:
11 | - bundle exec codeclimate-test-reporter
12 | script: bundle exec rake test
13 | deploy:
14 | provider: rubygems
15 | api_key:
16 | secure: PjIBKKWhCcamn3+X/H2bfDn8m0X6nGTEwZw1LpBbWSiXTxJeUlp1uFjwQ82e8yG6Fk3vLlrCZRks3eM3NIzv9V2l1QHQdivLsha/nBkIpnXB4bIRvJG1bhPVmgOSZQCDJjzgUWSlDljzwFjxJ1NOSYQzbPyvOtu+Ks7/p+BJzHI=
17 | on:
18 | tags: true
19 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | 1.1.1
2 | -----
3 |
4 | * Improve MeCab.
5 |
6 | 1.1.0
7 | -----
8 |
9 | * TreeTagger and MeCab support.
10 |
11 | 1.0.0
12 | -----
13 |
14 | * Initial release.
15 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing
2 | In the spirit of [free software][free-sw], **everyone** is encouraged to help
3 | improve this project.
4 |
5 | [free-sw]: http://www.fsf.org/licensing/essays/free-sw.html
6 |
7 | Here are some ways *you* can contribute:
8 |
9 | * by using alpha, beta, and prerelease versions
10 | * by reporting bugs
11 | * by suggesting new features
12 | * by writing or editing documentation
13 | * by writing specifications
14 | * by writing code (**no patch is too small**: fix typos, add comments, clean up
15 | inconsistent whitespace)
16 | * by refactoring code
17 | * by closing [issues][]
18 | * by reviewing patches
19 |
20 | [issues]: https://github.com/ngs/middleman-blog-similar/issues
21 |
22 | ## Submitting an Issue
23 | We use the [GitHub issue tracker][issues] to track bugs and features. Before
24 | submitting a bug report or feature request, check to make sure it hasn't
25 | already been submitted. When submitting a bug report, please include a [Gist][]
26 | that includes a stack trace and any details that may be necessary to reproduce
27 | the bug, including your gem version, Ruby version, and operating system.
28 | Ideally, a bug report should include a pull request with failing specs.
29 |
30 | [gist]: https://gist.github.com/
31 |
32 | ## Submitting a Pull Request
33 | 1. [Fork the repository.][fork]
34 | 2. [Create a topic branch.][branch]
35 | 3. Add specs for your unimplemented feature or bug fix.
36 | 4. Run `bundle exec rake test`. If your specs pass, return to step 3.
37 | 5. Implement your feature or bug fix.
38 | 6. Run `bundle exec rake test`. If your specs fail, return to step 5.
39 | 7. Add, commit, and push your changes.
40 | 8. [Submit a pull request.][pr]
41 |
42 | [fork]: http://help.github.com/fork-a-repo/
43 | [branch]: http://learn.github.com/p/branching.html
44 | [pr]: http://help.github.com/send-pull-requests/
45 |
--------------------------------------------------------------------------------
/Gemfile:
--------------------------------------------------------------------------------
1 | source 'https://rubygems.org'
2 |
3 | # Specify your gem's dependencies in middleman-blog-similar.gemspec
4 | gemspec
5 |
6 | group :development do
7 | gem 'aruba', '~> 0.7.4', require: false
8 | gem 'capybara', '~> 2.13.0', require: false
9 | gem 'codeclimate-test-reporter', '~> 1.0', require: false
10 | gem 'coveralls', require: false
11 | gem 'cucumber', '~> 2.0', require: false
12 | gem 'engtagger'
13 | gem 'guard-cucumber'
14 | gem 'guard-rake'
15 | gem 'guard-rspec'
16 | gem 'guard-rubocop', require: false
17 | gem 'middleman-cli', '>= 4.0.0'
18 | gem 'natto', '~> 1.1'
19 | gem 'pry', '~> 0.10', require: false
20 | gem 'rack', '~> 1.6.5', require: false
21 | gem 'rake', '~> 12.0', require: false
22 | gem 'redcarpet'
23 | gem 'rspec', '~> 3.0', require: false
24 | gem 'rspec-collection_matchers', require: false
25 | gem 'rspec-its', require: false
26 | gem 'rubocop', require: false
27 | gem 'slim'
28 | gem 'yard', '~> 0.9.8', require: false
29 | end
30 |
--------------------------------------------------------------------------------
/Guardfile:
--------------------------------------------------------------------------------
1 | guard 'rspec', cmd: 'bundle exec rspec' do
2 | watch(%r{^spec/.*\.rb$})
3 | watch(%r{^lib/(.+)\.rb$}) { |m| ["spec/#{m[1]}_spec.rb"] + Dir["spec/#{m[1]}/*_spec.rb"] }
4 | watch(%r{^lib/middleman-blog-similar/tagger/(.+)\.rb$}) { ['spec/middleman-blog-similar/tagger_spec.rb'] }
5 | end
6 |
7 | guard 'cucumber', cmd: 'bundle exec cucumber' do
8 | watch(%r{^features/.+\.feature$})
9 | watch(%r{^features/support/.+$}) { 'features' }
10 | watch(%r{^lib/.*\.rb$}) { 'features' }
11 | end
12 |
13 | guard :rubocop, cmd: 'bundle exec rake rubocop' do
14 | watch(/.+\.rb$/)
15 | watch(%r{(?:.+/)?\.rubocop\.yml$}) { |m| File.dirname(m[0]) }
16 | end
17 |
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | Copyright (c) 2014 Atsushi Nagase
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining
4 | a copy of this software and associated documentation files (the
5 | "Software"), to deal in the Software without restriction, including
6 | without limitation the rights to use, copy, modify, merge, publish,
7 | distribute, sublicense, and/or sell copies of the Software, and to
8 | permit persons to whom the Software is furnished to do so, subject to
9 | the following conditions:
10 |
11 | The above copyright notice and this permission notice shall be
12 | included in all copies or substantial portions of the Software.
13 |
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | middleman-blog-similar
2 | ======================
3 |
4 | [][gem]
5 | [][travis]
6 | [][gemnasium]
7 | [][codeclimate]
8 | [][coveralls]
9 |
10 | `middleman-blog-similar` is an extension for [middleman-blog] that adds a method for looking up similar articles.
11 |
12 | Usage
13 | -----
14 |
15 | `Middleman::Blog::BlogArticle#similar_articles` returns an array of `Middleman::Blog::BlogArticle` instances.
16 |
17 | ```slim
18 | h2 Similar Entries
19 | ul
20 | - current_article.similar_articles.first(5).each do|article|
21 | li= link_to article.title, article.url
22 | ```
23 |
24 | `similar_articles` helper is also available in article pages.
25 |
26 | ```slim
27 | h2 Similar Entries
28 | ul
29 | - similar_articles.first(5).each do|article|
30 | li= link_to article.title, article.url
31 | ```
32 |
33 | Configuration
34 | -------------
35 |
36 | ### `Gemfile`
37 |
38 | ```ruby
39 | gem 'middleman-blog-similar'
40 | ```
41 |
42 | ### `config.rb`
43 |
44 | ```ruby
45 | activate :similar
46 | ```
47 |
48 | By default, this extension finds similar articles by comparing their tags.
49 |
50 | #### Built-in Tagger
51 |
52 | You can choose a tagger with the `tagger:` option. [MeCab] and [EngTagger] adapters are built into this extension.
53 |
54 | ```ruby
55 | # Find by tags (default)
56 | activate :similar, tagger: :tags
57 |
58 | # Using MeCab / Need to add `gem 'natto'` in Gemfile
59 | activate :similar, tagger: :mecab
60 |
61 | # Using EngTagger / Need to add `gem 'engtagger'` in Gemfile
62 | activate :similar, tagger: :entagger
63 | ```
64 |
65 | #### Using Lambda
66 |
67 | You can use a lambda as a tagger:
68 |
69 | ```ruby
70 | # Resource is a Middleman::Blog::BlogArticle
71 | activate :similar, tagger: ->(resource) { [resource.data.category] }
72 | ```
73 |
74 | #### Multiple Taggers
75 |
76 | You can configure multiple taggers, mixing built-in and lambda taggers.
77 |
78 | ```ruby
79 | activate :similar, tagger: {
80 | # key = tagger, value = weight
81 | mecab: 1,
82 | entagger: 1,
83 | tags: 3,
84 | # key = (ignored), value[0] = weight, value[1] = lambda
85 | custom: [5, ->(resource) { resource.data.category ? [resource.data.category] : [] }]
86 | }
87 | ```
88 |
89 | #### Database Location
90 |
91 | This extension uses [SQLite3] to calculate similarity between articles.
92 |
93 | Database location is `${PROJECT_ROOT}/.similar.db` by default.
94 |
95 | You can specify database location using `db:` option.
96 |
97 | ```ruby
98 | # Expands to ${HOME}/similar.db
99 | activate :similar, db: '~/similar.db'
100 |
101 | # Expands to ${PROJECT_ROOT}/tmp/middleman-blog-similar.db
102 | activate :similar, db: 'tmp/middleman-blog-similar.db'
103 |
104 | # Stores in memory database
105 | activate :similar, db: ':memory:'
106 | ```
107 |
108 | License
109 | -------
110 |
111 | Copyright (c) 2014-2017 [Atsushi Nagase]. MIT Licensed, see [LICENSE] for details.
112 |
113 | [middleman]: http://middlemanapp.com
114 | [middleman-blog]: https://github.com/middleman/middleman-blog
115 | [gem]: https://rubygems.org/gems/middleman-blog-similar
116 | [travis]: http://travis-ci.org/ngs/middleman-blog-similar
117 | [gemnasium]: https://gemnasium.com/ngs/middleman-blog-similar
118 | [codeclimate]: https://codeclimate.com/github/ngs/middleman-blog-similar
119 | [LICENSE]: LICENSE.md
120 | [Atsushi Nagase]: https://ngs.io
121 | [coveralls]: https://coveralls.io/github/ngs/middleman-blog-similar
122 | [MeCab]: http://taku910.github.io/mecab/
123 | [EngTagger]: https://github.com/yohasebe/engtagger
124 | [SQLite3]: https://www.sqlite.org/
125 |
--------------------------------------------------------------------------------
/Rakefile:
--------------------------------------------------------------------------------
1 | require 'bundler'
2 | Bundler::GemHelper.install_tasks
3 |
4 | require 'cucumber/rake/task'
5 | require 'rspec/core/rake_task'
6 | require 'rubocop/rake_task'
7 | RuboCop::RakeTask.new
8 |
9 | RSpec::Core::RakeTask.new(:spec)
10 |
11 | Cucumber::Rake::Task.new(:cucumber, 'Run features that should pass') do |t|
12 | exempt_tags = ''
13 | exempt_tags << '--tags ~@nojava ' if RUBY_PLATFORM == 'java'
14 | t.cucumber_opts = "--color --tags ~@wip #{exempt_tags} --strict --format #{ENV['CUCUMBER_FORMAT'] || 'pretty'}"
15 | end
16 |
17 | require 'rake/clean'
18 |
19 | task test: %w[rubocop spec cucumber]
20 |
21 | begin
22 | require 'cane/rake_task'
23 |
24 | desc 'Run cane to check quality metrics'
25 | Cane::RakeTask.new(:quality) do |cane|
26 | cane.no_style = true
27 | cane.no_doc = true
28 | cane.abc_glob = 'lib/middleman-blog-similar/**/*.rb'
29 | end
30 | rescue LoadError
31 | warn 'cane not available, quality task not provided.'
32 | end
33 |
34 | desc 'Build HTML documentation'
35 | task :doc do
36 | sh 'bundle exec yard'
37 | end
38 |
--------------------------------------------------------------------------------
/features/default.feature:
--------------------------------------------------------------------------------
1 | Feature: Default
2 |
3 | Scenario: iterate similar_articles
4 | Given a fixture app "test-app"
5 | And a file named "config.rb" with:
6 | """
7 | activate :blog do|blog|
8 | blog.layout = "article"
9 | end
10 | activate :similar
11 | """
12 | Given the Server is running at "test-app"
13 | When I go to "/2014/05/08/article0.html"
14 | Then I should see "
Article 0
"
15 | Then I should see ''
16 | Then I should see 'Article 6dog, Brown, cat'
17 | Then I should see 'Article 4dog, cat, fox'
18 | Then I should see 'Article 1dog, cat'
19 | Then I should see 'Article 5dog'
20 | When I go to "/page.html"
21 | Then I should see ''
22 |
--------------------------------------------------------------------------------
/features/support/env.rb:
--------------------------------------------------------------------------------
1 | require 'codeclimate-test-reporter'
2 | require 'coveralls'
3 | require 'simplecov'
4 |
5 | SimpleCov.formatter = SimpleCov::Formatter::MultiFormatter[
6 | Coveralls::SimpleCov::Formatter,
7 | SimpleCov::Formatter::HTMLFormatter,
8 | CodeClimate::TestReporter::Formatter
9 | ]
10 |
11 | SimpleCov.start do
12 | add_filter '/features/'
13 | end
14 |
15 | ENV['COVERALLS_REPO_TOKEN'] && Coveralls.wear!
16 |
17 | ENV['TEST'] = 'true'
18 | ENV['AUTOLOAD_SPROCKETS'] = 'false'
19 |
20 | PROJECT_ROOT_PATH = File.dirname(File.dirname(File.dirname(__FILE__)))
21 | require 'middleman-core'
22 | require 'middleman-core/step_definitions'
23 | require 'middleman-blog'
24 | require File.join(PROJECT_ROOT_PATH, 'lib', 'middleman-blog-similar')
25 |
--------------------------------------------------------------------------------
/fixtures/test-app/source/2014-05-08-article0.html.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: Article 0
3 | date: 2014-05-08 07:00
4 | tags: dog, cat, brown
5 | category: test
6 | ---
7 |
8 | content
9 |
--------------------------------------------------------------------------------
/fixtures/test-app/source/2014-05-09-article1.html.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: Article 1
3 | date: 2014-05-09 07:00
4 | tags: dog, cat
5 | category: test
6 | ---
7 |
8 | content 1
9 |
--------------------------------------------------------------------------------
/fixtures/test-app/source/2014-05-10-article2.html.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: Article 2
3 | date: 2014-05-10 07:00
4 | tags: quick, Fox
5 | ---
6 |
7 | The quick brown fox jumps over the lazy dog dog dog brown
8 |
--------------------------------------------------------------------------------
/fixtures/test-app/source/2014-05-11-article3.html.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: Article 3
3 | date: 2014-05-11 07:00
4 | tags: fox
5 | ---
6 |
7 | それは昨日そのうちその開始方というののためにするたたい。まるで前を関係院はぼんやりこの相違たあるでもに始めてくるでがも矛盾教えなりならが、それほどには叫びなたましまい。悪口に思うでのはつい結果がすでにたなん。はなはだネルソンさんから拡張教師そう反抗でしかるです西洋その尻誰か立脚でとしてご反駁ますありですんて、この時分は誰かexpects坊ちゃんをなりて、向さんの方で国家の私が最もお周旋と出て私この世をおお話をするようとしきりにご発会を思っあっなて、どうもまるで講演が知れるてやっないのがありですです。またなおご人間に来らのもしっかり変と勧めたらて、その国家をは突き抜けべきてって隙で出ながらみるですべき。
8 |
--------------------------------------------------------------------------------
/fixtures/test-app/source/2014-05-12-article4.html.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: Article 4
3 | date: 2014-05-12 07:00
4 | tags: dog, cat, fox
5 | ---
6 |
7 | content test 4
8 |
--------------------------------------------------------------------------------
/fixtures/test-app/source/2014-05-13-article5.html.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: Article 5
3 | date: 2014-05-13 07:00
4 | tags: dog
5 | ---
6 |
7 | content!
8 |
--------------------------------------------------------------------------------
/fixtures/test-app/source/2014-05-14-article6.html.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: Article 6
3 | date: 2014-05-14 07:00
4 | tags: dog, Brown, cat
5 | ---
6 |
7 | contents tests 6
8 |
--------------------------------------------------------------------------------
/fixtures/test-app/source/index.html.slim:
--------------------------------------------------------------------------------
1 | ---
2 | description: My Description
3 | title: My Title
4 | tags: dog, cat, quick, brown
5 | ---
6 |
7 | h1 Hello index
8 |
9 | p This is index
10 |
11 |
--------------------------------------------------------------------------------
/fixtures/test-app/source/layout.slim:
--------------------------------------------------------------------------------
1 | html
2 | head
3 | meta charset="utf-8"
4 | title= current_resource.data.title
5 | body data-similar-article-count=similar_articles.count
6 | .container
7 | = yield
8 |
--------------------------------------------------------------------------------
/fixtures/test-app/source/layouts/article.slim:
--------------------------------------------------------------------------------
1 | html
2 | head
3 | meta charset="utf-8"
4 | title= current_article.title
5 | body data-similar-article-count=similar_articles.count
6 | .container
7 | h1= current_article.title
8 | p.tags= current_article.tags.join ', '
9 | = yield
10 |
11 | h2 Similar Entries
12 | ul
13 | - similar_articles.each_with_index do|article, index|
14 | li class="a#{index}"
15 | = link_to article.url do
16 | span.title= article.title
17 | span.tags= article.tags.join ', '
18 |
--------------------------------------------------------------------------------
/fixtures/test-app/source/page.html.slim:
--------------------------------------------------------------------------------
1 | h1 Hello page
2 |
3 | p This is page
4 |
--------------------------------------------------------------------------------
/lib/middleman-blog-similar.rb:
--------------------------------------------------------------------------------
1 | require 'middleman-core'
2 | require 'middleman-blog-similar/version'
3 |
4 | ::Middleman::Extensions.register(:similar) do
5 | require 'middleman-blog-similar/extension'
6 | ::Middleman::Blog::SimilarExtension
7 | end
8 |
--------------------------------------------------------------------------------
/lib/middleman-blog-similar/blog_article_extensions.rb:
--------------------------------------------------------------------------------
module Middleman
  module Blog
    module Similar
      # Mixin that adds a +similar_articles+ reader to the object it is
      # included into. The host must respond to +locals+ with a Hash-like
      # object; the similarity database is read from its +:similar_db+ entry.
      module BlogArticleExtensions
        # Asks the attached similarity database for resources similar to
        # this one.
        #
        # @return [Array] the result of the database's +find_similar+, or an
        #   empty array when no database is stored under +:similar_db+
        def similar_articles
          similar_db = locals[:similar_db]
          # A host that never received the :similar_db local has nothing to
          # query; report "no similar articles" instead of failing with
          # NoMethodError on nil.
          return [] unless similar_db

          similar_db.find_similar(self)
        end
      end
    end
  end
end
12 |
--------------------------------------------------------------------------------
/lib/middleman-blog-similar/database.rb:
--------------------------------------------------------------------------------
1 | require 'sqlite3'
2 | require 'active_record'
3 | require 'middleman-blog-similar/models/article'
4 | require 'middleman-blog-similar/models/tag'
5 | require 'middleman-blog-similar/models/tagging'
6 | require 'middleman-blog-similar/models/migration'
7 |
module Middleman
  module Blog
    module Similar
      # Index of blog articles and their weighted tags, stored through
      # ActiveRecord in a SQLite database. It is filled from sitemap
      # resources and queried for the resources similar to a given article.
      class Database
        attr_reader :taggers

        # Connects ActiveRecord to the SQLite database and applies the schema.
        #
        # @param path [String] value passed to ActiveRecord as +database:+
        # @param taggers [Array<Array>] +[weight, tagger]+ pairs; each tagger
        #   responds to +call(resource)+ and returns one tag or an Array of
        #   tags
        def initialize(path, taggers)
          ActiveRecord::Base.establish_connection(
            adapter: 'sqlite3',
            database: path
          )
          Migration.apply
          @taggers = taggers
          @id_map = {}
        end

        # Indexes every blog article in +resources+ inside one transaction and
        # rebuilds the page_id => resource lookup used by {#find_similar}.
        # Articles whose page_id is not in the current list are deleted,
        # unless the list contains no blog article at all.
        #
        # @param resources [Enumerable] sitemap resources; anything that is
        #   not a Middleman::Blog::BlogArticle is ignored
        def store_articles(resources)
          @id_map = {}
          ActiveRecord::Base.transaction do
            ids = []
            resources.each do |res|
              next unless res.is_a?(Middleman::Blog::BlogArticle)

              execute_article res
              ids << res.page_id
              @id_map[res.page_id.to_s] = res
            end
            Article.where.not(page_id: ids).delete_all unless ids.empty?
          end
        end

        # Brings the stored tags of one article up to date.
        #
        # Work is skipped when a row with the same page_id and the same SHA1
        # digest of the source file already exists. Otherwise every tagger is
        # run, one tagging per produced tag is created or updated with that
        # tagger's weight, and taggings not produced in this run are deleted.
        #
        # @param resource [#source_file, #page_id] the article to index
        # @return [Object] the resource's page_id
        def execute_article(resource)
          page_id = resource.page_id
          digest = ::Digest::SHA1.file(resource.source_file).hexdigest
          return page_id if Article.exists?(digest: digest, page_id: page_id)

          article = Article.find_or_create_by(page_id: page_id)
          kept_tagging_ids = []
          @taggers.each do |weight, tagger|
            normalize_tags(tagger.call(resource)).each do |tag_name|
              tag = Tag.find_or_create_by(name: tag_name)
              tagging = Tagging.find_or_create_by(tag_id: tag.id, article_id: article.id)
              # When several taggers emit the same tag, the last weight wins.
              tagging.weight = weight
              tagging.save!
              kept_tagging_ids << tagging.id
            end
          end
          # Remove taggings left over from an earlier version of the article.
          # This must also run when no tag was produced, otherwise an article
          # that lost all of its tags keeps matching on the old ones.
          stale = Tagging.where(article_id: article.id)
          stale = stale.where.not(id: kept_tagging_ids) if kept_tagging_ids.any?
          stale.delete_all
          article.update! digest: digest
          page_id
        end

        # Resolves the articles similar to +article+ into the resources seen
        # by the latest {#store_articles} call.
        #
        # @param article [#page_id] resource to find neighbours for
        # @return [Array] matching resources in the order given by
        #   Article#similar_article_page_ids; +[]+ when the article is not
        #   indexed
        def find_similar(article)
          record = Article.find_by(page_id: article.page_id)
          return [] unless record

          record.similar_article_page_ids.map do |page_id|
            @id_map[page_id]
          end.select(&:present?)
        end

        private

        # Turns a tagger's return value into a clean list of tag names.
        #
        # A single value is wrapped into a list, +nil+ entries are dropped,
        # every entry is stringified and downcased, and empty or repeated
        # names are removed.
        #
        # @param tags [Object, Array, nil] raw tagger output
        # @return [Array<String>] downcased, non-empty, unique tag names
        def normalize_tags(tags)
          list = tags.is_a?(Array) ? tags : [tags]
          list.compact.map { |tag| tag.to_s.downcase }.reject(&:empty?).uniq
        end
      end
    end
  end
end
73 |
--------------------------------------------------------------------------------
/lib/middleman-blog-similar/extension.rb:
--------------------------------------------------------------------------------
1 | require 'middleman-blog-similar/blog_article_extensions'
2 | require 'middleman-blog-similar/helpers'
3 | require 'middleman-blog-similar/resource_list_manipulator'
4 | require 'middleman-blog-similar/database'
5 |
module Middleman
  module Blog
    # Middleman extension that wires the similarity database into the
    # sitemap. Options: +tagger+ (default +:tags+) selects how articles are
    # tagged, +db+ (default +.similar.db+) is the SQLite database location.
    class SimilarExtension < ::Middleman::Extension
      option :tagger, :tags, 'Article tagger'
      option :db, '.similar.db', 'SQLite3 Database'

      self.defined_helpers = [Middleman::Blog::Similar::Helpers]

      # Builds the tagger list, opens the database and registers the
      # resource list manipulator under +:blog_similar+.
      #
      # @raise [RuntimeError] when the +tagger+ option has an unsupported type
      # @raise [LoadError] when a named built-in tagger cannot be required
      def after_configuration
        require 'middleman-blog/blog_article'
        ::Middleman::Sitemap::Resource.send :include, Middleman::Blog::Similar::BlogArticleExtensions

        @taggers = build_taggers(options.tagger)
        db_path = options.db
        # ':memory:' is passed through untouched; any other value is expanded
        # relative to the application root.
        db_path = File.expand_path(options.db, app.root) if db_path != ':memory:'
        @db = Middleman::Blog::Similar::Database.new db_path, @taggers
        @resource_list_manipulator = Middleman::Blog::Similar::ResourceListManipulator.new app, @db
        @app.sitemap.register_resource_list_manipulator :blog_similar, @resource_list_manipulator
      end

      # Resolves a tagger reference.
      #
      # A String or Symbol is required from +middleman-blog-similar/tagger/+
      # and instantiated from the matching constant under
      # Middleman::Blog::Similar::Tagger; anything else is returned as is.
      #
      # @param tagger [String, Symbol, Object] tagger name or tagger object
      # @return [Object] the tagger instance
      # @raise [LoadError] when the named tagger file cannot be required
      #   (logged before being re-raised)
      def load_tagger(tagger)
        return tagger unless tagger.is_a?(String) || tagger.is_a?(Symbol)

        require "middleman-blog-similar/tagger/#{tagger}"
        namespace = ::Middleman::Blog::Similar::Tagger
        tagger.to_s.split('/').each do |segment|
          namespace = namespace.const_get segment.camelize
        end
        namespace.new
      rescue LoadError
        app.logger.error "Requested similar tagger '#{tagger}' not found."
        raise
      end

      private

      # Converts the +tagger+ option into a list of +[weight, tagger]+ pairs.
      #
      # Accepted shapes:
      # * String/Symbol - one built-in tagger with weight 1
      # * Hash - +name => weight+, or +key => [weight, tagger]+ where the key
      #   itself is ignored
      # * any object responding to +call+ (e.g. a lambda) - used with weight 1
      #
      # @param setting [Object] the raw option value
      # @return [Array<Array>] +[weight, tagger]+ pairs
      # @raise [RuntimeError] for any other type
      def build_taggers(setting)
        case setting
        when String, Symbol
          [[1, load_tagger(setting)]]
        when Hash
          setting.map do |name, value|
            weight, source = value.is_a?(Array) ? value : [value, name]
            [weight, load_tagger(source)]
          end
        else
          # A bare callable used to be rejected here, so
          # `activate :similar, tagger: ->(resource) { ... }` raised.
          raise "Invalid type for tagger option: #{setting.class}" unless setting.respond_to?(:call)

          [[1, setting]]
        end
      end
    end
  end
end
55 |
--------------------------------------------------------------------------------
/lib/middleman-blog-similar/helpers.rb:
--------------------------------------------------------------------------------
module Middleman
  module Blog
    module Similar
      # Template helpers contributed by the extension.
      module Helpers
        # Similar articles for the page currently being rendered.
        #
        # @return [Array] +current_article.similar_articles+ when
        #   +is_blog_article?+ is truthy, otherwise an empty array
        def similar_articles
          return [] unless is_blog_article?

          current_article.similar_articles
        end
      end
    end
  end
end
16 |
--------------------------------------------------------------------------------
/lib/middleman-blog-similar/models/article.rb:
--------------------------------------------------------------------------------
module Middleman
  module Blog
    module Similar
      # ActiveRecord model for one indexed blog article. It is linked to its
      # tags through taggings.
      class Article < ActiveRecord::Base
        has_many :taggings
        has_many :tags, through: :taggings

        # Page ids of the other articles that share at least one tag with
        # this article, best match first.
        #
        # The query groups the taggings of other articles that point at one
        # of this article's tags and orders the groups by
        # +COUNT(*) * rtr.weight+ (descending), then by page_id (descending).
        # NOTE(review): +rtr.weight+ is not aggregated inside the GROUP BY,
        # so with mixed weights the row SQLite takes it from is unspecified -
        # confirm whether SUM(rtr.weight) was intended.
        #
        # @return [Array] page ids in ranking order; always an Array (the
        #   no-tags case used to return an ActiveRecord::Relation instead)
        def similar_article_page_ids
          return [] if tags.empty?

          # http://stackoverflow.com/a/22472153
          rows = ActiveRecord::Base.connection.select_all "
            SELECT rtr.article_id FROM taggings AS rtr
            INNER JOIN taggings rtr2
            ON (rtr2.tag_id = rtr.tag_id AND rtr2.article_id = #{id})
            LEFT JOIN
            (SELECT * FROM taggings WHERE article_id = #{id}) AS r
            ON rtr.tag_id = r.tag_id
            LEFT JOIN articles a ON a.id = rtr.article_id
            WHERE rtr.article_id != #{id}
            GROUP BY rtr.article_id
            HAVING COUNT(*) > 0
            ORDER BY COUNT(*) * rtr.weight DESC, a.page_id DESC"
          ranked_ids = rows.to_hash.map { |row| row['article_id'] }
          page_ids_by_id = self.class.where(id: ranked_ids).pluck(:id, :page_id).to_h
          # Keep the ranking order of the query; ids without an article row
          # (taggings whose article was deleted) are dropped.
          ranked_ids.map { |article_id| page_ids_by_id[article_id] }.compact
        end
      end
    end
  end
end
34 |
--------------------------------------------------------------------------------
/lib/middleman-blog-similar/models/migration.rb:
--------------------------------------------------------------------------------
module Middleman
  module Blog
    module Similar
      # Creates the tables used by the similarity database.
      module Migration
        # Defines the +articles+, +tags+ and +taggings+ tables, each only if
        # it does not exist yet, so it can be called on every start-up.
        #
        # Uniqueness of +articles.page_id+ and +tags.name+ is requested
        # through the index options: a bare +unique: true+ column option does
        # not create a unique index. Tables that already exist are left as
        # they are.
        def self.apply
          ActiveRecord::Schema.define(version: 201703240752) do # rubocop:disable Style/NumericLiterals
            unless ActiveRecord::Base.connection.data_source_exists? 'articles'
              create_table :articles do |table|
                table.column :page_id, :string, index: { unique: true }
                table.column :digest, :string, index: true
              end
            end

            unless ActiveRecord::Base.connection.data_source_exists? 'tags'
              create_table :tags do |table|
                table.column :name, :string, index: { unique: true }
              end
            end

            unless ActiveRecord::Base.connection.data_source_exists? 'taggings'
              create_table :taggings do |table|
                table.references :article, foreign_key: true
                table.references :tag, foreign_key: true
                table.column :weight, :integer, default: 1, null: false
              end
            end
          end
        end
      end
    end
  end
end
33 |
--------------------------------------------------------------------------------
/lib/middleman-blog-similar/models/tag.rb:
--------------------------------------------------------------------------------
1 | module Middleman
2 | module Blog
3 | module Similar
4 | class Tag < ActiveRecord::Base
5 | has_many :taggings
6 | has_many :articles, through: :taggings
7 | end
8 | end
9 | end
10 | end
11 |
--------------------------------------------------------------------------------
/lib/middleman-blog-similar/models/tagging.rb:
--------------------------------------------------------------------------------
1 | module Middleman
2 | module Blog
3 | module Similar
4 | class Tagging < ActiveRecord::Base
5 | belongs_to :article
6 | belongs_to :tag
7 | end
8 | end
9 | end
10 | end
11 |
--------------------------------------------------------------------------------
/lib/middleman-blog-similar/resource_list_manipulator.rb:
--------------------------------------------------------------------------------
module Middleman
  module Blog
    module Similar
      # Sitemap resource-list hook: attaches the similarity database to each
      # resource and feeds the resource list into the database.
      class ResourceListManipulator
        attr_reader :article, :app, :db

        # @param app [Object] the application this manipulator belongs to
        # @param db [#store_articles] the similarity database
        def initialize(app, db)
          @app = app
          @db = db
        end

        # Adds +similar_db+ to the locals metadata of every resource, stores
        # the resources in the database and returns the list it was given.
        #
        # @param resources [Enumerable] resources responding to +add_metadata+
        # @return [Enumerable] the same +resources+ object
        def manipulate_resource_list(resources)
          resources.tap do |list|
            list.each do |resource|
              resource.add_metadata(locals: { similar_db: @db })
            end
            @db.store_articles(list)
          end
        end
      end
    end
  end
end
20 |
--------------------------------------------------------------------------------
/lib/middleman-blog-similar/tagger/entagger.rb:
--------------------------------------------------------------------------------
1 | require 'engtagger'
2 |
module Middleman
  module Blog
    module Similar
      module Tagger
        # Tagger that extracts nouns from an article body with EngTagger.
        class Entagger
          # Matches an opening or closing markup tag such as <p> or </p>.
          MARKUP_TAG = %r{</?[^>]+>}

          # @param article [#body] object whose +body+ is a String, possibly
          #   containing markup
          # @return [Array] the nouns reported by EngTagger for the body with
          #   markup tags removed; +[]+ when EngTagger reports none
          def call(article)
            tagger = EngTagger.new
            tagged = tagger.add_tags(article.body.gsub(MARKUP_TAG, ''))
            # NOTE(review): engtagger appears to return nil rather than an
            # empty Hash for blank text - confirm; nil is treated as "no
            # nouns" here either way.
            (tagger.get_nouns(tagged) || {}).keys
          end
        end
      end
    end
  end
end
18 |
--------------------------------------------------------------------------------
/lib/middleman-blog-similar/tagger/mecab.rb:
--------------------------------------------------------------------------------
1 | require 'natto'
2 |
module Middleman
  module Blog
    module Similar
      module Tagger
        # Tagger that extracts general nouns from an article body with MeCab
        # (through Natto).
        class Mecab
          # Matches an opening or closing markup tag such as <p> or </p>.
          MARKUP_TAG = %r{</?[^>]+>}

          # Feature prefix of the entries to keep ("noun, general").
          NOUN_FEATURE_PREFIX = '名詞,一般'.freeze

          # @param article [#body] object whose +body+ is a String, possibly
          #   containing markup
          # @return [Array<String>] first column of every parsed line whose
          #   second (tab-separated) column starts with the noun prefix
          def call(article)
            plain_text = article.body.gsub(MARKUP_TAG, '')
            Natto::MeCab.new.parse(plain_text)
                        .split("\n")
                        .map { |line| line.split("\t") }
                        # Lines without a second column have a nil feature
                        # and are skipped.
                        .select { |_surface, feature| feature && feature.start_with?(NOUN_FEATURE_PREFIX) }
                        .map(&:first)
          end
        end
      end
    end
  end
end
20 |
--------------------------------------------------------------------------------
/lib/middleman-blog-similar/tagger/tags.rb:
--------------------------------------------------------------------------------
1 | module Middleman
2 | module Blog
3 | module Similar
4 | module Tagger
5 | class Tags
6 | def call(article)
7 | article.tags
8 | end
9 | end
10 | end
11 | end
12 | end
13 | end
14 |
--------------------------------------------------------------------------------
/lib/middleman-blog-similar/version.rb:
--------------------------------------------------------------------------------
1 | module Middleman
2 | module Blog
3 | module Similar
4 | VERSION = '2.0.1'.freeze
5 | end
6 | end
7 | end
8 |
--------------------------------------------------------------------------------
/lib/middleman_extension.rb:
--------------------------------------------------------------------------------
1 | require 'middleman-blog-similar'
2 |
--------------------------------------------------------------------------------
/middleman-blog-similar.gemspec:
--------------------------------------------------------------------------------
1 | # -*- encoding: utf-8 -*-
2 |
3 | $LOAD_PATH.push File.expand_path('../lib', __FILE__)
4 | require 'middleman-blog-similar/version'
5 |
# Gem metadata, packaged file lists and runtime dependencies.
Gem::Specification.new do |spec|
  # Identity and authorship
  spec.name = 'middleman-blog-similar'
  spec.version = Middleman::Blog::Similar::VERSION
  spec.platform = Gem::Platform::RUBY
  spec.authors = ['Atsushi Nagase']
  spec.email = ['a@ngs.io']
  spec.homepage = 'https://github.com/ngs/middleman-blog-similar'
  spec.summary = 'Similar article extension for middleman-blog'
  spec.description = <<-EOF
Similar article extension for middleman-blog.
Finds similar articles using tags, part-of-speech or custom lambda.
  EOF
  spec.license = 'MIT'

  # Packaged files are taken from git's index (NUL-separated output).
  spec.files = `git ls-files -z`.split("\0")
  spec.test_files = `git ls-files -z -- {fixtures,features,spec}/*`.split("\0")
  spec.require_paths = ['lib']

  # Runtime requirements
  spec.required_ruby_version = '>= 2.2.0'
  spec.add_runtime_dependency 'activerecord', '~> 5.0', '>= 5.0.0'
  spec.add_runtime_dependency 'middleman-core', '~> 4.0', '>= 4.0.0'
  spec.add_runtime_dependency 'middleman-blog', '~> 4.0', '>= 4.0.0'
  spec.add_runtime_dependency 'sqlite3', '~> 1.3'
end
28 |
--------------------------------------------------------------------------------
/spec/middleman-blog-similar/extension_spec.rb:
--------------------------------------------------------------------------------
1 | require 'spec_helper'
2 |
3 | # rubocop:disable Metrics/BlockLength
# Exercises extension activation and the ordering of similar articles for
# the fixture app, both with the default tagger and with a weight map.
describe Middleman::Blog::SimilarExtension do
  before(:all) do
    @app = middleman_app('test-app') { activate :similar, db: ':memory:' }
  end

  let(:app) { @app }
  let(:page_id) { '2014-05-08-article0' }
  let(:resource) { app.sitemap.resources.find { |res| res.page_id == page_id } }

  describe 'activation' do
    subject { app.extensions }

    its([:similar]) { is_expected.not_to be_nil }
    its([:blog]) { is_expected.not_to be_nil }

    context 'with unknown tagger' do
      let(:app) do
        middleman_app('test-app') { activate :similar, db: ':memory:', tagger: 'hoge' }
      end

      it do
        expect { app }.to raise_error LoadError, 'cannot load such file -- middleman-blog-similar/tagger/hoge'
      end
    end

    context 'with invalid tagger' do
      let(:app) do
        middleman_app('test-app') { activate :similar, db: ':memory:', tagger: 1 }
      end

      it { expect { app }.to raise_error RuntimeError, /Invalid type for tagger option/ }
    end
  end

  describe 'results' do
    # Each row is [page_id, category-or-empty-string, *tags].
    subject do
      resource.similar_articles.map do |similar|
        [similar.page_id, similar.data.category || ''] + similar.tags
      end
    end

    it { is_expected.to have(4).items }
    its([0]) { is_expected.to eq %w[2014-05-14-article6 dog Brown cat] }
    its([1]) { is_expected.to eq %w[2014-05-12-article4 dog cat fox] }
    its([2]) { is_expected.to eq %w[2014-05-09-article1 test dog cat] }
    its([3]) { is_expected.to eq %w[2014-05-13-article5 dog] }

    context 'when configured with weight map' do
      before(:all) do
        # Category matches count double compared to plain tag matches.
        category_tagger = ->(article) { article.data.category ? [article.data.category] : [] }
        @app = middleman_app('test-app') do
          activate :similar,
                   db: ':memory:',
                   tagger: {
                     tags: 1,
                     category: [2, category_tagger]
                   }
        end
      end

      it { is_expected.to have(4).items }
      its([0]) { is_expected.to eq %w[2014-05-09-article1 test dog cat] }
      its([1]) { is_expected.to eq %w[2014-05-14-article6 dog Brown cat] }
      its([2]) { is_expected.to eq %w[2014-05-12-article4 dog cat fox] }
      its([3]) { is_expected.to eq %w[2014-05-13-article5 dog] }
    end
  end
end
48 |
--------------------------------------------------------------------------------
/spec/middleman-blog-similar/tagger_spec.rb:
--------------------------------------------------------------------------------
1 | require 'spec_helper'
2 |
# Runs each bundled tagger against fixture articles and checks the tags
# it returns.
describe 'tagger' do
  before(:all) do
    @app = middleman_app('test-app') {}
  end

  let(:app) { @app }
  let(:page_id) { '2014-05-08-article0' }
  let(:resource) { app.sitemap.resources.find { |res| res.page_id == page_id } }

  # Every nested group describes a tagger class, so described_class is the
  # tagger under test.
  subject { described_class.new.call resource }

  describe ::Middleman::Blog::Similar::Tagger::Tags do
    it { is_expected.to eq %w[dog cat brown] }
  end

  describe ::Middleman::Blog::Similar::Tagger::Mecab do
    let(:page_id) { '2014-05-11-article3' }

    it { is_expected.to eq %w[叫び 悪口 教師 西洋 尻 時分 坊ちゃん 国家 この世 人間 国家 隙] }

    context 'written in English' do
      let(:page_id) { '2014-05-10-article2' }

      it { is_expected.to eq %w[quick brown fox jumps over the lazy dog dog dog] }
    end
  end

  describe ::Middleman::Blog::Similar::Tagger::Entagger do
    let(:page_id) { '2014-05-10-article2' }

    it { is_expected.to eq %w[fox jumps dog] }
  end
end
25 |
--------------------------------------------------------------------------------
/spec/spec_helper.rb:
--------------------------------------------------------------------------------
1 | PROJECT_ROOT_PATH = File.dirname(File.dirname(__FILE__))
2 |
3 | require 'rubygems'
4 | $LOAD_PATH.unshift File.join(PROJECT_ROOT_PATH, 'lib')
5 | require 'rspec'
6 | require 'rspec/collection_matchers'
7 | require 'rspec/its'
8 | require 'middleman-core'
9 | require 'middleman-blog'
10 | require 'middleman-blog/helpers'
11 |
12 | require 'codeclimate-test-reporter'
13 | require 'coveralls'
14 | require 'simplecov'
15 |
# Report coverage to Coveralls, a local HTML report and Code Climate.
SimpleCov.formatter = SimpleCov::Formatter::MultiFormatter[
  Coveralls::SimpleCov::Formatter,
  SimpleCov::Formatter::HTMLFormatter,
  CodeClimate::TestReporter::Formatter
]

# Coverage is measured relative to the directory above this file's directory.
SimpleCov.root(File.expand_path('..', File.dirname(__FILE__)))
SimpleCov.start

# Only hook up Coveralls when a repo token is configured.
Coveralls.wear! if ENV['COVERALLS_REPO_TOKEN']
26 |
# Helpers mixed into every example group for booting a Middleman app
# against a scratch copy of the fixtures.
module SpecHelpers
  include FileUtils

  # Copies the fixtures into `tmp/rspec`, points MM_ROOT / MM_SOURCE at the
  # requested fixture and instantiates a Middleman application.
  #
  # The application is configured by running, in order: the built-in
  # defaults (development environment, exceptions hidden, blog activated),
  # the given block, then any procs stored in `@initialize_commands`.
  #
  # @param fixture_path [String] fixture directory name under `fixtures/`
  # @param block [Proc, nil] extra configuration, instance_exec'd on the app
  # @return [Middleman::Application]
  def middleman_app(fixture_path, &block)
    tmp_dir = File.expand_path('../../tmp', __FILE__)
    fixture_dir = File.expand_path('../../fixtures', __FILE__)
    fixture_tmp = File.join(tmp_dir, 'rspec')

    # Start from a pristine copy so earlier runs cannot leak state.
    rmtree fixture_tmp
    mkdir_p tmp_dir
    cp_r fixture_dir, fixture_tmp

    ENV['MM_SOURCE'] = 'source'
    ENV['MM_ROOT'] = File.join(fixture_tmp, fixture_path)

    defaults = lambda {
      set :environment, :development
      set :show_exceptions, false
      activate :blog
    }

    # Build a fresh list instead of unshifting onto @initialize_commands,
    # which would grow the shared array on every call. `compact` drops a
    # missing block so a block-less call does not fail in instance_exec.
    commands = [defaults, block, *@initialize_commands].compact

    ::Middleman::Application.new do
      commands.each { |command| instance_exec(&command) }
    end
  end
end
54 |
# Make the SpecHelpers methods available inside every example group.
RSpec.configure { |config| config.include SpecHelpers }
58 |
59 | require 'middleman-blog-similar/extension'
60 | require 'middleman-blog-similar'
61 | Dir.glob(PROJECT_ROOT_PATH + '/lib/middleman-blog-similar/tagger/*') { |file| require file }
62 |
class String
  # Strips the shortest leading-whitespace run found at any line start from
  # the beginning of every line, then removes one newline that sits at the
  # end of a line.
  #
  # @return [String] a new, de-indented string
  def unindent
    shortest_indent = scan(/^\s*/).min_by(&:length)
    dedented = gsub(/^#{shortest_indent}/, '')
    dedented.sub(/\n$/, '')
  end
end
68 |
--------------------------------------------------------------------------------