├── .codeclimate.yml ├── .gitignore ├── .rspec ├── .rubocop.yml ├── .travis.yml ├── CHANGELOG.md ├── CONTRIBUTING.md ├── Gemfile ├── Guardfile ├── LICENSE.md ├── README.md ├── Rakefile ├── features ├── default.feature └── support │ └── env.rb ├── fixtures └── test-app │ └── source │ ├── 2014-05-08-article0.html.md │ ├── 2014-05-09-article1.html.md │ ├── 2014-05-10-article2.html.md │ ├── 2014-05-11-article3.html.md │ ├── 2014-05-12-article4.html.md │ ├── 2014-05-13-article5.html.md │ ├── 2014-05-14-article6.html.md │ ├── index.html.slim │ ├── layout.slim │ ├── layouts │ └── article.slim │ └── page.html.slim ├── lib ├── middleman-blog-similar.rb ├── middleman-blog-similar │ ├── blog_article_extensions.rb │ ├── database.rb │ ├── extension.rb │ ├── helpers.rb │ ├── models │ │ ├── article.rb │ │ ├── migration.rb │ │ ├── tag.rb │ │ └── tagging.rb │ ├── resource_list_manipulator.rb │ ├── tagger │ │ ├── entagger.rb │ │ ├── mecab.rb │ │ └── tags.rb │ └── version.rb └── middleman_extension.rb ├── middleman-blog-similar.gemspec └── spec ├── middleman-blog-similar ├── extension_spec.rb └── tagger_spec.rb └── spec_helper.rb /.codeclimate.yml: -------------------------------------------------------------------------------- 1 | engines: 2 | rubocop: 3 | enabled: false 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | coverage 3 | rdoc 4 | pkg 5 | .sass-cache 6 | .sassc 7 | .tmp 8 | Gemfile.lock 9 | docs 10 | .rbenv-* 11 | .*.swp 12 | build 13 | doc 14 | .yardoc 15 | tmp 16 | Makefile 17 | .mm-pid-* 18 | *.gem 19 | similar.db 20 | -------------------------------------------------------------------------------- /.rspec: -------------------------------------------------------------------------------- 1 | --color 2 | --format documentation 3 | -------------------------------------------------------------------------------- /.rubocop.yml: 
-------------------------------------------------------------------------------- 1 | AllCops: 2 | Exclude: 3 | - .gems/**/* 4 | - tmp/**/* 5 | Metrics/LineLength: 6 | Enabled: false 7 | Style/Documentation: 8 | Enabled: false 9 | Rails/HasAndBelongsToMany: 10 | Enabled: false 11 | Metrics/AbcSize: 12 | Enabled: false 13 | MethodLength: 14 | Max: 50 15 | CyclomaticComplexity: 16 | Max: 25 17 | PerceivedComplexity: 18 | Max: 20 19 | Style/RescueModifier: 20 | Enabled: false 21 | Style/PredicateName: 22 | Enabled: false 23 | Metrics/ClassLength: 24 | Enabled: false 25 | GuardClause: 26 | MinBodyLength: 3 27 | CaseIndentation: 28 | IndentOneStep: true 29 | NumericLiterals: 30 | MinDigits: 7 31 | FileName: 32 | Exclude: 33 | - lib/middleman-blog-similar.rb 34 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | rvm: 2 | - 2.2.6 3 | - 2.3.2 4 | - 2.4.1 5 | env: 6 | global: 7 | - TEST=true 8 | before_install: 9 | - curl -L "https://dl.bintray.com/ngs/travis/mecab-with-ipa-dic.tar.gz" -o mecab.tgz && sudo tar xvfz mecab.tgz -C /usr && rm -f mecab.tgz 10 | after_success: 11 | - bundle exec codeclimate-test-reporter 12 | script: bundle exec rake test 13 | deploy: 14 | provider: rubygems 15 | api_key: 16 | secure: PjIBKKWhCcamn3+X/H2bfDn8m0X6nGTEwZw1LpBbWSiXTxJeUlp1uFjwQ82e8yG6Fk3vLlrCZRks3eM3NIzv9V2l1QHQdivLsha/nBkIpnXB4bIRvJG1bhPVmgOSZQCDJjzgUWSlDljzwFjxJ1NOSYQzbPyvOtu+Ks7/p+BJzHI= 17 | on: 18 | tags: true 19 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | 1.1.1 2 | ----- 3 | 4 | * Improve MeCab. 5 | 6 | 1.1.0 7 | ----- 8 | 9 | * TreeTagger and MeCab support. 10 | 11 | 1.0.0 12 | ----- 13 | 14 | * Initial release.
15 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | In the spirit of [free software][free-sw], **everyone** is encouraged to help 3 | improve this project. 4 | 5 | [free-sw]: http://www.fsf.org/licensing/essays/free-sw.html 6 | 7 | Here are some ways *you* can contribute: 8 | 9 | * by using alpha, beta, and prerelease versions 10 | * by reporting bugs 11 | * by suggesting new features 12 | * by writing or editing documentation 13 | * by writing specifications 14 | * by writing code (**no patch is too small**: fix typos, add comments, clean up 15 | inconsistent whitespace) 16 | * by refactoring code 17 | * by closing [issues][] 18 | * by reviewing patches 19 | 20 | [issues]: https://github.com/ngs/middleman-blog-similar/issues 21 | 22 | ## Submitting an Issue 23 | We use the [GitHub issue tracker][issues] to track bugs and features. Before 24 | submitting a bug report or feature request, check to make sure it hasn't 25 | already been submitted. When submitting a bug report, please include a [Gist][] 26 | that includes a stack trace and any details that may be necessary to reproduce 27 | the bug, including your gem version, Ruby version, and operating system. 28 | Ideally, a bug report should include a pull request with failing specs. 29 | 30 | [gist]: https://gist.github.com/ 31 | 32 | ## Submitting a Pull Request 33 | 1. [Fork the repository.][fork] 34 | 2. [Create a topic branch.][branch] 35 | 3. Add specs for your unimplemented feature or bug fix. 36 | 4. Run `bundle exec rake test`. If your specs pass, return to step 3. 37 | 5. Implement your feature or bug fix. 38 | 6. Run `bundle exec rake test`. If your specs fail, return to step 5. 39 | 7. Add, commit, and push your changes. 40 | 8.
[Submit a pull request.][pr] 41 | 42 | [fork]: http://help.github.com/fork-a-repo/ 43 | [branch]: http://learn.github.com/p/branching.html 44 | [pr]: http://help.github.com/send-pull-requests/ 45 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | source 'https://rubygems.org' 2 | 3 | # Specify your gem's dependencies in middleman-blog-similar.gemspec 4 | gemspec 5 | 6 | group :development do 7 | gem 'aruba', '~> 0.7.4', require: false 8 | gem 'capybara', '~> 2.13.0', require: false 9 | gem 'codeclimate-test-reporter', '~> 1.0', require: false 10 | gem 'coveralls', require: false 11 | gem 'cucumber', '~> 2.0', require: false 12 | gem 'engtagger' 13 | gem 'guard-cucumber' 14 | gem 'guard-rake' 15 | gem 'guard-rspec' 16 | gem 'guard-rubocop', require: false 17 | gem 'middleman-cli', '>= 4.0.0' 18 | gem 'natto', '~> 1.1' 19 | gem 'pry', '~> 0.10', require: false 20 | gem 'rack', '~> 1.6.5', require: false 21 | gem 'rake', '~> 12.0', require: false 22 | gem 'redcarpet' 23 | gem 'rspec', '~> 3.0', require: false 24 | gem 'rspec-collection_matchers', require: false 25 | gem 'rspec-its', require: false 26 | gem 'rubocop', require: false 27 | gem 'slim' 28 | gem 'yard', '~> 0.9.8', require: false 29 | end 30 | -------------------------------------------------------------------------------- /Guardfile: -------------------------------------------------------------------------------- 1 | guard 'rspec', cmd: 'bundle exec rspec' do 2 | watch(%r{^spec/.*\.rb$}) 3 | watch(%r{^lib/(.+)\.rb$}) { |m| ["spec/#{m[1]}_spec.rb"] + Dir["spec/#{m[1]}/*_spec.rb"] } 4 | watch(%r{^lib/middleman-blog-similar/tagger/(.+)\.rb$}) { ['spec/middleman-blog-similar/tagger_spec.rb'] } 5 | end 6 | 7 | guard 'cucumber', cmd: 'bundle exec cucumber' do 8 | watch(%r{^features/.+\.feature$}) 9 | watch(%r{^features/support/.+$}) { 'features' } 10 | watch(%r{^lib/.*\.rb$}) { 
'features' } 11 | end 12 | 13 | guard :rubocop, cmd: 'bundle exec rake rubocop' do 14 | watch(/.+\.rb$/) 15 | watch(%r{(?:.+/)?\.rubocop\.yml$}) { |m| File.dirname(m[0]) } 16 | end 17 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014 Atsushi Nagase 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining 4 | a copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be 12 | included in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | middleman-blog-similar 2 | ====================== 3 | 4 | [![Gem Version](https://badge.fury.io/rb/middleman-blog-similar.png)][gem] 5 | [![Build Status](https://travis-ci.org/ngs/middleman-blog-similar.svg?branch=master)][travis] 6 | [![Dependency Status](https://gemnasium.com/ngs/middleman-blog-similar.png?travis)][gemnasium] 7 | [![Code Quality](https://codeclimate.com/github/ngs/middleman-blog-similar.png)][codeclimate] 8 | [![Coverage Status](https://coveralls.io/repos/github/ngs/middleman-blog-similar/badge.svg)][coveralls] 9 | 10 | `middleman-blog-similar` is an extension for [middleman-blog] that adds a method to look up similar articles. 11 | 12 | Usage 13 | ----- 14 | 15 | `Middleman::Blog::BlogArticle#similar_articles` returns an array of `Middleman::Blog::BlogArticle` instances. 16 | 17 | ```slim 18 | h2 Similar Entries 19 | ul 20 | - current_article.similar_articles.first(5).each do|article| 21 | li= link_to article.title, article.url 22 | ``` 23 | 24 | The `similar_articles` helper is also available in article pages. 25 | 26 | ```slim 27 | h2 Similar Entries 28 | ul 29 | - similar_articles.first(5).each do|article| 30 | li= link_to article.title, article.url 31 | ``` 32 | 33 | Configuration 34 | ------------- 35 | 36 | ### `Gemfile` 37 | 38 | ```ruby 39 | gem 'middleman-blog-similar' 40 | ``` 41 | 42 | ### `config.rb` 43 | 44 | ```ruby 45 | activate :similar 46 | ``` 47 | 48 | By default, this extension finds similar articles by comparing their tags. 49 | 50 | #### Built-in Tagger 51 | 52 | You can set taggers using the `tagger:` option. [MeCab] and [EngTagger] adapters are built into this extension.
53 | 54 | ```ruby 55 | # Find by tags (default) 56 | activate :similar, tagger: :tags 57 | 58 | # Using MeCab / Need to add `gem 'natto'` to the Gemfile 59 | activate :similar, tagger: :mecab 60 | 61 | # Using EngTagger / Need to add `gem 'engtagger'` to the Gemfile 62 | activate :similar, tagger: :entagger 63 | ``` 64 | 65 | #### Using Lambda 66 | 67 | You can use a lambda as a tagger. 68 | 69 | ```ruby 70 | # Resource is a Middleman::Blog::BlogArticle 71 | activate :similar, tagger: ->(resource) { [resource.data.category].compact } 72 | ``` 73 | 74 | #### Multiple Taggers 75 | 76 | You can combine multiple taggers, both built-in and lambda-based, each with its own weight. 77 | 78 | ```ruby 79 | activate :similar, tagger: { 80 | # key = tagger, value = weight 81 | mecab: 1, 82 | entagger: 1, 83 | tags: 3, 84 | # key = (ignored), value[0] = weight, value[1] = lambda 85 | custom: [5, ->(resource) { resource.data.category ? [resource.data.category] : [] }] 86 | } 87 | ``` 88 | 89 | #### Database Location 90 | 91 | This extension uses [SQLite3] to calculate similarity between articles. 92 | 93 | The database location is `${PROJECT_ROOT}/.similar.db` by default. 94 | 95 | You can specify the database location using the `db:` option. 96 | 97 | ```ruby 98 | # Expands to ${HOME}/similar.db 99 | activate :similar, db: '~/similar.db' 100 | 101 | # Expands to ${PROJECT_ROOT}/tmp/middleman-blog-similar.db 102 | activate :similar, db: 'tmp/middleman-blog-similar.db' 103 | 104 | # Stores in an in-memory database 105 | activate :similar, db: ':memory:' 106 | ``` 107 | 108 | License 109 | ------- 110 | 111 | Copyright (c) 2014-2017 [Atsushi Nagase]. MIT Licensed, see [LICENSE] for details.
112 | 113 | [middleman]: http://middlemanapp.com 114 | [middleman-blog]: https://github.com/middleman/middleman-blog 115 | [gem]: https://rubygems.org/gems/middleman-blog-similar 116 | [travis]: http://travis-ci.org/ngs/middleman-blog-similar 117 | [gemnasium]: https://gemnasium.com/ngs/middleman-blog-similar 118 | [codeclimate]: https://codeclimate.com/github/ngs/middleman-blog-similar 119 | [LICENSE]: LICENSE.md 120 | [Atsushi Nagase]: https://ngs.io 121 | [coveralls]: https://coveralls.io/github/ngs/middleman-blog-similar 122 | [MeCab]: http://taku910.github.io/mecab/ 123 | [EngTagger]: https://github.com/yohasebe/engtagger 124 | [SQLite3]: https://www.sqlite.org/ 125 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | require 'bundler' 2 | Bundler::GemHelper.install_tasks 3 | 4 | require 'cucumber/rake/task' 5 | require 'rspec/core/rake_task' 6 | require 'rubocop/rake_task' 7 | RuboCop::RakeTask.new 8 | 9 | RSpec::Core::RakeTask.new(:spec) 10 | 11 | Cucumber::Rake::Task.new(:cucumber, 'Run features that should pass') do |t| 12 | exempt_tags = '' 13 | exempt_tags << '--tags ~@nojava ' if RUBY_PLATFORM == 'java' 14 | t.cucumber_opts = "--color --tags ~@wip #{exempt_tags} --strict --format #{ENV['CUCUMBER_FORMAT'] || 'pretty'}" 15 | end 16 | 17 | require 'rake/clean' 18 | 19 | task test: %w[rubocop spec cucumber] 20 | 21 | begin 22 | require 'cane/rake_task' 23 | 24 | desc 'Run cane to check quality metrics' 25 | Cane::RakeTask.new(:quality) do |cane| 26 | cane.no_style = true 27 | cane.no_doc = true 28 | cane.abc_glob = 'lib/middleman-blog-similar/**/*.rb' 29 | end 30 | rescue LoadError 31 | warn 'cane not available, quality task not provided.' 
32 | end 33 | 34 | desc 'Build HTML documentation' 35 | task :doc do 36 | sh 'bundle exec yard' 37 | end 38 | -------------------------------------------------------------------------------- /features/default.feature: -------------------------------------------------------------------------------- 1 | Feature: Default 2 | 3 | Scenario: iterate similar_articles 4 | Given a fixture app "test-app" 5 | And a file named "config.rb" with: 6 | """ 7 | activate :blog do|blog| 8 | blog.layout = "article" 9 | end 10 | activate :similar 11 | """ 12 | Given the Server is running at "test-app" 13 | When I go to "/2014/05/08/article0.html" 14 | Then I should see "

Article 0

" 15 | Then I should see '' 16 | Then I should see '
  • Article 6dog, Brown, cat
  • ' 17 | Then I should see '
  • Article 4dog, cat, fox
  • ' 18 | Then I should see '
  • Article 1dog, cat
  • ' 19 | Then I should see '
  • Article 5dog
  • ' 20 | When I go to "/page.html" 21 | Then I should see '' 22 | -------------------------------------------------------------------------------- /features/support/env.rb: -------------------------------------------------------------------------------- 1 | require 'codeclimate-test-reporter' 2 | require 'coveralls' 3 | require 'simplecov' 4 | 5 | SimpleCov.formatter = SimpleCov::Formatter::MultiFormatter[ 6 | Coveralls::SimpleCov::Formatter, 7 | SimpleCov::Formatter::HTMLFormatter, 8 | CodeClimate::TestReporter::Formatter 9 | ] 10 | 11 | SimpleCov.start do 12 | add_filter '/features/' 13 | end 14 | 15 | ENV['COVERALLS_REPO_TOKEN'] && Coveralls.wear! 16 | 17 | ENV['TEST'] = 'true' 18 | ENV['AUTOLOAD_SPROCKETS'] = 'false' 19 | 20 | PROJECT_ROOT_PATH = File.dirname(File.dirname(File.dirname(__FILE__))) 21 | require 'middleman-core' 22 | require 'middleman-core/step_definitions' 23 | require 'middleman-blog' 24 | require File.join(PROJECT_ROOT_PATH, 'lib', 'middleman-blog-similar') 25 | -------------------------------------------------------------------------------- /fixtures/test-app/source/2014-05-08-article0.html.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Article 0 3 | date: 2014-05-08 07:00 4 | tags: dog, cat, brown 5 | category: test 6 | --- 7 | 8 | content 9 | -------------------------------------------------------------------------------- /fixtures/test-app/source/2014-05-09-article1.html.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Article 1 3 | date: 2014-05-09 07:00 4 | tags: dog, cat 5 | category: test 6 | --- 7 | 8 | content 1 9 | -------------------------------------------------------------------------------- /fixtures/test-app/source/2014-05-10-article2.html.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Article 2 3 | date: 2014-05-10 07:00 4 | tags: quick, Fox 5 | --- 6 | 7 | 
The quick brown fox jumps over the lazy dog dog dog brown 8 | -------------------------------------------------------------------------------- /fixtures/test-app/source/2014-05-11-article3.html.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Article 3 3 | date: 2014-05-11 07:00 4 | tags: fox 5 | --- 6 | 7 | それは昨日そのうちその開始方というののためにするたたい。まるで前を関係院はぼんやりこの相違たあるでもに始めてくるでがも矛盾教えなりならが、それほどには叫びなたましまい。悪口に思うでのはつい結果がすでにたなん。はなはだネルソンさんから拡張教師そう反抗でしかるです西洋その尻誰か立脚でとしてご反駁ますありですんて、この時分は誰かexpects坊ちゃんをなりて、向さんの方で国家の私が最もお周旋と出て私この世をおお話をするようとしきりにご発会を思っあっなて、どうもまるで講演が知れるてやっないのがありですです。またなおご人間に来らのもしっかり変と勧めたらて、その国家をは突き抜けべきてって隙で出ながらみるですべき。 8 | -------------------------------------------------------------------------------- /fixtures/test-app/source/2014-05-12-article4.html.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Article 4 3 | date: 2014-05-12 07:00 4 | tags: dog, cat, fox 5 | --- 6 | 7 | content test 4 8 | -------------------------------------------------------------------------------- /fixtures/test-app/source/2014-05-13-article5.html.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Article 5 3 | date: 2014-05-13 07:00 4 | tags: dog 5 | --- 6 | 7 | content! 
8 | -------------------------------------------------------------------------------- /fixtures/test-app/source/2014-05-14-article6.html.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Article 6 3 | date: 2014-05-14 07:00 4 | tags: dog, Brown, cat 5 | --- 6 | 7 | contents tests 6 8 | -------------------------------------------------------------------------------- /fixtures/test-app/source/index.html.slim: -------------------------------------------------------------------------------- 1 | --- 2 | description: My Description 3 | title: My Title 4 | tags: dog, cat, quick, brown 5 | --- 6 | 7 | h1 Hello index 8 | 9 | p This is index 10 | 11 | -------------------------------------------------------------------------------- /fixtures/test-app/source/layout.slim: -------------------------------------------------------------------------------- 1 | html 2 | head 3 | meta charset="utf-8" 4 | title= current_resource.data.title 5 | body data-similar-article-count=similar_articles.count 6 | .container 7 | = yield 8 | -------------------------------------------------------------------------------- /fixtures/test-app/source/layouts/article.slim: -------------------------------------------------------------------------------- 1 | html 2 | head 3 | meta charset="utf-8" 4 | title= current_article.title 5 | body data-similar-article-count=similar_articles.count 6 | .container 7 | h1= current_article.title 8 | p.tags= current_article.tags.join ', ' 9 | = yield 10 | 11 | h2 Similar Entries 12 | ul 13 | - similar_articles.each_with_index do|article, index| 14 | li class="a#{index}" 15 | = link_to article.url do 16 | span.title= article.title 17 | span.tags= article.tags.join ', ' 18 | -------------------------------------------------------------------------------- /fixtures/test-app/source/page.html.slim: -------------------------------------------------------------------------------- 1 | h1 Hello page 2 | 3 | p This is page 4 | 
-------------------------------------------------------------------------------- /lib/middleman-blog-similar.rb: -------------------------------------------------------------------------------- 1 | require 'middleman-core' 2 | require 'middleman-blog-similar/version' 3 | 4 | ::Middleman::Extensions.register(:similar) do 5 | require 'middleman-blog-similar/extension' 6 | ::Middleman::Blog::SimilarExtension 7 | end 8 | -------------------------------------------------------------------------------- /lib/middleman-blog-similar/blog_article_extensions.rb: -------------------------------------------------------------------------------- 1 | module Middleman 2 | module Blog 3 | module Similar 4 | module BlogArticleExtensions 5 | def similar_articles 6 | locals[:similar_db].find_similar(self) 7 | end 8 | end 9 | end 10 | end 11 | end 12 | -------------------------------------------------------------------------------- /lib/middleman-blog-similar/database.rb: -------------------------------------------------------------------------------- 1 | require 'sqlite3' 2 | require 'active_record' 3 | require 'middleman-blog-similar/models/article' 4 | require 'middleman-blog-similar/models/tag' 5 | require 'middleman-blog-similar/models/tagging' 6 | require 'middleman-blog-similar/models/migration' 7 | 8 | module Middleman 9 | module Blog 10 | module Similar 11 | class Database 12 | attr_reader :taggers 13 | def initialize(path, taggers) 14 | ActiveRecord::Base.establish_connection( 15 | adapter: 'sqlite3', 16 | database: path 17 | ) 18 | Migration.apply 19 | @taggers = taggers 20 | @id_map = {} 21 | end 22 | 23 | def store_articles(resources) 24 | @id_map = {} 25 | ActiveRecord::Base.transaction do 26 | ids = [] 27 | resources.each do |res| 28 | next unless res.is_a?(Middleman::Blog::BlogArticle) 29 | execute_article res 30 | ids << res.page_id 31 | @id_map[res.page_id.to_s] = res 32 | end 33 | Article.where.not(page_id: ids).delete_all unless ids.empty? 
34 | end 35 | end 36 | 37 | def execute_article(resource) 38 | source_file = resource.source_file 39 | page_id = resource.page_id 40 | digest = ::Digest::SHA1.file(source_file).hexdigest 41 | return page_id if Article.exists?(digest: digest, page_id: page_id) 42 | article = Article.find_or_create_by(page_id: page_id) 43 | new_tagging_ids = [] 44 | @taggers.each do |tagger| 45 | tags = tagger[1].call resource 46 | tags = [tags] unless tags.is_a?(Array) 47 | tags.map(&:downcase).each do |tag_name| 48 | tag = Tag.find_or_create_by name: tag_name 49 | tagging = Tagging.find_or_create_by tag_id: tag.id, article_id: article.id 50 | tagging.weight = tagger[0] 51 | tagging.save! 52 | new_tagging_ids << tagging.id 53 | end 54 | end 55 | if new_tagging_ids.any? 56 | article.taggings.where.not(id: new_tagging_ids).delete_all 57 | end 58 | article.update! digest: digest 59 | page_id 60 | end 61 | 62 | def find_similar(article) 63 | article = Article.find_by(page_id: article.page_id) 64 | return [] unless article 65 | article.similar_article_page_ids.map do |page_id| 66 | @id_map[page_id] 67 | end.select(&:present?) 
68 | end 69 | end 70 | end 71 | end 72 | end 73 | -------------------------------------------------------------------------------- /lib/middleman-blog-similar/extension.rb: -------------------------------------------------------------------------------- 1 | require 'middleman-blog-similar/blog_article_extensions' 2 | require 'middleman-blog-similar/helpers' 3 | require 'middleman-blog-similar/resource_list_manipulator' 4 | require 'middleman-blog-similar/database' 5 | 6 | module Middleman 7 | module Blog 8 | class SimilarExtension < ::Middleman::Extension 9 | option :tagger, :tags, 'Article tagger' 10 | option :db, '.similar.db', 'SQLite3 Database' 11 | 12 | self.defined_helpers = [Middleman::Blog::Similar::Helpers] 13 | 14 | def after_configuration 15 | require 'middleman-blog/blog_article' 16 | ::Middleman::Sitemap::Resource.send :include, Middleman::Blog::Similar::BlogArticleExtensions 17 | 18 | @taggers = [] 19 | case options.tagger 20 | when String, Symbol 21 | @taggers << [1, load_tagger(options.tagger)] 22 | when Hash 23 | options.tagger.each do |k, v| 24 | if v.is_a?(Array) 25 | k = v[1] 26 | v = v[0] 27 | end 28 | @taggers << [v, load_tagger(k)] 29 | end 30 | else 31 | raise "Invalid type for tagger option: #{options.tagger.class}" 32 | end 33 | db_path = options.db 34 | db_path = File.expand_path(options.db, app.root) if db_path != ':memory:' 35 | @db = Middleman::Blog::Similar::Database.new db_path, @taggers 36 | @resource_list_manipulator = Middleman::Blog::Similar::ResourceListManipulator.new app, @db 37 | @app.sitemap.register_resource_list_manipulator :blog_similar, @resource_list_manipulator 38 | end 39 | 40 | def load_tagger(tagger) 41 | return tagger unless tagger.is_a?(String) || tagger.is_a?(Symbol) 42 | require "middleman-blog-similar/tagger/#{tagger}" 43 | ns = ::Middleman::Blog::Similar::Tagger 44 | tagger.to_s.split('/').each do |n| 45 | ns = ns.const_get n.camelize 46 | end 47 | ns.new 48 | rescue LoadError => e 49 | app.logger.error 
"Requested similar tagger '#{tagger}' not found." 50 | raise e 51 | end 52 | end 53 | end 54 | end 55 | -------------------------------------------------------------------------------- /lib/middleman-blog-similar/helpers.rb: -------------------------------------------------------------------------------- 1 | module Middleman 2 | module Blog 3 | module Similar 4 | module Helpers 5 | def similar_articles 6 | if is_blog_article? 7 | current_article.similar_articles 8 | else 9 | [] 10 | end 11 | end 12 | end 13 | end 14 | end 15 | end 16 | -------------------------------------------------------------------------------- /lib/middleman-blog-similar/models/article.rb: -------------------------------------------------------------------------------- 1 | module Middleman 2 | module Blog 3 | module Similar 4 | class Article < ActiveRecord::Base 5 | has_many :taggings 6 | has_many :tags, through: :taggings 7 | def similar_article_page_ids 8 | return self.class.none if tags.empty? 9 | # http://stackoverflow.com/a/22472153 10 | res = ActiveRecord::Base.connection.select_all " 11 | SELECT rtr.article_id FROM taggings AS rtr 12 | INNER JOIN taggings rtr2 13 | ON (rtr2.tag_id = rtr.tag_id AND rtr2.article_id = #{id}) 14 | LEFT JOIN 15 | (SELECT * FROM taggings WHERE article_id = #{id}) AS r 16 | ON rtr.tag_id = r.tag_id 17 | LEFT JOIN articles a ON a.id = rtr.article_id 18 | WHERE rtr.article_id != #{id} 19 | GROUP BY rtr.article_id 20 | HAVING COUNT(*) > 0 21 | ORDER BY COUNT(*) * rtr.weight DESC, a.page_id DESC" 22 | ids = res.to_hash.map { |h| h['article_id'] } 23 | page_id_map = {} 24 | articles = self.class.where(id: ids).select(:id, :page_id) 25 | articles.each do |a| 26 | page_id_map[a.id] = a.page_id 27 | end 28 | ids.map { |id| page_id_map[id] } 29 | end 30 | end 31 | end 32 | end 33 | end 34 | -------------------------------------------------------------------------------- /lib/middleman-blog-similar/models/migration.rb: 
-------------------------------------------------------------------------------- 1 | module Middleman 2 | module Blog 3 | module Similar 4 | module Migration 5 | def self.apply 6 | ActiveRecord::Schema.define(version: 201703240752) do # rubocop:disable Style/NumericLiterals 7 | unless ActiveRecord::Base.connection.data_source_exists? 'articles' 8 | create_table :articles do |table| 9 | table.column :page_id, :string, index: true, unique: true 10 | table.column :digest, :string, index: true 11 | end 12 | end 13 | 14 | unless ActiveRecord::Base.connection.data_source_exists? 'tags' 15 | create_table :tags do |table| 16 | table.column :name, :string, index: true, unique: true 17 | end 18 | end 19 | 20 | unless ActiveRecord::Base.connection.data_source_exists? 'taggings' 21 | create_table :taggings do |table| 22 | table.references :article, foreign_key: true 23 | table.references :tag, foreign_key: true 24 | table.column :weight, :integer, default: 1, null: false 25 | end 26 | end 27 | end 28 | end 29 | end 30 | end 31 | end 32 | end 33 | -------------------------------------------------------------------------------- /lib/middleman-blog-similar/models/tag.rb: -------------------------------------------------------------------------------- 1 | module Middleman 2 | module Blog 3 | module Similar 4 | class Tag < ActiveRecord::Base 5 | has_many :taggings 6 | has_many :articles, through: :taggings 7 | end 8 | end 9 | end 10 | end 11 | -------------------------------------------------------------------------------- /lib/middleman-blog-similar/models/tagging.rb: -------------------------------------------------------------------------------- 1 | module Middleman 2 | module Blog 3 | module Similar 4 | class Tagging < ActiveRecord::Base 5 | belongs_to :article 6 | belongs_to :tag 7 | end 8 | end 9 | end 10 | end 11 | -------------------------------------------------------------------------------- /lib/middleman-blog-similar/resource_list_manipulator.rb: 
-------------------------------------------------------------------------------- 1 | module Middleman 2 | module Blog 3 | module Similar 4 | class ResourceListManipulator 5 | attr_reader :article, :app, :db 6 | def initialize(app, db) 7 | @app = app 8 | @db = db 9 | end 10 | 11 | def manipulate_resource_list(resources) 12 | resources.each { |res| res.add_metadata locals: { similar_db: @db } } 13 | @db.store_articles resources 14 | resources 15 | end 16 | end 17 | end 18 | end 19 | end 20 | -------------------------------------------------------------------------------- /lib/middleman-blog-similar/tagger/entagger.rb: -------------------------------------------------------------------------------- 1 | require 'engtagger' 2 | 3 | module Middleman 4 | module Blog 5 | module Similar 6 | module Tagger 7 | class Entagger 8 | def call(article) 9 | tgr = EngTagger.new 10 | tagged = tgr.add_tags article.body.gsub(%r{<[^>]+>}, '') 11 | tgr.get_nouns(tagged).keys 12 | end 13 | end 14 | end 15 | end 16 | end 17 | end 18 | -------------------------------------------------------------------------------- /lib/middleman-blog-similar/tagger/mecab.rb: -------------------------------------------------------------------------------- 1 | require 'natto' 2 | 3 | module Middleman 4 | module Blog 5 | module Similar 6 | module Tagger 7 | class Mecab 8 | def call(article) 9 | Natto::MeCab.new.parse(article.body.gsub(%r{<[^>]+>}, '')) 10 | .split("\n") 11 | .map { |l| l.split("\t") } 12 | .select { |l| l[1] && l[1].start_with?('名詞,一般') } 13 | .map { |l| l[0] } 14 | end 15 | end 16 | end 17 | end 18 | end 19 | end 20 | -------------------------------------------------------------------------------- /lib/middleman-blog-similar/tagger/tags.rb: -------------------------------------------------------------------------------- 1 | module Middleman 2 | module Blog 3 | module Similar 4 | module Tagger 5 | class Tags 6 | def call(article) 7 | article.tags 8 | end 9 | end 10 | end 11 | end 12 | end 13 | end 14
# --------------------------------------------------------------------------------
# /lib/middleman-blog-similar/version.rb:
# --------------------------------------------------------------------------------
module Middleman
  module Blog
    module Similar
      # Gem version string.
      VERSION = '2.0.1'.freeze
    end
  end
end

# --------------------------------------------------------------------------------
# /lib/middleman_extension.rb:
# --------------------------------------------------------------------------------
require 'middleman-blog-similar'

# --------------------------------------------------------------------------------
# /middleman-blog-similar.gemspec:
# --------------------------------------------------------------------------------
# -*- encoding: utf-8 -*-

$LOAD_PATH.push File.expand_path('../lib', __FILE__)
require 'middleman-blog-similar/version'

# Gem specification for middleman-blog-similar.
Gem::Specification.new do |s|
  s.name = 'middleman-blog-similar'
  s.version = Middleman::Blog::Similar::VERSION
  s.platform = Gem::Platform::RUBY
  s.authors = ['Atsushi Nagase']
  s.email = ['a@ngs.io']
  s.homepage = 'https://github.com/ngs/middleman-blog-similar'
  s.summary = 'Similar article extension for middleman-blog'
  s.description = <<-EOF
    Similar article extension for middleman-blog.
    Finds similar articles using tags, part-of-speech or custom lambda.
  EOF
  s.license = 'MIT'
  # File lists come from git, NUL-separated.
  s.files = `git ls-files -z`.split("\0")
  s.test_files = `git ls-files -z -- {fixtures,features,spec}/*`.split("\0")
  s.require_paths = ['lib']
  s.required_ruby_version = '>= 2.2.0'
  s.add_runtime_dependency 'activerecord', '~> 5.0', '>= 5.0.0'
  s.add_runtime_dependency 'middleman-core', '~> 4.0', '>= 4.0.0'
  s.add_runtime_dependency 'middleman-blog', '~> 4.0', '>= 4.0.0'
  s.add_dependency 'sqlite3', '~> 1.3'
end

# --------------------------------------------------------------------------------
# /spec/middleman-blog-similar/extension_spec.rb:
# --------------------------------------------------------------------------------
require 'spec_helper'

# rubocop:disable Metrics/BlockLength
describe Middleman::Blog::SimilarExtension do
  # One app is built for the whole group and exposed through `app`.
  before(:all) do
    @app = middleman_app('test-app') { activate :similar, db: ':memory:' }
  end

  let(:app) { @app }
  let(:page_id) { '2014-05-08-article0' }
  # The sitemap resource whose page_id matches `page_id`.
  let(:resource) do
    app.sitemap.resources.find { |res| res.page_id == page_id }
  end

  describe 'activation' do
    subject { app.extensions }

    its([:similar]) { is_expected.not_to be_nil }
    its([:blog]) { is_expected.not_to be_nil }

    context 'with unknown tagger' do
      let(:app) do
        middleman_app('test-app') { activate :similar, db: ':memory:', tagger: 'hoge' }
      end

      it do
        expect { app }.to raise_error LoadError, 'cannot load such file -- middleman-blog-similar/tagger/hoge'
      end
    end

    context 'with invalid tagger' do
      let(:app) do
        middleman_app('test-app') { activate :similar, db: ':memory:', tagger: 1 }
      end

      it do
        expect { app }.to raise_error RuntimeError, /Invalid type for tagger option/
      end
    end
  end

  describe 'results' do
    # Each similar article is flattened to [page_id, category-or-'', *tags].
    subject do
      resource.similar_articles.map do |a|
        [a.page_id, a.data.category || ''].concat a.tags
      end
    end

    it { is_expected.to have(4).items }
    its([0]) { is_expected.to eq %w[2014-05-14-article6 dog Brown cat] }
    its([1]) { is_expected.to eq %w[2014-05-12-article4 dog cat fox] }
    its([2]) { is_expected.to eq %w[2014-05-09-article1 test dog cat] }
    its([3]) { is_expected.to eq %w[2014-05-13-article5 dog] }

    context 'when configured with weight map' do
      before :all do
        @app = middleman_app('test-app') do
          activate :similar,
                   db: ':memory:',
                   tagger: {
                     tags: 1,
                     category: [2, ->(article) { article.data.category ? [article.data.category] : [] }]
                   }
        end
      end

      it { is_expected.to have(4).items }
      its([0]) { is_expected.to eq %w[2014-05-09-article1 test dog cat] }
      its([1]) { is_expected.to eq %w[2014-05-14-article6 dog Brown cat] }
      its([2]) { is_expected.to eq %w[2014-05-12-article4 dog cat fox] }
      its([3]) { is_expected.to eq %w[2014-05-13-article5 dog] }
    end
  end
end

# --------------------------------------------------------------------------------
# /spec/middleman-blog-similar/tagger_spec.rb:
# --------------------------------------------------------------------------------
require 'spec_helper'

describe 'tagger' do
  before(:all) { @app = middleman_app('test-app') {} }

  let(:app) { @app }
  let(:page_id) { '2014-05-08-article0' }
  # The sitemap resource whose page_id matches `page_id`.
  let(:resource) do
    app.sitemap.resources.find { |res| res.page_id == page_id }
  end

  # Every nested group runs its described tagger class against `resource`.
  subject { described_class.new.call resource }

  describe ::Middleman::Blog::Similar::Tagger::Tags do
    it { is_expected.to eq %w[dog cat brown] }
  end

  describe ::Middleman::Blog::Similar::Tagger::Mecab do
    let(:page_id) { '2014-05-11-article3' }

    it { is_expected.to eq %w[叫び 悪口 教師 西洋 尻 時分 坊ちゃん 国家 この世 人間 国家 隙] }

    context 'written in English' do
      let(:page_id) { '2014-05-10-article2' }

      it { is_expected.to eq %w[quick brown fox jumps over the lazy dog dog dog] }
    end
  end

  describe ::Middleman::Blog::Similar::Tagger::Entagger do
    let(:page_id) { '2014-05-10-article2' }

    it { is_expected.to eq %w[fox jumps dog] }
  end
end

# --------------------------------------------------------------------------------
# /spec/spec_helper.rb:
# --------------------------------------------------------------------------------
PROJECT_ROOT_PATH = File.dirname(File.dirname(__FILE__))

require 'rubygems'
$LOAD_PATH.unshift File.join(PROJECT_ROOT_PATH, 'lib')
require 'rspec'
require 'rspec/collection_matchers'
require 'rspec/its'
require 'middleman-core'
require 'middleman-blog'
require 'middleman-blog/helpers'

require 'codeclimate-test-reporter'
require 'coveralls'
require 'simplecov'

SimpleCov.formatter = SimpleCov::Formatter::MultiFormatter[
  Coveralls::SimpleCov::Formatter,
  SimpleCov::Formatter::HTMLFormatter,
  CodeClimate::TestReporter::Formatter
]

SimpleCov.root(File.expand_path(File.dirname(__FILE__) + '/..'))
SimpleCov.start

# Coveralls is only enabled when a repo token is present in the environment.
Coveralls.wear! if ENV['COVERALLS_REPO_TOKEN']

# Helpers mixed into every example group.
module SpecHelpers
  include FileUtils

  # Copies the fixtures directory into tmp/rspec, points MM_ROOT/MM_SOURCE at
  # the requested fixture, and builds a Middleman application configured by
  # the given block.
  #
  # @param fixture_path [String] fixture directory name under fixtures/
  # @yield configuration evaluated in the context of the application setup
  # @return [Middleman::Application]
  def middleman_app(fixture_path, &block)
    tmp_dir = File.expand_path('../../tmp', __FILE__)
    fixture_dir = File.expand_path('../../fixtures', __FILE__)
    fixture_tmp = File.join(tmp_dir, 'rspec')
    root_dir = File.join(fixture_tmp, fixture_path)

    # Start from a fresh copy of the fixtures on every call.
    rmtree fixture_tmp
    mkdir_p tmp_dir
    cp_r fixture_dir, fixture_tmp

    ENV['MM_SOURCE'] = 'source'
    ENV['MM_ROOT'] = root_dir

    base_setup = lambda {
      set :environment, :development
      set :show_exceptions, false
      activate :blog
    }

    # Resulting order: base setup, caller's block, then any commands already
    # present in @initialize_commands.
    initialize_commands = @initialize_commands || []
    initialize_commands.unshift(base_setup, block)

    ::Middleman::Application.new do
      initialize_commands.each do |command|
        instance_exec(&command)
      end
    end
  end
end

RSpec.configure do |config|
  config.include SpecHelpers
end

require 'middleman-blog-similar/extension'
require 'middleman-blog-similar'
# Load every tagger implementation shipped under lib/.
Dir.glob(PROJECT_ROOT_PATH + '/lib/middleman-blog-similar/tagger/*') { |file| require file }

class String
  # Removes the smallest common leading indentation from every line and drops
  # a single trailing newline.
  #
  # Only lines containing a non-whitespace character take part in finding the
  # common indentation, so blank lines neither shrink it nor get deleted.
  #
  # @return [String] a new, unindented string
  def unindent
    # `[ \t]*` cannot cross a newline; the lookahead skips blank lines.
    indent = scan(/^[ \t]*(?=\S)/).min_by(&:length)
    # `\z` anchors to the very end of the string, so only the final newline
    # is removed (never one in front of an interior blank line).
    gsub(/^#{indent}/, '').sub(/\n\z/, '')
  end
end

# --------------------------------------------------------------------------------